aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/6lowpan_i.h28
-rw-r--r--net/6lowpan/Kconfig47
-rw-r--r--net/6lowpan/Makefile9
-rw-r--r--net/6lowpan/core.c59
-rw-r--r--net/6lowpan/debugfs.c53
-rw-r--r--net/6lowpan/nhc_ghc_ext_dest.c27
-rw-r--r--net/6lowpan/nhc_ghc_ext_frag.c28
-rw-r--r--net/6lowpan/nhc_ghc_ext_hop.c27
-rw-r--r--net/6lowpan/nhc_ghc_ext_route.c27
-rw-r--r--net/6lowpan/nhc_ghc_icmpv6.c27
-rw-r--r--net/6lowpan/nhc_ghc_udp.c27
-rw-r--r--net/8021q/vlan_core.c4
-rw-r--r--net/8021q/vlan_dev.c11
-rw-r--r--net/9p/trans_fd.c88
-rw-r--r--net/9p/trans_rdma.c4
-rw-r--r--net/9p/trans_virtio.c18
-rw-r--r--net/Kconfig9
-rw-r--r--net/appletalk/ddp.c2
-rw-r--r--net/atm/common.c4
-rw-r--r--net/atm/mpc.h4
-rw-r--r--net/atm/mpoa_caches.c4
-rw-r--r--net/ax25/af_ax25.c3
-rw-r--r--net/batman-adv/bat_iv_ogm.c80
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c65
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h2
-rw-r--r--net/batman-adv/debugfs.c9
-rw-r--r--net/batman-adv/distributed-arp-table.c5
-rw-r--r--net/batman-adv/fragmentation.c8
-rw-r--r--net/batman-adv/gateway_client.c7
-rw-r--r--net/batman-adv/gateway_common.c117
-rw-r--r--net/batman-adv/hard-interface.c36
-rw-r--r--net/batman-adv/hard-interface.h12
-rw-r--r--net/batman-adv/main.c23
-rw-r--r--net/batman-adv/main.h4
-rw-r--r--net/batman-adv/multicast.c2
-rw-r--r--net/batman-adv/network-coding.c23
-rw-r--r--net/batman-adv/originator.c325
-rw-r--r--net/batman-adv/originator.h8
-rw-r--r--net/batman-adv/packet.h3
-rw-r--r--net/batman-adv/routing.c25
-rw-r--r--net/batman-adv/send.c3
-rw-r--r--net/batman-adv/sysfs.c16
-rw-r--r--net/batman-adv/translation-table.c74
-rw-r--r--net/batman-adv/types.h66
-rw-r--r--net/bluetooth/6lowpan.c15
-rw-r--r--net/bluetooth/af_bluetooth.c32
-rw-r--r--net/bluetooth/bnep/core.c7
-rw-r--r--net/bluetooth/cmtp/capi.c8
-rw-r--r--net/bluetooth/cmtp/core.c3
-rw-r--r--net/bluetooth/hci_conn.c123
-rw-r--r--net/bluetooth/hci_core.c670
-rw-r--r--net/bluetooth/hci_event.c14
-rw-r--r--net/bluetooth/hci_request.c1806
-rw-r--r--net/bluetooth/hci_request.h53
-rw-r--r--net/bluetooth/hci_sock.c209
-rw-r--r--net/bluetooth/l2cap_core.c53
-rw-r--r--net/bluetooth/l2cap_sock.c12
-rw-r--r--net/bluetooth/mgmt.c1899
-rw-r--r--net/bluetooth/rfcomm/core.c46
-rw-r--r--net/bluetooth/sco.c3
-rw-r--r--net/bluetooth/smp.c158
-rw-r--r--net/bridge/br.c3
-rw-r--r--net/bridge/br_device.c8
-rw-r--r--net/bridge/br_fdb.c16
-rw-r--r--net/bridge/br_if.c13
-rw-r--r--net/bridge/br_mdb.c27
-rw-r--r--net/bridge/br_stp.c17
-rw-r--r--net/bridge/br_stp_if.c10
-rw-r--r--net/bridge/br_sysfs_br.c3
-rw-r--r--net/bridge/br_vlan.c31
-rw-r--r--net/bridge/netfilter/ebt_ip6.c4
-rw-r--r--net/bridge/netfilter/ebt_log.c9
-rw-r--r--net/bridge/netfilter/ebt_stp.c2
-rw-r--r--net/bridge/netfilter/ebt_vlan.c15
-rw-r--r--net/bridge/netfilter/ebtable_filter.c2
-rw-r--r--net/bridge/netfilter/ebtable_nat.c2
-rw-r--r--net/bridge/netfilter/ebtables.c139
-rw-r--r--net/bridge/netfilter/nf_tables_bridge.c2
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c1
-rw-r--r--net/caif/caif_socket.c4
-rw-r--r--net/caif/cfrfml.c2
-rw-r--r--net/ceph/auth_x.c85
-rw-r--r--net/ceph/auth_x.h2
-rw-r--r--net/ceph/ceph_common.c18
-rw-r--r--net/ceph/crush/mapper.c33
-rw-r--r--net/ceph/crypto.c101
-rw-r--r--net/ceph/crypto.h4
-rw-r--r--net/ceph/messenger.c208
-rw-r--r--net/ceph/mon_client.c4
-rw-r--r--net/ceph/osd_client.c48
-rw-r--r--net/ceph/osdmap.c19
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/datagram.c79
-rw-r--r--net/core/dev.c459
-rw-r--r--net/core/dst.c3
-rw-r--r--net/core/ethtool.c85
-rw-r--r--net/core/filter.c303
-rw-r--r--net/core/flow_dissector.c16
-rw-r--r--net/core/neighbour.c6
-rw-r--r--net/core/net-sysfs.c5
-rw-r--r--net/core/net-traces.c4
-rw-r--r--net/core/netclassid_cgroup.c39
-rw-r--r--net/core/netprio_cgroup.c28
-rw-r--r--net/core/pktgen.c8
-rw-r--r--net/core/rtnetlink.c301
-rw-r--r--net/core/scm.c13
-rw-r--r--net/core/skbuff.c38
-rw-r--r--net/core/sock.c157
-rw-r--r--net/core/sock_diag.c23
-rw-r--r--net/core/sock_reuseport.c258
-rw-r--r--net/core/stream.c8
-rw-r--r--net/core/sysctl_net_core.c10
-rw-r--r--net/dccp/ipv4.c14
-rw-r--r--net/dccp/ipv6.c51
-rw-r--r--net/dccp/output.c2
-rw-r--r--net/dccp/proto.c3
-rw-r--r--net/decnet/af_decnet.c11
-rw-r--r--net/dns_resolver/dns_query.c2
-rw-r--r--net/dsa/dsa.c80
-rw-r--r--net/dsa/dsa_priv.h1
-rw-r--r--net/dsa/slave.c35
-rw-r--r--net/ethernet/eth.c31
-rw-r--r--net/hsr/hsr_device.c2
-rw-r--r--net/ieee802154/6lowpan/core.c6
-rw-r--r--net/ieee802154/6lowpan/reassembly.c1
-rw-r--r--net/ipv4/Kconfig14
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c3
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/fib_frontend.c9
-rw-r--r--net/ipv4/fib_semantics.c13
-rw-r--r--net/ipv4/fib_trie.c11
-rw-r--r--net/ipv4/fou.c5
-rw-r--r--net/ipv4/igmp.c20
-rw-r--r--net/ipv4/inet_connection_sock.c18
-rw-r--r--net/ipv4/inet_diag.c86
-rw-r--r--net/ipv4/inet_fragment.c10
-rw-r--r--net/ipv4/ip_fragment.c2
-rw-r--r--net/ipv4/ip_gre.c21
-rw-r--r--net/ipv4/ip_input.c5
-rw-r--r--net/ipv4/ip_output.c11
-rw-r--r--net/ipv4/ip_sockglue.c47
-rw-r--r--net/ipv4/ip_tunnel.c31
-rw-r--r--net/ipv4/ip_tunnel_core.c26
-rw-r--r--net/ipv4/ip_vti.c3
-rw-r--r--net/ipv4/ipconfig.c66
-rw-r--r--net/ipv4/ipip.c4
-rw-r--r--net/ipv4/ipmr.c764
-rw-r--r--net/ipv4/netfilter/Kconfig1
-rw-r--r--net/ipv4/netfilter/arp_tables.c6
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c7
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c3
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c22
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c1
-rw-r--r--net/ipv4/netfilter/nf_tables_arp.c2
-rw-r--r--net/ipv4/netfilter/nf_tables_ipv4.c2
-rw-r--r--net/ipv4/ping.c6
-rw-r--r--net/ipv4/raw.c19
-rw-r--r--net/ipv4/route.c77
-rw-r--r--net/ipv4/syncookies.c7
-rw-r--r--net/ipv4/sysctl_net_ipv4.c58
-rw-r--r--net/ipv4/tcp.c138
-rw-r--r--net/ipv4/tcp_diag.c21
-rw-r--r--net/ipv4/tcp_fastopen.c1
-rw-r--r--net/ipv4/tcp_input.c45
-rw-r--r--net/ipv4/tcp_ipv4.c128
-rw-r--r--net/ipv4/tcp_memcontrol.c233
-rw-r--r--net/ipv4/tcp_metrics.c2
-rw-r--r--net/ipv4/tcp_minisocks.c13
-rw-r--r--net/ipv4/tcp_output.c44
-rw-r--r--net/ipv4/tcp_timer.c17
-rw-r--r--net/ipv4/tcp_yeah.c2
-rw-r--r--net/ipv4/udp.c166
-rw-r--r--net/ipv4/udp_diag.c4
-rw-r--r--net/ipv4/udp_offload.c15
-rw-r--r--net/ipv4/udp_tunnel.c13
-rw-r--r--net/ipv4/xfrm4_policy.c46
-rw-r--r--net/ipv6/Kconfig2
-rw-r--r--net/ipv6/Makefile2
-rw-r--r--net/ipv6/addrconf.c106
-rw-r--r--net/ipv6/addrlabel.c2
-rw-r--r--net/ipv6/af_inet6.c18
-rw-r--r--net/ipv6/datagram.c7
-rw-r--r--net/ipv6/exthdrs.c3
-rw-r--r--net/ipv6/exthdrs_core.c6
-rw-r--r--net/ipv6/icmp.c14
-rw-r--r--net/ipv6/ila/Makefile7
-rw-r--r--net/ipv6/ila/ila.h48
-rw-r--r--net/ipv6/ila/ila_common.c103
-rw-r--r--net/ipv6/ila/ila_lwt.c (renamed from net/ipv6/ila.c)83
-rw-r--r--net/ipv6/ila/ila_xlat.c680
-rw-r--r--net/ipv6/inet6_connection_sock.c25
-rw-r--r--net/ipv6/ip6_flowlabel.c5
-rw-r--r--net/ipv6/ip6_gre.c12
-rw-r--r--net/ipv6/ip6_output.c10
-rw-r--r--net/ipv6/ip6_tunnel.c4
-rw-r--r--net/ipv6/ip6mr.c19
-rw-r--r--net/ipv6/ipv6_sockglue.c33
-rw-r--r--net/ipv6/mcast.c5
-rw-r--r--net/ipv6/ndisc.c14
-rw-r--r--net/ipv6/netfilter/Kconfig1
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c172
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c20
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c3
-rw-r--r--net/ipv6/netfilter/nf_nat_masquerade_ipv6.c74
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c1
-rw-r--r--net/ipv6/netfilter/nf_tables_ipv6.c2
-rw-r--r--net/ipv6/raw.c28
-rw-r--r--net/ipv6/reassembly.c11
-rw-r--r--net/ipv6/route.c51
-rw-r--r--net/ipv6/sit.c11
-rw-r--r--net/ipv6/syncookies.c9
-rw-r--r--net/ipv6/tcp_ipv6.c137
-rw-r--r--net/ipv6/udp.c94
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c2
-rw-r--r--net/ipv6/xfrm6_policy.c53
-rw-r--r--net/irda/af_irda.c3
-rw-r--r--net/irda/ircomm/ircomm_param.c3
-rw-r--r--net/irda/ircomm/ircomm_tty.c15
-rw-r--r--net/irda/ircomm/ircomm_tty_ioctl.c13
-rw-r--r--net/iucv/af_iucv.c29
-rw-r--r--net/l2tp/l2tp_ip6.c8
-rw-r--r--net/l2tp/l2tp_netlink.c18
-rw-r--r--net/l2tp/l2tp_ppp.c19
-rw-r--r--net/mac80211/agg-rx.c2
-rw-r--r--net/mac80211/agg-tx.c3
-rw-r--r--net/mac80211/cfg.c531
-rw-r--r--net/mac80211/debugfs.c8
-rw-r--r--net/mac80211/ibss.c2
-rw-r--r--net/mac80211/ieee80211_i.h31
-rw-r--r--net/mac80211/iface.c5
-rw-r--r--net/mac80211/key.c56
-rw-r--r--net/mac80211/main.c7
-rw-r--r--net/mac80211/mesh.c11
-rw-r--r--net/mac80211/mesh.h4
-rw-r--r--net/mac80211/mesh_pathtbl.c12
-rw-r--r--net/mac80211/mlme.c25
-rw-r--r--net/mac80211/offchannel.c831
-rw-r--r--net/mac80211/rc80211_minstrel.c2
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c16
-rw-r--r--net/mac80211/rx.c43
-rw-r--r--net/mac80211/scan.c29
-rw-r--r--net/mac80211/sta_info.c163
-rw-r--r--net/mac80211/sta_info.h2
-rw-r--r--net/mac80211/status.c5
-rw-r--r--net/mac80211/trace.h25
-rw-r--r--net/mac80211/tx.c2
-rw-r--r--net/mac80211/util.c136
-rw-r--r--net/mac80211/vht.c10
-rw-r--r--net/mac802154/driver-ops.h3
-rw-r--r--net/mac802154/llsec.c41
-rw-r--r--net/mac802154/llsec.h3
-rw-r--r--net/mac802154/mac_cmd.c2
-rw-r--r--net/mac802154/rx.c3
-rw-r--r--net/mac802154/tx.c9
-rw-r--r--net/mpls/af_mpls.c228
-rw-r--r--net/mpls/internal.h2
-rw-r--r--net/mpls/mpls_iptunnel.c6
-rw-r--r--net/netfilter/Kconfig28
-rw-r--r--net/netfilter/Makefile9
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h17
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c14
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c64
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c18
-rw-r--r--net/netfilter/ipset/ip_set_core.c122
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h26
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c4
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c16
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c43
-rw-r--r--net/netfilter/nf_conntrack_expect.c7
-rw-r--r--net/netfilter/nf_conntrack_ftp.c17
-rw-r--r--net/netfilter/nf_conntrack_helper.c2
-rw-r--r--net/netfilter/nf_conntrack_irc.c7
-rw-r--r--net/netfilter/nf_conntrack_netlink.c98
-rw-r--r--net/netfilter/nf_conntrack_sane.c19
-rw-r--r--net/netfilter/nf_conntrack_sip.c5
-rw-r--r--net/netfilter/nf_conntrack_standalone.c7
-rw-r--r--net/netfilter/nf_conntrack_tftp.c7
-rw-r--r--net/netfilter/nf_conntrack_timeout.c2
-rw-r--r--net/netfilter/nf_dup_netdev.c40
-rw-r--r--net/netfilter/nf_nat_redirect.c2
-rw-r--r--net/netfilter/nf_tables_api.c263
-rw-r--r--net/netfilter/nf_tables_core.c62
-rw-r--r--net/netfilter/nf_tables_inet.c2
-rw-r--r--net/netfilter/nf_tables_netdev.c57
-rw-r--r--net/netfilter/nf_tables_trace.c275
-rw-r--r--net/netfilter/nfnetlink.c48
-rw-r--r--net/netfilter/nfnetlink_acct.c21
-rw-r--r--net/netfilter/nfnetlink_cthelper.c18
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c122
-rw-r--r--net/netfilter/nfnetlink_log.c42
-rw-r--r--net/netfilter/nfnetlink_queue.c132
-rw-r--r--net/netfilter/nft_byteorder.c21
-rw-r--r--net/netfilter/nft_compat.c6
-rw-r--r--net/netfilter/nft_counter.c51
-rw-r--r--net/netfilter/nft_ct.c39
-rw-r--r--net/netfilter/nft_dup_netdev.c97
-rw-r--r--net/netfilter/nft_dynset.c5
-rw-r--r--net/netfilter/nft_fwd_netdev.c98
-rw-r--r--net/netfilter/nft_limit.c16
-rw-r--r--net/netfilter/nft_meta.c90
-rw-r--r--net/netfilter/nft_payload.c135
-rw-r--r--net/netfilter/x_tables.c12
-rw-r--r--net/netfilter/xt_CT.c2
-rw-r--r--net/netfilter/xt_TCPMSS.c9
-rw-r--r--net/netfilter/xt_TEE.c10
-rw-r--r--net/netfilter/xt_cgroup.c108
-rw-r--r--net/netfilter/xt_osf.c7
-rw-r--r--net/netfilter/xt_owner.c6
-rw-r--r--net/netlink/af_netlink.c9
-rw-r--r--net/netlink/genetlink.c18
-rw-r--r--net/nfc/core.c13
-rw-r--r--net/nfc/digital_core.c3
-rw-r--r--net/nfc/llcp_sock.c2
-rw-r--r--net/nfc/nci/core.c6
-rw-r--r--net/nfc/nci/hci.c2
-rw-r--r--net/nfc/nci/uart.c9
-rw-r--r--net/nfc/netlink.c37
-rw-r--r--net/nfc/nfc.h1
-rw-r--r--net/openvswitch/actions.c19
-rw-r--r--net/openvswitch/conntrack.c47
-rw-r--r--net/openvswitch/datapath.c5
-rw-r--r--net/openvswitch/dp_notify.c2
-rw-r--r--net/openvswitch/flow_netlink.c5
-rw-r--r--net/openvswitch/vport-geneve.c7
-rw-r--r--net/openvswitch/vport-gre.c1
-rw-r--r--net/openvswitch/vport-netdev.c10
-rw-r--r--net/openvswitch/vport-vxlan.c4
-rw-r--r--net/openvswitch/vport.c8
-rw-r--r--net/openvswitch/vport.h31
-rw-r--r--net/packet/af_packet.c176
-rw-r--r--net/phonet/af_phonet.c4
-rw-r--r--net/rds/connection.c6
-rw-r--r--net/rds/ib.c36
-rw-r--r--net/rds/ib.h6
-rw-r--r--net/rds/ib_cm.c2
-rw-r--r--net/rds/ib_recv.c4
-rw-r--r--net/rds/ib_send.c71
-rw-r--r--net/rds/iw.c25
-rw-r--r--net/rds/iw.h9
-rw-r--r--net/rds/iw_cm.c2
-rw-r--r--net/rds/iw_rdma.c129
-rw-r--r--net/rds/iw_send.c154
-rw-r--r--net/rds/page.c35
-rw-r--r--net/rds/rdma_transport.c4
-rw-r--r--net/rds/send.c4
-rw-r--r--net/rfkill/core.c22
-rw-r--r--net/rfkill/rfkill-gpio.c4
-rw-r--r--net/rxrpc/af_rxrpc.c2
-rw-r--r--net/rxrpc/ar-ack.c4
-rw-r--r--net/rxrpc/ar-connection.c2
-rw-r--r--net/rxrpc/ar-internal.h2
-rw-r--r--net/rxrpc/ar-key.c36
-rw-r--r--net/rxrpc/ar-output.c2
-rw-r--r--net/rxrpc/rxkad.c172
-rw-r--r--net/sched/Kconfig14
-rw-r--r--net/sched/act_ipt.c2
-rw-r--r--net/sched/cls_bpf.c8
-rw-r--r--net/sched/cls_flow.c15
-rw-r--r--net/sched/cls_flower.c10
-rw-r--r--net/sched/em_meta.c138
-rw-r--r--net/sched/sch_api.c26
-rw-r--r--net/sched/sch_drr.c2
-rw-r--r--net/sched/sch_generic.c6
-rw-r--r--net/sched/sch_ingress.c88
-rw-r--r--net/sched/sch_mq.c4
-rw-r--r--net/sched/sch_mqprio.c4
-rw-r--r--net/sctp/associola.c7
-rw-r--r--net/sctp/auth.c40
-rw-r--r--net/sctp/endpointola.c53
-rw-r--r--net/sctp/input.c200
-rw-r--r--net/sctp/ipv6.c26
-rw-r--r--net/sctp/output.c2
-rw-r--r--net/sctp/outqueue.c2
-rw-r--r--net/sctp/proc.c326
-rw-r--r--net/sctp/protocol.c83
-rw-r--r--net/sctp/sm_make_chunk.c55
-rw-r--r--net/sctp/sm_sideeffect.c30
-rw-r--r--net/sctp/sm_statefuns.c26
-rw-r--r--net/sctp/socket.c92
-rw-r--r--net/sctp/sysctl.c9
-rw-r--r--net/sctp/transport.c10
-rw-r--r--net/socket.c27
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c15
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c350
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_keys.c12
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c89
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seqnum.c22
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c24
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_upcall.c3
-rw-r--r--net/sunrpc/backchannel_rqst.c24
-rw-r--r--net/sunrpc/cache.c65
-rw-r--r--net/sunrpc/clnt.c1
-rw-r--r--net/sunrpc/rpc_pipe.c62
-rw-r--r--net/sunrpc/sched.c6
-rw-r--r--net/sunrpc/svc.c18
-rw-r--r--net/sunrpc/svc_xprt.c45
-rw-r--r--net/sunrpc/svcauth.c2
-rw-r--r--net/sunrpc/svcauth_unix.c8
-rw-r--r--net/sunrpc/svcsock.c40
-rw-r--r--net/sunrpc/sysctl.c23
-rw-r--r--net/sunrpc/xprt.c1
-rw-r--r--net/sunrpc/xprtrdma/Makefile3
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c394
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c64
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c281
-rw-r--r--net/sunrpc/xprtrdma/physical_ops.c13
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c164
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c47
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c371
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c179
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c51
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c456
-rw-r--r--net/sunrpc/xprtrdma/transport.c51
-rw-r--r--net/sunrpc/xprtrdma/verbs.c508
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h92
-rw-r--r--net/sunrpc/xprtsock.c323
-rw-r--r--net/switchdev/switchdev.c34
-rw-r--r--net/tipc/bcast.c126
-rw-r--r--net/tipc/bcast.h1
-rw-r--r--net/tipc/bearer.c140
-rw-r--r--net/tipc/bearer.h8
-rw-r--r--net/tipc/core.h5
-rw-r--r--net/tipc/discover.c38
-rw-r--r--net/tipc/link.c630
-rw-r--r--net/tipc/link.h175
-rw-r--r--net/tipc/name_distr.c68
-rw-r--r--net/tipc/name_distr.h1
-rw-r--r--net/tipc/name_table.c5
-rw-r--r--net/tipc/netlink.c8
-rw-r--r--net/tipc/netlink_compat.c8
-rw-r--r--net/tipc/node.c881
-rw-r--r--net/tipc/node.h127
-rw-r--r--net/tipc/socket.c47
-rw-r--r--net/tipc/subscr.c8
-rw-r--r--net/tipc/udp_media.c24
-rw-r--r--net/unix/af_unix.c465
-rw-r--r--net/unix/diag.c2
-rw-r--r--net/unix/garbage.c17
-rw-r--r--net/vmw_vsock/af_vsock.c19
-rw-r--r--net/vmw_vsock/vmci_transport.c2
-rw-r--r--net/vmw_vsock/vmci_transport.h2
-rw-r--r--net/vmw_vsock/vmci_transport_notify.c2
-rw-r--r--net/vmw_vsock/vmci_transport_notify.h5
-rw-r--r--net/vmw_vsock/vmci_transport_notify_qstate.c2
-rw-r--r--net/wireless/core.c2
-rw-r--r--net/wireless/core.h7
-rw-r--r--net/wireless/lib80211_crypt_ccmp.c4
-rw-r--r--net/wireless/lib80211_crypt_tkip.c103
-rw-r--r--net/wireless/lib80211_crypt_wep.c46
-rw-r--r--net/wireless/nl80211.c69
-rw-r--r--net/wireless/ocb.c3
-rw-r--r--net/wireless/rdev-ops.h51
-rw-r--r--net/wireless/reg.c148
-rw-r--r--net/wireless/sme.c6
-rw-r--r--net/wireless/trace.h103
-rw-r--r--net/wireless/util.c121
-rw-r--r--net/wireless/wext-core.c52
-rw-r--r--net/xfrm/xfrm_algo.c7
-rw-r--r--net/xfrm/xfrm_output.c2
-rw-r--r--net/xfrm/xfrm_policy.c88
465 files changed, 17661 insertions, 10568 deletions
diff --git a/net/6lowpan/6lowpan_i.h b/net/6lowpan/6lowpan_i.h
new file mode 100644
index 000000000000..d16bb4b14aa1
--- /dev/null
+++ b/net/6lowpan/6lowpan_i.h
@@ -0,0 +1,28 @@
+#ifndef __6LOWPAN_I_H
+#define __6LOWPAN_I_H
+
+#include <linux/netdevice.h>
+
+#ifdef CONFIG_6LOWPAN_DEBUGFS
+int lowpan_dev_debugfs_init(struct net_device *dev);
+void lowpan_dev_debugfs_exit(struct net_device *dev);
+
+int __init lowpan_debugfs_init(void);
+void lowpan_debugfs_exit(void);
+#else
+static inline int lowpan_dev_debugfs_init(struct net_device *dev)
+{
+ return 0;
+}
+
+static inline void lowpan_dev_debugfs_exit(struct net_device *dev) { }
+
+static inline int __init lowpan_debugfs_init(void)
+{
+ return 0;
+}
+
+static inline void lowpan_debugfs_exit(void) { }
+#endif /* CONFIG_6LOWPAN_DEBUGFS */
+
+#endif /* __6LOWPAN_I_H */
diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig
index 7fa0f382e7d1..9c051512d14f 100644
--- a/net/6lowpan/Kconfig
+++ b/net/6lowpan/Kconfig
@@ -5,12 +5,21 @@ menuconfig 6LOWPAN
This enables IPv6 over Low power Wireless Personal Area Network -
"6LoWPAN" which is supported by IEEE 802.15.4 or Bluetooth stacks.
+config 6LOWPAN_DEBUGFS
+ bool "6LoWPAN debugfs support"
+ depends on 6LOWPAN
+ depends on DEBUG_FS
+ ---help---
+ This enables 6LoWPAN debugfs support. For example to manipulate
+ IPHC context information at runtime.
+
menuconfig 6LOWPAN_NHC
- tristate "Next Header Compression Support"
+ tristate "Next Header and Generic Header Compression Support"
depends on 6LOWPAN
default y
---help---
- Support for next header compression.
+ Support for next header and generic header compression defined in
+ RFC6282 and RFC7400.
if 6LOWPAN_NHC
@@ -58,4 +67,38 @@ config 6LOWPAN_NHC_UDP
---help---
6LoWPAN IPv6 UDP Header compression according to RFC6282.
+config 6LOWPAN_GHC_EXT_HDR_HOP
+ tristate "GHC Hop-by-Hop Options Header Support"
+ ---help---
+ 6LoWPAN IPv6 Hop-by-Hop option generic header compression according
+ to RFC7400.
+
+config 6LOWPAN_GHC_UDP
+ tristate "GHC UDP Support"
+ ---help---
+ 6LoWPAN IPv6 UDP generic header compression according to RFC7400.
+
+config 6LOWPAN_GHC_ICMPV6
+ tristate "GHC ICMPv6 Support"
+ ---help---
+ 6LoWPAN IPv6 ICMPv6 generic header compression according to RFC7400.
+
+config 6LOWPAN_GHC_EXT_HDR_DEST
+ tristate "GHC Destination Options Header Support"
+ ---help---
+ 6LoWPAN IPv6 destination option generic header compression according
+ to RFC7400.
+
+config 6LOWPAN_GHC_EXT_HDR_FRAG
+ tristate "GHC Fragmentation Options Header Support"
+ ---help---
+ 6LoWPAN IPv6 fragmentation option generic header compression
+ according to RFC7400.
+
+config 6LOWPAN_GHC_EXT_HDR_ROUTE
+ tristate "GHC Routing Options Header Support"
+ ---help---
+ 6LoWPAN IPv6 routing option generic header compression according
+ to RFC7400.
+
endif
diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile
index c6ffc55ee0d7..e44f3bf2dd42 100644
--- a/net/6lowpan/Makefile
+++ b/net/6lowpan/Makefile
@@ -1,6 +1,7 @@
obj-$(CONFIG_6LOWPAN) += 6lowpan.o
6lowpan-y := core.o iphc.o nhc.o
+6lowpan-$(CONFIG_6LOWPAN_DEBUGFS) += debugfs.o
#rfc6282 nhcs
obj-$(CONFIG_6LOWPAN_NHC_DEST) += nhc_dest.o
@@ -10,3 +11,11 @@ obj-$(CONFIG_6LOWPAN_NHC_IPV6) += nhc_ipv6.o
obj-$(CONFIG_6LOWPAN_NHC_MOBILITY) += nhc_mobility.o
obj-$(CONFIG_6LOWPAN_NHC_ROUTING) += nhc_routing.o
obj-$(CONFIG_6LOWPAN_NHC_UDP) += nhc_udp.o
+
+#rfc7400 ghcs
+obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_HOP) += nhc_ghc_ext_hop.o
+obj-$(CONFIG_6LOWPAN_GHC_UDP) += nhc_ghc_udp.o
+obj-$(CONFIG_6LOWPAN_GHC_ICMPV6) += nhc_ghc_icmpv6.o
+obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_DEST) += nhc_ghc_ext_dest.o
+obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG) += nhc_ghc_ext_frag.o
+obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE) += nhc_ghc_ext_route.o
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index 83b19e072224..faf65baed617 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -15,19 +15,67 @@
#include <net/6lowpan.h>
-void lowpan_netdev_setup(struct net_device *dev, enum lowpan_lltypes lltype)
+#include "6lowpan_i.h"
+
+int lowpan_register_netdevice(struct net_device *dev,
+ enum lowpan_lltypes lltype)
{
+ int ret;
+
dev->addr_len = EUI64_ADDR_LEN;
dev->type = ARPHRD_6LOWPAN;
dev->mtu = IPV6_MIN_MTU;
dev->priv_flags |= IFF_NO_QUEUE;
lowpan_priv(dev)->lltype = lltype;
+
+ ret = register_netdevice(dev);
+ if (ret < 0)
+ return ret;
+
+ ret = lowpan_dev_debugfs_init(dev);
+ if (ret < 0)
+ unregister_netdevice(dev);
+
+ return ret;
}
-EXPORT_SYMBOL(lowpan_netdev_setup);
+EXPORT_SYMBOL(lowpan_register_netdevice);
+
+int lowpan_register_netdev(struct net_device *dev,
+ enum lowpan_lltypes lltype)
+{
+ int ret;
+
+ rtnl_lock();
+ ret = lowpan_register_netdevice(dev, lltype);
+ rtnl_unlock();
+ return ret;
+}
+EXPORT_SYMBOL(lowpan_register_netdev);
+
+void lowpan_unregister_netdevice(struct net_device *dev)
+{
+ unregister_netdevice(dev);
+ lowpan_dev_debugfs_exit(dev);
+}
+EXPORT_SYMBOL(lowpan_unregister_netdevice);
+
+void lowpan_unregister_netdev(struct net_device *dev)
+{
+ rtnl_lock();
+ lowpan_unregister_netdevice(dev);
+ rtnl_unlock();
+}
+EXPORT_SYMBOL(lowpan_unregister_netdev);
static int __init lowpan_module_init(void)
{
+ int ret;
+
+ ret = lowpan_debugfs_init();
+ if (ret < 0)
+ return ret;
+
request_module_nowait("ipv6");
request_module_nowait("nhc_dest");
@@ -40,6 +88,13 @@ static int __init lowpan_module_init(void)
return 0;
}
+
+static void __exit lowpan_module_exit(void)
+{
+ lowpan_debugfs_exit();
+}
+
module_init(lowpan_module_init);
+module_exit(lowpan_module_exit);
MODULE_LICENSE("GPL");
diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c
new file mode 100644
index 000000000000..88eef84df0fc
--- /dev/null
+++ b/net/6lowpan/debugfs.c
@@ -0,0 +1,53 @@
+/* This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ * (C) 2015 Pengutronix, Alexander Aring <aar@pengutronix.de>
+ * Copyright (c) 2015 Nordic Semiconductor. All Rights Reserved.
+ */
+
+#include <net/6lowpan.h>
+
+#include "6lowpan_i.h"
+
+static struct dentry *lowpan_debugfs;
+
+int lowpan_dev_debugfs_init(struct net_device *dev)
+{
+ struct lowpan_priv *lpriv = lowpan_priv(dev);
+
+ /* creating the root */
+ lpriv->iface_debugfs = debugfs_create_dir(dev->name, lowpan_debugfs);
+ if (!lpriv->iface_debugfs)
+ goto fail;
+
+ return 0;
+
+fail:
+ return -EINVAL;
+}
+
+void lowpan_dev_debugfs_exit(struct net_device *dev)
+{
+ debugfs_remove_recursive(lowpan_priv(dev)->iface_debugfs);
+}
+
+int __init lowpan_debugfs_init(void)
+{
+ lowpan_debugfs = debugfs_create_dir("6lowpan", NULL);
+ if (!lowpan_debugfs)
+ return -EINVAL;
+
+ return 0;
+}
+
+void lowpan_debugfs_exit(void)
+{
+ debugfs_remove_recursive(lowpan_debugfs);
+}
diff --git a/net/6lowpan/nhc_ghc_ext_dest.c b/net/6lowpan/nhc_ghc_ext_dest.c
new file mode 100644
index 000000000000..9887b3a15348
--- /dev/null
+++ b/net/6lowpan/nhc_ghc_ext_dest.c
@@ -0,0 +1,27 @@
+/*
+ * 6LoWPAN Extension Header compression according to RFC7400
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "nhc.h"
+
+#define LOWPAN_GHC_EXT_DEST_IDLEN 1
+#define LOWPAN_GHC_EXT_DEST_ID_0 0xb6
+#define LOWPAN_GHC_EXT_DEST_MASK_0 0xfe
+
+static void dest_ghid_setup(struct lowpan_nhc *nhc)
+{
+ nhc->id[0] = LOWPAN_GHC_EXT_DEST_ID_0;
+ nhc->idmask[0] = LOWPAN_GHC_EXT_DEST_MASK_0;
+}
+
+LOWPAN_NHC(ghc_ext_dest, "RFC7400 Destination Extension Header", NEXTHDR_DEST,
+ 0, dest_ghid_setup, LOWPAN_GHC_EXT_DEST_IDLEN, NULL, NULL);
+
+module_lowpan_nhc(ghc_ext_dest);
+MODULE_DESCRIPTION("6LoWPAN generic header destination extension compression");
+MODULE_LICENSE("GPL");
diff --git a/net/6lowpan/nhc_ghc_ext_frag.c b/net/6lowpan/nhc_ghc_ext_frag.c
new file mode 100644
index 000000000000..1308b79e939d
--- /dev/null
+++ b/net/6lowpan/nhc_ghc_ext_frag.c
@@ -0,0 +1,28 @@
+/*
+ * 6LoWPAN Extension Header compression according to RFC7400
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "nhc.h"
+
+#define LOWPAN_GHC_EXT_FRAG_IDLEN 1
+#define LOWPAN_GHC_EXT_FRAG_ID_0 0xb4
+#define LOWPAN_GHC_EXT_FRAG_MASK_0 0xfe
+
+static void frag_ghid_setup(struct lowpan_nhc *nhc)
+{
+ nhc->id[0] = LOWPAN_GHC_EXT_FRAG_ID_0;
+ nhc->idmask[0] = LOWPAN_GHC_EXT_FRAG_MASK_0;
+}
+
+LOWPAN_NHC(ghc_ext_frag, "RFC7400 Fragmentation Extension Header",
+ NEXTHDR_FRAGMENT, 0, frag_ghid_setup,
+ LOWPAN_GHC_EXT_FRAG_IDLEN, NULL, NULL);
+
+module_lowpan_nhc(ghc_ext_frag);
+MODULE_DESCRIPTION("6LoWPAN generic header fragmentation extension compression");
+MODULE_LICENSE("GPL");
diff --git a/net/6lowpan/nhc_ghc_ext_hop.c b/net/6lowpan/nhc_ghc_ext_hop.c
new file mode 100644
index 000000000000..baec86fd1974
--- /dev/null
+++ b/net/6lowpan/nhc_ghc_ext_hop.c
@@ -0,0 +1,27 @@
+/*
+ * 6LoWPAN Extension Header compression according to RFC7400
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "nhc.h"
+
+#define LOWPAN_GHC_EXT_HOP_IDLEN 1
+#define LOWPAN_GHC_EXT_HOP_ID_0 0xb0
+#define LOWPAN_GHC_EXT_HOP_MASK_0 0xfe
+
+static void hop_ghid_setup(struct lowpan_nhc *nhc)
+{
+ nhc->id[0] = LOWPAN_GHC_EXT_HOP_ID_0;
+ nhc->idmask[0] = LOWPAN_GHC_EXT_HOP_MASK_0;
+}
+
+LOWPAN_NHC(ghc_ext_hop, "RFC7400 Hop-by-Hop Extension Header", NEXTHDR_HOP, 0,
+ hop_ghid_setup, LOWPAN_GHC_EXT_HOP_IDLEN, NULL, NULL);
+
+module_lowpan_nhc(ghc_ext_hop);
+MODULE_DESCRIPTION("6LoWPAN generic header hop-by-hop extension compression");
+MODULE_LICENSE("GPL");
diff --git a/net/6lowpan/nhc_ghc_ext_route.c b/net/6lowpan/nhc_ghc_ext_route.c
new file mode 100644
index 000000000000..d7e5bd791c62
--- /dev/null
+++ b/net/6lowpan/nhc_ghc_ext_route.c
@@ -0,0 +1,27 @@
+/*
+ * 6LoWPAN Extension Header compression according to RFC7400
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "nhc.h"
+
+#define LOWPAN_GHC_EXT_ROUTE_IDLEN 1
+#define LOWPAN_GHC_EXT_ROUTE_ID_0 0xb2
+#define LOWPAN_GHC_EXT_ROUTE_MASK_0 0xfe
+
+static void route_ghid_setup(struct lowpan_nhc *nhc)
+{
+ nhc->id[0] = LOWPAN_GHC_EXT_ROUTE_ID_0;
+ nhc->idmask[0] = LOWPAN_GHC_EXT_ROUTE_MASK_0;
+}
+
+LOWPAN_NHC(ghc_ext_route, "RFC7400 Routing Extension Header", NEXTHDR_ROUTING,
+ 0, route_ghid_setup, LOWPAN_GHC_EXT_ROUTE_IDLEN, NULL, NULL);
+
+module_lowpan_nhc(ghc_ext_route);
+MODULE_DESCRIPTION("6LoWPAN generic header routing extension compression");
+MODULE_LICENSE("GPL");
diff --git a/net/6lowpan/nhc_ghc_icmpv6.c b/net/6lowpan/nhc_ghc_icmpv6.c
new file mode 100644
index 000000000000..32e7c2c66bbc
--- /dev/null
+++ b/net/6lowpan/nhc_ghc_icmpv6.c
@@ -0,0 +1,27 @@
+/*
+ * 6LoWPAN ICMPv6 compression according to RFC7400
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "nhc.h"
+
+#define LOWPAN_GHC_ICMPV6_IDLEN 1
+#define LOWPAN_GHC_ICMPV6_ID_0 0xdf
+#define LOWPAN_GHC_ICMPV6_MASK_0 0xff
+
+static void icmpv6_ghid_setup(struct lowpan_nhc *nhc)
+{
+ nhc->id[0] = LOWPAN_GHC_ICMPV6_ID_0;
+ nhc->idmask[0] = LOWPAN_GHC_ICMPV6_MASK_0;
+}
+
+LOWPAN_NHC(ghc_icmpv6, "RFC7400 ICMPv6", NEXTHDR_ICMP, 0,
+ icmpv6_ghid_setup, LOWPAN_GHC_ICMPV6_IDLEN, NULL, NULL);
+
+module_lowpan_nhc(ghc_icmpv6);
+MODULE_DESCRIPTION("6LoWPAN generic header ICMPv6 compression");
+MODULE_LICENSE("GPL");
diff --git a/net/6lowpan/nhc_ghc_udp.c b/net/6lowpan/nhc_ghc_udp.c
new file mode 100644
index 000000000000..17beefa52ca8
--- /dev/null
+++ b/net/6lowpan/nhc_ghc_udp.c
@@ -0,0 +1,27 @@
+/*
+ * 6LoWPAN UDP compression according to RFC7400
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "nhc.h"
+
+#define LOWPAN_GHC_UDP_IDLEN 1
+#define LOWPAN_GHC_UDP_ID_0 0xd0
+#define LOWPAN_GHC_UDP_MASK_0 0xf8
+
+static void udp_ghid_setup(struct lowpan_nhc *nhc)
+{
+ nhc->id[0] = LOWPAN_GHC_UDP_ID_0;
+ nhc->idmask[0] = LOWPAN_GHC_UDP_MASK_0;
+}
+
+LOWPAN_NHC(ghc_udp, "RFC7400 UDP", NEXTHDR_UDP, 0,
+ udp_ghid_setup, LOWPAN_GHC_UDP_IDLEN, NULL, NULL);
+
+module_lowpan_nhc(ghc_udp);
+MODULE_DESCRIPTION("6LoWPAN generic header UDP compression");
+MODULE_LICENSE("GPL");
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 496b27588493..e2ed69850489 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -30,7 +30,9 @@ bool vlan_do_receive(struct sk_buff **skbp)
skb->pkt_type = PACKET_HOST;
}
- if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+ if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR) &&
+ !netif_is_macvlan_port(vlan_dev) &&
+ !netif_is_bridge_port(vlan_dev)) {
unsigned int offset = skb->data - skb_mac_header(skb);
/*
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index fded86508117..ad5e2fd1012c 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -30,6 +30,7 @@
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <net/arp.h>
+#include <net/switchdev.h>
#include "vlan.h"
#include "vlanproc.h"
@@ -542,9 +543,9 @@ static int vlan_dev_init(struct net_device *dev)
(1<<__LINK_STATE_DORMANT))) |
(1<<__LINK_STATE_PRESENT);
- dev->hw_features = NETIF_F_ALL_CSUM | NETIF_F_SG |
+ dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG |
NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE |
- NETIF_F_HIGHDMA | NETIF_F_SCTP_CSUM |
+ NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC |
NETIF_F_ALL_FCOE;
dev->features |= real_dev->vlan_features | NETIF_F_LLTX |
@@ -774,6 +775,12 @@ static const struct net_device_ops vlan_netdev_ops = {
.ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup,
#endif
.ndo_fix_features = vlan_dev_fix_features,
+ .ndo_fdb_add = switchdev_port_fdb_add,
+ .ndo_fdb_del = switchdev_port_fdb_del,
+ .ndo_fdb_dump = switchdev_port_fdb_dump,
+ .ndo_bridge_setlink = switchdev_port_bridge_setlink,
+ .ndo_bridge_getlink = switchdev_port_bridge_getlink,
+ .ndo_bridge_dellink = switchdev_port_bridge_dellink,
.ndo_get_lock_subclass = vlan_dev_get_lock_subclass,
.ndo_get_iflink = vlan_dev_get_iflink,
};
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index bced8c074c12..7bc2208b6cc4 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -108,9 +108,7 @@ struct p9_poll_wait {
* @unsent_req_list: accounting for requests that haven't been sent
* @req: current request being processed (if any)
* @tmp_buf: temporary buffer to read in header
- * @rsize: amount to read for current frame
- * @rpos: read position in current frame
- * @rbuf: current read buffer
+ * @rc: temporary fcall for reading current frame
* @wpos: write position for current frame
* @wsize: amount of data to write for current frame
* @wbuf: current write buffer
@@ -131,9 +129,7 @@ struct p9_conn {
struct list_head unsent_req_list;
struct p9_req_t *req;
char tmp_buf[7];
- int rsize;
- int rpos;
- char *rbuf;
+ struct p9_fcall rc;
int wpos;
int wsize;
char *wbuf;
@@ -305,69 +301,77 @@ static void p9_read_work(struct work_struct *work)
if (m->err < 0)
return;
- p9_debug(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos);
+ p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset);
- if (!m->rbuf) {
- m->rbuf = m->tmp_buf;
- m->rpos = 0;
- m->rsize = 7; /* start by reading header */
+ if (!m->rc.sdata) {
+ m->rc.sdata = m->tmp_buf;
+ m->rc.offset = 0;
+ m->rc.capacity = 7; /* start by reading header */
}
clear_bit(Rpending, &m->wsched);
- p9_debug(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n",
- m, m->rpos, m->rsize, m->rsize-m->rpos);
- err = p9_fd_read(m->client, m->rbuf + m->rpos,
- m->rsize - m->rpos);
+ p9_debug(P9_DEBUG_TRANS, "read mux %p pos %zd size: %zd = %zd\n",
+ m, m->rc.offset, m->rc.capacity,
+ m->rc.capacity - m->rc.offset);
+ err = p9_fd_read(m->client, m->rc.sdata + m->rc.offset,
+ m->rc.capacity - m->rc.offset);
p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);
- if (err == -EAGAIN) {
+ if (err == -EAGAIN)
goto end_clear;
- }
if (err <= 0)
goto error;
- m->rpos += err;
+ m->rc.offset += err;
- if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */
- u16 tag;
+ /* header read in */
+ if ((!m->req) && (m->rc.offset == m->rc.capacity)) {
p9_debug(P9_DEBUG_TRANS, "got new header\n");
- n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */
- if (n >= m->client->msize) {
+ err = p9_parse_header(&m->rc, NULL, NULL, NULL, 0);
+ if (err) {
+ p9_debug(P9_DEBUG_ERROR,
+ "error parsing header: %d\n", err);
+ goto error;
+ }
+
+ if (m->rc.size >= m->client->msize) {
p9_debug(P9_DEBUG_ERROR,
- "requested packet size too big: %d\n", n);
+ "requested packet size too big: %d\n",
+ m->rc.size);
err = -EIO;
goto error;
}
- tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */
p9_debug(P9_DEBUG_TRANS,
- "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag);
+ "mux %p pkt: size: %d bytes tag: %d\n",
+ m, m->rc.size, m->rc.tag);
- m->req = p9_tag_lookup(m->client, tag);
+ m->req = p9_tag_lookup(m->client, m->rc.tag);
if (!m->req || (m->req->status != REQ_STATUS_SENT)) {
p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
- tag);
+ m->rc.tag);
err = -EIO;
goto error;
}
if (m->req->rc == NULL) {
- m->req->rc = kmalloc(sizeof(struct p9_fcall) +
- m->client->msize, GFP_NOFS);
- if (!m->req->rc) {
- m->req = NULL;
- err = -ENOMEM;
- goto error;
- }
+ p9_debug(P9_DEBUG_ERROR,
+ "No recv fcall for tag %d (req %p), disconnecting!\n",
+ m->rc.tag, m->req);
+ m->req = NULL;
+ err = -EIO;
+ goto error;
}
- m->rbuf = (char *)m->req->rc + sizeof(struct p9_fcall);
- memcpy(m->rbuf, m->tmp_buf, m->rsize);
- m->rsize = n;
+ m->rc.sdata = (char *)m->req->rc + sizeof(struct p9_fcall);
+ memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
+ m->rc.capacity = m->rc.size;
}
- /* not an else because some packets (like clunk) have no payload */
- if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */
+ /* packet is read in
+ * not an else because some packets (like clunk) have no payload
+ */
+ if ((m->req) && (m->rc.offset == m->rc.capacity)) {
p9_debug(P9_DEBUG_TRANS, "got new packet\n");
spin_lock(&m->client->lock);
if (m->req->status != REQ_STATUS_ERROR)
@@ -375,9 +379,9 @@ static void p9_read_work(struct work_struct *work)
list_del(&m->req->req_list);
spin_unlock(&m->client->lock);
p9_client_cb(m->client, m->req, status);
- m->rbuf = NULL;
- m->rpos = 0;
- m->rsize = 0;
+ m->rc.sdata = NULL;
+ m->rc.offset = 0;
+ m->rc.capacity = 0;
m->req = NULL;
}
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index ba1210253f5e..52b4a2f993f2 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -655,8 +655,8 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
return -ENOMEM;
/* Create the RDMA CM ID */
- rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP,
- IB_QPT_RC);
+ rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client,
+ RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(rdma->cm_id))
goto error;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 6e70ddb158b4..4acb1d5417aa 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -105,7 +105,7 @@ static struct list_head virtio_chan_list;
/* How many bytes left in this page. */
static unsigned int rest_of_page(void *data)
{
- return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE);
+ return PAGE_SIZE - offset_in_page(data);
}
/**
@@ -143,7 +143,6 @@ static void p9_virtio_close(struct p9_client *client)
static void req_done(struct virtqueue *vq)
{
struct virtio_chan *chan = vq->vdev->priv;
- struct p9_fcall *rc;
unsigned int len;
struct p9_req_t *req;
unsigned long flags;
@@ -152,8 +151,8 @@ static void req_done(struct virtqueue *vq)
while (1) {
spin_lock_irqsave(&chan->lock, flags);
- rc = virtqueue_get_buf(chan->vq, &len);
- if (rc == NULL) {
+ req = virtqueue_get_buf(chan->vq, &len);
+ if (req == NULL) {
spin_unlock_irqrestore(&chan->lock, flags);
break;
}
@@ -161,9 +160,6 @@ static void req_done(struct virtqueue *vq)
spin_unlock_irqrestore(&chan->lock, flags);
/* Wakeup if anyone waiting for VirtIO ring space. */
wake_up(chan->vc_wq);
- p9_debug(P9_DEBUG_TRANS, ": rc %p\n", rc);
- p9_debug(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
- req = p9_tag_lookup(chan->client, rc->tag);
p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
}
}
@@ -284,7 +280,7 @@ req_retry:
if (in)
sgs[out_sgs + in_sgs++] = chan->sg + out;
- err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc,
+ err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req,
GFP_ATOMIC);
if (err < 0) {
if (err == -ENOSPC) {
@@ -369,7 +365,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
return -ENOMEM;
*need_drop = 0;
- p -= (*offs = (unsigned long)p % PAGE_SIZE);
+ p -= (*offs = offset_in_page(p));
for (index = 0; index < nr_pages; index++) {
if (is_vmalloc_addr(p))
(*pages)[index] = vmalloc_to_page(p);
@@ -469,7 +465,7 @@ req_retry_pinned:
}
BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs));
- err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc,
+ err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req,
GFP_ATOMIC);
if (err < 0) {
if (err == -ENOSPC) {
@@ -662,7 +658,7 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args)
mutex_unlock(&virtio_9p_lock);
if (!found) {
- pr_err("no channels available\n");
+ pr_err("no channels available for device %s\n", devname);
return ret;
}
diff --git a/net/Kconfig b/net/Kconfig
index 127da94ae25e..174354618f8a 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -48,6 +48,9 @@ config COMPAT_NETLINK_MESSAGES
config NET_INGRESS
bool
+config NET_EGRESS
+ bool
+
menu "Networking options"
source "net/packet/Kconfig"
@@ -250,9 +253,14 @@ config XPS
depends on SMP
default y
+config SOCK_CGROUP_DATA
+ bool
+ default n
+
config CGROUP_NET_PRIO
bool "Network priority cgroup"
depends on CGROUPS
+ select SOCK_CGROUP_DATA
---help---
Cgroup subsystem for use in assigning processes to network priorities on
a per-interface basis.
@@ -260,6 +268,7 @@ config CGROUP_NET_PRIO
config CGROUP_NET_CLASSID
bool "Network classid cgroup"
depends on CGROUPS
+ select SOCK_CGROUP_DATA
---help---
Cgroup subsystem for use as general purpose socket classid marker that is
being used in cls_cgroup and for netfilter matching.
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index d5871ac493eb..f066781be3c8 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1625,7 +1625,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
rt = atrtr_find(&at_hint);
}
- err = ENETUNREACH;
+ err = -ENETUNREACH;
if (!rt)
goto out;
diff --git a/net/atm/common.c b/net/atm/common.c
index 49a872db7e42..6dc12305799e 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -96,7 +96,7 @@ static void vcc_def_wakeup(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
- if (wq_has_sleeper(wq))
+ if (skwq_has_sleeper(wq))
wake_up(&wq->wait);
rcu_read_unlock();
}
@@ -117,7 +117,7 @@ static void vcc_write_space(struct sock *sk)
if (vcc_writable(sk)) {
wq = rcu_dereference(sk->sk_wq);
- if (wq_has_sleeper(wq))
+ if (skwq_has_sleeper(wq))
wake_up_interruptible(&wq->wait);
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
diff --git a/net/atm/mpc.h b/net/atm/mpc.h
index 0919a88bbc70..cfc7b745aa91 100644
--- a/net/atm/mpc.h
+++ b/net/atm/mpc.h
@@ -21,11 +21,11 @@ struct mpoa_client {
uint8_t our_ctrl_addr[ATM_ESA_LEN]; /* MPC's control ATM address */
rwlock_t ingress_lock;
- struct in_cache_ops *in_ops; /* ingress cache operations */
+ const struct in_cache_ops *in_ops; /* ingress cache operations */
in_cache_entry *in_cache; /* the ingress cache of this MPC */
rwlock_t egress_lock;
- struct eg_cache_ops *eg_ops; /* egress cache operations */
+ const struct eg_cache_ops *eg_ops; /* egress cache operations */
eg_cache_entry *eg_cache; /* the egress cache of this MPC */
uint8_t *mps_macs; /* array of MPS MAC addresses, >=1 */
diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c
index d1b2d9a03144..9e60e74c807d 100644
--- a/net/atm/mpoa_caches.c
+++ b/net/atm/mpoa_caches.c
@@ -534,7 +534,7 @@ static void eg_destroy_cache(struct mpoa_client *mpc)
}
-static struct in_cache_ops ingress_ops = {
+static const struct in_cache_ops ingress_ops = {
in_cache_add_entry, /* add_entry */
in_cache_get, /* get */
in_cache_get_with_mask, /* get_with_mask */
@@ -548,7 +548,7 @@ static struct in_cache_ops ingress_ops = {
in_destroy_cache /* destroy_cache */
};
-static struct eg_cache_ops egress_ops = {
+static const struct eg_cache_ops egress_ops = {
eg_cache_add_entry, /* add_entry */
eg_cache_get_by_cache_id, /* get_by_cache_id */
eg_cache_get_by_tag, /* get_by_tag */
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index ae3a47f9d1d5..fbd0acf80b13 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -805,6 +805,9 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,
struct sock *sk;
ax25_cb *ax25;
+ if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+ return -EINVAL;
+
if (!net_eq(net, &init_net))
return -EAFNOSUPPORT;
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 912d9c36fb1c..df625de55ef2 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -185,7 +185,8 @@ unlock:
static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
int max_if_num, int del_if_num)
{
- int chunk_size, ret = -ENOMEM, if_offset;
+ int ret = -ENOMEM;
+ size_t chunk_size, if_offset;
void *data_ptr = NULL;
spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
@@ -203,8 +204,9 @@ static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
memcpy(data_ptr, orig_node->bat_iv.bcast_own, del_if_num * chunk_size);
/* copy second part */
+ if_offset = (del_if_num + 1) * chunk_size;
memcpy((char *)data_ptr + del_if_num * chunk_size,
- orig_node->bat_iv.bcast_own + ((del_if_num + 1) * chunk_size),
+ (uint8_t *)orig_node->bat_iv.bcast_own + if_offset,
(max_if_num - del_if_num) * chunk_size);
free_bcast_own:
@@ -361,7 +363,6 @@ batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface)
unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff;
batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff;
- batadv_ogm_packet->flags = BATADV_PRIMARIES_FIRST_HOP;
batadv_ogm_packet->ttl = BATADV_TTL;
}
@@ -842,8 +843,6 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node,
"Forwarding packet: tq: %i, ttl: %i\n",
batadv_ogm_packet->tq, batadv_ogm_packet->ttl);
- /* switch of primaries first hop flag when forwarding */
- batadv_ogm_packet->flags &= ~BATADV_PRIMARIES_FIRST_HOP;
if (is_single_hop_neigh)
batadv_ogm_packet->flags |= BATADV_DIRECTLINK;
else
@@ -1379,6 +1378,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
struct batadv_hard_iface *if_outgoing)
{
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+ struct batadv_hardif_neigh_node *hardif_neigh = NULL;
struct batadv_neigh_node *router = NULL;
struct batadv_neigh_node *router_router = NULL;
struct batadv_orig_node *orig_neigh_node;
@@ -1423,6 +1423,13 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
goto out;
}
+ if (is_single_hop_neigh) {
+ hardif_neigh = batadv_hardif_neigh_get(if_incoming,
+ ethhdr->h_source);
+ if (hardif_neigh)
+ hardif_neigh->last_seen = jiffies;
+ }
+
router = batadv_orig_router_get(orig_node, if_outgoing);
if (router) {
router_router = batadv_orig_router_get(router->orig_node,
@@ -1557,6 +1564,8 @@ out:
batadv_neigh_node_free_ref(router_router);
if (orig_neigh_router)
batadv_neigh_node_free_ref(orig_neigh_router);
+ if (hardif_neigh)
+ batadv_hardif_neigh_free_ref(hardif_neigh);
kfree_skb(skb_priv);
}
@@ -1862,6 +1871,58 @@ next:
}
/**
+ * batadv_iv_hardif_neigh_print - print a single hop neighbour node
+ * @seq: neighbour table seq_file struct
+ * @hardif_neigh: hardif neighbour information
+ */
+static void
+batadv_iv_hardif_neigh_print(struct seq_file *seq,
+ struct batadv_hardif_neigh_node *hardif_neigh)
+{
+ int last_secs, last_msecs;
+
+ last_secs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen) / 1000;
+ last_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen) % 1000;
+
+ seq_printf(seq, " %10s %pM %4i.%03is\n",
+ hardif_neigh->if_incoming->net_dev->name,
+ hardif_neigh->addr, last_secs, last_msecs);
+}
+
+/**
+ * batadv_iv_ogm_neigh_print - print the single hop neighbour list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @seq: neighbour table seq_file struct
+ */
+static void batadv_iv_neigh_print(struct batadv_priv *bat_priv,
+ struct seq_file *seq)
+{
+ struct net_device *net_dev = (struct net_device *)seq->private;
+ struct batadv_hardif_neigh_node *hardif_neigh;
+ struct batadv_hard_iface *hard_iface;
+ int batman_count = 0;
+
+ seq_printf(seq, " %10s %-13s %s\n",
+ "IF", "Neighbor", "last-seen");
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+ if (hard_iface->soft_iface != net_dev)
+ continue;
+
+ hlist_for_each_entry_rcu(hardif_neigh,
+ &hard_iface->neigh_list, list) {
+ batadv_iv_hardif_neigh_print(seq, hardif_neigh);
+ batman_count++;
+ }
+ }
+ rcu_read_unlock();
+
+ if (batman_count == 0)
+ seq_puts(seq, "No batman nodes in range ...\n");
+}
+
+/**
* batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors
* @neigh1: the first neighbor object of the comparison
* @if_outgoing1: outgoing interface for the first neighbor
@@ -1902,8 +1963,8 @@ out:
}
/**
- * batadv_iv_ogm_neigh_is_eob - check if neigh1 is equally good or better than
- * neigh2 from the metric prospective
+ * batadv_iv_ogm_neigh_is_sob - check if neigh1 is similarly good or better
+ * than neigh2 from the metric prospective
* @neigh1: the first neighbor object of the comparison
* @if_outgoing1: outgoing interface for the first neighbor
* @neigh2: the second neighbor object of the comparison
@@ -1913,7 +1974,7 @@ out:
* the metric via neigh2, false otherwise.
*/
static bool
-batadv_iv_ogm_neigh_is_eob(struct batadv_neigh_node *neigh1,
+batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1,
struct batadv_hard_iface *if_outgoing1,
struct batadv_neigh_node *neigh2,
struct batadv_hard_iface *if_outgoing2)
@@ -1953,7 +2014,8 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
.bat_ogm_schedule = batadv_iv_ogm_schedule,
.bat_ogm_emit = batadv_iv_ogm_emit,
.bat_neigh_cmp = batadv_iv_ogm_neigh_cmp,
- .bat_neigh_is_equiv_or_better = batadv_iv_ogm_neigh_is_eob,
+ .bat_neigh_is_similar_or_better = batadv_iv_ogm_neigh_is_sob,
+ .bat_neigh_print = batadv_iv_neigh_print,
.bat_orig_print = batadv_iv_ogm_orig_print,
.bat_orig_free = batadv_iv_ogm_orig_free,
.bat_orig_add_if = batadv_iv_ogm_orig_add_if,
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 191a70290dca..c24c481b666f 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -127,21 +127,17 @@ batadv_backbone_gw_free_ref(struct batadv_bla_backbone_gw *backbone_gw)
}
/* finally deinitialize the claim */
-static void batadv_claim_free_rcu(struct rcu_head *rcu)
+static void batadv_claim_release(struct batadv_bla_claim *claim)
{
- struct batadv_bla_claim *claim;
-
- claim = container_of(rcu, struct batadv_bla_claim, rcu);
-
batadv_backbone_gw_free_ref(claim->backbone_gw);
- kfree(claim);
+ kfree_rcu(claim, rcu);
}
/* free a claim, call claim_free_rcu if its the last reference */
static void batadv_claim_free_ref(struct batadv_bla_claim *claim)
{
if (atomic_dec_and_test(&claim->refcount))
- call_rcu(&claim->rcu, batadv_claim_free_rcu);
+ batadv_claim_release(claim);
}
/**
@@ -260,7 +256,9 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw)
}
/* all claims gone, initialize CRC */
+ spin_lock_bh(&backbone_gw->crc_lock);
backbone_gw->crc = BATADV_BLA_CRC_INIT;
+ spin_unlock_bh(&backbone_gw->crc_lock);
}
/**
@@ -408,6 +406,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
entry->lasttime = jiffies;
entry->crc = BATADV_BLA_CRC_INIT;
entry->bat_priv = bat_priv;
+ spin_lock_init(&entry->crc_lock);
atomic_set(&entry->request_sent, 0);
atomic_set(&entry->wait_periods, 0);
ether_addr_copy(entry->orig, orig);
@@ -557,7 +556,9 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv,
__be16 crc;
memcpy(mac, batadv_announce_mac, 4);
+ spin_lock_bh(&backbone_gw->crc_lock);
crc = htons(backbone_gw->crc);
+ spin_unlock_bh(&backbone_gw->crc_lock);
memcpy(&mac[4], &crc, 2);
batadv_bla_send_claim(bat_priv, mac, backbone_gw->vid,
@@ -618,14 +619,18 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
"bla_add_claim(): changing ownership for %pM, vid %d\n",
mac, BATADV_PRINT_VID(vid));
+ spin_lock_bh(&claim->backbone_gw->crc_lock);
claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
+ spin_unlock_bh(&claim->backbone_gw->crc_lock);
batadv_backbone_gw_free_ref(claim->backbone_gw);
}
/* set (new) backbone gw */
atomic_inc(&backbone_gw->refcount);
claim->backbone_gw = backbone_gw;
+ spin_lock_bh(&backbone_gw->crc_lock);
backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
+ spin_unlock_bh(&backbone_gw->crc_lock);
backbone_gw->lasttime = jiffies;
claim_free_ref:
@@ -653,7 +658,9 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
batadv_choose_claim, claim);
batadv_claim_free_ref(claim); /* reference from the hash is gone */
+ spin_lock_bh(&claim->backbone_gw->crc_lock);
claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
+ spin_unlock_bh(&claim->backbone_gw->crc_lock);
/* don't need the reference from hash_find() anymore */
batadv_claim_free_ref(claim);
@@ -664,7 +671,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
u8 *backbone_addr, unsigned short vid)
{
struct batadv_bla_backbone_gw *backbone_gw;
- u16 crc;
+ u16 backbone_crc, crc;
if (memcmp(an_addr, batadv_announce_mac, 4) != 0)
return 0;
@@ -683,12 +690,16 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
"handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n",
BATADV_PRINT_VID(vid), backbone_gw->orig, crc);
- if (backbone_gw->crc != crc) {
+ spin_lock_bh(&backbone_gw->crc_lock);
+ backbone_crc = backbone_gw->crc;
+ spin_unlock_bh(&backbone_gw->crc_lock);
+
+ if (backbone_crc != crc) {
batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv,
"handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n",
backbone_gw->orig,
BATADV_PRINT_VID(backbone_gw->vid),
- backbone_gw->crc, crc);
+ backbone_crc, crc);
batadv_bla_send_request(backbone_gw);
} else {
@@ -1153,6 +1164,26 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
}
}
+/**
+ * batadv_bla_status_update - purge bla interfaces if necessary
+ * @net_dev: the soft interface net device
+ */
+void batadv_bla_status_update(struct net_device *net_dev)
+{
+ struct batadv_priv *bat_priv = netdev_priv(net_dev);
+ struct batadv_hard_iface *primary_if;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if)
+ return;
+
+ /* this function already purges everything when bla is disabled,
+ * so just call that one.
+ */
+ batadv_bla_update_orig_address(bat_priv, primary_if, primary_if);
+ batadv_hardif_free_ref(primary_if);
+}
+
/* periodic work to do:
* * purge structures when they are too old
* * send announcements
@@ -1647,6 +1678,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
struct batadv_bla_claim *claim;
struct batadv_hard_iface *primary_if;
struct hlist_head *head;
+ u16 backbone_crc;
u32 i;
bool is_own;
u8 *primary_addr;
@@ -1669,11 +1701,15 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
hlist_for_each_entry_rcu(claim, head, hash_entry) {
is_own = batadv_compare_eth(claim->backbone_gw->orig,
primary_addr);
+
+ spin_lock_bh(&claim->backbone_gw->crc_lock);
+ backbone_crc = claim->backbone_gw->crc;
+ spin_unlock_bh(&claim->backbone_gw->crc_lock);
seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n",
claim->addr, BATADV_PRINT_VID(claim->vid),
claim->backbone_gw->orig,
(is_own ? 'x' : ' '),
- claim->backbone_gw->crc);
+ backbone_crc);
}
rcu_read_unlock();
}
@@ -1692,6 +1728,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
struct batadv_hard_iface *primary_if;
struct hlist_head *head;
int secs, msecs;
+ u16 backbone_crc;
u32 i;
bool is_own;
u8 *primary_addr;
@@ -1722,10 +1759,14 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
if (is_own)
continue;
+ spin_lock_bh(&backbone_gw->crc_lock);
+ backbone_crc = backbone_gw->crc;
+ spin_unlock_bh(&backbone_gw->crc_lock);
+
seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n",
backbone_gw->orig,
BATADV_PRINT_VID(backbone_gw->vid), secs,
- msecs, backbone_gw->crc);
+ msecs, backbone_crc);
}
rcu_read_unlock();
}
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 025152b34282..7ea199b8b5ab 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -22,6 +22,7 @@
#include <linux/types.h>
+struct net_device;
struct seq_file;
struct sk_buff;
@@ -42,6 +43,7 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
struct batadv_hard_iface *oldif);
+void batadv_bla_status_update(struct net_device *net_dev);
int batadv_bla_init(struct batadv_priv *bat_priv);
void batadv_bla_free(struct batadv_priv *bat_priv);
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index c4c1e8030ba0..037ad0a5f485 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -262,6 +262,13 @@ static int batadv_algorithms_open(struct inode *inode, struct file *file)
return single_open(file, batadv_algo_seq_print_text, NULL);
}
+static int neighbors_open(struct inode *inode, struct file *file)
+{
+ struct net_device *net_dev = (struct net_device *)inode->i_private;
+
+ return single_open(file, batadv_hardif_neigh_seq_print_text, net_dev);
+}
+
static int batadv_originators_open(struct inode *inode, struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
@@ -375,6 +382,7 @@ static struct batadv_debuginfo *batadv_general_debuginfos[] = {
};
/* The following attributes are per soft interface */
+static BATADV_DEBUGINFO(neighbors, S_IRUGO, neighbors_open);
static BATADV_DEBUGINFO(originators, S_IRUGO, batadv_originators_open);
static BATADV_DEBUGINFO(gateways, S_IRUGO, batadv_gateways_open);
static BATADV_DEBUGINFO(transtable_global, S_IRUGO,
@@ -394,6 +402,7 @@ static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open);
#endif
static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
+ &batadv_debuginfo_neighbors,
&batadv_debuginfo_originators,
&batadv_debuginfo_gateways,
&batadv_debuginfo_transtable_global,
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 83bc1aaf5800..a49c705fb86b 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -566,6 +566,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
int select;
batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key;
struct batadv_dat_candidate *res;
+ struct batadv_dat_entry dat;
if (!bat_priv->orig_hash)
return NULL;
@@ -575,7 +576,9 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
if (!res)
return NULL;
- ip_key = (batadv_dat_addr_t)batadv_hash_dat(&ip_dst,
+ dat.ip = ip_dst;
+ dat.vid = 0;
+ ip_key = (batadv_dat_addr_t)batadv_hash_dat(&dat,
BATADV_DAT_ADDR_MAX);
batadv_dbg(BATADV_DBG_DAT, bat_priv,
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 700c96c82a15..20d9282f895b 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -71,14 +71,14 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node,
for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) {
chain = &orig_node->fragments[i];
- spin_lock_bh(&orig_node->fragments[i].lock);
+ spin_lock_bh(&chain->lock);
if (!check_cb || check_cb(chain)) {
- batadv_frag_clear_chain(&orig_node->fragments[i].head);
- orig_node->fragments[i].size = 0;
+ batadv_frag_clear_chain(&chain->head);
+ chain->size = 0;
}
- spin_unlock_bh(&orig_node->fragments[i].lock);
+ spin_unlock_bh(&chain->lock);
}
}
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index e6c8382c79ba..ccf70bed0d0c 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -527,11 +527,12 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
* gets dereferenced.
*/
spin_lock_bh(&bat_priv->gw.list_lock);
- hlist_del_init_rcu(&gw_node->list);
+ if (!hlist_unhashed(&gw_node->list)) {
+ hlist_del_init_rcu(&gw_node->list);
+ batadv_gw_node_free_ref(gw_node);
+ }
spin_unlock_bh(&bat_priv->gw.list_lock);
- batadv_gw_node_free_ref(gw_node);
-
curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
if (gw_node == curr_gw)
batadv_gw_reselect(bat_priv);
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 0cb5e6b6f6d4..b51bface8bdd 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -31,27 +31,23 @@
#include "packet.h"
/**
- * batadv_parse_gw_bandwidth - parse supplied string buffer to extract download
- * and upload bandwidth information
+ * batadv_parse_throughput - parse supplied string buffer to extract throughput
+ * information
* @net_dev: the soft interface net device
* @buff: string buffer to parse
- * @down: pointer holding the returned download bandwidth information
- * @up: pointer holding the returned upload bandwidth information
+ * @description: text shown when throughput string cannot be parsed
+ * @throughput: pointer holding the returned throughput information
*
* Returns false on parse error and true otherwise.
*/
-static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff,
- u32 *down, u32 *up)
+static bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
+ const char *description, u32 *throughput)
{
enum batadv_bandwidth_units bw_unit_type = BATADV_BW_UNIT_KBIT;
- char *slash_ptr, *tmp_ptr;
- u64 ldown, lup;
+ u64 lthroughput;
+ char *tmp_ptr;
int ret;
- slash_ptr = strchr(buff, '/');
- if (slash_ptr)
- *slash_ptr = 0;
-
if (strlen(buff) > 4) {
tmp_ptr = buff + strlen(buff) - 4;
@@ -63,90 +59,75 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff,
*tmp_ptr = '\0';
}
- ret = kstrtou64(buff, 10, &ldown);
+ ret = kstrtou64(buff, 10, &lthroughput);
if (ret) {
batadv_err(net_dev,
- "Download speed of gateway mode invalid: %s\n",
- buff);
+ "Invalid throughput speed for %s: %s\n",
+ description, buff);
return false;
}
switch (bw_unit_type) {
case BATADV_BW_UNIT_MBIT:
/* prevent overflow */
- if (U64_MAX / 10 < ldown) {
+ if (U64_MAX / 10 < lthroughput) {
batadv_err(net_dev,
- "Download speed of gateway mode too large: %s\n",
- buff);
+ "Throughput speed for %s too large: %s\n",
+ description, buff);
return false;
}
- ldown *= 10;
+ lthroughput *= 10;
break;
case BATADV_BW_UNIT_KBIT:
default:
- ldown = div_u64(ldown, 100);
+ lthroughput = div_u64(lthroughput, 100);
break;
}
- if (U32_MAX < ldown) {
+ if (lthroughput > U32_MAX) {
batadv_err(net_dev,
- "Download speed of gateway mode too large: %s\n",
- buff);
+ "Throughput speed for %s too large: %s\n",
+ description, buff);
return false;
}
- *down = ldown;
-
- /* we also got some upload info */
- if (slash_ptr) {
- bw_unit_type = BATADV_BW_UNIT_KBIT;
-
- if (strlen(slash_ptr + 1) > 4) {
- tmp_ptr = slash_ptr + 1 - 4 + strlen(slash_ptr + 1);
+ *throughput = lthroughput;
- if (strncasecmp(tmp_ptr, "mbit", 4) == 0)
- bw_unit_type = BATADV_BW_UNIT_MBIT;
+ return true;
+}
- if ((strncasecmp(tmp_ptr, "kbit", 4) == 0) ||
- (bw_unit_type == BATADV_BW_UNIT_MBIT))
- *tmp_ptr = '\0';
- }
+/**
+ * batadv_parse_gw_bandwidth - parse supplied string buffer to extract download
+ * and upload bandwidth information
+ * @net_dev: the soft interface net device
+ * @buff: string buffer to parse
+ * @down: pointer holding the returned download bandwidth information
+ * @up: pointer holding the returned upload bandwidth information
+ *
+ * Return: false on parse error and true otherwise.
+ */
+static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff,
+ u32 *down, u32 *up)
+{
+ char *slash_ptr;
+ bool ret;
- ret = kstrtou64(slash_ptr + 1, 10, &lup);
- if (ret) {
- batadv_err(net_dev,
- "Upload speed of gateway mode invalid: %s\n",
- slash_ptr + 1);
- return false;
- }
+ slash_ptr = strchr(buff, '/');
+ if (slash_ptr)
+ *slash_ptr = 0;
- switch (bw_unit_type) {
- case BATADV_BW_UNIT_MBIT:
- /* prevent overflow */
- if (U64_MAX / 10 < lup) {
- batadv_err(net_dev,
- "Upload speed of gateway mode too large: %s\n",
- slash_ptr + 1);
- return false;
- }
-
- lup *= 10;
- break;
- case BATADV_BW_UNIT_KBIT:
- default:
- lup = div_u64(lup, 100);
- break;
- }
+ ret = batadv_parse_throughput(net_dev, buff, "download gateway speed",
+ down);
+ if (!ret)
+ return false;
- if (U32_MAX < lup) {
- batadv_err(net_dev,
- "Upload speed of gateway mode too large: %s\n",
- slash_ptr + 1);
+ /* we also got some upload info */
+ if (slash_ptr) {
+ ret = batadv_parse_throughput(net_dev, slash_ptr + 1,
+ "upload gateway speed", up);
+ if (!ret)
return false;
- }
-
- *up = lup;
}
return true;
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index f11345e163d7..57f7107169f5 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -32,6 +32,7 @@
#include <linux/rculist.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
+#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <net/net_namespace.h>
@@ -75,6 +76,28 @@ out:
}
/**
+ * batadv_mutual_parents - check if two devices are each others parent
+ * @dev1: 1st net_device
+ * @dev2: 2nd net_device
+ *
+ * veth devices come in pairs and each is the parent of the other!
+ *
+ * Return: true if the devices are each others parent, otherwise false
+ */
+static bool batadv_mutual_parents(const struct net_device *dev1,
+ const struct net_device *dev2)
+{
+ int dev1_parent_iflink = dev_get_iflink(dev1);
+ int dev2_parent_iflink = dev_get_iflink(dev2);
+
+ if (!dev1_parent_iflink || !dev2_parent_iflink)
+ return false;
+
+ return (dev1_parent_iflink == dev2->ifindex) &&
+ (dev2_parent_iflink == dev1->ifindex);
+}
+
+/**
* batadv_is_on_batman_iface - check if a device is a batman iface descendant
* @net_dev: the device to check
*
@@ -107,6 +130,9 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
if (WARN(!parent_dev, "Cannot find parent device"))
return false;
+ if (batadv_mutual_parents(net_dev, parent_dev))
+ return false;
+
ret = batadv_is_on_batman_iface(parent_dev);
return ret;
@@ -464,7 +490,8 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
hard_iface->soft_iface = soft_iface;
bat_priv = netdev_priv(hard_iface->soft_iface);
- ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface);
+ ret = netdev_master_upper_dev_link(hard_iface->net_dev,
+ soft_iface, NULL, NULL);
if (ret)
goto err_dev;
@@ -638,9 +665,12 @@ batadv_hardif_add_interface(struct net_device *net_dev)
goto free_sysfs;
INIT_LIST_HEAD(&hard_iface->list);
+ INIT_HLIST_HEAD(&hard_iface->neigh_list);
INIT_WORK(&hard_iface->cleanup_work,
batadv_hardif_remove_interface_finish);
+ spin_lock_init(&hard_iface->neigh_list_lock);
+
hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT;
if (batadv_is_wifi_netdev(net_dev))
hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
@@ -708,7 +738,8 @@ static int batadv_hard_if_event(struct notifier_block *this,
}
hard_iface = batadv_hardif_get_by_netdev(net_dev);
- if (!hard_iface && event == NETDEV_REGISTER)
+ if (!hard_iface && (event == NETDEV_REGISTER ||
+ event == NETDEV_POST_TYPE_CHANGE))
hard_iface = batadv_hardif_add_interface(net_dev);
if (!hard_iface)
@@ -723,6 +754,7 @@ static int batadv_hard_if_event(struct notifier_block *this,
batadv_hardif_deactivate_interface(hard_iface);
break;
case NETDEV_UNREGISTER:
+ case NETDEV_PRE_TYPE_CHANGE:
list_del_rcu(&hard_iface->list);
batadv_hardif_remove_interface(hard_iface);
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 5a31420513e1..7b12ea8ea29d 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -75,18 +75,6 @@ batadv_hardif_free_ref(struct batadv_hard_iface *hard_iface)
call_rcu(&hard_iface->rcu, batadv_hardif_free_rcu);
}
-/**
- * batadv_hardif_free_ref_now - decrement the hard interface refcounter and
- * possibly free it (without rcu callback)
- * @hard_iface: the hard interface to free
- */
-static inline void
-batadv_hardif_free_ref_now(struct batadv_hard_iface *hard_iface)
-{
- if (atomic_dec_and_test(&hard_iface->refcount))
- batadv_hardif_free_rcu(&hard_iface->rcu);
-}
-
static inline struct batadv_hard_iface *
batadv_primary_if_get_selected(struct batadv_priv *bat_priv)
{
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index d7f17c1aa4a4..4b5d61fbadb1 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -552,7 +552,7 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
!bat_algo_ops->bat_ogm_schedule ||
!bat_algo_ops->bat_ogm_emit ||
!bat_algo_ops->bat_neigh_cmp ||
- !bat_algo_ops->bat_neigh_is_equiv_or_better) {
+ !bat_algo_ops->bat_neigh_is_similar_or_better) {
pr_info("Routing algo '%s' does not implement required ops\n",
bat_algo_ops->name);
return -EINVAL;
@@ -747,7 +747,7 @@ static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv,
struct batadv_tvlv_container *tvlv)
{
- lockdep_assert_held(&bat_priv->tvlv.handler_list_lock);
+ lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
if (!tvlv)
return;
@@ -908,7 +908,7 @@ end:
* appropriate handlers
* @bat_priv: the bat priv with all the soft interface information
* @tvlv_handler: tvlv callback function handling the tvlv content
- * @ogm_source: flag indicating wether the tvlv is an ogm or a unicast packet
+ * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
* @orig_node: orig node emitting the ogm packet
* @src: source mac address of the unicast packet
* @dst: destination mac address of the unicast packet
@@ -961,7 +961,7 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
* batadv_tvlv_containers_process - parse the given tvlv buffer to call the
* appropriate handlers
* @bat_priv: the bat priv with all the soft interface information
- * @ogm_source: flag indicating wether the tvlv is an ogm or a unicast packet
+ * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
* @orig_node: orig node emitting the ogm packet
* @src: source mac address of the unicast packet
* @dst: destination mac address of the unicast packet
@@ -1143,15 +1143,14 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
struct batadv_unicast_tvlv_packet *unicast_tvlv_packet;
struct batadv_tvlv_hdr *tvlv_hdr;
struct batadv_orig_node *orig_node;
- struct sk_buff *skb = NULL;
+ struct sk_buff *skb;
unsigned char *tvlv_buff;
unsigned int tvlv_len;
ssize_t hdr_len = sizeof(*unicast_tvlv_packet);
- bool ret = false;
orig_node = batadv_orig_hash_find(bat_priv, dst);
if (!orig_node)
- goto out;
+ return;
tvlv_len = sizeof(*tvlv_hdr) + tvlv_value_len;
@@ -1180,14 +1179,10 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
tvlv_buff += sizeof(*tvlv_hdr);
memcpy(tvlv_buff, tvlv_value, tvlv_value_len);
- if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
- ret = true;
-
-out:
- if (skb && !ret)
+ if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP)
kfree_skb(skb);
- if (orig_node)
- batadv_orig_node_free_ref(orig_node);
+out:
+ batadv_orig_node_free_ref(orig_node);
}
/**
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index ebd8af0a1eb0..9dbd9107e7e1 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2015.2"
+#define BATADV_SOURCE_VERSION "2016.0"
#endif
/* B.A.T.M.A.N. parameters */
@@ -109,7 +109,7 @@
#define BATADV_MAX_AGGREGATION_MS 100
#define BATADV_BLA_PERIOD_LENGTH 10000 /* 10 seconds */
-#define BATADV_BLA_BACKBONE_TIMEOUT (BATADV_BLA_PERIOD_LENGTH * 3)
+#define BATADV_BLA_BACKBONE_TIMEOUT (BATADV_BLA_PERIOD_LENGTH * 6)
#define BATADV_BLA_CLAIM_TIMEOUT (BATADV_BLA_PERIOD_LENGTH * 10)
#define BATADV_BLA_WAIT_PERIODS 3
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index eb76386f8d4b..75fa5013af72 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -802,7 +802,9 @@ void batadv_mcast_free(struct batadv_priv *bat_priv)
batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 1);
batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 1);
+ spin_lock_bh(&bat_priv->tt.commit_lock);
batadv_mcast_mla_tt_retract(bat_priv, NULL);
+ spin_unlock_bh(&bat_priv->tt.commit_lock);
}
/**
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index f5276be2c77c..cc63b44f0d2e 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -203,28 +203,25 @@ void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
}
/**
- * batadv_nc_node_free_rcu - rcu callback to free an nc node and remove
- * its refcount on the orig_node
- * @rcu: rcu pointer of the nc node
+ * batadv_nc_node_release - release nc_node from lists and queue for free after
+ * rcu grace period
+ * @nc_node: the nc node to free
*/
-static void batadv_nc_node_free_rcu(struct rcu_head *rcu)
+static void batadv_nc_node_release(struct batadv_nc_node *nc_node)
{
- struct batadv_nc_node *nc_node;
-
- nc_node = container_of(rcu, struct batadv_nc_node, rcu);
batadv_orig_node_free_ref(nc_node->orig_node);
- kfree(nc_node);
+ kfree_rcu(nc_node, rcu);
}
/**
- * batadv_nc_node_free_ref - decrements the nc node refcounter and possibly
- * frees it
+ * batadv_nc_node_free_ref - decrement the nc node refcounter and possibly
+ * release it
* @nc_node: the nc node to free
*/
static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node)
{
if (atomic_dec_and_test(&nc_node->refcount))
- call_rcu(&nc_node->rcu, batadv_nc_node_free_rcu);
+ batadv_nc_node_release(nc_node);
}
/**
@@ -244,9 +241,7 @@ static void batadv_nc_path_free_ref(struct batadv_nc_path *nc_path)
*/
static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet)
{
- if (nc_packet->skb)
- kfree_skb(nc_packet->skb);
-
+ kfree_skb(nc_packet->skb);
batadv_nc_path_free_ref(nc_packet->nc_path);
kfree(nc_packet);
}
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 7486df9ed48d..fe578f75c391 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -163,92 +163,101 @@ err:
}
/**
- * batadv_neigh_ifinfo_free_rcu - free the neigh_ifinfo object
- * @rcu: rcu pointer of the neigh_ifinfo object
+ * batadv_neigh_ifinfo_release - release neigh_ifinfo from lists and queue for
+ * free after rcu grace period
+ * @neigh_ifinfo: the neigh_ifinfo object to release
*/
-static void batadv_neigh_ifinfo_free_rcu(struct rcu_head *rcu)
+static void
+batadv_neigh_ifinfo_release(struct batadv_neigh_ifinfo *neigh_ifinfo)
{
- struct batadv_neigh_ifinfo *neigh_ifinfo;
-
- neigh_ifinfo = container_of(rcu, struct batadv_neigh_ifinfo, rcu);
-
if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
- batadv_hardif_free_ref_now(neigh_ifinfo->if_outgoing);
+ batadv_hardif_free_ref(neigh_ifinfo->if_outgoing);
- kfree(neigh_ifinfo);
+ kfree_rcu(neigh_ifinfo, rcu);
}
/**
- * batadv_neigh_ifinfo_free_now - decrement the refcounter and possibly free
- * the neigh_ifinfo (without rcu callback)
+ * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly release
+ * the neigh_ifinfo
* @neigh_ifinfo: the neigh_ifinfo object to release
*/
-static void
-batadv_neigh_ifinfo_free_ref_now(struct batadv_neigh_ifinfo *neigh_ifinfo)
+void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo)
{
if (atomic_dec_and_test(&neigh_ifinfo->refcount))
- batadv_neigh_ifinfo_free_rcu(&neigh_ifinfo->rcu);
+ batadv_neigh_ifinfo_release(neigh_ifinfo);
}
/**
- * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly free
- * the neigh_ifinfo
- * @neigh_ifinfo: the neigh_ifinfo object to release
+ * batadv_hardif_neigh_release - release hardif neigh node from lists and
+ * queue for free after rcu grace period
+ * @hardif_neigh: hardif neigh neighbor to free
*/
-void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo)
+static void
+batadv_hardif_neigh_release(struct batadv_hardif_neigh_node *hardif_neigh)
{
- if (atomic_dec_and_test(&neigh_ifinfo->refcount))
- call_rcu(&neigh_ifinfo->rcu, batadv_neigh_ifinfo_free_rcu);
+ spin_lock_bh(&hardif_neigh->if_incoming->neigh_list_lock);
+ hlist_del_init_rcu(&hardif_neigh->list);
+ spin_unlock_bh(&hardif_neigh->if_incoming->neigh_list_lock);
+
+ batadv_hardif_free_ref(hardif_neigh->if_incoming);
+ kfree_rcu(hardif_neigh, rcu);
+}
+
+/**
+ * batadv_hardif_neigh_free_ref - decrement the hardif neighbors refcounter
+ * and possibly release it
+ * @hardif_neigh: hardif neigh neighbor to free
+ */
+void batadv_hardif_neigh_free_ref(struct batadv_hardif_neigh_node *hardif_neigh)
+{
+ if (atomic_dec_and_test(&hardif_neigh->refcount))
+ batadv_hardif_neigh_release(hardif_neigh);
}
/**
- * batadv_neigh_node_free_rcu - free the neigh_node
- * @rcu: rcu pointer of the neigh_node
+ * batadv_neigh_node_release - release neigh_node from lists and queue for
+ * free after rcu grace period
+ * @neigh_node: neigh neighbor to free
*/
-static void batadv_neigh_node_free_rcu(struct rcu_head *rcu)
+static void batadv_neigh_node_release(struct batadv_neigh_node *neigh_node)
{
struct hlist_node *node_tmp;
- struct batadv_neigh_node *neigh_node;
+ struct batadv_hardif_neigh_node *hardif_neigh;
struct batadv_neigh_ifinfo *neigh_ifinfo;
struct batadv_algo_ops *bao;
- neigh_node = container_of(rcu, struct batadv_neigh_node, rcu);
bao = neigh_node->orig_node->bat_priv->bat_algo_ops;
hlist_for_each_entry_safe(neigh_ifinfo, node_tmp,
&neigh_node->ifinfo_list, list) {
- batadv_neigh_ifinfo_free_ref_now(neigh_ifinfo);
+ batadv_neigh_ifinfo_free_ref(neigh_ifinfo);
+ }
+
+ hardif_neigh = batadv_hardif_neigh_get(neigh_node->if_incoming,
+ neigh_node->addr);
+ if (hardif_neigh) {
+ /* batadv_hardif_neigh_get() increases refcount too */
+ batadv_hardif_neigh_free_ref(hardif_neigh);
+ batadv_hardif_neigh_free_ref(hardif_neigh);
}
if (bao->bat_neigh_free)
bao->bat_neigh_free(neigh_node);
- batadv_hardif_free_ref_now(neigh_node->if_incoming);
-
- kfree(neigh_node);
-}
+ batadv_hardif_free_ref(neigh_node->if_incoming);
-/**
- * batadv_neigh_node_free_ref_now - decrement the neighbors refcounter
- * and possibly free it (without rcu callback)
- * @neigh_node: neigh neighbor to free
- */
-static void
-batadv_neigh_node_free_ref_now(struct batadv_neigh_node *neigh_node)
-{
- if (atomic_dec_and_test(&neigh_node->refcount))
- batadv_neigh_node_free_rcu(&neigh_node->rcu);
+ kfree_rcu(neigh_node, rcu);
}
/**
* batadv_neigh_node_free_ref - decrement the neighbors refcounter
- * and possibly free it
+ * and possibly release it
* @neigh_node: neigh neighbor to free
*/
void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node)
{
if (atomic_dec_and_test(&neigh_node->refcount))
- call_rcu(&neigh_node->rcu, batadv_neigh_node_free_rcu);
+ batadv_neigh_node_release(neigh_node);
}
/**
@@ -479,6 +488,106 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
}
/**
+ * batadv_hardif_neigh_create - create a hardif neighbour node
+ * @hard_iface: the interface this neighbour is connected to
+ * @neigh_addr: the interface address of the neighbour to retrieve
+ *
+ * Returns the hardif neighbour node if found or created or NULL otherwise.
+ */
+static struct batadv_hardif_neigh_node *
+batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
+ const u8 *neigh_addr)
+{
+ struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
+ struct batadv_hardif_neigh_node *hardif_neigh = NULL;
+
+ spin_lock_bh(&hard_iface->neigh_list_lock);
+
+ /* check if neighbor hasn't been added in the meantime */
+ hardif_neigh = batadv_hardif_neigh_get(hard_iface, neigh_addr);
+ if (hardif_neigh)
+ goto out;
+
+ if (!atomic_inc_not_zero(&hard_iface->refcount))
+ goto out;
+
+ hardif_neigh = kzalloc(sizeof(*hardif_neigh), GFP_ATOMIC);
+ if (!hardif_neigh) {
+ batadv_hardif_free_ref(hard_iface);
+ goto out;
+ }
+
+ INIT_HLIST_NODE(&hardif_neigh->list);
+ ether_addr_copy(hardif_neigh->addr, neigh_addr);
+ hardif_neigh->if_incoming = hard_iface;
+ hardif_neigh->last_seen = jiffies;
+
+ atomic_set(&hardif_neigh->refcount, 1);
+
+ if (bat_priv->bat_algo_ops->bat_hardif_neigh_init)
+ bat_priv->bat_algo_ops->bat_hardif_neigh_init(hardif_neigh);
+
+ hlist_add_head(&hardif_neigh->list, &hard_iface->neigh_list);
+
+out:
+ spin_unlock_bh(&hard_iface->neigh_list_lock);
+ return hardif_neigh;
+}
+
+/**
+ * batadv_hardif_neigh_get_or_create - retrieve or create a hardif neighbour
+ * node
+ * @hard_iface: the interface this neighbour is connected to
+ * @neigh_addr: the interface address of the neighbour to retrieve
+ *
+ * Returns the hardif neighbour node if found or created or NULL otherwise.
+ */
+static struct batadv_hardif_neigh_node *
+batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface,
+ const u8 *neigh_addr)
+{
+ struct batadv_hardif_neigh_node *hardif_neigh = NULL;
+
+ /* first check without locking to avoid the overhead */
+ hardif_neigh = batadv_hardif_neigh_get(hard_iface, neigh_addr);
+ if (hardif_neigh)
+ return hardif_neigh;
+
+ return batadv_hardif_neigh_create(hard_iface, neigh_addr);
+}
+
+/**
+ * batadv_hardif_neigh_get - retrieve a hardif neighbour from the list
+ * @hard_iface: the interface where this neighbour is connected to
+ * @neigh_addr: the address of the neighbour
+ *
+ * Looks for and possibly returns a neighbour belonging to this hard interface.
+ * Returns NULL if the neighbour is not found.
+ */
+struct batadv_hardif_neigh_node *
+batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
+ const u8 *neigh_addr)
+{
+ struct batadv_hardif_neigh_node *tmp_hardif_neigh, *hardif_neigh = NULL;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tmp_hardif_neigh,
+ &hard_iface->neigh_list, list) {
+ if (!batadv_compare_eth(tmp_hardif_neigh->addr, neigh_addr))
+ continue;
+
+ if (!atomic_inc_not_zero(&tmp_hardif_neigh->refcount))
+ continue;
+
+ hardif_neigh = tmp_hardif_neigh;
+ break;
+ }
+ rcu_read_unlock();
+
+ return hardif_neigh;
+}
+
+/**
* batadv_neigh_node_new - create and init a new neigh_node object
* @orig_node: originator object representing the neighbour
* @hard_iface: the interface where the neighbour is connected to
@@ -493,11 +602,17 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node,
const u8 *neigh_addr)
{
struct batadv_neigh_node *neigh_node;
+ struct batadv_hardif_neigh_node *hardif_neigh = NULL;
neigh_node = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr);
if (neigh_node)
goto out;
+ hardif_neigh = batadv_hardif_neigh_get_or_create(hard_iface,
+ neigh_addr);
+ if (!hardif_neigh)
+ goto out;
+
neigh_node = kzalloc(sizeof(*neigh_node), GFP_ATOMIC);
if (!neigh_node)
goto out;
@@ -523,117 +638,147 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node,
hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
spin_unlock_bh(&orig_node->neigh_list_lock);
+ /* increment unique neighbor refcount */
+ atomic_inc(&hardif_neigh->refcount);
+
batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv,
"Creating new neighbor %pM for orig_node %pM on interface %s\n",
neigh_addr, orig_node->orig, hard_iface->net_dev->name);
out:
+ if (hardif_neigh)
+ batadv_hardif_neigh_free_ref(hardif_neigh);
return neigh_node;
}
/**
- * batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object
- * @rcu: rcu pointer of the orig_ifinfo object
+ * batadv_hardif_neigh_seq_print_text - print the single hop neighbour list
+ * @seq: neighbour table seq_file struct
+ * @offset: not used
+ *
+ * Always returns 0.
*/
-static void batadv_orig_ifinfo_free_rcu(struct rcu_head *rcu)
+int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset)
{
- struct batadv_orig_ifinfo *orig_ifinfo;
- struct batadv_neigh_node *router;
+ struct net_device *net_dev = (struct net_device *)seq->private;
+ struct batadv_priv *bat_priv = netdev_priv(net_dev);
+ struct batadv_hard_iface *primary_if;
- orig_ifinfo = container_of(rcu, struct batadv_orig_ifinfo, rcu);
+ primary_if = batadv_seq_print_text_primary_if_get(seq);
+ if (!primary_if)
+ return 0;
- if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
- batadv_hardif_free_ref_now(orig_ifinfo->if_outgoing);
+ seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n",
+ BATADV_SOURCE_VERSION, primary_if->net_dev->name,
+ primary_if->net_dev->dev_addr, net_dev->name,
+ bat_priv->bat_algo_ops->name);
- /* this is the last reference to this object */
- router = rcu_dereference_protected(orig_ifinfo->router, true);
- if (router)
- batadv_neigh_node_free_ref_now(router);
- kfree(orig_ifinfo);
+ batadv_hardif_free_ref(primary_if);
+
+ if (!bat_priv->bat_algo_ops->bat_neigh_print) {
+ seq_puts(seq,
+ "No printing function for this routing protocol\n");
+ return 0;
+ }
+
+ bat_priv->bat_algo_ops->bat_neigh_print(bat_priv, seq);
+ return 0;
}
/**
- * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free
- * the orig_ifinfo (without rcu callback)
+ * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for
+ * free after rcu grace period
* @orig_ifinfo: the orig_ifinfo object to release
*/
-static void
-batadv_orig_ifinfo_free_ref_now(struct batadv_orig_ifinfo *orig_ifinfo)
+static void batadv_orig_ifinfo_release(struct batadv_orig_ifinfo *orig_ifinfo)
{
- if (atomic_dec_and_test(&orig_ifinfo->refcount))
- batadv_orig_ifinfo_free_rcu(&orig_ifinfo->rcu);
+ struct batadv_neigh_node *router;
+
+ if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
+ batadv_hardif_free_ref(orig_ifinfo->if_outgoing);
+
+ /* this is the last reference to this object */
+ router = rcu_dereference_protected(orig_ifinfo->router, true);
+ if (router)
+ batadv_neigh_node_free_ref(router);
+
+ kfree_rcu(orig_ifinfo, rcu);
}
/**
- * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free
+ * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly release
* the orig_ifinfo
* @orig_ifinfo: the orig_ifinfo object to release
*/
void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo)
{
if (atomic_dec_and_test(&orig_ifinfo->refcount))
- call_rcu(&orig_ifinfo->rcu, batadv_orig_ifinfo_free_rcu);
+ batadv_orig_ifinfo_release(orig_ifinfo);
}
+/**
+ * batadv_orig_node_free_rcu - free the orig_node
+ * @rcu: rcu pointer of the orig_node
+ */
static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
{
- struct hlist_node *node_tmp;
- struct batadv_neigh_node *neigh_node;
struct batadv_orig_node *orig_node;
- struct batadv_orig_ifinfo *orig_ifinfo;
orig_node = container_of(rcu, struct batadv_orig_node, rcu);
+ batadv_mcast_purge_orig(orig_node);
+
+ batadv_frag_purge_orig(orig_node, NULL);
+
+ if (orig_node->bat_priv->bat_algo_ops->bat_orig_free)
+ orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node);
+
+ kfree(orig_node->tt_buff);
+ kfree(orig_node);
+}
+
+/**
+ * batadv_orig_node_release - release orig_node from lists and queue for
+ * free after rcu grace period
+ * @orig_node: the orig node to free
+ */
+static void batadv_orig_node_release(struct batadv_orig_node *orig_node)
+{
+ struct hlist_node *node_tmp;
+ struct batadv_neigh_node *neigh_node;
+ struct batadv_orig_ifinfo *orig_ifinfo;
+
spin_lock_bh(&orig_node->neigh_list_lock);
/* for all neighbors towards this originator ... */
hlist_for_each_entry_safe(neigh_node, node_tmp,
&orig_node->neigh_list, list) {
hlist_del_rcu(&neigh_node->list);
- batadv_neigh_node_free_ref_now(neigh_node);
+ batadv_neigh_node_free_ref(neigh_node);
}
hlist_for_each_entry_safe(orig_ifinfo, node_tmp,
&orig_node->ifinfo_list, list) {
hlist_del_rcu(&orig_ifinfo->list);
- batadv_orig_ifinfo_free_ref_now(orig_ifinfo);
+ batadv_orig_ifinfo_free_ref(orig_ifinfo);
}
spin_unlock_bh(&orig_node->neigh_list_lock);
- batadv_mcast_purge_orig(orig_node);
-
/* Free nc_nodes */
batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL);
- batadv_frag_purge_orig(orig_node, NULL);
-
- if (orig_node->bat_priv->bat_algo_ops->bat_orig_free)
- orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node);
-
- kfree(orig_node->tt_buff);
- kfree(orig_node);
+ call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu);
}
/**
* batadv_orig_node_free_ref - decrement the orig node refcounter and possibly
- * schedule an rcu callback for freeing it
+ * release it
* @orig_node: the orig node to free
*/
void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node)
{
if (atomic_dec_and_test(&orig_node->refcount))
- call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu);
-}
-
-/**
- * batadv_orig_node_free_ref_now - decrement the orig node refcounter and
- * possibly free it (without rcu callback)
- * @orig_node: the orig node to free
- */
-void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node)
-{
- if (atomic_dec_and_test(&orig_node->refcount))
- batadv_orig_node_free_rcu(&orig_node->rcu);
+ batadv_orig_node_release(orig_node);
}
void batadv_originator_free(struct batadv_priv *bat_priv)
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index fa18f9bf266b..cf0730414ed2 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -38,9 +38,13 @@ int batadv_originator_init(struct batadv_priv *bat_priv);
void batadv_originator_free(struct batadv_priv *bat_priv);
void batadv_purge_orig_ref(struct batadv_priv *bat_priv);
void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node);
-void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node);
struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
const u8 *addr);
+struct batadv_hardif_neigh_node *
+batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
+ const u8 *neigh_addr);
+void
+batadv_hardif_neigh_free_ref(struct batadv_hardif_neigh_node *hardif_neigh);
struct batadv_neigh_node *
batadv_neigh_node_new(struct batadv_orig_node *orig_node,
struct batadv_hard_iface *hard_iface,
@@ -57,6 +61,8 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
struct batadv_hard_iface *if_outgoing);
void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo);
+int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset);
+
struct batadv_orig_ifinfo *
batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node,
struct batadv_hard_iface *if_outgoing);
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 11f996b39fef..0558e3237e0e 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -72,8 +72,7 @@ enum batadv_subtype {
* enum batadv_iv_flags - flags used in B.A.T.M.A.N. IV OGM packets
* @BATADV_NOT_BEST_NEXT_HOP: flag is set when ogm packet is forwarded and was
* previously received from someone else than the best neighbor.
- * @BATADV_PRIMARIES_FIRST_HOP: flag is set when the primary interface address
- * is used, and the packet travels its first hop.
+ * @BATADV_PRIMARIES_FIRST_HOP: flag unused.
* @BATADV_DIRECTLINK: flag is for the first hop or if rebroadcasted from a
* one hop neighbor on the interface where it was originally received.
*/
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 8d990b070a2e..e4f2646d9246 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -497,9 +497,9 @@ batadv_find_router(struct batadv_priv *bat_priv,
/* alternative candidate should be good enough to be
* considered
*/
- if (!bao->bat_neigh_is_equiv_or_better(cand_router,
- cand->if_outgoing,
- router, recv_if))
+ if (!bao->bat_neigh_is_similar_or_better(cand_router,
+ cand->if_outgoing,
+ router, recv_if))
goto next;
/* don't use the same router twice */
@@ -836,6 +836,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
u8 *orig_addr;
struct batadv_orig_node *orig_node = NULL;
int check, hdr_size = sizeof(*unicast_packet);
+ enum batadv_subtype subtype;
bool is4addr;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -863,10 +864,20 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
/* packet for me */
if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
if (is4addr) {
- batadv_dat_inc_counter(bat_priv,
- unicast_4addr_packet->subtype);
- orig_addr = unicast_4addr_packet->src;
- orig_node = batadv_orig_hash_find(bat_priv, orig_addr);
+ subtype = unicast_4addr_packet->subtype;
+ batadv_dat_inc_counter(bat_priv, subtype);
+
+ /* Only payload data should be considered for speedy
+ * join. For example, DAT also uses unicast 4addr
+ * types, but those packets should not be considered
+ * for speedy join, since the clients do not actually
+ * reside at the sending originator.
+ */
+ if (subtype == BATADV_P_DATA) {
+ orig_addr = unicast_4addr_packet->src;
+ orig_node = batadv_orig_hash_find(bat_priv,
+ orig_addr);
+ }
}
if (batadv_dat_snoop_incoming_arp_request(bat_priv, skb,
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index f664324805eb..782fa33ec296 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -407,8 +407,7 @@ void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface)
static void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet)
{
- if (forw_packet->skb)
- kfree_skb(forw_packet->skb);
+ kfree_skb(forw_packet->skb);
if (forw_packet->if_incoming)
batadv_hardif_free_ref(forw_packet->if_incoming);
if (forw_packet->if_outgoing)
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 9de3c8804ff4..fe87777fda8a 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -40,6 +40,7 @@
#include "distributed-arp-table.h"
#include "gateway_client.h"
#include "gateway_common.h"
+#include "bridge_loop_avoidance.h"
#include "hard-interface.h"
#include "network-coding.h"
#include "packet.h"
@@ -241,10 +242,13 @@ ssize_t batadv_show_vlan_##_name(struct kobject *kobj, \
static int batadv_store_bool_attr(char *buff, size_t count,
struct net_device *net_dev,
- const char *attr_name, atomic_t *attr)
+ const char *attr_name, atomic_t *attr,
+ bool *changed)
{
int enabled = -1;
+ *changed = false;
+
if (buff[count - 1] == '\n')
buff[count - 1] = '\0';
@@ -271,6 +275,8 @@ static int batadv_store_bool_attr(char *buff, size_t count,
atomic_read(attr) == 1 ? "enabled" : "disabled",
enabled == 1 ? "enabled" : "disabled");
+ *changed = true;
+
atomic_set(attr, (unsigned int)enabled);
return count;
}
@@ -281,11 +287,12 @@ __batadv_store_bool_attr(char *buff, size_t count,
struct attribute *attr,
atomic_t *attr_store, struct net_device *net_dev)
{
+ bool changed;
int ret;
ret = batadv_store_bool_attr(buff, count, net_dev, attr->name,
- attr_store);
- if (post_func && ret)
+ attr_store, &changed);
+ if (post_func && changed)
post_func(net_dev);
return ret;
@@ -549,7 +556,8 @@ static ssize_t batadv_store_isolation_mark(struct kobject *kobj,
BATADV_ATTR_SIF_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL);
BATADV_ATTR_SIF_BOOL(bonding, S_IRUGO | S_IWUSR, NULL);
#ifdef CONFIG_BATMAN_ADV_BLA
-BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, S_IRUGO | S_IWUSR, NULL);
+BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, S_IRUGO | S_IWUSR,
+ batadv_bla_status_update);
#endif
#ifdef CONFIG_BATMAN_ADV_DAT
BATADV_ATTR_SIF_BOOL(distributed_arp_table, S_IRUGO | S_IWUSR,
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 4228b10c47ea..0e80fd1461ab 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -68,13 +68,15 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
unsigned short vid, const char *message,
bool roaming);
-/* returns 1 if they are the same mac addr */
+/* returns 1 if they are the same mac addr and vid */
static int batadv_compare_tt(const struct hlist_node *node, const void *data2)
{
const void *data1 = container_of(node, struct batadv_tt_common_entry,
hash_entry);
+ const struct batadv_tt_common_entry *tt1 = data1;
+ const struct batadv_tt_common_entry *tt2 = data2;
- return batadv_compare_eth(data1, data2);
+ return (tt1->vid == tt2->vid) && batadv_compare_eth(data1, data2);
}
/**
@@ -238,20 +240,6 @@ int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
return count;
}
-static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
-{
- struct batadv_tt_orig_list_entry *orig_entry;
-
- orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu);
-
- /* We are in an rcu callback here, therefore we cannot use
- * batadv_orig_node_free_ref() and its call_rcu():
- * An rcu_barrier() wouldn't wait for that to finish
- */
- batadv_orig_node_free_ref_now(orig_entry->orig_node);
- kfree(orig_entry);
-}
-
/**
* batadv_tt_local_size_mod - change the size by v of the local table identified
* by vid
@@ -315,9 +303,11 @@ static void batadv_tt_global_size_mod(struct batadv_orig_node *orig_node,
if (atomic_add_return(v, &vlan->tt.num_entries) == 0) {
spin_lock_bh(&orig_node->vlan_list_lock);
- hlist_del_init_rcu(&vlan->list);
+ if (!hlist_unhashed(&vlan->list)) {
+ hlist_del_init_rcu(&vlan->list);
+ batadv_orig_node_vlan_free_ref(vlan);
+ }
spin_unlock_bh(&orig_node->vlan_list_lock);
- batadv_orig_node_vlan_free_ref(vlan);
}
batadv_orig_node_vlan_free_ref(vlan);
@@ -347,13 +337,25 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node,
batadv_tt_global_size_mod(orig_node, vid, -1);
}
+/**
+ * batadv_tt_orig_list_entry_release - release tt orig entry from lists and
+ * queue for free after rcu grace period
+ * @orig_entry: tt orig entry to be free'd
+ */
+static void
+batadv_tt_orig_list_entry_release(struct batadv_tt_orig_list_entry *orig_entry)
+{
+ batadv_orig_node_free_ref(orig_entry->orig_node);
+ kfree_rcu(orig_entry, rcu);
+}
+
static void
batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry)
{
if (!atomic_dec_and_test(&orig_entry->refcount))
return;
- call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu);
+ batadv_tt_orig_list_entry_release(orig_entry);
}
/**
@@ -1427,15 +1429,21 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
}
/* if the client was temporary added before receiving the first
- * OGM announcing it, we have to clear the TEMP flag
+ * OGM announcing it, we have to clear the TEMP flag. Also,
+ * remove the previous temporary orig node and re-add it
+ * if required. If the orig entry changed, the new one which
+ * is a non-temporary entry is preferred.
*/
- common->flags &= ~BATADV_TT_CLIENT_TEMP;
+ if (common->flags & BATADV_TT_CLIENT_TEMP) {
+ batadv_tt_global_del_orig_list(tt_global_entry);
+ common->flags &= ~BATADV_TT_CLIENT_TEMP;
+ }
/* the change can carry possible "attribute" flags like the
* TT_CLIENT_WIFI, therefore they have to be copied in the
* client entry
*/
- tt_global_entry->common.flags |= flags;
+ common->flags |= flags;
/* If there is the BATADV_TT_CLIENT_ROAM flag set, there is only
* one originator left in the list and we previously received a
@@ -2411,8 +2419,8 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node,
{
struct batadv_tvlv_tt_vlan_data *tt_vlan_tmp;
struct batadv_orig_node_vlan *vlan;
+ int i, orig_num_vlan;
u32 crc;
- int i;
/* check if each received CRC matches the locally stored one */
for (i = 0; i < num_vlan; i++) {
@@ -2438,6 +2446,18 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node,
return false;
}
+ /* check if any excess VLANs exist locally for the originator
+ * which are not mentioned in the TVLV from the originator.
+ */
+ rcu_read_lock();
+ orig_num_vlan = 0;
+ hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list)
+ orig_num_vlan++;
+ rcu_read_unlock();
+
+ if (orig_num_vlan > num_vlan)
+ return false;
+
return true;
}
@@ -3319,7 +3339,10 @@ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst,
bool ret = false;
vlan = batadv_softif_vlan_get(bat_priv, vid);
- if (!vlan || !atomic_read(&vlan->ap_isolation))
+ if (!vlan)
+ return false;
+
+ if (!atomic_read(&vlan->ap_isolation))
goto out;
tt_local_entry = batadv_tt_local_hash_find(bat_priv, dst, vid);
@@ -3336,8 +3359,7 @@ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst,
ret = true;
out:
- if (vlan)
- batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_free_ref(vlan);
if (tt_global_entry)
batadv_tt_global_entry_free_ref(tt_global_entry);
if (tt_local_entry)
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index d260efd70499..3437b667a2cd 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -100,6 +100,8 @@ struct batadv_hard_iface_bat_iv {
* @bat_iv: BATMAN IV specific per hard interface data
* @cleanup_work: work queue callback item for hard interface deinit
* @debug_dir: dentry for nc subdir in batman-adv directory in debugfs
+ * @neigh_list: list of unique single hop neighbors via this interface
+ * @neigh_list_lock: lock protecting neigh_list
*/
struct batadv_hard_iface {
struct list_head list;
@@ -115,6 +117,9 @@ struct batadv_hard_iface {
struct batadv_hard_iface_bat_iv bat_iv;
struct work_struct cleanup_work;
struct dentry *debug_dir;
+ struct hlist_head neigh_list;
+ /* neigh_list_lock protects: neigh_list */
+ spinlock_t neigh_list_lock;
};
/**
@@ -218,12 +223,12 @@ struct batadv_orig_bat_iv {
* @orig: originator ethernet address
* @ifinfo_list: list for routers per outgoing interface
* @last_bonding_candidate: pointer to last ifinfo of last used router
- * @batadv_dat_addr_t: address of the orig node in the distributed hash
+ * @dat_addr: address of the orig node in the distributed hash
* @last_seen: time when last packet from this node was received
* @bcast_seqno_reset: time when the broadcast seqno window was reset
* @mcast_handler_lock: synchronizes mcast-capability and -flag changes
* @mcast_flags: multicast flags announced by the orig node
- * @mcast_want_all_unsnoop_node: a list node for the
+ * @mcast_want_all_unsnoopables_node: a list node for the
* mcast.want_all_unsnoopables list
* @mcast_want_all_ipv4_node: a list node for the mcast.want_all_ipv4 list
* @mcast_want_all_ipv6_node: a list node for the mcast.want_all_ipv6 list
@@ -341,6 +346,23 @@ struct batadv_gw_node {
};
/**
+ * batadv_hardif_neigh_node - unique neighbor per hard interface
+ * @list: list node for batadv_hard_iface::neigh_list
+ * @addr: the MAC address of the neighboring interface
+ * @if_incoming: pointer to incoming hard interface
+ * @refcount: number of contexts the object is used
+ * @rcu: struct used for freeing in a RCU-safe manner
+ */
+struct batadv_hardif_neigh_node {
+ struct hlist_node list;
+ u8 addr[ETH_ALEN];
+ struct batadv_hard_iface *if_incoming;
+ unsigned long last_seen;
+ atomic_t refcount;
+ struct rcu_head rcu;
+};
+
+/**
* struct batadv_neigh_node - structure for single hops neighbors
* @list: list node for batadv_orig_node::neigh_list
* @orig_node: pointer to corresponding orig_node
@@ -349,9 +371,8 @@ struct batadv_gw_node {
* @ifinfo_lock: lock protecting private ifinfo members and list
* @if_incoming: pointer to incoming hard interface
* @last_seen: when last packet via this neighbor was received
- * @last_ttl: last received ttl from this neigh node
+ * @refcount: number of contexts the object is used
* @rcu: struct used for freeing in an RCU-safe manner
- * @bat_iv: B.A.T.M.A.N. IV private structure
*/
struct batadv_neigh_node {
struct hlist_node list;
@@ -401,13 +422,14 @@ struct batadv_neigh_ifinfo {
struct rcu_head rcu;
};
+#ifdef CONFIG_BATMAN_ADV_BLA
+
/**
* struct batadv_bcast_duplist_entry - structure for LAN broadcast suppression
- * @orig[ETH_ALEN]: mac address of orig node orginating the broadcast
+ * @orig: mac address of orig node orginating the broadcast
* @crc: crc32 checksum of broadcast payload
* @entrytime: time when the broadcast packet was received
*/
-#ifdef CONFIG_BATMAN_ADV_BLA
struct batadv_bcast_duplist_entry {
u8 orig[ETH_ALEN];
__be32 crc;
@@ -549,9 +571,11 @@ struct batadv_priv_tt {
struct delayed_work work;
};
+#ifdef CONFIG_BATMAN_ADV_BLA
+
/**
* struct batadv_priv_bla - per mesh interface bridge loope avoidance data
- * @num_requests; number of bla requests in flight
+ * @num_requests: number of bla requests in flight
* @claim_hash: hash table containing mesh nodes this host has claimed
* @backbone_hash: hash table containing all detected backbone gateways
* @bcast_duplist: recently received broadcast packets array (for broadcast
@@ -561,7 +585,6 @@ struct batadv_priv_tt {
* @claim_dest: local claim data (e.g. claim group)
* @work: work queue callback item for cleanups & bla announcements
*/
-#ifdef CONFIG_BATMAN_ADV_BLA
struct batadv_priv_bla {
atomic_t num_requests;
struct batadv_hashtable *claim_hash;
@@ -575,6 +598,8 @@ struct batadv_priv_bla {
};
#endif
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+
/**
* struct batadv_priv_debug_log - debug logging data
* @log_buff: buffer holding the logs (ring bufer)
@@ -583,7 +608,6 @@ struct batadv_priv_bla {
* @lock: lock protecting log_buff, log_start & log_end
* @queue_wait: log reader's wait queue
*/
-#ifdef CONFIG_BATMAN_ADV_DEBUG
struct batadv_priv_debug_log {
char log_buff[BATADV_LOG_BUF_LEN];
unsigned long log_start;
@@ -625,13 +649,14 @@ struct batadv_priv_tvlv {
spinlock_t handler_list_lock; /* protects handler_list */
};
+#ifdef CONFIG_BATMAN_ADV_DAT
+
/**
* struct batadv_priv_dat - per mesh interface DAT private data
* @addr: node DAT address
* @hash: hashtable representing the local ARP cache
* @work: work queue callback item for cache purging
*/
-#ifdef CONFIG_BATMAN_ADV_DAT
struct batadv_priv_dat {
batadv_dat_addr_t addr;
struct batadv_hashtable *hash;
@@ -773,7 +798,7 @@ struct batadv_softif_vlan {
* @dat: distributed arp table data
* @mcast: multicast data
* @network_coding: bool indicating whether network coding is enabled
- * @batadv_priv_nc: network coding data
+ * @nc: network coding data
*/
struct batadv_priv {
atomic_t mesh_state;
@@ -871,6 +896,8 @@ struct batadv_socket_packet {
u8 icmp_packet[BATADV_ICMP_MAX_PACKET_SIZE];
};
+#ifdef CONFIG_BATMAN_ADV_BLA
+
/**
* struct batadv_bla_backbone_gw - batman-adv gateway bridged into the LAN
* @orig: originator address of backbone node (mac address of primary iface)
@@ -884,10 +911,10 @@ struct batadv_socket_packet {
* backbone gateway - no bcast traffic is formwared until the situation was
* resolved
* @crc: crc16 checksum over all claims
+ * @crc_lock: lock protecting crc
* @refcount: number of contexts the object is used
* @rcu: struct used for freeing in an RCU-safe manner
*/
-#ifdef CONFIG_BATMAN_ADV_BLA
struct batadv_bla_backbone_gw {
u8 orig[ETH_ALEN];
unsigned short vid;
@@ -897,6 +924,7 @@ struct batadv_bla_backbone_gw {
atomic_t wait_periods;
atomic_t request_sent;
u16 crc;
+ spinlock_t crc_lock; /* protects crc */
atomic_t refcount;
struct rcu_head rcu;
};
@@ -905,7 +933,7 @@ struct batadv_bla_backbone_gw {
* struct batadv_bla_claim - claimed non-mesh client structure
* @addr: mac address of claimed non-mesh client
* @vid: vlan id this client was detected on
- * @batadv_bla_backbone_gw: pointer to backbone gw claiming this client
+ * @backbone_gw: pointer to backbone gw claiming this client
* @lasttime: last time we heard of claim (locals only)
* @hash_entry: hlist node for batadv_priv_bla::claim_hash
* @refcount: number of contexts the object is used
@@ -1131,11 +1159,13 @@ struct batadv_forw_packet {
* @bat_primary_iface_set: called when primary interface is selected / changed
* @bat_ogm_schedule: prepare a new outgoing OGM for the send queue
* @bat_ogm_emit: send scheduled OGM
+ * @bat_hardif_neigh_init: called on creation of single hop entry
* @bat_neigh_cmp: compare the metrics of two neighbors for their respective
* outgoing interfaces
- * @bat_neigh_is_equiv_or_better: check if neigh1 is equally good or better
- * than neigh2 for their respective outgoing interface from the metric
+ * @bat_neigh_is_similar_or_better: check if neigh1 is equally similar or
+ * better than neigh2 for their respective outgoing interface from the metric
* prospective
+ * @bat_neigh_print: print the single hop neighbor list (optional)
* @bat_neigh_free: free the resources allocated by the routing algorithm for a
* neigh_node object
* @bat_orig_print: print the originator table (optional)
@@ -1156,15 +1186,17 @@ struct batadv_algo_ops {
void (*bat_ogm_schedule)(struct batadv_hard_iface *hard_iface);
void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet);
/* neigh_node handling API */
+ void (*bat_hardif_neigh_init)(struct batadv_hardif_neigh_node *neigh);
int (*bat_neigh_cmp)(struct batadv_neigh_node *neigh1,
struct batadv_hard_iface *if_outgoing1,
struct batadv_neigh_node *neigh2,
struct batadv_hard_iface *if_outgoing2);
- bool (*bat_neigh_is_equiv_or_better)
+ bool (*bat_neigh_is_similar_or_better)
(struct batadv_neigh_node *neigh1,
struct batadv_hard_iface *if_outgoing1,
struct batadv_neigh_node *neigh2,
struct batadv_hard_iface *if_outgoing2);
+ void (*bat_neigh_print)(struct batadv_priv *priv, struct seq_file *seq);
void (*bat_neigh_free)(struct batadv_neigh_node *neigh);
/* orig_node handling API */
void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq,
@@ -1224,8 +1256,6 @@ struct batadv_dat_candidate {
* struct batadv_tvlv_container - container for tvlv appended to OGMs
* @list: hlist node for batadv_priv_tvlv::container_list
* @tvlv_hdr: tvlv header information needed to construct the tvlv
- * @value_len: length of the buffer following this struct which contains
- * the actual tvlv payload
* @refcount: number of contexts the object is used
*/
struct batadv_tvlv_container {
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 9e9cca3689a0..8a4cc2f7f0db 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -307,6 +307,9 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
/* check that it's our buffer */
if (lowpan_is_ipv6(*skb_network_header(skb))) {
+ /* Pull off the 1-byte of 6lowpan header. */
+ skb_pull(skb, 1);
+
/* Copy the packet so that the IPv6 header is
* properly aligned.
*/
@@ -317,6 +320,7 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
local_skb->protocol = htons(ETH_P_IPV6);
local_skb->pkt_type = PACKET_HOST;
+ local_skb->dev = dev;
skb_set_transport_header(local_skb, sizeof(struct ipv6hdr));
@@ -335,6 +339,8 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
if (!local_skb)
goto drop;
+ local_skb->dev = dev;
+
ret = iphc_decompress(local_skb, dev, chan);
if (ret < 0) {
kfree_skb(local_skb);
@@ -343,7 +349,6 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
local_skb->protocol = htons(ETH_P_IPV6);
local_skb->pkt_type = PACKET_HOST;
- local_skb->dev = dev;
if (give_skb_to_upper(local_skb, dev)
!= NET_RX_SUCCESS) {
@@ -825,9 +830,7 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev)
list_add_rcu(&(*dev)->list, &bt_6lowpan_devices);
spin_unlock(&devices_lock);
- lowpan_netdev_setup(netdev, LOWPAN_LLTYPE_BTLE);
-
- err = register_netdev(netdev);
+ err = lowpan_register_netdev(netdev, LOWPAN_LLTYPE_BTLE);
if (err < 0) {
BT_INFO("register_netdev failed %d", err);
spin_lock(&devices_lock);
@@ -890,7 +893,7 @@ static void delete_netdev(struct work_struct *work)
struct lowpan_dev *entry = container_of(work, struct lowpan_dev,
delete_netdev);
- unregister_netdev(entry->netdev);
+ lowpan_unregister_netdev(entry->netdev);
/* The entry pointer is deleted by the netdev destructor. */
}
@@ -1348,7 +1351,7 @@ static void disconnect_devices(void)
ifdown(entry->netdev);
BT_DBG("Unregistering netdev %s %p",
entry->netdev->name, entry->netdev);
- unregister_netdev(entry->netdev);
+ lowpan_unregister_netdev(entry->netdev);
kfree(entry);
}
}
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index a3bffd1ec2b4..955eda93e66f 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -33,8 +33,6 @@
#include "selftest.h"
-#define VERSION "2.21"
-
/* Bluetooth sockets */
#define BT_MAX_PROTO 8
static const struct net_proto_family *bt_proto[BT_MAX_PROTO];
@@ -176,20 +174,20 @@ EXPORT_SYMBOL(bt_accept_unlink);
struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
{
- struct list_head *p, *n;
+ struct bt_sock *s, *n;
struct sock *sk;
BT_DBG("parent %p", parent);
- list_for_each_safe(p, n, &bt_sk(parent)->accept_q) {
- sk = (struct sock *) list_entry(p, struct bt_sock, accept_q);
+ list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) {
+ sk = (struct sock *)s;
lock_sock(sk);
/* FIXME: Is this check still needed */
if (sk->sk_state == BT_CLOSED) {
- release_sock(sk);
bt_accept_unlink(sk);
+ release_sock(sk);
continue;
}
@@ -271,11 +269,11 @@ static long bt_sock_data_wait(struct sock *sk, long timeo)
if (signal_pending(current) || !timeo)
break;
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
release_sock(sk);
timeo = schedule_timeout(timeo);
lock_sock(sk);
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
}
__set_current_state(TASK_RUNNING);
@@ -390,11 +388,11 @@ EXPORT_SYMBOL(bt_sock_stream_recvmsg);
static inline unsigned int bt_accept_poll(struct sock *parent)
{
- struct list_head *p, *n;
+ struct bt_sock *s, *n;
struct sock *sk;
- list_for_each_safe(p, n, &bt_sk(parent)->accept_q) {
- sk = (struct sock *) list_entry(p, struct bt_sock, accept_q);
+ list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) {
+ sk = (struct sock *)s;
if (sk->sk_state == BT_CONNECTED ||
(test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags) &&
sk->sk_state == BT_CONNECT2))
@@ -441,7 +439,7 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock,
if (!test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags) && sock_writeable(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
return mask;
}
@@ -671,7 +669,7 @@ static const struct file_operations bt_fops = {
};
int bt_procfs_init(struct net *net, const char *name,
- struct bt_sock_list* sk_list,
+ struct bt_sock_list *sk_list,
int (* seq_show)(struct seq_file *, void *))
{
sk_list->custom_seq_show = seq_show;
@@ -687,7 +685,7 @@ void bt_procfs_cleanup(struct net *net, const char *name)
}
#else
int bt_procfs_init(struct net *net, const char *name,
- struct bt_sock_list* sk_list,
+ struct bt_sock_list *sk_list,
int (* seq_show)(struct seq_file *, void *))
{
return 0;
@@ -715,7 +713,7 @@ static int __init bt_init(void)
sock_skb_cb_check_size(sizeof(struct bt_skb_cb));
- BT_INFO("Core ver %s", VERSION);
+ BT_INFO("Core ver %s", BT_SUBSYS_VERSION);
err = bt_selftest();
if (err < 0)
@@ -789,7 +787,7 @@ subsys_initcall(bt_init);
module_exit(bt_exit);
MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
-MODULE_DESCRIPTION("Bluetooth Core ver " VERSION);
-MODULE_VERSION(VERSION);
+MODULE_DESCRIPTION("Bluetooth Core ver " BT_SUBSYS_VERSION);
+MODULE_VERSION(BT_SUBSYS_VERSION);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_BLUETOOTH);
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 1641367e54ca..fbf251fef70f 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -608,8 +608,11 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock)
s->msg.msg_flags = MSG_NOSIGNAL;
#ifdef CONFIG_BT_BNEP_MC_FILTER
- /* Set default mc filter */
- set_bit(bnep_mc_hash(dev->broadcast), (ulong *) &s->mc_filter);
+ /* Set default mc filter to not filter out any mc addresses
+ * as defined in the BNEP specification (revision 0.95a)
+ * http://grouper.ieee.org/groups/802/15/Bluetooth/BNEP.pdf
+ */
+ s->mc_filter = ~0LL;
#endif
#ifdef CONFIG_BT_BNEP_PROTO_FILTER
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index 9a50338772f3..46ac686c8911 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -100,10 +100,8 @@ static void cmtp_application_del(struct cmtp_session *session, struct cmtp_appli
static struct cmtp_application *cmtp_application_get(struct cmtp_session *session, int pattern, __u16 value)
{
struct cmtp_application *app;
- struct list_head *p;
- list_for_each(p, &session->applications) {
- app = list_entry(p, struct cmtp_application, list);
+ list_for_each_entry(app, &session->applications, list) {
switch (pattern) {
case CMTP_MSGNUM:
if (app->msgnum == value)
@@ -511,14 +509,12 @@ static int cmtp_proc_show(struct seq_file *m, void *v)
struct capi_ctr *ctrl = m->private;
struct cmtp_session *session = ctrl->driverdata;
struct cmtp_application *app;
- struct list_head *p;
seq_printf(m, "%s\n\n", cmtp_procinfo(ctrl));
seq_printf(m, "addr %s\n", session->name);
seq_printf(m, "ctrl %d\n", session->num);
- list_for_each(p, &session->applications) {
- app = list_entry(p, struct cmtp_application, list);
+ list_for_each_entry(app, &session->applications, list) {
seq_printf(m, "appl %d -> %d\n", app->appl, app->mapping);
}
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 298ed37010e6..9e59b6654126 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -178,8 +178,7 @@ static inline int cmtp_recv_frame(struct cmtp_session *session, struct sk_buff *
cmtp_add_msgpart(session, id, skb->data + hdrlen, len);
break;
default:
- if (session->reassembly[id] != NULL)
- kfree_skb(session->reassembly[id]);
+ kfree_skb(session->reassembly[id]);
session->reassembly[id] = NULL;
break;
}
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 85b82f7adbd2..32575b49f4a0 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -178,6 +178,10 @@ static void hci_connect_le_scan_remove(struct hci_conn *conn)
hci_dev_hold(conn->hdev);
hci_conn_get(conn);
+ /* Even though we hold a reference to the hdev, many other
+ * things might get cleaned up meanwhile, including the hdev's
+ * own workqueue, so we can't use that for scheduling.
+ */
schedule_work(&conn->le_scan_cleanup);
}
@@ -664,8 +668,16 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
conn->state = BT_CLOSED;
- mgmt_connect_failed(hdev, &conn->dst, conn->type, conn->dst_type,
- status);
+ /* If the status indicates successful cancellation of
+ * the attempt (i.e. Unkown Connection Id) there's no point of
+ * notifying failure since we'll go back to keep trying to
+ * connect. The only exception is explicit connect requests
+ * where a timeout + cancel does indicate an actual failure.
+ */
+ if (status != HCI_ERROR_UNKNOWN_CONN_ID ||
+ (params && params->explicit_connect))
+ mgmt_connect_failed(hdev, &conn->dst, conn->type,
+ conn->dst_type, status);
hci_connect_cfm(conn, status);
@@ -679,7 +691,7 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
/* Re-enable advertising in case this was a failed connection
* attempt as a peripheral.
*/
- mgmt_reenable_advertising(hdev);
+ hci_req_reenable_advertising(hdev);
}
static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode)
@@ -722,8 +734,12 @@ static void hci_req_add_le_create_conn(struct hci_request *req,
if (hci_update_random_address(req, false, &own_addr_type))
return;
+ /* Set window to be the same value as the interval to enable
+ * continuous scanning.
+ */
cp.scan_interval = cpu_to_le16(hdev->le_scan_interval);
- cp.scan_window = cpu_to_le16(hdev->le_scan_window);
+ cp.scan_window = cp.scan_interval;
+
bacpy(&cp.peer_addr, &conn->dst);
cp.peer_addr_type = conn->dst_type;
cp.own_address_type = own_addr_type;
@@ -781,7 +797,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
u8 role)
{
struct hci_conn_params *params;
- struct hci_conn *conn, *conn_unfinished;
+ struct hci_conn *conn;
struct smp_irk *irk;
struct hci_request req;
int err;
@@ -794,35 +810,22 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(-EOPNOTSUPP);
}
- /* Some devices send ATT messages as soon as the physical link is
- * established. To be able to handle these ATT messages, the user-
- * space first establishes the connection and then starts the pairing
- * process.
- *
- * So if a hci_conn object already exists for the following connection
- * attempt, we simply update pending_sec_level and auth_type fields
- * and return the object found.
- */
- conn = hci_conn_hash_lookup_le(hdev, dst, dst_type);
- conn_unfinished = NULL;
- if (conn) {
- if (conn->state == BT_CONNECT &&
- test_bit(HCI_CONN_SCANNING, &conn->flags)) {
- BT_DBG("will continue unfinished conn %pMR", dst);
- conn_unfinished = conn;
- } else {
- if (conn->pending_sec_level < sec_level)
- conn->pending_sec_level = sec_level;
- goto done;
- }
- }
-
/* Since the controller supports only one LE connection attempt at a
* time, we return -EBUSY if there is any connection attempt running.
*/
if (hci_lookup_le_connect(hdev))
return ERR_PTR(-EBUSY);
+ /* If there's already a connection object but it's not in
+ * scanning state it means it must already be established, in
+ * which case we can't do anything else except report a failure
+ * to connect.
+ */
+ conn = hci_conn_hash_lookup_le(hdev, dst, dst_type);
+ if (conn && !test_bit(HCI_CONN_SCANNING, &conn->flags)) {
+ return ERR_PTR(-EBUSY);
+ }
+
/* When given an identity address with existing identity
* resolving key, the connection needs to be established
* to a resolvable random address.
@@ -838,23 +841,20 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
dst_type = ADDR_LE_DEV_RANDOM;
}
- if (conn_unfinished) {
- conn = conn_unfinished;
+ if (conn) {
bacpy(&conn->dst, dst);
} else {
conn = hci_conn_add(hdev, LE_LINK, dst, role);
+ if (!conn)
+ return ERR_PTR(-ENOMEM);
+ hci_conn_hold(conn);
+ conn->pending_sec_level = sec_level;
}
- if (!conn)
- return ERR_PTR(-ENOMEM);
-
conn->dst_type = dst_type;
conn->sec_level = BT_SECURITY_LOW;
conn->conn_timeout = conn_timeout;
- if (!conn_unfinished)
- conn->pending_sec_level = sec_level;
-
hci_req_init(&req, hdev);
/* Disable advertising if we're active. For master role
@@ -918,37 +918,9 @@ create_conn:
return ERR_PTR(err);
}
-done:
- /* If this is continuation of connect started by hci_connect_le_scan,
- * it already called hci_conn_hold and calling it again would mess the
- * counter.
- */
- if (!conn_unfinished)
- hci_conn_hold(conn);
-
return conn;
}
-static void hci_connect_le_scan_complete(struct hci_dev *hdev, u8 status,
- u16 opcode)
-{
- struct hci_conn *conn;
-
- if (!status)
- return;
-
- BT_ERR("Failed to add device to auto conn whitelist: status 0x%2.2x",
- status);
-
- hci_dev_lock(hdev);
-
- conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
- if (conn)
- hci_le_conn_failed(conn, status);
-
- hci_dev_unlock(hdev);
-}
-
static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type)
{
struct hci_conn *conn;
@@ -964,10 +936,9 @@ static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type)
}
/* This function requires the caller holds hdev->lock */
-static int hci_explicit_conn_params_set(struct hci_request *req,
+static int hci_explicit_conn_params_set(struct hci_dev *hdev,
bdaddr_t *addr, u8 addr_type)
{
- struct hci_dev *hdev = req->hdev;
struct hci_conn_params *params;
if (is_connected(hdev, addr, addr_type))
@@ -995,7 +966,6 @@ static int hci_explicit_conn_params_set(struct hci_request *req,
}
params->explicit_connect = true;
- __hci_update_background_scan(req);
BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type,
params->auto_connect);
@@ -1006,11 +976,9 @@ static int hci_explicit_conn_params_set(struct hci_request *req,
/* This function requires the caller holds hdev->lock */
struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
u8 dst_type, u8 sec_level,
- u16 conn_timeout, u8 role)
+ u16 conn_timeout)
{
struct hci_conn *conn;
- struct hci_request req;
- int err;
/* Let's make sure that le is enabled.*/
if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) {
@@ -1038,29 +1006,22 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
BT_DBG("requesting refresh of dst_addr");
- conn = hci_conn_add(hdev, LE_LINK, dst, role);
+ conn = hci_conn_add(hdev, LE_LINK, dst, HCI_ROLE_MASTER);
if (!conn)
return ERR_PTR(-ENOMEM);
- hci_req_init(&req, hdev);
-
- if (hci_explicit_conn_params_set(&req, dst, dst_type) < 0)
+ if (hci_explicit_conn_params_set(hdev, dst, dst_type) < 0)
return ERR_PTR(-EBUSY);
conn->state = BT_CONNECT;
set_bit(HCI_CONN_SCANNING, &conn->flags);
-
- err = hci_req_run(&req, hci_connect_le_scan_complete);
- if (err && err != -ENODATA) {
- hci_conn_del(conn);
- return ERR_PTR(err);
- }
-
conn->dst_type = dst_type;
conn->sec_level = BT_SECURITY_LOW;
conn->pending_sec_level = sec_level;
conn->conn_timeout = conn_timeout;
+ hci_update_background_scan(hdev);
+
done:
hci_conn_hold(conn);
return conn;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 83a6aacfab31..883c821a9e78 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -56,15 +56,6 @@ DEFINE_MUTEX(hci_cb_list_lock);
/* HCI ID Numbering */
static DEFINE_IDA(hci_index_ida);
-/* ----- HCI requests ----- */
-
-#define HCI_REQ_DONE 0
-#define HCI_REQ_PEND 1
-#define HCI_REQ_CANCELED 2
-
-#define hci_req_lock(d) mutex_lock(&d->req_lock)
-#define hci_req_unlock(d) mutex_unlock(&d->req_lock)
-
/* ---- HCI debugfs entries ---- */
static ssize_t dut_mode_read(struct file *file, char __user *user_buf,
@@ -73,7 +64,7 @@ static ssize_t dut_mode_read(struct file *file, char __user *user_buf,
struct hci_dev *hdev = file->private_data;
char buf[3];
- buf[0] = hci_dev_test_flag(hdev, HCI_DUT_MODE) ? 'Y': 'N';
+ buf[0] = hci_dev_test_flag(hdev, HCI_DUT_MODE) ? 'Y' : 'N';
buf[1] = '\n';
buf[2] = '\0';
return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
@@ -101,14 +92,14 @@ static ssize_t dut_mode_write(struct file *file, const char __user *user_buf,
if (enable == hci_dev_test_flag(hdev, HCI_DUT_MODE))
return -EALREADY;
- hci_req_lock(hdev);
+ hci_req_sync_lock(hdev);
if (enable)
skb = __hci_cmd_sync(hdev, HCI_OP_ENABLE_DUT_MODE, 0, NULL,
HCI_CMD_TIMEOUT);
else
skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL,
HCI_CMD_TIMEOUT);
- hci_req_unlock(hdev);
+ hci_req_sync_unlock(hdev);
if (IS_ERR(skb))
return PTR_ERR(skb);
@@ -133,7 +124,7 @@ static ssize_t vendor_diag_read(struct file *file, char __user *user_buf,
struct hci_dev *hdev = file->private_data;
char buf[3];
- buf[0] = hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) ? 'Y': 'N';
+ buf[0] = hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) ? 'Y' : 'N';
buf[1] = '\n';
buf[2] = '\0';
return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
@@ -165,9 +156,9 @@ static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf,
!test_bit(HCI_RUNNING, &hdev->flags))
goto done;
- hci_req_lock(hdev);
+ hci_req_sync_lock(hdev);
err = hdev->set_diag(hdev, enable);
- hci_req_unlock(hdev);
+ hci_req_sync_unlock(hdev);
if (err < 0)
return err;
@@ -198,197 +189,14 @@ static void hci_debugfs_create_basic(struct hci_dev *hdev)
&vendor_diag_fops);
}
-/* ---- HCI requests ---- */
-
-static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
- struct sk_buff *skb)
-{
- BT_DBG("%s result 0x%2.2x", hdev->name, result);
-
- if (hdev->req_status == HCI_REQ_PEND) {
- hdev->req_result = result;
- hdev->req_status = HCI_REQ_DONE;
- if (skb)
- hdev->req_skb = skb_get(skb);
- wake_up_interruptible(&hdev->req_wait_q);
- }
-}
-
-static void hci_req_cancel(struct hci_dev *hdev, int err)
-{
- BT_DBG("%s err 0x%2.2x", hdev->name, err);
-
- if (hdev->req_status == HCI_REQ_PEND) {
- hdev->req_result = err;
- hdev->req_status = HCI_REQ_CANCELED;
- wake_up_interruptible(&hdev->req_wait_q);
- }
-}
-
-struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen,
- const void *param, u8 event, u32 timeout)
-{
- DECLARE_WAITQUEUE(wait, current);
- struct hci_request req;
- struct sk_buff *skb;
- int err = 0;
-
- BT_DBG("%s", hdev->name);
-
- hci_req_init(&req, hdev);
-
- hci_req_add_ev(&req, opcode, plen, param, event);
-
- hdev->req_status = HCI_REQ_PEND;
-
- add_wait_queue(&hdev->req_wait_q, &wait);
- set_current_state(TASK_INTERRUPTIBLE);
-
- err = hci_req_run_skb(&req, hci_req_sync_complete);
- if (err < 0) {
- remove_wait_queue(&hdev->req_wait_q, &wait);
- set_current_state(TASK_RUNNING);
- return ERR_PTR(err);
- }
-
- schedule_timeout(timeout);
-
- remove_wait_queue(&hdev->req_wait_q, &wait);
-
- if (signal_pending(current))
- return ERR_PTR(-EINTR);
-
- switch (hdev->req_status) {
- case HCI_REQ_DONE:
- err = -bt_to_errno(hdev->req_result);
- break;
-
- case HCI_REQ_CANCELED:
- err = -hdev->req_result;
- break;
-
- default:
- err = -ETIMEDOUT;
- break;
- }
-
- hdev->req_status = hdev->req_result = 0;
- skb = hdev->req_skb;
- hdev->req_skb = NULL;
-
- BT_DBG("%s end: err %d", hdev->name, err);
-
- if (err < 0) {
- kfree_skb(skb);
- return ERR_PTR(err);
- }
-
- if (!skb)
- return ERR_PTR(-ENODATA);
-
- return skb;
-}
-EXPORT_SYMBOL(__hci_cmd_sync_ev);
-
-struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
- const void *param, u32 timeout)
-{
- return __hci_cmd_sync_ev(hdev, opcode, plen, param, 0, timeout);
-}
-EXPORT_SYMBOL(__hci_cmd_sync);
-
-/* Execute request and wait for completion. */
-static int __hci_req_sync(struct hci_dev *hdev,
- void (*func)(struct hci_request *req,
- unsigned long opt),
- unsigned long opt, __u32 timeout)
-{
- struct hci_request req;
- DECLARE_WAITQUEUE(wait, current);
- int err = 0;
-
- BT_DBG("%s start", hdev->name);
-
- hci_req_init(&req, hdev);
-
- hdev->req_status = HCI_REQ_PEND;
-
- func(&req, opt);
-
- add_wait_queue(&hdev->req_wait_q, &wait);
- set_current_state(TASK_INTERRUPTIBLE);
-
- err = hci_req_run_skb(&req, hci_req_sync_complete);
- if (err < 0) {
- hdev->req_status = 0;
-
- remove_wait_queue(&hdev->req_wait_q, &wait);
- set_current_state(TASK_RUNNING);
-
- /* ENODATA means the HCI request command queue is empty.
- * This can happen when a request with conditionals doesn't
- * trigger any commands to be sent. This is normal behavior
- * and should not trigger an error return.
- */
- if (err == -ENODATA)
- return 0;
-
- return err;
- }
-
- schedule_timeout(timeout);
-
- remove_wait_queue(&hdev->req_wait_q, &wait);
-
- if (signal_pending(current))
- return -EINTR;
-
- switch (hdev->req_status) {
- case HCI_REQ_DONE:
- err = -bt_to_errno(hdev->req_result);
- break;
-
- case HCI_REQ_CANCELED:
- err = -hdev->req_result;
- break;
-
- default:
- err = -ETIMEDOUT;
- break;
- }
-
- hdev->req_status = hdev->req_result = 0;
-
- BT_DBG("%s end: err %d", hdev->name, err);
-
- return err;
-}
-
-static int hci_req_sync(struct hci_dev *hdev,
- void (*req)(struct hci_request *req,
- unsigned long opt),
- unsigned long opt, __u32 timeout)
-{
- int ret;
-
- if (!test_bit(HCI_UP, &hdev->flags))
- return -ENETDOWN;
-
- /* Serialize all requests */
- hci_req_lock(hdev);
- ret = __hci_req_sync(hdev, req, opt, timeout);
- hci_req_unlock(hdev);
-
- return ret;
-}
-
-static void hci_reset_req(struct hci_request *req, unsigned long opt)
+static int hci_reset_req(struct hci_request *req, unsigned long opt)
{
BT_DBG("%s %ld", req->hdev->name, opt);
/* Reset device */
set_bit(HCI_RESET, &req->hdev->flags);
hci_req_add(req, HCI_OP_RESET, 0, NULL);
+ return 0;
}
static void bredr_init(struct hci_request *req)
@@ -428,7 +236,7 @@ static void amp_init1(struct hci_request *req)
hci_req_add(req, HCI_OP_READ_LOCATION_DATA, 0, NULL);
}
-static void amp_init2(struct hci_request *req)
+static int amp_init2(struct hci_request *req)
{
/* Read Local Supported Features. Not all AMP controllers
* support this so it's placed conditionally in the second
@@ -436,9 +244,11 @@ static void amp_init2(struct hci_request *req)
*/
if (req->hdev->commands[14] & 0x20)
hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL);
+
+ return 0;
}
-static void hci_init1_req(struct hci_request *req, unsigned long opt)
+static int hci_init1_req(struct hci_request *req, unsigned long opt)
{
struct hci_dev *hdev = req->hdev;
@@ -461,6 +271,8 @@ static void hci_init1_req(struct hci_request *req, unsigned long opt)
BT_ERR("Unknown device type %d", hdev->dev_type);
break;
}
+
+ return 0;
}
static void bredr_setup(struct hci_request *req)
@@ -508,12 +320,6 @@ static void le_setup(struct hci_request *req)
/* Read LE Supported States */
hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL);
- /* Read LE White List Size */
- hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL);
-
- /* Clear LE White List */
- hci_req_add(req, HCI_OP_LE_CLEAR_WHITE_LIST, 0, NULL);
-
/* LE-only controllers have LE implicitly enabled */
if (!lmp_bredr_capable(hdev))
hci_dev_set_flag(hdev, HCI_LE_ENABLED);
@@ -537,20 +343,30 @@ static void hci_setup_event_mask(struct hci_request *req)
if (lmp_bredr_capable(hdev)) {
events[4] |= 0x01; /* Flow Specification Complete */
- events[4] |= 0x02; /* Inquiry Result with RSSI */
- events[4] |= 0x04; /* Read Remote Extended Features Complete */
- events[5] |= 0x08; /* Synchronous Connection Complete */
- events[5] |= 0x10; /* Synchronous Connection Changed */
} else {
/* Use a different default for LE-only devices */
memset(events, 0, sizeof(events));
- events[0] |= 0x10; /* Disconnection Complete */
- events[1] |= 0x08; /* Read Remote Version Information Complete */
events[1] |= 0x20; /* Command Complete */
events[1] |= 0x40; /* Command Status */
events[1] |= 0x80; /* Hardware Error */
- events[2] |= 0x04; /* Number of Completed Packets */
- events[3] |= 0x02; /* Data Buffer Overflow */
+
+ /* If the controller supports the Disconnect command, enable
+ * the corresponding event. In addition enable packet flow
+ * control related events.
+ */
+ if (hdev->commands[0] & 0x20) {
+ events[0] |= 0x10; /* Disconnection Complete */
+ events[2] |= 0x04; /* Number of Completed Packets */
+ events[3] |= 0x02; /* Data Buffer Overflow */
+ }
+
+ /* If the controller supports the Read Remote Version
+ * Information command, enable the corresponding event.
+ */
+ if (hdev->commands[2] & 0x80)
+ events[1] |= 0x08; /* Read Remote Version Information
+ * Complete
+ */
if (hdev->le_features[0] & HCI_LE_ENCRYPTION) {
events[0] |= 0x80; /* Encryption Change */
@@ -558,9 +374,18 @@ static void hci_setup_event_mask(struct hci_request *req)
}
}
- if (lmp_inq_rssi_capable(hdev))
+ if (lmp_inq_rssi_capable(hdev) ||
+ test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks))
events[4] |= 0x02; /* Inquiry Result with RSSI */
+ if (lmp_ext_feat_capable(hdev))
+ events[4] |= 0x04; /* Read Remote Extended Features Complete */
+
+ if (lmp_esco_capable(hdev)) {
+ events[5] |= 0x08; /* Synchronous Connection Complete */
+ events[5] |= 0x10; /* Synchronous Connection Changed */
+ }
+
if (lmp_sniffsubr_capable(hdev))
events[5] |= 0x20; /* Sniff Subrating */
@@ -596,7 +421,7 @@ static void hci_setup_event_mask(struct hci_request *req)
hci_req_add(req, HCI_OP_SET_EVENT_MASK, sizeof(events), events);
}
-static void hci_init2_req(struct hci_request *req, unsigned long opt)
+static int hci_init2_req(struct hci_request *req, unsigned long opt)
{
struct hci_dev *hdev = req->hdev;
@@ -676,6 +501,8 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt)
hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable),
&enable);
}
+
+ return 0;
}
static void hci_setup_link_policy(struct hci_request *req)
@@ -750,7 +577,7 @@ static void hci_set_event_mask_page_2(struct hci_request *req)
hci_req_add(req, HCI_OP_SET_EVENT_MASK_PAGE_2, sizeof(events), events);
}
-static void hci_init3_req(struct hci_request *req, unsigned long opt)
+static int hci_init3_req(struct hci_request *req, unsigned long opt)
{
struct hci_dev *hdev = req->hdev;
u8 p;
@@ -783,7 +610,6 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt)
u8 events[8];
memset(events, 0, sizeof(events));
- events[0] = 0x0f;
if (hdev->le_features[0] & HCI_LE_ENCRYPTION)
events[0] |= 0x10; /* LE Long Term Key Request */
@@ -810,6 +636,34 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt)
* Report
*/
+ /* If the controller supports the LE Set Scan Enable command,
+ * enable the corresponding advertising report event.
+ */
+ if (hdev->commands[26] & 0x08)
+ events[0] |= 0x02; /* LE Advertising Report */
+
+ /* If the controller supports the LE Create Connection
+ * command, enable the corresponding event.
+ */
+ if (hdev->commands[26] & 0x10)
+ events[0] |= 0x01; /* LE Connection Complete */
+
+ /* If the controller supports the LE Connection Update
+ * command, enable the corresponding event.
+ */
+ if (hdev->commands[27] & 0x04)
+ events[0] |= 0x04; /* LE Connection Update
+ * Complete
+ */
+
+ /* If the controller supports the LE Read Remote Used Features
+ * command, enable the corresponding event.
+ */
+ if (hdev->commands[27] & 0x20)
+ events[0] |= 0x08; /* LE Read Remote Used
+ * Features Complete
+ */
+
/* If the controller supports the LE Read Local P-256
* Public Key command, enable the corresponding event.
*/
@@ -832,6 +686,17 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt)
hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL);
}
+ if (hdev->commands[26] & 0x40) {
+ /* Read LE White List Size */
+ hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE,
+ 0, NULL);
+ }
+
+ if (hdev->commands[26] & 0x80) {
+ /* Clear LE White List */
+ hci_req_add(req, HCI_OP_LE_CLEAR_WHITE_LIST, 0, NULL);
+ }
+
if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) {
/* Read LE Maximum Data Length */
hci_req_add(req, HCI_OP_LE_READ_MAX_DATA_LEN, 0, NULL);
@@ -851,9 +716,11 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt)
hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES,
sizeof(cp), &cp);
}
+
+ return 0;
}
-static void hci_init4_req(struct hci_request *req, unsigned long opt)
+static int hci_init4_req(struct hci_request *req, unsigned long opt)
{
struct hci_dev *hdev = req->hdev;
@@ -904,20 +771,22 @@ static void hci_init4_req(struct hci_request *req, unsigned long opt)
hci_req_add(req, HCI_OP_WRITE_SC_SUPPORT,
sizeof(support), &support);
}
+
+ return 0;
}
static int __hci_init(struct hci_dev *hdev)
{
int err;
- err = __hci_req_sync(hdev, hci_init1_req, 0, HCI_INIT_TIMEOUT);
+ err = __hci_req_sync(hdev, hci_init1_req, 0, HCI_INIT_TIMEOUT, NULL);
if (err < 0)
return err;
if (hci_dev_test_flag(hdev, HCI_SETUP))
hci_debugfs_create_basic(hdev);
- err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT);
+ err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT, NULL);
if (err < 0)
return err;
@@ -928,11 +797,11 @@ static int __hci_init(struct hci_dev *hdev)
if (hdev->dev_type != HCI_BREDR)
return 0;
- err = __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT);
+ err = __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT, NULL);
if (err < 0)
return err;
- err = __hci_req_sync(hdev, hci_init4_req, 0, HCI_INIT_TIMEOUT);
+ err = __hci_req_sync(hdev, hci_init4_req, 0, HCI_INIT_TIMEOUT, NULL);
if (err < 0)
return err;
@@ -963,7 +832,7 @@ static int __hci_init(struct hci_dev *hdev)
return 0;
}
-static void hci_init0_req(struct hci_request *req, unsigned long opt)
+static int hci_init0_req(struct hci_request *req, unsigned long opt)
{
struct hci_dev *hdev = req->hdev;
@@ -979,6 +848,8 @@ static void hci_init0_req(struct hci_request *req, unsigned long opt)
/* Read BD Address */
if (hdev->set_bdaddr)
hci_req_add(req, HCI_OP_READ_BD_ADDR, 0, NULL);
+
+ return 0;
}
static int __hci_unconf_init(struct hci_dev *hdev)
@@ -988,7 +859,7 @@ static int __hci_unconf_init(struct hci_dev *hdev)
if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
return 0;
- err = __hci_req_sync(hdev, hci_init0_req, 0, HCI_INIT_TIMEOUT);
+ err = __hci_req_sync(hdev, hci_init0_req, 0, HCI_INIT_TIMEOUT, NULL);
if (err < 0)
return err;
@@ -998,7 +869,7 @@ static int __hci_unconf_init(struct hci_dev *hdev)
return 0;
}
-static void hci_scan_req(struct hci_request *req, unsigned long opt)
+static int hci_scan_req(struct hci_request *req, unsigned long opt)
{
__u8 scan = opt;
@@ -1006,9 +877,10 @@ static void hci_scan_req(struct hci_request *req, unsigned long opt)
/* Inquiry and Page scans */
hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+ return 0;
}
-static void hci_auth_req(struct hci_request *req, unsigned long opt)
+static int hci_auth_req(struct hci_request *req, unsigned long opt)
{
__u8 auth = opt;
@@ -1016,9 +888,10 @@ static void hci_auth_req(struct hci_request *req, unsigned long opt)
/* Authentication */
hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth);
+ return 0;
}
-static void hci_encrypt_req(struct hci_request *req, unsigned long opt)
+static int hci_encrypt_req(struct hci_request *req, unsigned long opt)
{
__u8 encrypt = opt;
@@ -1026,9 +899,10 @@ static void hci_encrypt_req(struct hci_request *req, unsigned long opt)
/* Encryption */
hci_req_add(req, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt);
+ return 0;
}
-static void hci_linkpol_req(struct hci_request *req, unsigned long opt)
+static int hci_linkpol_req(struct hci_request *req, unsigned long opt)
{
__le16 policy = cpu_to_le16(opt);
@@ -1036,6 +910,7 @@ static void hci_linkpol_req(struct hci_request *req, unsigned long opt)
/* Default link policy */
hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy);
+ return 0;
}
/* Get HCI device by index.
@@ -1280,7 +1155,7 @@ static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf)
return copied;
}
-static void hci_inq_req(struct hci_request *req, unsigned long opt)
+static int hci_inq_req(struct hci_request *req, unsigned long opt)
{
struct hci_inquiry_req *ir = (struct hci_inquiry_req *) opt;
struct hci_dev *hdev = req->hdev;
@@ -1289,13 +1164,15 @@ static void hci_inq_req(struct hci_request *req, unsigned long opt)
BT_DBG("%s", hdev->name);
if (test_bit(HCI_INQUIRY, &hdev->flags))
- return;
+ return 0;
/* Start Inquiry */
memcpy(&cp.lap, &ir->lap, 3);
cp.length = ir->length;
cp.num_rsp = ir->num_rsp;
hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp);
+
+ return 0;
}
int hci_inquiry(void __user *arg)
@@ -1346,7 +1223,7 @@ int hci_inquiry(void __user *arg)
if (do_inquiry) {
err = hci_req_sync(hdev, hci_inq_req, (unsigned long) &ir,
- timeo);
+ timeo, NULL);
if (err < 0)
goto done;
@@ -1399,7 +1276,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
BT_DBG("%s %p", hdev->name, hdev);
- hci_req_lock(hdev);
+ hci_req_sync_lock(hdev);
if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
ret = -ENODEV;
@@ -1522,10 +1399,10 @@ static int hci_dev_do_open(struct hci_dev *hdev)
!hci_dev_test_flag(hdev, HCI_CONFIG) &&
!hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
+ hci_dev_test_flag(hdev, HCI_MGMT) &&
hdev->dev_type == HCI_BREDR) {
- hci_dev_lock(hdev);
- mgmt_powered(hdev, 1);
- hci_dev_unlock(hdev);
+ ret = __hci_req_hci_power_on(hdev);
+ mgmt_power_on(hdev, ret);
}
} else {
/* Init failed, cleanup */
@@ -1552,7 +1429,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
}
done:
- hci_req_unlock(hdev);
+ hci_req_sync_unlock(hdev);
return ret;
}
@@ -1646,12 +1523,12 @@ int hci_dev_do_close(struct hci_dev *hdev)
cancel_delayed_work(&hdev->power_off);
- hci_req_cancel(hdev, ENODEV);
- hci_req_lock(hdev);
+ hci_request_cancel_all(hdev);
+ hci_req_sync_lock(hdev);
if (!test_and_clear_bit(HCI_UP, &hdev->flags)) {
cancel_delayed_work_sync(&hdev->cmd_timer);
- hci_req_unlock(hdev);
+ hci_req_sync_unlock(hdev);
return 0;
}
@@ -1660,7 +1537,6 @@ int hci_dev_do_close(struct hci_dev *hdev)
flush_work(&hdev->rx_work);
if (hdev->discov_timeout > 0) {
- cancel_delayed_work(&hdev->discov_off);
hdev->discov_timeout = 0;
hci_dev_clear_flag(hdev, HCI_DISCOVERABLE);
hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
@@ -1669,17 +1545,9 @@ int hci_dev_do_close(struct hci_dev *hdev)
if (hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE))
cancel_delayed_work(&hdev->service_cache);
- cancel_delayed_work_sync(&hdev->le_scan_disable);
- cancel_delayed_work_sync(&hdev->le_scan_restart);
-
if (hci_dev_test_flag(hdev, HCI_MGMT))
cancel_delayed_work_sync(&hdev->rpa_expired);
- if (hdev->adv_instance_timeout) {
- cancel_delayed_work_sync(&hdev->adv_instance_expire);
- hdev->adv_instance_timeout = 0;
- }
-
/* Avoid potential lockdep warnings from the *_flush() calls by
* ensuring the workqueue is empty up front.
*/
@@ -1691,8 +1559,9 @@ int hci_dev_do_close(struct hci_dev *hdev)
auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF);
- if (!auto_off && hdev->dev_type == HCI_BREDR)
- mgmt_powered(hdev, 0);
+ if (!auto_off && hdev->dev_type == HCI_BREDR &&
+ hci_dev_test_flag(hdev, HCI_MGMT))
+ __mgmt_power_off(hdev);
hci_inquiry_cache_flush(hdev);
hci_pend_le_actions_clear(hdev);
@@ -1712,7 +1581,7 @@ int hci_dev_do_close(struct hci_dev *hdev)
if (test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks) &&
!auto_off && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
set_bit(HCI_INIT, &hdev->flags);
- __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT);
+ __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT, NULL);
clear_bit(HCI_INIT, &hdev->flags);
}
@@ -1749,7 +1618,7 @@ int hci_dev_do_close(struct hci_dev *hdev)
memset(hdev->dev_class, 0, sizeof(hdev->dev_class));
bacpy(&hdev->random_addr, BDADDR_ANY);
- hci_req_unlock(hdev);
+ hci_req_sync_unlock(hdev);
hci_dev_put(hdev);
return 0;
@@ -1785,7 +1654,7 @@ static int hci_dev_do_reset(struct hci_dev *hdev)
BT_DBG("%s %p", hdev->name, hdev);
- hci_req_lock(hdev);
+ hci_req_sync_lock(hdev);
/* Drop queues */
skb_queue_purge(&hdev->rx_q);
@@ -1807,9 +1676,9 @@ static int hci_dev_do_reset(struct hci_dev *hdev)
atomic_set(&hdev->cmd_cnt, 1);
hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0;
- ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT);
+ ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT, NULL);
- hci_req_unlock(hdev);
+ hci_req_sync_unlock(hdev);
return ret;
}
@@ -1900,7 +1769,7 @@ static void hci_update_scan_state(struct hci_dev *hdev, u8 scan)
hci_dev_set_flag(hdev, HCI_BREDR_ENABLED);
if (hci_dev_test_flag(hdev, HCI_LE_ENABLED))
- mgmt_update_adv_data(hdev);
+ hci_req_update_adv_data(hdev, hdev->cur_adv_instance);
mgmt_new_settings(hdev);
}
@@ -1942,7 +1811,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
switch (cmd) {
case HCISETAUTH:
err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt,
- HCI_INIT_TIMEOUT);
+ HCI_INIT_TIMEOUT, NULL);
break;
case HCISETENCRYPT:
@@ -1954,18 +1823,18 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
if (!test_bit(HCI_AUTH, &hdev->flags)) {
/* Auth must be enabled first */
err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt,
- HCI_INIT_TIMEOUT);
+ HCI_INIT_TIMEOUT, NULL);
if (err)
break;
}
err = hci_req_sync(hdev, hci_encrypt_req, dr.dev_opt,
- HCI_INIT_TIMEOUT);
+ HCI_INIT_TIMEOUT, NULL);
break;
case HCISETSCAN:
err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt,
- HCI_INIT_TIMEOUT);
+ HCI_INIT_TIMEOUT, NULL);
/* Ensure that the connectable and discoverable states
* get correctly modified as this was a non-mgmt change.
@@ -1976,7 +1845,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
case HCISETLINKPOL:
err = hci_req_sync(hdev, hci_linkpol_req, dr.dev_opt,
- HCI_INIT_TIMEOUT);
+ HCI_INIT_TIMEOUT, NULL);
break;
case HCISETLINKMODE:
@@ -2145,6 +2014,16 @@ static void hci_power_on(struct work_struct *work)
BT_DBG("%s", hdev->name);
+ if (test_bit(HCI_UP, &hdev->flags) &&
+ hci_dev_test_flag(hdev, HCI_MGMT) &&
+ hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) {
+ hci_req_sync_lock(hdev);
+ err = __hci_req_hci_power_on(hdev);
+ hci_req_sync_unlock(hdev);
+ mgmt_power_on(hdev, err);
+ return;
+ }
+
err = hci_dev_do_open(hdev);
if (err < 0) {
hci_dev_lock(hdev);
@@ -2227,28 +2106,6 @@ static void hci_error_reset(struct work_struct *work)
hci_dev_do_open(hdev);
}
-static void hci_discov_off(struct work_struct *work)
-{
- struct hci_dev *hdev;
-
- hdev = container_of(work, struct hci_dev, discov_off.work);
-
- BT_DBG("%s", hdev->name);
-
- mgmt_discoverable_timeout(hdev);
-}
-
-static void hci_adv_timeout_expire(struct work_struct *work)
-{
- struct hci_dev *hdev;
-
- hdev = container_of(work, struct hci_dev, adv_instance_expire.work);
-
- BT_DBG("%s", hdev->name);
-
- mgmt_adv_timeout_expired(hdev);
-}
-
void hci_uuids_clear(struct hci_dev *hdev)
{
struct bt_uuid *uuid, *tmp;
@@ -2726,7 +2583,8 @@ struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance)
}
/* This function requires the caller holds hdev->lock */
-struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance) {
+struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance)
+{
struct adv_info *cur_instance;
cur_instance = hci_find_adv_instance(hdev, instance);
@@ -2752,9 +2610,12 @@ int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance)
BT_DBG("%s removing %dMR", hdev->name, instance);
- if (hdev->cur_adv_instance == instance && hdev->adv_instance_timeout) {
- cancel_delayed_work(&hdev->adv_instance_expire);
- hdev->adv_instance_timeout = 0;
+ if (hdev->cur_adv_instance == instance) {
+ if (hdev->adv_instance_timeout) {
+ cancel_delayed_work(&hdev->adv_instance_expire);
+ hdev->adv_instance_timeout = 0;
+ }
+ hdev->cur_adv_instance = 0x00;
}
list_del(&adv_instance->list);
@@ -2781,6 +2642,7 @@ void hci_adv_instances_clear(struct hci_dev *hdev)
}
hdev->adv_instance_cnt = 0;
+ hdev->cur_adv_instance = 0x00;
}
/* This function requires the caller holds hdev->lock */
@@ -2851,12 +2713,10 @@ struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *bdaddr_list,
void hci_bdaddr_list_clear(struct list_head *bdaddr_list)
{
- struct list_head *p, *n;
-
- list_for_each_safe(p, n, bdaddr_list) {
- struct bdaddr_list *b = list_entry(p, struct bdaddr_list, list);
+ struct bdaddr_list *b, *n;
- list_del(p);
+ list_for_each_entry_safe(b, n, bdaddr_list, list) {
+ list_del(&b->list);
kfree(b);
}
}
@@ -3019,181 +2879,16 @@ void hci_conn_params_clear_disabled(struct hci_dev *hdev)
}
/* This function requires the caller holds hdev->lock */
-void hci_conn_params_clear_all(struct hci_dev *hdev)
+static void hci_conn_params_clear_all(struct hci_dev *hdev)
{
struct hci_conn_params *params, *tmp;
list_for_each_entry_safe(params, tmp, &hdev->le_conn_params, list)
hci_conn_params_free(params);
- hci_update_background_scan(hdev);
-
BT_DBG("All LE connection parameters were removed");
}
-static void inquiry_complete(struct hci_dev *hdev, u8 status, u16 opcode)
-{
- if (status) {
- BT_ERR("Failed to start inquiry: status %d", status);
-
- hci_dev_lock(hdev);
- hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
- hci_dev_unlock(hdev);
- return;
- }
-}
-
-static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status,
- u16 opcode)
-{
- /* General inquiry access code (GIAC) */
- u8 lap[3] = { 0x33, 0x8b, 0x9e };
- struct hci_cp_inquiry cp;
- int err;
-
- if (status) {
- BT_ERR("Failed to disable LE scanning: status %d", status);
- return;
- }
-
- hdev->discovery.scan_start = 0;
-
- switch (hdev->discovery.type) {
- case DISCOV_TYPE_LE:
- hci_dev_lock(hdev);
- hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
- hci_dev_unlock(hdev);
- break;
-
- case DISCOV_TYPE_INTERLEAVED:
- hci_dev_lock(hdev);
-
- if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY,
- &hdev->quirks)) {
- /* If we were running LE only scan, change discovery
- * state. If we were running both LE and BR/EDR inquiry
- * simultaneously, and BR/EDR inquiry is already
- * finished, stop discovery, otherwise BR/EDR inquiry
- * will stop discovery when finished. If we will resolve
- * remote device name, do not change discovery state.
- */
- if (!test_bit(HCI_INQUIRY, &hdev->flags) &&
- hdev->discovery.state != DISCOVERY_RESOLVING)
- hci_discovery_set_state(hdev,
- DISCOVERY_STOPPED);
- } else {
- struct hci_request req;
-
- hci_inquiry_cache_flush(hdev);
-
- hci_req_init(&req, hdev);
-
- memset(&cp, 0, sizeof(cp));
- memcpy(&cp.lap, lap, sizeof(cp.lap));
- cp.length = DISCOV_INTERLEAVED_INQUIRY_LEN;
- hci_req_add(&req, HCI_OP_INQUIRY, sizeof(cp), &cp);
-
- err = hci_req_run(&req, inquiry_complete);
- if (err) {
- BT_ERR("Inquiry request failed: err %d", err);
- hci_discovery_set_state(hdev,
- DISCOVERY_STOPPED);
- }
- }
-
- hci_dev_unlock(hdev);
- break;
- }
-}
-
-static void le_scan_disable_work(struct work_struct *work)
-{
- struct hci_dev *hdev = container_of(work, struct hci_dev,
- le_scan_disable.work);
- struct hci_request req;
- int err;
-
- BT_DBG("%s", hdev->name);
-
- cancel_delayed_work_sync(&hdev->le_scan_restart);
-
- hci_req_init(&req, hdev);
-
- hci_req_add_le_scan_disable(&req);
-
- err = hci_req_run(&req, le_scan_disable_work_complete);
- if (err)
- BT_ERR("Disable LE scanning request failed: err %d", err);
-}
-
-static void le_scan_restart_work_complete(struct hci_dev *hdev, u8 status,
- u16 opcode)
-{
- unsigned long timeout, duration, scan_start, now;
-
- BT_DBG("%s", hdev->name);
-
- if (status) {
- BT_ERR("Failed to restart LE scan: status %d", status);
- return;
- }
-
- if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
- !hdev->discovery.scan_start)
- return;
-
- /* When the scan was started, hdev->le_scan_disable has been queued
- * after duration from scan_start. During scan restart this job
- * has been canceled, and we need to queue it again after proper
- * timeout, to make sure that scan does not run indefinitely.
- */
- duration = hdev->discovery.scan_duration;
- scan_start = hdev->discovery.scan_start;
- now = jiffies;
- if (now - scan_start <= duration) {
- int elapsed;
-
- if (now >= scan_start)
- elapsed = now - scan_start;
- else
- elapsed = ULONG_MAX - scan_start + now;
-
- timeout = duration - elapsed;
- } else {
- timeout = 0;
- }
- queue_delayed_work(hdev->workqueue,
- &hdev->le_scan_disable, timeout);
-}
-
-static void le_scan_restart_work(struct work_struct *work)
-{
- struct hci_dev *hdev = container_of(work, struct hci_dev,
- le_scan_restart.work);
- struct hci_request req;
- struct hci_cp_le_set_scan_enable cp;
- int err;
-
- BT_DBG("%s", hdev->name);
-
- /* If controller is not scanning we are done. */
- if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
- return;
-
- hci_req_init(&req, hdev);
-
- hci_req_add_le_scan_disable(&req);
-
- memset(&cp, 0, sizeof(cp));
- cp.enable = LE_SCAN_ENABLE;
- cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
- hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
-
- err = hci_req_run(&req, le_scan_restart_work_complete);
- if (err)
- BT_ERR("Restart LE scan request failed: err %d", err);
-}
-
/* Copy the Identity Address of the controller.
*
* If the controller has a public BD_ADDR, then by default use that one.
@@ -3292,10 +2987,6 @@ struct hci_dev *hci_alloc_dev(void)
INIT_WORK(&hdev->error_reset, hci_error_reset);
INIT_DELAYED_WORK(&hdev->power_off, hci_power_off);
- INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off);
- INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work);
- INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work);
- INIT_DELAYED_WORK(&hdev->adv_instance_expire, hci_adv_timeout_expire);
skb_queue_head_init(&hdev->rx_q);
skb_queue_head_init(&hdev->cmd_q);
@@ -3305,6 +2996,8 @@ struct hci_dev *hci_alloc_dev(void)
INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout);
+ hci_request_setup(hdev);
+
hci_init_sysfs(hdev);
discovery_init(hdev);
@@ -3515,7 +3208,7 @@ int hci_reset_dev(struct hci_dev *hdev)
if (!skb)
return -ENOMEM;
- bt_cb(skb)->pkt_type = HCI_EVENT_PKT;
+ hci_skb_pkt_type(skb) = HCI_EVENT_PKT;
memcpy(skb_put(skb, 3), hw_err, 3);
/* Send Hardware Error to upper stack */
@@ -3532,9 +3225,9 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb)
return -ENXIO;
}
- if (bt_cb(skb)->pkt_type != HCI_EVENT_PKT &&
- bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT &&
- bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) {
+ if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT &&
+ hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT &&
+ hci_skb_pkt_type(skb) != HCI_SCODATA_PKT) {
kfree_skb(skb);
return -EINVAL;
}
@@ -3556,7 +3249,7 @@ EXPORT_SYMBOL(hci_recv_frame);
int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb)
{
/* Mark as diagnostic packet */
- bt_cb(skb)->pkt_type = HCI_DIAG_PKT;
+ hci_skb_pkt_type(skb) = HCI_DIAG_PKT;
/* Time stamp */
__net_timestamp(skb);
@@ -3598,7 +3291,8 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
{
int err;
- BT_DBG("%s type %d len %d", hdev->name, bt_cb(skb)->pkt_type, skb->len);
+ BT_DBG("%s type %d len %d", hdev->name, hci_skb_pkt_type(skb),
+ skb->len);
/* Time stamp */
__net_timestamp(skb);
@@ -3643,7 +3337,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
/* Stand-alone HCI commands must be flagged as
* single-command requests.
*/
- bt_cb(skb)->hci.req_start = true;
+ bt_cb(skb)->hci.req_flags |= HCI_REQ_START;
skb_queue_tail(&hdev->cmd_q, skb);
queue_work(hdev->workqueue, &hdev->cmd_work);
@@ -3680,9 +3374,9 @@ struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
bt_dev_dbg(hdev, "opcode 0x%4.4x plen %d", opcode, plen);
- hci_req_lock(hdev);
+ hci_req_sync_lock(hdev);
skb = __hci_cmd_sync(hdev, opcode, plen, param, timeout);
- hci_req_unlock(hdev);
+ hci_req_sync_unlock(hdev);
return skb;
}
@@ -3711,7 +3405,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue,
skb->len = skb_headlen(skb);
skb->data_len = 0;
- bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT;
+ hci_skb_pkt_type(skb) = HCI_ACLDATA_PKT;
switch (hdev->dev_type) {
case HCI_BREDR:
@@ -3751,7 +3445,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue,
do {
skb = list; list = list->next;
- bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT;
+ hci_skb_pkt_type(skb) = HCI_ACLDATA_PKT;
hci_add_acl_hdr(skb, conn->handle, flags);
BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len);
@@ -3789,7 +3483,7 @@ void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb)
skb_reset_transport_header(skb);
memcpy(skb_transport_header(skb), &hdr, HCI_SCO_HDR_SIZE);
- bt_cb(skb)->pkt_type = HCI_SCODATA_PKT;
+ hci_skb_pkt_type(skb) = HCI_SCODATA_PKT;
skb_queue_tail(&conn->data_q, skb);
queue_work(hdev->workqueue, &hdev->tx_work);
@@ -4340,7 +4034,7 @@ static bool hci_req_is_complete(struct hci_dev *hdev)
if (!skb)
return true;
- return bt_cb(skb)->hci.req_start;
+ return (bt_cb(skb)->hci.req_flags & HCI_REQ_START);
}
static void hci_resend_last(struct hci_dev *hdev)
@@ -4400,26 +4094,28 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status,
* callback would be found in hdev->sent_cmd instead of the
* command queue (hdev->cmd_q).
*/
- if (bt_cb(hdev->sent_cmd)->hci.req_complete) {
- *req_complete = bt_cb(hdev->sent_cmd)->hci.req_complete;
+ if (bt_cb(hdev->sent_cmd)->hci.req_flags & HCI_REQ_SKB) {
+ *req_complete_skb = bt_cb(hdev->sent_cmd)->hci.req_complete_skb;
return;
}
- if (bt_cb(hdev->sent_cmd)->hci.req_complete_skb) {
- *req_complete_skb = bt_cb(hdev->sent_cmd)->hci.req_complete_skb;
+ if (bt_cb(hdev->sent_cmd)->hci.req_complete) {
+ *req_complete = bt_cb(hdev->sent_cmd)->hci.req_complete;
return;
}
/* Remove all pending commands belonging to this request */
spin_lock_irqsave(&hdev->cmd_q.lock, flags);
while ((skb = __skb_dequeue(&hdev->cmd_q))) {
- if (bt_cb(skb)->hci.req_start) {
+ if (bt_cb(skb)->hci.req_flags & HCI_REQ_START) {
__skb_queue_head(&hdev->cmd_q, skb);
break;
}
- *req_complete = bt_cb(skb)->hci.req_complete;
- *req_complete_skb = bt_cb(skb)->hci.req_complete_skb;
+ if (bt_cb(skb)->hci.req_flags & HCI_REQ_SKB)
+ *req_complete_skb = bt_cb(skb)->hci.req_complete_skb;
+ else
+ *req_complete = bt_cb(skb)->hci.req_complete;
kfree_skb(skb);
}
spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
@@ -4448,7 +4144,7 @@ static void hci_rx_work(struct work_struct *work)
if (test_bit(HCI_INIT, &hdev->flags)) {
/* Don't process data packets in this states. */
- switch (bt_cb(skb)->pkt_type) {
+ switch (hci_skb_pkt_type(skb)) {
case HCI_ACLDATA_PKT:
case HCI_SCODATA_PKT:
kfree_skb(skb);
@@ -4457,7 +4153,7 @@ static void hci_rx_work(struct work_struct *work)
}
/* Process frame */
- switch (bt_cb(skb)->pkt_type) {
+ switch (hci_skb_pkt_type(skb)) {
case HCI_EVENT_PKT:
BT_DBG("%s Event packet", hdev->name);
hci_event_packet(hdev, skb);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index d57c11c1c6b5..c162af5d16bf 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1183,7 +1183,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
else if (!hci_dev_test_flag(hdev, HCI_LE_ADV) &&
hdev->discovery.state == DISCOVERY_FINDING)
- mgmt_reenable_advertising(hdev);
+ hci_req_reenable_advertising(hdev);
break;
@@ -2176,7 +2176,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES,
sizeof(cp), &cp);
- hci_update_page_scan(hdev);
+ hci_req_update_scan(hdev);
}
/* Set packet type for incoming connection */
@@ -2362,7 +2362,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (test_bit(HCI_CONN_FLUSH_KEY, &conn->flags))
hci_remove_link_key(hdev, &conn->dst);
- hci_update_page_scan(hdev);
+ hci_req_update_scan(hdev);
}
params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type);
@@ -2401,7 +2401,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
* is timed out due to Directed Advertising."
*/
if (type == LE_LINK)
- mgmt_reenable_advertising(hdev);
+ hci_req_reenable_advertising(hdev);
unlock:
hci_dev_unlock(hdev);
@@ -3833,9 +3833,9 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev,
data.ssp_mode = 0x01;
if (hci_dev_test_flag(hdev, HCI_MGMT))
- name_known = eir_has_data_type(info->data,
- sizeof(info->data),
- EIR_NAME_COMPLETE);
+ name_known = eir_get_data(info->data,
+ sizeof(info->data),
+ EIR_NAME_COMPLETE, NULL);
else
name_known = true;
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 981f8a202c27..c78ee2dc9323 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -21,12 +21,19 @@
SOFTWARE IS DISCLAIMED.
*/
+#include <asm/unaligned.h>
+
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/mgmt.h>
#include "smp.h"
#include "hci_request.h"
+#define HCI_REQ_DONE 0
+#define HCI_REQ_PEND 1
+#define HCI_REQ_CANCELED 2
+
void hci_req_init(struct hci_request *req, struct hci_dev *hdev)
{
skb_queue_head_init(&req->cmd_q);
@@ -56,8 +63,12 @@ static int req_run(struct hci_request *req, hci_req_complete_t complete,
return -ENODATA;
skb = skb_peek_tail(&req->cmd_q);
- bt_cb(skb)->hci.req_complete = complete;
- bt_cb(skb)->hci.req_complete_skb = complete_skb;
+ if (complete) {
+ bt_cb(skb)->hci.req_complete = complete;
+ } else if (complete_skb) {
+ bt_cb(skb)->hci.req_complete_skb = complete_skb;
+ bt_cb(skb)->hci.req_flags |= HCI_REQ_SKB;
+ }
spin_lock_irqsave(&hdev->cmd_q.lock, flags);
skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q);
@@ -78,6 +89,203 @@ int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete)
return req_run(req, NULL, complete);
}
+static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
+ struct sk_buff *skb)
+{
+ BT_DBG("%s result 0x%2.2x", hdev->name, result);
+
+ if (hdev->req_status == HCI_REQ_PEND) {
+ hdev->req_result = result;
+ hdev->req_status = HCI_REQ_DONE;
+ if (skb)
+ hdev->req_skb = skb_get(skb);
+ wake_up_interruptible(&hdev->req_wait_q);
+ }
+}
+
+void hci_req_sync_cancel(struct hci_dev *hdev, int err)
+{
+ BT_DBG("%s err 0x%2.2x", hdev->name, err);
+
+ if (hdev->req_status == HCI_REQ_PEND) {
+ hdev->req_result = err;
+ hdev->req_status = HCI_REQ_CANCELED;
+ wake_up_interruptible(&hdev->req_wait_q);
+ }
+}
+
+struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param, u8 event, u32 timeout)
+{
+ DECLARE_WAITQUEUE(wait, current);
+ struct hci_request req;
+ struct sk_buff *skb;
+ int err = 0;
+
+ BT_DBG("%s", hdev->name);
+
+ hci_req_init(&req, hdev);
+
+ hci_req_add_ev(&req, opcode, plen, param, event);
+
+ hdev->req_status = HCI_REQ_PEND;
+
+ add_wait_queue(&hdev->req_wait_q, &wait);
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ err = hci_req_run_skb(&req, hci_req_sync_complete);
+ if (err < 0) {
+ remove_wait_queue(&hdev->req_wait_q, &wait);
+ set_current_state(TASK_RUNNING);
+ return ERR_PTR(err);
+ }
+
+ schedule_timeout(timeout);
+
+ remove_wait_queue(&hdev->req_wait_q, &wait);
+
+ if (signal_pending(current))
+ return ERR_PTR(-EINTR);
+
+ switch (hdev->req_status) {
+ case HCI_REQ_DONE:
+ err = -bt_to_errno(hdev->req_result);
+ break;
+
+ case HCI_REQ_CANCELED:
+ err = -hdev->req_result;
+ break;
+
+ default:
+ err = -ETIMEDOUT;
+ break;
+ }
+
+ hdev->req_status = hdev->req_result = 0;
+ skb = hdev->req_skb;
+ hdev->req_skb = NULL;
+
+ BT_DBG("%s end: err %d", hdev->name, err);
+
+ if (err < 0) {
+ kfree_skb(skb);
+ return ERR_PTR(err);
+ }
+
+ if (!skb)
+ return ERR_PTR(-ENODATA);
+
+ return skb;
+}
+EXPORT_SYMBOL(__hci_cmd_sync_ev);
+
+struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param, u32 timeout)
+{
+ return __hci_cmd_sync_ev(hdev, opcode, plen, param, 0, timeout);
+}
+EXPORT_SYMBOL(__hci_cmd_sync);
+
+/* Execute request and wait for completion. */
+int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req,
+ unsigned long opt),
+ unsigned long opt, u32 timeout, u8 *hci_status)
+{
+ struct hci_request req;
+ DECLARE_WAITQUEUE(wait, current);
+ int err = 0;
+
+ BT_DBG("%s start", hdev->name);
+
+ hci_req_init(&req, hdev);
+
+ hdev->req_status = HCI_REQ_PEND;
+
+ err = func(&req, opt);
+ if (err) {
+ if (hci_status)
+ *hci_status = HCI_ERROR_UNSPECIFIED;
+ return err;
+ }
+
+ add_wait_queue(&hdev->req_wait_q, &wait);
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ err = hci_req_run_skb(&req, hci_req_sync_complete);
+ if (err < 0) {
+ hdev->req_status = 0;
+
+ remove_wait_queue(&hdev->req_wait_q, &wait);
+ set_current_state(TASK_RUNNING);
+
+ /* ENODATA means the HCI request command queue is empty.
+ * This can happen when a request with conditionals doesn't
+ * trigger any commands to be sent. This is normal behavior
+ * and should not trigger an error return.
+ */
+ if (err == -ENODATA) {
+ if (hci_status)
+ *hci_status = 0;
+ return 0;
+ }
+
+ if (hci_status)
+ *hci_status = HCI_ERROR_UNSPECIFIED;
+
+ return err;
+ }
+
+ schedule_timeout(timeout);
+
+ remove_wait_queue(&hdev->req_wait_q, &wait);
+
+ if (signal_pending(current))
+ return -EINTR;
+
+ switch (hdev->req_status) {
+ case HCI_REQ_DONE:
+ err = -bt_to_errno(hdev->req_result);
+ if (hci_status)
+ *hci_status = hdev->req_result;
+ break;
+
+ case HCI_REQ_CANCELED:
+ err = -hdev->req_result;
+ if (hci_status)
+ *hci_status = HCI_ERROR_UNSPECIFIED;
+ break;
+
+ default:
+ err = -ETIMEDOUT;
+ if (hci_status)
+ *hci_status = HCI_ERROR_UNSPECIFIED;
+ break;
+ }
+
+ hdev->req_status = hdev->req_result = 0;
+
+ BT_DBG("%s end: err %d", hdev->name, err);
+
+ return err;
+}
+
+int hci_req_sync(struct hci_dev *hdev, int (*req)(struct hci_request *req,
+ unsigned long opt),
+ unsigned long opt, u32 timeout, u8 *hci_status)
+{
+ int ret;
+
+ if (!test_bit(HCI_UP, &hdev->flags))
+ return -ENETDOWN;
+
+ /* Serialize all requests */
+ hci_req_sync_lock(hdev);
+ ret = __hci_req_sync(hdev, req, opt, timeout, hci_status);
+ hci_req_sync_unlock(hdev);
+
+ return ret;
+}
+
struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen,
const void *param)
{
@@ -98,8 +306,8 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen,
BT_DBG("skb len %d", skb->len);
- bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;
- bt_cb(skb)->hci.opcode = opcode;
+ hci_skb_pkt_type(skb) = HCI_COMMAND_PKT;
+ hci_skb_opcode(skb) = opcode;
return skb;
}
@@ -128,7 +336,7 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
}
if (skb_queue_empty(&req->cmd_q))
- bt_cb(skb)->hci.req_start = true;
+ bt_cb(skb)->hci.req_flags |= HCI_REQ_START;
bt_cb(skb)->hci.req_event = event;
@@ -141,6 +349,311 @@ void hci_req_add(struct hci_request *req, u16 opcode, u32 plen,
hci_req_add_ev(req, opcode, plen, param, 0);
}
+void __hci_req_write_fast_connectable(struct hci_request *req, bool enable)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_write_page_scan_activity acp;
+ u8 type;
+
+ if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
+ return;
+
+ if (hdev->hci_ver < BLUETOOTH_VER_1_2)
+ return;
+
+ if (enable) {
+ type = PAGE_SCAN_TYPE_INTERLACED;
+
+ /* 160 msec page scan interval */
+ acp.interval = cpu_to_le16(0x0100);
+ } else {
+ type = PAGE_SCAN_TYPE_STANDARD; /* default */
+
+ /* default 1.28 sec page scan */
+ acp.interval = cpu_to_le16(0x0800);
+ }
+
+ acp.window = cpu_to_le16(0x0012);
+
+ if (__cpu_to_le16(hdev->page_scan_interval) != acp.interval ||
+ __cpu_to_le16(hdev->page_scan_window) != acp.window)
+ hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY,
+ sizeof(acp), &acp);
+
+ if (hdev->page_scan_type != type)
+ hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type);
+}
+
+/* This function controls the background scanning based on hdev->pend_le_conns
+ * list. If there are pending LE connection we start the background scanning,
+ * otherwise we stop it.
+ *
+ * This function requires the caller holds hdev->lock.
+ */
+static void __hci_update_background_scan(struct hci_request *req)
+{
+ struct hci_dev *hdev = req->hdev;
+
+ if (!test_bit(HCI_UP, &hdev->flags) ||
+ test_bit(HCI_INIT, &hdev->flags) ||
+ hci_dev_test_flag(hdev, HCI_SETUP) ||
+ hci_dev_test_flag(hdev, HCI_CONFIG) ||
+ hci_dev_test_flag(hdev, HCI_AUTO_OFF) ||
+ hci_dev_test_flag(hdev, HCI_UNREGISTER))
+ return;
+
+ /* No point in doing scanning if LE support hasn't been enabled */
+ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
+ return;
+
+ /* If discovery is active don't interfere with it */
+ if (hdev->discovery.state != DISCOVERY_STOPPED)
+ return;
+
+ /* Reset RSSI and UUID filters when starting background scanning
+ * since these filters are meant for service discovery only.
+ *
+ * The Start Discovery and Start Service Discovery operations
+ * ensure to set proper values for RSSI threshold and UUID
+ * filter list. So it is safe to just reset them here.
+ */
+ hci_discovery_filter_clear(hdev);
+
+ if (list_empty(&hdev->pend_le_conns) &&
+ list_empty(&hdev->pend_le_reports)) {
+ /* If there is no pending LE connections or devices
+ * to be scanned for, we should stop the background
+ * scanning.
+ */
+
+ /* If controller is not scanning we are done. */
+ if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
+ return;
+
+ hci_req_add_le_scan_disable(req);
+
+ BT_DBG("%s stopping background scanning", hdev->name);
+ } else {
+ /* If there is at least one pending LE connection, we should
+ * keep the background scan running.
+ */
+
+ /* If controller is connecting, we should not start scanning
+ * since some controllers are not able to scan and connect at
+ * the same time.
+ */
+ if (hci_lookup_le_connect(hdev))
+ return;
+
+ /* If controller is currently scanning, we stop it to ensure we
+ * don't miss any advertising (due to duplicates filter).
+ */
+ if (hci_dev_test_flag(hdev, HCI_LE_SCAN))
+ hci_req_add_le_scan_disable(req);
+
+ hci_req_add_le_passive_scan(req);
+
+ BT_DBG("%s starting background scanning", hdev->name);
+ }
+}
+
+void __hci_req_update_name(struct hci_request *req)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_write_local_name cp;
+
+ memcpy(cp.name, hdev->dev_name, sizeof(cp.name));
+
+ hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp);
+}
+
+#define PNP_INFO_SVCLASS_ID 0x1200
+
+static u8 *create_uuid16_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len)
+{
+ u8 *ptr = data, *uuids_start = NULL;
+ struct bt_uuid *uuid;
+
+ if (len < 4)
+ return ptr;
+
+ list_for_each_entry(uuid, &hdev->uuids, list) {
+ u16 uuid16;
+
+ if (uuid->size != 16)
+ continue;
+
+ uuid16 = get_unaligned_le16(&uuid->uuid[12]);
+ if (uuid16 < 0x1100)
+ continue;
+
+ if (uuid16 == PNP_INFO_SVCLASS_ID)
+ continue;
+
+ if (!uuids_start) {
+ uuids_start = ptr;
+ uuids_start[0] = 1;
+ uuids_start[1] = EIR_UUID16_ALL;
+ ptr += 2;
+ }
+
+ /* Stop if not enough space to put next UUID */
+ if ((ptr - data) + sizeof(u16) > len) {
+ uuids_start[1] = EIR_UUID16_SOME;
+ break;
+ }
+
+ *ptr++ = (uuid16 & 0x00ff);
+ *ptr++ = (uuid16 & 0xff00) >> 8;
+ uuids_start[0] += sizeof(uuid16);
+ }
+
+ return ptr;
+}
+
+static u8 *create_uuid32_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len)
+{
+ u8 *ptr = data, *uuids_start = NULL;
+ struct bt_uuid *uuid;
+
+ if (len < 6)
+ return ptr;
+
+ list_for_each_entry(uuid, &hdev->uuids, list) {
+ if (uuid->size != 32)
+ continue;
+
+ if (!uuids_start) {
+ uuids_start = ptr;
+ uuids_start[0] = 1;
+ uuids_start[1] = EIR_UUID32_ALL;
+ ptr += 2;
+ }
+
+ /* Stop if not enough space to put next UUID */
+ if ((ptr - data) + sizeof(u32) > len) {
+ uuids_start[1] = EIR_UUID32_SOME;
+ break;
+ }
+
+ memcpy(ptr, &uuid->uuid[12], sizeof(u32));
+ ptr += sizeof(u32);
+ uuids_start[0] += sizeof(u32);
+ }
+
+ return ptr;
+}
+
+static u8 *create_uuid128_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len)
+{
+ u8 *ptr = data, *uuids_start = NULL;
+ struct bt_uuid *uuid;
+
+ if (len < 18)
+ return ptr;
+
+ list_for_each_entry(uuid, &hdev->uuids, list) {
+ if (uuid->size != 128)
+ continue;
+
+ if (!uuids_start) {
+ uuids_start = ptr;
+ uuids_start[0] = 1;
+ uuids_start[1] = EIR_UUID128_ALL;
+ ptr += 2;
+ }
+
+ /* Stop if not enough space to put next UUID */
+ if ((ptr - data) + 16 > len) {
+ uuids_start[1] = EIR_UUID128_SOME;
+ break;
+ }
+
+ memcpy(ptr, uuid->uuid, 16);
+ ptr += 16;
+ uuids_start[0] += 16;
+ }
+
+ return ptr;
+}
+
+static void create_eir(struct hci_dev *hdev, u8 *data)
+{
+ u8 *ptr = data;
+ size_t name_len;
+
+ name_len = strlen(hdev->dev_name);
+
+ if (name_len > 0) {
+ /* EIR Data type */
+ if (name_len > 48) {
+ name_len = 48;
+ ptr[1] = EIR_NAME_SHORT;
+ } else
+ ptr[1] = EIR_NAME_COMPLETE;
+
+ /* EIR Data length */
+ ptr[0] = name_len + 1;
+
+ memcpy(ptr + 2, hdev->dev_name, name_len);
+
+ ptr += (name_len + 2);
+ }
+
+ if (hdev->inq_tx_power != HCI_TX_POWER_INVALID) {
+ ptr[0] = 2;
+ ptr[1] = EIR_TX_POWER;
+ ptr[2] = (u8) hdev->inq_tx_power;
+
+ ptr += 3;
+ }
+
+ if (hdev->devid_source > 0) {
+ ptr[0] = 9;
+ ptr[1] = EIR_DEVICE_ID;
+
+ put_unaligned_le16(hdev->devid_source, ptr + 2);
+ put_unaligned_le16(hdev->devid_vendor, ptr + 4);
+ put_unaligned_le16(hdev->devid_product, ptr + 6);
+ put_unaligned_le16(hdev->devid_version, ptr + 8);
+
+ ptr += 10;
+ }
+
+ ptr = create_uuid16_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data));
+ ptr = create_uuid32_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data));
+ ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data));
+}
+
+void __hci_req_update_eir(struct hci_request *req)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_write_eir cp;
+
+ if (!hdev_is_powered(hdev))
+ return;
+
+ if (!lmp_ext_inq_capable(hdev))
+ return;
+
+ if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED))
+ return;
+
+ if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE))
+ return;
+
+ memset(&cp, 0, sizeof(cp));
+
+ create_eir(hdev, cp.data);
+
+ if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0)
+ return;
+
+ memcpy(hdev->eir, cp.data, sizeof(cp.data));
+
+ hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
+}
+
void hci_req_add_le_scan_disable(struct hci_request *req)
{
struct hci_cp_le_set_scan_enable cp;
@@ -175,21 +688,29 @@ static u8 update_white_list(struct hci_request *req)
* command to remove it from the controller.
*/
list_for_each_entry(b, &hdev->le_white_list, list) {
- struct hci_cp_le_del_from_white_list cp;
+ /* If the device is neither in pend_le_conns nor
+ * pend_le_reports then remove it from the whitelist.
+ */
+ if (!hci_pend_le_action_lookup(&hdev->pend_le_conns,
+ &b->bdaddr, b->bdaddr_type) &&
+ !hci_pend_le_action_lookup(&hdev->pend_le_reports,
+ &b->bdaddr, b->bdaddr_type)) {
+ struct hci_cp_le_del_from_white_list cp;
- if (hci_pend_le_action_lookup(&hdev->pend_le_conns,
- &b->bdaddr, b->bdaddr_type) ||
- hci_pend_le_action_lookup(&hdev->pend_le_reports,
- &b->bdaddr, b->bdaddr_type)) {
- white_list_entries++;
+ cp.bdaddr_type = b->bdaddr_type;
+ bacpy(&cp.bdaddr, &b->bdaddr);
+
+ hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST,
+ sizeof(cp), &cp);
continue;
}
- cp.bdaddr_type = b->bdaddr_type;
- bacpy(&cp.bdaddr, &b->bdaddr);
+ if (hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) {
+ /* White list can not be used with RPAs */
+ return 0x00;
+ }
- hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST,
- sizeof(cp), &cp);
+ white_list_entries++;
}
/* Since all no longer valid white list entries have been
@@ -302,6 +823,483 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
&enable_cp);
}
+static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev)
+{
+ u8 instance = hdev->cur_adv_instance;
+ struct adv_info *adv_instance;
+
+ /* Ignore instance 0 */
+ if (instance == 0x00)
+ return 0;
+
+ adv_instance = hci_find_adv_instance(hdev, instance);
+ if (!adv_instance)
+ return 0;
+
+ /* TODO: Take into account the "appearance" and "local-name" flags here.
+ * These are currently being ignored as they are not supported.
+ */
+ return adv_instance->scan_rsp_len;
+}
+
+void __hci_req_disable_advertising(struct hci_request *req)
+{
+ u8 enable = 0x00;
+
+ hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
+}
+
+static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance)
+{
+ u32 flags;
+ struct adv_info *adv_instance;
+
+ if (instance == 0x00) {
+ /* Instance 0 always manages the "Tx Power" and "Flags"
+ * fields
+ */
+ flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS;
+
+ /* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting
+ * corresponds to the "connectable" instance flag.
+ */
+ if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE))
+ flags |= MGMT_ADV_FLAG_CONNECTABLE;
+
+ return flags;
+ }
+
+ adv_instance = hci_find_adv_instance(hdev, instance);
+
+ /* Return 0 when we got an invalid instance identifier. */
+ if (!adv_instance)
+ return 0;
+
+ return adv_instance->flags;
+}
+
+void __hci_req_enable_advertising(struct hci_request *req)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_le_set_adv_param cp;
+ u8 own_addr_type, enable = 0x01;
+ bool connectable;
+ u32 flags;
+
+ if (hci_conn_num(hdev, LE_LINK) > 0)
+ return;
+
+ if (hci_dev_test_flag(hdev, HCI_LE_ADV))
+ __hci_req_disable_advertising(req);
+
+ /* Clear the HCI_LE_ADV bit temporarily so that the
+ * hci_update_random_address knows that it's safe to go ahead
+ * and write a new random address. The flag will be set back on
+ * as soon as the SET_ADV_ENABLE HCI command completes.
+ */
+ hci_dev_clear_flag(hdev, HCI_LE_ADV);
+
+ flags = get_adv_instance_flags(hdev, hdev->cur_adv_instance);
+
+ /* If the "connectable" instance flag was not set, then choose between
+ * ADV_IND and ADV_NONCONN_IND based on the global connectable setting.
+ */
+ connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) ||
+ mgmt_get_connectable(hdev);
+
+ /* Set require_privacy to true only when non-connectable
+ * advertising is used. In that case it is fine to use a
+ * non-resolvable private address.
+ */
+ if (hci_update_random_address(req, !connectable, &own_addr_type) < 0)
+ return;
+
+ memset(&cp, 0, sizeof(cp));
+ cp.min_interval = cpu_to_le16(hdev->le_adv_min_interval);
+ cp.max_interval = cpu_to_le16(hdev->le_adv_max_interval);
+
+ if (connectable)
+ cp.type = LE_ADV_IND;
+ else if (get_cur_adv_instance_scan_rsp_len(hdev))
+ cp.type = LE_ADV_SCAN_IND;
+ else
+ cp.type = LE_ADV_NONCONN_IND;
+
+ cp.own_address_type = own_addr_type;
+ cp.channel_map = hdev->le_adv_channel_map;
+
+ hci_req_add(req, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp);
+
+ hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
+}
+
+static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr)
+{
+ u8 ad_len = 0;
+ size_t name_len;
+
+ name_len = strlen(hdev->dev_name);
+ if (name_len > 0) {
+ size_t max_len = HCI_MAX_AD_LENGTH - ad_len - 2;
+
+ if (name_len > max_len) {
+ name_len = max_len;
+ ptr[1] = EIR_NAME_SHORT;
+ } else
+ ptr[1] = EIR_NAME_COMPLETE;
+
+ ptr[0] = name_len + 1;
+
+ memcpy(ptr + 2, hdev->dev_name, name_len);
+
+ ad_len += (name_len + 2);
+ ptr += (name_len + 2);
+ }
+
+ return ad_len;
+}
+
+static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance,
+ u8 *ptr)
+{
+ struct adv_info *adv_instance;
+
+ adv_instance = hci_find_adv_instance(hdev, instance);
+ if (!adv_instance)
+ return 0;
+
+ /* TODO: Set the appropriate entries based on advertising instance flags
+ * here once flags other than 0 are supported.
+ */
+ memcpy(ptr, adv_instance->scan_rsp_data,
+ adv_instance->scan_rsp_len);
+
+ return adv_instance->scan_rsp_len;
+}
+
+void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_le_set_scan_rsp_data cp;
+ u8 len;
+
+ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
+ return;
+
+ memset(&cp, 0, sizeof(cp));
+
+ if (instance)
+ len = create_instance_scan_rsp_data(hdev, instance, cp.data);
+ else
+ len = create_default_scan_rsp_data(hdev, cp.data);
+
+ if (hdev->scan_rsp_data_len == len &&
+ !memcmp(cp.data, hdev->scan_rsp_data, len))
+ return;
+
+ memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data));
+ hdev->scan_rsp_data_len = len;
+
+ cp.length = len;
+
+ hci_req_add(req, HCI_OP_LE_SET_SCAN_RSP_DATA, sizeof(cp), &cp);
+}
+
+static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
+{
+ struct adv_info *adv_instance = NULL;
+ u8 ad_len = 0, flags = 0;
+ u32 instance_flags;
+
+ /* Return 0 when the current instance identifier is invalid. */
+ if (instance) {
+ adv_instance = hci_find_adv_instance(hdev, instance);
+ if (!adv_instance)
+ return 0;
+ }
+
+ instance_flags = get_adv_instance_flags(hdev, instance);
+
+ /* The Add Advertising command allows userspace to set both the general
+ * and limited discoverable flags.
+ */
+ if (instance_flags & MGMT_ADV_FLAG_DISCOV)
+ flags |= LE_AD_GENERAL;
+
+ if (instance_flags & MGMT_ADV_FLAG_LIMITED_DISCOV)
+ flags |= LE_AD_LIMITED;
+
+ if (flags || (instance_flags & MGMT_ADV_FLAG_MANAGED_FLAGS)) {
+ /* If a discovery flag wasn't provided, simply use the global
+ * settings.
+ */
+ if (!flags)
+ flags |= mgmt_get_adv_discov_flags(hdev);
+
+ if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
+ flags |= LE_AD_NO_BREDR;
+
+ /* If flags would still be empty, then there is no need to
+ * include the "Flags" AD field".
+ */
+ if (flags) {
+ ptr[0] = 0x02;
+ ptr[1] = EIR_FLAGS;
+ ptr[2] = flags;
+
+ ad_len += 3;
+ ptr += 3;
+ }
+ }
+
+ if (adv_instance) {
+ memcpy(ptr, adv_instance->adv_data,
+ adv_instance->adv_data_len);
+ ad_len += adv_instance->adv_data_len;
+ ptr += adv_instance->adv_data_len;
+ }
+
+ /* Provide Tx Power only if we can provide a valid value for it */
+ if (hdev->adv_tx_power != HCI_TX_POWER_INVALID &&
+ (instance_flags & MGMT_ADV_FLAG_TX_POWER)) {
+ ptr[0] = 0x02;
+ ptr[1] = EIR_TX_POWER;
+ ptr[2] = (u8)hdev->adv_tx_power;
+
+ ad_len += 3;
+ ptr += 3;
+ }
+
+ return ad_len;
+}
+
+void __hci_req_update_adv_data(struct hci_request *req, u8 instance)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_le_set_adv_data cp;
+ u8 len;
+
+ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
+ return;
+
+ memset(&cp, 0, sizeof(cp));
+
+ len = create_instance_adv_data(hdev, instance, cp.data);
+
+ /* There's nothing to do if the data hasn't changed */
+ if (hdev->adv_data_len == len &&
+ memcmp(cp.data, hdev->adv_data, len) == 0)
+ return;
+
+ memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
+ hdev->adv_data_len = len;
+
+ cp.length = len;
+
+ hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp);
+}
+
+int hci_req_update_adv_data(struct hci_dev *hdev, u8 instance)
+{
+ struct hci_request req;
+
+ hci_req_init(&req, hdev);
+ __hci_req_update_adv_data(&req, instance);
+
+ return hci_req_run(&req, NULL);
+}
+
+static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode)
+{
+ BT_DBG("%s status %u", hdev->name, status);
+}
+
+void hci_req_reenable_advertising(struct hci_dev *hdev)
+{
+ struct hci_request req;
+
+ if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) &&
+ list_empty(&hdev->adv_instances))
+ return;
+
+ hci_req_init(&req, hdev);
+
+ if (hdev->cur_adv_instance) {
+ __hci_req_schedule_adv_instance(&req, hdev->cur_adv_instance,
+ true);
+ } else {
+ __hci_req_update_adv_data(&req, 0x00);
+ __hci_req_update_scan_rsp_data(&req, 0x00);
+ __hci_req_enable_advertising(&req);
+ }
+
+ hci_req_run(&req, adv_enable_complete);
+}
+
+static void adv_timeout_expire(struct work_struct *work)
+{
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ adv_instance_expire.work);
+
+ struct hci_request req;
+ u8 instance;
+
+ BT_DBG("%s", hdev->name);
+
+ hci_dev_lock(hdev);
+
+ hdev->adv_instance_timeout = 0;
+
+ instance = hdev->cur_adv_instance;
+ if (instance == 0x00)
+ goto unlock;
+
+ hci_req_init(&req, hdev);
+
+ hci_req_clear_adv_instance(hdev, &req, instance, false);
+
+ if (list_empty(&hdev->adv_instances))
+ __hci_req_disable_advertising(&req);
+
+ hci_req_run(&req, NULL);
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
+int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance,
+ bool force)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct adv_info *adv_instance = NULL;
+ u16 timeout;
+
+ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
+ list_empty(&hdev->adv_instances))
+ return -EPERM;
+
+ if (hdev->adv_instance_timeout)
+ return -EBUSY;
+
+ adv_instance = hci_find_adv_instance(hdev, instance);
+ if (!adv_instance)
+ return -ENOENT;
+
+ /* A zero timeout means unlimited advertising. As long as there is
+ * only one instance, duration should be ignored. We still set a timeout
+ * in case further instances are being added later on.
+ *
+ * If the remaining lifetime of the instance is more than the duration
+ * then the timeout corresponds to the duration, otherwise it will be
+ * reduced to the remaining instance lifetime.
+ */
+ if (adv_instance->timeout == 0 ||
+ adv_instance->duration <= adv_instance->remaining_time)
+ timeout = adv_instance->duration;
+ else
+ timeout = adv_instance->remaining_time;
+
+ /* The remaining time is being reduced unless the instance is being
+ * advertised without time limit.
+ */
+ if (adv_instance->timeout)
+ adv_instance->remaining_time =
+ adv_instance->remaining_time - timeout;
+
+ hdev->adv_instance_timeout = timeout;
+ queue_delayed_work(hdev->req_workqueue,
+ &hdev->adv_instance_expire,
+ msecs_to_jiffies(timeout * 1000));
+
+ /* If we're just re-scheduling the same instance again then do not
+ * execute any HCI commands. This happens when a single instance is
+ * being advertised.
+ */
+ if (!force && hdev->cur_adv_instance == instance &&
+ hci_dev_test_flag(hdev, HCI_LE_ADV))
+ return 0;
+
+ hdev->cur_adv_instance = instance;
+ __hci_req_update_adv_data(req, instance);
+ __hci_req_update_scan_rsp_data(req, instance);
+ __hci_req_enable_advertising(req);
+
+ return 0;
+}
+
+static void cancel_adv_timeout(struct hci_dev *hdev)
+{
+ if (hdev->adv_instance_timeout) {
+ hdev->adv_instance_timeout = 0;
+ cancel_delayed_work(&hdev->adv_instance_expire);
+ }
+}
+
+/* For a single instance:
+ * - force == true: The instance will be removed even when its remaining
+ * lifetime is not zero.
+ * - force == false: the instance will be deactivated but kept stored unless
+ * the remaining lifetime is zero.
+ *
+ * For instance == 0x00:
+ * - force == true: All instances will be removed regardless of their timeout
+ * setting.
+ * - force == false: Only instances that have a timeout will be removed.
+ */
+void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req,
+ u8 instance, bool force)
+{
+ struct adv_info *adv_instance, *n, *next_instance = NULL;
+ int err;
+ u8 rem_inst;
+
+ /* Cancel any timeout concerning the removed instance(s). */
+ if (!instance || hdev->cur_adv_instance == instance)
+ cancel_adv_timeout(hdev);
+
+ /* Get the next instance to advertise BEFORE we remove
+ * the current one. This can be the same instance again
+ * if there is only one instance.
+ */
+ if (instance && hdev->cur_adv_instance == instance)
+ next_instance = hci_get_next_instance(hdev, instance);
+
+ if (instance == 0x00) {
+ list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances,
+ list) {
+ if (!(force || adv_instance->timeout))
+ continue;
+
+ rem_inst = adv_instance->instance;
+ err = hci_remove_adv_instance(hdev, rem_inst);
+ if (!err)
+ mgmt_advertising_removed(NULL, hdev, rem_inst);
+ }
+ } else {
+ adv_instance = hci_find_adv_instance(hdev, instance);
+
+ if (force || (adv_instance && adv_instance->timeout &&
+ !adv_instance->remaining_time)) {
+ /* Don't advertise a removed instance. */
+ if (next_instance &&
+ next_instance->instance == instance)
+ next_instance = NULL;
+
+ err = hci_remove_adv_instance(hdev, instance);
+ if (!err)
+ mgmt_advertising_removed(NULL, hdev, instance);
+ }
+ }
+
+ if (!req || !hdev_is_powered(hdev) ||
+ hci_dev_test_flag(hdev, HCI_ADVERTISING))
+ return;
+
+ if (next_instance)
+ __hci_req_schedule_adv_instance(req, next_instance->instance,
+ false);
+}
+
static void set_random_addr(struct hci_request *req, bdaddr_t *rpa)
{
struct hci_dev *hdev = req->hdev;
@@ -432,7 +1430,7 @@ static bool disconnected_whitelist_entries(struct hci_dev *hdev)
return false;
}
-void __hci_update_page_scan(struct hci_request *req)
+void __hci_req_update_scan(struct hci_request *req)
{
struct hci_dev *hdev = req->hdev;
u8 scan;
@@ -452,117 +1450,168 @@ void __hci_update_page_scan(struct hci_request *req)
else
scan = SCAN_DISABLED;
- if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE))
- return;
-
if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE))
scan |= SCAN_INQUIRY;
+ if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE) &&
+ test_bit(HCI_ISCAN, &hdev->flags) == !!(scan & SCAN_INQUIRY))
+ return;
+
hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
}
-void hci_update_page_scan(struct hci_dev *hdev)
+static int update_scan(struct hci_request *req, unsigned long opt)
{
- struct hci_request req;
+ hci_dev_lock(req->hdev);
+ __hci_req_update_scan(req);
+ hci_dev_unlock(req->hdev);
+ return 0;
+}
- hci_req_init(&req, hdev);
- __hci_update_page_scan(&req);
- hci_req_run(&req, NULL);
+static void scan_update_work(struct work_struct *work)
+{
+ struct hci_dev *hdev = container_of(work, struct hci_dev, scan_update);
+
+ hci_req_sync(hdev, update_scan, 0, HCI_CMD_TIMEOUT, NULL);
}
-/* This function controls the background scanning based on hdev->pend_le_conns
- * list. If there are pending LE connection we start the background scanning,
- * otherwise we stop it.
- *
- * This function requires the caller holds hdev->lock.
- */
-void __hci_update_background_scan(struct hci_request *req)
+static int connectable_update(struct hci_request *req, unsigned long opt)
{
struct hci_dev *hdev = req->hdev;
- if (!test_bit(HCI_UP, &hdev->flags) ||
- test_bit(HCI_INIT, &hdev->flags) ||
- hci_dev_test_flag(hdev, HCI_SETUP) ||
- hci_dev_test_flag(hdev, HCI_CONFIG) ||
- hci_dev_test_flag(hdev, HCI_AUTO_OFF) ||
- hci_dev_test_flag(hdev, HCI_UNREGISTER))
+ hci_dev_lock(hdev);
+
+ __hci_req_update_scan(req);
+
+ /* If BR/EDR is not enabled and we disable advertising as a
+ * by-product of disabling connectable, we need to update the
+ * advertising flags.
+ */
+ if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
+ __hci_req_update_adv_data(req, hdev->cur_adv_instance);
+
+ /* Update the advertising parameters if necessary */
+ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
+ !list_empty(&hdev->adv_instances))
+ __hci_req_enable_advertising(req);
+
+ __hci_update_background_scan(req);
+
+ hci_dev_unlock(hdev);
+
+ return 0;
+}
+
+static void connectable_update_work(struct work_struct *work)
+{
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ connectable_update);
+ u8 status;
+
+ hci_req_sync(hdev, connectable_update, 0, HCI_CMD_TIMEOUT, &status);
+ mgmt_set_connectable_complete(hdev, status);
+}
+
+static u8 get_service_classes(struct hci_dev *hdev)
+{
+ struct bt_uuid *uuid;
+ u8 val = 0;
+
+ list_for_each_entry(uuid, &hdev->uuids, list)
+ val |= uuid->svc_hint;
+
+ return val;
+}
+
+void __hci_req_update_class(struct hci_request *req)
+{
+ struct hci_dev *hdev = req->hdev;
+ u8 cod[3];
+
+ BT_DBG("%s", hdev->name);
+
+ if (!hdev_is_powered(hdev))
return;
- /* No point in doing scanning if LE support hasn't been enabled */
- if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
+ if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
return;
- /* If discovery is active don't interfere with it */
- if (hdev->discovery.state != DISCOVERY_STOPPED)
+ if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE))
return;
- /* Reset RSSI and UUID filters when starting background scanning
- * since these filters are meant for service discovery only.
- *
- * The Start Discovery and Start Service Discovery operations
- * ensure to set proper values for RSSI threshold and UUID
- * filter list. So it is safe to just reset them here.
- */
- hci_discovery_filter_clear(hdev);
+ cod[0] = hdev->minor_class;
+ cod[1] = hdev->major_class;
+ cod[2] = get_service_classes(hdev);
- if (list_empty(&hdev->pend_le_conns) &&
- list_empty(&hdev->pend_le_reports)) {
- /* If there is no pending LE connections or devices
- * to be scanned for, we should stop the background
- * scanning.
- */
+ if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE))
+ cod[1] |= 0x20;
- /* If controller is not scanning we are done. */
- if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
- return;
+ if (memcmp(cod, hdev->dev_class, 3) == 0)
+ return;
- hci_req_add_le_scan_disable(req);
+ hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod);
+}
- BT_DBG("%s stopping background scanning", hdev->name);
+static void write_iac(struct hci_request *req)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_write_current_iac_lap cp;
+
+ if (!hci_dev_test_flag(hdev, HCI_DISCOVERABLE))
+ return;
+
+ if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) {
+ /* Limited discoverable mode */
+ cp.num_iac = min_t(u8, hdev->num_iac, 2);
+ cp.iac_lap[0] = 0x00; /* LIAC */
+ cp.iac_lap[1] = 0x8b;
+ cp.iac_lap[2] = 0x9e;
+ cp.iac_lap[3] = 0x33; /* GIAC */
+ cp.iac_lap[4] = 0x8b;
+ cp.iac_lap[5] = 0x9e;
} else {
- /* If there is at least one pending LE connection, we should
- * keep the background scan running.
- */
+ /* General discoverable mode */
+ cp.num_iac = 1;
+ cp.iac_lap[0] = 0x33; /* GIAC */
+ cp.iac_lap[1] = 0x8b;
+ cp.iac_lap[2] = 0x9e;
+ }
- /* If controller is connecting, we should not start scanning
- * since some controllers are not able to scan and connect at
- * the same time.
- */
- if (hci_lookup_le_connect(hdev))
- return;
+ hci_req_add(req, HCI_OP_WRITE_CURRENT_IAC_LAP,
+ (cp.num_iac * 3) + 1, &cp);
+}
- /* If controller is currently scanning, we stop it to ensure we
- * don't miss any advertising (due to duplicates filter).
- */
- if (hci_dev_test_flag(hdev, HCI_LE_SCAN))
- hci_req_add_le_scan_disable(req);
+static int discoverable_update(struct hci_request *req, unsigned long opt)
+{
+ struct hci_dev *hdev = req->hdev;
- hci_req_add_le_passive_scan(req);
+ hci_dev_lock(hdev);
- BT_DBG("%s starting background scanning", hdev->name);
+ if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
+ write_iac(req);
+ __hci_req_update_scan(req);
+ __hci_req_update_class(req);
}
-}
-static void update_background_scan_complete(struct hci_dev *hdev, u8 status,
- u16 opcode)
-{
- if (status)
- BT_DBG("HCI request failed to update background scanning: "
- "status 0x%2.2x", status);
-}
+ /* Advertising instances don't use the global discoverable setting, so
+ * only update AD if advertising was enabled using Set Advertising.
+ */
+ if (hci_dev_test_flag(hdev, HCI_ADVERTISING))
+ __hci_req_update_adv_data(req, 0x00);
-void hci_update_background_scan(struct hci_dev *hdev)
-{
- int err;
- struct hci_request req;
+ hci_dev_unlock(hdev);
- hci_req_init(&req, hdev);
+ return 0;
+}
- __hci_update_background_scan(&req);
+static void discoverable_update_work(struct work_struct *work)
+{
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ discoverable_update);
+ u8 status;
- err = hci_req_run(&req, update_background_scan_complete);
- if (err && err != -ENODATA)
- BT_ERR("Failed to run HCI request: err %d", err);
+ hci_req_sync(hdev, discoverable_update, 0, HCI_CMD_TIMEOUT, &status);
+ mgmt_set_discoverable_complete(hdev, status);
}
void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn,
@@ -657,3 +1706,574 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
return 0;
}
+
+static int update_bg_scan(struct hci_request *req, unsigned long opt)
+{
+ hci_dev_lock(req->hdev);
+ __hci_update_background_scan(req);
+ hci_dev_unlock(req->hdev);
+ return 0;
+}
+
+static void bg_scan_update(struct work_struct *work)
+{
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ bg_scan_update);
+ struct hci_conn *conn;
+ u8 status;
+ int err;
+
+ err = hci_req_sync(hdev, update_bg_scan, 0, HCI_CMD_TIMEOUT, &status);
+ if (!err)
+ return;
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
+ if (conn)
+ hci_le_conn_failed(conn, status);
+
+ hci_dev_unlock(hdev);
+}
+
+static int le_scan_disable(struct hci_request *req, unsigned long opt)
+{
+ hci_req_add_le_scan_disable(req);
+ return 0;
+}
+
+static int bredr_inquiry(struct hci_request *req, unsigned long opt)
+{
+ u8 length = opt;
+ const u8 giac[3] = { 0x33, 0x8b, 0x9e };
+ const u8 liac[3] = { 0x00, 0x8b, 0x9e };
+ struct hci_cp_inquiry cp;
+
+ BT_DBG("%s", req->hdev->name);
+
+ hci_dev_lock(req->hdev);
+ hci_inquiry_cache_flush(req->hdev);
+ hci_dev_unlock(req->hdev);
+
+ memset(&cp, 0, sizeof(cp));
+
+ if (req->hdev->discovery.limited)
+ memcpy(&cp.lap, liac, sizeof(cp.lap));
+ else
+ memcpy(&cp.lap, giac, sizeof(cp.lap));
+
+ cp.length = length;
+
+ hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp);
+
+ return 0;
+}
+
+static void le_scan_disable_work(struct work_struct *work)
+{
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ le_scan_disable.work);
+ u8 status;
+
+ BT_DBG("%s", hdev->name);
+
+ if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
+ return;
+
+ cancel_delayed_work(&hdev->le_scan_restart);
+
+ hci_req_sync(hdev, le_scan_disable, 0, HCI_CMD_TIMEOUT, &status);
+ if (status) {
+ BT_ERR("Failed to disable LE scan: status 0x%02x", status);
+ return;
+ }
+
+ hdev->discovery.scan_start = 0;
+
+ /* If we were running LE only scan, change discovery state. If
+ * we were running both LE and BR/EDR inquiry simultaneously,
+ * and BR/EDR inquiry is already finished, stop discovery,
+ * otherwise BR/EDR inquiry will stop discovery when finished.
+ * If we will resolve remote device name, do not change
+ * discovery state.
+ */
+
+ if (hdev->discovery.type == DISCOV_TYPE_LE)
+ goto discov_stopped;
+
+ if (hdev->discovery.type != DISCOV_TYPE_INTERLEAVED)
+ return;
+
+ if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) {
+ if (!test_bit(HCI_INQUIRY, &hdev->flags) &&
+ hdev->discovery.state != DISCOVERY_RESOLVING)
+ goto discov_stopped;
+
+ return;
+ }
+
+ hci_req_sync(hdev, bredr_inquiry, DISCOV_INTERLEAVED_INQUIRY_LEN,
+ HCI_CMD_TIMEOUT, &status);
+ if (status) {
+ BT_ERR("Inquiry failed: status 0x%02x", status);
+ goto discov_stopped;
+ }
+
+ return;
+
+discov_stopped:
+ hci_dev_lock(hdev);
+ hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
+ hci_dev_unlock(hdev);
+}
+
+static int le_scan_restart(struct hci_request *req, unsigned long opt)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_le_set_scan_enable cp;
+
+ /* If controller is not scanning we are done. */
+ if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
+ return 0;
+
+ hci_req_add_le_scan_disable(req);
+
+ memset(&cp, 0, sizeof(cp));
+ cp.enable = LE_SCAN_ENABLE;
+ cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
+ hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
+
+ return 0;
+}
+
+static void le_scan_restart_work(struct work_struct *work)
+{
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ le_scan_restart.work);
+ unsigned long timeout, duration, scan_start, now;
+ u8 status;
+
+ BT_DBG("%s", hdev->name);
+
+ hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status);
+ if (status) {
+ BT_ERR("Failed to restart LE scan: status %d", status);
+ return;
+ }
+
+ hci_dev_lock(hdev);
+
+ if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
+ !hdev->discovery.scan_start)
+ goto unlock;
+
+ /* When the scan was started, hdev->le_scan_disable has been queued
+ * after duration from scan_start. During scan restart this job
+ * has been canceled, and we need to queue it again after proper
+ * timeout, to make sure that scan does not run indefinitely.
+ */
+ duration = hdev->discovery.scan_duration;
+ scan_start = hdev->discovery.scan_start;
+ now = jiffies;
+ if (now - scan_start <= duration) {
+ int elapsed;
+
+ if (now >= scan_start)
+ elapsed = now - scan_start;
+ else
+ elapsed = ULONG_MAX - scan_start + now;
+
+ timeout = duration - elapsed;
+ } else {
+ timeout = 0;
+ }
+
+ queue_delayed_work(hdev->req_workqueue,
+ &hdev->le_scan_disable, timeout);
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
+static void disable_advertising(struct hci_request *req)
+{
+ u8 enable = 0x00;
+
+ hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
+}
+
+static int active_scan(struct hci_request *req, unsigned long opt)
+{
+ uint16_t interval = opt;
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_le_set_scan_param param_cp;
+ struct hci_cp_le_set_scan_enable enable_cp;
+ u8 own_addr_type;
+ int err;
+
+ BT_DBG("%s", hdev->name);
+
+ if (hci_dev_test_flag(hdev, HCI_LE_ADV)) {
+ hci_dev_lock(hdev);
+
+ /* Don't let discovery abort an outgoing connection attempt
+ * that's using directed advertising.
+ */
+ if (hci_lookup_le_connect(hdev)) {
+ hci_dev_unlock(hdev);
+ return -EBUSY;
+ }
+
+ cancel_adv_timeout(hdev);
+ hci_dev_unlock(hdev);
+
+ disable_advertising(req);
+ }
+
+ /* If controller is scanning, it means the background scanning is
+ * running. Thus, we should temporarily stop it in order to set the
+ * discovery scanning parameters.
+ */
+ if (hci_dev_test_flag(hdev, HCI_LE_SCAN))
+ hci_req_add_le_scan_disable(req);
+
+ /* All active scans will be done with either a resolvable private
+ * address (when privacy feature has been enabled) or non-resolvable
+ * private address.
+ */
+ err = hci_update_random_address(req, true, &own_addr_type);
+ if (err < 0)
+ own_addr_type = ADDR_LE_DEV_PUBLIC;
+
+ memset(&param_cp, 0, sizeof(param_cp));
+ param_cp.type = LE_SCAN_ACTIVE;
+ param_cp.interval = cpu_to_le16(interval);
+ param_cp.window = cpu_to_le16(DISCOV_LE_SCAN_WIN);
+ param_cp.own_address_type = own_addr_type;
+
+ hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp),
+ &param_cp);
+
+ memset(&enable_cp, 0, sizeof(enable_cp));
+ enable_cp.enable = LE_SCAN_ENABLE;
+ enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
+
+ hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp),
+ &enable_cp);
+
+ return 0;
+}
+
+static int interleaved_discov(struct hci_request *req, unsigned long opt)
+{
+ int err;
+
+ BT_DBG("%s", req->hdev->name);
+
+ err = active_scan(req, opt);
+ if (err)
+ return err;
+
+ return bredr_inquiry(req, DISCOV_BREDR_INQUIRY_LEN);
+}
+
+static void start_discovery(struct hci_dev *hdev, u8 *status)
+{
+ unsigned long timeout;
+
+ BT_DBG("%s type %u", hdev->name, hdev->discovery.type);
+
+ switch (hdev->discovery.type) {
+ case DISCOV_TYPE_BREDR:
+ if (!hci_dev_test_flag(hdev, HCI_INQUIRY))
+ hci_req_sync(hdev, bredr_inquiry,
+ DISCOV_BREDR_INQUIRY_LEN, HCI_CMD_TIMEOUT,
+ status);
+ return;
+ case DISCOV_TYPE_INTERLEAVED:
+ /* When running simultaneous discovery, the LE scanning time
+ * should occupy the whole discovery time sine BR/EDR inquiry
+ * and LE scanning are scheduled by the controller.
+ *
+ * For interleaving discovery in comparison, BR/EDR inquiry
+ * and LE scanning are done sequentially with separate
+ * timeouts.
+ */
+ if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY,
+ &hdev->quirks)) {
+ timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT);
+ /* During simultaneous discovery, we double LE scan
+ * interval. We must leave some time for the controller
+ * to do BR/EDR inquiry.
+ */
+ hci_req_sync(hdev, interleaved_discov,
+ DISCOV_LE_SCAN_INT * 2, HCI_CMD_TIMEOUT,
+ status);
+ break;
+ }
+
+ timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout);
+ hci_req_sync(hdev, active_scan, DISCOV_LE_SCAN_INT,
+ HCI_CMD_TIMEOUT, status);
+ break;
+ case DISCOV_TYPE_LE:
+ timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT);
+ hci_req_sync(hdev, active_scan, DISCOV_LE_SCAN_INT,
+ HCI_CMD_TIMEOUT, status);
+ break;
+ default:
+ *status = HCI_ERROR_UNSPECIFIED;
+ return;
+ }
+
+ if (*status)
+ return;
+
+ BT_DBG("%s timeout %u ms", hdev->name, jiffies_to_msecs(timeout));
+
+ /* When service discovery is used and the controller has a
+ * strict duplicate filter, it is important to remember the
+ * start and duration of the scan. This is required for
+ * restarting scanning during the discovery phase.
+ */
+ if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) &&
+ hdev->discovery.result_filtering) {
+ hdev->discovery.scan_start = jiffies;
+ hdev->discovery.scan_duration = timeout;
+ }
+
+ queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_disable,
+ timeout);
+}
+
+bool hci_req_stop_discovery(struct hci_request *req)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct discovery_state *d = &hdev->discovery;
+ struct hci_cp_remote_name_req_cancel cp;
+ struct inquiry_entry *e;
+ bool ret = false;
+
+ BT_DBG("%s state %u", hdev->name, hdev->discovery.state);
+
+ if (d->state == DISCOVERY_FINDING || d->state == DISCOVERY_STOPPING) {
+ if (test_bit(HCI_INQUIRY, &hdev->flags))
+ hci_req_add(req, HCI_OP_INQUIRY_CANCEL, 0, NULL);
+
+ if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
+ cancel_delayed_work(&hdev->le_scan_disable);
+ hci_req_add_le_scan_disable(req);
+ }
+
+ ret = true;
+ } else {
+ /* Passive scanning */
+ if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
+ hci_req_add_le_scan_disable(req);
+ ret = true;
+ }
+ }
+
+ /* No further actions needed for LE-only discovery */
+ if (d->type == DISCOV_TYPE_LE)
+ return ret;
+
+ if (d->state == DISCOVERY_RESOLVING || d->state == DISCOVERY_STOPPING) {
+ e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY,
+ NAME_PENDING);
+ if (!e)
+ return ret;
+
+ bacpy(&cp.bdaddr, &e->data.bdaddr);
+ hci_req_add(req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp),
+ &cp);
+ ret = true;
+ }
+
+ return ret;
+}
+
+static int stop_discovery(struct hci_request *req, unsigned long opt)
+{
+ hci_dev_lock(req->hdev);
+ hci_req_stop_discovery(req);
+ hci_dev_unlock(req->hdev);
+
+ return 0;
+}
+
+static void discov_update(struct work_struct *work)
+{
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ discov_update);
+ u8 status = 0;
+
+ switch (hdev->discovery.state) {
+ case DISCOVERY_STARTING:
+ start_discovery(hdev, &status);
+ mgmt_start_discovery_complete(hdev, status);
+ if (status)
+ hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
+ else
+ hci_discovery_set_state(hdev, DISCOVERY_FINDING);
+ break;
+ case DISCOVERY_STOPPING:
+ hci_req_sync(hdev, stop_discovery, 0, HCI_CMD_TIMEOUT, &status);
+ mgmt_stop_discovery_complete(hdev, status);
+ if (!status)
+ hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
+ break;
+ case DISCOVERY_STOPPED:
+ default:
+ return;
+ }
+}
+
+static void discov_off(struct work_struct *work)
+{
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ discov_off.work);
+
+ BT_DBG("%s", hdev->name);
+
+ hci_dev_lock(hdev);
+
+ /* When discoverable timeout triggers, then just make sure
+ * the limited discoverable flag is cleared. Even in the case
+ * of a timeout triggered from general discoverable, it is
+ * safe to unconditionally clear the flag.
+ */
+ hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
+ hci_dev_clear_flag(hdev, HCI_DISCOVERABLE);
+ hdev->discov_timeout = 0;
+
+ hci_dev_unlock(hdev);
+
+ hci_req_sync(hdev, discoverable_update, 0, HCI_CMD_TIMEOUT, NULL);
+ mgmt_new_settings(hdev);
+}
+
+static int powered_update_hci(struct hci_request *req, unsigned long opt)
+{
+ struct hci_dev *hdev = req->hdev;
+ u8 link_sec;
+
+ hci_dev_lock(hdev);
+
+ if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED) &&
+ !lmp_host_ssp_capable(hdev)) {
+ u8 mode = 0x01;
+
+ hci_req_add(req, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode);
+
+ if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) {
+ u8 support = 0x01;
+
+ hci_req_add(req, HCI_OP_WRITE_SC_SUPPORT,
+ sizeof(support), &support);
+ }
+ }
+
+ if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) &&
+ lmp_bredr_capable(hdev)) {
+ struct hci_cp_write_le_host_supported cp;
+
+ cp.le = 0x01;
+ cp.simul = 0x00;
+
+ /* Check first if we already have the right
+ * host state (host features set)
+ */
+ if (cp.le != lmp_host_le_capable(hdev) ||
+ cp.simul != lmp_host_le_br_capable(hdev))
+ hci_req_add(req, HCI_OP_WRITE_LE_HOST_SUPPORTED,
+ sizeof(cp), &cp);
+ }
+
+ if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) {
+ /* Make sure the controller has a good default for
+ * advertising data. This also applies to the case
+ * where BR/EDR was toggled during the AUTO_OFF phase.
+ */
+ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
+ list_empty(&hdev->adv_instances)) {
+ __hci_req_update_adv_data(req, 0x00);
+ __hci_req_update_scan_rsp_data(req, 0x00);
+
+ if (hci_dev_test_flag(hdev, HCI_ADVERTISING))
+ __hci_req_enable_advertising(req);
+ } else if (!list_empty(&hdev->adv_instances)) {
+ struct adv_info *adv_instance;
+
+ adv_instance = list_first_entry(&hdev->adv_instances,
+ struct adv_info, list);
+ __hci_req_schedule_adv_instance(req,
+ adv_instance->instance,
+ true);
+ }
+ }
+
+ link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY);
+ if (link_sec != test_bit(HCI_AUTH, &hdev->flags))
+ hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE,
+ sizeof(link_sec), &link_sec);
+
+ if (lmp_bredr_capable(hdev)) {
+ if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE))
+ __hci_req_write_fast_connectable(req, true);
+ else
+ __hci_req_write_fast_connectable(req, false);
+ __hci_req_update_scan(req);
+ __hci_req_update_class(req);
+ __hci_req_update_name(req);
+ __hci_req_update_eir(req);
+ }
+
+ hci_dev_unlock(hdev);
+ return 0;
+}
+
+int __hci_req_hci_power_on(struct hci_dev *hdev)
+{
+ /* Register the available SMP channels (BR/EDR and LE) only when
+ * successfully powering on the controller. This late
+ * registration is required so that LE SMP can clearly decide if
+ * the public address or static address is used.
+ */
+ smp_register(hdev);
+
+ return __hci_req_sync(hdev, powered_update_hci, 0, HCI_CMD_TIMEOUT,
+ NULL);
+}
+
+void hci_request_setup(struct hci_dev *hdev)
+{
+ INIT_WORK(&hdev->discov_update, discov_update);
+ INIT_WORK(&hdev->bg_scan_update, bg_scan_update);
+ INIT_WORK(&hdev->scan_update, scan_update_work);
+ INIT_WORK(&hdev->connectable_update, connectable_update_work);
+ INIT_WORK(&hdev->discoverable_update, discoverable_update_work);
+ INIT_DELAYED_WORK(&hdev->discov_off, discov_off);
+ INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work);
+ INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work);
+ INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire);
+}
+
+void hci_request_cancel_all(struct hci_dev *hdev)
+{
+ hci_req_sync_cancel(hdev, ENODEV);
+
+ cancel_work_sync(&hdev->discov_update);
+ cancel_work_sync(&hdev->bg_scan_update);
+ cancel_work_sync(&hdev->scan_update);
+ cancel_work_sync(&hdev->connectable_update);
+ cancel_work_sync(&hdev->discoverable_update);
+ cancel_delayed_work_sync(&hdev->discov_off);
+ cancel_delayed_work_sync(&hdev->le_scan_disable);
+ cancel_delayed_work_sync(&hdev->le_scan_restart);
+
+ if (hdev->adv_instance_timeout) {
+ cancel_delayed_work_sync(&hdev->adv_instance_expire);
+ hdev->adv_instance_timeout = 0;
+ }
+}
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index 25c7f1305dcb..64ff8c040d50 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -20,6 +20,9 @@
SOFTWARE IS DISCLAIMED.
*/
+#define hci_req_sync_lock(hdev) mutex_lock(&hdev->req_lock)
+#define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock)
+
struct hci_request {
struct hci_dev *hdev;
struct sk_buff_head cmd_q;
@@ -41,21 +44,61 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status,
hci_req_complete_t *req_complete,
hci_req_complete_skb_t *req_complete_skb);
+int hci_req_sync(struct hci_dev *hdev, int (*req)(struct hci_request *req,
+ unsigned long opt),
+ unsigned long opt, u32 timeout, u8 *hci_status);
+int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req,
+ unsigned long opt),
+ unsigned long opt, u32 timeout, u8 *hci_status);
+void hci_req_sync_cancel(struct hci_dev *hdev, int err);
+
struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen,
const void *param);
+int __hci_req_hci_power_on(struct hci_dev *hdev);
+
+void __hci_req_write_fast_connectable(struct hci_request *req, bool enable);
+void __hci_req_update_name(struct hci_request *req);
+void __hci_req_update_eir(struct hci_request *req);
+
void hci_req_add_le_scan_disable(struct hci_request *req);
void hci_req_add_le_passive_scan(struct hci_request *req);
-void hci_update_page_scan(struct hci_dev *hdev);
-void __hci_update_page_scan(struct hci_request *req);
+void hci_req_reenable_advertising(struct hci_dev *hdev);
+void __hci_req_enable_advertising(struct hci_request *req);
+void __hci_req_disable_advertising(struct hci_request *req);
+void __hci_req_update_adv_data(struct hci_request *req, u8 instance);
+int hci_req_update_adv_data(struct hci_dev *hdev, u8 instance);
+void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance);
+
+int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance,
+ bool force);
+void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req,
+ u8 instance, bool force);
+
+void __hci_req_update_class(struct hci_request *req);
+
+/* Returns true if HCI commands were queued */
+bool hci_req_stop_discovery(struct hci_request *req);
+
+static inline void hci_req_update_scan(struct hci_dev *hdev)
+{
+ queue_work(hdev->req_workqueue, &hdev->scan_update);
+}
+
+void __hci_req_update_scan(struct hci_request *req);
int hci_update_random_address(struct hci_request *req, bool require_privacy,
u8 *own_addr_type);
-void hci_update_background_scan(struct hci_dev *hdev);
-void __hci_update_background_scan(struct hci_request *req);
-
int hci_abort_conn(struct hci_conn *conn, u8 reason);
void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn,
u8 reason);
+
+static inline void hci_update_background_scan(struct hci_dev *hdev)
+{
+ queue_work(hdev->req_workqueue, &hdev->bg_scan_update);
+}
+
+void hci_request_setup(struct hci_dev *hdev);
+void hci_request_cancel_all(struct hci_dev *hdev);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index b1eb8c09a660..1298d723c0e0 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -25,6 +25,7 @@
/* Bluetooth HCI sockets. */
#include <linux/export.h>
+#include <linux/utsname.h>
#include <asm/unaligned.h>
#include <net/bluetooth/bluetooth.h>
@@ -120,13 +121,13 @@ static bool is_filtered_packet(struct sock *sk, struct sk_buff *skb)
/* Apply filter */
flt = &hci_pi(sk)->filter;
- flt_type = bt_cb(skb)->pkt_type & HCI_FLT_TYPE_BITS;
+ flt_type = hci_skb_pkt_type(skb) & HCI_FLT_TYPE_BITS;
if (!test_bit(flt_type, &flt->type_mask))
return true;
/* Extra filter for event packets only */
- if (bt_cb(skb)->pkt_type != HCI_EVENT_PKT)
+ if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT)
return false;
flt_event = (*(__u8 *)skb->data & HCI_FLT_EVENT_BITS);
@@ -170,19 +171,19 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
continue;
if (hci_pi(sk)->channel == HCI_CHANNEL_RAW) {
- if (bt_cb(skb)->pkt_type != HCI_COMMAND_PKT &&
- bt_cb(skb)->pkt_type != HCI_EVENT_PKT &&
- bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT &&
- bt_cb(skb)->pkt_type != HCI_SCODATA_PKT)
+ if (hci_skb_pkt_type(skb) != HCI_COMMAND_PKT &&
+ hci_skb_pkt_type(skb) != HCI_EVENT_PKT &&
+ hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT &&
+ hci_skb_pkt_type(skb) != HCI_SCODATA_PKT)
continue;
if (is_filtered_packet(sk, skb))
continue;
} else if (hci_pi(sk)->channel == HCI_CHANNEL_USER) {
if (!bt_cb(skb)->incoming)
continue;
- if (bt_cb(skb)->pkt_type != HCI_EVENT_PKT &&
- bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT &&
- bt_cb(skb)->pkt_type != HCI_SCODATA_PKT)
+ if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT &&
+ hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT &&
+ hci_skb_pkt_type(skb) != HCI_SCODATA_PKT)
continue;
} else {
/* Don't send frame to other channel types */
@@ -196,7 +197,7 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
continue;
/* Put type byte before the data */
- memcpy(skb_push(skb_copy, 1), &bt_cb(skb)->pkt_type, 1);
+ memcpy(skb_push(skb_copy, 1), &hci_skb_pkt_type(skb), 1);
}
nskb = skb_clone(skb_copy, GFP_ATOMIC);
@@ -262,7 +263,7 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("hdev %p len %d", hdev, skb->len);
- switch (bt_cb(skb)->pkt_type) {
+ switch (hci_skb_pkt_type(skb)) {
case HCI_COMMAND_PKT:
opcode = cpu_to_le16(HCI_MON_COMMAND_PKT);
break;
@@ -294,7 +295,7 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
return;
/* Put header before the data */
- hdr = (void *) skb_push(skb_copy, HCI_MON_HDR_SIZE);
+ hdr = (void *)skb_push(skb_copy, HCI_MON_HDR_SIZE);
hdr->opcode = opcode;
hdr->index = cpu_to_le16(hdev->id);
hdr->len = cpu_to_le16(skb->len);
@@ -375,7 +376,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
__net_timestamp(skb);
- hdr = (void *) skb_push(skb, HCI_MON_HDR_SIZE);
+ hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE);
hdr->opcode = opcode;
hdr->index = cpu_to_le16(hdev->id);
hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE);
@@ -383,6 +384,38 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
return skb;
}
+static void __printf(2, 3)
+send_monitor_note(struct sock *sk, const char *fmt, ...)
+{
+ size_t len;
+ struct hci_mon_hdr *hdr;
+ struct sk_buff *skb;
+ va_list args;
+
+ va_start(args, fmt);
+ len = vsnprintf(NULL, 0, fmt, args);
+ va_end(args);
+
+ skb = bt_skb_alloc(len + 1, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ va_start(args, fmt);
+ vsprintf(skb_put(skb, len), fmt, args);
+ *skb_put(skb, 1) = 0;
+ va_end(args);
+
+ __net_timestamp(skb);
+
+ hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE);
+ hdr->opcode = cpu_to_le16(HCI_MON_SYSTEM_NOTE);
+ hdr->index = cpu_to_le16(HCI_DEV_NONE);
+ hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE);
+
+ if (sock_queue_rcv_skb(sk, skb))
+ kfree_skb(skb);
+}
+
static void send_monitor_replay(struct sock *sk)
{
struct hci_dev *hdev;
@@ -436,18 +469,18 @@ static void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data)
if (!skb)
return;
- hdr = (void *) skb_put(skb, HCI_EVENT_HDR_SIZE);
+ hdr = (void *)skb_put(skb, HCI_EVENT_HDR_SIZE);
hdr->evt = HCI_EV_STACK_INTERNAL;
hdr->plen = sizeof(*ev) + dlen;
- ev = (void *) skb_put(skb, sizeof(*ev) + dlen);
+ ev = (void *)skb_put(skb, sizeof(*ev) + dlen);
ev->type = type;
memcpy(ev->data, data, dlen);
bt_cb(skb)->incoming = 1;
__net_timestamp(skb);
- bt_cb(skb)->pkt_type = HCI_EVENT_PKT;
+ hci_skb_pkt_type(skb) = HCI_EVENT_PKT;
hci_send_to_sock(hdev, skb);
kfree_skb(skb);
}
@@ -653,20 +686,20 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
return -EOPNOTSUPP;
case HCIGETCONNINFO:
- return hci_get_conn_info(hdev, (void __user *) arg);
+ return hci_get_conn_info(hdev, (void __user *)arg);
case HCIGETAUTHINFO:
- return hci_get_auth_info(hdev, (void __user *) arg);
+ return hci_get_auth_info(hdev, (void __user *)arg);
case HCIBLOCKADDR:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- return hci_sock_blacklist_add(hdev, (void __user *) arg);
+ return hci_sock_blacklist_add(hdev, (void __user *)arg);
case HCIUNBLOCKADDR:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- return hci_sock_blacklist_del(hdev, (void __user *) arg);
+ return hci_sock_blacklist_del(hdev, (void __user *)arg);
}
return -ENOIOCTLCMD;
@@ -675,7 +708,7 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
static int hci_sock_ioctl(struct socket *sock, unsigned int cmd,
unsigned long arg)
{
- void __user *argp = (void __user *) arg;
+ void __user *argp = (void __user *)arg;
struct sock *sk = sock->sk;
int err;
@@ -872,11 +905,28 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
*/
hci_sock_set_flag(sk, HCI_SOCK_TRUSTED);
+ send_monitor_note(sk, "Linux version %s (%s)",
+ init_utsname()->release,
+ init_utsname()->machine);
+ send_monitor_note(sk, "Bluetooth subsystem version %s",
+ BT_SUBSYS_VERSION);
send_monitor_replay(sk);
atomic_inc(&monitor_promisc);
break;
+ case HCI_CHANNEL_LOGGING:
+ if (haddr.hci_dev != HCI_DEV_NONE) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ if (!capable(CAP_NET_ADMIN)) {
+ err = -EPERM;
+ goto done;
+ }
+ break;
+
default:
if (!hci_mgmt_chan_find(haddr.hci_channel)) {
err = -EINVAL;
@@ -926,7 +976,7 @@ done:
static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
int *addr_len, int peer)
{
- struct sockaddr_hci *haddr = (struct sockaddr_hci *) addr;
+ struct sockaddr_hci *haddr = (struct sockaddr_hci *)addr;
struct sock *sk = sock->sk;
struct hci_dev *hdev;
int err = 0;
@@ -991,8 +1041,8 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg,
}
}
-static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
- int flags)
+static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t len, int flags)
{
int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
@@ -1004,6 +1054,9 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (flags & MSG_OOB)
return -EOPNOTSUPP;
+ if (hci_pi(sk)->channel == HCI_CHANNEL_LOGGING)
+ return -EOPNOTSUPP;
+
if (sk->sk_state == BT_CLOSED)
return 0;
@@ -1150,6 +1203,90 @@ done:
return err;
}
+static int hci_logging_frame(struct sock *sk, struct msghdr *msg, int len)
+{
+ struct hci_mon_hdr *hdr;
+ struct sk_buff *skb;
+ struct hci_dev *hdev;
+ u16 index;
+ int err;
+
+ /* The logging frame consists at minimum of the standard header,
+ * the priority byte, the ident length byte and at least one string
+ * terminator NUL byte. Anything shorter are invalid packets.
+ */
+ if (len < sizeof(*hdr) + 3)
+ return -EINVAL;
+
+ skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err);
+ if (!skb)
+ return err;
+
+ if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
+ err = -EFAULT;
+ goto drop;
+ }
+
+ hdr = (void *)skb->data;
+
+ if (__le16_to_cpu(hdr->len) != len - sizeof(*hdr)) {
+ err = -EINVAL;
+ goto drop;
+ }
+
+ if (__le16_to_cpu(hdr->opcode) == 0x0000) {
+ __u8 priority = skb->data[sizeof(*hdr)];
+ __u8 ident_len = skb->data[sizeof(*hdr) + 1];
+
+ /* Only the priorities 0-7 are valid and with that any other
+ * value results in an invalid packet.
+ *
+ * The priority byte is followed by an ident length byte and
+ * the NUL terminated ident string. Check that the ident
+ * length is not overflowing the packet and also that the
+ * ident string itself is NUL terminated. In case the ident
+ * length is zero, the length value actually doubles as NUL
+ * terminator identifier.
+ *
+ * The message follows the ident string (if present) and
+ * must be NUL terminated. Otherwise it is not a valid packet.
+ */
+ if (priority > 7 || skb->data[len - 1] != 0x00 ||
+ ident_len > len - sizeof(*hdr) - 3 ||
+ skb->data[sizeof(*hdr) + ident_len + 1] != 0x00) {
+ err = -EINVAL;
+ goto drop;
+ }
+ } else {
+ err = -EINVAL;
+ goto drop;
+ }
+
+ index = __le16_to_cpu(hdr->index);
+
+ if (index != MGMT_INDEX_NONE) {
+ hdev = hci_dev_get(index);
+ if (!hdev) {
+ err = -ENODEV;
+ goto drop;
+ }
+ } else {
+ hdev = NULL;
+ }
+
+ hdr->opcode = cpu_to_le16(HCI_MON_USER_LOGGING);
+
+ hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL);
+ err = len;
+
+ if (hdev)
+ hci_dev_put(hdev);
+
+drop:
+ kfree_skb(skb);
+ return err;
+}
+
static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
@@ -1179,6 +1316,9 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
case HCI_CHANNEL_MONITOR:
err = -EOPNOTSUPP;
goto done;
+ case HCI_CHANNEL_LOGGING:
+ err = hci_logging_frame(sk, msg, len);
+ goto done;
default:
mutex_lock(&mgmt_chan_list_lock);
chan = __hci_mgmt_chan_find(hci_pi(sk)->channel);
@@ -1211,7 +1351,7 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
goto drop;
}
- bt_cb(skb)->pkt_type = *((unsigned char *) skb->data);
+ hci_skb_pkt_type(skb) = skb->data[0];
skb_pull(skb, 1);
if (hci_pi(sk)->channel == HCI_CHANNEL_USER) {
@@ -1220,16 +1360,16 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
*
* However check that the packet type is valid.
*/
- if (bt_cb(skb)->pkt_type != HCI_COMMAND_PKT &&
- bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT &&
- bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) {
+ if (hci_skb_pkt_type(skb) != HCI_COMMAND_PKT &&
+ hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT &&
+ hci_skb_pkt_type(skb) != HCI_SCODATA_PKT) {
err = -EINVAL;
goto drop;
}
skb_queue_tail(&hdev->raw_q, skb);
queue_work(hdev->workqueue, &hdev->tx_work);
- } else if (bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) {
+ } else if (hci_skb_pkt_type(skb) == HCI_COMMAND_PKT) {
u16 opcode = get_unaligned_le16(skb->data);
u16 ogf = hci_opcode_ogf(opcode);
u16 ocf = hci_opcode_ocf(opcode);
@@ -1242,6 +1382,11 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
goto drop;
}
+ /* Since the opcode has already been extracted here, store
+ * a copy of the value for later use by the drivers.
+ */
+ hci_skb_opcode(skb) = opcode;
+
if (ogf == 0x3f) {
skb_queue_tail(&hdev->raw_q, skb);
queue_work(hdev->workqueue, &hdev->tx_work);
@@ -1249,7 +1394,7 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
/* Stand-alone HCI commands must be flagged as
* single-command requests.
*/
- bt_cb(skb)->hci.req_start = true;
+ bt_cb(skb)->hci.req_flags |= HCI_REQ_START;
skb_queue_tail(&hdev->cmd_q, skb);
queue_work(hdev->workqueue, &hdev->cmd_work);
@@ -1260,8 +1405,8 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
goto drop;
}
- if (bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT &&
- bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) {
+ if (hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT &&
+ hci_skb_pkt_type(skb) != HCI_SCODATA_PKT) {
err = -EINVAL;
goto drop;
}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 7c65ee200c29..eb4f5f24cbe3 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -197,10 +197,20 @@ int l2cap_add_psm(struct l2cap_chan *chan, bdaddr_t *src, __le16 psm)
chan->sport = psm;
err = 0;
} else {
- u16 p;
+ u16 p, start, end, incr;
+
+ if (chan->src_type == BDADDR_BREDR) {
+ start = L2CAP_PSM_DYN_START;
+ end = L2CAP_PSM_AUTO_END;
+ incr = 2;
+ } else {
+ start = L2CAP_PSM_LE_DYN_START;
+ end = L2CAP_PSM_LE_DYN_END;
+ incr = 1;
+ }
err = -EINVAL;
- for (p = 0x1001; p < 0x1100; p += 2)
+ for (p = start; p <= end; p += incr)
if (!__l2cap_global_chan_by_addr(cpu_to_le16(p), src)) {
chan->psm = cpu_to_le16(p);
chan->sport = cpu_to_le16(p);
@@ -239,7 +249,7 @@ static u16 l2cap_alloc_cid(struct l2cap_conn *conn)
else
dyn_end = L2CAP_CID_DYN_END;
- for (cid = L2CAP_CID_DYN_START; cid < dyn_end; cid++) {
+ for (cid = L2CAP_CID_DYN_START; cid <= dyn_end; cid++) {
if (!__l2cap_get_chan_by_scid(conn, cid))
return cid;
}
@@ -5250,7 +5260,9 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn,
credits = __le16_to_cpu(rsp->credits);
result = __le16_to_cpu(rsp->result);
- if (result == L2CAP_CR_SUCCESS && (mtu < 23 || mps < 23))
+ if (result == L2CAP_CR_SUCCESS && (mtu < 23 || mps < 23 ||
+ dcid < L2CAP_CID_DYN_START ||
+ dcid > L2CAP_CID_LE_DYN_END))
return -EPROTO;
BT_DBG("dcid 0x%4.4x mtu %u mps %u credits %u result 0x%2.2x",
@@ -5270,6 +5282,11 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn,
switch (result) {
case L2CAP_CR_SUCCESS:
+ if (__l2cap_get_chan_by_dcid(conn, dcid)) {
+ err = -EBADSLT;
+ break;
+ }
+
chan->ident = 0;
chan->dcid = dcid;
chan->omtu = mtu;
@@ -5437,9 +5454,16 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn,
goto response_unlock;
}
+ /* Check for valid dynamic CID range */
+ if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) {
+ result = L2CAP_CR_INVALID_SCID;
+ chan = NULL;
+ goto response_unlock;
+ }
+
/* Check if we already have channel with that dcid */
if (__l2cap_get_chan_by_dcid(conn, scid)) {
- result = L2CAP_CR_NO_MEM;
+ result = L2CAP_CR_SCID_IN_USE;
chan = NULL;
goto response_unlock;
}
@@ -6524,8 +6548,6 @@ static int l2cap_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
struct sk_buff *skb)
{
- int err = 0;
-
BT_DBG("chan %p, control %p, skb %p, state %d", chan, control, skb,
chan->rx_state);
@@ -6556,7 +6578,7 @@ static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
chan->last_acked_seq = control->txseq;
chan->expected_tx_seq = __next_seq(chan, control->txseq);
- return err;
+ return 0;
}
static int l2cap_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb)
@@ -7099,8 +7121,6 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
chan->dcid = cid;
if (bdaddr_type_is_le(dst_type)) {
- u8 role;
-
/* Convert from L2CAP channel address type to HCI address type
*/
if (dst_type == BDADDR_LE_PUBLIC)
@@ -7109,14 +7129,15 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
dst_type = ADDR_LE_DEV_RANDOM;
if (hci_dev_test_flag(hdev, HCI_ADVERTISING))
- role = HCI_ROLE_SLAVE;
+ hcon = hci_connect_le(hdev, dst, dst_type,
+ chan->sec_level,
+ HCI_LE_CONN_TIMEOUT,
+ HCI_ROLE_SLAVE);
else
- role = HCI_ROLE_MASTER;
+ hcon = hci_connect_le_scan(hdev, dst, dst_type,
+ chan->sec_level,
+ HCI_LE_CONN_TIMEOUT);
- hcon = hci_connect_le_scan(hdev, dst, dst_type,
- chan->sec_level,
- HCI_LE_CONN_TIMEOUT,
- role);
} else {
u8 auth_type = l2cap_get_auth_type(chan);
hcon = hci_connect_acl(hdev, dst, chan->sec_level, auth_type);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 1bb551527044..e4cae72895a7 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -58,7 +58,7 @@ static int l2cap_validate_bredr_psm(u16 psm)
return -EINVAL;
/* Restrict usage of well-known PSMs */
- if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE))
+ if (psm < L2CAP_PSM_DYN_START && !capable(CAP_NET_BIND_SERVICE))
return -EACCES;
return 0;
@@ -67,11 +67,11 @@ static int l2cap_validate_bredr_psm(u16 psm)
static int l2cap_validate_le_psm(u16 psm)
{
/* Valid LE_PSM ranges are defined only until 0x00ff */
- if (psm > 0x00ff)
+ if (psm > L2CAP_PSM_LE_DYN_END)
return -EINVAL;
/* Restrict fixed, SIG assigned PSM values to CAP_NET_BIND_SERVICE */
- if (psm <= 0x007f && !capable(CAP_NET_BIND_SERVICE))
+ if (psm < L2CAP_PSM_LE_DYN_START && !capable(CAP_NET_BIND_SERVICE))
return -EACCES;
return 0;
@@ -125,6 +125,9 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
goto done;
}
+ bacpy(&chan->src, &la.l2_bdaddr);
+ chan->src_type = la.l2_bdaddr_type;
+
if (la.l2_cid)
err = l2cap_add_scid(chan, __le16_to_cpu(la.l2_cid));
else
@@ -156,9 +159,6 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
break;
}
- bacpy(&chan->src, &la.l2_bdaddr);
- chan->src_type = la.l2_bdaddr_type;
-
if (chan->psm && bdaddr_type_is_le(chan->src_type))
chan->mode = L2CAP_MODE_LE_FLOWCTL;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 7f22119276f3..5a5089cb6570 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -38,7 +38,7 @@
#include "mgmt_util.h"
#define MGMT_VERSION 1
-#define MGMT_REVISION 10
+#define MGMT_REVISION 11
static const u16 mgmt_commands[] = {
MGMT_OP_READ_INDEX_LIST,
@@ -102,6 +102,8 @@ static const u16 mgmt_commands[] = {
MGMT_OP_READ_ADV_FEATURES,
MGMT_OP_ADD_ADVERTISING,
MGMT_OP_REMOVE_ADVERTISING,
+ MGMT_OP_GET_ADV_SIZE_INFO,
+ MGMT_OP_START_LIMITED_DISCOVERY,
};
static const u16 mgmt_events[] = {
@@ -718,116 +720,6 @@ static u32 get_current_settings(struct hci_dev *hdev)
return settings;
}
-#define PNP_INFO_SVCLASS_ID 0x1200
-
-static u8 *create_uuid16_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len)
-{
- u8 *ptr = data, *uuids_start = NULL;
- struct bt_uuid *uuid;
-
- if (len < 4)
- return ptr;
-
- list_for_each_entry(uuid, &hdev->uuids, list) {
- u16 uuid16;
-
- if (uuid->size != 16)
- continue;
-
- uuid16 = get_unaligned_le16(&uuid->uuid[12]);
- if (uuid16 < 0x1100)
- continue;
-
- if (uuid16 == PNP_INFO_SVCLASS_ID)
- continue;
-
- if (!uuids_start) {
- uuids_start = ptr;
- uuids_start[0] = 1;
- uuids_start[1] = EIR_UUID16_ALL;
- ptr += 2;
- }
-
- /* Stop if not enough space to put next UUID */
- if ((ptr - data) + sizeof(u16) > len) {
- uuids_start[1] = EIR_UUID16_SOME;
- break;
- }
-
- *ptr++ = (uuid16 & 0x00ff);
- *ptr++ = (uuid16 & 0xff00) >> 8;
- uuids_start[0] += sizeof(uuid16);
- }
-
- return ptr;
-}
-
-static u8 *create_uuid32_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len)
-{
- u8 *ptr = data, *uuids_start = NULL;
- struct bt_uuid *uuid;
-
- if (len < 6)
- return ptr;
-
- list_for_each_entry(uuid, &hdev->uuids, list) {
- if (uuid->size != 32)
- continue;
-
- if (!uuids_start) {
- uuids_start = ptr;
- uuids_start[0] = 1;
- uuids_start[1] = EIR_UUID32_ALL;
- ptr += 2;
- }
-
- /* Stop if not enough space to put next UUID */
- if ((ptr - data) + sizeof(u32) > len) {
- uuids_start[1] = EIR_UUID32_SOME;
- break;
- }
-
- memcpy(ptr, &uuid->uuid[12], sizeof(u32));
- ptr += sizeof(u32);
- uuids_start[0] += sizeof(u32);
- }
-
- return ptr;
-}
-
-static u8 *create_uuid128_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len)
-{
- u8 *ptr = data, *uuids_start = NULL;
- struct bt_uuid *uuid;
-
- if (len < 18)
- return ptr;
-
- list_for_each_entry(uuid, &hdev->uuids, list) {
- if (uuid->size != 128)
- continue;
-
- if (!uuids_start) {
- uuids_start = ptr;
- uuids_start[0] = 1;
- uuids_start[1] = EIR_UUID128_ALL;
- ptr += 2;
- }
-
- /* Stop if not enough space to put next UUID */
- if ((ptr - data) + 16 > len) {
- uuids_start[1] = EIR_UUID128_SOME;
- break;
- }
-
- memcpy(ptr, uuid->uuid, 16);
- ptr += 16;
- uuids_start[0] += 16;
- }
-
- return ptr;
-}
-
static struct mgmt_pending_cmd *pending_find(u16 opcode, struct hci_dev *hdev)
{
return mgmt_pending_find(HCI_CHANNEL_CONTROL, opcode, hdev);
@@ -840,98 +732,7 @@ static struct mgmt_pending_cmd *pending_find_data(u16 opcode,
return mgmt_pending_find_data(HCI_CHANNEL_CONTROL, opcode, hdev, data);
}
-static u8 get_current_adv_instance(struct hci_dev *hdev)
-{
- /* The "Set Advertising" setting supersedes the "Add Advertising"
- * setting. Here we set the advertising data based on which
- * setting was set. When neither apply, default to the global settings,
- * represented by instance "0".
- */
- if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) &&
- !hci_dev_test_flag(hdev, HCI_ADVERTISING))
- return hdev->cur_adv_instance;
-
- return 0x00;
-}
-
-static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr)
-{
- u8 ad_len = 0;
- size_t name_len;
-
- name_len = strlen(hdev->dev_name);
- if (name_len > 0) {
- size_t max_len = HCI_MAX_AD_LENGTH - ad_len - 2;
-
- if (name_len > max_len) {
- name_len = max_len;
- ptr[1] = EIR_NAME_SHORT;
- } else
- ptr[1] = EIR_NAME_COMPLETE;
-
- ptr[0] = name_len + 1;
-
- memcpy(ptr + 2, hdev->dev_name, name_len);
-
- ad_len += (name_len + 2);
- ptr += (name_len + 2);
- }
-
- return ad_len;
-}
-
-static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance,
- u8 *ptr)
-{
- struct adv_info *adv_instance;
-
- adv_instance = hci_find_adv_instance(hdev, instance);
- if (!adv_instance)
- return 0;
-
- /* TODO: Set the appropriate entries based on advertising instance flags
- * here once flags other than 0 are supported.
- */
- memcpy(ptr, adv_instance->scan_rsp_data,
- adv_instance->scan_rsp_len);
-
- return adv_instance->scan_rsp_len;
-}
-
-static void update_inst_scan_rsp_data(struct hci_request *req, u8 instance)
-{
- struct hci_dev *hdev = req->hdev;
- struct hci_cp_le_set_scan_rsp_data cp;
- u8 len;
-
- if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
- return;
-
- memset(&cp, 0, sizeof(cp));
-
- if (instance)
- len = create_instance_scan_rsp_data(hdev, instance, cp.data);
- else
- len = create_default_scan_rsp_data(hdev, cp.data);
-
- if (hdev->scan_rsp_data_len == len &&
- !memcmp(cp.data, hdev->scan_rsp_data, len))
- return;
-
- memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data));
- hdev->scan_rsp_data_len = len;
-
- cp.length = len;
-
- hci_req_add(req, HCI_OP_LE_SET_SCAN_RSP_DATA, sizeof(cp), &cp);
-}
-
-static void update_scan_rsp_data(struct hci_request *req)
-{
- update_inst_scan_rsp_data(req, get_current_adv_instance(req->hdev));
-}
-
-static u8 get_adv_discov_flags(struct hci_dev *hdev)
+u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev)
{
struct mgmt_pending_cmd *cmd;
@@ -955,7 +756,7 @@ static u8 get_adv_discov_flags(struct hci_dev *hdev)
return 0;
}
-static bool get_connectable(struct hci_dev *hdev)
+bool mgmt_get_connectable(struct hci_dev *hdev)
{
struct mgmt_pending_cmd *cmd;
@@ -972,344 +773,6 @@ static bool get_connectable(struct hci_dev *hdev)
return hci_dev_test_flag(hdev, HCI_CONNECTABLE);
}
-static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance)
-{
- u32 flags;
- struct adv_info *adv_instance;
-
- if (instance == 0x00) {
- /* Instance 0 always manages the "Tx Power" and "Flags"
- * fields
- */
- flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS;
-
- /* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting
- * corresponds to the "connectable" instance flag.
- */
- if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE))
- flags |= MGMT_ADV_FLAG_CONNECTABLE;
-
- return flags;
- }
-
- adv_instance = hci_find_adv_instance(hdev, instance);
-
- /* Return 0 when we got an invalid instance identifier. */
- if (!adv_instance)
- return 0;
-
- return adv_instance->flags;
-}
-
-static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev)
-{
- u8 instance = get_current_adv_instance(hdev);
- struct adv_info *adv_instance;
-
- /* Ignore instance 0 */
- if (instance == 0x00)
- return 0;
-
- adv_instance = hci_find_adv_instance(hdev, instance);
- if (!adv_instance)
- return 0;
-
- /* TODO: Take into account the "appearance" and "local-name" flags here.
- * These are currently being ignored as they are not supported.
- */
- return adv_instance->scan_rsp_len;
-}
-
-static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
-{
- struct adv_info *adv_instance = NULL;
- u8 ad_len = 0, flags = 0;
- u32 instance_flags;
-
- /* Return 0 when the current instance identifier is invalid. */
- if (instance) {
- adv_instance = hci_find_adv_instance(hdev, instance);
- if (!adv_instance)
- return 0;
- }
-
- instance_flags = get_adv_instance_flags(hdev, instance);
-
- /* The Add Advertising command allows userspace to set both the general
- * and limited discoverable flags.
- */
- if (instance_flags & MGMT_ADV_FLAG_DISCOV)
- flags |= LE_AD_GENERAL;
-
- if (instance_flags & MGMT_ADV_FLAG_LIMITED_DISCOV)
- flags |= LE_AD_LIMITED;
-
- if (flags || (instance_flags & MGMT_ADV_FLAG_MANAGED_FLAGS)) {
- /* If a discovery flag wasn't provided, simply use the global
- * settings.
- */
- if (!flags)
- flags |= get_adv_discov_flags(hdev);
-
- if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
- flags |= LE_AD_NO_BREDR;
-
- /* If flags would still be empty, then there is no need to
- * include the "Flags" AD field".
- */
- if (flags) {
- ptr[0] = 0x02;
- ptr[1] = EIR_FLAGS;
- ptr[2] = flags;
-
- ad_len += 3;
- ptr += 3;
- }
- }
-
- if (adv_instance) {
- memcpy(ptr, adv_instance->adv_data,
- adv_instance->adv_data_len);
- ad_len += adv_instance->adv_data_len;
- ptr += adv_instance->adv_data_len;
- }
-
- /* Provide Tx Power only if we can provide a valid value for it */
- if (hdev->adv_tx_power != HCI_TX_POWER_INVALID &&
- (instance_flags & MGMT_ADV_FLAG_TX_POWER)) {
- ptr[0] = 0x02;
- ptr[1] = EIR_TX_POWER;
- ptr[2] = (u8)hdev->adv_tx_power;
-
- ad_len += 3;
- ptr += 3;
- }
-
- return ad_len;
-}
-
-static void update_inst_adv_data(struct hci_request *req, u8 instance)
-{
- struct hci_dev *hdev = req->hdev;
- struct hci_cp_le_set_adv_data cp;
- u8 len;
-
- if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
- return;
-
- memset(&cp, 0, sizeof(cp));
-
- len = create_instance_adv_data(hdev, instance, cp.data);
-
- /* There's nothing to do if the data hasn't changed */
- if (hdev->adv_data_len == len &&
- memcmp(cp.data, hdev->adv_data, len) == 0)
- return;
-
- memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
- hdev->adv_data_len = len;
-
- cp.length = len;
-
- hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp);
-}
-
-static void update_adv_data(struct hci_request *req)
-{
- update_inst_adv_data(req, get_current_adv_instance(req->hdev));
-}
-
-int mgmt_update_adv_data(struct hci_dev *hdev)
-{
- struct hci_request req;
-
- hci_req_init(&req, hdev);
- update_adv_data(&req);
-
- return hci_req_run(&req, NULL);
-}
-
-static void create_eir(struct hci_dev *hdev, u8 *data)
-{
- u8 *ptr = data;
- size_t name_len;
-
- name_len = strlen(hdev->dev_name);
-
- if (name_len > 0) {
- /* EIR Data type */
- if (name_len > 48) {
- name_len = 48;
- ptr[1] = EIR_NAME_SHORT;
- } else
- ptr[1] = EIR_NAME_COMPLETE;
-
- /* EIR Data length */
- ptr[0] = name_len + 1;
-
- memcpy(ptr + 2, hdev->dev_name, name_len);
-
- ptr += (name_len + 2);
- }
-
- if (hdev->inq_tx_power != HCI_TX_POWER_INVALID) {
- ptr[0] = 2;
- ptr[1] = EIR_TX_POWER;
- ptr[2] = (u8) hdev->inq_tx_power;
-
- ptr += 3;
- }
-
- if (hdev->devid_source > 0) {
- ptr[0] = 9;
- ptr[1] = EIR_DEVICE_ID;
-
- put_unaligned_le16(hdev->devid_source, ptr + 2);
- put_unaligned_le16(hdev->devid_vendor, ptr + 4);
- put_unaligned_le16(hdev->devid_product, ptr + 6);
- put_unaligned_le16(hdev->devid_version, ptr + 8);
-
- ptr += 10;
- }
-
- ptr = create_uuid16_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data));
- ptr = create_uuid32_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data));
- ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data));
-}
-
-static void update_eir(struct hci_request *req)
-{
- struct hci_dev *hdev = req->hdev;
- struct hci_cp_write_eir cp;
-
- if (!hdev_is_powered(hdev))
- return;
-
- if (!lmp_ext_inq_capable(hdev))
- return;
-
- if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED))
- return;
-
- if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE))
- return;
-
- memset(&cp, 0, sizeof(cp));
-
- create_eir(hdev, cp.data);
-
- if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0)
- return;
-
- memcpy(hdev->eir, cp.data, sizeof(cp.data));
-
- hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
-}
-
-static u8 get_service_classes(struct hci_dev *hdev)
-{
- struct bt_uuid *uuid;
- u8 val = 0;
-
- list_for_each_entry(uuid, &hdev->uuids, list)
- val |= uuid->svc_hint;
-
- return val;
-}
-
-static void update_class(struct hci_request *req)
-{
- struct hci_dev *hdev = req->hdev;
- u8 cod[3];
-
- BT_DBG("%s", hdev->name);
-
- if (!hdev_is_powered(hdev))
- return;
-
- if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
- return;
-
- if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE))
- return;
-
- cod[0] = hdev->minor_class;
- cod[1] = hdev->major_class;
- cod[2] = get_service_classes(hdev);
-
- if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE))
- cod[1] |= 0x20;
-
- if (memcmp(cod, hdev->dev_class, 3) == 0)
- return;
-
- hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod);
-}
-
-static void disable_advertising(struct hci_request *req)
-{
- u8 enable = 0x00;
-
- hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
-}
-
-static void enable_advertising(struct hci_request *req)
-{
- struct hci_dev *hdev = req->hdev;
- struct hci_cp_le_set_adv_param cp;
- u8 own_addr_type, enable = 0x01;
- bool connectable;
- u8 instance;
- u32 flags;
-
- if (hci_conn_num(hdev, LE_LINK) > 0)
- return;
-
- if (hci_dev_test_flag(hdev, HCI_LE_ADV))
- disable_advertising(req);
-
- /* Clear the HCI_LE_ADV bit temporarily so that the
- * hci_update_random_address knows that it's safe to go ahead
- * and write a new random address. The flag will be set back on
- * as soon as the SET_ADV_ENABLE HCI command completes.
- */
- hci_dev_clear_flag(hdev, HCI_LE_ADV);
-
- instance = get_current_adv_instance(hdev);
- flags = get_adv_instance_flags(hdev, instance);
-
- /* If the "connectable" instance flag was not set, then choose between
- * ADV_IND and ADV_NONCONN_IND based on the global connectable setting.
- */
- connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) ||
- get_connectable(hdev);
-
- /* Set require_privacy to true only when non-connectable
- * advertising is used. In that case it is fine to use a
- * non-resolvable private address.
- */
- if (hci_update_random_address(req, !connectable, &own_addr_type) < 0)
- return;
-
- memset(&cp, 0, sizeof(cp));
- cp.min_interval = cpu_to_le16(hdev->le_adv_min_interval);
- cp.max_interval = cpu_to_le16(hdev->le_adv_max_interval);
-
- if (connectable)
- cp.type = LE_ADV_IND;
- else if (get_cur_adv_instance_scan_rsp_len(hdev))
- cp.type = LE_ADV_SCAN_IND;
- else
- cp.type = LE_ADV_NONCONN_IND;
-
- cp.own_address_type = own_addr_type;
- cp.channel_map = hdev->le_adv_channel_map;
-
- hci_req_add(req, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp);
-
- hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
-}
-
static void service_cache_off(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev,
@@ -1323,8 +786,8 @@ static void service_cache_off(struct work_struct *work)
hci_dev_lock(hdev);
- update_eir(&req);
- update_class(&req);
+ __hci_req_update_eir(&req);
+ __hci_req_update_class(&req);
hci_dev_unlock(hdev);
@@ -1345,10 +808,11 @@ static void rpa_expired(struct work_struct *work)
return;
/* The generation of a new RPA and programming it into the
- * controller happens in the enable_advertising() function.
+ * controller happens in the hci_req_enable_advertising()
+ * function.
*/
hci_req_init(&req, hdev);
- enable_advertising(&req);
+ __hci_req_enable_advertising(&req);
hci_req_run(&req, NULL);
}
@@ -1416,51 +880,7 @@ static void clean_up_hci_complete(struct hci_dev *hdev, u8 status, u16 opcode)
}
}
-static bool hci_stop_discovery(struct hci_request *req)
-{
- struct hci_dev *hdev = req->hdev;
- struct hci_cp_remote_name_req_cancel cp;
- struct inquiry_entry *e;
-
- switch (hdev->discovery.state) {
- case DISCOVERY_FINDING:
- if (test_bit(HCI_INQUIRY, &hdev->flags))
- hci_req_add(req, HCI_OP_INQUIRY_CANCEL, 0, NULL);
-
- if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
- cancel_delayed_work(&hdev->le_scan_disable);
- hci_req_add_le_scan_disable(req);
- }
-
- return true;
-
- case DISCOVERY_RESOLVING:
- e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY,
- NAME_PENDING);
- if (!e)
- break;
-
- bacpy(&cp.bdaddr, &e->data.bdaddr);
- hci_req_add(req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp),
- &cp);
-
- return true;
-
- default:
- /* Passive scanning */
- if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
- hci_req_add_le_scan_disable(req);
- return true;
- }
-
- break;
- }
-
- return false;
-}
-
-static void advertising_added(struct sock *sk, struct hci_dev *hdev,
- u8 instance)
+void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance)
{
struct mgmt_ev_advertising_added ev;
@@ -1469,8 +889,8 @@ static void advertising_added(struct sock *sk, struct hci_dev *hdev,
mgmt_event(MGMT_EV_ADVERTISING_ADDED, hdev, &ev, sizeof(ev), sk);
}
-static void advertising_removed(struct sock *sk, struct hci_dev *hdev,
- u8 instance)
+void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev,
+ u8 instance)
{
struct mgmt_ev_advertising_removed ev;
@@ -1479,65 +899,6 @@ static void advertising_removed(struct sock *sk, struct hci_dev *hdev,
mgmt_event(MGMT_EV_ADVERTISING_REMOVED, hdev, &ev, sizeof(ev), sk);
}
-static int schedule_adv_instance(struct hci_request *req, u8 instance,
- bool force) {
- struct hci_dev *hdev = req->hdev;
- struct adv_info *adv_instance = NULL;
- u16 timeout;
-
- if (hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
- !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))
- return -EPERM;
-
- if (hdev->adv_instance_timeout)
- return -EBUSY;
-
- adv_instance = hci_find_adv_instance(hdev, instance);
- if (!adv_instance)
- return -ENOENT;
-
- /* A zero timeout means unlimited advertising. As long as there is
- * only one instance, duration should be ignored. We still set a timeout
- * in case further instances are being added later on.
- *
- * If the remaining lifetime of the instance is more than the duration
- * then the timeout corresponds to the duration, otherwise it will be
- * reduced to the remaining instance lifetime.
- */
- if (adv_instance->timeout == 0 ||
- adv_instance->duration <= adv_instance->remaining_time)
- timeout = adv_instance->duration;
- else
- timeout = adv_instance->remaining_time;
-
- /* The remaining time is being reduced unless the instance is being
- * advertised without time limit.
- */
- if (adv_instance->timeout)
- adv_instance->remaining_time =
- adv_instance->remaining_time - timeout;
-
- hdev->adv_instance_timeout = timeout;
- queue_delayed_work(hdev->workqueue,
- &hdev->adv_instance_expire,
- msecs_to_jiffies(timeout * 1000));
-
- /* If we're just re-scheduling the same instance again then do not
- * execute any HCI commands. This happens when a single instance is
- * being advertised.
- */
- if (!force && hdev->cur_adv_instance == instance &&
- hci_dev_test_flag(hdev, HCI_LE_ADV))
- return 0;
-
- hdev->cur_adv_instance = instance;
- update_adv_data(req);
- update_scan_rsp_data(req);
- enable_advertising(req);
-
- return 0;
-}
-
static void cancel_adv_timeout(struct hci_dev *hdev)
{
if (hdev->adv_instance_timeout) {
@@ -1546,76 +907,6 @@ static void cancel_adv_timeout(struct hci_dev *hdev)
}
}
-/* For a single instance:
- * - force == true: The instance will be removed even when its remaining
- * lifetime is not zero.
- * - force == false: the instance will be deactivated but kept stored unless
- * the remaining lifetime is zero.
- *
- * For instance == 0x00:
- * - force == true: All instances will be removed regardless of their timeout
- * setting.
- * - force == false: Only instances that have a timeout will be removed.
- */
-static void clear_adv_instance(struct hci_dev *hdev, struct hci_request *req,
- u8 instance, bool force)
-{
- struct adv_info *adv_instance, *n, *next_instance = NULL;
- int err;
- u8 rem_inst;
-
- /* Cancel any timeout concerning the removed instance(s). */
- if (!instance || hdev->cur_adv_instance == instance)
- cancel_adv_timeout(hdev);
-
- /* Get the next instance to advertise BEFORE we remove
- * the current one. This can be the same instance again
- * if there is only one instance.
- */
- if (instance && hdev->cur_adv_instance == instance)
- next_instance = hci_get_next_instance(hdev, instance);
-
- if (instance == 0x00) {
- list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances,
- list) {
- if (!(force || adv_instance->timeout))
- continue;
-
- rem_inst = adv_instance->instance;
- err = hci_remove_adv_instance(hdev, rem_inst);
- if (!err)
- advertising_removed(NULL, hdev, rem_inst);
- }
- hdev->cur_adv_instance = 0x00;
- } else {
- adv_instance = hci_find_adv_instance(hdev, instance);
-
- if (force || (adv_instance && adv_instance->timeout &&
- !adv_instance->remaining_time)) {
- /* Don't advertise a removed instance. */
- if (next_instance &&
- next_instance->instance == instance)
- next_instance = NULL;
-
- err = hci_remove_adv_instance(hdev, instance);
- if (!err)
- advertising_removed(NULL, hdev, instance);
- }
- }
-
- if (list_empty(&hdev->adv_instances)) {
- hdev->cur_adv_instance = 0x00;
- hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE);
- }
-
- if (!req || !hdev_is_powered(hdev) ||
- hci_dev_test_flag(hdev, HCI_ADVERTISING))
- return;
-
- if (next_instance)
- schedule_adv_instance(req, next_instance->instance, false);
-}
-
static int clean_up_hci_state(struct hci_dev *hdev)
{
struct hci_request req;
@@ -1631,12 +922,12 @@ static int clean_up_hci_state(struct hci_dev *hdev)
hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
}
- clear_adv_instance(hdev, NULL, 0x00, false);
+ hci_req_clear_adv_instance(hdev, NULL, 0x00, false);
if (hci_dev_test_flag(hdev, HCI_LE_ADV))
- disable_advertising(&req);
+ __hci_req_disable_advertising(&req);
- discov_stopped = hci_stop_discovery(&req);
+ discov_stopped = hci_req_stop_discovery(&req);
list_for_each_entry(conn, &hdev->conn_hash.list, list) {
/* 0x15 == Terminated due to Power Off */
@@ -1671,17 +962,6 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) {
- cancel_delayed_work(&hdev->power_off);
-
- if (cp->val) {
- mgmt_pending_add(sk, MGMT_OP_SET_POWERED, hdev,
- data, len);
- err = mgmt_powered(hdev, 1);
- goto failed;
- }
- }
-
if (!!cp->val == hdev_is_powered(hdev)) {
err = send_settings_rsp(sk, MGMT_OP_SET_POWERED, hdev);
goto failed;
@@ -1805,13 +1085,9 @@ static u8 mgmt_le_support(struct hci_dev *hdev)
return MGMT_STATUS_SUCCESS;
}
-static void set_discoverable_complete(struct hci_dev *hdev, u8 status,
- u16 opcode)
+void mgmt_set_discoverable_complete(struct hci_dev *hdev, u8 status)
{
struct mgmt_pending_cmd *cmd;
- struct mgmt_mode *cp;
- struct hci_request req;
- bool changed;
BT_DBG("status 0x%02x", status);
@@ -1828,33 +1104,14 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status,
goto remove_cmd;
}
- cp = cmd->param;
- if (cp->val) {
- changed = !hci_dev_test_and_set_flag(hdev, HCI_DISCOVERABLE);
-
- if (hdev->discov_timeout > 0) {
- int to = msecs_to_jiffies(hdev->discov_timeout * 1000);
- queue_delayed_work(hdev->workqueue, &hdev->discov_off,
- to);
- }
- } else {
- changed = hci_dev_test_and_clear_flag(hdev, HCI_DISCOVERABLE);
+ if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE) &&
+ hdev->discov_timeout > 0) {
+ int to = msecs_to_jiffies(hdev->discov_timeout * 1000);
+ queue_delayed_work(hdev->req_workqueue, &hdev->discov_off, to);
}
send_settings_rsp(cmd->sk, MGMT_OP_SET_DISCOVERABLE, hdev);
-
- if (changed)
- new_settings(hdev, cmd->sk);
-
- /* When the discoverable mode gets changed, make sure
- * that class of device has the limited discoverable
- * bit correctly set. Also update page scan based on whitelist
- * entries.
- */
- hci_req_init(&req, hdev);
- __hci_update_page_scan(&req);
- update_class(&req);
- hci_req_run(&req, NULL);
+ new_settings(hdev, cmd->sk);
remove_cmd:
mgmt_pending_remove(cmd);
@@ -1868,9 +1125,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
{
struct mgmt_cp_set_discoverable *cp = data;
struct mgmt_pending_cmd *cmd;
- struct hci_request req;
u16 timeout;
- u8 scan;
int err;
BT_DBG("request for %s", hdev->name);
@@ -1949,8 +1204,8 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
if (cp->val && hdev->discov_timeout > 0) {
int to = msecs_to_jiffies(hdev->discov_timeout * 1000);
- queue_delayed_work(hdev->workqueue, &hdev->discov_off,
- to);
+ queue_delayed_work(hdev->req_workqueue,
+ &hdev->discov_off, to);
}
err = send_settings_rsp(sk, MGMT_OP_SET_DISCOVERABLE, hdev);
@@ -1970,105 +1225,28 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
cancel_delayed_work(&hdev->discov_off);
hdev->discov_timeout = timeout;
+ if (cp->val)
+ hci_dev_set_flag(hdev, HCI_DISCOVERABLE);
+ else
+ hci_dev_clear_flag(hdev, HCI_DISCOVERABLE);
+
/* Limited discoverable mode */
if (cp->val == 0x02)
hci_dev_set_flag(hdev, HCI_LIMITED_DISCOVERABLE);
else
hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
- hci_req_init(&req, hdev);
-
- /* The procedure for LE-only controllers is much simpler - just
- * update the advertising data.
- */
- if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
- goto update_ad;
-
- scan = SCAN_PAGE;
-
- if (cp->val) {
- struct hci_cp_write_current_iac_lap hci_cp;
-
- if (cp->val == 0x02) {
- /* Limited discoverable mode */
- hci_cp.num_iac = min_t(u8, hdev->num_iac, 2);
- hci_cp.iac_lap[0] = 0x00; /* LIAC */
- hci_cp.iac_lap[1] = 0x8b;
- hci_cp.iac_lap[2] = 0x9e;
- hci_cp.iac_lap[3] = 0x33; /* GIAC */
- hci_cp.iac_lap[4] = 0x8b;
- hci_cp.iac_lap[5] = 0x9e;
- } else {
- /* General discoverable mode */
- hci_cp.num_iac = 1;
- hci_cp.iac_lap[0] = 0x33; /* GIAC */
- hci_cp.iac_lap[1] = 0x8b;
- hci_cp.iac_lap[2] = 0x9e;
- }
-
- hci_req_add(&req, HCI_OP_WRITE_CURRENT_IAC_LAP,
- (hci_cp.num_iac * 3) + 1, &hci_cp);
-
- scan |= SCAN_INQUIRY;
- } else {
- hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
- }
-
- hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, sizeof(scan), &scan);
-
-update_ad:
- update_adv_data(&req);
-
- err = hci_req_run(&req, set_discoverable_complete);
- if (err < 0)
- mgmt_pending_remove(cmd);
+ queue_work(hdev->req_workqueue, &hdev->discoverable_update);
+ err = 0;
failed:
hci_dev_unlock(hdev);
return err;
}
-static void write_fast_connectable(struct hci_request *req, bool enable)
-{
- struct hci_dev *hdev = req->hdev;
- struct hci_cp_write_page_scan_activity acp;
- u8 type;
-
- if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
- return;
-
- if (hdev->hci_ver < BLUETOOTH_VER_1_2)
- return;
-
- if (enable) {
- type = PAGE_SCAN_TYPE_INTERLACED;
-
- /* 160 msec page scan interval */
- acp.interval = cpu_to_le16(0x0100);
- } else {
- type = PAGE_SCAN_TYPE_STANDARD; /* default */
-
- /* default 1.28 sec page scan */
- acp.interval = cpu_to_le16(0x0800);
- }
-
- acp.window = cpu_to_le16(0x0012);
-
- if (__cpu_to_le16(hdev->page_scan_interval) != acp.interval ||
- __cpu_to_le16(hdev->page_scan_window) != acp.window)
- hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY,
- sizeof(acp), &acp);
-
- if (hdev->page_scan_type != type)
- hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type);
-}
-
-static void set_connectable_complete(struct hci_dev *hdev, u8 status,
- u16 opcode)
+void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status)
{
struct mgmt_pending_cmd *cmd;
- struct mgmt_mode *cp;
- bool conn_changed, discov_changed;
BT_DBG("status 0x%02x", status);
@@ -2084,27 +1262,8 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status,
goto remove_cmd;
}
- cp = cmd->param;
- if (cp->val) {
- conn_changed = !hci_dev_test_and_set_flag(hdev,
- HCI_CONNECTABLE);
- discov_changed = false;
- } else {
- conn_changed = hci_dev_test_and_clear_flag(hdev,
- HCI_CONNECTABLE);
- discov_changed = hci_dev_test_and_clear_flag(hdev,
- HCI_DISCOVERABLE);
- }
-
send_settings_rsp(cmd->sk, MGMT_OP_SET_CONNECTABLE, hdev);
-
- if (conn_changed || discov_changed) {
- new_settings(hdev, cmd->sk);
- hci_update_page_scan(hdev);
- if (discov_changed)
- mgmt_update_adv_data(hdev);
- hci_update_background_scan(hdev);
- }
+ new_settings(hdev, cmd->sk);
remove_cmd:
mgmt_pending_remove(cmd);
@@ -2134,7 +1293,7 @@ static int set_connectable_update_settings(struct hci_dev *hdev,
return err;
if (changed) {
- hci_update_page_scan(hdev);
+ hci_req_update_scan(hdev);
hci_update_background_scan(hdev);
return new_settings(hdev, sk);
}
@@ -2147,8 +1306,6 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
{
struct mgmt_mode *cp = data;
struct mgmt_pending_cmd *cmd;
- struct hci_request req;
- u8 scan;
int err;
BT_DBG("request for %s", hdev->name);
@@ -2182,57 +1339,19 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- hci_req_init(&req, hdev);
-
- /* If BR/EDR is not enabled and we disable advertising as a
- * by-product of disabling connectable, we need to update the
- * advertising flags.
- */
- if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
- if (!cp->val) {
- hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
- hci_dev_clear_flag(hdev, HCI_DISCOVERABLE);
- }
- update_adv_data(&req);
- } else if (cp->val != test_bit(HCI_PSCAN, &hdev->flags)) {
- if (cp->val) {
- scan = SCAN_PAGE;
- } else {
- /* If we don't have any whitelist entries just
- * disable all scanning. If there are entries
- * and we had both page and inquiry scanning
- * enabled then fall back to only page scanning.
- * Otherwise no changes are needed.
- */
- if (list_empty(&hdev->whitelist))
- scan = SCAN_DISABLED;
- else if (test_bit(HCI_ISCAN, &hdev->flags))
- scan = SCAN_PAGE;
- else
- goto no_scan_update;
-
- if (test_bit(HCI_ISCAN, &hdev->flags) &&
- hdev->discov_timeout > 0)
- cancel_delayed_work(&hdev->discov_off);
- }
+ if (cp->val) {
+ hci_dev_set_flag(hdev, HCI_CONNECTABLE);
+ } else {
+ if (hdev->discov_timeout > 0)
+ cancel_delayed_work(&hdev->discov_off);
- hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+ hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
+ hci_dev_clear_flag(hdev, HCI_DISCOVERABLE);
+ hci_dev_clear_flag(hdev, HCI_CONNECTABLE);
}
-no_scan_update:
- /* Update the advertising parameters if necessary */
- if (hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
- hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))
- enable_advertising(&req);
-
- err = hci_req_run(&req, set_connectable_complete);
- if (err < 0) {
- mgmt_pending_remove(cmd);
- if (err == -ENODATA)
- err = set_connectable_update_settings(hdev, sk,
- cp->val);
- goto failed;
- }
+ queue_work(hdev->req_workqueue, &hdev->connectable_update);
+ err = 0;
failed:
hci_dev_unlock(hdev);
@@ -2508,10 +1627,10 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode)
struct hci_request req;
hci_req_init(&req, hdev);
- update_adv_data(&req);
- update_scan_rsp_data(&req);
- __hci_update_background_scan(&req);
+ __hci_req_update_adv_data(&req, 0x00);
+ __hci_req_update_scan_rsp_data(&req, 0x00);
hci_req_run(&req, NULL);
+ hci_update_background_scan(hdev);
}
unlock:
@@ -2560,7 +1679,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
enabled = lmp_host_le_capable(hdev);
if (!val)
- clear_adv_instance(hdev, NULL, 0x00, true);
+ hci_req_clear_adv_instance(hdev, NULL, 0x00, true);
if (!hdev_is_powered(hdev) || val == enabled) {
bool changed = false;
@@ -2607,7 +1726,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
hci_cp.simul = 0x00;
} else {
if (hci_dev_test_flag(hdev, HCI_LE_ADV))
- disable_advertising(&req);
+ __hci_req_disable_advertising(&req);
}
hci_req_add(&req, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(hci_cp),
@@ -2722,8 +1841,8 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
hci_req_init(&req, hdev);
- update_class(&req);
- update_eir(&req);
+ __hci_req_update_class(&req);
+ __hci_req_update_eir(&req);
err = hci_req_run(&req, add_uuid_complete);
if (err < 0) {
@@ -2822,8 +1941,8 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,
update_class:
hci_req_init(&req, hdev);
- update_class(&req);
- update_eir(&req);
+ __hci_req_update_class(&req);
+ __hci_req_update_eir(&req);
err = hci_req_run(&req, remove_uuid_complete);
if (err < 0) {
@@ -2898,10 +2017,10 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data,
hci_dev_unlock(hdev);
cancel_delayed_work_sync(&hdev->service_cache);
hci_dev_lock(hdev);
- update_eir(&req);
+ __hci_req_update_eir(&req);
}
- update_class(&req);
+ __hci_req_update_class(&req);
err = hci_req_run(&req, set_class_complete);
if (err < 0) {
@@ -3561,8 +2680,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
conn = hci_connect_le_scan(hdev, &cp->addr.bdaddr,
addr_type, sec_level,
- HCI_LE_CONN_TIMEOUT,
- HCI_ROLE_MASTER);
+ HCI_LE_CONN_TIMEOUT);
}
if (IS_ERR(conn)) {
@@ -3803,16 +2921,6 @@ static int user_passkey_neg_reply(struct sock *sk, struct hci_dev *hdev,
HCI_OP_USER_PASSKEY_NEG_REPLY, 0);
}
-static void update_name(struct hci_request *req)
-{
- struct hci_dev *hdev = req->hdev;
- struct hci_cp_write_local_name cp;
-
- memcpy(cp.name, hdev->dev_name, sizeof(cp.name));
-
- hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp);
-}
-
static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode)
{
struct mgmt_cp_set_local_name *cp;
@@ -3891,15 +2999,15 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data,
hci_req_init(&req, hdev);
if (lmp_bredr_capable(hdev)) {
- update_name(&req);
- update_eir(&req);
+ __hci_req_update_name(&req);
+ __hci_req_update_eir(&req);
}
/* The name is stored in the scan response data and so
* no need to udpate the advertising data here.
*/
if (lmp_le_capable(hdev))
- update_scan_rsp_data(&req);
+ __hci_req_update_scan_rsp_data(&req, hdev->cur_adv_instance);
err = hci_req_run(&req, set_name_complete);
if (err < 0)
@@ -4164,145 +3272,9 @@ done:
return err;
}
-static bool trigger_bredr_inquiry(struct hci_request *req, u8 *status)
-{
- struct hci_dev *hdev = req->hdev;
- struct hci_cp_inquiry cp;
- /* General inquiry access code (GIAC) */
- u8 lap[3] = { 0x33, 0x8b, 0x9e };
-
- *status = mgmt_bredr_support(hdev);
- if (*status)
- return false;
-
- if (hci_dev_test_flag(hdev, HCI_INQUIRY)) {
- *status = MGMT_STATUS_BUSY;
- return false;
- }
-
- hci_inquiry_cache_flush(hdev);
-
- memset(&cp, 0, sizeof(cp));
- memcpy(&cp.lap, lap, sizeof(cp.lap));
- cp.length = DISCOV_BREDR_INQUIRY_LEN;
-
- hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp);
-
- return true;
-}
-
-static bool trigger_le_scan(struct hci_request *req, u16 interval, u8 *status)
-{
- struct hci_dev *hdev = req->hdev;
- struct hci_cp_le_set_scan_param param_cp;
- struct hci_cp_le_set_scan_enable enable_cp;
- u8 own_addr_type;
- int err;
-
- *status = mgmt_le_support(hdev);
- if (*status)
- return false;
-
- if (hci_dev_test_flag(hdev, HCI_LE_ADV)) {
- /* Don't let discovery abort an outgoing connection attempt
- * that's using directed advertising.
- */
- if (hci_lookup_le_connect(hdev)) {
- *status = MGMT_STATUS_REJECTED;
- return false;
- }
-
- cancel_adv_timeout(hdev);
- disable_advertising(req);
- }
-
- /* If controller is scanning, it means the background scanning is
- * running. Thus, we should temporarily stop it in order to set the
- * discovery scanning parameters.
- */
- if (hci_dev_test_flag(hdev, HCI_LE_SCAN))
- hci_req_add_le_scan_disable(req);
-
- /* All active scans will be done with either a resolvable private
- * address (when privacy feature has been enabled) or non-resolvable
- * private address.
- */
- err = hci_update_random_address(req, true, &own_addr_type);
- if (err < 0) {
- *status = MGMT_STATUS_FAILED;
- return false;
- }
-
- memset(&param_cp, 0, sizeof(param_cp));
- param_cp.type = LE_SCAN_ACTIVE;
- param_cp.interval = cpu_to_le16(interval);
- param_cp.window = cpu_to_le16(DISCOV_LE_SCAN_WIN);
- param_cp.own_address_type = own_addr_type;
-
- hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp),
- &param_cp);
-
- memset(&enable_cp, 0, sizeof(enable_cp));
- enable_cp.enable = LE_SCAN_ENABLE;
- enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
-
- hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp),
- &enable_cp);
-
- return true;
-}
-
-static bool trigger_discovery(struct hci_request *req, u8 *status)
-{
- struct hci_dev *hdev = req->hdev;
-
- switch (hdev->discovery.type) {
- case DISCOV_TYPE_BREDR:
- if (!trigger_bredr_inquiry(req, status))
- return false;
- break;
-
- case DISCOV_TYPE_INTERLEAVED:
- if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY,
- &hdev->quirks)) {
- /* During simultaneous discovery, we double LE scan
- * interval. We must leave some time for the controller
- * to do BR/EDR inquiry.
- */
- if (!trigger_le_scan(req, DISCOV_LE_SCAN_INT * 2,
- status))
- return false;
-
- if (!trigger_bredr_inquiry(req, status))
- return false;
-
- return true;
- }
-
- if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
- *status = MGMT_STATUS_NOT_SUPPORTED;
- return false;
- }
- /* fall through */
-
- case DISCOV_TYPE_LE:
- if (!trigger_le_scan(req, DISCOV_LE_SCAN_INT, status))
- return false;
- break;
-
- default:
- *status = MGMT_STATUS_INVALID_PARAMS;
- return false;
- }
-
- return true;
-}
-
-static void start_discovery_complete(struct hci_dev *hdev, u8 status,
- u16 opcode)
+void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status)
{
struct mgmt_pending_cmd *cmd;
- unsigned long timeout;
BT_DBG("status %d", status);
@@ -4312,75 +3284,49 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status,
if (!cmd)
cmd = pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev);
+ if (!cmd)
+ cmd = pending_find(MGMT_OP_START_LIMITED_DISCOVERY, hdev);
+
if (cmd) {
cmd->cmd_complete(cmd, mgmt_status(status));
mgmt_pending_remove(cmd);
}
- if (status) {
- hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
- goto unlock;
- }
-
- hci_discovery_set_state(hdev, DISCOVERY_FINDING);
+ hci_dev_unlock(hdev);
+}
- /* If the scan involves LE scan, pick proper timeout to schedule
- * hdev->le_scan_disable that will stop it.
- */
- switch (hdev->discovery.type) {
+static bool discovery_type_is_valid(struct hci_dev *hdev, uint8_t type,
+ uint8_t *mgmt_status)
+{
+ switch (type) {
case DISCOV_TYPE_LE:
- timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT);
+ *mgmt_status = mgmt_le_support(hdev);
+ if (*mgmt_status)
+ return false;
break;
case DISCOV_TYPE_INTERLEAVED:
- /* When running simultaneous discovery, the LE scanning time
- * should occupy the whole discovery time sine BR/EDR inquiry
- * and LE scanning are scheduled by the controller.
- *
- * For interleaving discovery in comparison, BR/EDR inquiry
- * and LE scanning are done sequentially with separate
- * timeouts.
- */
- if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks))
- timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT);
- else
- timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout);
- break;
+ *mgmt_status = mgmt_le_support(hdev);
+ if (*mgmt_status)
+ return false;
+ /* Intentional fall-through */
case DISCOV_TYPE_BREDR:
- timeout = 0;
+ *mgmt_status = mgmt_bredr_support(hdev);
+ if (*mgmt_status)
+ return false;
break;
default:
- BT_ERR("Invalid discovery type %d", hdev->discovery.type);
- timeout = 0;
- break;
- }
-
- if (timeout) {
- /* When service discovery is used and the controller has
- * a strict duplicate filter, it is important to remember
- * the start and duration of the scan. This is required
- * for restarting scanning during the discovery phase.
- */
- if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER,
- &hdev->quirks) &&
- hdev->discovery.result_filtering) {
- hdev->discovery.scan_start = jiffies;
- hdev->discovery.scan_duration = timeout;
- }
-
- queue_delayed_work(hdev->workqueue,
- &hdev->le_scan_disable, timeout);
+ *mgmt_status = MGMT_STATUS_INVALID_PARAMS;
+ return false;
}
-unlock:
- hci_dev_unlock(hdev);
+ return true;
}
-static int start_discovery(struct sock *sk, struct hci_dev *hdev,
- void *data, u16 len)
+static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev,
+ u16 op, void *data, u16 len)
{
struct mgmt_cp_start_discovery *cp = data;
struct mgmt_pending_cmd *cmd;
- struct hci_request req;
u8 status;
int err;
@@ -4389,7 +3335,7 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev,
hci_dev_lock(hdev);
if (!hdev_is_powered(hdev)) {
- err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY,
+ err = mgmt_cmd_complete(sk, hdev->id, op,
MGMT_STATUS_NOT_POWERED,
&cp->type, sizeof(cp->type));
goto failed;
@@ -4397,20 +3343,17 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev,
if (hdev->discovery.state != DISCOVERY_STOPPED ||
hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) {
- err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY,
- MGMT_STATUS_BUSY, &cp->type,
- sizeof(cp->type));
+ err = mgmt_cmd_complete(sk, hdev->id, op, MGMT_STATUS_BUSY,
+ &cp->type, sizeof(cp->type));
goto failed;
}
- cmd = mgmt_pending_add(sk, MGMT_OP_START_DISCOVERY, hdev, data, len);
- if (!cmd) {
- err = -ENOMEM;
+ if (!discovery_type_is_valid(hdev, cp->type, &status)) {
+ err = mgmt_cmd_complete(sk, hdev->id, op, status,
+ &cp->type, sizeof(cp->type));
goto failed;
}
- cmd->cmd_complete = generic_cmd_complete;
-
/* Clear the discovery filter first to free any previously
* allocated memory for the UUID list.
*/
@@ -4418,29 +3361,43 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev,
hdev->discovery.type = cp->type;
hdev->discovery.report_invalid_rssi = false;
+ if (op == MGMT_OP_START_LIMITED_DISCOVERY)
+ hdev->discovery.limited = true;
+ else
+ hdev->discovery.limited = false;
- hci_req_init(&req, hdev);
-
- if (!trigger_discovery(&req, &status)) {
- err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY,
- status, &cp->type, sizeof(cp->type));
- mgmt_pending_remove(cmd);
+ cmd = mgmt_pending_add(sk, op, hdev, data, len);
+ if (!cmd) {
+ err = -ENOMEM;
goto failed;
}
- err = hci_req_run(&req, start_discovery_complete);
- if (err < 0) {
- mgmt_pending_remove(cmd);
- goto failed;
- }
+ cmd->cmd_complete = generic_cmd_complete;
hci_discovery_set_state(hdev, DISCOVERY_STARTING);
+ queue_work(hdev->req_workqueue, &hdev->discov_update);
+ err = 0;
failed:
hci_dev_unlock(hdev);
return err;
}
+static int start_discovery(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 len)
+{
+ return start_discovery_internal(sk, hdev, MGMT_OP_START_DISCOVERY,
+ data, len);
+}
+
+static int start_limited_discovery(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 len)
+{
+ return start_discovery_internal(sk, hdev,
+ MGMT_OP_START_LIMITED_DISCOVERY,
+ data, len);
+}
+
static int service_discovery_cmd_complete(struct mgmt_pending_cmd *cmd,
u8 status)
{
@@ -4453,7 +3410,6 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
{
struct mgmt_cp_start_service_discovery *cp = data;
struct mgmt_pending_cmd *cmd;
- struct hci_request req;
const u16 max_uuid_count = ((U16_MAX - sizeof(*cp)) / 16);
u16 uuid_count, expected_len;
u8 status;
@@ -4502,6 +3458,13 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
goto failed;
}
+ if (!discovery_type_is_valid(hdev, cp->type, &status)) {
+ err = mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_START_SERVICE_DISCOVERY,
+ status, &cp->type, sizeof(cp->type));
+ goto failed;
+ }
+
cmd = mgmt_pending_add(sk, MGMT_OP_START_SERVICE_DISCOVERY,
hdev, data, len);
if (!cmd) {
@@ -4534,30 +3497,16 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
}
}
- hci_req_init(&req, hdev);
-
- if (!trigger_discovery(&req, &status)) {
- err = mgmt_cmd_complete(sk, hdev->id,
- MGMT_OP_START_SERVICE_DISCOVERY,
- status, &cp->type, sizeof(cp->type));
- mgmt_pending_remove(cmd);
- goto failed;
- }
-
- err = hci_req_run(&req, start_discovery_complete);
- if (err < 0) {
- mgmt_pending_remove(cmd);
- goto failed;
- }
-
hci_discovery_set_state(hdev, DISCOVERY_STARTING);
+ queue_work(hdev->req_workqueue, &hdev->discov_update);
+ err = 0;
failed:
hci_dev_unlock(hdev);
return err;
}
-static void stop_discovery_complete(struct hci_dev *hdev, u8 status, u16 opcode)
+void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status)
{
struct mgmt_pending_cmd *cmd;
@@ -4571,9 +3520,6 @@ static void stop_discovery_complete(struct hci_dev *hdev, u8 status, u16 opcode)
mgmt_pending_remove(cmd);
}
- if (!status)
- hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
-
hci_dev_unlock(hdev);
}
@@ -4582,7 +3528,6 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
{
struct mgmt_cp_stop_discovery *mgmt_cp = data;
struct mgmt_pending_cmd *cmd;
- struct hci_request req;
int err;
BT_DBG("%s", hdev->name);
@@ -4611,24 +3556,9 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
cmd->cmd_complete = generic_cmd_complete;
- hci_req_init(&req, hdev);
-
- hci_stop_discovery(&req);
-
- err = hci_req_run(&req, stop_discovery_complete);
- if (!err) {
- hci_discovery_set_state(hdev, DISCOVERY_STOPPING);
- goto unlock;
- }
-
- mgmt_pending_remove(cmd);
-
- /* If no HCI commands were sent we're done */
- if (err == -ENODATA) {
- err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, 0,
- &mgmt_cp->type, sizeof(mgmt_cp->type));
- hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
- }
+ hci_discovery_set_state(hdev, DISCOVERY_STOPPING);
+ queue_work(hdev->req_workqueue, &hdev->discov_update);
+ err = 0;
unlock:
hci_dev_unlock(hdev);
@@ -4776,7 +3706,7 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data,
NULL, 0);
hci_req_init(&req, hdev);
- update_eir(&req);
+ __hci_req_update_eir(&req);
hci_req_run(&req, NULL);
hci_dev_unlock(hdev);
@@ -4826,7 +3756,6 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status,
* set up earlier, then re-enable multi-instance advertising.
*/
if (hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
- !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) ||
list_empty(&hdev->adv_instances))
goto unlock;
@@ -4842,7 +3771,7 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status,
hci_req_init(&req, hdev);
- err = schedule_adv_instance(&req, instance, true);
+ err = __hci_req_schedule_adv_instance(&req, instance, true);
if (!err)
err = hci_req_run(&req, enable_advertising_instance);
@@ -4892,6 +3821,7 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
bool changed;
if (cp->val) {
+ hdev->cur_adv_instance = 0x00;
changed = !hci_dev_test_and_set_flag(hdev, HCI_ADVERTISING);
if (cp->val == 0x02)
hci_dev_set_flag(hdev, HCI_ADVERTISING_CONNECTABLE);
@@ -4939,11 +3869,12 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
* We cannot use update_[adv|scan_rsp]_data() here as the
* HCI_ADVERTISING flag is not yet set.
*/
- update_inst_adv_data(&req, 0x00);
- update_inst_scan_rsp_data(&req, 0x00);
- enable_advertising(&req);
+ hdev->cur_adv_instance = 0x00;
+ __hci_req_update_adv_data(&req, 0x00);
+ __hci_req_update_scan_rsp_data(&req, 0x00);
+ __hci_req_enable_advertising(&req);
} else {
- disable_advertising(&req);
+ __hci_req_disable_advertising(&req);
}
err = hci_req_run(&req, set_advertising_complete);
@@ -5140,7 +4071,7 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev,
hci_req_init(&req, hdev);
- write_fast_connectable(&req, cp->val);
+ __hci_req_write_fast_connectable(&req, cp->val);
err = hci_req_run(&req, fast_connectable_complete);
if (err < 0) {
@@ -5275,20 +4206,20 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
goto unlock;
}
- /* We need to flip the bit already here so that update_adv_data
- * generates the correct flags.
+ /* We need to flip the bit already here so that
+ * hci_req_update_adv_data generates the correct flags.
*/
hci_dev_set_flag(hdev, HCI_BREDR_ENABLED);
hci_req_init(&req, hdev);
- write_fast_connectable(&req, false);
- __hci_update_page_scan(&req);
+ __hci_req_write_fast_connectable(&req, false);
+ __hci_req_update_scan(&req);
/* Since only the advertising data flags will change, there
* is no need to update the scan response data.
*/
- update_adv_data(&req);
+ __hci_req_update_adv_data(&req, hdev->cur_adv_instance);
err = hci_req_run(&req, set_bredr_complete);
if (err < 0)
@@ -6076,10 +5007,9 @@ static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type)
}
/* This function requires the caller holds hdev->lock */
-static int hci_conn_params_set(struct hci_request *req, bdaddr_t *addr,
+static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr,
u8 addr_type, u8 auto_connect)
{
- struct hci_dev *hdev = req->hdev;
struct hci_conn_params *params;
params = hci_conn_params_add(hdev, addr, addr_type);
@@ -6099,26 +5029,17 @@ static int hci_conn_params_set(struct hci_request *req, bdaddr_t *addr,
*/
if (params->explicit_connect)
list_add(&params->action, &hdev->pend_le_conns);
-
- __hci_update_background_scan(req);
break;
case HCI_AUTO_CONN_REPORT:
if (params->explicit_connect)
list_add(&params->action, &hdev->pend_le_conns);
else
list_add(&params->action, &hdev->pend_le_reports);
- __hci_update_background_scan(req);
break;
case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
- if (!is_connected(hdev, addr, addr_type)) {
+ if (!is_connected(hdev, addr, addr_type))
list_add(&params->action, &hdev->pend_le_conns);
- /* If we are in scan phase of connecting, we were
- * already added to pend_le_conns and scanning.
- */
- if (params->auto_connect != HCI_AUTO_CONN_EXPLICIT)
- __hci_update_background_scan(req);
- }
break;
}
@@ -6142,31 +5063,10 @@ static void device_added(struct sock *sk, struct hci_dev *hdev,
mgmt_event(MGMT_EV_DEVICE_ADDED, hdev, &ev, sizeof(ev), sk);
}
-static void add_device_complete(struct hci_dev *hdev, u8 status, u16 opcode)
-{
- struct mgmt_pending_cmd *cmd;
-
- BT_DBG("status 0x%02x", status);
-
- hci_dev_lock(hdev);
-
- cmd = pending_find(MGMT_OP_ADD_DEVICE, hdev);
- if (!cmd)
- goto unlock;
-
- cmd->cmd_complete(cmd, mgmt_status(status));
- mgmt_pending_remove(cmd);
-
-unlock:
- hci_dev_unlock(hdev);
-}
-
static int add_device(struct sock *sk, struct hci_dev *hdev,
void *data, u16 len)
{
struct mgmt_cp_add_device *cp = data;
- struct mgmt_pending_cmd *cmd;
- struct hci_request req;
u8 auto_conn, addr_type;
int err;
@@ -6183,24 +5083,15 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
MGMT_STATUS_INVALID_PARAMS,
&cp->addr, sizeof(cp->addr));
- hci_req_init(&req, hdev);
-
hci_dev_lock(hdev);
- cmd = mgmt_pending_add(sk, MGMT_OP_ADD_DEVICE, hdev, data, len);
- if (!cmd) {
- err = -ENOMEM;
- goto unlock;
- }
-
- cmd->cmd_complete = addr_cmd_complete;
-
if (cp->addr.type == BDADDR_BREDR) {
/* Only incoming connections action is supported for now */
if (cp->action != 0x01) {
- err = cmd->cmd_complete(cmd,
- MGMT_STATUS_INVALID_PARAMS);
- mgmt_pending_remove(cmd);
+ err = mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_ADD_DEVICE,
+ MGMT_STATUS_INVALID_PARAMS,
+ &cp->addr, sizeof(cp->addr));
goto unlock;
}
@@ -6209,7 +5100,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
if (err)
goto unlock;
- __hci_update_page_scan(&req);
+ hci_req_update_scan(hdev);
goto added;
}
@@ -6229,33 +5120,31 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
* hci_conn_params_lookup.
*/
if (!hci_is_identity_address(&cp->addr.bdaddr, addr_type)) {
- err = cmd->cmd_complete(cmd, MGMT_STATUS_INVALID_PARAMS);
- mgmt_pending_remove(cmd);
+ err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE,
+ MGMT_STATUS_INVALID_PARAMS,
+ &cp->addr, sizeof(cp->addr));
goto unlock;
}
/* If the connection parameters don't exist for this device,
* they will be created and configured with defaults.
*/
- if (hci_conn_params_set(&req, &cp->addr.bdaddr, addr_type,
+ if (hci_conn_params_set(hdev, &cp->addr.bdaddr, addr_type,
auto_conn) < 0) {
- err = cmd->cmd_complete(cmd, MGMT_STATUS_FAILED);
- mgmt_pending_remove(cmd);
+ err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE,
+ MGMT_STATUS_FAILED, &cp->addr,
+ sizeof(cp->addr));
goto unlock;
}
+ hci_update_background_scan(hdev);
+
added:
device_added(sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action);
- err = hci_req_run(&req, add_device_complete);
- if (err < 0) {
- /* ENODATA means no HCI commands were needed (e.g. if
- * the adapter is powered off).
- */
- if (err == -ENODATA)
- err = cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS);
- mgmt_pending_remove(cmd);
- }
+ err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE,
+ MGMT_STATUS_SUCCESS, &cp->addr,
+ sizeof(cp->addr));
unlock:
hci_dev_unlock(hdev);
@@ -6273,55 +5162,25 @@ static void device_removed(struct sock *sk, struct hci_dev *hdev,
mgmt_event(MGMT_EV_DEVICE_REMOVED, hdev, &ev, sizeof(ev), sk);
}
-static void remove_device_complete(struct hci_dev *hdev, u8 status, u16 opcode)
-{
- struct mgmt_pending_cmd *cmd;
-
- BT_DBG("status 0x%02x", status);
-
- hci_dev_lock(hdev);
-
- cmd = pending_find(MGMT_OP_REMOVE_DEVICE, hdev);
- if (!cmd)
- goto unlock;
-
- cmd->cmd_complete(cmd, mgmt_status(status));
- mgmt_pending_remove(cmd);
-
-unlock:
- hci_dev_unlock(hdev);
-}
-
static int remove_device(struct sock *sk, struct hci_dev *hdev,
void *data, u16 len)
{
struct mgmt_cp_remove_device *cp = data;
- struct mgmt_pending_cmd *cmd;
- struct hci_request req;
int err;
BT_DBG("%s", hdev->name);
- hci_req_init(&req, hdev);
-
hci_dev_lock(hdev);
- cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_DEVICE, hdev, data, len);
- if (!cmd) {
- err = -ENOMEM;
- goto unlock;
- }
-
- cmd->cmd_complete = addr_cmd_complete;
-
if (bacmp(&cp->addr.bdaddr, BDADDR_ANY)) {
struct hci_conn_params *params;
u8 addr_type;
if (!bdaddr_type_is_valid(cp->addr.type)) {
- err = cmd->cmd_complete(cmd,
- MGMT_STATUS_INVALID_PARAMS);
- mgmt_pending_remove(cmd);
+ err = mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_REMOVE_DEVICE,
+ MGMT_STATUS_INVALID_PARAMS,
+ &cp->addr, sizeof(cp->addr));
goto unlock;
}
@@ -6330,13 +5189,15 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
&cp->addr.bdaddr,
cp->addr.type);
if (err) {
- err = cmd->cmd_complete(cmd,
- MGMT_STATUS_INVALID_PARAMS);
- mgmt_pending_remove(cmd);
+ err = mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_REMOVE_DEVICE,
+ MGMT_STATUS_INVALID_PARAMS,
+ &cp->addr,
+ sizeof(cp->addr));
goto unlock;
}
- __hci_update_page_scan(&req);
+ hci_req_update_scan(hdev);
device_removed(sk, hdev, &cp->addr.bdaddr,
cp->addr.type);
@@ -6351,33 +5212,36 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
* hci_conn_params_lookup.
*/
if (!hci_is_identity_address(&cp->addr.bdaddr, addr_type)) {
- err = cmd->cmd_complete(cmd,
- MGMT_STATUS_INVALID_PARAMS);
- mgmt_pending_remove(cmd);
+ err = mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_REMOVE_DEVICE,
+ MGMT_STATUS_INVALID_PARAMS,
+ &cp->addr, sizeof(cp->addr));
goto unlock;
}
params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
addr_type);
if (!params) {
- err = cmd->cmd_complete(cmd,
- MGMT_STATUS_INVALID_PARAMS);
- mgmt_pending_remove(cmd);
+ err = mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_REMOVE_DEVICE,
+ MGMT_STATUS_INVALID_PARAMS,
+ &cp->addr, sizeof(cp->addr));
goto unlock;
}
if (params->auto_connect == HCI_AUTO_CONN_DISABLED ||
params->auto_connect == HCI_AUTO_CONN_EXPLICIT) {
- err = cmd->cmd_complete(cmd,
- MGMT_STATUS_INVALID_PARAMS);
- mgmt_pending_remove(cmd);
+ err = mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_REMOVE_DEVICE,
+ MGMT_STATUS_INVALID_PARAMS,
+ &cp->addr, sizeof(cp->addr));
goto unlock;
}
list_del(&params->action);
list_del(&params->list);
kfree(params);
- __hci_update_background_scan(&req);
+ hci_update_background_scan(hdev);
device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type);
} else {
@@ -6385,9 +5249,10 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
struct bdaddr_list *b, *btmp;
if (cp->addr.type) {
- err = cmd->cmd_complete(cmd,
- MGMT_STATUS_INVALID_PARAMS);
- mgmt_pending_remove(cmd);
+ err = mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_REMOVE_DEVICE,
+ MGMT_STATUS_INVALID_PARAMS,
+ &cp->addr, sizeof(cp->addr));
goto unlock;
}
@@ -6397,7 +5262,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
kfree(b);
}
- __hci_update_page_scan(&req);
+ hci_req_update_scan(hdev);
list_for_each_entry_safe(p, tmp, &hdev->le_conn_params, list) {
if (p->auto_connect == HCI_AUTO_CONN_DISABLED)
@@ -6414,20 +5279,13 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
BT_DBG("All LE connection parameters were removed");
- __hci_update_background_scan(&req);
+ hci_update_background_scan(hdev);
}
complete:
- err = hci_req_run(&req, remove_device_complete);
- if (err < 0) {
- /* ENODATA means no HCI commands were needed (e.g. if
- * the adapter is powered off).
- */
- if (err == -ENODATA)
- err = cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS);
- mgmt_pending_remove(cmd);
- }
-
+ err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE,
+ MGMT_STATUS_SUCCESS, &cp->addr,
+ sizeof(cp->addr));
unlock:
hci_dev_unlock(hdev);
return err;
@@ -6898,7 +5756,7 @@ static int read_local_oob_ext_data(struct sock *sk, struct hci_dev *hdev,
rand, sizeof(rand));
}
- flags = get_adv_discov_flags(hdev);
+ flags = mgmt_get_adv_discov_flags(hdev);
if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
flags |= LE_AD_NO_BREDR;
@@ -6953,10 +5811,10 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
{
struct mgmt_rp_read_adv_features *rp;
size_t rp_len;
- int err, i;
- bool instance;
+ int err;
struct adv_info *adv_instance;
u32 supported_flags;
+ u8 *instance;
BT_DBG("%s", hdev->name);
@@ -6966,12 +5824,7 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
hci_dev_lock(hdev);
- rp_len = sizeof(*rp);
-
- instance = hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE);
- if (instance)
- rp_len += hdev->adv_instance_cnt;
-
+ rp_len = sizeof(*rp) + hdev->adv_instance_cnt;
rp = kmalloc(rp_len, GFP_ATOMIC);
if (!rp) {
hci_dev_unlock(hdev);
@@ -6984,19 +5837,12 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
rp->max_adv_data_len = HCI_MAX_AD_LENGTH;
rp->max_scan_rsp_len = HCI_MAX_AD_LENGTH;
rp->max_instances = HCI_MAX_ADV_INSTANCES;
+ rp->num_instances = hdev->adv_instance_cnt;
- if (instance) {
- i = 0;
- list_for_each_entry(adv_instance, &hdev->adv_instances, list) {
- if (i >= hdev->adv_instance_cnt)
- break;
-
- rp->instance[i] = adv_instance->instance;
- i++;
- }
- rp->num_instances = hdev->adv_instance_cnt;
- } else {
- rp->num_instances = 0;
+ instance = rp->instance;
+ list_for_each_entry(adv_instance, &hdev->adv_instances, list) {
+ *instance = adv_instance->instance;
+ instance++;
}
hci_dev_unlock(hdev);
@@ -7016,17 +5862,19 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data,
int i, cur_len;
bool flags_managed = false;
bool tx_power_managed = false;
- u32 flags_params = MGMT_ADV_FLAG_DISCOV | MGMT_ADV_FLAG_LIMITED_DISCOV |
- MGMT_ADV_FLAG_MANAGED_FLAGS;
- if (is_adv_data && (adv_flags & flags_params)) {
- flags_managed = true;
- max_len -= 3;
- }
+ if (is_adv_data) {
+ if (adv_flags & (MGMT_ADV_FLAG_DISCOV |
+ MGMT_ADV_FLAG_LIMITED_DISCOV |
+ MGMT_ADV_FLAG_MANAGED_FLAGS)) {
+ flags_managed = true;
+ max_len -= 3;
+ }
- if (is_adv_data && (adv_flags & MGMT_ADV_FLAG_TX_POWER)) {
- tx_power_managed = true;
- max_len -= 3;
+ if (adv_flags & MGMT_ADV_FLAG_TX_POWER) {
+ tx_power_managed = true;
+ max_len -= 3;
+ }
}
if (len > max_len)
@@ -7067,9 +5915,6 @@ static void add_advertising_complete(struct hci_dev *hdev, u8 status,
cmd = pending_find(MGMT_OP_ADD_ADVERTISING, hdev);
- if (status)
- hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE);
-
list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) {
if (!adv_instance->pending)
continue;
@@ -7085,7 +5930,7 @@ static void add_advertising_complete(struct hci_dev *hdev, u8 status,
cancel_adv_timeout(hdev);
hci_remove_adv_instance(hdev, instance);
- advertising_removed(cmd ? cmd->sk : NULL, hdev, instance);
+ mgmt_advertising_removed(cmd ? cmd->sk : NULL, hdev, instance);
}
if (!cmd)
@@ -7107,31 +5952,6 @@ unlock:
hci_dev_unlock(hdev);
}
-void mgmt_adv_timeout_expired(struct hci_dev *hdev)
-{
- u8 instance;
- struct hci_request req;
-
- hdev->adv_instance_timeout = 0;
-
- instance = get_current_adv_instance(hdev);
- if (instance == 0x00)
- return;
-
- hci_dev_lock(hdev);
- hci_req_init(&req, hdev);
-
- clear_adv_instance(hdev, &req, instance, false);
-
- if (list_empty(&hdev->adv_instances))
- disable_advertising(&req);
-
- if (!skb_queue_empty(&req.cmd_q))
- hci_req_run(&req, NULL);
-
- hci_dev_unlock(hdev);
-}
-
static int add_advertising(struct sock *sk, struct hci_dev *hdev,
void *data, u16 data_len)
{
@@ -7155,6 +5975,10 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
status);
+ if (cp->instance < 1 || cp->instance > HCI_MAX_ADV_INSTANCES)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
+ MGMT_STATUS_INVALID_PARAMS);
+
flags = __le32_to_cpu(cp->flags);
timeout = __le16_to_cpu(cp->timeout);
duration = __le16_to_cpu(cp->duration);
@@ -7206,9 +6030,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
* actually added.
*/
if (hdev->adv_instance_cnt > prev_instance_cnt)
- advertising_added(sk, hdev, cp->instance);
-
- hci_dev_set_flag(hdev, HCI_ADVERTISING_INSTANCE);
+ mgmt_advertising_added(sk, hdev, cp->instance);
if (hdev->cur_adv_instance == cp->instance) {
/* If the currently advertised instance is being changed then
@@ -7253,7 +6075,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
hci_req_init(&req, hdev);
- err = schedule_adv_instance(&req, schedule_instance, true);
+ err = __hci_req_schedule_adv_instance(&req, schedule_instance, true);
if (!err)
err = hci_req_run(&req, add_advertising_complete);
@@ -7325,7 +6147,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- if (!hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) {
+ if (list_empty(&hdev->adv_instances)) {
err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING,
MGMT_STATUS_INVALID_PARAMS);
goto unlock;
@@ -7333,10 +6155,10 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
hci_req_init(&req, hdev);
- clear_adv_instance(hdev, &req, cp->instance, true);
+ hci_req_clear_adv_instance(hdev, &req, cp->instance, true);
if (list_empty(&hdev->adv_instances))
- disable_advertising(&req);
+ __hci_req_disable_advertising(&req);
/* If no HCI commands have been collected so far or the HCI_ADVERTISING
* flag is set or the device isn't powered then we have no HCI
@@ -7369,6 +6191,62 @@ unlock:
return err;
}
+static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data)
+{
+ u8 max_len = HCI_MAX_AD_LENGTH;
+
+ if (is_adv_data) {
+ if (adv_flags & (MGMT_ADV_FLAG_DISCOV |
+ MGMT_ADV_FLAG_LIMITED_DISCOV |
+ MGMT_ADV_FLAG_MANAGED_FLAGS))
+ max_len -= 3;
+
+ if (adv_flags & MGMT_ADV_FLAG_TX_POWER)
+ max_len -= 3;
+ }
+
+ return max_len;
+}
+
+static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 data_len)
+{
+ struct mgmt_cp_get_adv_size_info *cp = data;
+ struct mgmt_rp_get_adv_size_info rp;
+ u32 flags, supported_flags;
+ int err;
+
+ BT_DBG("%s", hdev->name);
+
+ if (!lmp_le_capable(hdev))
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO,
+ MGMT_STATUS_REJECTED);
+
+ if (cp->instance < 1 || cp->instance > HCI_MAX_ADV_INSTANCES)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ flags = __le32_to_cpu(cp->flags);
+
+ /* The current implementation only supports a subset of the specified
+ * flags.
+ */
+ supported_flags = get_supported_adv_flags(hdev);
+ if (flags & ~supported_flags)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ rp.instance = cp->instance;
+ rp.flags = cp->flags;
+ rp.max_adv_data_len = tlv_data_max_len(flags, true);
+ rp.max_scan_rsp_len = tlv_data_max_len(flags, false);
+
+ err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO,
+ MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
+
+ return err;
+}
+
static const struct hci_mgmt_handler mgmt_handlers[] = {
{ NULL }, /* 0x0000 (no command) */
{ read_version, MGMT_READ_VERSION_SIZE,
@@ -7456,6 +6334,8 @@ static const struct hci_mgmt_handler mgmt_handlers[] = {
{ add_advertising, MGMT_ADD_ADVERTISING_SIZE,
HCI_MGMT_VAR_LEN },
{ remove_advertising, MGMT_REMOVE_ADVERTISING_SIZE },
+ { get_adv_size_info, MGMT_GET_ADV_SIZE_INFO_SIZE },
+ { start_limited_discovery, MGMT_START_DISCOVERY_SIZE },
};
void mgmt_index_added(struct hci_dev *hdev)
@@ -7526,9 +6406,8 @@ void mgmt_index_removed(struct hci_dev *hdev)
}
/* This function requires the caller holds hdev->lock */
-static void restart_le_actions(struct hci_request *req)
+static void restart_le_actions(struct hci_dev *hdev)
{
- struct hci_dev *hdev = req->hdev;
struct hci_conn_params *p;
list_for_each_entry(p, &hdev->le_conn_params, list) {
@@ -7549,141 +6428,35 @@ static void restart_le_actions(struct hci_request *req)
break;
}
}
-
- __hci_update_background_scan(req);
}
-static void powered_complete(struct hci_dev *hdev, u8 status, u16 opcode)
+void mgmt_power_on(struct hci_dev *hdev, int err)
{
struct cmd_lookup match = { NULL, hdev };
- BT_DBG("status 0x%02x", status);
-
- if (!status) {
- /* Register the available SMP channels (BR/EDR and LE) only
- * when successfully powering on the controller. This late
- * registration is required so that LE SMP can clearly
- * decide if the public address or static address is used.
- */
- smp_register(hdev);
- }
+ BT_DBG("err %d", err);
hci_dev_lock(hdev);
+ if (!err) {
+ restart_le_actions(hdev);
+ hci_update_background_scan(hdev);
+ }
+
mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
new_settings(hdev, match.sk);
- hci_dev_unlock(hdev);
-
if (match.sk)
sock_put(match.sk);
-}
-
-static int powered_update_hci(struct hci_dev *hdev)
-{
- struct hci_request req;
- struct adv_info *adv_instance;
- u8 link_sec;
-
- hci_req_init(&req, hdev);
-
- if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED) &&
- !lmp_host_ssp_capable(hdev)) {
- u8 mode = 0x01;
-
- hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode);
-
- if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) {
- u8 support = 0x01;
-
- hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT,
- sizeof(support), &support);
- }
- }
-
- if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) &&
- lmp_bredr_capable(hdev)) {
- struct hci_cp_write_le_host_supported cp;
-
- cp.le = 0x01;
- cp.simul = 0x00;
-
- /* Check first if we already have the right
- * host state (host features set)
- */
- if (cp.le != lmp_host_le_capable(hdev) ||
- cp.simul != lmp_host_le_br_capable(hdev))
- hci_req_add(&req, HCI_OP_WRITE_LE_HOST_SUPPORTED,
- sizeof(cp), &cp);
- }
-
- if (lmp_le_capable(hdev)) {
- /* Make sure the controller has a good default for
- * advertising data. This also applies to the case
- * where BR/EDR was toggled during the AUTO_OFF phase.
- */
- if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) &&
- (hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
- !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))) {
- update_adv_data(&req);
- update_scan_rsp_data(&req);
- }
-
- if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) &&
- hdev->cur_adv_instance == 0x00 &&
- !list_empty(&hdev->adv_instances)) {
- adv_instance = list_first_entry(&hdev->adv_instances,
- struct adv_info, list);
- hdev->cur_adv_instance = adv_instance->instance;
- }
-
- if (hci_dev_test_flag(hdev, HCI_ADVERTISING))
- enable_advertising(&req);
- else if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) &&
- hdev->cur_adv_instance)
- schedule_adv_instance(&req, hdev->cur_adv_instance,
- true);
- restart_le_actions(&req);
- }
-
- link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY);
- if (link_sec != test_bit(HCI_AUTH, &hdev->flags))
- hci_req_add(&req, HCI_OP_WRITE_AUTH_ENABLE,
- sizeof(link_sec), &link_sec);
-
- if (lmp_bredr_capable(hdev)) {
- if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE))
- write_fast_connectable(&req, true);
- else
- write_fast_connectable(&req, false);
- __hci_update_page_scan(&req);
- update_class(&req);
- update_name(&req);
- update_eir(&req);
- }
-
- return hci_req_run(&req, powered_complete);
+ hci_dev_unlock(hdev);
}
-int mgmt_powered(struct hci_dev *hdev, u8 powered)
+void __mgmt_power_off(struct hci_dev *hdev)
{
struct cmd_lookup match = { NULL, hdev };
u8 status, zero_cod[] = { 0, 0, 0 };
- int err;
-
- if (!hci_dev_test_flag(hdev, HCI_MGMT))
- return 0;
-
- if (powered) {
- if (powered_update_hci(hdev) == 0)
- return 0;
-
- mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp,
- &match);
- goto new_settings;
- }
mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
@@ -7705,13 +6478,10 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered)
mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
zero_cod, sizeof(zero_cod), NULL);
-new_settings:
- err = new_settings(hdev, match.sk);
+ new_settings(hdev, match.sk);
if (match.sk)
sock_put(match.sk);
-
- return err;
}
void mgmt_set_powered_failed(struct hci_dev *hdev, int err)
@@ -7733,43 +6503,6 @@ void mgmt_set_powered_failed(struct hci_dev *hdev, int err)
mgmt_pending_remove(cmd);
}
-void mgmt_discoverable_timeout(struct hci_dev *hdev)
-{
- struct hci_request req;
-
- hci_dev_lock(hdev);
-
- /* When discoverable timeout triggers, then just make sure
- * the limited discoverable flag is cleared. Even in the case
- * of a timeout triggered from general discoverable, it is
- * safe to unconditionally clear the flag.
- */
- hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
- hci_dev_clear_flag(hdev, HCI_DISCOVERABLE);
-
- hci_req_init(&req, hdev);
- if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
- u8 scan = SCAN_PAGE;
- hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE,
- sizeof(scan), &scan);
- }
- update_class(&req);
-
- /* Advertising instances don't use the global discoverable setting, so
- * only update AD if advertising was enabled using Set Advertising.
- */
- if (hci_dev_test_flag(hdev, HCI_ADVERTISING))
- update_adv_data(&req);
-
- hci_req_run(&req, NULL);
-
- hdev->discov_timeout = 0;
-
- new_settings(hdev, NULL);
-
- hci_dev_unlock(hdev);
-}
-
void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key,
bool persistent)
{
@@ -8312,7 +7045,7 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)
if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS))
hci_req_add(&req, HCI_OP_WRITE_SSP_DEBUG_MODE,
sizeof(enable), &enable);
- update_eir(&req);
+ __hci_req_update_eir(&req);
} else {
clear_eir(&req);
}
@@ -8452,7 +7185,7 @@ static void restart_le_scan(struct hci_dev *hdev)
hdev->discovery.scan_duration))
return;
- queue_delayed_work(hdev->workqueue, &hdev->le_scan_restart,
+ queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_restart,
DISCOV_LE_RESTART_DELAY);
}
@@ -8527,6 +7260,18 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
return;
}
+ if (hdev->discovery.limited) {
+ /* Check for limited discoverable bit */
+ if (dev_class) {
+ if (!(dev_class[1] & 0x20))
+ return;
+ } else {
+ u8 *flags = eir_get_data(eir, eir_len, EIR_FLAGS, NULL);
+ if (!flags || !(flags[0] & LE_AD_LIMITED))
+ return;
+ }
+ }
+
/* Make sure that the buffer is big enough. The 5 extra bytes
* are for the potential CoD field.
*/
@@ -8556,7 +7301,8 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
/* Copy EIR or advertising data into event */
memcpy(ev->eir, eir, eir_len);
- if (dev_class && !eir_has_data_type(ev->eir, eir_len, EIR_CLASS_OF_DEV))
+ if (dev_class && !eir_get_data(ev->eir, eir_len, EIR_CLASS_OF_DEV,
+ NULL))
eir_len = eir_append_data(ev->eir, eir_len, EIR_CLASS_OF_DEV,
dev_class, 3);
@@ -8606,35 +7352,6 @@ void mgmt_discovering(struct hci_dev *hdev, u8 discovering)
mgmt_event(MGMT_EV_DISCOVERING, hdev, &ev, sizeof(ev), NULL);
}
-static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode)
-{
- BT_DBG("%s status %u", hdev->name, status);
-}
-
-void mgmt_reenable_advertising(struct hci_dev *hdev)
-{
- struct hci_request req;
- u8 instance;
-
- if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) &&
- !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))
- return;
-
- instance = get_current_adv_instance(hdev);
-
- hci_req_init(&req, hdev);
-
- if (instance) {
- schedule_adv_instance(&req, instance, true);
- } else {
- update_adv_data(&req);
- update_scan_rsp_data(&req);
- enable_advertising(&req);
- }
-
- hci_req_run(&req, adv_enable_complete);
-}
-
static struct hci_mgmt_chan chan = {
.channel = HCI_CHANNEL_CONTROL,
.handler_count = ARRAY_SIZE(mgmt_handlers),
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 29709fbfd1f5..f7eb02f09b54 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -692,11 +692,9 @@ static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s)
static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst)
{
- struct rfcomm_session *s;
- struct list_head *p, *n;
+ struct rfcomm_session *s, *n;
struct l2cap_chan *chan;
- list_for_each_safe(p, n, &session_list) {
- s = list_entry(p, struct rfcomm_session, list);
+ list_for_each_entry_safe(s, n, &session_list, list) {
chan = l2cap_pi(s->sock->sk)->chan;
if ((!bacmp(src, BDADDR_ANY) || !bacmp(&chan->src, src)) &&
@@ -709,16 +707,14 @@ static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst)
static struct rfcomm_session *rfcomm_session_close(struct rfcomm_session *s,
int err)
{
- struct rfcomm_dlc *d;
- struct list_head *p, *n;
+ struct rfcomm_dlc *d, *n;
s->state = BT_CLOSED;
BT_DBG("session %p state %ld err %d", s, s->state, err);
/* Close all dlcs */
- list_for_each_safe(p, n, &s->dlcs) {
- d = list_entry(p, struct rfcomm_dlc, list);
+ list_for_each_entry_safe(d, n, &s->dlcs, list) {
d->state = BT_CLOSED;
__rfcomm_dlc_close(d, err);
}
@@ -1771,13 +1767,11 @@ static struct rfcomm_session *rfcomm_recv_frame(struct rfcomm_session *s,
static void rfcomm_process_connect(struct rfcomm_session *s)
{
- struct rfcomm_dlc *d;
- struct list_head *p, *n;
+ struct rfcomm_dlc *d, *n;
BT_DBG("session %p state %ld", s, s->state);
- list_for_each_safe(p, n, &s->dlcs) {
- d = list_entry(p, struct rfcomm_dlc, list);
+ list_for_each_entry_safe(d, n, &s->dlcs, list) {
if (d->state == BT_CONFIG) {
d->mtu = s->mtu;
if (rfcomm_check_security(d)) {
@@ -1843,14 +1837,11 @@ static int rfcomm_process_tx(struct rfcomm_dlc *d)
static void rfcomm_process_dlcs(struct rfcomm_session *s)
{
- struct rfcomm_dlc *d;
- struct list_head *p, *n;
+ struct rfcomm_dlc *d, *n;
BT_DBG("session %p state %ld", s, s->state);
- list_for_each_safe(p, n, &s->dlcs) {
- d = list_entry(p, struct rfcomm_dlc, list);
-
+ list_for_each_entry_safe(d, n, &s->dlcs, list) {
if (test_bit(RFCOMM_TIMED_OUT, &d->flags)) {
__rfcomm_dlc_close(d, ETIMEDOUT);
continue;
@@ -1985,14 +1976,11 @@ static struct rfcomm_session *rfcomm_check_connection(struct rfcomm_session *s)
static void rfcomm_process_sessions(void)
{
- struct list_head *p, *n;
+ struct rfcomm_session *s, *n;
rfcomm_lock();
- list_for_each_safe(p, n, &session_list) {
- struct rfcomm_session *s;
- s = list_entry(p, struct rfcomm_session, list);
-
+ list_for_each_entry_safe(s, n, &session_list, list) {
if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) {
s->state = BT_DISCONN;
rfcomm_send_disc(s, 0);
@@ -2075,15 +2063,12 @@ failed:
static void rfcomm_kill_listener(void)
{
- struct rfcomm_session *s;
- struct list_head *p, *n;
+ struct rfcomm_session *s, *n;
BT_DBG("");
- list_for_each_safe(p, n, &session_list) {
- s = list_entry(p, struct rfcomm_session, list);
+ list_for_each_entry_safe(s, n, &session_list, list)
rfcomm_session_del(s);
- }
}
static int rfcomm_run(void *unused)
@@ -2113,8 +2098,7 @@ static int rfcomm_run(void *unused)
static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
{
struct rfcomm_session *s;
- struct rfcomm_dlc *d;
- struct list_head *p, *n;
+ struct rfcomm_dlc *d, *n;
BT_DBG("conn %p status 0x%02x encrypt 0x%02x", conn, status, encrypt);
@@ -2122,9 +2106,7 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
if (!s)
return;
- list_for_each_safe(p, n, &s->dlcs) {
- d = list_entry(p, struct rfcomm_dlc, list);
-
+ list_for_each_entry_safe(d, n, &s->dlcs, list) {
if (test_and_clear_bit(RFCOMM_SEC_PENDING, &d->flags)) {
rfcomm_dlc_clear_timer(d);
if (status || encrypt == 0x00) {
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index fe129663bd3f..f52bcbf2e58c 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -526,6 +526,9 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr,
if (!addr || addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
+ if (addr_len < sizeof(struct sockaddr_sco))
+ return -EINVAL;
+
lock_sock(sk);
if (sk->sk_state != BT_OPEN) {
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index c91353841e40..50976a6481f3 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -21,9 +21,10 @@
*/
#include <linux/debugfs.h>
-#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <crypto/b128ops.h>
+#include <crypto/hash.h>
+#include <crypto/skcipher.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -87,8 +88,8 @@ struct smp_dev {
u8 min_key_size;
u8 max_key_size;
- struct crypto_blkcipher *tfm_aes;
- struct crypto_hash *tfm_cmac;
+ struct crypto_skcipher *tfm_aes;
+ struct crypto_shash *tfm_cmac;
};
struct smp_chan {
@@ -126,8 +127,8 @@ struct smp_chan {
u8 dhkey[32];
u8 mackey[16];
- struct crypto_blkcipher *tfm_aes;
- struct crypto_hash *tfm_cmac;
+ struct crypto_skcipher *tfm_aes;
+ struct crypto_shash *tfm_cmac;
};
/* These debug key values are defined in the SMP section of the core
@@ -165,12 +166,11 @@ static inline void swap_buf(const u8 *src, u8 *dst, size_t len)
* AES-CMAC, f4, f5, f6, g2 and h6.
*/
-static int aes_cmac(struct crypto_hash *tfm, const u8 k[16], const u8 *m,
+static int aes_cmac(struct crypto_shash *tfm, const u8 k[16], const u8 *m,
size_t len, u8 mac[16])
{
uint8_t tmp[16], mac_msb[16], msg_msb[CMAC_MSG_MAX];
- struct hash_desc desc;
- struct scatterlist sg;
+ SHASH_DESC_ON_STACK(desc, tfm);
int err;
if (len > CMAC_MSG_MAX)
@@ -181,10 +181,8 @@ static int aes_cmac(struct crypto_hash *tfm, const u8 k[16], const u8 *m,
return -EINVAL;
}
- desc.tfm = tfm;
- desc.flags = 0;
-
- crypto_hash_init(&desc);
+ desc->tfm = tfm;
+ desc->flags = 0;
/* Swap key and message from LSB to MSB */
swap_buf(k, tmp, 16);
@@ -193,23 +191,16 @@ static int aes_cmac(struct crypto_hash *tfm, const u8 k[16], const u8 *m,
SMP_DBG("msg (len %zu) %*phN", len, (int) len, m);
SMP_DBG("key %16phN", k);
- err = crypto_hash_setkey(tfm, tmp, 16);
+ err = crypto_shash_setkey(tfm, tmp, 16);
if (err) {
BT_ERR("cipher setkey failed: %d", err);
return err;
}
- sg_init_one(&sg, msg_msb, len);
-
- err = crypto_hash_update(&desc, &sg, len);
- if (err) {
- BT_ERR("Hash update error %d", err);
- return err;
- }
-
- err = crypto_hash_final(&desc, mac_msb);
+ err = crypto_shash_digest(desc, msg_msb, len, mac_msb);
+ shash_desc_zero(desc);
if (err) {
- BT_ERR("Hash final error %d", err);
+ BT_ERR("Hash computation error %d", err);
return err;
}
@@ -220,8 +211,8 @@ static int aes_cmac(struct crypto_hash *tfm, const u8 k[16], const u8 *m,
return 0;
}
-static int smp_f4(struct crypto_hash *tfm_cmac, const u8 u[32], const u8 v[32],
- const u8 x[16], u8 z, u8 res[16])
+static int smp_f4(struct crypto_shash *tfm_cmac, const u8 u[32],
+ const u8 v[32], const u8 x[16], u8 z, u8 res[16])
{
u8 m[65];
int err;
@@ -243,7 +234,7 @@ static int smp_f4(struct crypto_hash *tfm_cmac, const u8 u[32], const u8 v[32],
return err;
}
-static int smp_f5(struct crypto_hash *tfm_cmac, const u8 w[32],
+static int smp_f5(struct crypto_shash *tfm_cmac, const u8 w[32],
const u8 n1[16], const u8 n2[16], const u8 a1[7],
const u8 a2[7], u8 mackey[16], u8 ltk[16])
{
@@ -296,7 +287,7 @@ static int smp_f5(struct crypto_hash *tfm_cmac, const u8 w[32],
return 0;
}
-static int smp_f6(struct crypto_hash *tfm_cmac, const u8 w[16],
+static int smp_f6(struct crypto_shash *tfm_cmac, const u8 w[16],
const u8 n1[16], const u8 n2[16], const u8 r[16],
const u8 io_cap[3], const u8 a1[7], const u8 a2[7],
u8 res[16])
@@ -324,7 +315,7 @@ static int smp_f6(struct crypto_hash *tfm_cmac, const u8 w[16],
return err;
}
-static int smp_g2(struct crypto_hash *tfm_cmac, const u8 u[32], const u8 v[32],
+static int smp_g2(struct crypto_shash *tfm_cmac, const u8 u[32], const u8 v[32],
const u8 x[16], const u8 y[16], u32 *val)
{
u8 m[80], tmp[16];
@@ -350,7 +341,7 @@ static int smp_g2(struct crypto_hash *tfm_cmac, const u8 u[32], const u8 v[32],
return 0;
}
-static int smp_h6(struct crypto_hash *tfm_cmac, const u8 w[16],
+static int smp_h6(struct crypto_shash *tfm_cmac, const u8 w[16],
const u8 key_id[4], u8 res[16])
{
int err;
@@ -370,9 +361,9 @@ static int smp_h6(struct crypto_hash *tfm_cmac, const u8 w[16],
* s1 and ah.
*/
-static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
+static int smp_e(struct crypto_skcipher *tfm, const u8 *k, u8 *r)
{
- struct blkcipher_desc desc;
+ SKCIPHER_REQUEST_ON_STACK(req, tfm);
struct scatterlist sg;
uint8_t tmp[16], data[16];
int err;
@@ -384,13 +375,10 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
return -EINVAL;
}
- desc.tfm = tfm;
- desc.flags = 0;
-
/* The most significant octet of key corresponds to k[0] */
swap_buf(k, tmp, 16);
- err = crypto_blkcipher_setkey(tfm, tmp, 16);
+ err = crypto_skcipher_setkey(tfm, tmp, 16);
if (err) {
BT_ERR("cipher setkey failed: %d", err);
return err;
@@ -401,7 +389,12 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
sg_init_one(&sg, data, 16);
- err = crypto_blkcipher_encrypt(&desc, &sg, &sg, 16);
+ skcipher_request_set_tfm(req, tfm);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &sg, &sg, 16, NULL);
+
+ err = crypto_skcipher_encrypt(req);
+ skcipher_request_zero(req);
if (err)
BT_ERR("Encrypt data error %d", err);
@@ -413,7 +406,7 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
return err;
}
-static int smp_c1(struct crypto_blkcipher *tfm_aes, const u8 k[16],
+static int smp_c1(struct crypto_skcipher *tfm_aes, const u8 k[16],
const u8 r[16], const u8 preq[7], const u8 pres[7], u8 _iat,
const bdaddr_t *ia, u8 _rat, const bdaddr_t *ra, u8 res[16])
{
@@ -462,7 +455,7 @@ static int smp_c1(struct crypto_blkcipher *tfm_aes, const u8 k[16],
return err;
}
-static int smp_s1(struct crypto_blkcipher *tfm_aes, const u8 k[16],
+static int smp_s1(struct crypto_skcipher *tfm_aes, const u8 k[16],
const u8 r1[16], const u8 r2[16], u8 _r[16])
{
int err;
@@ -478,7 +471,7 @@ static int smp_s1(struct crypto_blkcipher *tfm_aes, const u8 k[16],
return err;
}
-static int smp_ah(struct crypto_blkcipher *tfm, const u8 irk[16],
+static int smp_ah(struct crypto_skcipher *tfm, const u8 irk[16],
const u8 r[3], u8 res[3])
{
u8 _res[16];
@@ -766,8 +759,8 @@ static void smp_chan_destroy(struct l2cap_conn *conn)
kzfree(smp->slave_csrk);
kzfree(smp->link_key);
- crypto_free_blkcipher(smp->tfm_aes);
- crypto_free_hash(smp->tfm_cmac);
+ crypto_free_skcipher(smp->tfm_aes);
+ crypto_free_shash(smp->tfm_cmac);
/* Ensure that we don't leave any debug key around if debug key
* support hasn't been explicitly enabled.
@@ -1072,22 +1065,6 @@ static void smp_notify_keys(struct l2cap_conn *conn)
hcon->dst_type = smp->remote_irk->addr_type;
queue_work(hdev->workqueue, &conn->id_addr_update_work);
}
-
- /* When receiving an indentity resolving key for
- * a remote device that does not use a resolvable
- * private address, just remove the key so that
- * it is possible to use the controller white
- * list for scanning.
- *
- * Userspace will have been told to not store
- * this key at this point. So it is safe to
- * just remove it.
- */
- if (!bacmp(&smp->remote_irk->rpa, BDADDR_ANY)) {
- list_del_rcu(&smp->remote_irk->list);
- kfree_rcu(smp->remote_irk, rcu);
- smp->remote_irk = NULL;
- }
}
if (smp->csrk) {
@@ -1382,17 +1359,17 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
if (!smp)
return NULL;
- smp->tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
+ smp->tfm_aes = crypto_alloc_skcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(smp->tfm_aes)) {
BT_ERR("Unable to create ECB crypto context");
kzfree(smp);
return NULL;
}
- smp->tfm_cmac = crypto_alloc_hash("cmac(aes)", 0, CRYPTO_ALG_ASYNC);
+ smp->tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
if (IS_ERR(smp->tfm_cmac)) {
BT_ERR("Unable to create CMAC crypto context");
- crypto_free_blkcipher(smp->tfm_aes);
+ crypto_free_skcipher(smp->tfm_aes);
kzfree(smp);
return NULL;
}
@@ -3027,8 +3004,13 @@ static void smp_ready_cb(struct l2cap_chan *chan)
BT_DBG("chan %p", chan);
+ /* No need to call l2cap_chan_hold() here since we already own
+ * the reference taken in smp_new_conn_cb(). This is just the
+ * first time that we tie it to a specific pointer. The code in
+ * l2cap_core.c ensures that there's no risk this function wont
+ * get called if smp_new_conn_cb was previously called.
+ */
conn->smp = chan;
- l2cap_chan_hold(chan);
if (hcon->type == ACL_LINK && test_bit(HCI_CONN_ENCRYPT, &hcon->flags))
bredr_pairing(chan);
@@ -3138,8 +3120,8 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
{
struct l2cap_chan *chan;
struct smp_dev *smp;
- struct crypto_blkcipher *tfm_aes;
- struct crypto_hash *tfm_cmac;
+ struct crypto_skcipher *tfm_aes;
+ struct crypto_shash *tfm_cmac;
if (cid == L2CAP_CID_SMP_BREDR) {
smp = NULL;
@@ -3150,17 +3132,17 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
if (!smp)
return ERR_PTR(-ENOMEM);
- tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
+ tfm_aes = crypto_alloc_skcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(tfm_aes)) {
BT_ERR("Unable to create ECB crypto context");
kzfree(smp);
return ERR_CAST(tfm_aes);
}
- tfm_cmac = crypto_alloc_hash("cmac(aes)", 0, CRYPTO_ALG_ASYNC);
+ tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
if (IS_ERR(tfm_cmac)) {
BT_ERR("Unable to create CMAC crypto context");
- crypto_free_blkcipher(tfm_aes);
+ crypto_free_skcipher(tfm_aes);
kzfree(smp);
return ERR_CAST(tfm_cmac);
}
@@ -3174,8 +3156,8 @@ create_chan:
chan = l2cap_chan_create();
if (!chan) {
if (smp) {
- crypto_free_blkcipher(smp->tfm_aes);
- crypto_free_hash(smp->tfm_cmac);
+ crypto_free_skcipher(smp->tfm_aes);
+ crypto_free_shash(smp->tfm_cmac);
kzfree(smp);
}
return ERR_PTR(-ENOMEM);
@@ -3221,10 +3203,8 @@ static void smp_del_chan(struct l2cap_chan *chan)
smp = chan->data;
if (smp) {
chan->data = NULL;
- if (smp->tfm_aes)
- crypto_free_blkcipher(smp->tfm_aes);
- if (smp->tfm_cmac)
- crypto_free_hash(smp->tfm_cmac);
+ crypto_free_skcipher(smp->tfm_aes);
+ crypto_free_shash(smp->tfm_cmac);
kzfree(smp);
}
@@ -3460,7 +3440,7 @@ void smp_unregister(struct hci_dev *hdev)
#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP)
-static int __init test_ah(struct crypto_blkcipher *tfm_aes)
+static int __init test_ah(struct crypto_skcipher *tfm_aes)
{
const u8 irk[16] = {
0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34,
@@ -3480,7 +3460,7 @@ static int __init test_ah(struct crypto_blkcipher *tfm_aes)
return 0;
}
-static int __init test_c1(struct crypto_blkcipher *tfm_aes)
+static int __init test_c1(struct crypto_skcipher *tfm_aes)
{
const u8 k[16] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -3510,7 +3490,7 @@ static int __init test_c1(struct crypto_blkcipher *tfm_aes)
return 0;
}
-static int __init test_s1(struct crypto_blkcipher *tfm_aes)
+static int __init test_s1(struct crypto_skcipher *tfm_aes)
{
const u8 k[16] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -3535,7 +3515,7 @@ static int __init test_s1(struct crypto_blkcipher *tfm_aes)
return 0;
}
-static int __init test_f4(struct crypto_hash *tfm_cmac)
+static int __init test_f4(struct crypto_shash *tfm_cmac)
{
const u8 u[32] = {
0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc,
@@ -3567,7 +3547,7 @@ static int __init test_f4(struct crypto_hash *tfm_cmac)
return 0;
}
-static int __init test_f5(struct crypto_hash *tfm_cmac)
+static int __init test_f5(struct crypto_shash *tfm_cmac)
{
const u8 w[32] = {
0x98, 0xa6, 0xbf, 0x73, 0xf3, 0x34, 0x8d, 0x86,
@@ -3604,7 +3584,7 @@ static int __init test_f5(struct crypto_hash *tfm_cmac)
return 0;
}
-static int __init test_f6(struct crypto_hash *tfm_cmac)
+static int __init test_f6(struct crypto_shash *tfm_cmac)
{
const u8 w[16] = {
0x20, 0x6e, 0x63, 0xce, 0x20, 0x6a, 0x3f, 0xfd,
@@ -3637,7 +3617,7 @@ static int __init test_f6(struct crypto_hash *tfm_cmac)
return 0;
}
-static int __init test_g2(struct crypto_hash *tfm_cmac)
+static int __init test_g2(struct crypto_shash *tfm_cmac)
{
const u8 u[32] = {
0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc,
@@ -3669,7 +3649,7 @@ static int __init test_g2(struct crypto_hash *tfm_cmac)
return 0;
}
-static int __init test_h6(struct crypto_hash *tfm_cmac)
+static int __init test_h6(struct crypto_shash *tfm_cmac)
{
const u8 w[16] = {
0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34,
@@ -3706,8 +3686,8 @@ static const struct file_operations test_smp_fops = {
.llseek = default_llseek,
};
-static int __init run_selftests(struct crypto_blkcipher *tfm_aes,
- struct crypto_hash *tfm_cmac)
+static int __init run_selftests(struct crypto_skcipher *tfm_aes,
+ struct crypto_shash *tfm_cmac)
{
ktime_t calltime, delta, rettime;
unsigned long long duration;
@@ -3784,27 +3764,27 @@ done:
int __init bt_selftest_smp(void)
{
- struct crypto_blkcipher *tfm_aes;
- struct crypto_hash *tfm_cmac;
+ struct crypto_skcipher *tfm_aes;
+ struct crypto_shash *tfm_cmac;
int err;
- tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
+ tfm_aes = crypto_alloc_skcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(tfm_aes)) {
BT_ERR("Unable to create ECB crypto context");
return PTR_ERR(tfm_aes);
}
- tfm_cmac = crypto_alloc_hash("cmac(aes)", 0, CRYPTO_ALG_ASYNC);
+ tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(tfm_cmac)) {
BT_ERR("Unable to create CMAC crypto context");
- crypto_free_blkcipher(tfm_aes);
+ crypto_free_skcipher(tfm_aes);
return PTR_ERR(tfm_cmac);
}
err = run_selftests(tfm_aes, tfm_cmac);
- crypto_free_hash(tfm_cmac);
- crypto_free_blkcipher(tfm_aes);
+ crypto_free_shash(tfm_cmac);
+ crypto_free_skcipher(tfm_aes);
return err;
}
diff --git a/net/bridge/br.c b/net/bridge/br.c
index a1abe4936fe1..3addc05b9a16 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -121,6 +121,7 @@ static struct notifier_block br_device_notifier = {
.notifier_call = br_device_event
};
+/* called with RTNL */
static int br_switchdev_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
@@ -130,7 +131,6 @@ static int br_switchdev_event(struct notifier_block *unused,
struct switchdev_notifier_fdb_info *fdb_info;
int err = NOTIFY_DONE;
- rtnl_lock();
p = br_port_get_rtnl(dev);
if (!p)
goto out;
@@ -155,7 +155,6 @@ static int br_switchdev_event(struct notifier_block *unused,
}
out:
- rtnl_unlock();
return err;
}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 5e88d3e17546..2c8095a5d824 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -28,6 +28,8 @@
const struct nf_br_ops __rcu *nf_br_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_br_ops);
+static struct lock_class_key bridge_netdev_addr_lock_key;
+
/* net device transmit always called with BH disabled */
netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
@@ -87,6 +89,11 @@ out:
return NETDEV_TX_OK;
}
+static void br_set_lockdep_class(struct net_device *dev)
+{
+ lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key);
+}
+
static int br_dev_init(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
@@ -99,6 +106,7 @@ static int br_dev_init(struct net_device *dev)
err = br_vlan_init(br);
if (err)
free_percpu(br->stats);
+ br_set_lockdep_class(dev);
return err;
}
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index a642bb829d09..dcea4f4c62b3 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -135,6 +135,7 @@ static void fdb_del_external_learn(struct net_bridge_fdb_entry *f)
{
struct switchdev_obj_port_fdb fdb = {
.obj = {
+ .orig_dev = f->dst->dev,
.id = SWITCHDEV_OBJ_ID_PORT_FDB,
.flags = SWITCHDEV_F_DEFER,
},
@@ -722,6 +723,8 @@ int br_fdb_dump(struct sk_buff *skb,
struct net_bridge_fdb_entry *f;
hlist_for_each_entry_rcu(f, &br->hash[i], hlist) {
+ int err;
+
if (idx < cb->args[0])
goto skip;
@@ -740,12 +743,15 @@ int br_fdb_dump(struct sk_buff *skb,
if (!filter_dev && f->dst)
goto skip;
- if (fdb_fill_info(skb, br, f,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWNEIGH,
- NLM_F_MULTI) < 0)
+ err = fdb_fill_info(skb, br, f,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGH,
+ NLM_F_MULTI);
+ if (err < 0) {
+ cb->args[1] = err;
break;
+ }
skip:
++idx;
}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index ec02f5869a78..c367b3e1b5ac 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -493,7 +493,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
dev->priv_flags |= IFF_BRIDGE_PORT;
- err = netdev_master_upper_dev_link(dev, br->dev);
+ err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL);
if (err)
goto err5;
@@ -511,8 +511,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
if (br_fdb_insert(br, p, dev->dev_addr, 0))
netdev_err(dev, "failed insert local address bridge forwarding table\n");
- if (nbp_vlan_init(p))
+ err = nbp_vlan_init(p);
+ if (err) {
netdev_err(dev, "failed to initialize vlan filtering on this port\n");
+ goto err6;
+ }
spin_lock_bh(&br->lock);
changed_addr = br_stp_recalculate_bridge_id(br);
@@ -533,6 +536,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
return 0;
+err6:
+ list_del_rcu(&p->list);
+ br_fdb_delete_by_port(br, p, 0, 1);
+ nbp_update_port_count(br);
+ netdev_upper_dev_unlink(dev, br->dev);
+
err5:
dev->priv_flags &= ~IFF_BRIDGE_PORT;
netdev_rx_handler_unregister(dev);
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index cd8deea2d074..74c278e00225 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -7,6 +7,7 @@
#include <linux/if_ether.h>
#include <net/ip.h>
#include <net/netlink.h>
+#include <net/switchdev.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/addrconf.h>
@@ -210,10 +211,32 @@ static inline size_t rtnl_mdb_nlmsg_size(void)
static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry,
int type)
{
+ struct switchdev_obj_port_mdb mdb = {
+ .obj = {
+ .id = SWITCHDEV_OBJ_ID_PORT_MDB,
+ .flags = SWITCHDEV_F_DEFER,
+ },
+ .vid = entry->vid,
+ };
+ struct net_device *port_dev;
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOBUFS;
+ port_dev = __dev_get_by_index(net, entry->ifindex);
+ if (entry->addr.proto == htons(ETH_P_IP))
+ ip_eth_mc_map(entry->addr.u.ip4, mdb.addr);
+#if IS_ENABLED(CONFIG_IPV6)
+ else
+ ipv6_eth_mc_map(&entry->addr.u.ip6, mdb.addr);
+#endif
+
+ mdb.obj.orig_dev = port_dev;
+ if (port_dev && type == RTM_NEWMDB)
+ switchdev_port_obj_add(port_dev, &mdb.obj);
+ else if (port_dev && type == RTM_DELMDB)
+ switchdev_port_obj_del(port_dev, &mdb.obj);
+
skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC);
if (!skb)
goto errout;
@@ -402,8 +425,8 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
mp = br_mdb_ip_get(mdb, group);
if (!mp) {
mp = br_multicast_new_group(br, port, group);
- err = PTR_ERR(mp);
- if (IS_ERR(mp))
+ err = PTR_ERR_OR_ZERO(mp);
+ if (err)
return err;
}
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 80c34d70218c..b3cca126b103 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -40,6 +40,7 @@ void br_log_state(const struct net_bridge_port *p)
void br_set_state(struct net_bridge_port *p, unsigned int state)
{
struct switchdev_attr attr = {
+ .orig_dev = p->dev,
.id = SWITCHDEV_ATTR_ID_PORT_STP_STATE,
.flags = SWITCHDEV_F_DEFER,
.u.stp_state = state,
@@ -48,7 +49,7 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
p->state = state;
err = switchdev_port_attr_set(p->dev, &attr);
- if (err)
+ if (err && err != -EOPNOTSUPP)
br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
(unsigned int) p->port_no, p->dev->name);
}
@@ -570,6 +571,7 @@ int br_set_max_age(struct net_bridge *br, unsigned long val)
int br_set_ageing_time(struct net_bridge *br, u32 ageing_time)
{
struct switchdev_attr attr = {
+ .orig_dev = br->dev,
.id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
.flags = SWITCHDEV_F_SKIP_EOPNOTSUPP,
.u.ageing_time = ageing_time,
@@ -600,12 +602,17 @@ void __br_set_forward_delay(struct net_bridge *br, unsigned long t)
int br_set_forward_delay(struct net_bridge *br, unsigned long val)
{
unsigned long t = clock_t_to_jiffies(val);
-
- if (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY)
- return -ERANGE;
+ int err = -ERANGE;
spin_lock_bh(&br->lock);
+ if (br->stp_enabled != BR_NO_STP &&
+ (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY))
+ goto unlock;
+
__br_set_forward_delay(br, t);
+ err = 0;
+
+unlock:
spin_unlock_bh(&br->lock);
- return 0;
+ return err;
}
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index fa53d7a89f48..a31ac6ad76a2 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -37,9 +37,10 @@ static inline port_id br_make_port_id(__u8 priority, __u16 port_no)
void br_init_port(struct net_bridge_port *p)
{
struct switchdev_attr attr = {
+ .orig_dev = p->dev,
.id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
.flags = SWITCHDEV_F_SKIP_EOPNOTSUPP | SWITCHDEV_F_DEFER,
- .u.ageing_time = p->br->ageing_time,
+ .u.ageing_time = jiffies_to_clock_t(p->br->ageing_time),
};
int err;
@@ -50,7 +51,7 @@ void br_init_port(struct net_bridge_port *p)
p->config_pending = 0;
err = switchdev_port_attr_set(p->dev, &attr);
- if (err)
+ if (err && err != -EOPNOTSUPP)
netdev_err(p->dev, "failed to set HW ageing time\n");
}
@@ -142,7 +143,10 @@ static void br_stp_start(struct net_bridge *br)
char *envp[] = { NULL };
struct net_bridge_port *p;
- r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+ if (net_eq(dev_net(br->dev), &init_net))
+ r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+ else
+ r = -ENOENT;
spin_lock_bh(&br->lock);
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 8365bd53c421..6b8091407ca3 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -22,7 +22,6 @@
#include "br_private.h"
-#define to_dev(obj) container_of(obj, struct device, kobj)
#define to_bridge(cd) ((struct net_bridge *)netdev_priv(to_net_dev(cd)))
/*
@@ -814,7 +813,7 @@ static ssize_t brforward_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
{
- struct device *dev = to_dev(kobj);
+ struct device *dev = kobj_to_dev(kobj);
struct net_bridge *br = to_bridge(dev);
int n;
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 1394da63614a..85e43af4af7a 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -73,6 +73,7 @@ static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
u16 vid, u16 flags)
{
struct switchdev_obj_port_vlan v = {
+ .obj.orig_dev = dev,
.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
.flags = flags,
.vid_begin = vid,
@@ -120,6 +121,7 @@ static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br,
u16 vid)
{
struct switchdev_obj_port_vlan v = {
+ .obj.orig_dev = dev,
.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
.vid_begin = vid,
.vid_end = vid,
@@ -624,9 +626,21 @@ void br_recalculate_fwd_mask(struct net_bridge *br)
int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
{
+ struct switchdev_attr attr = {
+ .orig_dev = br->dev,
+ .id = SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING,
+ .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP,
+ .u.vlan_filtering = val,
+ };
+ int err;
+
if (br->vlan_enabled == val)
return 0;
+ err = switchdev_port_attr_set(br->dev, &attr);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
br->vlan_enabled = val;
br_manage_promisc(br);
recalculate_group_addr(br);
@@ -637,13 +651,15 @@ int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
{
+ int err;
+
if (!rtnl_trylock())
return restart_syscall();
- __br_vlan_filter_toggle(br, val);
+ err = __br_vlan_filter_toggle(br, val);
rtnl_unlock();
- return 0;
+ return err;
}
int __br_vlan_set_proto(struct net_bridge *br, __be16 proto)
@@ -891,6 +907,12 @@ err_rhtbl:
int nbp_vlan_init(struct net_bridge_port *p)
{
+ struct switchdev_attr attr = {
+ .orig_dev = p->br->dev,
+ .id = SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING,
+ .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP,
+ .u.vlan_filtering = p->br->vlan_enabled,
+ };
struct net_bridge_vlan_group *vg;
int ret = -ENOMEM;
@@ -898,6 +920,10 @@ int nbp_vlan_init(struct net_bridge_port *p)
if (!vg)
goto out;
+ ret = switchdev_port_attr_set(p->dev, &attr);
+ if (ret && ret != -EOPNOTSUPP)
+ goto err_vlan_enabled;
+
ret = rhashtable_init(&vg->vlan_hash, &br_vlan_rht_params);
if (ret)
goto err_rhtbl;
@@ -917,6 +943,7 @@ err_vlan_add:
RCU_INIT_POINTER(p->vlgrp, NULL);
synchronize_rcu();
rhashtable_destroy(&vg->vlan_hash);
+err_vlan_enabled:
err_rhtbl:
kfree(vg);
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 17fd5f2cb4b8..98de6e7fd86d 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -65,8 +65,8 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
return false;
if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
return false;
- if (!(info->bitmask & ( EBT_IP6_DPORT |
- EBT_IP6_SPORT | EBT_IP6_ICMP6)))
+ if (!(info->bitmask & (EBT_IP6_DPORT |
+ EBT_IP6_SPORT | EBT_IP6_ICMP6)))
return true;
/* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 0ad639a96142..152300d164ac 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -36,14 +36,12 @@ static int ebt_log_tg_check(const struct xt_tgchk_param *par)
return 0;
}
-struct tcpudphdr
-{
+struct tcpudphdr {
__be16 src;
__be16 dst;
};
-struct arppayload
-{
+struct arppayload {
unsigned char mac_src[ETH_ALEN];
unsigned char ip_src[4];
unsigned char mac_dst[ETH_ALEN];
@@ -152,7 +150,8 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
ntohs(ah->ar_op));
/* If it's for Ethernet and the lengths are OK,
- * then log the ARP payload */
+ * then log the ARP payload
+ */
if (ah->ar_hrd == htons(1) &&
ah->ar_hln == ETH_ALEN &&
ah->ar_pln == sizeof(__be32)) {
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 0c40570069ba..6b731e12ecfa 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -41,7 +41,7 @@ struct stp_config_pdu {
#define NR32(p) ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3])
static bool ebt_filter_config(const struct ebt_stp_info *info,
- const struct stp_config_pdu *stpc)
+ const struct stp_config_pdu *stpc)
{
const struct ebt_stp_config_info *c;
uint16_t v16;
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index 618568888128..98c221dbf059 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -66,7 +66,8 @@ ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par)
* - Canonical Format Indicator (CFI). The Canonical Format Indicator
* (CFI) is a single bit flag value. Currently ignored.
* - VLAN Identifier (VID). The VID is encoded as
- * an unsigned binary number. */
+ * an unsigned binary number.
+ */
id = TCI & VLAN_VID_MASK;
prio = (TCI >> 13) & 0x7;
@@ -98,7 +99,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
}
/* Check for bitmask range
- * True if even one bit is out of mask */
+ * True if even one bit is out of mask
+ */
if (info->bitmask & ~EBT_VLAN_MASK) {
pr_debug("bitmask %2X is out of mask (%2X)\n",
info->bitmask, EBT_VLAN_MASK);
@@ -117,7 +119,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
* 0 - The null VLAN ID.
* 1 - The default Port VID (PVID)
* 0x0FFF - Reserved for implementation use.
- * if_vlan.h: VLAN_N_VID 4096. */
+ * if_vlan.h: VLAN_N_VID 4096.
+ */
if (GET_BITMASK(EBT_VLAN_ID)) {
if (!!info->id) { /* if id!=0 => check vid range */
if (info->id > VLAN_N_VID) {
@@ -128,7 +131,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
/* Note: This is valid VLAN-tagged frame point.
* Any value of user_priority are acceptable,
* but should be ignored according to 802.1Q Std.
- * So we just drop the prio flag. */
+ * So we just drop the prio flag.
+ */
info->bitmask &= ~EBT_VLAN_PRIO;
}
/* Else, id=0 (null VLAN ID) => user_priority range (any?) */
@@ -143,7 +147,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
}
/* Check for encapsulated proto range - it is possible to be
* any value for u_short range.
- * if_ether.h: ETH_ZLEN 60 - Min. octets in frame sans FCS */
+ * if_ether.h: ETH_ZLEN 60 - Min. octets in frame sans FCS
+ */
if (GET_BITMASK(EBT_VLAN_ENCAP)) {
if ((unsigned short) ntohs(info->encap) < ETH_ZLEN) {
pr_debug("encap frame length %d is less than "
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 32eccd101f26..593a1bdc079e 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -12,7 +12,7 @@
#include <linux/module.h>
#define FILTER_VALID_HOOKS ((1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | \
- (1 << NF_BR_LOCAL_OUT))
+ (1 << NF_BR_LOCAL_OUT))
static struct ebt_entries initial_chains[] = {
{
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index ec55358f00c8..eb33919821ee 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -12,7 +12,7 @@
#include <linux/module.h>
#define NAT_VALID_HOOKS ((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT) | \
- (1 << NF_BR_POST_ROUTING))
+ (1 << NF_BR_POST_ROUTING))
static struct ebt_entries initial_chains[] = {
{
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index f46ca417bf2d..67b2e27999aa 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -35,8 +35,7 @@
"report to author: "format, ## args)
/* #define BUGPRINT(format, args...) */
-/*
- * Each cpu has its own set of counters, so there is no need for write_lock in
+/* Each cpu has its own set of counters, so there is no need for write_lock in
* the softirq
* For reading or updating the counters, the user context needs to
* get a write_lock
@@ -46,7 +45,7 @@
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
#define COUNTER_OFFSET(n) (SMP_ALIGN(n * sizeof(struct ebt_counter)))
#define COUNTER_BASE(c, n, cpu) ((struct ebt_counter *)(((char *)c) + \
- COUNTER_OFFSET(n) * cpu))
+ COUNTER_OFFSET(n) * cpu))
@@ -126,7 +125,7 @@ ebt_dev_check(const char *entry, const struct net_device *device)
/* process standard matches */
static inline int
ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out)
+ const struct net_device *in, const struct net_device *out)
{
const struct ethhdr *h = eth_hdr(skb);
const struct net_bridge_port *p;
@@ -162,7 +161,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
for (i = 0; i < 6; i++)
verdict |= (h->h_source[i] ^ e->sourcemac[i]) &
e->sourcemsk[i];
- if (FWINV2(verdict != 0, EBT_ISOURCE) )
+ if (FWINV2(verdict != 0, EBT_ISOURCE))
return 1;
}
if (e->bitmask & EBT_DESTMAC) {
@@ -170,7 +169,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
for (i = 0; i < 6; i++)
verdict |= (h->h_dest[i] ^ e->destmac[i]) &
e->destmsk[i];
- if (FWINV2(verdict != 0, EBT_IDEST) )
+ if (FWINV2(verdict != 0, EBT_IDEST))
return 1;
}
return 0;
@@ -237,7 +236,8 @@ unsigned int ebt_do_table(struct sk_buff *skb,
(*(counter_base + i)).bcnt += skb->len;
/* these should only watch: not modify, nor tell us
- what to do with the packet */
+ * what to do with the packet
+ */
EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &acpar);
t = (struct ebt_entry_target *)
@@ -323,7 +323,7 @@ letscontinue:
/* If it succeeds, returns element and locks mutex */
static inline void *
find_inlist_lock_noload(struct list_head *head, const char *name, int *error,
- struct mutex *mutex)
+ struct mutex *mutex)
{
struct {
struct list_head list;
@@ -342,7 +342,7 @@ find_inlist_lock_noload(struct list_head *head, const char *name, int *error,
static void *
find_inlist_lock(struct list_head *head, const char *name, const char *prefix,
- int *error, struct mutex *mutex)
+ int *error, struct mutex *mutex)
{
return try_then_request_module(
find_inlist_lock_noload(head, name, error, mutex),
@@ -451,7 +451,8 @@ static int ebt_verify_pointers(const struct ebt_replace *repl,
if (i != NF_BR_NUMHOOKS || !(e->bitmask & EBT_ENTRY_OR_ENTRIES)) {
if (e->bitmask != 0) {
/* we make userspace set this right,
- so there is no misunderstanding */
+ * so there is no misunderstanding
+ */
BUGPRINT("EBT_ENTRY_OR_ENTRIES shouldn't be set "
"in distinguisher\n");
return -EINVAL;
@@ -487,15 +488,14 @@ static int ebt_verify_pointers(const struct ebt_replace *repl,
return 0;
}
-/*
- * this one is very careful, as it is the first function
+/* this one is very careful, as it is the first function
* to parse the userspace data
*/
static inline int
ebt_check_entry_size_and_hooks(const struct ebt_entry *e,
- const struct ebt_table_info *newinfo,
- unsigned int *n, unsigned int *cnt,
- unsigned int *totalcnt, unsigned int *udc_cnt)
+ const struct ebt_table_info *newinfo,
+ unsigned int *n, unsigned int *cnt,
+ unsigned int *totalcnt, unsigned int *udc_cnt)
{
int i;
@@ -504,10 +504,12 @@ ebt_check_entry_size_and_hooks(const struct ebt_entry *e,
break;
}
/* beginning of a new chain
- if i == NF_BR_NUMHOOKS it must be a user defined chain */
+ * if i == NF_BR_NUMHOOKS it must be a user defined chain
+ */
if (i != NF_BR_NUMHOOKS || !e->bitmask) {
/* this checks if the previous chain has as many entries
- as it said it has */
+ * as it said it has
+ */
if (*n != *cnt) {
BUGPRINT("nentries does not equal the nr of entries "
"in the chain\n");
@@ -549,20 +551,18 @@ ebt_check_entry_size_and_hooks(const struct ebt_entry *e,
return 0;
}
-struct ebt_cl_stack
-{
+struct ebt_cl_stack {
struct ebt_chainstack cs;
int from;
unsigned int hookmask;
};
-/*
- * we need these positions to check that the jumps to a different part of the
+/* We need these positions to check that the jumps to a different part of the
* entries is a jump to the beginning of a new chain.
*/
static inline int
ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo,
- unsigned int *n, struct ebt_cl_stack *udc)
+ unsigned int *n, struct ebt_cl_stack *udc)
{
int i;
@@ -649,9 +649,9 @@ ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt)
static inline int
ebt_check_entry(struct ebt_entry *e, struct net *net,
- const struct ebt_table_info *newinfo,
- const char *name, unsigned int *cnt,
- struct ebt_cl_stack *cl_s, unsigned int udc_cnt)
+ const struct ebt_table_info *newinfo,
+ const char *name, unsigned int *cnt,
+ struct ebt_cl_stack *cl_s, unsigned int udc_cnt)
{
struct ebt_entry_target *t;
struct xt_target *target;
@@ -673,7 +673,7 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
BUGPRINT("Unknown flag for inv bitmask\n");
return -EINVAL;
}
- if ( (e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3) ) {
+ if ((e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3)) {
BUGPRINT("NOPROTO & 802_3 not allowed\n");
return -EINVAL;
}
@@ -687,7 +687,8 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
break;
}
/* (1 << NF_BR_NUMHOOKS) tells the check functions the rule is on
- a base chain */
+ * a base chain
+ */
if (i < NF_BR_NUMHOOKS)
hookmask = (1 << hook) | (1 << NF_BR_NUMHOOKS);
else {
@@ -758,13 +759,12 @@ cleanup_matches:
return ret;
}
-/*
- * checks for loops and sets the hook mask for udc
+/* checks for loops and sets the hook mask for udc
* the hook mask for udc tells us from which base chains the udc can be
* accessed. This mask is a parameter to the check() functions of the extensions
*/
static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack *cl_s,
- unsigned int udc_cnt, unsigned int hooknr, char *base)
+ unsigned int udc_cnt, unsigned int hooknr, char *base)
{
int i, chain_nr = -1, pos = 0, nentries = chain->nentries, verdict;
const struct ebt_entry *e = (struct ebt_entry *)chain->data;
@@ -853,7 +853,8 @@ static int translate_table(struct net *net, const char *name,
return -EINVAL;
}
/* make sure chains are ordered after each other in same order
- as their corresponding hooks */
+ * as their corresponding hooks
+ */
for (j = i + 1; j < NF_BR_NUMHOOKS; j++) {
if (!newinfo->hook_entry[j])
continue;
@@ -868,7 +869,8 @@ static int translate_table(struct net *net, const char *name,
i = 0; /* holds the expected nr. of entries for the chain */
j = 0; /* holds the up to now counted entries for the chain */
k = 0; /* holds the total nr. of entries, should equal
- newinfo->nentries afterwards */
+ * newinfo->nentries afterwards
+ */
udc_cnt = 0; /* will hold the nr. of user defined chains (udc) */
ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size,
ebt_check_entry_size_and_hooks, newinfo,
@@ -888,10 +890,12 @@ static int translate_table(struct net *net, const char *name,
}
/* get the location of the udc, put them in an array
- while we're at it, allocate the chainstack */
+ * while we're at it, allocate the chainstack
+ */
if (udc_cnt) {
/* this will get free'd in do_replace()/ebt_register_table()
- if an error occurs */
+ * if an error occurs
+ */
newinfo->chainstack =
vmalloc(nr_cpu_ids * sizeof(*(newinfo->chainstack)));
if (!newinfo->chainstack)
@@ -932,14 +936,15 @@ static int translate_table(struct net *net, const char *name,
}
/* we now know the following (along with E=mc²):
- - the nr of entries in each chain is right
- - the size of the allocated space is right
- - all valid hooks have a corresponding chain
- - there are no loops
- - wrong data can still be on the level of a single entry
- - could be there are jumps to places that are not the
- beginning of a chain. This can only occur in chains that
- are not accessible from any base chains, so we don't care. */
+ * - the nr of entries in each chain is right
+ * - the size of the allocated space is right
+ * - all valid hooks have a corresponding chain
+ * - there are no loops
+ * - wrong data can still be on the level of a single entry
+ * - could be there are jumps to places that are not the
+ * beginning of a chain. This can only occur in chains that
+ * are not accessible from any base chains, so we don't care.
+ */
/* used to know what we need to clean up if something goes wrong */
i = 0;
@@ -955,7 +960,7 @@ static int translate_table(struct net *net, const char *name,
/* called under write_lock */
static void get_counters(const struct ebt_counter *oldcounters,
- struct ebt_counter *counters, unsigned int nentries)
+ struct ebt_counter *counters, unsigned int nentries)
{
int i, cpu;
struct ebt_counter *counter_base;
@@ -986,7 +991,8 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
struct ebt_table *t;
/* the user wants counters back
- the check on the size is done later, when we have the lock */
+ * the check on the size is done later, when we have the lock
+ */
if (repl->num_counters) {
unsigned long size = repl->num_counters * sizeof(*counterstmp);
counterstmp = vmalloc(size);
@@ -1038,9 +1044,10 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
write_unlock_bh(&t->lock);
mutex_unlock(&ebt_mutex);
/* so, a user can change the chains while having messed up her counter
- allocation. Only reason why this is done is because this way the lock
- is held only once, while this doesn't bring the kernel into a
- dangerous state. */
+ * allocation. Only reason why this is done is because this way the lock
+ * is held only once, while this doesn't bring the kernel into a
+ * dangerous state.
+ */
if (repl->num_counters &&
copy_to_user(repl->counters, counterstmp,
repl->num_counters * sizeof(struct ebt_counter))) {
@@ -1342,13 +1349,14 @@ static int update_counters(struct net *net, const void __user *user,
}
static inline int ebt_make_matchname(const struct ebt_entry_match *m,
- const char *base, char __user *ubase)
+ const char *base, char __user *ubase)
{
char __user *hlp = ubase + ((char *)m - base);
char name[EBT_FUNCTION_MAXNAMELEN] = {};
/* ebtables expects 32 bytes long names but xt_match names are 29 bytes
- long. Copy 29 bytes and fill remaining bytes with zeroes. */
+ * long. Copy 29 bytes and fill remaining bytes with zeroes.
+ */
strlcpy(name, m->u.match->name, sizeof(name));
if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN))
return -EFAULT;
@@ -1356,19 +1364,19 @@ static inline int ebt_make_matchname(const struct ebt_entry_match *m,
}
static inline int ebt_make_watchername(const struct ebt_entry_watcher *w,
- const char *base, char __user *ubase)
+ const char *base, char __user *ubase)
{
char __user *hlp = ubase + ((char *)w - base);
char name[EBT_FUNCTION_MAXNAMELEN] = {};
strlcpy(name, w->u.watcher->name, sizeof(name));
- if (copy_to_user(hlp , name, EBT_FUNCTION_MAXNAMELEN))
+ if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN))
return -EFAULT;
return 0;
}
-static inline int
-ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase)
+static inline int ebt_make_names(struct ebt_entry *e, const char *base,
+ char __user *ubase)
{
int ret;
char __user *hlp;
@@ -1394,9 +1402,9 @@ ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase)
}
static int copy_counters_to_user(struct ebt_table *t,
- const struct ebt_counter *oldcounters,
- void __user *user, unsigned int num_counters,
- unsigned int nentries)
+ const struct ebt_counter *oldcounters,
+ void __user *user, unsigned int num_counters,
+ unsigned int nentries)
{
struct ebt_counter *counterstmp;
int ret = 0;
@@ -1427,7 +1435,7 @@ static int copy_counters_to_user(struct ebt_table *t,
/* called with ebt_mutex locked */
static int copy_everything_to_user(struct ebt_table *t, void __user *user,
- const int *len, int cmd)
+ const int *len, int cmd)
{
struct ebt_replace tmp;
const struct ebt_counter *oldcounters;
@@ -1595,8 +1603,7 @@ static int ebt_compat_entry_padsize(void)
static int ebt_compat_match_offset(const struct xt_match *match,
unsigned int userlen)
{
- /*
- * ebt_among needs special handling. The kernel .matchsize is
+ /* ebt_among needs special handling. The kernel .matchsize is
* set to -1 at registration time; at runtime an EBT_ALIGN()ed
* value is expected.
* Example: userspace sends 4500, ebt_among.c wants 4504.
@@ -1966,8 +1973,7 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
return off + match_size;
}
-/*
- * return size of all matches, watchers or target, including necessary
+/* return size of all matches, watchers or target, including necessary
* alignment and padding.
*/
static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
@@ -2070,8 +2076,7 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
if (ret < 0)
return ret;
buf_start = (char *) entry;
- /*
- * 0: matches offset, always follows ebt_entry.
+ /* 0: matches offset, always follows ebt_entry.
* 1: watchers offset, from ebt_entry structure
* 2: target offset, from ebt_entry structure
* 3: next ebt_entry offset, from ebt_entry structure
@@ -2115,8 +2120,7 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
return 0;
}
-/*
- * repl->entries_size is the size of the ebt_entry blob in userspace.
+/* repl->entries_size is the size of the ebt_entry blob in userspace.
* It might need more memory when copied to a 64 bit kernel in case
* userspace is 32-bit. So, first task: find out how much memory is needed.
*
@@ -2305,7 +2309,7 @@ static int compat_do_ebt_set_ctl(struct sock *sk,
break;
default:
ret = -EINVAL;
- }
+ }
return ret;
}
@@ -2360,8 +2364,7 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd,
break;
case EBT_SO_GET_ENTRIES:
case EBT_SO_GET_INIT_ENTRIES:
- /*
- * try real handler first in case of userland-side padding.
+ /* try real handler first in case of userland-side padding.
* in case we are dealing with an 'ordinary' 32 bit binary
* without 64bit compatibility padding, this will fail right
* after copy_from_user when the *len argument is validated.
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
index 62f6b1b19589..7fcdd7261d88 100644
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -141,7 +141,7 @@ err:
static void nf_tables_bridge_exit_net(struct net *net)
{
- nft_unregister_afinfo(net->nft.bridge);
+ nft_unregister_afinfo(net, net->nft.bridge);
kfree(net->nft.bridge);
}
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index a21269b83f16..4b901d9f2e7c 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -84,6 +84,7 @@ static const struct nft_expr_ops nft_meta_bridge_set_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_set_eval,
.init = nft_meta_set_init,
+ .destroy = nft_meta_set_destroy,
.dump = nft_meta_set_dump,
};
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index cc858919108e..aa209b1066c9 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -323,7 +323,7 @@ static long caif_stream_data_wait(struct sock *sk, long timeo)
!timeo)
break;
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
release_sock(sk);
timeo = schedule_timeout(timeo);
lock_sock(sk);
@@ -331,7 +331,7 @@ static long caif_stream_data_wait(struct sock *sk, long timeo)
if (sock_flag(sk, SOCK_DEAD))
break;
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
}
finish_wait(sk_sleep(sk), &wait);
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 61d7617d9249..b82440e1fcb4 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -159,7 +159,7 @@ static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt)
tmppkt = NULL;
/* Verify that length is correct */
- err = EPROTO;
+ err = -EPROTO;
if (rfml->pdu_size != cfpkt_getlen(pkt) - RFM_HEAD_SIZE + 1)
goto out;
}
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index ba6eb17226da..9e43a315e662 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -8,6 +8,7 @@
#include <linux/ceph/decode.h>
#include <linux/ceph/auth.h>
+#include <linux/ceph/libceph.h>
#include <linux/ceph/messenger.h>
#include "crypto.h"
@@ -151,7 +152,6 @@ static int process_one_ticket(struct ceph_auth_client *ac,
void *ticket_buf = NULL;
void *tp, *tpend;
void **ptp;
- struct ceph_timespec new_validity;
struct ceph_crypto_key new_session_key;
struct ceph_buffer *new_ticket_blob;
unsigned long new_expires, new_renew_after;
@@ -192,8 +192,8 @@ static int process_one_ticket(struct ceph_auth_client *ac,
if (ret)
goto out;
- ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
- ceph_decode_timespec(&validity, &new_validity);
+ ceph_decode_timespec(&validity, dp);
+ dp += sizeof(struct ceph_timespec);
new_expires = get_seconds() + validity.tv_sec;
new_renew_after = new_expires - (validity.tv_sec / 4);
dout(" expires=%lu renew_after=%lu\n", new_expires,
@@ -232,10 +232,10 @@ static int process_one_ticket(struct ceph_auth_client *ac,
ceph_buffer_put(th->ticket_blob);
th->session_key = new_session_key;
th->ticket_blob = new_ticket_blob;
- th->validity = new_validity;
th->secret_id = new_secret_id;
th->expires = new_expires;
th->renew_after = new_renew_after;
+ th->have_key = true;
dout(" got ticket service %d (%s) secret_id %lld len %d\n",
type, ceph_entity_type_name(type), th->secret_id,
(int)th->ticket_blob->vec.iov_len);
@@ -279,6 +279,15 @@ bad:
return -EINVAL;
}
+static void ceph_x_authorizer_cleanup(struct ceph_x_authorizer *au)
+{
+ ceph_crypto_key_destroy(&au->session_key);
+ if (au->buf) {
+ ceph_buffer_put(au->buf);
+ au->buf = NULL;
+ }
+}
+
static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
struct ceph_x_ticket_handler *th,
struct ceph_x_authorizer *au)
@@ -297,7 +306,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
ceph_crypto_key_destroy(&au->session_key);
ret = ceph_crypto_key_clone(&au->session_key, &th->session_key);
if (ret)
- return ret;
+ goto out_au;
maxlen = sizeof(*msg_a) + sizeof(msg_b) +
ceph_x_encrypt_buflen(ticket_blob_len);
@@ -309,8 +318,8 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
if (!au->buf) {
au->buf = ceph_buffer_new(maxlen, GFP_NOFS);
if (!au->buf) {
- ceph_crypto_key_destroy(&au->session_key);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out_au;
}
}
au->service = th->service;
@@ -340,7 +349,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
ret = ceph_x_encrypt(&au->session_key, &msg_b, sizeof(msg_b),
p, end - p);
if (ret < 0)
- goto out_buf;
+ goto out_au;
p += ret;
au->buf->vec.iov_len = p - au->buf->vec.iov_base;
dout(" built authorizer nonce %llx len %d\n", au->nonce,
@@ -348,9 +357,8 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
BUG_ON(au->buf->vec.iov_len > maxlen);
return 0;
-out_buf:
- ceph_buffer_put(au->buf);
- au->buf = NULL;
+out_au:
+ ceph_x_authorizer_cleanup(au);
return ret;
}
@@ -375,6 +383,24 @@ bad:
return -ERANGE;
}
+static bool need_key(struct ceph_x_ticket_handler *th)
+{
+ if (!th->have_key)
+ return true;
+
+ return get_seconds() >= th->renew_after;
+}
+
+static bool have_key(struct ceph_x_ticket_handler *th)
+{
+ if (th->have_key) {
+ if (get_seconds() >= th->expires)
+ th->have_key = false;
+ }
+
+ return th->have_key;
+}
+
static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
{
int want = ac->want_keys;
@@ -393,20 +419,18 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
continue;
th = get_ticket_handler(ac, service);
-
if (IS_ERR(th)) {
*pneed |= service;
continue;
}
- if (get_seconds() >= th->renew_after)
+ if (need_key(th))
*pneed |= service;
- if (get_seconds() >= th->expires)
+ if (!have_key(th))
xi->have_keys &= ~service;
}
}
-
static int ceph_x_build_request(struct ceph_auth_client *ac,
void *buf, void *end)
{
@@ -624,8 +648,7 @@ static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac,
{
struct ceph_x_authorizer *au = (void *)a;
- ceph_crypto_key_destroy(&au->session_key);
- ceph_buffer_put(au->buf);
+ ceph_x_authorizer_cleanup(au);
kfree(au);
}
@@ -653,21 +676,32 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
remove_ticket_handler(ac, th);
}
- if (xi->auth_authorizer.buf)
- ceph_buffer_put(xi->auth_authorizer.buf);
+ ceph_x_authorizer_cleanup(&xi->auth_authorizer);
kfree(ac->private);
ac->private = NULL;
}
-static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
- int peer_type)
+static void invalidate_ticket(struct ceph_auth_client *ac, int peer_type)
{
struct ceph_x_ticket_handler *th;
th = get_ticket_handler(ac, peer_type);
if (!IS_ERR(th))
- memset(&th->validity, 0, sizeof(th->validity));
+ th->have_key = false;
+}
+
+static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
+ int peer_type)
+{
+ /*
+ * We are to invalidate a service ticket in the hopes of
+ * getting a new, hopefully more valid, one. But, we won't get
+ * it unless our AUTH ticket is good, so invalidate AUTH ticket
+ * as well, just in case.
+ */
+ invalidate_ticket(ac, peer_type);
+ invalidate_ticket(ac, CEPH_ENTITY_TYPE_AUTH);
}
static int calcu_signature(struct ceph_x_authorizer *au,
@@ -691,8 +725,10 @@ static int ceph_x_sign_message(struct ceph_auth_handshake *auth,
struct ceph_msg *msg)
{
int ret;
- if (!auth->authorizer)
+
+ if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN))
return 0;
+
ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
msg, &msg->footer.sig);
if (ret < 0)
@@ -707,8 +743,9 @@ static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth,
__le64 sig_check;
int ret;
- if (!auth->authorizer)
+ if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN))
return 0;
+
ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
msg, &sig_check);
if (ret < 0)
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index e8b7c6917d47..40b1a3cf7397 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -16,7 +16,7 @@ struct ceph_x_ticket_handler {
unsigned int service;
struct ceph_crypto_key session_key;
- struct ceph_timespec validity;
+ bool have_key;
u64 secret_id;
struct ceph_buffer *ticket_blob;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 78f098a20796..bcbec33c6a14 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -245,6 +245,8 @@ enum {
Opt_nocrc,
Opt_cephx_require_signatures,
Opt_nocephx_require_signatures,
+ Opt_cephx_sign_messages,
+ Opt_nocephx_sign_messages,
Opt_tcp_nodelay,
Opt_notcp_nodelay,
};
@@ -267,6 +269,8 @@ static match_table_t opt_tokens = {
{Opt_nocrc, "nocrc"},
{Opt_cephx_require_signatures, "cephx_require_signatures"},
{Opt_nocephx_require_signatures, "nocephx_require_signatures"},
+ {Opt_cephx_sign_messages, "cephx_sign_messages"},
+ {Opt_nocephx_sign_messages, "nocephx_sign_messages"},
{Opt_tcp_nodelay, "tcp_nodelay"},
{Opt_notcp_nodelay, "notcp_nodelay"},
{-1, NULL}
@@ -491,6 +495,12 @@ ceph_parse_options(char *options, const char *dev_name,
case Opt_nocephx_require_signatures:
opt->flags |= CEPH_OPT_NOMSGAUTH;
break;
+ case Opt_cephx_sign_messages:
+ opt->flags &= ~CEPH_OPT_NOMSGSIGN;
+ break;
+ case Opt_nocephx_sign_messages:
+ opt->flags |= CEPH_OPT_NOMSGSIGN;
+ break;
case Opt_tcp_nodelay:
opt->flags |= CEPH_OPT_TCP_NODELAY;
@@ -534,6 +544,8 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
seq_puts(m, "nocrc,");
if (opt->flags & CEPH_OPT_NOMSGAUTH)
seq_puts(m, "nocephx_require_signatures,");
+ if (opt->flags & CEPH_OPT_NOMSGSIGN)
+ seq_puts(m, "nocephx_sign_messages,");
if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
seq_puts(m, "notcp_nodelay,");
@@ -596,11 +608,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
if (ceph_test_opt(client, MYIP))
myaddr = &client->options->my_addr;
- ceph_messenger_init(&client->msgr, myaddr,
- client->supported_features,
- client->required_features,
- ceph_test_opt(client, NOCRC),
- ceph_test_opt(client, TCP_NODELAY));
+ ceph_messenger_init(&client->msgr, myaddr);
/* subsystems */
err = ceph_monc_init(&client->monc, client);
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 393bfb22d5bb..5fcfb98f309e 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -403,6 +403,7 @@ static int is_out(const struct crush_map *map,
* @local_retries: localized retries
* @local_fallback_retries: localized fallback retries
* @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
+ * @stable: stable mode starts rep=0 in the recursive call for all replicas
* @vary_r: pass r to recursive calls
* @out2: second output vector for leaf items (if @recurse_to_leaf)
* @parent_r: r value passed from the parent
@@ -419,6 +420,7 @@ static int crush_choose_firstn(const struct crush_map *map,
unsigned int local_fallback_retries,
int recurse_to_leaf,
unsigned int vary_r,
+ unsigned int stable,
int *out2,
int parent_r)
{
@@ -433,13 +435,13 @@ static int crush_choose_firstn(const struct crush_map *map,
int collide, reject;
int count = out_size;
- dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
+ dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d stable %d\n",
recurse_to_leaf ? "_LEAF" : "",
bucket->id, x, outpos, numrep,
tries, recurse_tries, local_retries, local_fallback_retries,
- parent_r);
+ parent_r, stable);
- for (rep = outpos; rep < numrep && count > 0 ; rep++) {
+ for (rep = stable ? 0 : outpos; rep < numrep && count > 0 ; rep++) {
/* keep trying until we get a non-out, non-colliding item */
ftotal = 0;
skip_rep = 0;
@@ -512,13 +514,14 @@ static int crush_choose_firstn(const struct crush_map *map,
if (crush_choose_firstn(map,
map->buckets[-1-item],
weight, weight_max,
- x, outpos+1, 0,
+ x, stable ? 1 : outpos+1, 0,
out2, outpos, count,
recurse_tries, 0,
local_retries,
local_fallback_retries,
0,
vary_r,
+ stable,
NULL,
sub_r) <= outpos)
/* didn't get leaf */
@@ -816,6 +819,7 @@ int crush_do_rule(const struct crush_map *map,
int choose_local_fallback_retries = map->choose_local_fallback_tries;
int vary_r = map->chooseleaf_vary_r;
+ int stable = map->chooseleaf_stable;
if ((__u32)ruleno >= map->max_rules) {
dprintk(" bad ruleno %d\n", ruleno);
@@ -835,7 +839,8 @@ int crush_do_rule(const struct crush_map *map,
case CRUSH_RULE_TAKE:
if ((curstep->arg1 >= 0 &&
curstep->arg1 < map->max_devices) ||
- (-1-curstep->arg1 < map->max_buckets &&
+ (-1-curstep->arg1 >= 0 &&
+ -1-curstep->arg1 < map->max_buckets &&
map->buckets[-1-curstep->arg1])) {
w[0] = curstep->arg1;
wsize = 1;
@@ -869,6 +874,11 @@ int crush_do_rule(const struct crush_map *map,
vary_r = curstep->arg1;
break;
+ case CRUSH_RULE_SET_CHOOSELEAF_STABLE:
+ if (curstep->arg1 >= 0)
+ stable = curstep->arg1;
+ break;
+
case CRUSH_RULE_CHOOSELEAF_FIRSTN:
case CRUSH_RULE_CHOOSE_FIRSTN:
firstn = 1;
@@ -888,6 +898,7 @@ int crush_do_rule(const struct crush_map *map,
osize = 0;
for (i = 0; i < wsize; i++) {
+ int bno;
/*
* see CRUSH_N, CRUSH_N_MINUS macros.
* basically, numrep <= 0 means relative to
@@ -900,6 +911,13 @@ int crush_do_rule(const struct crush_map *map,
continue;
}
j = 0;
+ /* make sure bucket id is valid */
+ bno = -1 - w[i];
+ if (bno < 0 || bno >= map->max_buckets) {
+ /* w[i] is probably CRUSH_ITEM_NONE */
+ dprintk(" bad w[i] %d\n", w[i]);
+ continue;
+ }
if (firstn) {
int recurse_tries;
if (choose_leaf_tries)
@@ -911,7 +929,7 @@ int crush_do_rule(const struct crush_map *map,
recurse_tries = choose_tries;
osize += crush_choose_firstn(
map,
- map->buckets[-1-w[i]],
+ map->buckets[bno],
weight, weight_max,
x, numrep,
curstep->arg2,
@@ -923,6 +941,7 @@ int crush_do_rule(const struct crush_map *map,
choose_local_fallback_retries,
recurse_to_leaf,
vary_r,
+ stable,
c+osize,
0);
} else {
@@ -930,7 +949,7 @@ int crush_do_rule(const struct crush_map *map,
numrep : (result_max-osize));
crush_choose_indep(
map,
- map->buckets[-1-w[i]],
+ map->buckets[bno],
weight, weight_max,
x, out_size, numrep,
curstep->arg2,
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 42e8649c6e79..db2847ac5f12 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -4,7 +4,8 @@
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
-#include <crypto/hash.h>
+#include <crypto/aes.h>
+#include <crypto/skcipher.h>
#include <linux/key-type.h>
#include <keys/ceph-type.h>
@@ -79,9 +80,9 @@ int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey)
return 0;
}
-static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void)
+static struct crypto_skcipher *ceph_crypto_alloc_cipher(void)
{
- return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
+ return crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
}
static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
@@ -162,11 +163,10 @@ static int ceph_aes_encrypt(const void *key, int key_len,
{
struct scatterlist sg_in[2], prealloc_sg;
struct sg_table sg_out;
- struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
- struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 };
+ struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher();
+ SKCIPHER_REQUEST_ON_STACK(req, tfm);
int ret;
- void *iv;
- int ivsize;
+ char iv[AES_BLOCK_SIZE];
size_t zero_padding = (0x10 - (src_len & 0x0f));
char pad[16];
@@ -184,10 +184,13 @@ static int ceph_aes_encrypt(const void *key, int key_len,
if (ret)
goto out_tfm;
- crypto_blkcipher_setkey((void *)tfm, key, key_len);
- iv = crypto_blkcipher_crt(tfm)->iv;
- ivsize = crypto_blkcipher_ivsize(tfm);
- memcpy(iv, aes_iv, ivsize);
+ crypto_skcipher_setkey((void *)tfm, key, key_len);
+ memcpy(iv, aes_iv, AES_BLOCK_SIZE);
+
+ skcipher_request_set_tfm(req, tfm);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, sg_in, sg_out.sgl,
+ src_len + zero_padding, iv);
/*
print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1,
@@ -197,8 +200,8 @@ static int ceph_aes_encrypt(const void *key, int key_len,
print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1,
pad, zero_padding, 1);
*/
- ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in,
- src_len + zero_padding);
+ ret = crypto_skcipher_encrypt(req);
+ skcipher_request_zero(req);
if (ret < 0) {
pr_err("ceph_aes_crypt failed %d\n", ret);
goto out_sg;
@@ -211,7 +214,7 @@ static int ceph_aes_encrypt(const void *key, int key_len,
out_sg:
teardown_sgtable(&sg_out);
out_tfm:
- crypto_free_blkcipher(tfm);
+ crypto_free_skcipher(tfm);
return ret;
}
@@ -222,11 +225,10 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
{
struct scatterlist sg_in[3], prealloc_sg;
struct sg_table sg_out;
- struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
- struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 };
+ struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher();
+ SKCIPHER_REQUEST_ON_STACK(req, tfm);
int ret;
- void *iv;
- int ivsize;
+ char iv[AES_BLOCK_SIZE];
size_t zero_padding = (0x10 - ((src1_len + src2_len) & 0x0f));
char pad[16];
@@ -245,10 +247,13 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
if (ret)
goto out_tfm;
- crypto_blkcipher_setkey((void *)tfm, key, key_len);
- iv = crypto_blkcipher_crt(tfm)->iv;
- ivsize = crypto_blkcipher_ivsize(tfm);
- memcpy(iv, aes_iv, ivsize);
+ crypto_skcipher_setkey((void *)tfm, key, key_len);
+ memcpy(iv, aes_iv, AES_BLOCK_SIZE);
+
+ skcipher_request_set_tfm(req, tfm);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, sg_in, sg_out.sgl,
+ src1_len + src2_len + zero_padding, iv);
/*
print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1,
@@ -260,8 +265,8 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1,
pad, zero_padding, 1);
*/
- ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in,
- src1_len + src2_len + zero_padding);
+ ret = crypto_skcipher_encrypt(req);
+ skcipher_request_zero(req);
if (ret < 0) {
pr_err("ceph_aes_crypt2 failed %d\n", ret);
goto out_sg;
@@ -274,7 +279,7 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
out_sg:
teardown_sgtable(&sg_out);
out_tfm:
- crypto_free_blkcipher(tfm);
+ crypto_free_skcipher(tfm);
return ret;
}
@@ -284,11 +289,10 @@ static int ceph_aes_decrypt(const void *key, int key_len,
{
struct sg_table sg_in;
struct scatterlist sg_out[2], prealloc_sg;
- struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
- struct blkcipher_desc desc = { .tfm = tfm };
+ struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher();
+ SKCIPHER_REQUEST_ON_STACK(req, tfm);
char pad[16];
- void *iv;
- int ivsize;
+ char iv[AES_BLOCK_SIZE];
int ret;
int last_byte;
@@ -302,10 +306,13 @@ static int ceph_aes_decrypt(const void *key, int key_len,
if (ret)
goto out_tfm;
- crypto_blkcipher_setkey((void *)tfm, key, key_len);
- iv = crypto_blkcipher_crt(tfm)->iv;
- ivsize = crypto_blkcipher_ivsize(tfm);
- memcpy(iv, aes_iv, ivsize);
+ crypto_skcipher_setkey((void *)tfm, key, key_len);
+ memcpy(iv, aes_iv, AES_BLOCK_SIZE);
+
+ skcipher_request_set_tfm(req, tfm);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, sg_in.sgl, sg_out,
+ src_len, iv);
/*
print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1,
@@ -313,7 +320,8 @@ static int ceph_aes_decrypt(const void *key, int key_len,
print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1,
src, src_len, 1);
*/
- ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len);
+ ret = crypto_skcipher_decrypt(req);
+ skcipher_request_zero(req);
if (ret < 0) {
pr_err("ceph_aes_decrypt failed %d\n", ret);
goto out_sg;
@@ -338,7 +346,7 @@ static int ceph_aes_decrypt(const void *key, int key_len,
out_sg:
teardown_sgtable(&sg_in);
out_tfm:
- crypto_free_blkcipher(tfm);
+ crypto_free_skcipher(tfm);
return ret;
}
@@ -349,11 +357,10 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
{
struct sg_table sg_in;
struct scatterlist sg_out[3], prealloc_sg;
- struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
- struct blkcipher_desc desc = { .tfm = tfm };
+ struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher();
+ SKCIPHER_REQUEST_ON_STACK(req, tfm);
char pad[16];
- void *iv;
- int ivsize;
+ char iv[AES_BLOCK_SIZE];
int ret;
int last_byte;
@@ -368,10 +375,13 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
if (ret)
goto out_tfm;
- crypto_blkcipher_setkey((void *)tfm, key, key_len);
- iv = crypto_blkcipher_crt(tfm)->iv;
- ivsize = crypto_blkcipher_ivsize(tfm);
- memcpy(iv, aes_iv, ivsize);
+ crypto_skcipher_setkey((void *)tfm, key, key_len);
+ memcpy(iv, aes_iv, AES_BLOCK_SIZE);
+
+ skcipher_request_set_tfm(req, tfm);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, sg_in.sgl, sg_out,
+ src_len, iv);
/*
print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1,
@@ -379,7 +389,8 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1,
src, src_len, 1);
*/
- ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len);
+ ret = crypto_skcipher_decrypt(req);
+ skcipher_request_zero(req);
if (ret < 0) {
pr_err("ceph_aes_decrypt failed %d\n", ret);
goto out_sg;
@@ -415,7 +426,7 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
out_sg:
teardown_sgtable(&sg_in);
out_tfm:
- crypto_free_blkcipher(tfm);
+ crypto_free_skcipher(tfm);
return ret;
}
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index d1498224c49d..2e9cab09f37b 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -16,8 +16,10 @@ struct ceph_crypto_key {
static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
{
- if (key)
+ if (key) {
kfree(key->key);
+ key->key = NULL;
+ }
}
int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index b9b0e3b5da49..9382619a405b 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -23,9 +23,6 @@
#include <linux/ceph/pagelist.h>
#include <linux/export.h>
-#define list_entry_next(pos, member) \
- list_entry(pos->member.next, typeof(*pos), member)
-
/*
* Ceph uses the messenger to exchange ceph_msg messages with other
* hosts in the system. The messenger provides ordered and reliable
@@ -509,7 +506,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)
return ret;
}
- if (con->msgr->tcp_nodelay) {
+ if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) {
int optval = 1;
ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
@@ -637,9 +634,6 @@ static int con_close_socket(struct ceph_connection *con)
static void ceph_msg_remove(struct ceph_msg *msg)
{
list_del_init(&msg->list_head);
- BUG_ON(msg->con == NULL);
- msg->con->ops->put(msg->con);
- msg->con = NULL;
ceph_msg_put(msg);
}
@@ -662,20 +656,21 @@ static void reset_connection(struct ceph_connection *con)
if (con->in_msg) {
BUG_ON(con->in_msg->con != con);
- con->in_msg->con = NULL;
ceph_msg_put(con->in_msg);
con->in_msg = NULL;
- con->ops->put(con);
}
con->connect_seq = 0;
con->out_seq = 0;
if (con->out_msg) {
+ BUG_ON(con->out_msg->con != con);
ceph_msg_put(con->out_msg);
con->out_msg = NULL;
}
con->in_seq = 0;
con->in_seq_acked = 0;
+
+ con->out_skip = 0;
}
/*
@@ -775,6 +770,8 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt)
static void con_out_kvec_reset(struct ceph_connection *con)
{
+ BUG_ON(con->out_skip);
+
con->out_kvec_left = 0;
con->out_kvec_bytes = 0;
con->out_kvec_cur = &con->out_kvec[0];
@@ -783,9 +780,9 @@ static void con_out_kvec_reset(struct ceph_connection *con)
static void con_out_kvec_add(struct ceph_connection *con,
size_t size, void *data)
{
- int index;
+ int index = con->out_kvec_left;
- index = con->out_kvec_left;
+ BUG_ON(con->out_skip);
BUG_ON(index >= ARRAY_SIZE(con->out_kvec));
con->out_kvec[index].iov_len = size;
@@ -794,6 +791,27 @@ static void con_out_kvec_add(struct ceph_connection *con,
con->out_kvec_bytes += size;
}
+/*
+ * Chop off a kvec from the end. Return residual number of bytes for
+ * that kvec, i.e. how many bytes would have been written if the kvec
+ * hadn't been nuked.
+ */
+static int con_out_kvec_skip(struct ceph_connection *con)
+{
+ int off = con->out_kvec_cur - con->out_kvec;
+ int skip = 0;
+
+ if (con->out_kvec_bytes > 0) {
+ skip = con->out_kvec[off + con->out_kvec_left - 1].iov_len;
+ BUG_ON(con->out_kvec_bytes < skip);
+ BUG_ON(!con->out_kvec_left);
+ con->out_kvec_bytes -= skip;
+ con->out_kvec_left--;
+ }
+
+ return skip;
+}
+
#ifdef CONFIG_BLOCK
/*
@@ -1046,7 +1064,7 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
/* Move on to the next page */
BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
- cursor->page = list_entry_next(cursor->page, lru);
+ cursor->page = list_next_entry(cursor->page, lru);
cursor->last_piece = cursor->resid <= PAGE_SIZE;
return true;
@@ -1170,7 +1188,7 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
if (!cursor->resid && cursor->total_resid) {
WARN_ON(!cursor->last_piece);
BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
- cursor->data = list_entry_next(cursor->data, links);
+ cursor->data = list_next_entry(cursor->data, links);
__ceph_msg_data_cursor_init(cursor);
new_piece = true;
}
@@ -1179,6 +1197,13 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
return new_piece;
}
+static size_t sizeof_footer(struct ceph_connection *con)
+{
+ return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ?
+ sizeof(struct ceph_msg_footer) :
+ sizeof(struct ceph_msg_footer_old);
+}
+
static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
{
BUG_ON(!msg);
@@ -1201,11 +1226,10 @@ static void prepare_write_message_footer(struct ceph_connection *con)
m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
dout("prepare_write_message_footer %p\n", con);
- con->out_kvec_is_msg = true;
con->out_kvec[v].iov_base = &m->footer;
if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
if (con->ops->sign_message)
- con->ops->sign_message(con, m);
+ con->ops->sign_message(m);
else
m->footer.sig = 0;
con->out_kvec[v].iov_len = sizeof(m->footer);
@@ -1229,7 +1253,6 @@ static void prepare_write_message(struct ceph_connection *con)
u32 crc;
con_out_kvec_reset(con);
- con->out_kvec_is_msg = true;
con->out_msg_done = false;
/* Sneak an ack in there first? If we can get it into the same
@@ -1269,18 +1292,19 @@ static void prepare_write_message(struct ceph_connection *con)
/* tag + hdr + front + middle */
con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
- con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
+ con_out_kvec_add(con, sizeof(con->out_hdr), &con->out_hdr);
con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
if (m->middle)
con_out_kvec_add(con, m->middle->vec.iov_len,
m->middle->vec.iov_base);
- /* fill in crc (except data pages), footer */
+ /* fill in hdr crc and finalize hdr */
crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
con->out_msg->hdr.crc = cpu_to_le32(crc);
- con->out_msg->footer.flags = 0;
+ memcpy(&con->out_hdr, &con->out_msg->hdr, sizeof(con->out_hdr));
+ /* fill in front and middle crc, footer */
crc = crc32c(0, m->front.iov_base, m->front.iov_len);
con->out_msg->footer.front_crc = cpu_to_le32(crc);
if (m->middle) {
@@ -1292,6 +1316,7 @@ static void prepare_write_message(struct ceph_connection *con)
dout("%s front_crc %u middle_crc %u\n", __func__,
le32_to_cpu(con->out_msg->footer.front_crc),
le32_to_cpu(con->out_msg->footer.middle_crc));
+ con->out_msg->footer.flags = 0;
/* is there a data payload? */
con->out_msg->footer.data_crc = 0;
@@ -1432,7 +1457,8 @@ static int prepare_write_connect(struct ceph_connection *con)
dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
con->connect_seq, global_seq, proto);
- con->out_connect.features = cpu_to_le64(con->msgr->supported_features);
+ con->out_connect.features =
+ cpu_to_le64(from_msgr(con->msgr)->supported_features);
con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
con->out_connect.global_seq = cpu_to_le32(global_seq);
@@ -1495,7 +1521,6 @@ static int write_partial_kvec(struct ceph_connection *con)
}
}
con->out_kvec_left = 0;
- con->out_kvec_is_msg = false;
ret = 1;
out:
dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con,
@@ -1527,7 +1552,7 @@ static int write_partial_message_data(struct ceph_connection *con)
{
struct ceph_msg *msg = con->out_msg;
struct ceph_msg_data_cursor *cursor = &msg->cursor;
- bool do_datacrc = !con->msgr->nocrc;
+ bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
u32 crc;
dout("%s %p msg %p\n", __func__, con, msg);
@@ -1552,8 +1577,8 @@ static int write_partial_message_data(struct ceph_connection *con)
bool need_crc;
int ret;
- page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
- &last_piece);
+ page = ceph_msg_data_next(cursor, &page_offset, &length,
+ &last_piece);
ret = ceph_tcp_sendpage(con->sock, page, page_offset,
length, !last_piece);
if (ret <= 0) {
@@ -1564,7 +1589,7 @@ static int write_partial_message_data(struct ceph_connection *con)
}
if (do_datacrc && cursor->need_crc)
crc = ceph_crc32c_page(crc, page, page_offset, length);
- need_crc = ceph_msg_data_advance(&msg->cursor, (size_t)ret);
+ need_crc = ceph_msg_data_advance(cursor, (size_t)ret);
}
dout("%s %p msg %p done\n", __func__, con, msg);
@@ -1587,6 +1612,7 @@ static int write_partial_skip(struct ceph_connection *con)
{
int ret;
+ dout("%s %p %d left\n", __func__, con, con->out_skip);
while (con->out_skip > 0) {
size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE);
@@ -2005,8 +2031,8 @@ static int process_banner(struct ceph_connection *con)
static int process_connect(struct ceph_connection *con)
{
- u64 sup_feat = con->msgr->supported_features;
- u64 req_feat = con->msgr->required_features;
+ u64 sup_feat = from_msgr(con->msgr)->supported_features;
+ u64 req_feat = from_msgr(con->msgr)->required_features;
u64 server_feat = ceph_sanitize_features(
le64_to_cpu(con->in_reply.features));
int ret;
@@ -2232,7 +2258,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
{
struct ceph_msg *msg = con->in_msg;
struct ceph_msg_data_cursor *cursor = &msg->cursor;
- const bool do_datacrc = !con->msgr->nocrc;
+ bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
struct page *page;
size_t page_offset;
size_t length;
@@ -2246,8 +2272,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
if (do_datacrc)
crc = con->in_data_crc;
while (cursor->resid) {
- page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
- NULL);
+ page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
if (ret <= 0) {
if (do_datacrc)
@@ -2258,7 +2283,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
if (do_datacrc)
crc = ceph_crc32c_page(crc, page, page_offset, ret);
- (void) ceph_msg_data_advance(&msg->cursor, (size_t)ret);
+ (void) ceph_msg_data_advance(cursor, (size_t)ret);
}
if (do_datacrc)
con->in_data_crc = crc;
@@ -2278,7 +2303,7 @@ static int read_partial_message(struct ceph_connection *con)
int end;
int ret;
unsigned int front_len, middle_len, data_len;
- bool do_datacrc = !con->msgr->nocrc;
+ bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH);
u64 seq;
u32 crc;
@@ -2317,9 +2342,9 @@ static int read_partial_message(struct ceph_connection *con)
ceph_pr_addr(&con->peer_addr.in_addr),
seq, con->in_seq + 1);
con->in_base_pos = -front_len - middle_len - data_len -
- sizeof(m->footer);
+ sizeof_footer(con);
con->in_tag = CEPH_MSGR_TAG_READY;
- return 0;
+ return 1;
} else if ((s64)seq - (s64)con->in_seq > 1) {
pr_err("read_partial_message bad seq %lld expected %lld\n",
seq, con->in_seq + 1);
@@ -2342,10 +2367,10 @@ static int read_partial_message(struct ceph_connection *con)
/* skip this message */
dout("alloc_msg said skip message\n");
con->in_base_pos = -front_len - middle_len - data_len -
- sizeof(m->footer);
+ sizeof_footer(con);
con->in_tag = CEPH_MSGR_TAG_READY;
con->in_seq++;
- return 0;
+ return 1;
}
BUG_ON(!con->in_msg);
@@ -2423,7 +2448,7 @@ static int read_partial_message(struct ceph_connection *con)
}
if (need_sign && con->ops->check_message_signature &&
- con->ops->check_message_signature(con, m)) {
+ con->ops->check_message_signature(m)) {
pr_err("read_partial_message %p signature check failed\n", m);
return -EBADMSG;
}
@@ -2438,13 +2463,10 @@ static int read_partial_message(struct ceph_connection *con)
*/
static void process_message(struct ceph_connection *con)
{
- struct ceph_msg *msg;
+ struct ceph_msg *msg = con->in_msg;
BUG_ON(con->in_msg->con != con);
- con->in_msg->con = NULL;
- msg = con->in_msg;
con->in_msg = NULL;
- con->ops->put(con);
/* if first message, set peer_name */
if (con->peer_name.type == 0)
@@ -2513,13 +2535,13 @@ more:
more_kvec:
/* kvec data queued? */
- if (con->out_skip) {
- ret = write_partial_skip(con);
+ if (con->out_kvec_left) {
+ ret = write_partial_kvec(con);
if (ret <= 0)
goto out;
}
- if (con->out_kvec_left) {
- ret = write_partial_kvec(con);
+ if (con->out_skip) {
+ ret = write_partial_skip(con);
if (ret <= 0)
goto out;
}
@@ -2677,7 +2699,7 @@ more:
if (ret <= 0) {
switch (ret) {
case -EBADMSG:
- con->error_msg = "bad crc";
+ con->error_msg = "bad crc/signature";
/* fall through */
case -EBADE:
ret = -EIO;
@@ -2812,13 +2834,17 @@ static bool con_backoff(struct ceph_connection *con)
static void con_fault_finish(struct ceph_connection *con)
{
+ dout("%s %p\n", __func__, con);
+
/*
* in case we faulted due to authentication, invalidate our
* current tickets so that we can get new ones.
*/
- if (con->auth_retry && con->ops->invalidate_authorizer) {
- dout("calling invalidate_authorizer()\n");
- con->ops->invalidate_authorizer(con);
+ if (con->auth_retry) {
+ dout("auth_retry %d, invalidating\n", con->auth_retry);
+ if (con->ops->invalidate_authorizer)
+ con->ops->invalidate_authorizer(con);
+ con->auth_retry = 0;
}
if (con->ops->fault)
@@ -2918,10 +2944,8 @@ static void con_fault(struct ceph_connection *con)
if (con->in_msg) {
BUG_ON(con->in_msg->con != con);
- con->in_msg->con = NULL;
ceph_msg_put(con->in_msg);
con->in_msg = NULL;
- con->ops->put(con);
}
/* Requeue anything that hasn't been acked */
@@ -2952,15 +2976,8 @@ static void con_fault(struct ceph_connection *con)
* initialize a new messenger instance
*/
void ceph_messenger_init(struct ceph_messenger *msgr,
- struct ceph_entity_addr *myaddr,
- u64 supported_features,
- u64 required_features,
- bool nocrc,
- bool tcp_nodelay)
+ struct ceph_entity_addr *myaddr)
{
- msgr->supported_features = supported_features;
- msgr->required_features = required_features;
-
spin_lock_init(&msgr->global_seq_lock);
if (myaddr)
@@ -2970,8 +2987,6 @@ void ceph_messenger_init(struct ceph_messenger *msgr,
msgr->inst.addr.type = 0;
get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
encode_my_addr(msgr);
- msgr->nocrc = nocrc;
- msgr->tcp_nodelay = tcp_nodelay;
atomic_set(&msgr->stopping, 0);
write_pnet(&msgr->net, get_net(current->nsproxy->net_ns));
@@ -2986,6 +3001,15 @@ void ceph_messenger_fini(struct ceph_messenger *msgr)
}
EXPORT_SYMBOL(ceph_messenger_fini);
+static void msg_con_set(struct ceph_msg *msg, struct ceph_connection *con)
+{
+ if (msg->con)
+ msg->con->ops->put(msg->con);
+
+ msg->con = con ? con->ops->get(con) : NULL;
+ BUG_ON(msg->con != con);
+}
+
static void clear_standby(struct ceph_connection *con)
{
/* come back from STANDBY? */
@@ -3017,9 +3041,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
return;
}
- BUG_ON(msg->con != NULL);
- msg->con = con->ops->get(con);
- BUG_ON(msg->con == NULL);
+ msg_con_set(msg, con);
BUG_ON(!list_empty(&msg->list_head));
list_add_tail(&msg->list_head, &con->out_queue);
@@ -3047,31 +3069,45 @@ void ceph_msg_revoke(struct ceph_msg *msg)
{
struct ceph_connection *con = msg->con;
- if (!con)
+ if (!con) {
+ dout("%s msg %p null con\n", __func__, msg);
return; /* Message not in our possession */
+ }
mutex_lock(&con->mutex);
if (!list_empty(&msg->list_head)) {
dout("%s %p msg %p - was on queue\n", __func__, con, msg);
list_del_init(&msg->list_head);
- BUG_ON(msg->con == NULL);
- msg->con->ops->put(msg->con);
- msg->con = NULL;
msg->hdr.seq = 0;
ceph_msg_put(msg);
}
if (con->out_msg == msg) {
- dout("%s %p msg %p - was sending\n", __func__, con, msg);
- con->out_msg = NULL;
- if (con->out_kvec_is_msg) {
- con->out_skip = con->out_kvec_bytes;
- con->out_kvec_is_msg = false;
+ BUG_ON(con->out_skip);
+ /* footer */
+ if (con->out_msg_done) {
+ con->out_skip += con_out_kvec_skip(con);
+ } else {
+ BUG_ON(!msg->data_length);
+ if (con->peer_features & CEPH_FEATURE_MSG_AUTH)
+ con->out_skip += sizeof(msg->footer);
+ else
+ con->out_skip += sizeof(msg->old_footer);
}
+ /* data, middle, front */
+ if (msg->data_length)
+ con->out_skip += msg->cursor.total_resid;
+ if (msg->middle)
+ con->out_skip += con_out_kvec_skip(con);
+ con->out_skip += con_out_kvec_skip(con);
+
+ dout("%s %p msg %p - was sending, will write %d skip %d\n",
+ __func__, con, msg, con->out_kvec_bytes, con->out_skip);
msg->hdr.seq = 0;
-
+ con->out_msg = NULL;
ceph_msg_put(msg);
}
+
mutex_unlock(&con->mutex);
}
@@ -3080,16 +3116,13 @@ void ceph_msg_revoke(struct ceph_msg *msg)
*/
void ceph_msg_revoke_incoming(struct ceph_msg *msg)
{
- struct ceph_connection *con;
+ struct ceph_connection *con = msg->con;
- BUG_ON(msg == NULL);
- if (!msg->con) {
+ if (!con) {
dout("%s msg %p null con\n", __func__, msg);
-
return; /* Message not in our possession */
}
- con = msg->con;
mutex_lock(&con->mutex);
if (con->in_msg == msg) {
unsigned int front_len = le32_to_cpu(con->in_hdr.front_len);
@@ -3335,9 +3368,8 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
}
if (msg) {
BUG_ON(*skip);
+ msg_con_set(msg, con);
con->in_msg = msg;
- con->in_msg->con = con->ops->get(con);
- BUG_ON(con->in_msg->con == NULL);
} else {
/*
* Null message pointer means either we should skip
@@ -3377,25 +3409,21 @@ static void ceph_msg_free(struct ceph_msg *m)
static void ceph_msg_release(struct kref *kref)
{
struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
- LIST_HEAD(data);
- struct list_head *links;
- struct list_head *next;
+ struct ceph_msg_data *data, *next;
dout("%s %p\n", __func__, m);
WARN_ON(!list_empty(&m->list_head));
+ msg_con_set(m, NULL);
+
/* drop middle, data, if any */
if (m->middle) {
ceph_buffer_put(m->middle);
m->middle = NULL;
}
- list_splice_init(&m->data, &data);
- list_for_each_safe(links, next, &data) {
- struct ceph_msg_data *data;
-
- data = list_entry(links, struct ceph_msg_data, links);
- list_del_init(links);
+ list_for_each_entry_safe(data, next, &m->data, links) {
+ list_del_init(&data->links);
ceph_msg_data_destroy(data);
}
m->data_length = 0;
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index edda01626a45..de85dddc3dc0 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -364,10 +364,6 @@ static bool have_debugfs_info(struct ceph_mon_client *monc)
return monc->client->have_fsid && monc->auth->global_id > 0;
}
-/*
- * The monitor responds with mount ack indicate mount success. The
- * included client ticket allows the client to talk to MDSs and OSDs.
- */
static void ceph_monc_handle_map(struct ceph_mon_client *monc,
struct ceph_msg *msg)
{
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index f79ccac6699f..5bc053778fed 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -120,11 +120,13 @@ static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
}
#endif /* CONFIG_BLOCK */
-#define osd_req_op_data(oreq, whch, typ, fld) \
- ({ \
- BUG_ON(whch >= (oreq)->r_num_ops); \
- &(oreq)->r_ops[whch].typ.fld; \
- })
+#define osd_req_op_data(oreq, whch, typ, fld) \
+({ \
+ struct ceph_osd_request *__oreq = (oreq); \
+ unsigned int __whch = (whch); \
+ BUG_ON(__whch >= __oreq->r_num_ops); \
+ &__oreq->r_ops[__whch].typ.fld; \
+})
static struct ceph_osd_data *
osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
@@ -1750,8 +1752,7 @@ static void complete_request(struct ceph_osd_request *req)
* handle osd op reply. either call the callback if it is specified,
* or do the completion to wake up the waiting thread.
*/
-static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
- struct ceph_connection *con)
+static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
{
void *p, *end;
struct ceph_osd_request *req;
@@ -1769,6 +1770,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
u32 osdmap_epoch;
int already_completed;
u32 bytes;
+ u8 decode_redir;
unsigned int i;
tid = le64_to_cpu(msg->hdr.tid);
@@ -1840,6 +1842,15 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
p += 8 + 4; /* skip replay_version */
p += 8; /* skip user_version */
+ if (le16_to_cpu(msg->hdr.version) >= 7)
+ ceph_decode_8_safe(&p, end, decode_redir, bad_put);
+ else
+ decode_redir = 1;
+ } else {
+ decode_redir = 0;
+ }
+
+ if (decode_redir) {
err = ceph_redirect_decode(&p, end, &redir);
if (err)
goto bad_put;
@@ -2807,7 +2818,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
ceph_osdc_handle_map(osdc, msg);
break;
case CEPH_MSG_OSD_OPREPLY:
- handle_reply(osdc, msg, con);
+ handle_reply(osdc, msg);
break;
case CEPH_MSG_WATCH_NOTIFY:
handle_watch_notify(osdc, msg);
@@ -2842,16 +2853,13 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
mutex_lock(&osdc->request_mutex);
req = __lookup_request(osdc, tid);
if (!req) {
- pr_warn("%s osd%d tid %llu unknown, skipping\n",
- __func__, osd->o_osd, tid);
+ dout("%s osd%d tid %llu unknown, skipping\n", __func__,
+ osd->o_osd, tid);
m = NULL;
*skip = 1;
goto out;
}
- if (req->r_reply->con)
- dout("%s revoking msg %p from old con %p\n", __func__,
- req->r_reply, req->r_reply->con);
ceph_msg_revoke_incoming(req->r_reply);
if (front_len > req->r_reply->front_alloc_len) {
@@ -2978,17 +2986,19 @@ static int invalidate_authorizer(struct ceph_connection *con)
return ceph_monc_validate_auth(&osdc->client->monc);
}
-static int sign_message(struct ceph_connection *con, struct ceph_msg *msg)
+static int osd_sign_message(struct ceph_msg *msg)
{
- struct ceph_osd *o = con->private;
+ struct ceph_osd *o = msg->con->private;
struct ceph_auth_handshake *auth = &o->o_auth;
+
return ceph_auth_sign_message(auth, msg);
}
-static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg)
+static int osd_check_message_signature(struct ceph_msg *msg)
{
- struct ceph_osd *o = con->private;
+ struct ceph_osd *o = msg->con->private;
struct ceph_auth_handshake *auth = &o->o_auth;
+
return ceph_auth_check_message_signature(auth, msg);
}
@@ -3000,7 +3010,7 @@ static const struct ceph_connection_operations osd_con_ops = {
.verify_authorizer_reply = verify_authorizer_reply,
.invalidate_authorizer = invalidate_authorizer,
.alloc_msg = alloc_msg,
- .sign_message = sign_message,
- .check_message_signature = check_message_signature,
+ .sign_message = osd_sign_message,
+ .check_message_signature = osd_check_message_signature,
.fault = osd_reset,
};
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 7d8f581d9f1f..243574c8cf33 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -342,23 +342,32 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
c->choose_local_tries = ceph_decode_32(p);
c->choose_local_fallback_tries = ceph_decode_32(p);
c->choose_total_tries = ceph_decode_32(p);
- dout("crush decode tunable choose_local_tries = %d",
+ dout("crush decode tunable choose_local_tries = %d\n",
c->choose_local_tries);
- dout("crush decode tunable choose_local_fallback_tries = %d",
+ dout("crush decode tunable choose_local_fallback_tries = %d\n",
c->choose_local_fallback_tries);
- dout("crush decode tunable choose_total_tries = %d",
+ dout("crush decode tunable choose_total_tries = %d\n",
c->choose_total_tries);
ceph_decode_need(p, end, sizeof(u32), done);
c->chooseleaf_descend_once = ceph_decode_32(p);
- dout("crush decode tunable chooseleaf_descend_once = %d",
+ dout("crush decode tunable chooseleaf_descend_once = %d\n",
c->chooseleaf_descend_once);
ceph_decode_need(p, end, sizeof(u8), done);
c->chooseleaf_vary_r = ceph_decode_8(p);
- dout("crush decode tunable chooseleaf_vary_r = %d",
+ dout("crush decode tunable chooseleaf_vary_r = %d\n",
c->chooseleaf_vary_r);
+ /* skip straw_calc_version, allowed_bucket_algs */
+ ceph_decode_need(p, end, sizeof(u8) + sizeof(u32), done);
+ *p += sizeof(u8) + sizeof(u32);
+
+ ceph_decode_need(p, end, sizeof(u8), done);
+ c->chooseleaf_stable = ceph_decode_8(p);
+ dout("crush decode tunable chooseleaf_stable = %d\n",
+ c->chooseleaf_stable);
+
done:
dout("crush_decode success\n");
return c;
diff --git a/net/core/Makefile b/net/core/Makefile
index 086b01fbe1bd..0b835de04de3 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
- sock_diag.o dev_ioctl.o tso.o
+ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o
obj-$(CONFIG_XFRM) += flow.o
obj-y += net-sysfs.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 617088aee21d..fa9dc6450b08 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -83,8 +83,8 @@ static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int syn
/*
* Wait for the last received packet to be different from skb
*/
-static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
- const struct sk_buff *skb)
+int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
+ const struct sk_buff *skb)
{
int error;
DEFINE_WAIT_FUNC(wait, receiver_wake_function);
@@ -130,6 +130,7 @@ out_noerr:
error = 1;
goto out;
}
+EXPORT_SYMBOL(__skb_wait_for_more_packets);
static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
{
@@ -161,13 +162,15 @@ done:
}
/**
- * __skb_recv_datagram - Receive a datagram skbuff
+ * __skb_try_recv_datagram - Receive a datagram skbuff
* @sk: socket
* @flags: MSG_ flags
* @peeked: returns non-zero if this packet has been seen before
* @off: an offset in bytes to peek skb from. Returns an offset
* within an skb where data actually starts
* @err: error code returned
+ * @last: set to last peeked message to inform the wait function
+ * what to look for when peeking
*
* Get a datagram skbuff, understands the peeking, nonblocking wakeups
* and possible races. This replaces identical code in packet, raw and
@@ -175,9 +178,11 @@ done:
* the long standing peek and read race for datagram sockets. If you
* alter this routine remember it must be re-entrant.
*
- * This function will lock the socket if a skb is returned, so the caller
- * needs to unlock the socket in that case (usually by calling
- * skb_free_datagram)
+ * This function will lock the socket if a skb is returned, so
+ * the caller needs to unlock the socket in that case (usually by
+ * calling skb_free_datagram). Returns NULL with *err set to
+ * -EAGAIN if no data was available or to some other value if an
+ * error was detected.
*
* * It does not lock socket since today. This function is
* * free of race conditions. This measure should/can improve
@@ -191,13 +196,13 @@ done:
* quite explicitly by POSIX 1003.1g, don't change them without having
* the standard around please.
*/
-struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
- int *peeked, int *off, int *err)
+struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
+ int *peeked, int *off, int *err,
+ struct sk_buff **last)
{
struct sk_buff_head *queue = &sk->sk_receive_queue;
- struct sk_buff *skb, *last;
+ struct sk_buff *skb;
unsigned long cpu_flags;
- long timeo;
/*
* Caller is allowed not to check sk->sk_err before skb_recv_datagram()
*/
@@ -206,8 +211,6 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
if (error)
goto no_packet;
- timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
-
do {
/* Again only user level code calls this function, so nothing
* interrupt level will suddenly eat the receive_queue.
@@ -217,10 +220,10 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
*/
int _off = *off;
- last = (struct sk_buff *)queue;
+ *last = (struct sk_buff *)queue;
spin_lock_irqsave(&queue->lock, cpu_flags);
skb_queue_walk(queue, skb) {
- last = skb;
+ *last = skb;
*peeked = skb->peeked;
if (flags & MSG_PEEK) {
if (_off >= skb->len && (skb->len || _off ||
@@ -231,8 +234,11 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
skb = skb_set_peeked(skb);
error = PTR_ERR(skb);
- if (IS_ERR(skb))
- goto unlock_err;
+ if (IS_ERR(skb)) {
+ spin_unlock_irqrestore(&queue->lock,
+ cpu_flags);
+ goto no_packet;
+ }
atomic_inc(&skb->users);
} else
@@ -242,25 +248,38 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
*off = _off;
return skb;
}
+
spin_unlock_irqrestore(&queue->lock, cpu_flags);
+ } while (sk_can_busy_loop(sk) &&
+ sk_busy_loop(sk, flags & MSG_DONTWAIT));
- if (sk_can_busy_loop(sk) &&
- sk_busy_loop(sk, flags & MSG_DONTWAIT))
- continue;
+ error = -EAGAIN;
- /* User doesn't want to wait */
- error = -EAGAIN;
- if (!timeo)
- goto no_packet;
+no_packet:
+ *err = error;
+ return NULL;
+}
+EXPORT_SYMBOL(__skb_try_recv_datagram);
- } while (!wait_for_more_packets(sk, err, &timeo, last));
+struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
+ int *peeked, int *off, int *err)
+{
+ struct sk_buff *skb, *last;
+ long timeo;
- return NULL;
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+ do {
+ skb = __skb_try_recv_datagram(sk, flags, peeked, off, err,
+ &last);
+ if (skb)
+ return skb;
+
+ if (*err != -EAGAIN)
+ break;
+ } while (timeo &&
+ !__skb_wait_for_more_packets(sk, err, &timeo, last));
-unlock_err:
- spin_unlock_irqrestore(&queue->lock, cpu_flags);
-no_packet:
- *err = error;
return NULL;
}
EXPORT_SYMBOL(__skb_recv_datagram);
@@ -785,7 +804,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
if (sock_writeable(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
return mask;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 8ce3f74cd6b9..0ef061b2badc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -96,6 +96,7 @@
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
+#include <net/busy_poll.h>
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <net/dst.h>
@@ -137,6 +138,7 @@
#include <linux/errqueue.h>
#include <linux/hrtimer.h>
#include <linux/netfilter_ingress.h>
+#include <linux/sctp.h>
#include "net-sysfs.h"
@@ -182,8 +184,8 @@ EXPORT_SYMBOL(dev_base_lock);
/* protects napi_hash addition/deletion and napi_gen_id */
static DEFINE_SPINLOCK(napi_hash_lock);
-static unsigned int napi_gen_id;
-static DEFINE_HASHTABLE(napi_hash, 8);
+static unsigned int napi_gen_id = NR_CPUS;
+static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
static seqcount_t devnet_rename_seq;
@@ -1674,6 +1676,22 @@ void net_dec_ingress_queue(void)
EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
#endif
+#ifdef CONFIG_NET_EGRESS
+static struct static_key egress_needed __read_mostly;
+
+void net_inc_egress_queue(void)
+{
+ static_key_slow_inc(&egress_needed);
+}
+EXPORT_SYMBOL_GPL(net_inc_egress_queue);
+
+void net_dec_egress_queue(void)
+{
+ static_key_slow_dec(&egress_needed);
+}
+EXPORT_SYMBOL_GPL(net_dec_egress_queue);
+#endif
+
static struct static_key netstamp_needed __read_mostly;
#ifdef HAVE_JUMP_LABEL
/* We are not allowed to call static_key_slow_dec() from irq context
@@ -2403,17 +2421,20 @@ static void skb_warn_bad_offload(const struct sk_buff *skb)
{
static const netdev_features_t null_features = 0;
struct net_device *dev = skb->dev;
- const char *driver = "";
+ const char *name = "";
if (!net_ratelimit())
return;
- if (dev && dev->dev.parent)
- driver = dev_driver_string(dev->dev.parent);
-
+ if (dev) {
+ if (dev->dev.parent)
+ name = dev_driver_string(dev->dev.parent);
+ else
+ name = netdev_name(dev);
+ }
WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
"gso_type=%d ip_summed=%d\n",
- driver, dev ? &dev->features : &null_features,
+ name, dev ? &dev->features : &null_features,
skb->sk ? &skb->sk->sk_route_caps : &null_features,
skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
skb_shinfo(skb)->gso_type, skb->ip_summed);
@@ -2467,6 +2488,141 @@ out:
}
EXPORT_SYMBOL(skb_checksum_help);
+/* skb_csum_offload_check - Driver helper function to determine if a device
+ * with limited checksum offload capabilities is able to offload the checksum
+ * for a given packet.
+ *
+ * Arguments:
+ * skb - sk_buff for the packet in question
+ * spec - contains the description of what device can offload
+ * csum_encapped - returns true if the checksum being offloaded is
+ * encpasulated. That is it is checksum for the transport header
+ * in the inner headers.
+ * checksum_help - when set indicates that helper function should
+ * call skb_checksum_help if offload checks fail
+ *
+ * Returns:
+ * true: Packet has passed the checksum checks and should be offloadable to
+ * the device (a driver may still need to check for additional
+ * restrictions of its device)
+ * false: Checksum is not offloadable. If checksum_help was set then
+ * skb_checksum_help was called to resolve checksum for non-GSO
+ * packets and when IP protocol is not SCTP
+ */
+bool __skb_csum_offload_chk(struct sk_buff *skb,
+ const struct skb_csum_offl_spec *spec,
+ bool *csum_encapped,
+ bool csum_help)
+{
+ struct iphdr *iph;
+ struct ipv6hdr *ipv6;
+ void *nhdr;
+ int protocol;
+ u8 ip_proto;
+
+ if (skb->protocol == htons(ETH_P_8021Q) ||
+ skb->protocol == htons(ETH_P_8021AD)) {
+ if (!spec->vlan_okay)
+ goto need_help;
+ }
+
+ /* We check whether the checksum refers to a transport layer checksum in
+ * the outermost header or an encapsulated transport layer checksum that
+ * corresponds to the inner headers of the skb. If the checksum is for
+ * something else in the packet we need help.
+ */
+ if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) {
+ /* Non-encapsulated checksum */
+ protocol = eproto_to_ipproto(vlan_get_protocol(skb));
+ nhdr = skb_network_header(skb);
+ *csum_encapped = false;
+ if (spec->no_not_encapped)
+ goto need_help;
+ } else if (skb->encapsulation && spec->encap_okay &&
+ skb_checksum_start_offset(skb) ==
+ skb_inner_transport_offset(skb)) {
+ /* Encapsulated checksum */
+ *csum_encapped = true;
+ switch (skb->inner_protocol_type) {
+ case ENCAP_TYPE_ETHER:
+ protocol = eproto_to_ipproto(skb->inner_protocol);
+ break;
+ case ENCAP_TYPE_IPPROTO:
+ protocol = skb->inner_protocol;
+ break;
+ }
+ nhdr = skb_inner_network_header(skb);
+ } else {
+ goto need_help;
+ }
+
+ switch (protocol) {
+ case IPPROTO_IP:
+ if (!spec->ipv4_okay)
+ goto need_help;
+ iph = nhdr;
+ ip_proto = iph->protocol;
+ if (iph->ihl != 5 && !spec->ip_options_okay)
+ goto need_help;
+ break;
+ case IPPROTO_IPV6:
+ if (!spec->ipv6_okay)
+ goto need_help;
+ if (spec->no_encapped_ipv6 && *csum_encapped)
+ goto need_help;
+ ipv6 = nhdr;
+ nhdr += sizeof(*ipv6);
+ ip_proto = ipv6->nexthdr;
+ break;
+ default:
+ goto need_help;
+ }
+
+ip_proto_again:
+ switch (ip_proto) {
+ case IPPROTO_TCP:
+ if (!spec->tcp_okay ||
+ skb->csum_offset != offsetof(struct tcphdr, check))
+ goto need_help;
+ break;
+ case IPPROTO_UDP:
+ if (!spec->udp_okay ||
+ skb->csum_offset != offsetof(struct udphdr, check))
+ goto need_help;
+ break;
+ case IPPROTO_SCTP:
+ if (!spec->sctp_okay ||
+ skb->csum_offset != offsetof(struct sctphdr, checksum))
+ goto cant_help;
+ break;
+ case NEXTHDR_HOP:
+ case NEXTHDR_ROUTING:
+ case NEXTHDR_DEST: {
+ u8 *opthdr = nhdr;
+
+ if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay)
+ goto need_help;
+
+ ip_proto = opthdr[0];
+ nhdr += (opthdr[1] + 1) << 3;
+
+ goto ip_proto_again;
+ }
+ default:
+ goto need_help;
+ }
+
+ /* Passed the tests for offloading checksum */
+ return true;
+
+need_help:
+ if (csum_help && !skb_shinfo(skb)->gso_size)
+ skb_checksum_help(skb);
+cant_help:
+ return false;
+}
+EXPORT_SYMBOL(__skb_csum_offload_chk);
+
__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
__be16 type = skb->protocol;
@@ -2539,6 +2695,8 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
*
* It may return NULL if the skb requires no segmentation. This is
* only possible when GSO is used for verifying header integrity.
+ *
+ * Segmentation preserves SKB_SGO_CB_OFFSET bytes of previous skb cb.
*/
struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
netdev_features_t features, bool tx_path)
@@ -2553,6 +2711,9 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
return ERR_PTR(err);
}
+ BUILD_BUG_ON(SKB_SGO_CB_OFFSET +
+ sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
+
SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
SKB_GSO_CB(skb)->encap_level = 0;
@@ -2641,7 +2802,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
if (skb->ip_summed != CHECKSUM_NONE &&
!can_checksum_protocol(features, type)) {
- features &= ~NETIF_F_ALL_CSUM;
+ features &= ~NETIF_F_CSUM_MASK;
} else if (illegal_highdma(skb->dev, skb)) {
features &= ~NETIF_F_SG;
}
@@ -2788,7 +2949,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
else
skb_set_transport_header(skb,
skb_checksum_start_offset(skb));
- if (!(features & NETIF_F_ALL_CSUM) &&
+ if (!(features & NETIF_F_CSUM_MASK) &&
skb_checksum_help(skb))
goto out_kfree_skb;
}
@@ -2867,7 +3028,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
bool contended;
int rc;
- qdisc_pkt_len_init(skb);
qdisc_calculate_pkt_len(skb, q);
/*
* Heuristic to force contended enqueues to serialize on a
@@ -2925,7 +3085,8 @@ static void skb_update_prio(struct sk_buff *skb)
struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
if (!skb->priority && skb->sk && map) {
- unsigned int prioidx = skb->sk->sk_cgrp_prioidx;
+ unsigned int prioidx =
+ sock_cgroup_prioidx(&skb->sk->sk_cgrp_data);
if (prioidx < map->priomap_len)
skb->priority = map->priomap[prioidx];
@@ -2959,6 +3120,49 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(dev_loopback_xmit);
+#ifdef CONFIG_NET_EGRESS
+static struct sk_buff *
+sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
+{
+ struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list);
+ struct tcf_result cl_res;
+
+ if (!cl)
+ return skb;
+
+ /* skb->tc_verd and qdisc_skb_cb(skb)->pkt_len were already set
+ * earlier by the caller.
+ */
+ qdisc_bstats_cpu_update(cl->q, skb);
+
+ switch (tc_classify(skb, cl, &cl_res, false)) {
+ case TC_ACT_OK:
+ case TC_ACT_RECLASSIFY:
+ skb->tc_index = TC_H_MIN(cl_res.classid);
+ break;
+ case TC_ACT_SHOT:
+ qdisc_qstats_cpu_drop(cl->q);
+ *ret = NET_XMIT_DROP;
+ goto drop;
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ *ret = NET_XMIT_SUCCESS;
+drop:
+ kfree_skb(skb);
+ return NULL;
+ case TC_ACT_REDIRECT:
+ /* No need to push/pop skb's mac_header here on egress! */
+ skb_do_redirect(skb);
+ *ret = NET_XMIT_SUCCESS;
+ return NULL;
+ default:
+ break;
+ }
+
+ return skb;
+}
+#endif /* CONFIG_NET_EGRESS */
+
static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_XPS
@@ -3018,7 +3222,9 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
int queue_index = 0;
#ifdef CONFIG_XPS
- if (skb->sender_cpu == 0)
+ u32 sender_cpu = skb->sender_cpu - 1;
+
+ if (sender_cpu >= (u32)NR_CPUS)
skb->sender_cpu = raw_smp_processor_id() + 1;
#endif
@@ -3083,6 +3289,17 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
skb_update_prio(skb);
+ qdisc_pkt_len_init(skb);
+#ifdef CONFIG_NET_CLS_ACT
+ skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
+# ifdef CONFIG_NET_EGRESS
+ if (static_key_false(&egress_needed)) {
+ skb = sch_handle_egress(skb, &rc, dev);
+ if (!skb)
+ goto out;
+ }
+# endif
+#endif
/* If device/qdisc don't need skb->dst, release it right now while
* its hot in this cpu cache.
*/
@@ -3104,9 +3321,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
txq = netdev_pick_tx(dev, skb, accel_priv);
q = rcu_dereference_bh(txq->qdisc);
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
-#endif
trace_net_dev_queue(skb);
if (q->enqueue) {
rc = __dev_xmit_skb(skb, q, dev, txq);
@@ -3663,9 +3877,9 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev,
EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
#endif
-static inline struct sk_buff *handle_ing(struct sk_buff *skb,
- struct packet_type **pt_prev,
- int *ret, struct net_device *orig_dev)
+static inline struct sk_buff *
+sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
+ struct net_device *orig_dev)
{
#ifdef CONFIG_NET_CLS_ACT
struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
@@ -3859,7 +4073,7 @@ another_round:
skip_taps:
#ifdef CONFIG_NET_INGRESS
if (static_key_false(&ingress_needed)) {
- skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
+ skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
if (!skb)
goto out;
@@ -4137,6 +4351,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
diffs |= p->vlan_tci ^ skb->vlan_tci;
+ diffs |= skb_metadata_dst_cmp(p, skb);
if (maclen == ETH_HLEN)
diffs |= compare_ether_header(skb_mac_header(p),
skb_mac_header(skb));
@@ -4334,10 +4549,12 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
break;
case GRO_MERGED_FREE:
- if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+ if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
+ skb_dst_drop(skb);
kmem_cache_free(skbuff_head_cache, skb);
- else
+ } else {
__kfree_skb(skb);
+ }
break;
case GRO_HELD:
@@ -4350,6 +4567,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
+ skb_mark_napi_id(skb, napi);
trace_napi_gro_receive_entry(skb);
skb_gro_reset_offset(skb);
@@ -4383,7 +4601,10 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
if (!skb) {
skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
- napi->skb = skb;
+ if (skb) {
+ napi->skb = skb;
+ skb_mark_napi_id(skb, napi);
+ }
}
return skb;
}
@@ -4658,7 +4879,7 @@ void napi_complete_done(struct napi_struct *n, int work_done)
EXPORT_SYMBOL(napi_complete_done);
/* must be called under rcu_read_lock(), as we dont take a reference */
-struct napi_struct *napi_by_id(unsigned int napi_id)
+static struct napi_struct *napi_by_id(unsigned int napi_id)
{
unsigned int hash = napi_id % HASH_SIZE(napi_hash);
struct napi_struct *napi;
@@ -4669,43 +4890,101 @@ struct napi_struct *napi_by_id(unsigned int napi_id)
return NULL;
}
-EXPORT_SYMBOL_GPL(napi_by_id);
-void napi_hash_add(struct napi_struct *napi)
+#if defined(CONFIG_NET_RX_BUSY_POLL)
+#define BUSY_POLL_BUDGET 8
+bool sk_busy_loop(struct sock *sk, int nonblock)
{
- if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) {
+ unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
+ int (*busy_poll)(struct napi_struct *dev);
+ struct napi_struct *napi;
+ int rc = false;
- spin_lock(&napi_hash_lock);
+ rcu_read_lock();
- /* 0 is not a valid id, we also skip an id that is taken
- * we expect both events to be extremely rare
- */
- napi->napi_id = 0;
- while (!napi->napi_id) {
- napi->napi_id = ++napi_gen_id;
- if (napi_by_id(napi->napi_id))
- napi->napi_id = 0;
+ napi = napi_by_id(sk->sk_napi_id);
+ if (!napi)
+ goto out;
+
+ /* Note: ndo_busy_poll method is optional in linux-4.5 */
+ busy_poll = napi->dev->netdev_ops->ndo_busy_poll;
+
+ do {
+ rc = 0;
+ local_bh_disable();
+ if (busy_poll) {
+ rc = busy_poll(napi);
+ } else if (napi_schedule_prep(napi)) {
+ void *have = netpoll_poll_lock(napi);
+
+ if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
+ rc = napi->poll(napi, BUSY_POLL_BUDGET);
+ trace_napi_poll(napi);
+ if (rc == BUSY_POLL_BUDGET) {
+ napi_complete_done(napi, rc);
+ napi_schedule(napi);
+ }
+ }
+ netpoll_poll_unlock(have);
}
+ if (rc > 0)
+ NET_ADD_STATS_BH(sock_net(sk),
+ LINUX_MIB_BUSYPOLLRXPACKETS, rc);
+ local_bh_enable();
- hlist_add_head_rcu(&napi->napi_hash_node,
- &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
+ if (rc == LL_FLUSH_FAILED)
+ break; /* permanent failure */
- spin_unlock(&napi_hash_lock);
- }
+ cpu_relax();
+ } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
+ !need_resched() && !busy_loop_timeout(end_time));
+
+ rc = !skb_queue_empty(&sk->sk_receive_queue);
+out:
+ rcu_read_unlock();
+ return rc;
+}
+EXPORT_SYMBOL(sk_busy_loop);
+
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
+void napi_hash_add(struct napi_struct *napi)
+{
+ if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) ||
+ test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
+ return;
+
+ spin_lock(&napi_hash_lock);
+
+ /* 0..NR_CPUS+1 range is reserved for sender_cpu use */
+ do {
+ if (unlikely(++napi_gen_id < NR_CPUS + 1))
+ napi_gen_id = NR_CPUS + 1;
+ } while (napi_by_id(napi_gen_id));
+ napi->napi_id = napi_gen_id;
+
+ hlist_add_head_rcu(&napi->napi_hash_node,
+ &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
+
+ spin_unlock(&napi_hash_lock);
}
EXPORT_SYMBOL_GPL(napi_hash_add);
/* Warning : caller is responsible to make sure rcu grace period
* is respected before freeing memory containing @napi
*/
-void napi_hash_del(struct napi_struct *napi)
+bool napi_hash_del(struct napi_struct *napi)
{
+ bool rcu_sync_needed = false;
+
spin_lock(&napi_hash_lock);
- if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state))
+ if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) {
+ rcu_sync_needed = true;
hlist_del_rcu(&napi->napi_hash_node);
-
+ }
spin_unlock(&napi_hash_lock);
+ return rcu_sync_needed;
}
EXPORT_SYMBOL_GPL(napi_hash_del);
@@ -4741,6 +5020,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
napi->poll_owner = -1;
#endif
set_bit(NAPI_STATE_SCHED, &napi->state);
+ napi_hash_add(napi);
}
EXPORT_SYMBOL(netif_napi_add);
@@ -4760,8 +5040,12 @@ void napi_disable(struct napi_struct *n)
}
EXPORT_SYMBOL(napi_disable);
+/* Must be called in process context */
void netif_napi_del(struct napi_struct *napi)
{
+ might_sleep();
+ if (napi_hash_del(napi))
+ synchronize_net();
list_del_init(&napi->dev_list);
napi_free_frags(napi);
@@ -5095,12 +5379,12 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
{
struct netdev_adjacent *lower;
- lower = list_entry((*iter)->next, struct netdev_adjacent, list);
+ lower = list_entry(*iter, struct netdev_adjacent, list);
if (&lower->list == &dev->adj_list.lower)
return NULL;
- *iter = &lower->list;
+ *iter = lower->list.next;
return lower->dev;
}
@@ -5348,7 +5632,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master,
- void *private)
+ void *upper_priv, void *upper_info)
{
struct netdev_notifier_changeupper_info changeupper_info;
struct netdev_adjacent *i, *j, *to_i, *to_j;
@@ -5372,6 +5656,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
changeupper_info.upper_dev = upper_dev;
changeupper_info.master = master;
changeupper_info.linking = true;
+ changeupper_info.upper_info = upper_info;
ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
&changeupper_info.info);
@@ -5379,7 +5664,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (ret)
return ret;
- ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
+ ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, upper_priv,
master);
if (ret)
return ret;
@@ -5417,8 +5702,12 @@ static int __netdev_upper_dev_link(struct net_device *dev,
goto rollback_lower_mesh;
}
- call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
- &changeupper_info.info);
+ ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ &changeupper_info.info);
+ ret = notifier_to_errno(ret);
+ if (ret)
+ goto rollback_lower_mesh;
+
return 0;
rollback_lower_mesh:
@@ -5472,7 +5761,7 @@ rollback_mesh:
int netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev)
{
- return __netdev_upper_dev_link(dev, upper_dev, false, NULL);
+ return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL);
}
EXPORT_SYMBOL(netdev_upper_dev_link);
@@ -5480,6 +5769,8 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
* netdev_master_upper_dev_link - Add a master link to the upper device
* @dev: device
* @upper_dev: new upper device
+ * @upper_priv: upper device private
+ * @upper_info: upper info to be passed down via notifier
*
* Adds a link to device which is upper to this one. In this case, only
* one master upper device can be linked, although other non-master devices
@@ -5488,20 +5779,14 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
* counts are adjusted and the function returns zero.
*/
int netdev_master_upper_dev_link(struct net_device *dev,
- struct net_device *upper_dev)
+ struct net_device *upper_dev,
+ void *upper_priv, void *upper_info)
{
- return __netdev_upper_dev_link(dev, upper_dev, true, NULL);
+ return __netdev_upper_dev_link(dev, upper_dev, true,
+ upper_priv, upper_info);
}
EXPORT_SYMBOL(netdev_master_upper_dev_link);
-int netdev_master_upper_dev_link_private(struct net_device *dev,
- struct net_device *upper_dev,
- void *private)
-{
- return __netdev_upper_dev_link(dev, upper_dev, true, private);
-}
-EXPORT_SYMBOL(netdev_master_upper_dev_link_private);
-
/**
* netdev_upper_dev_unlink - Removes a link to upper device
* @dev: device
@@ -5660,7 +5945,7 @@ EXPORT_SYMBOL(netdev_lower_dev_get_private);
int dev_get_nest_level(struct net_device *dev,
- bool (*type_check)(struct net_device *dev))
+ bool (*type_check)(const struct net_device *dev))
{
struct net_device *lower = NULL;
struct list_head *iter;
@@ -5682,6 +5967,26 @@ int dev_get_nest_level(struct net_device *dev,
}
EXPORT_SYMBOL(dev_get_nest_level);
+/**
+ * netdev_lower_change - Dispatch event about lower device state change
+ * @lower_dev: device
+ * @lower_state_info: state to dispatch
+ *
+ * Send NETDEV_CHANGELOWERSTATE to netdev notifiers with info.
+ * The caller must hold the RTNL lock.
+ */
+void netdev_lower_state_changed(struct net_device *lower_dev,
+ void *lower_state_info)
+{
+ struct netdev_notifier_changelowerstate_info changelowerstate_info;
+
+ ASSERT_RTNL();
+ changelowerstate_info.lower_state_info = lower_state_info;
+ call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev,
+ &changelowerstate_info.info);
+}
+EXPORT_SYMBOL(netdev_lower_state_changed);
+
static void dev_change_rx_flags(struct net_device *dev, int flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -6372,9 +6677,9 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
/* UFO needs SG and checksumming */
if (features & NETIF_F_UFO) {
/* maybe split UFO into V4 and V6? */
- if (!((features & NETIF_F_GEN_CSUM) ||
- (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
- == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+ if (!(features & NETIF_F_HW_CSUM) &&
+ ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) !=
+ (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) {
netdev_dbg(dev,
"Dropping NETIF_F_UFO since no checksum offload features.\n");
features &= ~NETIF_F_UFO;
@@ -6402,7 +6707,7 @@ int __netdev_update_features(struct net_device *dev)
struct net_device *upper, *lower;
netdev_features_t features;
struct list_head *iter;
- int err = 0;
+ int err = -1;
ASSERT_RTNL();
@@ -6419,21 +6724,27 @@ int __netdev_update_features(struct net_device *dev)
features = netdev_sync_upper_features(dev, upper, features);
if (dev->features == features)
- return 0;
+ goto sync_lower;
netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
&dev->features, &features);
if (dev->netdev_ops->ndo_set_features)
err = dev->netdev_ops->ndo_set_features(dev, features);
+ else
+ err = 0;
if (unlikely(err < 0)) {
netdev_err(dev,
"set_features() failed (%d); wanted %pNF, left %pNF\n",
err, &features, &dev->features);
+ /* return non-0 since some features might have changed and
+ * it's better to fire a spurious notification than miss it
+ */
return -1;
}
+sync_lower:
/* some features must be disabled on lower devices when disabled
* on an upper device (think: bonding master or bridge)
*/
@@ -6443,7 +6754,7 @@ int __netdev_update_features(struct net_device *dev)
if (!err)
dev->features = features;
- return 1;
+ return err < 0 ? 0 : 1;
}
/**
@@ -7111,8 +7422,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
- if (!dev->tx_queue_len)
+ if (!dev->tx_queue_len) {
dev->priv_flags |= IFF_NO_QUEUE;
+ dev->tx_queue_len = 1;
+ }
dev->num_tx_queues = txqs;
dev->real_num_tx_queues = txqs;
@@ -7155,11 +7468,13 @@ EXPORT_SYMBOL(alloc_netdev_mqs);
* This function does the last stage of destroying an allocated device
* interface. The reference to the device object is released.
* If this is the last reference then it will be freed.
+ * Must be called in process context.
*/
void free_netdev(struct net_device *dev)
{
struct napi_struct *p, *n;
+ might_sleep();
netif_free_tx_queues(dev);
#ifdef CONFIG_SYSFS
kvfree(dev->_rx);
@@ -7468,16 +7783,16 @@ static int dev_cpu_callback(struct notifier_block *nfb,
netdev_features_t netdev_increment_features(netdev_features_t all,
netdev_features_t one, netdev_features_t mask)
{
- if (mask & NETIF_F_GEN_CSUM)
- mask |= NETIF_F_ALL_CSUM;
+ if (mask & NETIF_F_HW_CSUM)
+ mask |= NETIF_F_CSUM_MASK;
mask |= NETIF_F_VLAN_CHALLENGED;
- all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
+ all |= one & (NETIF_F_ONE_FOR_ALL | NETIF_F_CSUM_MASK) & mask;
all &= one | ~NETIF_F_ALL_FOR_ALL;
/* If one device supports hw checksumming, set for all. */
- if (all & NETIF_F_GEN_CSUM)
- all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
+ if (all & NETIF_F_HW_CSUM)
+ all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_HW_CSUM);
return all;
}
diff --git a/net/core/dst.c b/net/core/dst.c
index 2a1818065e12..a1656e3b8d72 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -301,12 +301,13 @@ void dst_release(struct dst_entry *dst)
{
if (dst) {
int newrefcnt;
+ unsigned short nocache = dst->flags & DST_NOCACHE;
newrefcnt = atomic_dec_return(&dst->__refcnt);
if (unlikely(newrefcnt < 0))
net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
__func__, dst, newrefcnt);
- if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt)
+ if (!newrefcnt && unlikely(nocache))
call_rcu(&dst->rcu_head, dst_destroy_rcu);
}
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 29edf74846fc..daf04709dd3c 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -87,7 +87,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
- [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp",
+ [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
[NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu",
[NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter",
[NETIF_F_RXHASH_BIT] = "rx-hashing",
@@ -191,6 +191,23 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
return ret;
}
+static int phy_get_sset_count(struct phy_device *phydev)
+{
+ int ret;
+
+ if (phydev->drv->get_sset_count &&
+ phydev->drv->get_strings &&
+ phydev->drv->get_stats) {
+ mutex_lock(&phydev->lock);
+ ret = phydev->drv->get_sset_count(phydev);
+ mutex_unlock(&phydev->lock);
+
+ return ret;
+ }
+
+ return -EOPNOTSUPP;
+}
+
static int __ethtool_get_sset_count(struct net_device *dev, int sset)
{
const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -204,6 +221,13 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
if (sset == ETH_SS_TUNABLES)
return ARRAY_SIZE(tunable_strings);
+ if (sset == ETH_SS_PHY_STATS) {
+ if (dev->phydev)
+ return phy_get_sset_count(dev->phydev);
+ else
+ return -EOPNOTSUPP;
+ }
+
if (ops->get_sset_count && ops->get_strings)
return ops->get_sset_count(dev, sset);
else
@@ -223,7 +247,17 @@ static void __ethtool_get_strings(struct net_device *dev,
sizeof(rss_hash_func_strings));
else if (stringset == ETH_SS_TUNABLES)
memcpy(data, tunable_strings, sizeof(tunable_strings));
- else
+ else if (stringset == ETH_SS_PHY_STATS) {
+ struct phy_device *phydev = dev->phydev;
+
+ if (phydev) {
+ mutex_lock(&phydev->lock);
+ phydev->drv->get_strings(phydev, data);
+ mutex_unlock(&phydev->lock);
+ } else {
+ return;
+ }
+ } else
/* ops->get_strings is valid because checked earlier */
ops->get_strings(dev, stringset, data);
}
@@ -235,7 +269,7 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd)
switch (eth_cmd) {
case ETHTOOL_GTXCSUM:
case ETHTOOL_STXCSUM:
- return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM;
+ return NETIF_F_CSUM_MASK | NETIF_F_SCTP_CRC;
case ETHTOOL_GRXCSUM:
case ETHTOOL_SRXCSUM:
return NETIF_F_RXCSUM;
@@ -1401,6 +1435,47 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
return ret;
}
+static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
+{
+ struct ethtool_stats stats;
+ struct phy_device *phydev = dev->phydev;
+ u64 *data;
+ int ret, n_stats;
+
+ if (!phydev)
+ return -EOPNOTSUPP;
+
+ n_stats = phy_get_sset_count(phydev);
+
+ if (n_stats < 0)
+ return n_stats;
+ WARN_ON(n_stats == 0);
+
+ if (copy_from_user(&stats, useraddr, sizeof(stats)))
+ return -EFAULT;
+
+ stats.n_stats = n_stats;
+ data = kmalloc_array(n_stats, sizeof(u64), GFP_USER);
+ if (!data)
+ return -ENOMEM;
+
+ mutex_lock(&phydev->lock);
+ phydev->drv->get_stats(phydev, &stats, data);
+ mutex_unlock(&phydev->lock);
+
+ ret = -EFAULT;
+ if (copy_to_user(useraddr, &stats, sizeof(stats)))
+ goto out;
+ useraddr += sizeof(stats);
+ if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64)))
+ goto out;
+ ret = 0;
+
+ out:
+ kfree(data);
+ return ret;
+}
+
static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr)
{
struct ethtool_perm_addr epaddr;
@@ -1779,6 +1854,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GSSET_INFO:
case ETHTOOL_GSTRINGS:
case ETHTOOL_GSTATS:
+ case ETHTOOL_GPHYSTATS:
case ETHTOOL_GTSO:
case ETHTOOL_GPERMADDR:
case ETHTOOL_GUFO:
@@ -1991,6 +2067,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_STUNABLE:
rc = ethtool_set_tunable(dev, useraddr);
break;
+ case ETHTOOL_GPHYSTATS:
+ rc = ethtool_get_phy_stats(dev, useraddr);
+ break;
default:
rc = -EOPNOTSUPP;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 672eefbfbe99..bba502f7cd57 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -50,6 +50,7 @@
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
#include <net/dst.h>
+#include <net/sock_reuseport.h>
/**
* sk_filter - run a packet through a socket filter
@@ -348,12 +349,6 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
* jump offsets, 2nd pass remapping:
* new_prog = kmalloc(sizeof(struct bpf_insn) * new_len);
* bpf_convert_filter(old_prog, old_len, new_prog, &new_len);
- *
- * User BPF's register A is mapped to our BPF register 6, user BPF
- * register X is mapped to BPF register 7; frame pointer is always
- * register 10; Context 'void *ctx' is stored in register 1, that is,
- * for socket filters: ctx == 'struct sk_buff *', for seccomp:
- * ctx == 'struct seccomp_data *'.
*/
static int bpf_convert_filter(struct sock_filter *prog, int len,
struct bpf_insn *new_prog, int *new_len)
@@ -381,9 +376,22 @@ do_pass:
new_insn = new_prog;
fp = prog;
- if (new_insn)
- *new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
- new_insn++;
+ /* Classic BPF related prologue emission. */
+ if (new_insn) {
+ /* Classic BPF expects A and X to be reset first. These need
+ * to be guaranteed to be the first two instructions.
+ */
+ *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
+ *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
+
+ /* All programs must keep CTX in callee saved BPF_REG_CTX.
+ * In eBPF case it's done by the compiler, here we need to
+ * do this ourself. Initial CTX is present in BPF_REG_ARG1.
+ */
+ *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
+ } else {
+ new_insn += 3;
+ }
for (i = 0; i < len; fp++, i++) {
struct bpf_insn tmp_insns[6] = { };
@@ -777,6 +785,11 @@ static int bpf_check_classic(const struct sock_filter *filter,
if (ftest->k == 0)
return -EINVAL;
break;
+ case BPF_ALU | BPF_LSH | BPF_K:
+ case BPF_ALU | BPF_RSH | BPF_K:
+ if (ftest->k >= 32)
+ return -EINVAL;
+ break;
case BPF_LD | BPF_MEM:
case BPF_LDX | BPF_MEM:
case BPF_ST:
@@ -1160,17 +1173,32 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
return 0;
}
-/**
- * sk_attach_filter - attach a socket filter
- * @fprog: the filter program
- * @sk: the socket to use
- *
- * Attach the user's filter code. We first run some sanity checks on
- * it to make sure it does not explode on us later. If an error
- * occurs or there is insufficient memory for the filter a negative
- * errno code is returned. On success the return is zero.
- */
-int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
+{
+ struct bpf_prog *old_prog;
+ int err;
+
+ if (bpf_prog_size(prog->len) > sysctl_optmem_max)
+ return -ENOMEM;
+
+ if (sk_unhashed(sk)) {
+ err = reuseport_alloc(sk);
+ if (err)
+ return err;
+ } else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
+ /* The socket wasn't bound with SO_REUSEPORT */
+ return -EINVAL;
+ }
+
+ old_prog = reuseport_attach_prog(sk, prog);
+ if (old_prog)
+ bpf_prog_destroy(old_prog);
+
+ return 0;
+}
+
+static
+struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
{
unsigned int fsize = bpf_classic_proglen(fprog);
unsigned int bpf_fsize = bpf_prog_size(fprog->len);
@@ -1178,19 +1206,19 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
int err;
if (sock_flag(sk, SOCK_FILTER_LOCKED))
- return -EPERM;
+ return ERR_PTR(-EPERM);
/* Make sure new filter is there and in the right amounts. */
if (fprog->filter == NULL)
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
prog = bpf_prog_alloc(bpf_fsize, 0);
if (!prog)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
if (copy_from_user(prog->insns, fprog->filter, fsize)) {
__bpf_prog_free(prog);
- return -EFAULT;
+ return ERR_PTR(-EFAULT);
}
prog->len = fprog->len;
@@ -1198,13 +1226,30 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
err = bpf_prog_store_orig_filter(prog, fprog);
if (err) {
__bpf_prog_free(prog);
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
}
/* bpf_prepare_filter() already takes care of freeing
* memory in case something goes wrong.
*/
- prog = bpf_prepare_filter(prog, NULL);
+ return bpf_prepare_filter(prog, NULL);
+}
+
+/**
+ * sk_attach_filter - attach a socket filter
+ * @fprog: the filter program
+ * @sk: the socket to use
+ *
+ * Attach the user's filter code. We first run some sanity checks on
+ * it to make sure it does not explode on us later. If an error
+ * occurs or there is insufficient memory for the filter a negative
+ * errno code is returned. On success the return is zero.
+ */
+int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+{
+ struct bpf_prog *prog = __get_filter(fprog, sk);
+ int err;
+
if (IS_ERR(prog))
return PTR_ERR(prog);
@@ -1218,23 +1263,50 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_attach_filter);
-int sk_attach_bpf(u32 ufd, struct sock *sk)
+int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
- struct bpf_prog *prog;
+ struct bpf_prog *prog = __get_filter(fprog, sk);
int err;
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ err = __reuseport_attach_prog(prog, sk);
+ if (err < 0) {
+ __bpf_prog_release(prog);
+ return err;
+ }
+
+ return 0;
+}
+
+static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
+{
+ struct bpf_prog *prog;
+
if (sock_flag(sk, SOCK_FILTER_LOCKED))
- return -EPERM;
+ return ERR_PTR(-EPERM);
prog = bpf_prog_get(ufd);
if (IS_ERR(prog))
- return PTR_ERR(prog);
+ return prog;
if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) {
bpf_prog_put(prog);
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
+ return prog;
+}
+
+int sk_attach_bpf(u32 ufd, struct sock *sk)
+{
+ struct bpf_prog *prog = __get_bpf(ufd, sk);
+ int err;
+
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
err = __sk_attach_prog(prog, sk);
if (err < 0) {
bpf_prog_put(prog);
@@ -1244,7 +1316,24 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
return 0;
}
-#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1)
+int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
+{
+ struct bpf_prog *prog = __get_bpf(ufd, sk);
+ int err;
+
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ err = __reuseport_attach_prog(prog, sk);
+ if (err < 0) {
+ bpf_prog_put(prog);
+ return err;
+ }
+
+ return 0;
+}
+
+#define BPF_LDST_LEN 16U
static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
{
@@ -1252,9 +1341,12 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
int offset = (int) r2;
void *from = (void *) (long) r3;
unsigned int len = (unsigned int) r4;
- char buf[16];
+ char buf[BPF_LDST_LEN];
void *ptr;
+ if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM)))
+ return -EINVAL;
+
/* bpf verifier guarantees that:
* 'from' pointer points to bpf program stack
* 'len' bytes of it were initialized
@@ -1274,7 +1366,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
if (unlikely(!ptr))
return -EFAULT;
- if (BPF_RECOMPUTE_CSUM(flags))
+ if (flags & BPF_F_RECOMPUTE_CSUM)
skb_postpull_rcsum(skb, ptr, len);
memcpy(ptr, from, len);
@@ -1283,8 +1375,9 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
/* skb_store_bits cannot return -EFAULT here */
skb_store_bits(skb, offset, ptr, len);
- if (BPF_RECOMPUTE_CSUM(flags) && skb->ip_summed == CHECKSUM_COMPLETE)
- skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0));
+ if (flags & BPF_F_RECOMPUTE_CSUM)
+ skb_postpush_rcsum(skb, ptr, len);
+
return 0;
}
@@ -1299,8 +1392,35 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = {
.arg5_type = ARG_ANYTHING,
};
-#define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f)
-#define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10)
+static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1;
+ int offset = (int) r2;
+ void *to = (void *)(unsigned long) r3;
+ unsigned int len = (unsigned int) r4;
+ void *ptr;
+
+ if (unlikely((u32) offset > 0xffff || len > BPF_LDST_LEN))
+ return -EFAULT;
+
+ ptr = skb_header_pointer(skb, offset, len, to);
+ if (unlikely(!ptr))
+ return -EFAULT;
+ if (ptr != to)
+ memcpy(to, ptr, len);
+
+ return 0;
+}
+
+const struct bpf_func_proto bpf_skb_load_bytes_proto = {
+ .func = bpf_skb_load_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_STACK,
+ .arg4_type = ARG_CONST_STACK_SIZE,
+};
static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
{
@@ -1308,6 +1428,8 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
int offset = (int) r2;
__sum16 sum, *ptr;
+ if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
+ return -EINVAL;
if (unlikely((u32) offset > 0xffff))
return -EFAULT;
@@ -1319,7 +1441,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
if (unlikely(!ptr))
return -EFAULT;
- switch (BPF_HEADER_FIELD_SIZE(flags)) {
+ switch (flags & BPF_F_HDR_FIELD_MASK) {
case 2:
csum_replace2(ptr, from, to);
break;
@@ -1351,10 +1473,12 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = {
static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
- bool is_pseudo = !!BPF_IS_PSEUDO_HEADER(flags);
+ bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
int offset = (int) r2;
__sum16 sum, *ptr;
+ if (unlikely(flags & ~(BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
+ return -EINVAL;
if (unlikely((u32) offset > 0xffff))
return -EFAULT;
@@ -1366,7 +1490,7 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
if (unlikely(!ptr))
return -EFAULT;
- switch (BPF_HEADER_FIELD_SIZE(flags)) {
+ switch (flags & BPF_F_HDR_FIELD_MASK) {
case 2:
inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
break;
@@ -1395,13 +1519,14 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = {
.arg5_type = ARG_ANYTHING,
};
-#define BPF_IS_REDIRECT_INGRESS(flags) ((flags) & 1)
-
static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2;
struct net_device *dev;
+ if (unlikely(flags & ~(BPF_F_INGRESS)))
+ return -EINVAL;
+
dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
if (unlikely(!dev))
return -EINVAL;
@@ -1410,8 +1535,12 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
if (unlikely(!skb2))
return -ENOMEM;
- if (BPF_IS_REDIRECT_INGRESS(flags))
+ if (flags & BPF_F_INGRESS) {
+ if (skb_at_tc_ingress(skb2))
+ skb_postpush_rcsum(skb2, skb_mac_header(skb2),
+ skb2->mac_len);
return dev_forward_skb(dev, skb2);
+ }
skb2->dev = dev;
skb_sender_cpu_clear(skb2);
@@ -1433,12 +1562,17 @@ struct redirect_info {
};
static DEFINE_PER_CPU(struct redirect_info, redirect_info);
+
static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ if (unlikely(flags & ~(BPF_F_INGRESS)))
+ return TC_ACT_SHOT;
+
ri->ifindex = ifindex;
ri->flags = flags;
+
return TC_ACT_REDIRECT;
}
@@ -1454,8 +1588,12 @@ int skb_do_redirect(struct sk_buff *skb)
return -EINVAL;
}
- if (BPF_IS_REDIRECT_INGRESS(ri->flags))
+ if (ri->flags & BPF_F_INGRESS) {
+ if (skb_at_tc_ingress(skb))
+ skb_postpush_rcsum(skb, skb_mac_header(skb),
+ skb->mac_len);
return dev_forward_skb(dev, skb);
+ }
skb->dev = dev;
skb_sender_cpu_clear(skb);
@@ -1547,19 +1685,49 @@ bool bpf_helper_changes_skb_data(void *func)
return false;
}
+static unsigned short bpf_tunnel_key_af(u64 flags)
+{
+ return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
+}
+
static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2;
- struct ip_tunnel_info *info = skb_tunnel_info(skb);
+ const struct ip_tunnel_info *info = skb_tunnel_info(skb);
+ u8 compat[sizeof(struct bpf_tunnel_key)];
- if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags || !info))
- return -EINVAL;
- if (ip_tunnel_info_af(info) != AF_INET)
+ if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6))))
return -EINVAL;
+ if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags))
+ return -EPROTO;
+ if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
+ switch (size) {
+ case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
+ /* Fixup deprecated structure layouts here, so we have
+ * a common path later on.
+ */
+ if (ip_tunnel_info_af(info) != AF_INET)
+ return -EINVAL;
+ to = (struct bpf_tunnel_key *)compat;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
to->tunnel_id = be64_to_cpu(info->key.tun_id);
- to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
+ to->tunnel_tos = info->key.tos;
+ to->tunnel_ttl = info->key.ttl;
+
+ if (flags & BPF_F_TUNINFO_IPV6)
+ memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
+ sizeof(to->remote_ipv6));
+ else
+ to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
+
+ if (unlikely(size != sizeof(struct bpf_tunnel_key)))
+ memcpy((void *)(long) r2, to, size);
return 0;
}
@@ -1581,10 +1749,25 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
struct sk_buff *skb = (struct sk_buff *) (long) r1;
struct bpf_tunnel_key *from = (struct bpf_tunnel_key *) (long) r2;
struct metadata_dst *md = this_cpu_ptr(md_dst);
+ u8 compat[sizeof(struct bpf_tunnel_key)];
struct ip_tunnel_info *info;
- if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags))
+ if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX)))
return -EINVAL;
+ if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
+ switch (size) {
+ case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
+ /* Fixup deprecated structure layouts here, so we have
+ * a common path later on.
+ */
+ memcpy(compat, from, size);
+ memset(compat + size, 0, sizeof(compat) - size);
+ from = (struct bpf_tunnel_key *)compat;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
skb_dst_drop(skb);
dst_hold((struct dst_entry *) md);
@@ -1592,9 +1775,21 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
info = &md->u.tun_info;
info->mode = IP_TUNNEL_INFO_TX;
- info->key.tun_flags = TUNNEL_KEY;
+
+ info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM;
info->key.tun_id = cpu_to_be64(from->tunnel_id);
- info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
+ info->key.tos = from->tunnel_tos;
+ info->key.ttl = from->tunnel_ttl;
+
+ if (flags & BPF_F_TUNINFO_IPV6) {
+ info->mode |= IP_TUNNEL_INFO_IPV6;
+ memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
+ sizeof(from->remote_ipv6));
+ } else {
+ info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
+ if (flags & BPF_F_ZERO_CSUM_TX)
+ info->key.tun_flags &= ~TUNNEL_CSUM;
+ }
return 0;
}
@@ -1654,6 +1849,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
switch (func_id) {
case BPF_FUNC_skb_store_bytes:
return &bpf_skb_store_bytes_proto;
+ case BPF_FUNC_skb_load_bytes:
+ return &bpf_skb_load_bytes_proto;
case BPF_FUNC_l3_csum_replace:
return &bpf_l3_csum_replace_proto;
case BPF_FUNC_l4_csum_replace:
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d79699c9d1b9..12e700332010 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -208,7 +208,6 @@ ip:
case htons(ETH_P_IPV6): {
const struct ipv6hdr *iph;
struct ipv6hdr _iph;
- __be32 flow_label;
ipv6:
iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
@@ -230,8 +229,12 @@ ipv6:
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
}
- flow_label = ip6_flowlabel(iph);
- if (flow_label) {
+ if ((dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_FLOW_LABEL) ||
+ (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) &&
+ ip6_flowlabel(iph)) {
+ __be32 flow_label = ip6_flowlabel(iph);
+
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
key_tags = skb_flow_dissector_target(flow_dissector,
@@ -396,6 +399,13 @@ ip_proto_again:
goto out_bad;
proto = eth->h_proto;
nhoff += sizeof(*eth);
+
+ /* Cap headers that we access via pointers at the
+ * end of the Ethernet header as our maximum alignment
+ * at that point is only 2 bytes.
+ */
+ if (NET_IP_ALIGN)
+ hlen = nhoff;
}
key_control->flags |= FLOW_DIS_ENCAPSULATION;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1aa8437ed6c4..f18ae91b652e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -857,7 +857,7 @@ static void neigh_probe(struct neighbour *neigh)
struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
/* keep skb alive even if arp_queue overflows */
if (skb)
- skb = skb_copy(skb, GFP_ATOMIC);
+ skb = skb_clone(skb, GFP_ATOMIC);
write_unlock(&neigh->lock);
neigh->ops->solicit(neigh, skb);
atomic_inc(&neigh->probes);
@@ -2215,7 +2215,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
ndm->ndm_pad2 = 0;
ndm->ndm_flags = pn->flags | NTF_PROXY;
ndm->ndm_type = RTN_UNICAST;
- ndm->ndm_ifindex = pn->dev->ifindex;
+ ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
ndm->ndm_state = NUD_NONE;
if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
@@ -2333,7 +2333,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (h > s_h)
s_idx = 0;
for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
- if (dev_net(n->dev) != net)
+ if (pneigh_net(n) != net)
continue;
if (idx < s_idx)
goto next;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index f88a62ab019d..b6c8a6629b39 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -471,6 +471,7 @@ static ssize_t phys_switch_id_show(struct device *dev,
if (dev_isalive(netdev)) {
struct switchdev_attr attr = {
+ .orig_dev = netdev,
.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
.flags = SWITCHDEV_F_NO_RECURSE,
};
@@ -1452,8 +1453,8 @@ static void netdev_release(struct device *d)
static const void *net_namespace(struct device *d)
{
- struct net_device *dev;
- dev = container_of(d, struct net_device, dev);
+ struct net_device *dev = to_net_dev(d);
+
return dev_net(dev);
}
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index adef015b2f41..92da5e4ceb4f 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -32,6 +32,10 @@
#include <trace/events/sock.h>
#include <trace/events/udp.h>
#include <trace/events/fib.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <trace/events/fib6.h>
+EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
+#endif
EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 6441f47b1a8f..0260c84ed83c 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -56,29 +56,41 @@ static void cgrp_css_free(struct cgroup_subsys_state *css)
kfree(css_cls_state(css));
}
-static int update_classid(const void *v, struct file *file, unsigned n)
+static int update_classid_sock(const void *v, struct file *file, unsigned n)
{
int err;
struct socket *sock = sock_from_file(file, &err);
- if (sock)
- sock->sk->sk_classid = (u32)(unsigned long)v;
-
+ if (sock) {
+ spin_lock(&cgroup_sk_update_lock);
+ sock_cgroup_set_classid(&sock->sk->sk_cgrp_data,
+ (unsigned long)v);
+ spin_unlock(&cgroup_sk_update_lock);
+ }
return 0;
}
-static void cgrp_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static void update_classid(struct cgroup_subsys_state *css, void *v)
{
- struct cgroup_cls_state *cs = css_cls_state(css);
- void *v = (void *)(unsigned long)cs->classid;
+ struct css_task_iter it;
struct task_struct *p;
- cgroup_taskset_for_each(p, tset) {
+ css_task_iter_start(css, &it);
+ while ((p = css_task_iter_next(&it))) {
task_lock(p);
- iterate_fd(p->files, 0, update_classid, v);
+ iterate_fd(p->files, 0, update_classid_sock, v);
task_unlock(p);
}
+ css_task_iter_end(&it);
+}
+
+static void cgrp_attach(struct cgroup_taskset *tset)
+{
+ struct cgroup_subsys_state *css;
+
+ cgroup_taskset_first(tset, &css);
+ update_classid(css,
+ (void *)(unsigned long)css_cls_state(css)->classid);
}
static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
@@ -89,8 +101,13 @@ static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
u64 value)
{
- css_cls_state(css)->classid = (u32) value;
+ struct cgroup_cls_state *cs = css_cls_state(css);
+
+ cgroup_sk_alloc_disable();
+
+ cs->classid = (u32)value;
+ update_classid(css, (void *)(unsigned long)cs->classid);
return 0;
}
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index cbd0a199bf52..f1efbc39ef6b 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -27,6 +27,12 @@
#include <linux/fdtable.h>
+/*
+ * netprio allocates per-net_device priomap array which is indexed by
+ * css->id. Limiting css ID to 16bits doesn't lose anything.
+ */
+#define NETPRIO_ID_MAX USHRT_MAX
+
#define PRIOMAP_MIN_SZ 128
/*
@@ -144,6 +150,9 @@ static int cgrp_css_online(struct cgroup_subsys_state *css)
struct net_device *dev;
int ret = 0;
+ if (css->id > NETPRIO_ID_MAX)
+ return -ENOSPC;
+
if (!parent_css)
return 0;
@@ -200,6 +209,8 @@ static ssize_t write_priomap(struct kernfs_open_file *of,
if (!dev)
return -ENODEV;
+ cgroup_sk_alloc_disable();
+
rtnl_lock();
ret = netprio_set_prio(of_css(of), dev, prio);
@@ -213,18 +224,23 @@ static int update_netprio(const void *v, struct file *file, unsigned n)
{
int err;
struct socket *sock = sock_from_file(file, &err);
- if (sock)
- sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v;
+ if (sock) {
+ spin_lock(&cgroup_sk_update_lock);
+ sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data,
+ (unsigned long)v);
+ spin_unlock(&cgroup_sk_update_lock);
+ }
return 0;
}
-static void net_prio_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static void net_prio_attach(struct cgroup_taskset *tset)
{
struct task_struct *p;
- void *v = (void *)(unsigned long)css->cgroup->id;
+ struct cgroup_subsys_state *css;
+
+ cgroup_taskset_for_each(p, css, tset) {
+ void *v = (void *)(unsigned long)css->cgroup->id;
- cgroup_taskset_for_each(p, tset) {
task_lock(p);
iterate_fd(p->files, 0, update_netprio, v);
task_unlock(p);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index de8d5cc5eb24..1474cfd2dc1c 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2787,7 +2787,9 @@ static struct sk_buff *pktgen_alloc_skb(struct net_device *dev,
} else {
skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT);
}
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+
+ if (likely(skb))
+ skb_reserve(skb, LL_RESERVED_SPACE(dev));
return skb;
}
@@ -2898,7 +2900,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
if (!(pkt_dev->flags & F_UDPCSUM)) {
skb->ip_summed = CHECKSUM_NONE;
- } else if (odev->features & NETIF_F_V4_CSUM) {
+ } else if (odev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM)) {
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
udp4_hwcsum(skb, iph->saddr, iph->daddr);
@@ -3032,7 +3034,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
if (!(pkt_dev->flags & F_UDPCSUM)) {
skb->ip_summed = CHECKSUM_NONE;
- } else if (odev->features & NETIF_F_V6_CSUM) {
+ } else if (odev->features & (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM)) {
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 504bd17b7456..8261d95dd846 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1027,6 +1027,7 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
{
int err;
struct switchdev_attr attr = {
+ .orig_dev = dev,
.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
.flags = SWITCHDEV_F_NO_RECURSE,
};
@@ -1045,15 +1046,156 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
return 0;
}
+static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ const struct rtnl_link_stats64 *stats;
+ struct rtnl_link_stats64 temp;
+ struct nlattr *attr;
+
+ stats = dev_get_stats(dev, &temp);
+
+ attr = nla_reserve(skb, IFLA_STATS,
+ sizeof(struct rtnl_link_stats));
+ if (!attr)
+ return -EMSGSIZE;
+
+ copy_rtnl_link_stats(nla_data(attr), stats);
+
+ attr = nla_reserve(skb, IFLA_STATS64,
+ sizeof(struct rtnl_link_stats64));
+ if (!attr)
+ return -EMSGSIZE;
+
+ copy_rtnl_link_stats64(nla_data(attr), stats);
+
+ return 0;
+}
+
+static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
+ struct net_device *dev,
+ int vfs_num,
+ struct nlattr *vfinfo)
+{
+ struct ifla_vf_rss_query_en vf_rss_query_en;
+ struct ifla_vf_link_state vf_linkstate;
+ struct ifla_vf_spoofchk vf_spoofchk;
+ struct ifla_vf_tx_rate vf_tx_rate;
+ struct ifla_vf_stats vf_stats;
+ struct ifla_vf_trust vf_trust;
+ struct ifla_vf_vlan vf_vlan;
+ struct ifla_vf_rate vf_rate;
+ struct nlattr *vf, *vfstats;
+ struct ifla_vf_mac vf_mac;
+ struct ifla_vf_info ivi;
+
+ /* Not all SR-IOV capable drivers support the
+ * spoofcheck and "RSS query enable" query. Preset to
+ * -1 so the user space tool can detect that the driver
+ * didn't report anything.
+ */
+ ivi.spoofchk = -1;
+ ivi.rss_query_en = -1;
+ ivi.trusted = -1;
+ memset(ivi.mac, 0, sizeof(ivi.mac));
+ /* The default value for VF link state is "auto"
+ * IFLA_VF_LINK_STATE_AUTO which equals zero
+ */
+ ivi.linkstate = 0;
+ if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi))
+ return 0;
+
+ vf_mac.vf =
+ vf_vlan.vf =
+ vf_rate.vf =
+ vf_tx_rate.vf =
+ vf_spoofchk.vf =
+ vf_linkstate.vf =
+ vf_rss_query_en.vf =
+ vf_trust.vf = ivi.vf;
+
+ memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
+ vf_vlan.vlan = ivi.vlan;
+ vf_vlan.qos = ivi.qos;
+ vf_tx_rate.rate = ivi.max_tx_rate;
+ vf_rate.min_tx_rate = ivi.min_tx_rate;
+ vf_rate.max_tx_rate = ivi.max_tx_rate;
+ vf_spoofchk.setting = ivi.spoofchk;
+ vf_linkstate.link_state = ivi.linkstate;
+ vf_rss_query_en.setting = ivi.rss_query_en;
+ vf_trust.setting = ivi.trusted;
+ vf = nla_nest_start(skb, IFLA_VF_INFO);
+ if (!vf) {
+ nla_nest_cancel(skb, vfinfo);
+ return -EMSGSIZE;
+ }
+ if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
+ nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
+ nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
+ &vf_rate) ||
+ nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
+ &vf_tx_rate) ||
+ nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
+ &vf_spoofchk) ||
+ nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
+ &vf_linkstate) ||
+ nla_put(skb, IFLA_VF_RSS_QUERY_EN,
+ sizeof(vf_rss_query_en),
+ &vf_rss_query_en) ||
+ nla_put(skb, IFLA_VF_TRUST,
+ sizeof(vf_trust), &vf_trust))
+ return -EMSGSIZE;
+ memset(&vf_stats, 0, sizeof(vf_stats));
+ if (dev->netdev_ops->ndo_get_vf_stats)
+ dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num,
+ &vf_stats);
+ vfstats = nla_nest_start(skb, IFLA_VF_STATS);
+ if (!vfstats) {
+ nla_nest_cancel(skb, vf);
+ nla_nest_cancel(skb, vfinfo);
+ return -EMSGSIZE;
+ }
+ if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
+ vf_stats.rx_packets) ||
+ nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
+ vf_stats.tx_packets) ||
+ nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
+ vf_stats.rx_bytes) ||
+ nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
+ vf_stats.tx_bytes) ||
+ nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
+ vf_stats.broadcast) ||
+ nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
+ vf_stats.multicast))
+ return -EMSGSIZE;
+ nla_nest_end(skb, vfstats);
+ nla_nest_end(skb, vf);
+ return 0;
+}
+
+static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
+{
+ struct rtnl_link_ifmap map = {
+ .mem_start = dev->mem_start,
+ .mem_end = dev->mem_end,
+ .base_addr = dev->base_addr,
+ .irq = dev->irq,
+ .dma = dev->dma,
+ .port = dev->if_port,
+ };
+ if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, u32 ext_filter_mask)
{
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
- struct rtnl_link_stats64 temp;
- const struct rtnl_link_stats64 *stats;
- struct nlattr *attr, *af_spec;
+ struct nlattr *af_spec;
struct rtnl_af_ops *af_ops;
struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
@@ -1096,18 +1238,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
goto nla_put_failure;
- if (1) {
- struct rtnl_link_ifmap map = {
- .mem_start = dev->mem_start,
- .mem_end = dev->mem_end,
- .base_addr = dev->base_addr,
- .irq = dev->irq,
- .dma = dev->dma,
- .port = dev->if_port,
- };
- if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
- goto nla_put_failure;
- }
+ if (rtnl_fill_link_ifmap(skb, dev))
+ goto nla_put_failure;
if (dev->addr_len) {
if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) ||
@@ -1124,128 +1256,27 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (rtnl_phys_switch_id_fill(skb, dev))
goto nla_put_failure;
- attr = nla_reserve(skb, IFLA_STATS,
- sizeof(struct rtnl_link_stats));
- if (attr == NULL)
+ if (rtnl_fill_stats(skb, dev))
goto nla_put_failure;
- stats = dev_get_stats(dev, &temp);
- copy_rtnl_link_stats(nla_data(attr), stats);
-
- attr = nla_reserve(skb, IFLA_STATS64,
- sizeof(struct rtnl_link_stats64));
- if (attr == NULL)
- goto nla_put_failure;
- copy_rtnl_link_stats64(nla_data(attr), stats);
-
if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) &&
nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)))
goto nla_put_failure;
- if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent
- && (ext_filter_mask & RTEXT_FILTER_VF)) {
+ if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent &&
+ ext_filter_mask & RTEXT_FILTER_VF) {
int i;
-
- struct nlattr *vfinfo, *vf, *vfstats;
+ struct nlattr *vfinfo;
int num_vfs = dev_num_vf(dev->dev.parent);
vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
if (!vfinfo)
goto nla_put_failure;
for (i = 0; i < num_vfs; i++) {
- struct ifla_vf_info ivi;
- struct ifla_vf_mac vf_mac;
- struct ifla_vf_vlan vf_vlan;
- struct ifla_vf_rate vf_rate;
- struct ifla_vf_tx_rate vf_tx_rate;
- struct ifla_vf_spoofchk vf_spoofchk;
- struct ifla_vf_link_state vf_linkstate;
- struct ifla_vf_rss_query_en vf_rss_query_en;
- struct ifla_vf_stats vf_stats;
- struct ifla_vf_trust vf_trust;
-
- /*
- * Not all SR-IOV capable drivers support the
- * spoofcheck and "RSS query enable" query. Preset to
- * -1 so the user space tool can detect that the driver
- * didn't report anything.
- */
- ivi.spoofchk = -1;
- ivi.rss_query_en = -1;
- ivi.trusted = -1;
- memset(ivi.mac, 0, sizeof(ivi.mac));
- /* The default value for VF link state is "auto"
- * IFLA_VF_LINK_STATE_AUTO which equals zero
- */
- ivi.linkstate = 0;
- if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
- break;
- vf_mac.vf =
- vf_vlan.vf =
- vf_rate.vf =
- vf_tx_rate.vf =
- vf_spoofchk.vf =
- vf_linkstate.vf =
- vf_rss_query_en.vf =
- vf_trust.vf = ivi.vf;
-
- memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
- vf_vlan.vlan = ivi.vlan;
- vf_vlan.qos = ivi.qos;
- vf_tx_rate.rate = ivi.max_tx_rate;
- vf_rate.min_tx_rate = ivi.min_tx_rate;
- vf_rate.max_tx_rate = ivi.max_tx_rate;
- vf_spoofchk.setting = ivi.spoofchk;
- vf_linkstate.link_state = ivi.linkstate;
- vf_rss_query_en.setting = ivi.rss_query_en;
- vf_trust.setting = ivi.trusted;
- vf = nla_nest_start(skb, IFLA_VF_INFO);
- if (!vf) {
- nla_nest_cancel(skb, vfinfo);
+ if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
goto nla_put_failure;
- }
- if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
- nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
- nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
- &vf_rate) ||
- nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
- &vf_tx_rate) ||
- nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
- &vf_spoofchk) ||
- nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
- &vf_linkstate) ||
- nla_put(skb, IFLA_VF_RSS_QUERY_EN,
- sizeof(vf_rss_query_en),
- &vf_rss_query_en) ||
- nla_put(skb, IFLA_VF_TRUST,
- sizeof(vf_trust), &vf_trust))
- goto nla_put_failure;
- memset(&vf_stats, 0, sizeof(vf_stats));
- if (dev->netdev_ops->ndo_get_vf_stats)
- dev->netdev_ops->ndo_get_vf_stats(dev, i,
- &vf_stats);
- vfstats = nla_nest_start(skb, IFLA_VF_STATS);
- if (!vfstats) {
- nla_nest_cancel(skb, vf);
- nla_nest_cancel(skb, vfinfo);
- goto nla_put_failure;
- }
- if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
- vf_stats.rx_packets) ||
- nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
- vf_stats.tx_packets) ||
- nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
- vf_stats.rx_bytes) ||
- nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
- vf_stats.tx_bytes) ||
- nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
- vf_stats.broadcast) ||
- nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
- vf_stats.multicast))
- goto nla_put_failure;
- nla_nest_end(skb, vfstats);
- nla_nest_end(skb, vf);
}
+
nla_nest_end(skb, vfinfo);
}
@@ -2533,7 +2564,7 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
struct net_device *dev,
u8 *addr, u16 vid, u32 pid, u32 seq,
int type, unsigned int flags,
- int nlflags)
+ int nlflags, u16 ndm_state)
{
struct nlmsghdr *nlh;
struct ndmsg *ndm;
@@ -2549,7 +2580,7 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
ndm->ndm_flags = flags;
ndm->ndm_type = 0;
ndm->ndm_ifindex = dev->ifindex;
- ndm->ndm_state = NUD_PERMANENT;
+ ndm->ndm_state = ndm_state;
if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr))
goto nla_put_failure;
@@ -2570,7 +2601,8 @@ static inline size_t rtnl_fdb_nlmsg_size(void)
return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN);
}
-static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type)
+static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type,
+ u16 ndm_state)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
@@ -2581,7 +2613,7 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type)
goto errout;
err = nlmsg_populate_fdb_fill(skb, dev, addr, vid,
- 0, 0, type, NTF_SELF, 0);
+ 0, 0, type, NTF_SELF, 0, ndm_state);
if (err < 0) {
kfree_skb(skb);
goto errout;
@@ -2716,7 +2748,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
nlh->nlmsg_flags);
if (!err) {
- rtnl_fdb_notify(dev, addr, vid, RTM_NEWNEIGH);
+ rtnl_fdb_notify(dev, addr, vid, RTM_NEWNEIGH,
+ ndm->ndm_state);
ndm->ndm_flags &= ~NTF_SELF;
}
}
@@ -2817,7 +2850,8 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid);
if (!err) {
- rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH);
+ rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH,
+ ndm->ndm_state);
ndm->ndm_flags &= ~NTF_SELF;
}
}
@@ -2845,7 +2879,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb,
err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0,
portid, seq,
RTM_NEWNEIGH, NTF_SELF,
- NLM_F_MULTI);
+ NLM_F_MULTI, NUD_PERMANENT);
if (err < 0)
return err;
skip:
@@ -2877,6 +2911,7 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb,
nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->mc);
out:
netif_addr_unlock_bh(dev);
+ cb->args[1] = err;
return idx;
}
EXPORT_SYMBOL(ndo_dflt_fdb_dump);
@@ -2910,6 +2945,7 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
ops = br_dev->netdev_ops;
}
+ cb->args[1] = 0;
for_each_netdev(net, dev) {
if (brport_idx && (dev->ifindex != brport_idx))
continue;
@@ -2937,12 +2973,16 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev,
idx);
}
+ if (cb->args[1] == -EMSGSIZE)
+ break;
if (dev->netdev_ops->ndo_fdb_dump)
idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL,
idx);
else
idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
+ if (cb->args[1] == -EMSGSIZE)
+ break;
cops = NULL;
}
@@ -3317,7 +3357,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
rtnl_doit_func doit;
- int sz_idx, kind;
+ int kind;
int family;
int type;
int err;
@@ -3333,7 +3373,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return 0;
family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
- sz_idx = type>>2;
kind = type&3;
if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN))
diff --git a/net/core/scm.c b/net/core/scm.c
index 3b6899b7d810..2696aefdc148 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -87,6 +87,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
*fplp = fpl;
fpl->count = 0;
fpl->max = SCM_MAX_FD;
+ fpl->user = NULL;
}
fpp = &fpl->fp[fpl->count];
@@ -107,6 +108,10 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
*fpp++ = file;
fpl->count++;
}
+
+ if (!fpl->user)
+ fpl->user = get_uid(current_user());
+
return num;
}
@@ -119,6 +124,7 @@ void __scm_destroy(struct scm_cookie *scm)
scm->fp = NULL;
for (i=fpl->count-1; i>=0; i--)
fput(fpl->fp[i]);
+ free_uid(fpl->user);
kfree(fpl);
}
}
@@ -289,8 +295,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
/* Bump the usage count and install the file. */
sock = sock_from_file(fp[i], &err);
if (sock) {
- sock_update_netprioidx(sock->sk);
- sock_update_classid(sock->sk);
+ sock_update_netprioidx(&sock->sk->sk_cgrp_data);
+ sock_update_classid(&sock->sk->sk_cgrp_data);
}
fd_install(new_fd, get_file(fp[i]));
}
@@ -305,6 +311,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
err = put_user(cmlen, &cm->cmsg_len);
if (!err) {
cmlen = CMSG_SPACE(i*sizeof(int));
+ if (msg->msg_controllen < cmlen)
+ cmlen = msg->msg_controllen;
msg->msg_control += cmlen;
msg->msg_controllen -= cmlen;
}
@@ -334,6 +342,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
for (i = 0; i < fpl->count; i++)
get_file(fpl->fp[i]);
new_fpl->max = new_fpl->count;
+ new_fpl->user = get_uid(fpl->user);
}
return new_fpl;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fab4599ba8b2..8616d1147c93 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -79,6 +79,8 @@
struct kmem_cache *skbuff_head_cache __read_mostly;
static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
+EXPORT_SYMBOL(sysctl_max_skb_frags);
/**
* skb_panic - private function for out-of-line support
@@ -414,7 +416,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
len += NET_SKB_PAD;
if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
- (gfp_mask & (__GFP_WAIT | GFP_DMA))) {
+ (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
if (!skb)
goto skb_fail;
@@ -481,7 +483,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
len += NET_SKB_PAD + NET_IP_ALIGN;
if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
- (gfp_mask & (__GFP_WAIT | GFP_DMA))) {
+ (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
if (!skb)
goto skb_fail;
@@ -2946,6 +2948,24 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
EXPORT_SYMBOL_GPL(skb_append_pagefrags);
/**
+ * skb_push_rcsum - push skb and update receive checksum
+ * @skb: buffer to update
+ * @len: length of data pulled
+ *
+ * This function performs an skb_push on the packet and updates
+ * the CHECKSUM_COMPLETE checksum. It should be used on
+ * receive path processing instead of skb_push unless you know
+ * that the checksum difference is zero (e.g., a valid IP header)
+ * or you are setting ip_summed to CHECKSUM_NONE.
+ */
+static unsigned char *skb_push_rcsum(struct sk_buff *skb, unsigned len)
+{
+ skb_push(skb, len);
+ skb_postpush_rcsum(skb, skb->data, len);
+ return skb->data;
+}
+
+/**
* skb_pull_rcsum - pull skb and update receive checksum
* @skb: buffer to update
* @len: length of data pulled
@@ -3643,7 +3663,8 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
serr->ee.ee_info = tstype;
if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
- if (sk->sk_protocol == IPPROTO_TCP)
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ sk->sk_type == SOCK_STREAM)
serr->ee.ee_data -= sk->sk_tskey;
}
@@ -4081,9 +4102,9 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
if (!pskb_may_pull(skb_chk, offset))
goto err;
- __skb_pull(skb_chk, offset);
+ skb_pull_rcsum(skb_chk, offset);
ret = skb_chkf(skb_chk);
- __skb_push(skb_chk, offset);
+ skb_push_rcsum(skb_chk, offset);
if (ret)
goto err;
@@ -4268,7 +4289,8 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
return NULL;
}
- memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
+ memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN,
+ 2 * ETH_ALEN);
skb->mac_header += VLAN_HLEN;
return skb;
}
@@ -4452,7 +4474,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
return NULL;
gfp_head = gfp_mask;
- if (gfp_head & __GFP_WAIT)
+ if (gfp_head & __GFP_DIRECT_RECLAIM)
gfp_head |= __GFP_REPEAT;
*errcode = -ENOBUFS;
@@ -4467,7 +4489,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
while (order) {
if (npages >= 1 << order) {
- page = alloc_pages((gfp_mask & ~__GFP_WAIT) |
+ page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
__GFP_COMP |
__GFP_NOWARN |
__GFP_NORETRY,
diff --git a/net/core/sock.c b/net/core/sock.c
index 7529eb9463be..6c1c8bc93412 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -134,6 +134,7 @@
#include <linux/sock_diag.h>
#include <linux/filter.h>
+#include <net/sock_reuseport.h>
#include <trace/events/sock.h>
@@ -194,44 +195,6 @@ bool sk_net_capable(const struct sock *sk, int cap)
}
EXPORT_SYMBOL(sk_net_capable);
-
-#ifdef CONFIG_MEMCG_KMEM
-int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
-{
- struct proto *proto;
- int ret = 0;
-
- mutex_lock(&proto_list_mutex);
- list_for_each_entry(proto, &proto_list, node) {
- if (proto->init_cgroup) {
- ret = proto->init_cgroup(memcg, ss);
- if (ret)
- goto out;
- }
- }
-
- mutex_unlock(&proto_list_mutex);
- return ret;
-out:
- list_for_each_entry_continue_reverse(proto, &proto_list, node)
- if (proto->destroy_cgroup)
- proto->destroy_cgroup(memcg);
- mutex_unlock(&proto_list_mutex);
- return ret;
-}
-
-void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
-{
- struct proto *proto;
-
- mutex_lock(&proto_list_mutex);
- list_for_each_entry_reverse(proto, &proto_list, node)
- if (proto->destroy_cgroup)
- proto->destroy_cgroup(memcg);
- mutex_unlock(&proto_list_mutex);
-}
-#endif
-
/*
* Each address family might have different locking rules, so we have
* one slock key per address family:
@@ -239,11 +202,6 @@ void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];
-#if defined(CONFIG_MEMCG_KMEM)
-struct static_key memcg_socket_limit_enabled;
-EXPORT_SYMBOL(memcg_socket_limit_enabled);
-#endif
-
/*
* Make lock validator output more readable. (we pre-construct these
* strings build-time, so that runtime initialization of socket
@@ -433,8 +391,6 @@ static bool sock_needs_netstamp(const struct sock *sk)
}
}
-#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
-
static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
{
if (sk->sk_flags & flags) {
@@ -874,7 +830,8 @@ set_rcvbuf:
if (val & SOF_TIMESTAMPING_OPT_ID &&
!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
- if (sk->sk_protocol == IPPROTO_TCP) {
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ sk->sk_type == SOCK_STREAM) {
if (sk->sk_state != TCP_ESTABLISHED) {
ret = -EINVAL;
break;
@@ -933,6 +890,32 @@ set_rcvbuf:
}
break;
+ case SO_ATTACH_REUSEPORT_CBPF:
+ ret = -EINVAL;
+ if (optlen == sizeof(struct sock_fprog)) {
+ struct sock_fprog fprog;
+
+ ret = -EFAULT;
+ if (copy_from_user(&fprog, optval, sizeof(fprog)))
+ break;
+
+ ret = sk_reuseport_attach_filter(&fprog, sk);
+ }
+ break;
+
+ case SO_ATTACH_REUSEPORT_EBPF:
+ ret = -EINVAL;
+ if (optlen == sizeof(u32)) {
+ u32 ufd;
+
+ ret = -EFAULT;
+ if (copy_from_user(&ufd, optval, sizeof(ufd)))
+ break;
+
+ ret = sk_reuseport_attach_bpf(ufd, sk);
+ }
+ break;
+
case SO_DETACH_FILTER:
ret = sk_detach_filter(sk);
break;
@@ -1363,6 +1346,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
if (!try_module_get(prot->owner))
goto out_free_sec;
sk_tx_queue_clear(sk);
+ cgroup_sk_alloc(&sk->sk_cgrp_data);
}
return sk;
@@ -1385,6 +1369,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
owner = prot->owner;
slab = prot->slab;
+ cgroup_sk_free(&sk->sk_cgrp_data);
security_sk_free(sk);
if (slab != NULL)
kmem_cache_free(slab, sk);
@@ -1393,17 +1378,6 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
module_put(owner);
}
-#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
-void sock_update_netprioidx(struct sock *sk)
-{
- if (in_interrupt())
- return;
-
- sk->sk_cgrp_prioidx = task_netprioidx(current);
-}
-EXPORT_SYMBOL_GPL(sock_update_netprioidx);
-#endif
-
/**
* sk_alloc - All socket objects are allocated here
* @net: the applicable net namespace
@@ -1432,8 +1406,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
sock_net_set(sk, net);
atomic_set(&sk->sk_wmem_alloc, 1);
- sock_update_classid(sk);
- sock_update_netprioidx(sk);
+ sock_update_classid(&sk->sk_cgrp_data);
+ sock_update_netprioidx(&sk->sk_cgrp_data);
}
return sk;
@@ -1453,6 +1427,8 @@ void sk_destruct(struct sock *sk)
sk_filter_uncharge(sk, filter);
RCU_INIT_POINTER(sk->sk_filter, NULL);
}
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ reuseport_detach_sock(sk);
sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
@@ -1488,12 +1464,6 @@ void sk_free(struct sock *sk)
}
EXPORT_SYMBOL(sk_free);
-static void sk_update_clone(const struct sock *sk, struct sock *newsk)
-{
- if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
- sock_update_memcg(newsk);
-}
-
/**
* sk_clone_lock - clone a socket, and lock its clone
* @sk: the socket to clone
@@ -1530,7 +1500,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
skb_queue_head_init(&newsk->sk_receive_queue);
skb_queue_head_init(&newsk->sk_write_queue);
- spin_lock_init(&newsk->sk_dst_lock);
rwlock_init(&newsk->sk_callback_lock);
lockdep_set_class_and_name(&newsk->sk_callback_lock,
af_callback_keys + newsk->sk_family,
@@ -1553,7 +1522,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
*/
is_charged = sk_filter_charge(newsk, filter);
- if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk))) {
+ if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
/* It is still raw copy of parent, so invalidate
* destructor and make plain sk_free() */
newsk->sk_destruct = NULL;
@@ -1589,7 +1558,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
sk_set_socket(newsk, NULL);
newsk->sk_wq = NULL;
- sk_update_clone(sk, newsk);
+ if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+ sock_update_memcg(newsk);
if (newsk->sk_prot->sockets_allocated)
sk_sockets_allocated_inc(newsk);
@@ -1607,7 +1577,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
u32 max_segs = 1;
- __sk_dst_set(sk, dst);
+ sk_dst_set(sk, dst);
sk->sk_route_caps = dst->dev->features;
if (sk->sk_route_caps & NETIF_F_GSO)
sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
@@ -1815,7 +1785,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
DEFINE_WAIT(wait);
- clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
for (;;) {
if (!timeo)
break;
@@ -1861,7 +1831,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
break;
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
err = -EAGAIN;
if (!timeo)
@@ -1944,8 +1914,10 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
pfrag->offset = 0;
if (SKB_FRAG_PAGE_ORDER) {
- pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP |
- __GFP_NOWARN | __GFP_NORETRY,
+ /* Avoid direct reclaim but allow kswapd to wake */
+ pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
+ __GFP_COMP | __GFP_NOWARN |
+ __GFP_NORETRY,
SKB_FRAG_PAGE_ORDER);
if (likely(pfrag->page)) {
pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
@@ -2046,9 +2018,9 @@ int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
DEFINE_WAIT(wait);
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb);
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
finish_wait(sk_sleep(sk), &wait);
return rc;
}
@@ -2069,27 +2041,27 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
struct proto *prot = sk->sk_prot;
int amt = sk_mem_pages(size);
long allocated;
- int parent_status = UNDER_LIMIT;
sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
- allocated = sk_memory_allocated_add(sk, amt, &parent_status);
+ allocated = sk_memory_allocated_add(sk, amt);
+
+ if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
+ !mem_cgroup_charge_skmem(sk->sk_memcg, amt))
+ goto suppress_allocation;
/* Under limit. */
- if (parent_status == UNDER_LIMIT &&
- allocated <= sk_prot_mem_limits(sk, 0)) {
+ if (allocated <= sk_prot_mem_limits(sk, 0)) {
sk_leave_memory_pressure(sk);
return 1;
}
- /* Under pressure. (we or our parents) */
- if ((parent_status > SOFT_LIMIT) ||
- allocated > sk_prot_mem_limits(sk, 1))
+ /* Under pressure. */
+ if (allocated > sk_prot_mem_limits(sk, 1))
sk_enter_memory_pressure(sk);
- /* Over hard limit (we or our parents) */
- if ((parent_status == OVER_LIMIT) ||
- (allocated > sk_prot_mem_limits(sk, 2)))
+ /* Over hard limit. */
+ if (allocated > sk_prot_mem_limits(sk, 2))
goto suppress_allocation;
/* guarantee minimum buffer size under pressure */
@@ -2138,6 +2110,9 @@ suppress_allocation:
sk_memory_allocated_sub(sk, amt);
+ if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+ mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
+
return 0;
}
EXPORT_SYMBOL(__sk_mem_schedule);
@@ -2153,6 +2128,9 @@ void __sk_mem_reclaim(struct sock *sk, int amount)
sk_memory_allocated_sub(sk, amount);
sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
+ if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+ mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
+
if (sk_under_memory_pressure(sk) &&
(sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
sk_leave_memory_pressure(sk);
@@ -2281,7 +2259,7 @@ static void sock_def_wakeup(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
- if (wq_has_sleeper(wq))
+ if (skwq_has_sleeper(wq))
wake_up_interruptible_all(&wq->wait);
rcu_read_unlock();
}
@@ -2292,7 +2270,7 @@ static void sock_def_error_report(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
- if (wq_has_sleeper(wq))
+ if (skwq_has_sleeper(wq))
wake_up_interruptible_poll(&wq->wait, POLLERR);
sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
rcu_read_unlock();
@@ -2304,7 +2282,7 @@ static void sock_def_readable(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
- if (wq_has_sleeper(wq))
+ if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
POLLRDNORM | POLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
@@ -2322,7 +2300,7 @@ static void sock_def_write_space(struct sock *sk)
*/
if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
wq = rcu_dereference(sk->sk_wq);
- if (wq_has_sleeper(wq))
+ if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
POLLWRNORM | POLLWRBAND);
@@ -2386,7 +2364,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
} else
sk->sk_wq = NULL;
- spin_lock_init(&sk->sk_dst_lock);
rwlock_init(&sk->sk_callback_lock);
lockdep_set_class_and_name(&sk->sk_callback_lock,
af_callback_keys + sk->sk_family,
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 0c1d58d43f67..a996ce8c8fb2 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -214,7 +214,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld)
}
EXPORT_SYMBOL_GPL(sock_diag_unregister);
-static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
{
int err;
struct sock_diag_req *req = nlmsg_data(nlh);
@@ -234,8 +234,12 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
hndl = sock_diag_handlers[req->sdiag_family];
if (hndl == NULL)
err = -ENOENT;
- else
+ else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
err = hndl->dump(skb, nlh);
+ else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy)
+ err = hndl->destroy(skb, nlh);
+ else
+ err = -EOPNOTSUPP;
mutex_unlock(&sock_diag_table_mutex);
return err;
@@ -261,7 +265,8 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return ret;
case SOCK_DIAG_BY_FAMILY:
- return __sock_diag_rcv_msg(skb, nlh);
+ case SOCK_DESTROY:
+ return __sock_diag_cmd(skb, nlh);
default:
return -EINVAL;
}
@@ -295,6 +300,18 @@ static int sock_diag_bind(struct net *net, int group)
return 0;
}
+int sock_diag_destroy(struct sock *sk, int err)
+{
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (!sk->sk_prot->diag_destroy)
+ return -EOPNOTSUPP;
+
+ return sk->sk_prot->diag_destroy(sk, err);
+}
+EXPORT_SYMBOL_GPL(sock_diag_destroy);
+
static int __net_init diag_net_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
new file mode 100644
index 000000000000..e92b759d906c
--- /dev/null
+++ b/net/core/sock_reuseport.c
@@ -0,0 +1,258 @@
+/*
+ * To speed up listener socket lookup, create an array to store all sockets
+ * listening on the same port. This allows a decision to be made after finding
+ * the first socket. An optional BPF program can also be configured for
+ * selecting the socket index from the array of available sockets.
+ */
+
+#include <net/sock_reuseport.h>
+#include <linux/bpf.h>
+#include <linux/rcupdate.h>
+
+#define INIT_SOCKS 128
+
+static DEFINE_SPINLOCK(reuseport_lock);
+
+static struct sock_reuseport *__reuseport_alloc(u16 max_socks)
+{
+ size_t size = sizeof(struct sock_reuseport) +
+ sizeof(struct sock *) * max_socks;
+ struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC);
+
+ if (!reuse)
+ return NULL;
+
+ reuse->max_socks = max_socks;
+
+ RCU_INIT_POINTER(reuse->prog, NULL);
+ return reuse;
+}
+
+int reuseport_alloc(struct sock *sk)
+{
+ struct sock_reuseport *reuse;
+
+ /* bh lock used since this function call may precede hlist lock in
+ * soft irq of receive path or setsockopt from process context
+ */
+ spin_lock_bh(&reuseport_lock);
+ WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock)),
+ "multiple allocations for the same socket");
+ reuse = __reuseport_alloc(INIT_SOCKS);
+ if (!reuse) {
+ spin_unlock_bh(&reuseport_lock);
+ return -ENOMEM;
+ }
+
+ reuse->socks[0] = sk;
+ reuse->num_socks = 1;
+ rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
+
+ spin_unlock_bh(&reuseport_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(reuseport_alloc);
+
+static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
+{
+ struct sock_reuseport *more_reuse;
+ u32 more_socks_size, i;
+
+ more_socks_size = reuse->max_socks * 2U;
+ if (more_socks_size > U16_MAX)
+ return NULL;
+
+ more_reuse = __reuseport_alloc(more_socks_size);
+ if (!more_reuse)
+ return NULL;
+
+ more_reuse->max_socks = more_socks_size;
+ more_reuse->num_socks = reuse->num_socks;
+ more_reuse->prog = reuse->prog;
+
+ memcpy(more_reuse->socks, reuse->socks,
+ reuse->num_socks * sizeof(struct sock *));
+
+ for (i = 0; i < reuse->num_socks; ++i)
+ rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
+ more_reuse);
+
+ /* Note: we use kfree_rcu here instead of reuseport_free_rcu so
+ * that reuse and more_reuse can temporarily share a reference
+ * to prog.
+ */
+ kfree_rcu(reuse, rcu);
+ return more_reuse;
+}
+
+/**
+ * reuseport_add_sock - Add a socket to the reuseport group of another.
+ * @sk: New socket to add to the group.
+ * @sk2: Socket belonging to the existing reuseport group.
+ * May return ENOMEM and not add socket to group under memory pressure.
+ */
+int reuseport_add_sock(struct sock *sk, struct sock *sk2)
+{
+ struct sock_reuseport *reuse;
+
+ if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
+ int err = reuseport_alloc(sk2);
+
+ if (err)
+ return err;
+ }
+
+ spin_lock_bh(&reuseport_lock);
+ reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock)),
+ WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock)),
+ "socket already in reuseport group");
+
+ if (reuse->num_socks == reuse->max_socks) {
+ reuse = reuseport_grow(reuse);
+ if (!reuse) {
+ spin_unlock_bh(&reuseport_lock);
+ return -ENOMEM;
+ }
+ }
+
+ reuse->socks[reuse->num_socks] = sk;
+ /* paired with smp_rmb() in reuseport_select_sock() */
+ smp_wmb();
+ reuse->num_socks++;
+ rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
+
+ spin_unlock_bh(&reuseport_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(reuseport_add_sock);
+
+static void reuseport_free_rcu(struct rcu_head *head)
+{
+ struct sock_reuseport *reuse;
+
+ reuse = container_of(head, struct sock_reuseport, rcu);
+ if (reuse->prog)
+ bpf_prog_destroy(reuse->prog);
+ kfree(reuse);
+}
+
+void reuseport_detach_sock(struct sock *sk)
+{
+ struct sock_reuseport *reuse;
+ int i;
+
+ spin_lock_bh(&reuseport_lock);
+ reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock));
+ rcu_assign_pointer(sk->sk_reuseport_cb, NULL);
+
+ for (i = 0; i < reuse->num_socks; i++) {
+ if (reuse->socks[i] == sk) {
+ reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
+ reuse->num_socks--;
+ if (reuse->num_socks == 0)
+ call_rcu(&reuse->rcu, reuseport_free_rcu);
+ break;
+ }
+ }
+ spin_unlock_bh(&reuseport_lock);
+}
+EXPORT_SYMBOL(reuseport_detach_sock);
+
+static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks,
+ struct bpf_prog *prog, struct sk_buff *skb,
+ int hdr_len)
+{
+ struct sk_buff *nskb = NULL;
+ u32 index;
+
+ if (skb_shared(skb)) {
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return NULL;
+ skb = nskb;
+ }
+
+ /* temporarily advance data past protocol header */
+ if (!pskb_pull(skb, hdr_len)) {
+ kfree_skb(nskb);
+ return NULL;
+ }
+ index = bpf_prog_run_save_cb(prog, skb);
+ __skb_push(skb, hdr_len);
+
+ consume_skb(nskb);
+
+ if (index >= socks)
+ return NULL;
+
+ return reuse->socks[index];
+}
+
+/**
+ * reuseport_select_sock - Select a socket from an SO_REUSEPORT group.
+ * @sk: First socket in the group.
+ * @hash: When no BPF filter is available, use this hash to select.
+ * @skb: skb to run through BPF filter.
+ * @hdr_len: BPF filter expects skb data pointer at payload data. If
+ * the skb does not yet point at the payload, this parameter represents
+ * how far the pointer needs to advance to reach the payload.
+ * Returns a socket that should receive the packet (or NULL on error).
+ */
+struct sock *reuseport_select_sock(struct sock *sk,
+ u32 hash,
+ struct sk_buff *skb,
+ int hdr_len)
+{
+ struct sock_reuseport *reuse;
+ struct bpf_prog *prog;
+ struct sock *sk2 = NULL;
+ u16 socks;
+
+ rcu_read_lock();
+ reuse = rcu_dereference(sk->sk_reuseport_cb);
+
+ /* if memory allocation failed or add call is not yet complete */
+ if (!reuse)
+ goto out;
+
+ prog = rcu_dereference(reuse->prog);
+ socks = READ_ONCE(reuse->num_socks);
+ if (likely(socks)) {
+ /* paired with smp_wmb() in reuseport_add_sock() */
+ smp_rmb();
+
+ if (prog && skb)
+ sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
+ else
+ sk2 = reuse->socks[reciprocal_scale(hash, socks)];
+ }
+
+out:
+ rcu_read_unlock();
+ return sk2;
+}
+EXPORT_SYMBOL(reuseport_select_sock);
+
+struct bpf_prog *
+reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
+{
+ struct sock_reuseport *reuse;
+ struct bpf_prog *old_prog;
+
+ spin_lock_bh(&reuseport_lock);
+ reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock));
+ old_prog = rcu_dereference_protected(reuse->prog,
+ lockdep_is_held(&reuseport_lock));
+ rcu_assign_pointer(reuse->prog, prog);
+ spin_unlock_bh(&reuseport_lock);
+
+ return old_prog;
+}
+EXPORT_SYMBOL(reuseport_attach_prog);
diff --git a/net/core/stream.c b/net/core/stream.c
index d70f77a0c889..159516a11b7e 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -35,11 +35,11 @@ void sk_stream_write_space(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
- if (wq_has_sleeper(wq))
+ if (skwq_has_sleeper(wq))
wake_up_interruptible_poll(&wq->wait, POLLOUT |
POLLWRNORM | POLLWRBAND);
if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
- sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
+ sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
}
}
@@ -126,7 +126,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2;
while (1) {
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
@@ -139,7 +139,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
}
if (signal_pending(current))
goto do_interrupted;
- clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
if (sk_stream_memory_free(sk) && !vm_wait)
break;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 95b6139d710c..a6beb7b6ae55 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -26,6 +26,7 @@ static int zero = 0;
static int one = 1;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
+static int max_skb_frags = MAX_SKB_FRAGS;
static int net_msg_warn; /* Unused, but still a sysctl */
@@ -392,6 +393,15 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "max_skb_frags",
+ .data = &sysctl_max_skb_frags,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ .extra2 = &max_skb_frags,
+ },
{ }
};
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 5684e14932bd..902d606324a0 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -824,26 +824,26 @@ lookup:
if (sk->sk_state == DCCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk = NULL;
+ struct sock *nsk;
sk = req->rsk_listener;
- if (likely(sk->sk_state == DCCP_LISTEN)) {
- nsk = dccp_check_req(sk, skb, req);
- } else {
+ if (unlikely(sk->sk_state != DCCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
+ sock_hold(sk);
+ nsk = dccp_check_req(sk, skb, req);
if (!nsk) {
reqsk_put(req);
- goto discard_it;
+ goto discard_and_relse;
}
if (nsk == sk) {
- sock_hold(sk);
reqsk_put(req);
} else if (dccp_child_process(sk, nsk, skb)) {
dccp_v4_ctl_send_reset(sk, skb);
- goto discard_it;
+ goto discard_and_relse;
} else {
+ sock_put(sk);
return 0;
}
}
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index db5fc2440a23..b8608b71a66d 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -202,7 +202,9 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
security_req_classify_flow(req, flowi6_to_flowi(&fl6));
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
@@ -219,7 +221,10 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
&ireq->ir_v6_loc_addr,
&ireq->ir_v6_rmt_addr);
fl6.daddr = ireq->ir_v6_rmt_addr;
- err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+ rcu_read_lock();
+ err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+ np->tclass);
+ rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -387,6 +392,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *newnp;
const struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
struct inet_sock *newinet;
struct dccp6_sock *newdp6;
struct sock *newsk;
@@ -453,7 +459,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
* comment in that function for the gory details. -acme
*/
- __ip6_dst_store(newsk, dst, NULL, NULL);
+ ip6_dst_store(newsk, dst, NULL, NULL);
newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM |
NETIF_F_TSO);
newdp6 = (struct dccp6_sock *)newsk;
@@ -488,13 +494,15 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
* Yes, keeping reference count would be much more clever, but we make
* one more one thing there: reattach optmem to newsk.
*/
- if (np->opt != NULL)
- newnp->opt = ipv6_dup_options(newsk, np->opt);
-
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ RCU_INIT_POINTER(newnp->opt, opt);
+ }
inet_csk(newsk)->icsk_ext_hdr_len = 0;
- if (newnp->opt != NULL)
- inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
- newnp->opt->opt_flen);
+ if (opt)
+ inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
+ opt->opt_flen;
dccp_sync_mss(newsk, dst_mtu(dst));
@@ -683,26 +691,26 @@ lookup:
if (sk->sk_state == DCCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk = NULL;
+ struct sock *nsk;
sk = req->rsk_listener;
- if (likely(sk->sk_state == DCCP_LISTEN)) {
- nsk = dccp_check_req(sk, skb, req);
- } else {
+ if (unlikely(sk->sk_state != DCCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
+ sock_hold(sk);
+ nsk = dccp_check_req(sk, skb, req);
if (!nsk) {
reqsk_put(req);
- goto discard_it;
+ goto discard_and_relse;
}
if (nsk == sk) {
- sock_hold(sk);
reqsk_put(req);
} else if (dccp_child_process(sk, nsk, skb)) {
dccp_v6_ctl_send_reset(sk, skb);
- goto discard_it;
+ goto discard_and_relse;
} else {
+ sock_put(sk);
return 0;
}
}
@@ -757,6 +765,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct ipv6_pinfo *np = inet6_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
+ struct ipv6_txoptions *opt;
struct flowi6 fl6;
struct dst_entry *dst;
int addr_type;
@@ -856,7 +865,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.fl6_sport = inet->inet_sport;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ final_p = fl6_update_dst(&fl6, opt, &final);
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
@@ -873,12 +883,11 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
np->saddr = *saddr;
inet->inet_rcv_saddr = LOOPBACK4_IPV6;
- __ip6_dst_store(sk, dst, NULL, NULL);
+ ip6_dst_store(sk, dst, NULL, NULL);
icsk->icsk_ext_hdr_len = 0;
- if (np->opt != NULL)
- icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
- np->opt->opt_nflen);
+ if (opt)
+ icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
inet->inet_dport = usin->sin6_port;
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 4ce912e691d0..b66c84db0766 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -201,7 +201,7 @@ void dccp_write_space(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
- if (wq_has_sleeper(wq))
+ if (skwq_has_sleeper(wq))
wake_up_interruptible(&wq->wait);
/* Should agree with poll, otherwise some programs break */
if (sock_writeable(sk))
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b5cf13a28009..41e65804ddf5 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -339,8 +339,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
if (sk_stream_is_writeable(sk)) {
mask |= POLLOUT | POLLWRNORM;
} else { /* send SIGIO later */
- set_bit(SOCK_ASYNC_NOSPACE,
- &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
/* Race breaker. If space is freed after
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 675cf94e04f8..13d6b1a6e0fc 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -678,6 +678,9 @@ static int dn_create(struct net *net, struct socket *sock, int protocol,
{
struct sock *sk;
+ if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+ return -EINVAL;
+
if (!net_eq(net, &init_net))
return -EAFNOSUPPORT;
@@ -1747,9 +1750,9 @@ static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
}
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target));
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
finish_wait(sk_sleep(sk), &wait);
}
@@ -2004,10 +2007,10 @@ static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
}
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
sk_wait_event(sk, &timeo,
!dn_queue_too_long(scp, queue, flags));
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
finish_wait(sk_sleep(sk), &wait);
continue;
}
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index 4677b6fa6dda..ecc28cff08ab 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -67,7 +67,7 @@
* Returns the size of the result on success, -ve error code otherwise.
*/
int dns_query(const char *type, const char *name, size_t namelen,
- const char *options, char **_result, time_t *_expiry)
+ const char *options, char **_result, time64_t *_expiry)
{
struct key *rkey;
const struct user_key_payload *upayload;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 1eba07feb34a..fa4daba8db55 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -21,8 +21,10 @@
#include <linux/of_mdio.h>
#include <linux/of_platform.h>
#include <linux/of_net.h>
+#include <linux/of_gpio.h>
#include <linux/sysfs.h>
#include <linux/phy_fixed.h>
+#include <linux/gpio/consumer.h>
#include "dsa_priv.h"
char dsa_driver_version[] = "0.1";
@@ -437,7 +439,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
if (of_phy_is_fixed_link(port_dn)) {
phydev = of_phy_find_device(port_dn);
if (phydev) {
- int addr = phydev->addr;
+ int addr = phydev->mdio.addr;
phy_device_free(phydev);
of_node_put(port_dn);
@@ -454,8 +456,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
if (!ds->ports[port])
continue;
- unregister_netdev(ds->ports[port]);
- free_netdev(ds->ports[port]);
+ dsa_slave_destroy(ds->ports[port]);
}
mdiobus_unregister(ds->slave_mii_bus);
@@ -506,33 +507,6 @@ static int dsa_switch_resume(struct dsa_switch *ds)
}
#endif
-
-/* link polling *************************************************************/
-static void dsa_link_poll_work(struct work_struct *ugly)
-{
- struct dsa_switch_tree *dst;
- int i;
-
- dst = container_of(ugly, struct dsa_switch_tree, link_poll_work);
-
- for (i = 0; i < dst->pd->nr_chips; i++) {
- struct dsa_switch *ds = dst->ds[i];
-
- if (ds != NULL && ds->drv->poll_link != NULL)
- ds->drv->poll_link(ds);
- }
-
- mod_timer(&dst->link_poll_timer, round_jiffies(jiffies + HZ));
-}
-
-static void dsa_link_poll_timer(unsigned long _dst)
-{
- struct dsa_switch_tree *dst = (void *)_dst;
-
- schedule_work(&dst->link_poll_work);
-}
-
-
/* platform driver init and cleanup *****************************************/
static int dev_is_class(struct device *dev, void *class)
{
@@ -688,6 +662,9 @@ static int dsa_of_probe(struct device *dev)
const char *port_name;
int chip_index, port_index;
const unsigned int *sw_addr, *port_reg;
+ int gpio;
+ enum of_gpio_flags of_flags;
+ unsigned long flags;
u32 eeprom_len;
int ret;
@@ -766,6 +743,19 @@ static int dsa_of_probe(struct device *dev)
put_device(cd->host_dev);
cd->host_dev = &mdio_bus_switch->dev;
}
+ gpio = of_get_named_gpio_flags(child, "reset-gpios", 0,
+ &of_flags);
+ if (gpio_is_valid(gpio)) {
+ flags = (of_flags == OF_GPIO_ACTIVE_LOW ?
+ GPIOF_ACTIVE_LOW : 0);
+ ret = devm_gpio_request_one(dev, gpio, flags,
+ "switch_reset");
+ if (ret)
+ goto out_free_chip;
+
+ cd->reset = gpio_to_desc(gpio);
+ gpiod_direction_output(cd->reset, 0);
+ }
for_each_available_child_of_node(child, port) {
port_reg = of_get_property(port, "reg", NULL);
@@ -859,8 +849,6 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
}
dst->ds[i] = ds;
- if (ds->drv->poll_link != NULL)
- dst->link_poll_needed = 1;
++configured;
}
@@ -879,15 +867,6 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
wmb();
dev->dsa_ptr = (void *)dst;
- if (dst->link_poll_needed) {
- INIT_WORK(&dst->link_poll_work, dsa_link_poll_work);
- init_timer(&dst->link_poll_timer);
- dst->link_poll_timer.data = (unsigned long)dst;
- dst->link_poll_timer.function = dsa_link_poll_timer;
- dst->link_poll_timer.expires = round_jiffies(jiffies + HZ);
- add_timer(&dst->link_poll_timer);
- }
-
return 0;
}
@@ -939,8 +918,10 @@ static int dsa_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, dst);
ret = dsa_setup_dst(dst, dev, &pdev->dev, pd);
- if (ret)
+ if (ret) {
+ dev_put(dev);
goto out;
+ }
return 0;
@@ -954,17 +935,14 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
{
int i;
- if (dst->link_poll_needed)
- del_timer_sync(&dst->link_poll_timer);
-
- flush_work(&dst->link_poll_work);
-
for (i = 0; i < dst->pd->nr_chips; i++) {
struct dsa_switch *ds = dst->ds[i];
if (ds)
dsa_switch_destroy(ds);
}
+
+ dev_put(dst->master_netdev);
}
static int dsa_remove(struct platform_device *pdev)
@@ -1010,6 +988,14 @@ static int dsa_suspend(struct device *d)
struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
int i, ret = 0;
+ dst->master_netdev->dsa_ptr = NULL;
+
+ /* If we used a tagging format that doesn't have an ethertype
+ * field, make sure that all packets from this point get sent
+ * without the tag and go through the regular receive path.
+ */
+ wmb();
+
for (i = 0; i < dst->pd->nr_chips; i++) {
struct dsa_switch *ds = dst->ds[i];
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 311796c809af..1d1a54687e4a 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -61,6 +61,7 @@ extern const struct dsa_device_ops notag_netdev_ops;
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
int port, char *name);
+void dsa_slave_destroy(struct net_device *slave_dev);
int dsa_slave_suspend(struct net_device *slave_dev);
int dsa_slave_resume(struct net_device *slave_dev);
int dsa_slave_netdevice_event(struct notifier_block *unused,
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 7bc787b095c8..ab24521beb4d 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -15,6 +15,7 @@
#include <linux/phy_fixed.h>
#include <linux/of_net.h>
#include <linux/of_mdio.h>
+#include <linux/mdio.h>
#include <net/rtnetlink.h>
#include <net/switchdev.h>
#include <linux/if_bridge.h>
@@ -997,7 +998,7 @@ static int dsa_slave_phy_connect(struct dsa_slave_priv *p,
{
struct dsa_switch *ds = p->parent;
- p->phy = ds->slave_mii_bus->phy_map[addr];
+ p->phy = mdiobus_get_phy(ds->slave_mii_bus, addr);
if (!p->phy) {
netdev_err(slave_dev, "no phy at %d\n", addr);
return -ENODEV;
@@ -1080,11 +1081,10 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
netdev_err(slave_dev, "failed to connect to port %d: %d\n", p->port, ret);
return ret;
}
- } else {
- netdev_info(slave_dev, "attached PHY at address %d [%s]\n",
- p->phy->addr, p->phy->drv->name);
}
+ phy_attached_info(p->phy);
+
return 0;
}
@@ -1189,19 +1189,11 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
p->old_link = -1;
p->old_duplex = -1;
- ret = dsa_slave_phy_setup(p, slave_dev);
- if (ret) {
- netdev_err(master, "error %d setting up slave phy\n", ret);
- free_netdev(slave_dev);
- return ret;
- }
-
ds->ports[port] = slave_dev;
ret = register_netdev(slave_dev);
if (ret) {
netdev_err(master, "error %d registering interface %s\n",
ret, slave_dev->name);
- phy_disconnect(p->phy);
ds->ports[port] = NULL;
free_netdev(slave_dev);
return ret;
@@ -1209,9 +1201,28 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
netif_carrier_off(slave_dev);
+ ret = dsa_slave_phy_setup(p, slave_dev);
+ if (ret) {
+ netdev_err(master, "error %d setting up slave phy\n", ret);
+ unregister_netdev(slave_dev);
+ free_netdev(slave_dev);
+ return ret;
+ }
+
return 0;
}
+void dsa_slave_destroy(struct net_device *slave_dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(slave_dev);
+
+ netif_carrier_off(slave_dev);
+ if (p->phy)
+ phy_disconnect(p->phy);
+ unregister_netdev(slave_dev);
+ free_netdev(slave_dev);
+}
+
static bool dsa_slave_dev_check(struct net_device *dev)
{
return dev->netdev_ops == &dsa_slave_netdev_ops;
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 9e63f252a89e..103871784e50 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -52,6 +52,8 @@
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/if_ether.h>
+#include <linux/of_net.h>
+#include <linux/pci.h>
#include <net/dst.h>
#include <net/arp.h>
#include <net/sock.h>
@@ -485,3 +487,32 @@ static int __init eth_offload_init(void)
}
fs_initcall(eth_offload_init);
+
+unsigned char * __weak arch_get_platform_mac_address(void)
+{
+ return NULL;
+}
+
+int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr)
+{
+ const unsigned char *addr;
+ struct device_node *dp;
+
+ if (dev_is_pci(dev))
+ dp = pci_device_to_OF_node(to_pci_dev(dev));
+ else
+ dp = dev->of_node;
+
+ addr = NULL;
+ if (dp)
+ addr = of_get_mac_address(dp);
+ if (!addr)
+ addr = arch_get_platform_mac_address();
+
+ if (!addr)
+ return -ENODEV;
+
+ ether_addr_copy(mac_addr, addr);
+ return 0;
+}
+EXPORT_SYMBOL(eth_platform_get_mac_address);
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 35a9788bb3ae..c7d1adca30d8 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -312,7 +312,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, u8 type)
return;
out:
- WARN_ON_ONCE("HSR: Could not send supervision frame\n");
+ WARN_ONCE(1, "HSR: Could not send supervision frame\n");
kfree_skb(skb);
}
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 20c49c724ba0..737c87a2a41e 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -161,9 +161,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev,
wdev->needed_headroom;
ldev->needed_tailroom = wdev->needed_tailroom;
- lowpan_netdev_setup(ldev, LOWPAN_LLTYPE_IEEE802154);
-
- ret = register_netdevice(ldev);
+ ret = lowpan_register_netdevice(ldev, LOWPAN_LLTYPE_IEEE802154);
if (ret < 0) {
dev_put(wdev);
return ret;
@@ -180,7 +178,7 @@ static void lowpan_dellink(struct net_device *ldev, struct list_head *head)
ASSERT_RTNL();
wdev->ieee802154_ptr->lowpan_dev = NULL;
- unregister_netdevice(ldev);
+ lowpan_unregister_netdevice(ldev);
dev_put(wdev);
}
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 6b437e8760d3..30d875dff6b5 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -624,7 +624,6 @@ int __init lowpan_net_frag_init(void)
lowpan_frags.hashfn = lowpan_hashfn;
lowpan_frags.constructor = lowpan_frag_init;
lowpan_frags.destructor = NULL;
- lowpan_frags.skb_free = NULL;
lowpan_frags.qsize = sizeof(struct frag_queue);
lowpan_frags.match = lowpan_frag_match;
lowpan_frags.frag_expire = lowpan_frag_expire;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 416dfa004cfb..775824720b6b 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -353,6 +353,7 @@ config INET_ESP
select CRYPTO_CBC
select CRYPTO_SHA1
select CRYPTO_DES
+ select CRYPTO_ECHAINIV
---help---
Support for IPsec ESP.
@@ -436,6 +437,19 @@ config INET_UDP_DIAG
Support for UDP socket monitoring interface used by the ss tool.
If unsure, say Y.
+config INET_DIAG_DESTROY
+ bool "INET: allow privileged process to administratively close sockets"
+ depends on INET_DIAG
+ default n
+ ---help---
+ Provides a SOCK_DESTROY operation that allows privileged processes
+ (e.g., a connection manager or a network administration tool such as
+ ss) to close sockets opened by other processes. Closing a socket in
+ this way interrupts any blocking read/write/connect operations on
+ the socket and causes future socket calls to behave as if the socket
+ had been disconnected.
+ If unsure, say N.
+
menuconfig TCP_CONG_ADVANCED
bool "TCP: advanced congestion control"
---help---
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index c29809f765dc..62c049b647e9 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -56,7 +56,6 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
-obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 11c4ca13ec3b..5c5db6636704 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -257,6 +257,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
int try_loading_module = 0;
int err;
+ if (protocol < 0 || protocol >= IPPROTO_MAX)
+ return -EINVAL;
+
sock->state = SS_UNCONNECTED;
/* Look for the requested type/protocol pair. */
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index cebd9d31e65a..f6303b17546b 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1847,7 +1847,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = EINVAL;
+ err = -EINVAL;
if (!tb[NETCONFA_IFINDEX])
goto errout;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index cc8f3e506cde..473447593060 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1155,6 +1155,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_changeupper_info *info;
struct in_device *in_dev;
struct net *net = dev_net(dev);
unsigned int flags;
@@ -1193,6 +1194,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
case NETDEV_CHANGEMTU:
rt_cache_flush(net);
break;
+ case NETDEV_CHANGEUPPER:
+ info = ptr;
+ /* flush all routes if dev is linked to or unlinked from
+ * an L3 master device (e.g., VRF)
+ */
+ if (info->upper_dev && netif_is_l3_master(info->upper_dev))
+ fib_disable_ip(dev, NETDEV_DOWN, true);
+ break;
}
return NOTIFY_DONE;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3e87447e65c7..d97268e8ff10 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -923,14 +923,21 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
fib_prefsrc != cfg->fc_dst) {
u32 tb_id = cfg->fc_table;
+ int rc;
if (tb_id == RT_TABLE_MAIN)
tb_id = RT_TABLE_LOCAL;
- if (inet_addr_type_table(cfg->fc_nlinfo.nl_net,
- fib_prefsrc, tb_id) != RTN_LOCAL) {
- return false;
+ rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
+ fib_prefsrc, tb_id);
+
+ if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) {
+ rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
+ fib_prefsrc, RT_TABLE_LOCAL);
}
+
+ if (rc != RTN_LOCAL)
+ return false;
}
return true;
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 744e5936c10d..d07fc076bea0 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -289,10 +289,8 @@ static void __node_free_rcu(struct rcu_head *head)
if (!n->tn_bits)
kmem_cache_free(trie_leaf_kmem, n);
- else if (n->tn_bits <= TNODE_KMALLOC_MAX)
- kfree(n);
else
- vfree(n);
+ kvfree(n);
}
#define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu)
@@ -1396,9 +1394,10 @@ found:
struct fib_info *fi = fa->fa_info;
int nhsel, err;
- if ((index >= (1ul << fa->fa_slen)) &&
- ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen != KEYLENGTH)))
- continue;
+ if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
+ if (index >= (1ul << fa->fa_slen))
+ continue;
+ }
if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
continue;
if (fi->fib_dead)
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index e0fcbbbcfe54..976f0dcf6991 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -24,6 +24,7 @@ struct fou {
u16 type;
struct udp_offload udp_offloads;
struct list_head list;
+ struct rcu_head rcu;
};
#define FOU_F_REMCSUM_NOPARTIAL BIT(0)
@@ -417,7 +418,7 @@ static void fou_release(struct fou *fou)
list_del(&fou->list);
udp_tunnel_sock_release(sock);
- kfree(fou);
+ kfree_rcu(fou, rcu);
}
static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
@@ -497,7 +498,7 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
sk->sk_allocation = GFP_ATOMIC;
if (cfg->udp_config.family == AF_INET) {
- err = udp_add_offload(&fou->udp_offloads);
+ err = udp_add_offload(net, &fou->udp_offloads);
if (err)
goto error;
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 64aaf3522a59..b3086cf27027 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -356,9 +356,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
skb_dst_set(skb, &rt->dst);
skb->dev = dev;
- skb->reserved_tailroom = skb_end_offset(skb) -
- min(mtu, skb_end_offset(skb));
skb_reserve(skb, hlen);
+ skb_tailroom_reserve(skb, mtu, tlen);
skb_reset_network_header(skb);
pip = ip_hdr(skb);
@@ -2126,7 +2125,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
ASSERT_RTNL();
in_dev = ip_mc_find_dev(net, imr);
- if (!in_dev) {
+ if (!imr->imr_ifindex && !imr->imr_address.s_addr && !in_dev) {
ret = -ENODEV;
goto out;
}
@@ -2147,7 +2146,8 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
*imlp = iml->next_rcu;
- ip_mc_dec_group(in_dev, group);
+ if (in_dev)
+ ip_mc_dec_group(in_dev, group);
/* decrease mem now to avoid the memleak warning */
atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
@@ -2392,11 +2392,11 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
struct ip_sf_socklist *psl;
struct net *net = sock_net(sk);
+ ASSERT_RTNL();
+
if (!ipv4_is_multicast(addr))
return -EINVAL;
- rtnl_lock();
-
imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
imr.imr_address.s_addr = msf->imsf_interface;
imr.imr_ifindex = 0;
@@ -2417,7 +2417,6 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
goto done;
msf->imsf_fmode = pmc->sfmode;
psl = rtnl_dereference(pmc->sflist);
- rtnl_unlock();
if (!psl) {
len = 0;
count = 0;
@@ -2436,7 +2435,6 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
return -EFAULT;
return 0;
done:
- rtnl_unlock();
return err;
}
@@ -2450,6 +2448,8 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
struct inet_sock *inet = inet_sk(sk);
struct ip_sf_socklist *psl;
+ ASSERT_RTNL();
+
psin = (struct sockaddr_in *)&gsf->gf_group;
if (psin->sin_family != AF_INET)
return -EINVAL;
@@ -2457,8 +2457,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
if (!ipv4_is_multicast(addr))
return -EINVAL;
- rtnl_lock();
-
err = -EADDRNOTAVAIL;
for_each_pmc_rtnl(inet, pmc) {
@@ -2470,7 +2468,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
goto done;
gsf->gf_fmode = pmc->sfmode;
psl = rtnl_dereference(pmc->sflist);
- rtnl_unlock();
count = psl ? psl->sl_count : 0;
copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
gsf->gf_numsrc = count;
@@ -2490,7 +2487,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
}
return 0;
done:
- rtnl_unlock();
return err;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1feb15f23de8..64148914803a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -563,7 +563,7 @@ static void reqsk_timer_handler(unsigned long data)
int max_retries, thresh;
u8 defer_accept;
- if (sk_listener->sk_state != TCP_LISTEN)
+ if (sk_state_load(sk_listener) != TCP_LISTEN)
goto drop;
max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
@@ -749,7 +749,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
* It is OK, because this socket enters to hash table only
* after validation is complete.
*/
- sk->sk_state = TCP_LISTEN;
+ sk_state_store(sk, TCP_LISTEN);
if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
inet->inet_sport = htons(inet->inet_num);
@@ -789,14 +789,16 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req,
reqsk_put(req);
}
-void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
- struct sock *child)
+struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
+ struct request_sock *req,
+ struct sock *child)
{
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
spin_lock(&queue->rskq_lock);
if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_child_forget(sk, req, child);
+ child = NULL;
} else {
req->sk = child;
req->dl_next = NULL;
@@ -808,6 +810,7 @@ void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
sk_acceptq_added(sk);
}
spin_unlock(&queue->rskq_lock);
+ return child;
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
@@ -817,11 +820,8 @@ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
if (own_req) {
inet_csk_reqsk_queue_drop(sk, req);
reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
- inet_csk_reqsk_queue_add(sk, req, child);
- /* Warning: caller must not call reqsk_put(req);
- * child stole last reference on it.
- */
- return child;
+ if (inet_csk_reqsk_queue_add(sk, req, child))
+ return child;
}
/* Too bad, another child took ownership of the request, undo. */
bh_unlock_sock(child);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ab9f8a66615d..6029157a19ed 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -350,40 +350,60 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
nlmsg_flags, unlh);
}
-int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
- struct sk_buff *in_skb,
- const struct nlmsghdr *nlh,
- const struct inet_diag_req_v2 *req)
+struct sock *inet_diag_find_one_icsk(struct net *net,
+ struct inet_hashinfo *hashinfo,
+ const struct inet_diag_req_v2 *req)
{
- struct net *net = sock_net(in_skb->sk);
- struct sk_buff *rep;
struct sock *sk;
- int err;
- err = -EINVAL;
if (req->sdiag_family == AF_INET)
sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0],
req->id.idiag_dport, req->id.idiag_src[0],
req->id.idiag_sport, req->id.idiag_if);
#if IS_ENABLED(CONFIG_IPV6)
- else if (req->sdiag_family == AF_INET6)
- sk = inet6_lookup(net, hashinfo,
- (struct in6_addr *)req->id.idiag_dst,
- req->id.idiag_dport,
- (struct in6_addr *)req->id.idiag_src,
- req->id.idiag_sport,
- req->id.idiag_if);
+ else if (req->sdiag_family == AF_INET6) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
+ ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
+ sk = inet_lookup(net, hashinfo, req->id.idiag_dst[3],
+ req->id.idiag_dport, req->id.idiag_src[3],
+ req->id.idiag_sport, req->id.idiag_if);
+ else
+ sk = inet6_lookup(net, hashinfo,
+ (struct in6_addr *)req->id.idiag_dst,
+ req->id.idiag_dport,
+ (struct in6_addr *)req->id.idiag_src,
+ req->id.idiag_sport,
+ req->id.idiag_if);
+ }
#endif
else
- goto out_nosk;
+ return ERR_PTR(-EINVAL);
- err = -ENOENT;
if (!sk)
- goto out_nosk;
+ return ERR_PTR(-ENOENT);
- err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
- if (err)
- goto out;
+ if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
+ sock_gen_put(sk);
+ return ERR_PTR(-ENOENT);
+ }
+
+ return sk;
+}
+EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk);
+
+int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
+ struct sk_buff *in_skb,
+ const struct nlmsghdr *nlh,
+ const struct inet_diag_req_v2 *req)
+{
+ struct net *net = sock_net(in_skb->sk);
+ struct sk_buff *rep;
+ struct sock *sk;
+ int err;
+
+ sk = inet_diag_find_one_icsk(net, hashinfo, req);
+ if (IS_ERR(sk))
+ return PTR_ERR(sk);
rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
if (!rep) {
@@ -409,12 +429,11 @@ out:
if (sk)
sock_gen_put(sk);
-out_nosk:
return err;
}
EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
-static int inet_diag_get_exact(struct sk_buff *in_skb,
+static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
const struct nlmsghdr *nlh,
const struct inet_diag_req_v2 *req)
{
@@ -424,8 +443,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
handler = inet_diag_lock_handler(req->sdiag_protocol);
if (IS_ERR(handler))
err = PTR_ERR(handler);
- else
+ else if (cmd == SOCK_DIAG_BY_FAMILY)
err = handler->dump_one(in_skb, nlh, req);
+ else if (cmd == SOCK_DESTROY && handler->destroy)
+ err = handler->destroy(in_skb, req);
+ else
+ err = -EOPNOTSUPP;
inet_diag_unlock_handler(handler);
return err;
@@ -938,7 +961,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
req.idiag_states = rc->idiag_states;
req.id = rc->id;
- return inet_diag_get_exact(in_skb, nlh, &req);
+ return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req);
}
static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -972,7 +995,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
return inet_diag_get_exact_compat(skb, nlh);
}
-static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
{
int hdrlen = sizeof(struct inet_diag_req_v2);
struct net *net = sock_net(skb->sk);
@@ -980,7 +1003,8 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
if (nlmsg_len(h) < hdrlen)
return -EINVAL;
- if (h->nlmsg_flags & NLM_F_DUMP) {
+ if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY &&
+ h->nlmsg_flags & NLM_F_DUMP) {
if (nlmsg_attrlen(h, hdrlen)) {
struct nlattr *attr;
@@ -999,7 +1023,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
}
}
- return inet_diag_get_exact(skb, h, nlmsg_data(h));
+ return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h));
}
static
@@ -1050,14 +1074,16 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
static const struct sock_diag_handler inet_diag_handler = {
.family = AF_INET,
- .dump = inet_diag_handler_dump,
+ .dump = inet_diag_handler_cmd,
.get_info = inet_diag_handler_get_info,
+ .destroy = inet_diag_handler_cmd,
};
static const struct sock_diag_handler inet6_diag_handler = {
.family = AF_INET6,
- .dump = inet_diag_handler_dump,
+ .dump = inet_diag_handler_cmd,
.get_info = inet_diag_handler_get_info,
+ .destroy = inet_diag_handler_cmd,
};
int inet_diag_register(const struct inet_diag_handler *h)
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index fe144dae7372..3a88b0c73797 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -285,14 +285,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
}
EXPORT_SYMBOL(inet_frag_kill);
-static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
- struct sk_buff *skb)
-{
- if (f->skb_free)
- f->skb_free(skb);
- kfree_skb(skb);
-}
-
void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
{
struct sk_buff *fp;
@@ -309,7 +301,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
struct sk_buff *xp = fp->next;
sum_truesize += fp->truesize;
- frag_kfree_skb(nf, f, fp);
+ kfree_skb(fp);
fp = xp;
}
sum = sum_truesize + f->qsize;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 1fe55ae81781..187c6fcc3027 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -661,6 +661,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
struct ipq *qp;
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
+ skb_orphan(skb);
/* Lookup (or create) queue header */
qp = ip_find(net, ip_hdr(skb), user, vif);
@@ -891,7 +892,6 @@ void __init ipfrag_init(void)
ip4_frags.hashfn = ip4_hashfn;
ip4_frags.constructor = ip4_frag_init;
ip4_frags.destructor = ip4_frag_free;
- ip4_frags.skb_free = NULL;
ip4_frags.qsize = sizeof(struct ipq);
ip4_frags.match = ip4_frag_match;
ip4_frags.frag_expire = ip_expire;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 614521437e30..41ba68de46d8 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -24,7 +24,6 @@
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
-#include <linux/mroute.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
@@ -562,10 +561,9 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
tunnel_id_to_key(tun_info->key.tun_id), 0);
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
- err = iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
- key->u.ipv4.dst, IPPROTO_GRE,
- key->tos, key->ttl, df, false);
- iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
+
+ iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
+ key->tos, key->ttl, df, false);
return;
err_free_rt:
@@ -1056,8 +1054,9 @@ static const struct net_device_ops gre_tap_netdev_ops = {
static void ipgre_tap_setup(struct net_device *dev)
{
ether_setup(dev);
- dev->netdev_ops = &gre_tap_netdev_ops;
- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ dev->netdev_ops = &gre_tap_netdev_ops;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
ip_tunnel_setup(dev, gre_tap_net_id);
}
@@ -1242,6 +1241,14 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
err = ipgre_newlink(net, dev, tb, NULL);
if (err < 0)
goto out;
+
+ /* openvswitch users expect packet sizes to be unrestricted,
+ * so set the largest MTU we can.
+ */
+ err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
+ if (err)
+ goto out;
+
return dev;
out:
free_netdev(dev);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index b1209b63381f..d77eb0c3b684 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -316,7 +316,10 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
- if (sysctl_ip_early_demux && !skb_dst(skb) && !skb->sk) {
+ if (sysctl_ip_early_demux &&
+ !skb_dst(skb) &&
+ !skb->sk &&
+ !ip_is_fragment(iph)) {
const struct net_protocol *ipprot;
int protocol = iph->protocol;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 4233cbe47052..565bf64b2b7d 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -76,7 +76,6 @@
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
-#include <linux/mroute.h>
#include <linux/netlink.h>
#include <linux/tcp.h>
@@ -240,6 +239,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
* from host network stack.
*/
features = netif_skb_features(skb);
+ BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET);
segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
if (IS_ERR_OR_NULL(segs)) {
kfree_skb(skb);
@@ -912,7 +912,7 @@ static int __ip_append_data(struct sock *sk,
*/
if (transhdrlen &&
length + fragheaderlen <= mtu &&
- rt->dst.dev->features & NETIF_F_V4_CSUM &&
+ rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) &&
!(flags & MSG_MORE) &&
!exthdrlen)
csummode = CHECKSUM_PARTIAL;
@@ -921,7 +921,7 @@ static int __ip_append_data(struct sock *sk,
if (((length > mtu) || (skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
- (sk->sk_type == SOCK_DGRAM)) {
+ (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
err = ip_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, transhdrlen,
maxfraglen, flags);
@@ -1236,13 +1236,16 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
if (!skb)
return -EINVAL;
- cork->length += size;
if ((size + skb->len > mtu) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO)) {
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return -EOPNOTSUPP;
+
skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
}
+ cork->length += size;
while (size > 0) {
if (skb_is_gso(skb)) {
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index c3c359ad66e3..a50124260f5a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -249,6 +249,8 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc,
switch (cmsg->cmsg_type) {
case IP_RETOPTS:
err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
+
+ /* Our caller is responsible for freeing ipc->opt */
err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
err < 40 ? err : 40);
if (err)
@@ -1251,11 +1253,22 @@ EXPORT_SYMBOL(compat_ip_setsockopt);
* the _received_ ones. The set sets the _sent_ ones.
*/
+static bool getsockopt_needs_rtnl(int optname)
+{
+ switch (optname) {
+ case IP_MSFILTER:
+ case MCAST_MSFILTER:
+ return true;
+ }
+ return false;
+}
+
static int do_ip_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen, unsigned int flags)
{
struct inet_sock *inet = inet_sk(sk);
- int val;
+ bool needs_rtnl = getsockopt_needs_rtnl(optname);
+ int val, err = 0;
int len;
if (level != SOL_IP)
@@ -1269,6 +1282,8 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
if (len < 0)
return -EINVAL;
+ if (needs_rtnl)
+ rtnl_lock();
lock_sock(sk);
switch (optname) {
@@ -1386,39 +1401,35 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_MSFILTER:
{
struct ip_msfilter msf;
- int err;
if (len < IP_MSFILTER_SIZE(0)) {
- release_sock(sk);
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
- release_sock(sk);
- return -EFAULT;
+ err = -EFAULT;
+ goto out;
}
err = ip_mc_msfget(sk, &msf,
(struct ip_msfilter __user *)optval, optlen);
- release_sock(sk);
- return err;
+ goto out;
}
case MCAST_MSFILTER:
{
struct group_filter gsf;
- int err;
if (len < GROUP_FILTER_SIZE(0)) {
- release_sock(sk);
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
- release_sock(sk);
- return -EFAULT;
+ err = -EFAULT;
+ goto out;
}
err = ip_mc_gsfget(sk, &gsf,
(struct group_filter __user *)optval,
optlen);
- release_sock(sk);
- return err;
+ goto out;
}
case IP_MULTICAST_ALL:
val = inet->mc_all;
@@ -1485,6 +1496,12 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
}
return 0;
+
+out:
+ release_sock(sk);
+ if (needs_rtnl)
+ rtnl_unlock();
+ return err;
}
int ip_getsockopt(struct sock *sk, int level,
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index cbb51f3fac06..336e6892a93c 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -30,7 +30,6 @@
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
-#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
@@ -657,12 +656,13 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
struct rtable *rt; /* Route to the other host */
unsigned int max_headroom; /* The extra header space needed */
__be32 dst;
- int err;
bool connected;
inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
connected = (tunnel->parms.iph.daddr != 0);
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
dst = tnl_params->daddr;
if (dst == 0) {
/* NBMA tunnel */
@@ -760,7 +760,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
tunnel->err_count--;
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
dst_link_failure(skb);
} else
tunnel->err_count = 0;
@@ -795,10 +794,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
return;
}
- err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol,
- tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
- iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
-
+ iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
+ df, !net_eq(tunnel->net, dev_net(dev)));
return;
#if IS_ENABLED(CONFIG_IPV6)
@@ -947,17 +944,31 @@ done:
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
-int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+ int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
- if (new_mtu < 68 ||
- new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
+ if (new_mtu < 68)
return -EINVAL;
+
+ if (new_mtu > max_mtu) {
+ if (strict)
+ return -EINVAL;
+
+ new_mtu = max_mtu;
+ }
+
dev->mtu = new_mtu;
return 0;
}
+EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
+
+int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+{
+ return __ip_tunnel_change_mtu(dev, new_mtu, true);
+}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
static void ip_tunnel_dev_free(struct net_device *dev)
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 6cb9009c3d96..859d415c0b2d 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -24,7 +24,6 @@
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
-#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
@@ -48,12 +47,13 @@
#include <net/rtnetlink.h>
#include <net/dst_metadata.h>
-int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
- __be32 src, __be32 dst, __u8 proto,
- __u8 tos, __u8 ttl, __be16 df, bool xnet)
+void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
+ __be32 src, __be32 dst, __u8 proto,
+ __u8 tos, __u8 ttl, __be16 df, bool xnet)
{
int pkt_len = skb->len - skb_inner_network_offset(skb);
struct net *net = dev_net(rt->dst.dev);
+ struct net_device *dev = skb->dev;
struct iphdr *iph;
int err;
@@ -82,7 +82,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
err = ip_local_out(net, sk, skb);
if (unlikely(net_xmit_eval(err)))
pkt_len = 0;
- return pkt_len;
+ iptunnel_xmit_stats(dev, pkt_len);
}
EXPORT_SYMBOL_GPL(iptunnel_xmit);
@@ -251,7 +251,7 @@ static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr,
tun_info = lwt_tun_info(new_state);
if (tb[LWTUNNEL_IP_ID])
- tun_info->key.tun_id = nla_get_u64(tb[LWTUNNEL_IP_ID]);
+ tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP_ID]);
if (tb[LWTUNNEL_IP_DST])
tun_info->key.u.ipv4.dst = nla_get_be32(tb[LWTUNNEL_IP_DST]);
@@ -266,7 +266,7 @@ static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr,
tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]);
if (tb[LWTUNNEL_IP_FLAGS])
- tun_info->key.tun_flags = nla_get_u16(tb[LWTUNNEL_IP_FLAGS]);
+ tun_info->key.tun_flags = nla_get_be16(tb[LWTUNNEL_IP_FLAGS]);
tun_info->mode = IP_TUNNEL_INFO_TX;
tun_info->options_len = 0;
@@ -281,12 +281,12 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb,
{
struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate);
- if (nla_put_u64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id) ||
+ if (nla_put_be64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id) ||
nla_put_be32(skb, LWTUNNEL_IP_DST, tun_info->key.u.ipv4.dst) ||
nla_put_be32(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) ||
nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) ||
nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) ||
- nla_put_u16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags))
+ nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags))
return -ENOMEM;
return 0;
@@ -346,7 +346,7 @@ static int ip6_tun_build_state(struct net_device *dev, struct nlattr *attr,
tun_info = lwt_tun_info(new_state);
if (tb[LWTUNNEL_IP6_ID])
- tun_info->key.tun_id = nla_get_u64(tb[LWTUNNEL_IP6_ID]);
+ tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP6_ID]);
if (tb[LWTUNNEL_IP6_DST])
tun_info->key.u.ipv6.dst = nla_get_in6_addr(tb[LWTUNNEL_IP6_DST]);
@@ -361,7 +361,7 @@ static int ip6_tun_build_state(struct net_device *dev, struct nlattr *attr,
tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]);
if (tb[LWTUNNEL_IP6_FLAGS])
- tun_info->key.tun_flags = nla_get_u16(tb[LWTUNNEL_IP6_FLAGS]);
+ tun_info->key.tun_flags = nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]);
tun_info->mode = IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6;
tun_info->options_len = 0;
@@ -376,12 +376,12 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb,
{
struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate);
- if (nla_put_u64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id) ||
+ if (nla_put_be64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id) ||
nla_put_in6_addr(skb, LWTUNNEL_IP6_DST, &tun_info->key.u.ipv6.dst) ||
nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) ||
nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.tos) ||
nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.ttl) ||
- nla_put_u16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags))
+ nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags))
return -ENOMEM;
return 0;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 4d8f0b698777..5cf10b777b7e 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -30,7 +30,6 @@
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
-#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>
@@ -200,7 +199,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
err = dst_output(tunnel->net, skb->sk, skb);
if (net_xmit_eval(err) == 0)
err = skb->len;
- iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
+ iptunnel_xmit_stats(dev, err);
return NETDEV_TX_OK;
tx_error_icmp:
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 0bc7412d9e14..2ed9dd2b5f2f 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -65,15 +65,6 @@
#include <net/checksum.h>
#include <asm/processor.h>
-/* Define this to allow debugging output */
-#undef IPCONFIG_DEBUG
-
-#ifdef IPCONFIG_DEBUG
-#define DBG(x) printk x
-#else
-#define DBG(x) do { } while(0)
-#endif
-
#if defined(CONFIG_IP_PNP_DHCP)
#define IPCONFIG_DHCP
#endif
@@ -152,7 +143,11 @@ static char dhcp_client_identifier[253] __initdata;
/* Persistent data: */
+#ifdef IPCONFIG_DYNAMIC
static int ic_proto_used; /* Protocol used, if any */
+#else
+#define ic_proto_used 0
+#endif
static __be32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */
static u8 ic_domain[64]; /* DNS (not NIS) domain name */
@@ -227,7 +222,7 @@ static int __init ic_open_devs(void)
if (dev->mtu >= 364)
able |= IC_BOOTP;
else
- pr_warn("DHCP/BOOTP: Ignoring device %s, MTU %d too small",
+ pr_warn("DHCP/BOOTP: Ignoring device %s, MTU %d too small\n",
dev->name, dev->mtu);
if (!(dev->flags & IFF_NOARP))
able |= IC_RARP;
@@ -254,8 +249,8 @@ static int __init ic_open_devs(void)
else
d->xid = 0;
ic_proto_have_if |= able;
- DBG(("IP-Config: %s UP (able=%d, xid=%08x)\n",
- dev->name, able, d->xid));
+ pr_debug("IP-Config: %s UP (able=%d, xid=%08x)\n",
+ dev->name, able, d->xid);
}
}
@@ -311,7 +306,7 @@ static void __init ic_close_devs(void)
next = d->next;
dev = d->dev;
if (dev != ic_dev && !netdev_uses_dsa(dev)) {
- DBG(("IP-Config: Downing %s\n", dev->name));
+ pr_debug("IP-Config: Downing %s\n", dev->name);
dev_change_flags(dev, d->flags);
}
kfree(d);
@@ -464,7 +459,8 @@ static int __init ic_defaults(void)
&ic_myaddr);
return -1;
}
- printk("IP-Config: Guessing netmask %pI4\n", &ic_netmask);
+ pr_notice("IP-Config: Guessing netmask %pI4\n",
+ &ic_netmask);
}
return 0;
@@ -675,9 +671,7 @@ ic_dhcp_init_options(u8 *options)
u8 *e = options;
int len;
-#ifdef IPCONFIG_DEBUG
- printk("DHCP: Sending message type %d\n", mt);
-#endif
+ pr_debug("DHCP: Sending message type %d\n", mt);
memcpy(e, ic_bootp_cookie, 4); /* RFC1048 Magic Cookie */
e += 4;
@@ -847,7 +841,8 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
else if (dev->type == ARPHRD_FDDI)
b->htype = ARPHRD_ETHER;
else {
- printk("Unknown ARP type 0x%04x for device %s\n", dev->type, dev->name);
+ pr_warn("Unknown ARP type 0x%04x for device %s\n", dev->type,
+ dev->name);
b->htype = dev->type; /* can cause undefined behavior */
}
@@ -904,14 +899,12 @@ static void __init ic_do_bootp_ext(u8 *ext)
int i;
__be16 mtu;
-#ifdef IPCONFIG_DEBUG
u8 *c;
- printk("DHCP/BOOTP: Got extension %d:",*ext);
+ pr_debug("DHCP/BOOTP: Got extension %d:", *ext);
for (c=ext+2; c<ext+2+ext[1]; c++)
- printk(" %02x", *c);
- printk("\n");
-#endif
+ pr_debug(" %02x", *c);
+ pr_debug("\n");
switch (*ext++) {
case 1: /* Subnet mask */
@@ -1080,9 +1073,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
}
}
-#ifdef IPCONFIG_DEBUG
- printk("DHCP: Got message type %d\n", mt);
-#endif
+ pr_debug("DHCP: Got message type %d\n", mt);
switch (mt) {
case DHCPOFFER:
@@ -1095,10 +1086,8 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
/* Let's accept that offer. */
ic_myaddr = b->your_ip;
ic_servaddr = server_id;
-#ifdef IPCONFIG_DEBUG
- printk("DHCP: Offered address %pI4 by server %pI4\n",
- &ic_myaddr, &b->iph.saddr);
-#endif
+ pr_debug("DHCP: Offered address %pI4 by server %pI4\n",
+ &ic_myaddr, &b->iph.saddr);
/* The DHCP indicated server address takes
* precedence over the bootp header one if
* they are different.
@@ -1295,11 +1284,10 @@ static int __init ic_dynamic(void)
return -1;
}
- printk("IP-Config: Got %s answer from %pI4, ",
+ pr_info("IP-Config: Got %s answer from %pI4, my address is %pI4\n",
((ic_got_reply & IC_RARP) ? "RARP"
- : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"),
- &ic_addrservaddr);
- pr_cont("my address is %pI4\n", &ic_myaddr);
+ : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"),
+ &ic_addrservaddr, &ic_myaddr);
return 0;
}
@@ -1426,7 +1414,7 @@ static int __init ip_auto_config(void)
if (!ic_enable)
return 0;
- DBG(("IP-Config: Entered.\n"));
+ pr_debug("IP-Config: Entered.\n");
#ifdef IPCONFIG_DYNAMIC
try_try_again:
#endif
@@ -1542,7 +1530,7 @@ static int __init ip_auto_config(void)
pr_cont(", mtu=%d", ic_dev_mtu);
for (i = 0; i < CONF_NAMESERVERS_MAX; i++)
if (ic_nameservers[i] != NONE) {
- pr_info(" nameserver%u=%pI4",
+ pr_cont(" nameserver%u=%pI4",
i, &ic_nameservers[i]);
break;
}
@@ -1585,7 +1573,7 @@ static int __init ic_proto_name(char *name)
return 1;
*v = 0;
if (kstrtou8(client_id, 0, dhcp_client_identifier))
- DBG("DHCP: Invalid client identifier type\n");
+ pr_debug("DHCP: Invalid client identifier type\n");
strncpy(dhcp_client_identifier + 1, v + 1, 251);
*v = ',';
}
@@ -1644,7 +1632,7 @@ static int __init ip_auto_config_setup(char *addrs)
if ((cp = strchr(ip, ':')))
*cp++ = '\0';
if (strlen(ip) > 0) {
- DBG(("IP-Config: Parameter #%d: `%s'\n", num, ip));
+ pr_debug("IP-Config: Parameter #%d: `%s'\n", num, ip);
switch (num) {
case 0:
if ((ic_myaddr = in_aton(ip)) == ANY)
@@ -1716,7 +1704,7 @@ static int __init vendor_class_identifier_setup(char *addrs)
if (strlcpy(vendor_class_identifier, addrs,
sizeof(vendor_class_identifier))
>= sizeof(vendor_class_identifier))
- pr_warn("DHCP: vendorclass too long, truncated to \"%s\"",
+ pr_warn("DHCP: vendorclass too long, truncated to \"%s\"\n",
vendor_class_identifier);
return 1;
}
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index f34c31defafe..4044da61e747 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -103,7 +103,6 @@
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
-#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>
@@ -253,9 +252,6 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
p.i_key = p.o_key = 0;
p.i_flags = p.o_flags = 0;
- if (p.iph.ttl)
- p.iph.frag_off |= htons(IP_DF);
-
err = ip_tunnel_ioctl(dev, &p, cmd);
if (err)
return err;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 92dd4b74d513..395e2814a46d 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -66,28 +66,7 @@
#include <net/netlink.h>
#include <net/fib_rules.h>
#include <linux/netconf.h>
-
-#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
-#define CONFIG_IP_PIMSM 1
-#endif
-
-struct mr_table {
- struct list_head list;
- possible_net_t net;
- u32 id;
- struct sock __rcu *mroute_sk;
- struct timer_list ipmr_expire_timer;
- struct list_head mfc_unres_queue;
- struct list_head mfc_cache_array[MFC_LINES];
- struct vif_device vif_table[MAXVIFS];
- int maxvif;
- atomic_t cache_resolve_queue_len;
- bool mroute_do_assert;
- bool mroute_do_pim;
-#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
- int mroute_reg_vif_num;
-#endif
-};
+#include <net/nexthop.h>
struct ipmr_rule {
struct fib_rule common;
@@ -103,11 +82,7 @@ struct ipmr_result {
static DEFINE_RWLOCK(mrt_lock);
-/*
- * Multicast router control variables
- */
-
-#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
+/* Multicast router control variables */
/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -134,7 +109,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
struct mfc_cache *c, struct rtmsg *rtm);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
int cmd);
-static void mroute_clean_tables(struct mr_table *mrt);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(unsigned long arg);
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -252,8 +227,8 @@ static int __net_init ipmr_rules_init(struct net *net)
INIT_LIST_HEAD(&net->ipv4.mr_tables);
mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
- if (!mrt) {
- err = -ENOMEM;
+ if (IS_ERR(mrt)) {
+ err = PTR_ERR(mrt);
goto err1;
}
@@ -301,8 +276,13 @@ static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
static int __net_init ipmr_rules_init(struct net *net)
{
- net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
- return net->ipv4.mrt ? 0 : -ENOMEM;
+ struct mr_table *mrt;
+
+ mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
+ if (IS_ERR(mrt))
+ return PTR_ERR(mrt);
+ net->ipv4.mrt = mrt;
+ return 0;
}
static void __net_exit ipmr_rules_exit(struct net *net)
@@ -319,13 +299,17 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
struct mr_table *mrt;
unsigned int i;
+ /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */
+ if (id != RT_TABLE_DEFAULT && id >= 1000000000)
+ return ERR_PTR(-EINVAL);
+
mrt = ipmr_get_table(net, id);
if (mrt)
return mrt;
mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
if (!mrt)
- return NULL;
+ return ERR_PTR(-ENOMEM);
write_pnet(&mrt->net, net);
mrt->id = id;
@@ -338,9 +322,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
(unsigned long)mrt);
-#ifdef CONFIG_IP_PIMSM
mrt->mroute_reg_vif_num = -1;
-#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
@@ -350,7 +332,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
static void ipmr_free_table(struct mr_table *mrt)
{
del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt);
+ mroute_clean_tables(mrt, true);
kfree(mrt);
}
@@ -387,8 +369,24 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
}
}
-static
-struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
+/* Initialize ipmr pimreg/tunnel in_device */
+static bool ipmr_init_vif_indev(const struct net_device *dev)
+{
+ struct in_device *in_dev;
+
+ ASSERT_RTNL();
+
+ in_dev = __in_dev_get_rtnl(dev);
+ if (!in_dev)
+ return false;
+ ipv4_devconf_setall(in_dev);
+ neigh_parms_data_state_setall(in_dev->arp_parms);
+ IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
+
+ return true;
+}
+
+static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
struct net_device *dev;
@@ -399,7 +397,6 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
int err;
struct ifreq ifr;
struct ip_tunnel_parm p;
- struct in_device *in_dev;
memset(&p, 0, sizeof(p));
p.iph.daddr = v->vifc_rmt_addr.s_addr;
@@ -424,15 +421,8 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
if (err == 0 &&
(dev = __dev_get_by_name(net, p.name)) != NULL) {
dev->flags |= IFF_MULTICAST;
-
- in_dev = __in_dev_get_rtnl(dev);
- if (!in_dev)
+ if (!ipmr_init_vif_indev(dev))
goto failure;
-
- ipv4_devconf_setall(in_dev);
- neigh_parms_data_state_setall(in_dev->arp_parms);
- IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
-
if (dev_open(dev))
goto failure;
dev_hold(dev);
@@ -441,16 +431,11 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
return dev;
failure:
- /* allow the register to be completed before unregistering. */
- rtnl_unlock();
- rtnl_lock();
-
unregister_netdevice(dev);
return NULL;
}
-#ifdef CONFIG_IP_PIMSM
-
+#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct net *net = dev_net(dev);
@@ -500,7 +485,6 @@ static void reg_vif_setup(struct net_device *dev)
static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
struct net_device *dev;
- struct in_device *in_dev;
char name[IFNAMSIZ];
if (mrt->id == RT_TABLE_DEFAULT)
@@ -520,18 +504,8 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
return NULL;
}
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(dev);
- if (!in_dev) {
- rcu_read_unlock();
+ if (!ipmr_init_vif_indev(dev))
goto failure;
- }
-
- ipv4_devconf_setall(in_dev);
- neigh_parms_data_state_setall(in_dev->arp_parms);
- IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
- rcu_read_unlock();
-
if (dev_open(dev))
goto failure;
@@ -540,20 +514,59 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
return dev;
failure:
- /* allow the register to be completed before unregistering. */
- rtnl_unlock();
- rtnl_lock();
-
unregister_netdevice(dev);
return NULL;
}
+
+/* called with rcu_read_lock() */
+static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
+ unsigned int pimlen)
+{
+ struct net_device *reg_dev = NULL;
+ struct iphdr *encap;
+
+ encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
+ /* Check that:
+ * a. packet is really sent to a multicast group
+ * b. packet is not a NULL-REGISTER
+ * c. packet is not truncated
+ */
+ if (!ipv4_is_multicast(encap->daddr) ||
+ encap->tot_len == 0 ||
+ ntohs(encap->tot_len) + pimlen > skb->len)
+ return 1;
+
+ read_lock(&mrt_lock);
+ if (mrt->mroute_reg_vif_num >= 0)
+ reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
+ read_unlock(&mrt_lock);
+
+ if (!reg_dev)
+ return 1;
+
+ skb->mac_header = skb->network_header;
+ skb_pull(skb, (u8 *)encap - skb->data);
+ skb_reset_network_header(skb);
+ skb->protocol = htons(ETH_P_IP);
+ skb->ip_summed = CHECKSUM_NONE;
+
+ skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
+
+ netif_rx(skb);
+
+ return NET_RX_SUCCESS;
+}
+#else
+static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
+{
+ return NULL;
+}
#endif
/**
* vif_delete - Delete a VIF entry
* @notify: Set to 1, if the caller is a notifier_call
*/
-
static int vif_delete(struct mr_table *mrt, int vifi, int notify,
struct list_head *head)
{
@@ -575,10 +588,8 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
return -EADDRNOTAVAIL;
}
-#ifdef CONFIG_IP_PIMSM
if (vifi == mrt->mroute_reg_vif_num)
mrt->mroute_reg_vif_num = -1;
-#endif
if (vifi + 1 == mrt->maxvif) {
int tmp;
@@ -625,7 +636,6 @@ static inline void ipmr_cache_free(struct mfc_cache *c)
/* Destroy an unresolved cache entry, killing queued skbs
* and reporting error to netlink readers.
*/
-
static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
struct net *net = read_pnet(&mrt->net);
@@ -653,9 +663,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
ipmr_cache_free(c);
}
-
/* Timer process for the unresolved queue. */
-
static void ipmr_expire_process(unsigned long arg)
{
struct mr_table *mrt = (struct mr_table *)arg;
@@ -695,7 +703,6 @@ out:
}
/* Fill oifs list. It is called under write locked mrt_lock. */
-
static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
unsigned char *ttls)
{
@@ -731,10 +738,10 @@ static int vif_add(struct net *net, struct mr_table *mrt,
return -EADDRINUSE;
switch (vifc->vifc_flags) {
-#ifdef CONFIG_IP_PIMSM
case VIFF_REGISTER:
- /*
- * Special Purpose VIF in PIM
+ if (!ipmr_pimsm_enabled())
+ return -EINVAL;
+ /* Special Purpose VIF in PIM
* All the packets will be sent to the daemon
*/
if (mrt->mroute_reg_vif_num >= 0)
@@ -749,7 +756,6 @@ static int vif_add(struct net *net, struct mr_table *mrt,
return err;
}
break;
-#endif
case VIFF_TUNNEL:
dev = ipmr_new_tunnel(net, vifc);
if (!dev)
@@ -761,7 +767,6 @@ static int vif_add(struct net *net, struct mr_table *mrt,
return err;
}
break;
-
case VIFF_USE_IFINDEX:
case 0:
if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
@@ -815,10 +820,8 @@ static int vif_add(struct net *net, struct mr_table *mrt,
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
v->dev = dev;
-#ifdef CONFIG_IP_PIMSM
if (v->flags & VIFF_REGISTER)
mrt->mroute_reg_vif_num = vifi;
-#endif
if (vifi+1 > mrt->maxvif)
mrt->maxvif = vifi+1;
write_unlock_bh(&mrt_lock);
@@ -883,9 +886,7 @@ skip:
return ipmr_cache_find_any_parent(mrt, vifi);
}
-/*
- * Allocate a multicast cache entry
- */
+/* Allocate a multicast cache entry */
static struct mfc_cache *ipmr_cache_alloc(void)
{
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
@@ -906,10 +907,7 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void)
return c;
}
-/*
- * A cache entry has gone into a resolved state from queued
- */
-
+/* A cache entry has gone into a resolved state from queued */
static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
struct mfc_cache *uc, struct mfc_cache *c)
{
@@ -917,7 +915,6 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
struct nlmsgerr *e;
/* Play the pending entries through our router */
-
while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
@@ -941,34 +938,29 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
}
}
-/*
- * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
- * expects the following bizarre scheme.
+/* Bounce a cache query up to mrouted. We could use netlink for this but mrouted
+ * expects the following bizarre scheme.
*
- * Called under mrt_lock.
+ * Called under mrt_lock.
*/
-
static int ipmr_cache_report(struct mr_table *mrt,
struct sk_buff *pkt, vifi_t vifi, int assert)
{
- struct sk_buff *skb;
const int ihl = ip_hdrlen(pkt);
+ struct sock *mroute_sk;
struct igmphdr *igmp;
struct igmpmsg *msg;
- struct sock *mroute_sk;
+ struct sk_buff *skb;
int ret;
-#ifdef CONFIG_IP_PIMSM
if (assert == IGMPMSG_WHOLEPKT)
skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
else
-#endif
skb = alloc_skb(128, GFP_ATOMIC);
if (!skb)
return -ENOBUFS;
-#ifdef CONFIG_IP_PIMSM
if (assert == IGMPMSG_WHOLEPKT) {
/* Ugly, but we have no choice with this interface.
* Duplicate old header, fix ihl, length etc.
@@ -986,28 +978,23 @@ static int ipmr_cache_report(struct mr_table *mrt,
ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
sizeof(struct iphdr));
- } else
-#endif
- {
-
- /* Copy the IP header */
-
- skb_set_network_header(skb, skb->len);
- skb_put(skb, ihl);
- skb_copy_to_linear_data(skb, pkt->data, ihl);
- ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
- msg = (struct igmpmsg *)skb_network_header(skb);
- msg->im_vif = vifi;
- skb_dst_set(skb, dst_clone(skb_dst(pkt)));
-
- /* Add our header */
-
- igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
- igmp->type =
- msg->im_msgtype = assert;
- igmp->code = 0;
- ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
- skb->transport_header = skb->network_header;
+ } else {
+ /* Copy the IP header */
+ skb_set_network_header(skb, skb->len);
+ skb_put(skb, ihl);
+ skb_copy_to_linear_data(skb, pkt->data, ihl);
+ /* Flag to the kernel this is a route add */
+ ip_hdr(skb)->protocol = 0;
+ msg = (struct igmpmsg *)skb_network_header(skb);
+ msg->im_vif = vifi;
+ skb_dst_set(skb, dst_clone(skb_dst(pkt)));
+ /* Add our header */
+ igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
+ igmp->type = assert;
+ msg->im_msgtype = assert;
+ igmp->code = 0;
+ ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
+ skb->transport_header = skb->network_header;
}
rcu_read_lock();
@@ -1019,7 +1006,6 @@ static int ipmr_cache_report(struct mr_table *mrt,
}
/* Deliver to mrouted */
-
ret = sock_queue_rcv_skb(mroute_sk, skb);
rcu_read_unlock();
if (ret < 0) {
@@ -1030,12 +1016,9 @@ static int ipmr_cache_report(struct mr_table *mrt,
return ret;
}
-/*
- * Queue a packet for resolution. It gets locked cache entry!
- */
-
-static int
-ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
+/* Queue a packet for resolution. It gets locked cache entry! */
+static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
+ struct sk_buff *skb)
{
bool found = false;
int err;
@@ -1053,7 +1036,6 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
if (!found) {
/* Create a new entry if allowable */
-
if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
(c = ipmr_cache_alloc_unres()) == NULL) {
spin_unlock_bh(&mfc_unres_lock);
@@ -1063,13 +1045,11 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
}
/* Fill in the new cache entry */
-
c->mfc_parent = -1;
c->mfc_origin = iph->saddr;
c->mfc_mcastgrp = iph->daddr;
/* Reflect first query at mrouted. */
-
err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
if (err < 0) {
/* If the report failed throw the cache entry
@@ -1091,7 +1071,6 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
}
/* See if we can append the packet */
-
if (c->mfc_un.unres.unresolved.qlen > 3) {
kfree_skb(skb);
err = -ENOBUFS;
@@ -1104,9 +1083,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
return err;
}
-/*
- * MFC cache manipulation by user space mroute daemon
- */
+/* MFC cache manipulation by user space mroute daemon */
static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
{
@@ -1177,9 +1154,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
list_add_rcu(&c->list, &mrt->mfc_cache_array[line]);
- /*
- * Check to see if we resolved a queued list. If so we
- * need to send on the frames and tidy up.
+ /* Check to see if we resolved a queued list. If so we
+ * need to send on the frames and tidy up.
*/
found = false;
spin_lock_bh(&mfc_unres_lock);
@@ -1204,29 +1180,25 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
return 0;
}
-/*
- * Close the multicast socket, and clear the vif tables etc
- */
-
-static void mroute_clean_tables(struct mr_table *mrt)
+/* Close the multicast socket, and clear the vif tables etc */
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
int i;
LIST_HEAD(list);
struct mfc_cache *c, *next;
/* Shut down all active vif entries */
-
for (i = 0; i < mrt->maxvif; i++) {
- if (!(mrt->vif_table[i].flags & VIFF_STATIC))
- vif_delete(mrt, i, 0, &list);
+ if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
+ continue;
+ vif_delete(mrt, i, 0, &list);
}
unregister_netdevice_many(&list);
/* Wipe the cache */
-
for (i = 0; i < MFC_LINES; i++) {
list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
- if (c->mfc_flags & MFC_STATIC)
+ if (!all && (c->mfc_flags & MFC_STATIC))
continue;
list_del_rcu(&c->list);
mroute_netlink_event(mrt, c, RTM_DELROUTE);
@@ -1261,50 +1233,58 @@ static void mrtsock_destruct(struct sock *sk)
NETCONFA_IFINDEX_ALL,
net->ipv4.devconf_all);
RCU_INIT_POINTER(mrt->mroute_sk, NULL);
- mroute_clean_tables(mrt);
+ mroute_clean_tables(mrt, false);
}
}
rtnl_unlock();
}
-/*
- * Socket options and virtual interface manipulation. The whole
- * virtual interface system is a complete heap, but unfortunately
- * that's how BSD mrouted happens to think. Maybe one day with a proper
- * MOSPF/PIM router set up we can clean this up.
+/* Socket options and virtual interface manipulation. The whole
+ * virtual interface system is a complete heap, but unfortunately
+ * that's how BSD mrouted happens to think. Maybe one day with a proper
+ * MOSPF/PIM router set up we can clean this up.
*/
-int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
+int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
+ unsigned int optlen)
{
- int ret, parent = 0;
- struct vifctl vif;
- struct mfcctl mfc;
struct net *net = sock_net(sk);
+ int val, ret = 0, parent = 0;
struct mr_table *mrt;
+ struct vifctl vif;
+ struct mfcctl mfc;
+ u32 uval;
+ /* There's one exception to the lock - MRT_DONE which needs to unlock */
+ rtnl_lock();
if (sk->sk_type != SOCK_RAW ||
- inet_sk(sk)->inet_num != IPPROTO_IGMP)
- return -EOPNOTSUPP;
+ inet_sk(sk)->inet_num != IPPROTO_IGMP) {
+ ret = -EOPNOTSUPP;
+ goto out_unlock;
+ }
mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
- if (!mrt)
- return -ENOENT;
-
+ if (!mrt) {
+ ret = -ENOENT;
+ goto out_unlock;
+ }
if (optname != MRT_INIT) {
if (sk != rcu_access_pointer(mrt->mroute_sk) &&
- !ns_capable(net->user_ns, CAP_NET_ADMIN))
- return -EACCES;
+ !ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+ ret = -EACCES;
+ goto out_unlock;
+ }
}
switch (optname) {
case MRT_INIT:
- if (optlen != sizeof(int))
- return -EINVAL;
-
- rtnl_lock();
+ if (optlen != sizeof(int)) {
+ ret = -EINVAL;
+ break;
+ }
if (rtnl_dereference(mrt->mroute_sk)) {
- rtnl_unlock();
- return -EADDRINUSE;
+ ret = -EADDRINUSE;
+ break;
}
ret = ip_ra_control(sk, 1, mrtsock_destruct);
@@ -1315,129 +1295,133 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
NETCONFA_IFINDEX_ALL,
net->ipv4.devconf_all);
}
- rtnl_unlock();
- return ret;
+ break;
case MRT_DONE:
- if (sk != rcu_access_pointer(mrt->mroute_sk))
- return -EACCES;
- return ip_ra_control(sk, 0, NULL);
+ if (sk != rcu_access_pointer(mrt->mroute_sk)) {
+ ret = -EACCES;
+ } else {
+ /* We need to unlock here because mrtsock_destruct takes
+ * care of rtnl itself and we can't change that due to
+ * the IP_ROUTER_ALERT setsockopt which runs without it.
+ */
+ rtnl_unlock();
+ ret = ip_ra_control(sk, 0, NULL);
+ goto out;
+ }
+ break;
case MRT_ADD_VIF:
case MRT_DEL_VIF:
- if (optlen != sizeof(vif))
- return -EINVAL;
- if (copy_from_user(&vif, optval, sizeof(vif)))
- return -EFAULT;
- if (vif.vifc_vifi >= MAXVIFS)
- return -ENFILE;
- rtnl_lock();
+ if (optlen != sizeof(vif)) {
+ ret = -EINVAL;
+ break;
+ }
+ if (copy_from_user(&vif, optval, sizeof(vif))) {
+ ret = -EFAULT;
+ break;
+ }
+ if (vif.vifc_vifi >= MAXVIFS) {
+ ret = -ENFILE;
+ break;
+ }
if (optname == MRT_ADD_VIF) {
ret = vif_add(net, mrt, &vif,
sk == rtnl_dereference(mrt->mroute_sk));
} else {
ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
}
- rtnl_unlock();
- return ret;
-
- /*
- * Manipulate the forwarding caches. These live
- * in a sort of kernel/user symbiosis.
- */
+ break;
+ /* Manipulate the forwarding caches. These live
+ * in a sort of kernel/user symbiosis.
+ */
case MRT_ADD_MFC:
case MRT_DEL_MFC:
parent = -1;
case MRT_ADD_MFC_PROXY:
case MRT_DEL_MFC_PROXY:
- if (optlen != sizeof(mfc))
- return -EINVAL;
- if (copy_from_user(&mfc, optval, sizeof(mfc)))
- return -EFAULT;
+ if (optlen != sizeof(mfc)) {
+ ret = -EINVAL;
+ break;
+ }
+ if (copy_from_user(&mfc, optval, sizeof(mfc))) {
+ ret = -EFAULT;
+ break;
+ }
if (parent == 0)
parent = mfc.mfcc_parent;
- rtnl_lock();
if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY)
ret = ipmr_mfc_delete(mrt, &mfc, parent);
else
ret = ipmr_mfc_add(net, mrt, &mfc,
sk == rtnl_dereference(mrt->mroute_sk),
parent);
- rtnl_unlock();
- return ret;
- /*
- * Control PIM assert.
- */
+ break;
+ /* Control PIM assert. */
case MRT_ASSERT:
- {
- int v;
- if (optlen != sizeof(v))
- return -EINVAL;
- if (get_user(v, (int __user *)optval))
- return -EFAULT;
- mrt->mroute_do_assert = v;
- return 0;
- }
-#ifdef CONFIG_IP_PIMSM
+ if (optlen != sizeof(val)) {
+ ret = -EINVAL;
+ break;
+ }
+ if (get_user(val, (int __user *)optval)) {
+ ret = -EFAULT;
+ break;
+ }
+ mrt->mroute_do_assert = val;
+ break;
case MRT_PIM:
- {
- int v;
-
- if (optlen != sizeof(v))
- return -EINVAL;
- if (get_user(v, (int __user *)optval))
- return -EFAULT;
- v = !!v;
+ if (!ipmr_pimsm_enabled()) {
+ ret = -ENOPROTOOPT;
+ break;
+ }
+ if (optlen != sizeof(val)) {
+ ret = -EINVAL;
+ break;
+ }
+ if (get_user(val, (int __user *)optval)) {
+ ret = -EFAULT;
+ break;
+ }
- rtnl_lock();
- ret = 0;
- if (v != mrt->mroute_do_pim) {
- mrt->mroute_do_pim = v;
- mrt->mroute_do_assert = v;
+ val = !!val;
+ if (val != mrt->mroute_do_pim) {
+ mrt->mroute_do_pim = val;
+ mrt->mroute_do_assert = val;
}
- rtnl_unlock();
- return ret;
- }
-#endif
-#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
+ break;
case MRT_TABLE:
- {
- u32 v;
-
- if (optlen != sizeof(u32))
- return -EINVAL;
- if (get_user(v, (u32 __user *)optval))
- return -EFAULT;
-
- /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */
- if (v != RT_TABLE_DEFAULT && v >= 1000000000)
- return -EINVAL;
+ if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) {
+ ret = -ENOPROTOOPT;
+ break;
+ }
+ if (optlen != sizeof(uval)) {
+ ret = -EINVAL;
+ break;
+ }
+ if (get_user(uval, (u32 __user *)optval)) {
+ ret = -EFAULT;
+ break;
+ }
- rtnl_lock();
- ret = 0;
if (sk == rtnl_dereference(mrt->mroute_sk)) {
ret = -EBUSY;
} else {
- if (!ipmr_new_table(net, v))
- ret = -ENOMEM;
+ mrt = ipmr_new_table(net, uval);
+ if (IS_ERR(mrt))
+ ret = PTR_ERR(mrt);
else
- raw_sk(sk)->ipmr_table = v;
+ raw_sk(sk)->ipmr_table = uval;
}
- rtnl_unlock();
- return ret;
- }
-#endif
- /*
- * Spurious command, or MRT_VERSION which you cannot
- * set.
- */
+ break;
+ /* Spurious command, or MRT_VERSION which you cannot set. */
default:
- return -ENOPROTOOPT;
+ ret = -ENOPROTOOPT;
}
+out_unlock:
+ rtnl_unlock();
+out:
+ return ret;
}
-/*
- * Getsock opt support for the multicast routing system.
- */
-
+/* Getsock opt support for the multicast routing system. */
int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
int olr;
@@ -1453,39 +1437,35 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
if (!mrt)
return -ENOENT;
- if (optname != MRT_VERSION &&
-#ifdef CONFIG_IP_PIMSM
- optname != MRT_PIM &&
-#endif
- optname != MRT_ASSERT)
+ switch (optname) {
+ case MRT_VERSION:
+ val = 0x0305;
+ break;
+ case MRT_PIM:
+ if (!ipmr_pimsm_enabled())
+ return -ENOPROTOOPT;
+ val = mrt->mroute_do_pim;
+ break;
+ case MRT_ASSERT:
+ val = mrt->mroute_do_assert;
+ break;
+ default:
return -ENOPROTOOPT;
+ }
if (get_user(olr, optlen))
return -EFAULT;
-
olr = min_t(unsigned int, olr, sizeof(int));
if (olr < 0)
return -EINVAL;
-
if (put_user(olr, optlen))
return -EFAULT;
- if (optname == MRT_VERSION)
- val = 0x0305;
-#ifdef CONFIG_IP_PIMSM
- else if (optname == MRT_PIM)
- val = mrt->mroute_do_pim;
-#endif
- else
- val = mrt->mroute_do_assert;
if (copy_to_user(optval, &val, olr))
return -EFAULT;
return 0;
}
-/*
- * The IP multicast ioctl support routines.
- */
-
+/* The IP multicast ioctl support routines. */
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
struct sioc_sg_req sr;
@@ -1618,7 +1598,6 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
}
#endif
-
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
@@ -1640,17 +1619,14 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
return NOTIFY_DONE;
}
-
static struct notifier_block ip_mr_notifier = {
.notifier_call = ipmr_device_event,
};
-/*
- * Encapsulate a packet by attaching a valid IPIP header to it.
- * This avoids tunnel drivers and other mess and gives us the speed so
- * important for multicast video.
+/* Encapsulate a packet by attaching a valid IPIP header to it.
+ * This avoids tunnel drivers and other mess and gives us the speed so
+ * important for multicast video.
*/
-
static void ip_encap(struct net *net, struct sk_buff *skb,
__be32 saddr, __be32 daddr)
{
@@ -1692,9 +1668,7 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
return dst_output(net, sk, skb);
}
-/*
- * Processing handlers for ipmr_forward
- */
+/* Processing handlers for ipmr_forward */
static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc_cache *c, int vifi)
@@ -1709,7 +1683,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
if (!vif->dev)
goto out_free;
-#ifdef CONFIG_IP_PIMSM
if (vif->flags & VIFF_REGISTER) {
vif->pkt_out++;
vif->bytes_out += skb->len;
@@ -1718,7 +1691,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
goto out_free;
}
-#endif
if (vif->flags & VIFF_TUNNEL) {
rt = ip_route_output_ports(net, &fl4, NULL,
@@ -1745,7 +1717,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
* allow to send ICMP, so that packets will disappear
* to blackhole.
*/
-
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
ip_rt_put(rt);
goto out_free;
@@ -1777,8 +1748,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
IPCB(skb)->flags |= IPSKB_FORWARDED;
- /*
- * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
+ /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
* not only before forwarding, but after forwarding on all output
* interfaces. It is clear, if mrouter runs a multicasting
* program, it should receive packets not depending to what interface
@@ -1809,7 +1779,6 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
}
/* "local" means that we should preserve one skb (for local delivery) */
-
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc_cache *cache,
int local)
@@ -1834,9 +1803,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
goto forward;
}
- /*
- * Wrong interface: drop packet and (maybe) send PIM assert.
- */
+ /* Wrong interface: drop packet and (maybe) send PIM assert. */
if (mrt->vif_table[vif].dev != skb->dev) {
if (rt_is_output_route(skb_rtable(skb))) {
/* It is our own packet, looped back.
@@ -1875,9 +1842,7 @@ forward:
mrt->vif_table[vif].pkt_in++;
mrt->vif_table[vif].bytes_in += skb->len;
- /*
- * Forward the frame
- */
+ /* Forward the frame */
if (cache->mfc_origin == htonl(INADDR_ANY) &&
cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
if (true_vifi >= 0 &&
@@ -1951,11 +1916,9 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
return mrt;
}
-/*
- * Multicast packets for forwarding arrive here
- * Called with rcu_read_lock();
+/* Multicast packets for forwarding arrive here
+ * Called with rcu_read_lock();
*/
-
int ip_mr_input(struct sk_buff *skb)
{
struct mfc_cache *cache;
@@ -2006,9 +1969,7 @@ int ip_mr_input(struct sk_buff *skb)
vif);
}
- /*
- * No usable cache entry
- */
+ /* No usable cache entry */
if (!cache) {
int vif;
@@ -2049,53 +2010,8 @@ dont_forward:
return 0;
}
-#ifdef CONFIG_IP_PIMSM
-/* called with rcu_read_lock() */
-static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
- unsigned int pimlen)
-{
- struct net_device *reg_dev = NULL;
- struct iphdr *encap;
-
- encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
- /*
- * Check that:
- * a. packet is really sent to a multicast group
- * b. packet is not a NULL-REGISTER
- * c. packet is not truncated
- */
- if (!ipv4_is_multicast(encap->daddr) ||
- encap->tot_len == 0 ||
- ntohs(encap->tot_len) + pimlen > skb->len)
- return 1;
-
- read_lock(&mrt_lock);
- if (mrt->mroute_reg_vif_num >= 0)
- reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
- read_unlock(&mrt_lock);
-
- if (!reg_dev)
- return 1;
-
- skb->mac_header = skb->network_header;
- skb_pull(skb, (u8 *)encap - skb->data);
- skb_reset_network_header(skb);
- skb->protocol = htons(ETH_P_IP);
- skb->ip_summed = CHECKSUM_NONE;
-
- skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
-
- netif_rx(skb);
-
- return NET_RX_SUCCESS;
-}
-#endif
-
#ifdef CONFIG_IP_PIMSM_V1
-/*
- * Handle IGMP messages of PIMv1
- */
-
+/* Handle IGMP messages of PIMv1 */
int pim_rcv_v1(struct sk_buff *skb)
{
struct igmphdr *pim;
@@ -2256,8 +2172,6 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
}
read_lock(&mrt_lock);
- if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
- cache->mfc_flags |= MFC_NOTIFY;
err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
read_unlock(&mrt_lock);
rcu_read_unlock();
@@ -2419,10 +2333,133 @@ done:
return skb->len;
}
+static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
+ [RTA_SRC] = { .type = NLA_U32 },
+ [RTA_DST] = { .type = NLA_U32 },
+ [RTA_IIF] = { .type = NLA_U32 },
+ [RTA_TABLE] = { .type = NLA_U32 },
+ [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
+};
+
+static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol)
+{
+ switch (rtm_protocol) {
+ case RTPROT_STATIC:
+ case RTPROT_MROUTED:
+ return true;
+ }
+ return false;
+}
+
+static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc)
+{
+ struct rtnexthop *rtnh = nla_data(nla);
+ int remaining = nla_len(nla), vifi = 0;
+
+ while (rtnh_ok(rtnh, remaining)) {
+ mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops;
+ if (++vifi == MAXVIFS)
+ break;
+ rtnh = rtnh_next(rtnh, &remaining);
+ }
+
+ return remaining > 0 ? -EINVAL : vifi;
+}
+
+/* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */
+static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
+ struct mfcctl *mfcc, int *mrtsock,
+ struct mr_table **mrtret)
+{
+ struct net_device *dev = NULL;
+ u32 tblid = RT_TABLE_DEFAULT;
+ struct mr_table *mrt;
+ struct nlattr *attr;
+ struct rtmsg *rtm;
+ int ret, rem;
+
+ ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy);
+ if (ret < 0)
+ goto out;
+ rtm = nlmsg_data(nlh);
+
+ ret = -EINVAL;
+ if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 ||
+ rtm->rtm_type != RTN_MULTICAST ||
+ rtm->rtm_scope != RT_SCOPE_UNIVERSE ||
+ !ipmr_rtm_validate_proto(rtm->rtm_protocol))
+ goto out;
+
+ memset(mfcc, 0, sizeof(*mfcc));
+ mfcc->mfcc_parent = -1;
+ ret = 0;
+ nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) {
+ switch (nla_type(attr)) {
+ case RTA_SRC:
+ mfcc->mfcc_origin.s_addr = nla_get_be32(attr);
+ break;
+ case RTA_DST:
+ mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr);
+ break;
+ case RTA_IIF:
+ dev = __dev_get_by_index(net, nla_get_u32(attr));
+ if (!dev) {
+ ret = -ENODEV;
+ goto out;
+ }
+ break;
+ case RTA_MULTIPATH:
+ if (ipmr_nla_get_ttls(attr, mfcc) < 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+ break;
+ case RTA_PREFSRC:
+ ret = 1;
+ break;
+ case RTA_TABLE:
+ tblid = nla_get_u32(attr);
+ break;
+ }
+ }
+ mrt = ipmr_get_table(net, tblid);
+ if (!mrt) {
+ ret = -ENOENT;
+ goto out;
+ }
+ *mrtret = mrt;
+ *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0;
+ if (dev)
+ mfcc->mfcc_parent = ipmr_find_vif(mrt, dev);
+
+out:
+ return ret;
+}
+
+/* takes care of both newroute and delroute */
+static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ struct net *net = sock_net(skb->sk);
+ int ret, mrtsock, parent;
+ struct mr_table *tbl;
+ struct mfcctl mfcc;
+
+ mrtsock = 0;
+ tbl = NULL;
+ ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl);
+ if (ret < 0)
+ return ret;
+
+ parent = ret ? mfcc.mfcc_parent : -1;
+ if (nlh->nlmsg_type == RTM_NEWROUTE)
+ return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent);
+ else
+ return ipmr_mfc_delete(tbl, &mfcc, parent);
+}
+
#ifdef CONFIG_PROC_FS
-/*
- * The /proc interfaces to multicast routing :
- * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
+/* The /proc interfaces to multicast routing :
+ * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
*/
struct ipmr_vif_iter {
struct seq_net_private p;
@@ -2706,10 +2743,7 @@ static const struct net_protocol pim_protocol = {
};
#endif
-
-/*
- * Setup for IP multicast routing
- */
+/* Setup for IP multicast routing */
static int __net_init ipmr_net_init(struct net *net)
{
int err;
@@ -2759,8 +2793,6 @@ int __init ip_mr_init(void)
sizeof(struct mfc_cache),
0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
NULL);
- if (!mrt_cachep)
- return -ENOMEM;
err = register_pernet_subsys(&ipmr_net_ops);
if (err)
@@ -2778,6 +2810,10 @@ int __init ip_mr_init(void)
#endif
rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
NULL, ipmr_rtm_dumproute, NULL);
+ rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
+ ipmr_rtm_route, NULL, NULL);
+ rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
+ ipmr_rtm_route, NULL, NULL);
return 0;
#ifdef CONFIG_IP_PIMSM_V2
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index a35584176535..c187c60e3e0c 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -60,6 +60,7 @@ config NFT_REJECT_IPV4
config NFT_DUP_IPV4
tristate "IPv4 nf_tables packet duplication support"
+ depends on !NF_CONNTRACK || NF_CONNTRACK
select NF_DUP_IPV4
help
This module enables IPv4 packet duplication support for nf_tables.
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 11dccba474b7..b488cac9c5ca 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -38,13 +38,13 @@ MODULE_DESCRIPTION("arptables core");
/*#define DEBUG_ARP_TABLES_USER*/
#ifdef DEBUG_ARP_TABLES
-#define dprintf(format, args...) printk(format , ## args)
+#define dprintf(format, args...) pr_debug(format, ## args)
#else
#define dprintf(format, args...)
#endif
#ifdef DEBUG_ARP_TABLES_USER
-#define duprintf(format, args...) printk(format , ## args)
+#define duprintf(format, args...) pr_debug(format, ## args)
#else
#define duprintf(format, args...)
#endif
@@ -1905,7 +1905,7 @@ static int __init arp_tables_init(void)
if (ret < 0)
goto err4;
- printk(KERN_INFO "arp_tables: (C) 2002 David S. Miller\n");
+ pr_info("arp_tables: (C) 2002 David S. Miller\n");
return 0;
err4:
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 461ca926fd39..e3c46e8e2762 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -451,7 +451,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
ret = nf_register_sockopt(&so_getorigdst);
if (ret < 0) {
- printk(KERN_ERR "Unable to register netfilter socket option\n");
+ pr_err("Unable to register netfilter socket option\n");
return ret;
}
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 0e5591c2ee9f..a04dee536b8e 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -27,8 +27,6 @@ static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
{
int err;
- skb_orphan(skb);
-
local_bh_disable();
err = ip_defrag(net, skb, user);
local_bh_enable();
@@ -67,10 +65,9 @@ static unsigned int ipv4_conntrack_defrag(void *priv,
const struct nf_hook_state *state)
{
struct sock *sk = skb->sk;
- struct inet_sock *inet = inet_sk(skb->sk);
- if (sk && (sk->sk_family == PF_INET) &&
- inet->nodefrag)
+ if (sk && sk_fullsock(sk) && (sk->sk_family == PF_INET) &&
+ inet_sk(sk)->nodefrag)
return NF_ACCEPT;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 5075b7ecd26d..61c7cc22ea68 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -132,7 +132,8 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
if (skb->ip_summed != CHECKSUM_PARTIAL) {
if (!(rt->rt_flags & RTCF_LOCAL) &&
- (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) {
+ (!skb->dev || skb->dev->features &
+ (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) {
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_headroom(skb) +
skb_network_offset(skb) +
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 657d2307f031..b3ca21b2ba9b 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -45,7 +45,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
struct net *net = nf_ct_net(ct);
const struct nf_conn *master = ct->master;
struct nf_conntrack_expect *other_exp;
- struct nf_conntrack_tuple t;
+ struct nf_conntrack_tuple t = {};
const struct nf_ct_pptp_master *ct_pptp_info;
const struct nf_nat_pptp *nat_pptp_info;
struct nf_nat_range range;
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index ddb894ac1458..c9b52c361da2 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1048,7 +1048,7 @@ static int snmp_parse_mangle(unsigned char *msg,
if (!asn1_uint_decode (&ctx, end, &vers))
return 0;
if (debug > 1)
- printk(KERN_DEBUG "bsalg: snmp version: %u\n", vers + 1);
+ pr_debug("bsalg: snmp version: %u\n", vers + 1);
if (vers > 1)
return 1;
@@ -1064,10 +1064,10 @@ static int snmp_parse_mangle(unsigned char *msg,
if (debug > 1) {
unsigned int i;
- printk(KERN_DEBUG "bsalg: community: ");
+ pr_debug("bsalg: community: ");
for (i = 0; i < comm.len; i++)
- printk("%c", comm.data[i]);
- printk("\n");
+ pr_cont("%c", comm.data[i]);
+ pr_cont("\n");
}
kfree(comm.data);
@@ -1091,9 +1091,9 @@ static int snmp_parse_mangle(unsigned char *msg,
};
if (pdutype > SNMP_PDU_TRAP2)
- printk(KERN_DEBUG "bsalg: bad pdu type %u\n", pdutype);
+ pr_debug("bsalg: bad pdu type %u\n", pdutype);
else
- printk(KERN_DEBUG "bsalg: pdu: %s\n", pdus[pdutype]);
+ pr_debug("bsalg: pdu: %s\n", pdus[pdutype]);
}
if (pdutype != SNMP_PDU_RESPONSE &&
pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2)
@@ -1119,7 +1119,7 @@ static int snmp_parse_mangle(unsigned char *msg,
return 0;
if (debug > 1)
- printk(KERN_DEBUG "bsalg: request: id=0x%lx error_status=%u "
+ pr_debug("bsalg: request: id=0x%lx error_status=%u "
"error_index=%u\n", req.id, req.error_status,
req.error_index);
}
@@ -1145,13 +1145,13 @@ static int snmp_parse_mangle(unsigned char *msg,
}
if (debug > 1) {
- printk(KERN_DEBUG "bsalg: object: ");
+ pr_debug("bsalg: object: ");
for (i = 0; i < obj->id_len; i++) {
if (i > 0)
- printk(".");
- printk("%lu", obj->id[i]);
+ pr_cont(".");
+ pr_cont("%lu", obj->id[i]);
}
- printk(": type=%u\n", obj->type);
+ pr_cont(": type=%u\n", obj->type);
}
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index c747b2d9eb77..b6ea57ec5e14 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -14,7 +14,6 @@
#include <net/netfilter/ipv4/nf_reject.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
-#include <net/netfilter/ipv4/nf_reject.h>
const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
struct tcphdr *_oth, int hook)
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
index 9d09d4f59545..cd84d4295a20 100644
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -57,7 +57,7 @@ err:
static void nf_tables_arp_exit_net(struct net *net)
{
- nft_unregister_afinfo(net->nft.arp);
+ nft_unregister_afinfo(net, net->nft.arp);
kfree(net->nft.arp);
}
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
index ca9dc3c46c4f..e44ba3b12fbb 100644
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -78,7 +78,7 @@ err:
static void nf_tables_ipv4_exit_net(struct net *net)
{
- nft_unregister_afinfo(net->nft.ipv4);
+ nft_unregister_afinfo(net, net->nft.ipv4);
kfree(net->nft.ipv4);
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index e89094ab5ddb..d3a27165f9cc 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -746,8 +746,10 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_controllen) {
err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
- if (err)
+ if (unlikely(err)) {
+ kfree(ipc.opt);
return err;
+ }
if (ipc.opt)
free = 1;
}
@@ -1063,6 +1065,7 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos)
}
void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family)
+ __acquires(ping_table.lock)
{
struct ping_iter_state *state = seq->private;
state->bucket = 0;
@@ -1094,6 +1097,7 @@ void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
EXPORT_SYMBOL_GPL(ping_seq_next);
void ping_seq_stop(struct seq_file *seq, void *v)
+ __releases(ping_table.lock)
{
read_unlock_bh(&ping_table.lock);
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 8c0d0bdc2a7c..7113bae4e6a0 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -406,10 +406,12 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
ip_select_ident(net, skb, NULL);
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+ skb->transport_header += iphlen;
+ if (iph->protocol == IPPROTO_ICMP &&
+ length >= iphlen + sizeof(struct icmphdr))
+ icmp_out_count(net, ((struct icmphdr *)
+ skb_transport_header(skb))->type);
}
- if (iph->protocol == IPPROTO_ICMP)
- icmp_out_count(net, ((struct icmphdr *)
- skb_transport_header(skb))->type);
err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
net, sk, skb, NULL, rt->dst.dev,
@@ -545,8 +547,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_controllen) {
err = ip_cmsg_send(net, msg, &ipc, false);
- if (err)
+ if (unlikely(err)) {
+ kfree(ipc.opt);
goto out;
+ }
if (ipc.opt)
free = 1;
}
@@ -599,8 +603,11 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
(inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0);
- if (!saddr && ipc.oif)
- l3mdev_get_saddr(net, ipc.oif, &fl4);
+ if (!saddr && ipc.oif) {
+ err = l3mdev_get_saddr(net, ipc.oif, &fl4);
+ if (err < 0)
+ goto done;
+ }
if (!inet->hdrincl) {
rfv.msg = msg;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 85f184e429c6..02c62299d717 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -129,6 +129,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;
+static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
/*
* Interface to generic destination cache.
*/
@@ -755,7 +756,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
struct fib_nh *nh = &FIB_RES_NH(res);
update_or_create_fnhe(nh, fl4->daddr, new_gw,
- 0, 0);
+ 0, jiffies + ip_rt_gc_timeout);
}
if (kill_route)
rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -1556,6 +1557,36 @@ static void ip_handle_martian_source(struct net_device *dev,
#endif
}
+static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
+{
+ struct fnhe_hash_bucket *hash;
+ struct fib_nh_exception *fnhe, __rcu **fnhe_p;
+ u32 hval = fnhe_hashfun(daddr);
+
+ spin_lock_bh(&fnhe_lock);
+
+ hash = rcu_dereference_protected(nh->nh_exceptions,
+ lockdep_is_held(&fnhe_lock));
+ hash += hval;
+
+ fnhe_p = &hash->chain;
+ fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
+ while (fnhe) {
+ if (fnhe->fnhe_daddr == daddr) {
+ rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
+ fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
+ fnhe_flush_routes(fnhe);
+ kfree_rcu(fnhe, rcu);
+ break;
+ }
+ fnhe_p = &fnhe->fnhe_next;
+ fnhe = rcu_dereference_protected(fnhe->fnhe_next,
+ lockdep_is_held(&fnhe_lock));
+ }
+
+ spin_unlock_bh(&fnhe_lock);
+}
+
/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
const struct fib_result *res,
@@ -1609,11 +1640,20 @@ static int __mkroute_input(struct sk_buff *skb,
fnhe = find_exception(&FIB_RES_NH(*res), daddr);
if (do_cache) {
- if (fnhe)
+ if (fnhe) {
rth = rcu_dereference(fnhe->fnhe_rth_input);
- else
- rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+ if (rth && rth->dst.expires &&
+ time_after(jiffies, rth->dst.expires)) {
+ ip_del_fnhe(&FIB_RES_NH(*res), daddr);
+ fnhe = NULL;
+ } else {
+ goto rt_cache;
+ }
+ }
+
+ rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+rt_cache:
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
goto out;
@@ -2014,19 +2054,29 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
struct fib_nh *nh = &FIB_RES_NH(*res);
fnhe = find_exception(nh, fl4->daddr);
- if (fnhe)
+ if (fnhe) {
prth = &fnhe->fnhe_rth_output;
- else {
- if (unlikely(fl4->flowi4_flags &
- FLOWI_FLAG_KNOWN_NH &&
- !(nh->nh_gw &&
- nh->nh_scope == RT_SCOPE_LINK))) {
- do_cache = false;
- goto add;
+ rth = rcu_dereference(*prth);
+ if (rth && rth->dst.expires &&
+ time_after(jiffies, rth->dst.expires)) {
+ ip_del_fnhe(nh, fl4->daddr);
+ fnhe = NULL;
+ } else {
+ goto rt_cache;
}
- prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
}
+
+ if (unlikely(fl4->flowi4_flags &
+ FLOWI_FLAG_KNOWN_NH &&
+ !(nh->nh_gw &&
+ nh->nh_scope == RT_SCOPE_LINK))) {
+ do_cache = false;
+ goto add;
+ }
+ prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
rth = rcu_dereference(*prth);
+
+rt_cache:
if (rt_cache_valid(rth)) {
dst_hold(&rth->dst);
return rth;
@@ -2569,7 +2619,6 @@ void ip_rt_multicast_event(struct in_device *in_dev)
}
#ifdef CONFIG_SYSCTL
-static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
static int ip_rt_gc_interval __read_mostly = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_gc_elasticity __read_mostly = 8;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 4cbe9f0a4281..2d5589b61e9f 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -50,8 +50,7 @@ static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
#define TSBITS 6
#define TSMASK (((__u32)1 << TSBITS) - 1)
-static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
- ipv4_cookie_scratch);
+static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv4_cookie_scratch);
static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
u32 count, int c)
@@ -351,7 +350,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
treq->snt_synack.v64 = 0;
treq->tfo_listener = false;
- ireq->ir_iif = sk->sk_bound_dev_if;
+ ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
/* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)
@@ -371,7 +370,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
* hasn't changed since we received the original syn, but I see
* no easy way to do this.
*/
- flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark,
+ flowi4_init_output(&fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
inet_sk_flowi_flags(sk),
opt->srr ? opt->faddr : ireq->ir_rmt_addr,
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 25300c5e283b..4d367b4139a3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -24,7 +24,6 @@
#include <net/cipso_ipv4.h>
#include <net/inet_frag.h>
#include <net/ping.h>
-#include <net/tcp_memcontrol.h>
static int zero;
static int one = 1;
@@ -48,14 +47,14 @@ static void set_local_port_range(struct net *net, int range[2])
{
bool same_parity = !((range[0] ^ range[1]) & 1);
- write_seqlock(&net->ipv4.ip_local_ports.lock);
+ write_seqlock_bh(&net->ipv4.ip_local_ports.lock);
if (same_parity && !net->ipv4.ip_local_ports.warned) {
net->ipv4.ip_local_ports.warned = true;
pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n");
}
net->ipv4.ip_local_ports.range[0] = range[0];
net->ipv4.ip_local_ports.range[1] = range[1];
- write_sequnlock(&net->ipv4.ip_local_ports.lock);
+ write_sequnlock_bh(&net->ipv4.ip_local_ports.lock);
}
/* Validate changes from /proc interface. */
@@ -337,27 +336,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "tcp_keepalive_time",
- .data = &sysctl_tcp_keepalive_time,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "tcp_keepalive_probes",
- .data = &sysctl_tcp_keepalive_probes,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_keepalive_intvl",
- .data = &sysctl_tcp_keepalive_intvl,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
.procname = "tcp_retries1",
.data = &sysctl_tcp_retries1,
.maxlen = sizeof(int),
@@ -915,6 +893,17 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ {
+ .procname = "tcp_l3mdev_accept",
+ .data = &init_net.ipv4.sysctl_tcp_l3mdev_accept,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+#endif
{
.procname = "tcp_mtu_probing",
.data = &init_net.ipv4.sysctl_tcp_mtu_probing,
@@ -950,6 +939,27 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "tcp_keepalive_time",
+ .data = &init_net.ipv4.sysctl_tcp_keepalive_time,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "tcp_keepalive_probes",
+ .data = &init_net.ipv4.sysctl_tcp_keepalive_probes,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_keepalive_intvl",
+ .data = &init_net.ipv4.sysctl_tcp_keepalive_intvl,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
{ }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0cfa7c0c1e80..4804645bdf02 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -247,6 +247,7 @@
#define pr_fmt(fmt) "TCP: " fmt
+#include <crypto/hash.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -266,7 +267,6 @@
#include <linux/swap.h>
#include <linux/cache.h>
#include <linux/err.h>
-#include <linux/crypto.h>
#include <linux/time.h>
#include <linux/slab.h>
@@ -279,6 +279,7 @@
#include <asm/uaccess.h>
#include <asm/ioctls.h>
+#include <asm/unaligned.h>
#include <net/busy_poll.h>
int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
@@ -422,7 +423,8 @@ void tcp_init_sock(struct sock *sk)
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
local_bh_disable();
- sock_update_memcg(sk);
+ if (mem_cgroup_sockets_enabled)
+ sock_update_memcg(sk);
sk_sockets_allocated_inc(sk);
local_bh_enable();
}
@@ -451,11 +453,14 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
unsigned int mask;
struct sock *sk = sock->sk;
const struct tcp_sock *tp = tcp_sk(sk);
+ int state;
sock_rps_record_flow(sk);
sock_poll_wait(file, sk_sleep(sk), wait);
- if (sk->sk_state == TCP_LISTEN)
+
+ state = sk_state_load(sk);
+ if (state == TCP_LISTEN)
return inet_csk_listen_poll(sk);
/* Socket is not locked. We are protected from async events
@@ -492,14 +497,14 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
* NOTE. Check for TCP_CLOSE is added. The goal is to prevent
* blocking on fresh not-connected or disconnected socket. --ANK
*/
- if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE)
+ if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
mask |= POLLHUP;
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLIN | POLLRDNORM | POLLRDHUP;
/* Connected or passive Fast Open socket? */
- if (sk->sk_state != TCP_SYN_SENT &&
- (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk)) {
+ if (state != TCP_SYN_SENT &&
+ (state != TCP_SYN_RECV || tp->fastopen_rsk)) {
int target = sock_rcvlowat(sk, 0, INT_MAX);
if (tp->urg_seq == tp->copied_seq &&
@@ -507,9 +512,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
tp->urg_data)
target++;
- /* Potential race condition. If read of tp below will
- * escape above sk->sk_state, we can be illegally awaken
- * in SYN_* states. */
if (tp->rcv_nxt - tp->copied_seq >= target)
mask |= POLLIN | POLLRDNORM;
@@ -517,8 +519,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
if (sk_stream_is_writeable(sk)) {
mask |= POLLOUT | POLLWRNORM;
} else { /* send SIGIO later */
- set_bit(SOCK_ASYNC_NOSPACE,
- &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
/* Race breaker. If space is freed after
@@ -906,7 +907,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
goto out_err;
}
- clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
mss_now = tcp_send_mss(sk, &size_goal, flags);
copied = 0;
@@ -939,7 +940,7 @@ new_segment:
i = skb_shinfo(skb)->nr_frags;
can_coalesce = skb_can_coalesce(skb, i, page, offset);
- if (!can_coalesce && i >= MAX_SKB_FRAGS) {
+ if (!can_coalesce && i >= sysctl_max_skb_frags) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -1019,7 +1020,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
ssize_t res;
if (!(sk->sk_route_caps & NETIF_F_SG) ||
- !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
+ !sk_check_csum_caps(sk))
return sock_no_sendpage(sk->sk_socket, page, offset, size,
flags);
@@ -1134,7 +1135,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
}
/* This should be in poll */
- clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
mss_now = tcp_send_mss(sk, &size_goal, flags);
@@ -1176,7 +1177,7 @@ new_segment:
/*
* Check whether we can use HW checksum.
*/
- if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
+ if (sk_check_csum_caps(sk))
skb->ip_summed = CHECKSUM_PARTIAL;
skb_entail(sk, skb);
@@ -1212,7 +1213,7 @@ new_segment:
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i == MAX_SKB_FRAGS || !sg) {
+ if (i == sysctl_max_skb_frags || !sg) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -1934,7 +1935,7 @@ void tcp_set_state(struct sock *sk, int state)
/* Change state AFTER socket is unhashed to avoid closed
* socket sitting in hash tables.
*/
- sk->sk_state = state;
+ sk_state_store(sk, state);
#ifdef STATE_TRACE
SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
@@ -2638,13 +2639,15 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 now = tcp_time_stamp;
unsigned int start;
+ u64 rate64;
u32 rate;
memset(info, 0, sizeof(*info));
if (sk->sk_type != SOCK_STREAM)
return;
- info->tcpi_state = sk->sk_state;
+ info->tcpi_state = sk_state_load(sk);
+
info->tcpi_ca_state = icsk->icsk_ca_state;
info->tcpi_retransmits = icsk->icsk_retransmits;
info->tcpi_probes = icsk->icsk_probes_out;
@@ -2672,7 +2675,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_snd_mss = tp->mss_cache;
info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
- if (sk->sk_state == TCP_LISTEN) {
+ if (info->tcpi_state == TCP_LISTEN) {
info->tcpi_unacked = sk->sk_ack_backlog;
info->tcpi_sacked = sk->sk_max_ack_backlog;
} else {
@@ -2702,15 +2705,17 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_total_retrans = tp->total_retrans;
rate = READ_ONCE(sk->sk_pacing_rate);
- info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL;
+ rate64 = rate != ~0U ? rate : ~0ULL;
+ put_unaligned(rate64, &info->tcpi_pacing_rate);
rate = READ_ONCE(sk->sk_max_pacing_rate);
- info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL;
+ rate64 = rate != ~0U ? rate : ~0ULL;
+ put_unaligned(rate64, &info->tcpi_max_pacing_rate);
do {
start = u64_stats_fetch_begin_irq(&tp->syncp);
- info->tcpi_bytes_acked = tp->bytes_acked;
- info->tcpi_bytes_received = tp->bytes_received;
+ put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked);
+ put_unaligned(tp->bytes_received, &info->tcpi_bytes_received);
} while (u64_stats_fetch_retry_irq(&tp->syncp, start));
info->tcpi_segs_out = tp->segs_out;
info->tcpi_segs_in = tp->segs_in;
@@ -2938,17 +2943,26 @@ static bool tcp_md5sig_pool_populated = false;
static void __tcp_alloc_md5sig_pool(void)
{
+ struct crypto_ahash *hash;
int cpu;
+ hash = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(hash))
+ return;
+
for_each_possible_cpu(cpu) {
- if (!per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm) {
- struct crypto_hash *hash;
+ struct ahash_request *req;
- hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR_OR_NULL(hash))
- return;
- per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash;
- }
+ if (per_cpu(tcp_md5sig_pool, cpu).md5_req)
+ continue;
+
+ req = ahash_request_alloc(hash, GFP_KERNEL);
+ if (!req)
+ return;
+
+ ahash_request_set_callback(req, 0, NULL, NULL);
+
+ per_cpu(tcp_md5sig_pool, cpu).md5_req = req;
}
/* before setting tcp_md5sig_pool_populated, we must commit all writes
* to memory. See smp_rmb() in tcp_get_md5sig_pool()
@@ -2998,7 +3012,6 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
{
struct scatterlist sg;
struct tcphdr hdr;
- int err;
/* We are not allowed to change tcphdr, make a local copy */
memcpy(&hdr, th, sizeof(hdr));
@@ -3006,8 +3019,8 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
/* options aren't included in the hash */
sg_init_one(&sg, &hdr, sizeof(hdr));
- err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(hdr));
- return err;
+ ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(hdr));
+ return crypto_ahash_update(hp->md5_req);
}
EXPORT_SYMBOL(tcp_md5_hash_header);
@@ -3016,7 +3029,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
{
struct scatterlist sg;
const struct tcphdr *tp = tcp_hdr(skb);
- struct hash_desc *desc = &hp->md5_desc;
+ struct ahash_request *req = hp->md5_req;
unsigned int i;
const unsigned int head_data_len = skb_headlen(skb) > header_len ?
skb_headlen(skb) - header_len : 0;
@@ -3026,7 +3039,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
sg_init_table(&sg, 1);
sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
- if (crypto_hash_update(desc, &sg, head_data_len))
+ ahash_request_set_crypt(req, &sg, NULL, head_data_len);
+ if (crypto_ahash_update(req))
return 1;
for (i = 0; i < shi->nr_frags; ++i) {
@@ -3036,7 +3050,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
sg_set_page(&sg, page, skb_frag_size(f),
offset_in_page(offset));
- if (crypto_hash_update(desc, &sg, skb_frag_size(f)))
+ ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f));
+ if (crypto_ahash_update(req))
return 1;
}
@@ -3053,7 +3068,8 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *ke
struct scatterlist sg;
sg_init_one(&sg, key->key, key->keylen);
- return crypto_hash_update(&hp->md5_desc, &sg, key->keylen);
+ ahash_request_set_crypt(hp->md5_req, &sg, NULL, key->keylen);
+ return crypto_ahash_update(hp->md5_req);
}
EXPORT_SYMBOL(tcp_md5_hash_key);
@@ -3080,6 +3096,52 @@ void tcp_done(struct sock *sk)
}
EXPORT_SYMBOL_GPL(tcp_done);
+int tcp_abort(struct sock *sk, int err)
+{
+ if (!sk_fullsock(sk)) {
+ if (sk->sk_state == TCP_NEW_SYN_RECV) {
+ struct request_sock *req = inet_reqsk(sk);
+
+ local_bh_disable();
+ inet_csk_reqsk_queue_drop_and_put(req->rsk_listener,
+ req);
+ local_bh_enable();
+ return 0;
+ }
+ sock_gen_put(sk);
+ return -EOPNOTSUPP;
+ }
+
+ /* Don't race with userspace socket closes such as tcp_close. */
+ lock_sock(sk);
+
+ if (sk->sk_state == TCP_LISTEN) {
+ tcp_set_state(sk, TCP_CLOSE);
+ inet_csk_listen_stop(sk);
+ }
+
+ /* Don't race with BH socket closes such as inet_csk_listen_stop. */
+ local_bh_disable();
+ bh_lock_sock(sk);
+
+ if (!sock_flag(sk, SOCK_DEAD)) {
+ sk->sk_err = err;
+ /* This barrier is coupled with smp_rmb() in tcp_poll() */
+ smp_wmb();
+ sk->sk_error_report(sk);
+ if (tcp_need_reset(sk->sk_state))
+ tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_done(sk);
+ }
+
+ bh_unlock_sock(sk);
+ local_bh_enable();
+ release_sock(sk);
+ sock_put(sk);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tcp_abort);
+
extern struct tcp_congestion_ops tcp_reno;
static __initdata unsigned long thash_entries;
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 479f34946177..4d610934fb39 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -10,6 +10,8 @@
*/
#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/sock_diag.h>
#include <linux/inet_diag.h>
#include <linux/tcp.h>
@@ -21,7 +23,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
{
struct tcp_info *info = _info;
- if (sk->sk_state == TCP_LISTEN) {
+ if (sk_state_load(sk) == TCP_LISTEN) {
r->idiag_rqueue = sk->sk_ack_backlog;
r->idiag_wqueue = sk->sk_max_ack_backlog;
} else if (sk->sk_type == SOCK_STREAM) {
@@ -46,12 +48,29 @@ static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
}
+#ifdef CONFIG_INET_DIAG_DESTROY
+static int tcp_diag_destroy(struct sk_buff *in_skb,
+ const struct inet_diag_req_v2 *req)
+{
+ struct net *net = sock_net(in_skb->sk);
+ struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req);
+
+ if (IS_ERR(sk))
+ return PTR_ERR(sk);
+
+ return sock_diag_destroy(sk, ECONNABORTED);
+}
+#endif
+
static const struct inet_diag_handler tcp_diag_handler = {
.dump = tcp_diag_dump,
.dump_one = tcp_diag_dump_one,
.idiag_get_info = tcp_diag_get_info,
.idiag_type = IPPROTO_TCP,
.idiag_info_size = sizeof(struct tcp_info),
+#ifdef CONFIG_INET_DIAG_DESTROY
+ .destroy = tcp_diag_destroy,
+#endif
};
static int __init tcp_diag_init(void)
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 55be6ac70cff..4c65ca1a86d1 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -1,3 +1,4 @@
+#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fdd88c3803a6..3b2c8e90a475 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2164,8 +2164,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- int cnt, oldcnt;
- int err;
+ int cnt, oldcnt, lost;
unsigned int mss;
/* Use SACK to deduce losses of new sequences sent during recovery */
const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
@@ -2205,9 +2204,10 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
break;
mss = tcp_skb_mss(skb);
- err = tcp_fragment(sk, skb, (packets - oldcnt) * mss,
- mss, GFP_ATOMIC);
- if (err < 0)
+ /* If needed, chop off the prefix to mark as lost. */
+ lost = (packets - oldcnt) * mss;
+ if (lost < skb->len &&
+ tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
break;
cnt = packets;
}
@@ -2366,8 +2366,6 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
tp->snd_ssthresh = tp->prior_ssthresh;
tcp_ecn_withdraw_cwr(tp);
}
- } else {
- tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
}
tp->snd_cwnd_stamp = tcp_time_stamp;
tp->undo_marker = 0;
@@ -2478,6 +2476,9 @@ static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
int newly_acked_sacked = prior_unsacked -
(tp->packets_out - tp->sacked_out);
+ if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
+ return;
+
tp->prr_delivered += newly_acked_sacked;
if (delta < 0) {
u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
@@ -2895,7 +2896,10 @@ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
{
const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ;
struct rtt_meas *m = tcp_sk(sk)->rtt_min;
- struct rtt_meas rttm = { .rtt = (rtt_us ? : 1), .ts = now };
+ struct rtt_meas rttm = {
+ .rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1),
+ .ts = now,
+ };
u32 elapsed;
/* Check if the new measurement updates the 1st, 2nd, or 3rd choices */
@@ -4481,19 +4485,34 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
{
struct sk_buff *skb;
+ int err = -ENOMEM;
+ int data_len = 0;
bool fragstolen;
if (size == 0)
return 0;
- skb = alloc_skb(size, sk->sk_allocation);
+ if (size > PAGE_SIZE) {
+ int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
+
+ data_len = npages << PAGE_SHIFT;
+ size = data_len + (size & ~PAGE_MASK);
+ }
+ skb = alloc_skb_with_frags(size - data_len, data_len,
+ PAGE_ALLOC_COSTLY_ORDER,
+ &err, sk->sk_allocation);
if (!skb)
goto err;
+ skb_put(skb, size - data_len);
+ skb->data_len = data_len;
+ skb->len = size;
+
if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
goto err_free;
- if (memcpy_from_msg(skb_put(skb, size), msg, size))
+ err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
+ if (err)
goto err_free;
TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
@@ -4509,7 +4528,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
err_free:
kfree_skb(skb);
err:
- return -ENOMEM;
+ return err;
+
}
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
@@ -5667,6 +5687,7 @@ discard:
}
tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+ tp->copied_seq = tp->rcv_nxt;
tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
/* RFC1323: The window in SYN & SYN/ACK segments is
@@ -6187,7 +6208,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_openreq_init(req, &tmp_opt, skb, sk);
/* Note: tcp_v6_init_req() might override ir_iif for link locals */
- inet_rsk(req)->ir_iif = sk->sk_bound_dev_if;
+ inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
af_ops->init_req(req, sk, skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1c2648bbac4b..4fdbf4e56797 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -73,7 +73,6 @@
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
-#include <net/tcp_memcontrol.h>
#include <net/busy_poll.h>
#include <linux/inet.h>
@@ -82,7 +81,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
-#include <linux/crypto.h>
+#include <crypto/hash.h>
#include <linux/scatterlist.h>
int sysctl_tcp_tw_reuse __read_mostly;
@@ -312,7 +311,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk)
/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
-void tcp_req_err(struct sock *sk, u32 seq)
+void tcp_req_err(struct sock *sk, u32 seq, bool abort)
{
struct request_sock *req = inet_reqsk(sk);
struct net *net = sock_net(sk);
@@ -324,7 +323,7 @@ void tcp_req_err(struct sock *sk, u32 seq)
if (seq != tcp_rsk(req)->snt_isn) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
- } else {
+ } else if (abort) {
/*
* Still in SYN_RECV, just remove it silently.
* There is no good way to pass the error to the newly
@@ -384,7 +383,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
}
seq = ntohl(th->seq);
if (sk->sk_state == TCP_NEW_SYN_RECV)
- return tcp_req_err(sk, seq);
+ return tcp_req_err(sk, seq,
+ type == ICMP_PARAMETERPROB ||
+ type == ICMP_TIME_EXCEEDED ||
+ (type == ICMP_DEST_UNREACH &&
+ (code == ICMP_NET_UNREACH ||
+ code == ICMP_HOST_UNREACH)));
bh_lock_sock(sk);
/* If too many ICMPs get dropped on busy
@@ -587,7 +591,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
} rep;
struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *key;
+ struct tcp_md5sig_key *key = NULL;
const __u8 *hash_location = NULL;
unsigned char newhash[16];
int genhash;
@@ -627,7 +631,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
hash_location = tcp_parse_md5sig_option(th);
- if (!sk && hash_location) {
+ if (sk && sk_fullsock(sk)) {
+ key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
+ &ip_hdr(skb)->saddr, AF_INET);
+ } else if (hash_location) {
/*
* active side is lost. Try to find listening socket through
* source port, and then find md5 key through listening socket.
@@ -651,10 +658,6 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
if (genhash || memcmp(hash_location, newhash, 16) != 0)
goto release_sk1;
- } else {
- key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
- &ip_hdr(skb)->saddr,
- AF_INET) : NULL;
}
if (key) {
@@ -675,7 +678,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
ip_hdr(skb)->saddr, /* XXX */
arg.iov[0].iov_len, IPPROTO_TCP, 0);
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
- arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
+ arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;
+
/* When socket is gone, all binding information is lost.
* routing might fail in this case. No choice here, if we choose to force
* input interface, we will misroute in case of asymmetric route.
@@ -683,6 +687,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
if (sk)
arg.bound_dev_if = sk->sk_bound_dev_if;
+ BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
+ offsetof(struct inet_timewait_sock, tw_bound_dev_if));
+
arg.tos = ip_hdr(skb)->tos;
ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -705,7 +712,8 @@ release_sk1:
outside socket context is ugly, certainly. What can I do?
*/
-static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
+static void tcp_v4_send_ack(struct net *net,
+ struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key,
int reply_flags, u8 tos)
@@ -720,7 +728,6 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
];
} rep;
struct ip_reply_arg arg;
- struct net *net = dev_net(skb_dst(skb)->dev);
memset(&rep.th, 0, sizeof(struct tcphdr));
memset(&arg, 0, sizeof(arg));
@@ -782,7 +789,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
- tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ tcp_v4_send_ack(sock_net(sk), skb,
+ tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp + tcptw->tw_ts_offset,
tcptw->tw_ts_recent,
@@ -801,8 +809,10 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
*/
- tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
- tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+ u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
+ tcp_sk(sk)->snd_nxt;
+
+ tcp_v4_send_ack(sock_net(sk), skb, seq,
tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
tcp_time_stamp,
req->ts_recent,
@@ -921,7 +931,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
}
md5sig = rcu_dereference_protected(tp->md5sig_info,
- sock_owned_by_user(sk));
+ sock_owned_by_user(sk) ||
+ lockdep_is_held(&sk->sk_lock.slock));
if (!md5sig) {
md5sig = kmalloc(sizeof(*md5sig), gfp);
if (!md5sig)
@@ -1028,21 +1039,22 @@ static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
bp->len = cpu_to_be16(nbytes);
sg_init_one(&sg, bp, sizeof(*bp));
- return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
+ ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp));
+ return crypto_ahash_update(hp->md5_req);
}
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
__be32 daddr, __be32 saddr, const struct tcphdr *th)
{
struct tcp_md5sig_pool *hp;
- struct hash_desc *desc;
+ struct ahash_request *req;
hp = tcp_get_md5sig_pool();
if (!hp)
goto clear_hash_noput;
- desc = &hp->md5_desc;
+ req = hp->md5_req;
- if (crypto_hash_init(desc))
+ if (crypto_ahash_init(req))
goto clear_hash;
if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
goto clear_hash;
@@ -1050,7 +1062,8 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
goto clear_hash;
if (tcp_md5_hash_key(hp, key))
goto clear_hash;
- if (crypto_hash_final(desc, md5_hash))
+ ahash_request_set_crypt(req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(req))
goto clear_hash;
tcp_put_md5sig_pool();
@@ -1068,7 +1081,7 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
const struct sk_buff *skb)
{
struct tcp_md5sig_pool *hp;
- struct hash_desc *desc;
+ struct ahash_request *req;
const struct tcphdr *th = tcp_hdr(skb);
__be32 saddr, daddr;
@@ -1084,9 +1097,9 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
hp = tcp_get_md5sig_pool();
if (!hp)
goto clear_hash_noput;
- desc = &hp->md5_desc;
+ req = hp->md5_req;
- if (crypto_hash_init(desc))
+ if (crypto_ahash_init(req))
goto clear_hash;
if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
@@ -1097,7 +1110,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
goto clear_hash;
if (tcp_md5_hash_key(hp, key))
goto clear_hash;
- if (crypto_hash_final(desc, md5_hash))
+ ahash_request_set_crypt(req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(req))
goto clear_hash;
tcp_put_md5sig_pool();
@@ -1275,6 +1289,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
ireq = inet_rsk(req);
sk_daddr_set(newsk, ireq->ir_rmt_addr);
sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
+ newsk->sk_bound_dev_if = ireq->ir_iif;
newinet->inet_saddr = ireq->ir_loc_addr;
inet_opt = ireq->opt;
rcu_assign_pointer(newinet->inet_opt, inet_opt);
@@ -1326,6 +1341,8 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+ if (*own_req)
+ tcp_move_syn(newtp, req);
return newsk;
@@ -1490,7 +1507,7 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
if (likely(sk->sk_rx_dst))
skb_dst_drop(skb);
else
- skb_dst_force(skb);
+ skb_dst_force_safe(skb);
__skb_queue_tail(&tp->ucopy.prequeue, skb);
tp->ucopy.memory += skb->truesize;
@@ -1583,28 +1600,30 @@ process:
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk = NULL;
+ struct sock *nsk;
sk = req->rsk_listener;
- if (tcp_v4_inbound_md5_hash(sk, skb))
- goto discard_and_relse;
- if (likely(sk->sk_state == TCP_LISTEN)) {
- nsk = tcp_check_req(sk, skb, req, false);
- } else {
+ if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
+ reqsk_put(req);
+ goto discard_it;
+ }
+ if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
+ sock_hold(sk);
+ nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
- goto discard_it;
+ goto discard_and_relse;
}
if (nsk == sk) {
- sock_hold(sk);
reqsk_put(req);
} else if (tcp_child_process(sk, nsk, skb)) {
tcp_v4_send_reset(nsk, skb);
- goto discard_it;
+ goto discard_and_relse;
} else {
+ sock_put(sk);
return 0;
}
}
@@ -1702,7 +1721,9 @@ do_time_wait:
tcp_v4_timewait_ack(sk, skb);
break;
case TCP_TW_RST:
- goto no_tcp_socket;
+ tcp_v4_send_reset(sk, skb);
+ inet_twsk_deschedule_put(inet_twsk(sk));
+ goto discard_it;
case TCP_TW_SUCCESS:;
}
goto discard_it;
@@ -1718,8 +1739,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- if (dst) {
- dst_hold(dst);
+ if (dst && dst_hold_safe(dst)) {
sk->sk_rx_dst = dst;
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
}
@@ -1810,7 +1830,9 @@ void tcp_v4_destroy_sock(struct sock *sk)
tcp_saved_syn_free(tp);
sk_sockets_allocated_dec(sk);
- sock_release_memcg(sk);
+
+ if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+ sock_release_memcg(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);
@@ -2156,6 +2178,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
__u16 destp = ntohs(inet->inet_dport);
__u16 srcp = ntohs(inet->inet_sport);
int rx_queue;
+ int state;
if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
@@ -2173,17 +2196,18 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
timer_expires = jiffies;
}
- if (sk->sk_state == TCP_LISTEN)
+ state = sk_state_load(sk);
+ if (state == TCP_LISTEN)
rx_queue = sk->sk_ack_backlog;
else
- /*
- * because we dont lock socket, we might find a transient negative value
+ /* Because we don't lock the socket,
+ * we might find a transient negative value.
*/
rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
- i, src, srcp, dest, destp, sk->sk_state,
+ i, src, srcp, dest, destp, state,
tp->write_seq - tp->snd_una,
rx_queue,
timer_active,
@@ -2197,8 +2221,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
tp->snd_cwnd,
- sk->sk_state == TCP_LISTEN ?
- (fastopenq ? fastopenq->max_qlen : 0) :
+ state == TCP_LISTEN ?
+ fastopenq->max_qlen :
(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
}
@@ -2332,11 +2356,7 @@ struct proto tcp_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
-#ifdef CONFIG_MEMCG_KMEM
- .init_cgroup = tcp_init_cgroup,
- .destroy_cgroup = tcp_destroy_cgroup,
- .proto_cgroup = tcp_proto_cgroup,
-#endif
+ .diag_destroy = tcp_abort,
};
EXPORT_SYMBOL(tcp_prot);
@@ -2374,6 +2394,10 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
+ net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
+ net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
+ net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
+
return 0;
fail:
tcp_sk_exit(net);
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
deleted file mode 100644
index 2379c1b4efb2..000000000000
--- a/net/ipv4/tcp_memcontrol.c
+++ /dev/null
@@ -1,233 +0,0 @@
-#include <net/tcp.h>
-#include <net/tcp_memcontrol.h>
-#include <net/sock.h>
-#include <net/ip.h>
-#include <linux/nsproxy.h>
-#include <linux/memcontrol.h>
-#include <linux/module.h>
-
-int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
-{
- /*
- * The root cgroup does not use page_counters, but rather,
- * rely on the data already collected by the network
- * subsystem
- */
- struct mem_cgroup *parent = parent_mem_cgroup(memcg);
- struct page_counter *counter_parent = NULL;
- struct cg_proto *cg_proto, *parent_cg;
-
- cg_proto = tcp_prot.proto_cgroup(memcg);
- if (!cg_proto)
- return 0;
-
- cg_proto->sysctl_mem[0] = sysctl_tcp_mem[0];
- cg_proto->sysctl_mem[1] = sysctl_tcp_mem[1];
- cg_proto->sysctl_mem[2] = sysctl_tcp_mem[2];
- cg_proto->memory_pressure = 0;
- cg_proto->memcg = memcg;
-
- parent_cg = tcp_prot.proto_cgroup(parent);
- if (parent_cg)
- counter_parent = &parent_cg->memory_allocated;
-
- page_counter_init(&cg_proto->memory_allocated, counter_parent);
- percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL);
-
- return 0;
-}
-EXPORT_SYMBOL(tcp_init_cgroup);
-
-void tcp_destroy_cgroup(struct mem_cgroup *memcg)
-{
- struct cg_proto *cg_proto;
-
- cg_proto = tcp_prot.proto_cgroup(memcg);
- if (!cg_proto)
- return;
-
- percpu_counter_destroy(&cg_proto->sockets_allocated);
-
- if (test_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
- static_key_slow_dec(&memcg_socket_limit_enabled);
-
-}
-EXPORT_SYMBOL(tcp_destroy_cgroup);
-
-static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
-{
- struct cg_proto *cg_proto;
- int i;
- int ret;
-
- cg_proto = tcp_prot.proto_cgroup(memcg);
- if (!cg_proto)
- return -EINVAL;
-
- ret = page_counter_limit(&cg_proto->memory_allocated, nr_pages);
- if (ret)
- return ret;
-
- for (i = 0; i < 3; i++)
- cg_proto->sysctl_mem[i] = min_t(long, nr_pages,
- sysctl_tcp_mem[i]);
-
- if (nr_pages == PAGE_COUNTER_MAX)
- clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
- else {
- /*
- * The active bit needs to be written after the static_key
- * update. This is what guarantees that the socket activation
- * function is the last one to run. See sock_update_memcg() for
- * details, and note that we don't mark any socket as belonging
- * to this memcg until that flag is up.
- *
- * We need to do this, because static_keys will span multiple
- * sites, but we can't control their order. If we mark a socket
- * as accounted, but the accounting functions are not patched in
- * yet, we'll lose accounting.
- *
- * We never race with the readers in sock_update_memcg(),
- * because when this value change, the code to process it is not
- * patched in yet.
- *
- * The activated bit is used to guarantee that no two writers
- * will do the update in the same memcg. Without that, we can't
- * properly shutdown the static key.
- */
- if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
- static_key_slow_inc(&memcg_socket_limit_enabled);
- set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
- }
-
- return 0;
-}
-
-enum {
- RES_USAGE,
- RES_LIMIT,
- RES_MAX_USAGE,
- RES_FAILCNT,
-};
-
-static DEFINE_MUTEX(tcp_limit_mutex);
-
-static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off)
-{
- struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
- unsigned long nr_pages;
- int ret = 0;
-
- buf = strstrip(buf);
-
- switch (of_cft(of)->private) {
- case RES_LIMIT:
- /* see memcontrol.c */
- ret = page_counter_memparse(buf, "-1", &nr_pages);
- if (ret)
- break;
- mutex_lock(&tcp_limit_mutex);
- ret = tcp_update_limit(memcg, nr_pages);
- mutex_unlock(&tcp_limit_mutex);
- break;
- default:
- ret = -EINVAL;
- break;
- }
- return ret ?: nbytes;
-}
-
-static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
-{
- struct mem_cgroup *memcg = mem_cgroup_from_css(css);
- struct cg_proto *cg_proto = tcp_prot.proto_cgroup(memcg);
- u64 val;
-
- switch (cft->private) {
- case RES_LIMIT:
- if (!cg_proto)
- return PAGE_COUNTER_MAX;
- val = cg_proto->memory_allocated.limit;
- val *= PAGE_SIZE;
- break;
- case RES_USAGE:
- if (!cg_proto)
- val = atomic_long_read(&tcp_memory_allocated);
- else
- val = page_counter_read(&cg_proto->memory_allocated);
- val *= PAGE_SIZE;
- break;
- case RES_FAILCNT:
- if (!cg_proto)
- return 0;
- val = cg_proto->memory_allocated.failcnt;
- break;
- case RES_MAX_USAGE:
- if (!cg_proto)
- return 0;
- val = cg_proto->memory_allocated.watermark;
- val *= PAGE_SIZE;
- break;
- default:
- BUG();
- }
- return val;
-}
-
-static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off)
-{
- struct mem_cgroup *memcg;
- struct cg_proto *cg_proto;
-
- memcg = mem_cgroup_from_css(of_css(of));
- cg_proto = tcp_prot.proto_cgroup(memcg);
- if (!cg_proto)
- return nbytes;
-
- switch (of_cft(of)->private) {
- case RES_MAX_USAGE:
- page_counter_reset_watermark(&cg_proto->memory_allocated);
- break;
- case RES_FAILCNT:
- cg_proto->memory_allocated.failcnt = 0;
- break;
- }
-
- return nbytes;
-}
-
-static struct cftype tcp_files[] = {
- {
- .name = "kmem.tcp.limit_in_bytes",
- .write = tcp_cgroup_write,
- .read_u64 = tcp_cgroup_read,
- .private = RES_LIMIT,
- },
- {
- .name = "kmem.tcp.usage_in_bytes",
- .read_u64 = tcp_cgroup_read,
- .private = RES_USAGE,
- },
- {
- .name = "kmem.tcp.failcnt",
- .private = RES_FAILCNT,
- .write = tcp_cgroup_reset,
- .read_u64 = tcp_cgroup_read,
- },
- {
- .name = "kmem.tcp.max_usage_in_bytes",
- .private = RES_MAX_USAGE,
- .write = tcp_cgroup_reset,
- .read_u64 = tcp_cgroup_read,
- },
- { } /* terminate */
-};
-
-static int __init tcp_memcontrol_init(void)
-{
- WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, tcp_files));
- return 0;
-}
-__initcall(tcp_memcontrol_init);
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index c8cbc2b4b792..a726d7853ce5 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -550,7 +550,7 @@ reset:
*/
if (crtt > tp->srtt_us) {
/* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
- crtt /= 8 * USEC_PER_MSEC;
+ crtt /= 8 * USEC_PER_SEC / HZ;
inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
} else if (tp->srtt_us == 0) {
/* RFC6298: 5.7 We've failed to get a valid RTT sample from
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 3575dd1e5b67..9b02af2139d3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -131,7 +131,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
goto kill;
if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt))
- goto kill_with_rst;
+ return TCP_TW_RST;
/* Dup ACK? */
if (!th->ack ||
@@ -145,11 +145,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
* reset.
*/
if (!th->fin ||
- TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) {
-kill_with_rst:
- inet_twsk_deschedule_put(tw);
+ TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1)
return TCP_TW_RST;
- }
/* FIN arrived, enter true time-wait state. */
tw->tw_substate = TCP_TIME_WAIT;
@@ -458,7 +455,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->rcv_wup = newtp->copied_seq =
newtp->rcv_nxt = treq->rcv_isn + 1;
- newtp->segs_in = 0;
+ newtp->segs_in = 1;
newtp->snd_sml = newtp->snd_una =
newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
@@ -551,9 +548,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->rack.mstamp.v64 = 0;
newtp->rack.advanced = 0;
- newtp->saved_syn = req->saved_syn;
- req->saved_syn = NULL;
-
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
}
return newsk;
@@ -821,6 +815,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
int ret = 0;
int state = child->sk_state;
+ tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
if (!sock_owned_by_user(child)) {
ret = tcp_rcv_state_process(child, skb);
/* Wakeup parent, send SIGIO */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index cb7ca569052c..fda379cd600d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2296,7 +2296,7 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
return;
if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
- sk_gfp_atomic(sk, GFP_ATOMIC)))
+ sk_gfp_mask(sk, GFP_ATOMIC)))
tcp_check_probe_timer(sk);
}
@@ -2813,13 +2813,16 @@ begin_fwd:
*/
void sk_forced_mem_schedule(struct sock *sk, int size)
{
- int amt, status;
+ int amt;
if (size <= sk->sk_forward_alloc)
return;
amt = sk_mem_pages(size);
sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
- sk_memory_allocated_add(sk, amt, &status);
+ sk_memory_allocated_add(sk, amt);
+
+ if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+ mem_cgroup_charge_skmem(sk->sk_memcg, amt);
}
/* Send a FIN. The caller locks the socket for us.
@@ -3150,7 +3153,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_request *fo = tp->fastopen_req;
- int syn_loss = 0, space, err = 0, copied;
+ int syn_loss = 0, space, err = 0;
unsigned long last_syn_loss = 0;
struct sk_buff *syn_data;
@@ -3188,17 +3191,18 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
goto fallback;
syn_data->ip_summed = CHECKSUM_PARTIAL;
memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
- copied = copy_from_iter(skb_put(syn_data, space), space,
- &fo->data->msg_iter);
- if (unlikely(!copied)) {
- kfree_skb(syn_data);
- goto fallback;
- }
- if (copied != space) {
- skb_trim(syn_data, copied);
- space = copied;
+ if (space) {
+ int copied = copy_from_iter(skb_put(syn_data, space), space,
+ &fo->data->msg_iter);
+ if (unlikely(!copied)) {
+ kfree_skb(syn_data);
+ goto fallback;
+ }
+ if (copied != space) {
+ skb_trim(syn_data, copied);
+ space = copied;
+ }
}
-
/* No more data pending in inet_wait_for_connect() */
if (space == fo->size)
fo->data = NULL;
@@ -3352,8 +3356,9 @@ void tcp_send_ack(struct sock *sk)
* tcp_transmit_skb() will set the ownership to this
* sock.
*/
- buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
- if (!buff) {
+ buff = alloc_skb(MAX_TCP_HEADER,
+ sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
+ if (unlikely(!buff)) {
inet_csk_schedule_ack(sk);
inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
@@ -3375,7 +3380,7 @@ void tcp_send_ack(struct sock *sk)
/* Send it off, this clears delayed acks for us. */
skb_mstamp_get(&buff->skb_mstamp);
- tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
+ tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0);
}
EXPORT_SYMBOL_GPL(tcp_send_ack);
@@ -3396,7 +3401,8 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
struct sk_buff *skb;
/* We don't queue it, tcp_transmit_skb() sets ownership. */
- skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
+ skb = alloc_skb(MAX_TCP_HEADER,
+ sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
if (!skb)
return -1;
@@ -3409,7 +3415,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
skb_mstamp_get(&skb->skb_mstamp);
NET_INC_STATS(sock_net(sk), mib);
- return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
+ return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0);
}
void tcp_send_window_probe(struct sock *sk)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c9c716a483e4..a4730a28b220 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -24,9 +24,6 @@
int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES;
int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES;
-int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME;
-int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES;
-int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL;
int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
int sysctl_tcp_orphan_retries __read_mostly;
@@ -168,7 +165,7 @@ static int tcp_write_timeout(struct sock *sk)
dst_negative_advice(sk);
if (tp->syn_fastopen || tp->syn_data)
tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
- if (tp->syn_data)
+ if (tp->syn_data && icsk->icsk_retransmits == 1)
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPFASTOPENACTIVEFAIL);
}
@@ -176,6 +173,18 @@ static int tcp_write_timeout(struct sock *sk)
syn_set = true;
} else {
if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) {
+ /* Some middle-boxes may black-hole Fast Open _after_
+ * the handshake. Therefore we conservatively disable
+ * Fast Open on this path on recurring timeouts with
+ * few or zero bytes acked after Fast Open.
+ */
+ if (tp->syn_data_acked &&
+ tp->bytes_acked <= tp->rx_opt.mss_clamp) {
+ tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
+ if (icsk->icsk_retransmits == sysctl_tcp_retries1)
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENACTIVEFAIL);
+ }
/* Black hole detection */
tcp_mtu_probing(icsk, sk);
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 17d35662930d..3e6a472e6b88 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -219,7 +219,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk)
yeah->fast_count = 0;
yeah->reno_count = max(yeah->reno_count>>1, 2U);
- return tp->snd_cwnd - reduction;
+ return max_t(int, tp->snd_cwnd - reduction, 2);
}
static struct tcp_congestion_ops tcp_yeah __read_mostly = {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 24ec14f9825c..95d2f198017e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -100,7 +100,6 @@
#include <linux/slab.h>
#include <net/tcp_states.h>
#include <linux/skbuff.h>
-#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/net_namespace.h>
@@ -114,6 +113,7 @@
#include <trace/events/skb.h>
#include <net/busy_poll.h>
#include "udp_impl.h"
+#include <net/sock_reuseport.h>
struct udp_table udp_table __read_mostly;
EXPORT_SYMBOL(udp_table);
@@ -138,7 +138,8 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
unsigned long *bitmap,
struct sock *sk,
int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2),
+ const struct sock *sk2,
+ bool match_wildcard),
unsigned int log)
{
struct sock *sk2;
@@ -153,8 +154,9 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
(!sk2->sk_reuseport || !sk->sk_reuseport ||
+ rcu_access_pointer(sk->sk_reuseport_cb) ||
!uid_eq(uid, sock_i_uid(sk2))) &&
- saddr_comp(sk, sk2)) {
+ saddr_comp(sk, sk2, true)) {
if (!bitmap)
return 1;
__set_bit(udp_sk(sk2)->udp_port_hash >> log, bitmap);
@@ -171,7 +173,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
struct udp_hslot *hslot2,
struct sock *sk,
int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2))
+ const struct sock *sk2,
+ bool match_wildcard))
{
struct sock *sk2;
struct hlist_nulls_node *node;
@@ -187,8 +190,9 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
(!sk2->sk_reuseport || !sk->sk_reuseport ||
+ rcu_access_pointer(sk->sk_reuseport_cb) ||
!uid_eq(uid, sock_i_uid(sk2))) &&
- saddr_comp(sk, sk2)) {
+ saddr_comp(sk, sk2, true)) {
res = 1;
break;
}
@@ -197,6 +201,35 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
return res;
}
+static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
+ int (*saddr_same)(const struct sock *sk1,
+ const struct sock *sk2,
+ bool match_wildcard))
+{
+ struct net *net = sock_net(sk);
+ struct hlist_nulls_node *node;
+ kuid_t uid = sock_i_uid(sk);
+ struct sock *sk2;
+
+ sk_nulls_for_each(sk2, node, &hslot->head) {
+ if (net_eq(sock_net(sk2), net) &&
+ sk2 != sk &&
+ sk2->sk_family == sk->sk_family &&
+ ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
+ (udp_sk(sk2)->udp_port_hash == udp_sk(sk)->udp_port_hash) &&
+ (sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+ sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
+ (*saddr_same)(sk, sk2, false)) {
+ return reuseport_add_sock(sk, sk2);
+ }
+ }
+
+ /* Initial allocation may have already happened via setsockopt */
+ if (!rcu_access_pointer(sk->sk_reuseport_cb))
+ return reuseport_alloc(sk);
+ return 0;
+}
+
/**
* udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
*
@@ -208,7 +241,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
*/
int udp_lib_get_port(struct sock *sk, unsigned short snum,
int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2),
+ const struct sock *sk2,
+ bool match_wildcard),
unsigned int hash2_nulladdr)
{
struct udp_hslot *hslot, *hslot2;
@@ -291,6 +325,14 @@ found:
udp_sk(sk)->udp_port_hash = snum;
udp_sk(sk)->udp_portaddr_hash ^= snum;
if (sk_unhashed(sk)) {
+ if (sk->sk_reuseport &&
+ udp_reuseport_add_sock(sk, hslot, saddr_comp)) {
+ inet_sk(sk)->inet_num = 0;
+ udp_sk(sk)->udp_port_hash = 0;
+ udp_sk(sk)->udp_portaddr_hash ^= snum;
+ goto fail_unlock;
+ }
+
sk_nulls_add_node_rcu(sk, &hslot->head);
hslot->count++;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -310,13 +352,22 @@ fail:
}
EXPORT_SYMBOL(udp_lib_get_port);
-static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses
+ * match_wildcard == false: addresses must be exactly the same, i.e.
+ * 0.0.0.0 only equals to 0.0.0.0
+ */
+static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
+ bool match_wildcard)
{
struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
- return (!ipv6_only_sock(sk2) &&
- (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr ||
- inet1->inet_rcv_saddr == inet2->inet_rcv_saddr));
+ if (!ipv6_only_sock(sk2)) {
+ if (inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)
+ return 1;
+ if (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr)
+ return match_wildcard;
+ }
+ return 0;
}
static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
@@ -442,11 +493,13 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned int hnum, int dif,
- struct udp_hslot *hslot2, unsigned int slot2)
+ struct udp_hslot *hslot2, unsigned int slot2,
+ struct sk_buff *skb)
{
struct sock *sk, *result;
struct hlist_nulls_node *node;
int score, badness, matches = 0, reuseport = 0;
+ bool select_ok = true;
u32 hash = 0;
begin:
@@ -462,6 +515,17 @@ begin:
if (reuseport) {
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
+ if (select_ok) {
+ struct sock *sk2;
+
+ sk2 = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (sk2) {
+ result = sk2;
+ select_ok = false;
+ goto found;
+ }
+ }
matches = 1;
}
} else if (score == badness && reuseport) {
@@ -479,6 +543,7 @@ begin:
if (get_nulls_value(node) != slot2)
goto begin;
if (result) {
+found:
if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
result = NULL;
else if (unlikely(compute_score2(result, net, saddr, sport,
@@ -495,7 +560,7 @@ begin:
*/
struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
__be16 sport, __be32 daddr, __be16 dport,
- int dif, struct udp_table *udptable)
+ int dif, struct udp_table *udptable, struct sk_buff *skb)
{
struct sock *sk, *result;
struct hlist_nulls_node *node;
@@ -503,6 +568,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
int score, badness, matches = 0, reuseport = 0;
+ bool select_ok = true;
u32 hash = 0;
rcu_read_lock();
@@ -515,7 +581,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
result = udp4_lib_lookup2(net, saddr, sport,
daddr, hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
if (!result) {
hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
slot2 = hash2 & udptable->mask;
@@ -525,7 +591,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
result = udp4_lib_lookup2(net, saddr, sport,
htonl(INADDR_ANY), hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
}
rcu_read_unlock();
return result;
@@ -543,6 +609,17 @@ begin:
if (reuseport) {
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
+ if (select_ok) {
+ struct sock *sk2;
+
+ sk2 = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (sk2) {
+ result = sk2;
+ select_ok = false;
+ goto found;
+ }
+ }
matches = 1;
}
} else if (score == badness && reuseport) {
@@ -561,6 +638,7 @@ begin:
goto begin;
if (result) {
+found:
if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
result = NULL;
else if (unlikely(compute_score(result, net, saddr, hnum, sport,
@@ -582,13 +660,14 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport,
iph->daddr, dport, inet_iif(skb),
- udptable);
+ udptable, skb);
}
struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif)
{
- return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table);
+ return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif,
+ &udp_table, NULL);
}
EXPORT_SYMBOL_GPL(udp4_lib_lookup);
@@ -636,7 +715,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
struct net *net = dev_net(skb->dev);
sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
- iph->saddr, uh->source, skb->dev->ifindex, udptable);
+ iph->saddr, uh->source, skb->dev->ifindex, udptable,
+ NULL);
if (!sk) {
ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
return; /* No socket for error */
@@ -773,7 +853,8 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
else if (skb_is_gso(skb))
uh->check = ~udp_v4_check(len, saddr, daddr, 0);
else if (skb_dst(skb) && skb_dst(skb)->dev &&
- (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
+ (skb_dst(skb)->dev->features &
+ (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) {
BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
@@ -967,8 +1048,10 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_controllen) {
err = ip_cmsg_send(sock_net(sk), msg, &ipc,
sk->sk_family == AF_INET6);
- if (err)
+ if (unlikely(err)) {
+ kfree(ipc.opt);
return err;
+ }
if (ipc.opt)
free = 1;
connected = 0;
@@ -1026,8 +1109,11 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
flow_flags,
faddr, saddr, dport, inet->inet_sport);
- if (!saddr && ipc.oif)
- l3mdev_get_saddr(net, ipc.oif, fl4);
+ if (!saddr && ipc.oif) {
+ err = l3mdev_get_saddr(net, ipc.oif, fl4);
+ if (err < 0)
+ goto out;
+ }
security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
rt = ip_route_output_flow(net, fl4, sk);
@@ -1271,6 +1357,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
int peeked, off = 0;
int err;
int is_udplite = IS_UDPLITE(sk);
+ bool checksum_valid = false;
bool slow;
if (flags & MSG_ERRQUEUE)
@@ -1296,11 +1383,12 @@ try_again:
*/
if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
- if (udp_lib_checksum_complete(skb))
+ checksum_valid = !udp_lib_checksum_complete(skb);
+ if (!checksum_valid)
goto csum_copy_err;
}
- if (skb_csum_unnecessary(skb))
+ if (checksum_valid || skb_csum_unnecessary(skb))
err = skb_copy_datagram_msg(skb, sizeof(struct udphdr),
msg, copied);
else {
@@ -1396,6 +1484,8 @@ void udp_lib_unhash(struct sock *sk)
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
spin_lock_bh(&hslot->lock);
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ reuseport_detach_sock(sk);
if (sk_nulls_del_node_init_rcu(sk)) {
hslot->count--;
inet_sk(sk)->inet_num = 0;
@@ -1423,22 +1513,28 @@ void udp_lib_rehash(struct sock *sk, u16 newhash)
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
nhslot2 = udp_hashslot2(udptable, newhash);
udp_sk(sk)->udp_portaddr_hash = newhash;
- if (hslot2 != nhslot2) {
+
+ if (hslot2 != nhslot2 ||
+ rcu_access_pointer(sk->sk_reuseport_cb)) {
hslot = udp_hashslot(udptable, sock_net(sk),
udp_sk(sk)->udp_port_hash);
/* we must lock primary chain too */
spin_lock_bh(&hslot->lock);
-
- spin_lock(&hslot2->lock);
- hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
- hslot2->count--;
- spin_unlock(&hslot2->lock);
-
- spin_lock(&nhslot2->lock);
- hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
- &nhslot2->head);
- nhslot2->count++;
- spin_unlock(&nhslot2->lock);
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ reuseport_detach_sock(sk);
+
+ if (hslot2 != nhslot2) {
+ spin_lock(&hslot2->lock);
+ hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
+ hslot2->count--;
+ spin_unlock(&hslot2->lock);
+
+ spin_lock(&nhslot2->lock);
+ hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
+ &nhslot2->head);
+ nhslot2->count++;
+ spin_unlock(&nhslot2->lock);
+ }
spin_unlock_bh(&hslot->lock);
}
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 6116604bf6e8..df1966f3b6ec 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -44,7 +44,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
sk = __udp4_lib_lookup(net,
req->id.idiag_src[0], req->id.idiag_sport,
req->id.idiag_dst[0], req->id.idiag_dport,
- req->id.idiag_if, tbl);
+ req->id.idiag_if, tbl, NULL);
#if IS_ENABLED(CONFIG_IPV6)
else if (req->sdiag_family == AF_INET6)
sk = __udp6_lib_lookup(net,
@@ -52,7 +52,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
req->id.idiag_sport,
(struct in6_addr *)req->id.idiag_dst,
req->id.idiag_dport,
- req->id.idiag_if, tbl);
+ req->id.idiag_if, tbl, NULL);
#endif
else
goto out_nosk;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index f9386160cbee..4c519c1dc161 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -21,6 +21,7 @@ static struct udp_offload_priv __rcu *udp_offload_base __read_mostly;
struct udp_offload_priv {
struct udp_offload *offload;
+ possible_net_t net;
struct rcu_head rcu;
struct udp_offload_priv __rcu *next;
};
@@ -60,8 +61,9 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
/* Try to offload checksum if possible */
offload_csum = !!(need_csum &&
- (skb->dev->features &
- (is_ipv6 ? NETIF_F_V6_CSUM : NETIF_F_V4_CSUM)));
+ ((skb->dev->features & NETIF_F_HW_CSUM) ||
+ (skb->dev->features & (is_ipv6 ?
+ NETIF_F_IPV6_CSUM : NETIF_F_IP_CSUM))));
/* segment inner packet. */
enc_features = skb->dev->hw_enc_features & features;
@@ -241,13 +243,14 @@ out:
return segs;
}
-int udp_add_offload(struct udp_offload *uo)
+int udp_add_offload(struct net *net, struct udp_offload *uo)
{
struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC);
if (!new_offload)
return -ENOMEM;
+ write_pnet(&new_offload->net, net);
new_offload->offload = uo;
spin_lock(&udp_offload_lock);
@@ -311,7 +314,8 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
rcu_read_lock();
uo_priv = rcu_dereference(udp_offload_base);
for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
- if (uo_priv->offload->port == uh->dest &&
+ if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) &&
+ uo_priv->offload->port == uh->dest &&
uo_priv->offload->callbacks.gro_receive)
goto unflush;
}
@@ -389,7 +393,8 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff)
uo_priv = rcu_dereference(udp_offload_base);
for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
- if (uo_priv->offload->port == uh->dest &&
+ if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) &&
+ uo_priv->offload->port == uh->dest &&
uo_priv->offload->callbacks.gro_complete)
break;
}
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index aba428626b52..96599d1a1318 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -74,10 +74,10 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
}
EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock);
-int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
- __be32 src, __be32 dst, __u8 tos, __u8 ttl,
- __be16 df, __be16 src_port, __be16 dst_port,
- bool xnet, bool nocheck)
+void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
+ __be32 src, __be32 dst, __u8 tos, __u8 ttl,
+ __be16 df, __be16 src_port, __be16 dst_port,
+ bool xnet, bool nocheck)
{
struct udphdr *uh;
@@ -89,10 +89,11 @@ int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
uh->source = src_port;
uh->len = htons(skb->len);
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
udp_set_csum(nocheck, skb, src, dst, skb->len);
- return iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP,
- tos, ttl, df, xnet);
+ iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet);
}
EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 1e0c3c835a63..7b0edb37a115 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -259,7 +259,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
xfrm_dst_ifdown(dst, dev);
}
-static struct dst_ops xfrm4_dst_ops = {
+static struct dst_ops xfrm4_dst_ops_template = {
.family = AF_INET,
.gc = xfrm4_garbage_collect,
.update_pmtu = xfrm4_update_pmtu,
@@ -273,7 +273,7 @@ static struct dst_ops xfrm4_dst_ops = {
static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
.family = AF_INET,
- .dst_ops = &xfrm4_dst_ops,
+ .dst_ops = &xfrm4_dst_ops_template,
.dst_lookup = xfrm4_dst_lookup,
.get_saddr = xfrm4_get_saddr,
.decode_session = _decode_session4,
@@ -295,7 +295,7 @@ static struct ctl_table xfrm4_policy_table[] = {
{ }
};
-static int __net_init xfrm4_net_init(struct net *net)
+static int __net_init xfrm4_net_sysctl_init(struct net *net)
{
struct ctl_table *table;
struct ctl_table_header *hdr;
@@ -323,7 +323,7 @@ err_alloc:
return -ENOMEM;
}
-static void __net_exit xfrm4_net_exit(struct net *net)
+static void __net_exit xfrm4_net_sysctl_exit(struct net *net)
{
struct ctl_table *table;
@@ -335,12 +335,44 @@ static void __net_exit xfrm4_net_exit(struct net *net)
if (!net_eq(net, &init_net))
kfree(table);
}
+#else /* CONFIG_SYSCTL */
+static int inline xfrm4_net_sysctl_init(struct net *net)
+{
+ return 0;
+}
+
+static void inline xfrm4_net_sysctl_exit(struct net *net)
+{
+}
+#endif
+
+static int __net_init xfrm4_net_init(struct net *net)
+{
+ int ret;
+
+ memcpy(&net->xfrm.xfrm4_dst_ops, &xfrm4_dst_ops_template,
+ sizeof(xfrm4_dst_ops_template));
+ ret = dst_entries_init(&net->xfrm.xfrm4_dst_ops);
+ if (ret)
+ return ret;
+
+ ret = xfrm4_net_sysctl_init(net);
+ if (ret)
+ dst_entries_destroy(&net->xfrm.xfrm4_dst_ops);
+
+ return ret;
+}
+
+static void __net_exit xfrm4_net_exit(struct net *net)
+{
+ xfrm4_net_sysctl_exit(net);
+ dst_entries_destroy(&net->xfrm.xfrm4_dst_ops);
+}
static struct pernet_operations __net_initdata xfrm4_net_ops = {
.init = xfrm4_net_init,
.exit = xfrm4_net_exit,
};
-#endif
static void __init xfrm4_policy_init(void)
{
@@ -349,13 +381,9 @@ static void __init xfrm4_policy_init(void)
void __init xfrm4_init(void)
{
- dst_entries_init(&xfrm4_dst_ops);
-
xfrm4_state_init();
xfrm4_policy_init();
xfrm4_protocol_init();
-#ifdef CONFIG_SYSCTL
register_pernet_subsys(&xfrm4_net_ops);
-#endif
}
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 983bb999738c..40c897515ddc 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -69,6 +69,7 @@ config INET6_ESP
select CRYPTO_CBC
select CRYPTO_SHA1
select CRYPTO_DES
+ select CRYPTO_ECHAINIV
---help---
Support for IPsec ESP.
@@ -94,6 +95,7 @@ config IPV6_MIP6
config IPV6_ILA
tristate "IPv6: Identifier Locator Addressing (ILA)"
+ depends on NETFILTER
select LWTUNNEL
---help---
Support for IPv6 Identifier Locator Addressing (ILA).
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 2c900c7b7eb1..2fbd90bf8d33 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -34,7 +34,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
obj-$(CONFIG_IPV6_MIP6) += mip6.o
-obj-$(CONFIG_IPV6_ILA) += ila.o
+obj-$(CONFIG_IPV6_ILA) += ila/
obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d72fa90d6feb..bdd7eac4307a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -70,7 +70,7 @@
#include <net/sock.h>
#include <net/snmp.h>
-#include <net/af_ieee802154.h>
+#include <net/6lowpan.h>
#include <net/firewire.h>
#include <net/ipv6.h>
#include <net/protocol.h>
@@ -350,6 +350,12 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
setup_timer(&ndev->rs_timer, addrconf_rs_timer,
(unsigned long)ndev);
memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
+
+ if (ndev->cnf.stable_secret.initialized)
+ ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+ else
+ ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64;
+
ndev->cnf.mtu6 = dev->mtu;
ndev->cnf.sysctl = NULL;
ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
@@ -418,6 +424,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
if (err) {
ipv6_mc_destroy_dev(ndev);
del_timer(&ndev->regen_timer);
+ snmp6_unregister_dev(ndev);
goto err_release;
}
/* protected by rtnl_lock */
@@ -576,7 +583,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = EINVAL;
+ err = -EINVAL;
if (!tb[NETCONFA_IFINDEX])
goto errout;
@@ -1765,12 +1772,13 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
{
+ if (dad_failed)
+ ifp->flags |= IFA_F_DADFAILED;
+
if (ifp->flags&IFA_F_PERMANENT) {
spin_lock_bh(&ifp->lock);
addrconf_del_dad_work(ifp);
ifp->flags |= IFA_F_TENTATIVE;
- if (dad_failed)
- ifp->flags |= IFA_F_DADFAILED;
spin_unlock_bh(&ifp->lock);
if (dad_failed)
ipv6_ifa_notify(0, ifp);
@@ -1946,9 +1954,9 @@ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev)
{
- if (dev->addr_len != IEEE802154_ADDR_LEN)
+ if (dev->addr_len != EUI64_ADDR_LEN)
return -1;
- memcpy(eui, dev->dev_addr, 8);
+ memcpy(eui, dev->dev_addr, EUI64_ADDR_LEN);
eui[0] ^= 2;
return 0;
}
@@ -2040,7 +2048,6 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
case ARPHRD_IPGRE:
return addrconf_ifid_gre(eui, dev);
case ARPHRD_6LOWPAN:
- case ARPHRD_IEEE802154:
return addrconf_ifid_eui64(eui, dev);
case ARPHRD_IEEE1394:
return addrconf_ifid_ieee1394(eui, dev);
@@ -2313,6 +2320,12 @@ static void manage_tempaddrs(struct inet6_dev *idev,
}
}
+static bool is_addr_mode_generate_stable(struct inet6_dev *idev)
+{
+ return idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY ||
+ idev->addr_gen_mode == IN6_ADDR_GEN_MODE_RANDOM;
+}
+
void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
{
struct prefix_info *pinfo;
@@ -2426,8 +2439,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
in6_dev->token.s6_addr + 8, 8);
read_unlock_bh(&in6_dev->lock);
tokenized = true;
- } else if (in6_dev->addr_gen_mode ==
- IN6_ADDR_GEN_MODE_STABLE_PRIVACY &&
+ } else if (is_addr_mode_generate_stable(in6_dev) &&
!ipv6_generate_stable_address(&addr, 0,
in6_dev)) {
addr_flags |= IFA_F_STABLE_PRIVACY;
@@ -2454,7 +2466,7 @@ ok:
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
if (in6_dev->cnf.optimistic_dad &&
!net->ipv6.devconf_all->forwarding && sllao)
- addr_flags = IFA_F_OPTIMISTIC;
+ addr_flags |= IFA_F_OPTIMISTIC;
#endif
/* Do not allow to create too much of autoconfigured
@@ -3027,6 +3039,17 @@ retry:
return 0;
}
+static void ipv6_gen_mode_random_init(struct inet6_dev *idev)
+{
+ struct ipv6_stable_secret *s = &idev->cnf.stable_secret;
+
+ if (s->initialized)
+ return;
+ s = &idev->cnf.stable_secret;
+ get_random_bytes(&s->secret, sizeof(s->secret));
+ s->initialized = true;
+}
+
static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
{
struct in6_addr addr;
@@ -3037,13 +3060,18 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
- if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) {
+ switch (idev->addr_gen_mode) {
+ case IN6_ADDR_GEN_MODE_RANDOM:
+ ipv6_gen_mode_random_init(idev);
+ /* fallthrough */
+ case IN6_ADDR_GEN_MODE_STABLE_PRIVACY:
if (!ipv6_generate_stable_address(&addr, 0, idev))
addrconf_add_linklocal(idev, &addr,
IFA_F_STABLE_PRIVACY);
else if (prefix_route)
addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
- } else if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) {
+ break;
+ case IN6_ADDR_GEN_MODE_EUI64:
/* addrconf_add_linklocal also adds a prefix_route and we
* only need to care about prefix routes if ipv6_generate_eui64
* couldn't generate one.
@@ -3052,6 +3080,11 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
addrconf_add_linklocal(idev, &addr, 0);
else if (prefix_route)
addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
+ break;
+ case IN6_ADDR_GEN_MODE_NONE:
+ default:
+ /* will not add any link local address */
+ break;
}
}
@@ -3065,10 +3098,10 @@ static void addrconf_dev_config(struct net_device *dev)
(dev->type != ARPHRD_FDDI) &&
(dev->type != ARPHRD_ARCNET) &&
(dev->type != ARPHRD_INFINIBAND) &&
- (dev->type != ARPHRD_IEEE802154) &&
(dev->type != ARPHRD_IEEE1394) &&
(dev->type != ARPHRD_TUNNEL6) &&
- (dev->type != ARPHRD_6LOWPAN)) {
+ (dev->type != ARPHRD_6LOWPAN) &&
+ (dev->type != ARPHRD_NONE)) {
/* Alas, we support only Ethernet autoconfiguration. */
return;
}
@@ -3077,6 +3110,11 @@ static void addrconf_dev_config(struct net_device *dev)
if (IS_ERR(idev))
return;
+ /* this device type has no EUI support */
+ if (dev->type == ARPHRD_NONE &&
+ idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)
+ idev->addr_gen_mode = IN6_ADDR_GEN_MODE_RANDOM;
+
addrconf_addr_gen(idev, false);
}
@@ -3286,7 +3324,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
case NETDEV_PRE_TYPE_CHANGE:
case NETDEV_POST_TYPE_CHANGE:
- addrconf_type_change(dev, event);
+ if (idev)
+ addrconf_type_change(dev, event);
break;
}
@@ -3499,6 +3538,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
struct net_device *dev = idev->dev;
+ bool notify = false;
addrconf_join_solict(dev, &ifp->addr);
@@ -3544,7 +3584,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
/* Because optimistic nodes can use this address,
* notify listeners. If DAD fails, RTM_DELADDR is sent.
*/
- ipv6_ifa_notify(RTM_NEWADDR, ifp);
+ notify = true;
}
}
@@ -3552,6 +3592,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
out:
spin_unlock(&ifp->lock);
read_unlock_bh(&idev->lock);
+ if (notify)
+ ipv6_ifa_notify(RTM_NEWADDR, ifp);
}
static void addrconf_dad_start(struct inet6_ifaddr *ifp)
@@ -3641,7 +3683,7 @@ static void addrconf_dad_work(struct work_struct *w)
/* send a neighbour solicitation for our addr */
addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
- ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any, NULL);
+ ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any);
out:
in6_ifa_put(ifp);
rtnl_unlock();
@@ -4920,7 +4962,8 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
if (mode != IN6_ADDR_GEN_MODE_EUI64 &&
mode != IN6_ADDR_GEN_MODE_NONE &&
- mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY)
+ mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY &&
+ mode != IN6_ADDR_GEN_MODE_RANDOM)
return -EINVAL;
if (mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY &&
@@ -5199,6 +5242,20 @@ int addrconf_sysctl_forward(struct ctl_table *ctl, int write,
}
static
+int addrconf_sysctl_hop_limit(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table lctl;
+ int min_hl = 1, max_hl = 255;
+
+ lctl = *ctl;
+ lctl.extra1 = &min_hl;
+ lctl.extra2 = &max_hl;
+
+ return proc_dointvec_minmax(&lctl, write, buffer, lenp, ppos);
+}
+
+static
int addrconf_sysctl_mtu(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
@@ -5362,13 +5419,10 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write,
goto out;
}
- if (!write) {
- err = snprintf(str, sizeof(str), "%pI6",
- &secret->secret);
- if (err >= sizeof(str)) {
- err = -EIO;
- goto out;
- }
+ err = snprintf(str, sizeof(str), "%pI6", &secret->secret);
+ if (err >= sizeof(str)) {
+ err = -EIO;
+ goto out;
}
err = proc_dostring(&lctl, write, buffer, lenp, ppos);
@@ -5453,7 +5507,7 @@ static struct addrconf_sysctl_table
.data = &ipv6_devconf.hop_limit,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_sysctl_hop_limit,
},
{
.procname = "mtu",
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 882124ebb438..a8f6986dcbe5 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -552,7 +552,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh)
rcu_read_lock();
p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
- if (p && ip6addrlbl_hold(p))
+ if (p && !ip6addrlbl_hold(p))
p = NULL;
lseq = ip6addrlbl_table.seq;
rcu_read_unlock();
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 44bb66bde0e2..9f5137cd604e 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -109,6 +109,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
int try_loading_module = 0;
int err;
+ if (protocol < 0 || protocol >= IPPROTO_MAX)
+ return -EINVAL;
+
/* Look for the requested type/protocol pair. */
lookup_protocol:
err = -ESOCKTNOSUPPORT;
@@ -428,9 +431,11 @@ void inet6_destroy_sock(struct sock *sk)
/* Free tx options */
- opt = xchg(&np->opt, NULL);
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
}
EXPORT_SYMBOL_GPL(inet6_destroy_sock);
@@ -659,7 +664,10 @@ int inet6_sk_rebuild_header(struct sock *sk)
fl6.fl6_sport = inet->inet_sport;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt),
+ &final);
+ rcu_read_unlock();
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
@@ -668,7 +676,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
return PTR_ERR(dst);
}
- __ip6_dst_store(sk, dst, NULL, NULL);
+ ip6_dst_store(sk, dst, NULL, NULL);
}
return 0;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index d70b0238f468..428162155280 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -162,13 +162,18 @@ ipv4_connected:
fl6.fl6_dport = inet->inet_dport;
fl6.fl6_sport = inet->inet_sport;
+ if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
+
if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
fl6.flowi6_oif = np->mcast_oif;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- opt = flowlabel ? flowlabel->opt : np->opt;
+ rcu_read_lock();
+ opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt);
final_p = fl6_update_dst(&fl6, opt, &final);
+ rcu_read_unlock();
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
err = 0;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index ce203b0402be..ea7c4d64a00a 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -727,6 +727,7 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
*((char **)&opt2->dst1opt) += dif;
if (opt2->srcrt)
*((char **)&opt2->srcrt) += dif;
+ atomic_set(&opt2->refcnt, 1);
}
return opt2;
}
@@ -790,7 +791,7 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
return ERR_PTR(-ENOBUFS);
memset(opt2, 0, tot_len);
-
+ atomic_set(&opt2->refcnt, 1);
opt2->tot_len = tot_len;
p = (char *)(opt2 + 1);
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 5c5d23e59da5..9508a20fbf61 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -257,7 +257,11 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
*fragoff = _frag_off;
return hp->nexthdr;
}
- return -ENOENT;
+ if (!found)
+ return -ENOENT;
+ if (fragoff)
+ *fragoff = _frag_off;
+ break;
}
hdrlen = 8;
} else if (nexthdr == NEXTHDR_AUTH) {
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 36c5a98b0472..0a37ddc7af51 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -834,11 +834,6 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
}
-/*
- * Special lock-class for __icmpv6_sk:
- */
-static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
-
static int __net_init icmpv6_sk_init(struct net *net)
{
struct sock *sk;
@@ -860,15 +855,6 @@ static int __net_init icmpv6_sk_init(struct net *net)
net->ipv6.icmp_sk[i] = sk;
- /*
- * Split off their lock-class, because sk->sk_dst_lock
- * gets used from softirqs, which is safe for
- * __icmpv6_sk (because those never get directly used
- * via userspace syscalls), but unsafe for normal sockets.
- */
- lockdep_set_class(&sk->sk_dst_lock,
- &icmpv6_socket_sk_dst_lock_key);
-
/* Enough space for 2 64K ICMP packets, including
* sk_buff struct overhead.
*/
diff --git a/net/ipv6/ila/Makefile b/net/ipv6/ila/Makefile
new file mode 100644
index 000000000000..4b32e5921e5c
--- /dev/null
+++ b/net/ipv6/ila/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for ILA module
+#
+
+obj-$(CONFIG_IPV6_ILA) += ila.o
+
+ila-objs := ila_common.o ila_lwt.o ila_xlat.o
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
new file mode 100644
index 000000000000..28542cb2b387
--- /dev/null
+++ b/net/ipv6/ila/ila.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2015 Tom Herbert <tom@herbertland.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ */
+
+#ifndef __ILA_H
+#define __ILA_H
+
+#include <linux/errno.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <net/checksum.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <uapi/linux/ila.h>
+
+struct ila_params {
+ __be64 locator;
+ __be64 locator_match;
+ __wsum csum_diff;
+};
+
+static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
+{
+ __be32 diff[] = {
+ ~from[0], ~from[1], to[0], to[1],
+ };
+
+ return csum_partial(diff, sizeof(diff), 0);
+}
+
+void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p);
+
+int ila_lwt_init(void);
+void ila_lwt_fini(void);
+int ila_xlat_init(void);
+void ila_xlat_fini(void);
+
+#endif /* __ILA_H */
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
new file mode 100644
index 000000000000..32dc9aab7297
--- /dev/null
+++ b/net/ipv6/ila/ila_common.c
@@ -0,0 +1,103 @@
+#include <linux/errno.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/types.h>
+#include <net/checksum.h>
+#include <net/ip.h>
+#include <net/ip6_fib.h>
+#include <net/lwtunnel.h>
+#include <net/protocol.h>
+#include <uapi/linux/ila.h>
+#include "ila.h"
+
+static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
+{
+ if (*(__be64 *)&ip6h->daddr == p->locator_match)
+ return p->csum_diff;
+ else
+ return compute_csum_diff8((__be32 *)&ip6h->daddr,
+ (__be32 *)&p->locator);
+}
+
+void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
+{
+ __wsum diff;
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ size_t nhoff = sizeof(struct ipv6hdr);
+
+ /* First update checksum */
+ switch (ip6h->nexthdr) {
+ case NEXTHDR_TCP:
+ if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) {
+ struct tcphdr *th = (struct tcphdr *)
+ (skb_network_header(skb) + nhoff);
+
+ diff = get_csum_diff(ip6h, p);
+ inet_proto_csum_replace_by_diff(&th->check, skb,
+ diff, true);
+ }
+ break;
+ case NEXTHDR_UDP:
+ if (likely(pskb_may_pull(skb, nhoff + sizeof(struct udphdr)))) {
+ struct udphdr *uh = (struct udphdr *)
+ (skb_network_header(skb) + nhoff);
+
+ if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ diff = get_csum_diff(ip6h, p);
+ inet_proto_csum_replace_by_diff(&uh->check, skb,
+ diff, true);
+ if (!uh->check)
+ uh->check = CSUM_MANGLED_0;
+ }
+ }
+ break;
+ case NEXTHDR_ICMP:
+ if (likely(pskb_may_pull(skb,
+ nhoff + sizeof(struct icmp6hdr)))) {
+ struct icmp6hdr *ih = (struct icmp6hdr *)
+ (skb_network_header(skb) + nhoff);
+
+ diff = get_csum_diff(ip6h, p);
+ inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb,
+ diff, true);
+ }
+ break;
+ }
+
+ /* Now change destination address */
+ *(__be64 *)&ip6h->daddr = p->locator;
+}
+
+static int __init ila_init(void)
+{
+ int ret;
+
+ ret = ila_lwt_init();
+
+ if (ret)
+ goto fail_lwt;
+
+ ret = ila_xlat_init();
+ if (ret)
+ goto fail_xlat;
+
+ return 0;
+fail_xlat:
+ ila_lwt_fini();
+fail_lwt:
+ return ret;
+}
+
+static void __exit ila_fini(void)
+{
+ ila_xlat_fini();
+ ila_lwt_fini();
+}
+
+module_init(ila_init);
+module_exit(ila_fini);
+MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/ila.c b/net/ipv6/ila/ila_lwt.c
index 1a6852e1ac69..2ae3c4fd8aab 100644
--- a/net/ipv6/ila.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -11,12 +11,7 @@
#include <net/lwtunnel.h>
#include <net/protocol.h>
#include <uapi/linux/ila.h>
-
-struct ila_params {
- __be64 locator;
- __be64 locator_match;
- __wsum csum_diff;
-};
+#include "ila.h"
static inline struct ila_params *ila_params_lwtunnel(
struct lwtunnel_state *lwstate)
@@ -24,73 +19,6 @@ static inline struct ila_params *ila_params_lwtunnel(
return (struct ila_params *)lwstate->data;
}
-static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
-{
- __be32 diff[] = {
- ~from[0], ~from[1], to[0], to[1],
- };
-
- return csum_partial(diff, sizeof(diff), 0);
-}
-
-static inline __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
-{
- if (*(__be64 *)&ip6h->daddr == p->locator_match)
- return p->csum_diff;
- else
- return compute_csum_diff8((__be32 *)&ip6h->daddr,
- (__be32 *)&p->locator);
-}
-
-static void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
-{
- __wsum diff;
- struct ipv6hdr *ip6h = ipv6_hdr(skb);
- size_t nhoff = sizeof(struct ipv6hdr);
-
- /* First update checksum */
- switch (ip6h->nexthdr) {
- case NEXTHDR_TCP:
- if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) {
- struct tcphdr *th = (struct tcphdr *)
- (skb_network_header(skb) + nhoff);
-
- diff = get_csum_diff(ip6h, p);
- inet_proto_csum_replace_by_diff(&th->check, skb,
- diff, true);
- }
- break;
- case NEXTHDR_UDP:
- if (likely(pskb_may_pull(skb, nhoff + sizeof(struct udphdr)))) {
- struct udphdr *uh = (struct udphdr *)
- (skb_network_header(skb) + nhoff);
-
- if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
- diff = get_csum_diff(ip6h, p);
- inet_proto_csum_replace_by_diff(&uh->check, skb,
- diff, true);
- if (!uh->check)
- uh->check = CSUM_MANGLED_0;
- }
- }
- break;
- case NEXTHDR_ICMP:
- if (likely(pskb_may_pull(skb,
- nhoff + sizeof(struct icmp6hdr)))) {
- struct icmp6hdr *ih = (struct icmp6hdr *)
- (skb_network_header(skb) + nhoff);
-
- diff = get_csum_diff(ip6h, p);
- inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb,
- diff, true);
- }
- break;
- }
-
- /* Now change destination address */
- *(__be64 *)&ip6h->daddr = p->locator;
-}
-
static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -213,17 +141,12 @@ static const struct lwtunnel_encap_ops ila_encap_ops = {
.cmp_encap = ila_encap_cmp,
};
-static int __init ila_init(void)
+int ila_lwt_init(void)
{
return lwtunnel_encap_add_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA);
}
-static void __exit ila_fini(void)
+void ila_lwt_fini(void)
{
lwtunnel_encap_del_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA);
}
-
-module_init(ila_init);
-module_exit(ila_fini);
-MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
-MODULE_LICENSE("GPL");
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
new file mode 100644
index 000000000000..295ca29a23c3
--- /dev/null
+++ b/net/ipv6/ila/ila_xlat.c
@@ -0,0 +1,680 @@
+#include <linux/jhash.h>
+#include <linux/netfilter.h>
+#include <linux/rcupdate.h>
+#include <linux/rhashtable.h>
+#include <linux/vmalloc.h>
+#include <net/genetlink.h>
+#include <net/ila.h>
+#include <net/netns/generic.h>
+#include <uapi/linux/genetlink.h>
+#include "ila.h"
+
+struct ila_xlat_params {
+ struct ila_params ip;
+ __be64 identifier;
+ int ifindex;
+ unsigned int dir;
+};
+
+struct ila_map {
+ struct ila_xlat_params p;
+ struct rhash_head node;
+ struct ila_map __rcu *next;
+ struct rcu_head rcu;
+};
+
+static unsigned int ila_net_id;
+
+struct ila_net {
+ struct rhashtable rhash_table;
+ spinlock_t *locks; /* Bucket locks for entry manipulation */
+ unsigned int locks_mask;
+ bool hooks_registered;
+};
+
+#define LOCKS_PER_CPU 10
+
+static int alloc_ila_locks(struct ila_net *ilan)
+{
+ unsigned int i, size;
+ unsigned int nr_pcpus = num_possible_cpus();
+
+ nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
+ size = roundup_pow_of_two(nr_pcpus * LOCKS_PER_CPU);
+
+ if (sizeof(spinlock_t) != 0) {
+#ifdef CONFIG_NUMA
+ if (size * sizeof(spinlock_t) > PAGE_SIZE)
+ ilan->locks = vmalloc(size * sizeof(spinlock_t));
+ else
+#endif
+ ilan->locks = kmalloc_array(size, sizeof(spinlock_t),
+ GFP_KERNEL);
+ if (!ilan->locks)
+ return -ENOMEM;
+ for (i = 0; i < size; i++)
+ spin_lock_init(&ilan->locks[i]);
+ }
+ ilan->locks_mask = size - 1;
+
+ return 0;
+}
+
+static u32 hashrnd __read_mostly;
+static __always_inline void __ila_hash_secret_init(void)
+{
+ net_get_random_once(&hashrnd, sizeof(hashrnd));
+}
+
+static inline u32 ila_identifier_hash(__be64 identifier)
+{
+ u32 *v = (u32 *)&identifier;
+
+ return jhash_2words(v[0], v[1], hashrnd);
+}
+
+static inline spinlock_t *ila_get_lock(struct ila_net *ilan, __be64 identifier)
+{
+ return &ilan->locks[ila_identifier_hash(identifier) & ilan->locks_mask];
+}
+
+static inline int ila_cmp_wildcards(struct ila_map *ila, __be64 loc,
+ int ifindex, unsigned int dir)
+{
+ return (ila->p.ip.locator_match && ila->p.ip.locator_match != loc) ||
+ (ila->p.ifindex && ila->p.ifindex != ifindex) ||
+ !(ila->p.dir & dir);
+}
+
+static inline int ila_cmp_params(struct ila_map *ila, struct ila_xlat_params *p)
+{
+ return (ila->p.ip.locator_match != p->ip.locator_match) ||
+ (ila->p.ifindex != p->ifindex) ||
+ (ila->p.dir != p->dir);
+}
+
+static int ila_cmpfn(struct rhashtable_compare_arg *arg,
+ const void *obj)
+{
+ const struct ila_map *ila = obj;
+
+ return (ila->p.identifier != *(__be64 *)arg->key);
+}
+
+static inline int ila_order(struct ila_map *ila)
+{
+ int score = 0;
+
+ if (ila->p.ip.locator_match)
+ score += 1 << 0;
+
+ if (ila->p.ifindex)
+ score += 1 << 1;
+
+ return score;
+}
+
+static const struct rhashtable_params rht_params = {
+ .nelem_hint = 1024,
+ .head_offset = offsetof(struct ila_map, node),
+ .key_offset = offsetof(struct ila_map, p.identifier),
+ .key_len = sizeof(u64), /* identifier */
+ .max_size = 1048576,
+ .min_size = 256,
+ .automatic_shrinking = true,
+ .obj_cmpfn = ila_cmpfn,
+};
+
+static struct genl_family ila_nl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = 0,
+ .name = ILA_GENL_NAME,
+ .version = ILA_GENL_VERSION,
+ .maxattr = ILA_ATTR_MAX,
+ .netnsok = true,
+ .parallel_ops = true,
+};
+
+static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
+ [ILA_ATTR_IDENTIFIER] = { .type = NLA_U64, },
+ [ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
+ [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
+ [ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
+ [ILA_ATTR_DIR] = { .type = NLA_U32, },
+};
+
+static int parse_nl_config(struct genl_info *info,
+ struct ila_xlat_params *p)
+{
+ memset(p, 0, sizeof(*p));
+
+ if (info->attrs[ILA_ATTR_IDENTIFIER])
+ p->identifier = (__force __be64)nla_get_u64(
+ info->attrs[ILA_ATTR_IDENTIFIER]);
+
+ if (info->attrs[ILA_ATTR_LOCATOR])
+ p->ip.locator = (__force __be64)nla_get_u64(
+ info->attrs[ILA_ATTR_LOCATOR]);
+
+ if (info->attrs[ILA_ATTR_LOCATOR_MATCH])
+ p->ip.locator_match = (__force __be64)nla_get_u64(
+ info->attrs[ILA_ATTR_LOCATOR_MATCH]);
+
+ if (info->attrs[ILA_ATTR_IFINDEX])
+ p->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
+
+ if (info->attrs[ILA_ATTR_DIR])
+ p->dir = nla_get_u32(info->attrs[ILA_ATTR_DIR]);
+
+ return 0;
+}
+
+/* Must be called with rcu readlock */
+static inline struct ila_map *ila_lookup_wildcards(__be64 id, __be64 loc,
+ int ifindex,
+ unsigned int dir,
+ struct ila_net *ilan)
+{
+ struct ila_map *ila;
+
+ ila = rhashtable_lookup_fast(&ilan->rhash_table, &id, rht_params);
+ while (ila) {
+ if (!ila_cmp_wildcards(ila, loc, ifindex, dir))
+ return ila;
+ ila = rcu_access_pointer(ila->next);
+ }
+
+ return NULL;
+}
+
+/* Must be called with rcu readlock */
+static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *p,
+ struct ila_net *ilan)
+{
+ struct ila_map *ila;
+
+ ila = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier,
+ rht_params);
+ while (ila) {
+ if (!ila_cmp_params(ila, p))
+ return ila;
+ ila = rcu_access_pointer(ila->next);
+ }
+
+ return NULL;
+}
+
+static inline void ila_release(struct ila_map *ila)
+{
+ kfree_rcu(ila, rcu);
+}
+
+static void ila_free_cb(void *ptr, void *arg)
+{
+ struct ila_map *ila = (struct ila_map *)ptr, *next;
+
+ /* Assume rcu_readlock held */
+ while (ila) {
+ next = rcu_access_pointer(ila->next);
+ ila_release(ila);
+ ila = next;
+ }
+}
+
+static int ila_xlat_addr(struct sk_buff *skb, int dir);
+
+static unsigned int
+ila_nf_input(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ ila_xlat_addr(skb, ILA_DIR_IN);
+ return NF_ACCEPT;
+}
+
+static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = {
+ {
+ .hook = ila_nf_input,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = -1,
+ },
+};
+
+static int ila_add_mapping(struct net *net, struct ila_xlat_params *p)
+{
+ struct ila_net *ilan = net_generic(net, ila_net_id);
+ struct ila_map *ila, *head;
+ spinlock_t *lock = ila_get_lock(ilan, p->identifier);
+ int err = 0, order;
+
+ if (!ilan->hooks_registered) {
+ /* We defer registering net hooks in the namespace until the
+ * first mapping is added.
+ */
+ err = nf_register_net_hooks(net, ila_nf_hook_ops,
+ ARRAY_SIZE(ila_nf_hook_ops));
+ if (err)
+ return err;
+
+ ilan->hooks_registered = true;
+ }
+
+ ila = kzalloc(sizeof(*ila), GFP_KERNEL);
+ if (!ila)
+ return -ENOMEM;
+
+ ila->p = *p;
+
+ if (p->ip.locator_match) {
+ /* Precompute checksum difference for translation since we
+ * know both the old identifier and the new one.
+ */
+ ila->p.ip.csum_diff = compute_csum_diff8(
+ (__be32 *)&p->ip.locator_match,
+ (__be32 *)&p->ip.locator);
+ }
+
+ order = ila_order(ila);
+
+ spin_lock(lock);
+
+ head = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier,
+ rht_params);
+ if (!head) {
+ /* New entry for the rhash_table */
+ err = rhashtable_lookup_insert_fast(&ilan->rhash_table,
+ &ila->node, rht_params);
+ } else {
+ struct ila_map *tila = head, *prev = NULL;
+
+ do {
+ if (!ila_cmp_params(tila, p)) {
+ err = -EEXIST;
+ goto out;
+ }
+
+ if (order > ila_order(tila))
+ break;
+
+ prev = tila;
+ tila = rcu_dereference_protected(tila->next,
+ lockdep_is_held(lock));
+ } while (tila);
+
+ if (prev) {
+ /* Insert in sub list of head */
+ RCU_INIT_POINTER(ila->next, tila);
+ rcu_assign_pointer(prev->next, ila);
+ } else {
+ /* Make this ila new head */
+ RCU_INIT_POINTER(ila->next, head);
+ err = rhashtable_replace_fast(&ilan->rhash_table,
+ &head->node,
+ &ila->node, rht_params);
+ if (err)
+ goto out;
+ }
+ }
+
+out:
+ spin_unlock(lock);
+
+ if (err)
+ kfree(ila);
+
+ return err;
+}
+
+static int ila_del_mapping(struct net *net, struct ila_xlat_params *p)
+{
+ struct ila_net *ilan = net_generic(net, ila_net_id);
+ struct ila_map *ila, *head, *prev;
+ spinlock_t *lock = ila_get_lock(ilan, p->identifier);
+ int err = -ENOENT;
+
+ spin_lock(lock);
+
+ head = rhashtable_lookup_fast(&ilan->rhash_table,
+ &p->identifier, rht_params);
+ ila = head;
+
+ prev = NULL;
+
+ while (ila) {
+ if (ila_cmp_params(ila, p)) {
+ prev = ila;
+ ila = rcu_dereference_protected(ila->next,
+ lockdep_is_held(lock));
+ continue;
+ }
+
+ err = 0;
+
+ if (prev) {
+ /* Not head, just delete from list */
+ rcu_assign_pointer(prev->next, ila->next);
+ } else {
+ /* It is the head. If there is something in the
+ * sublist we need to make a new head.
+ */
+ head = rcu_dereference_protected(ila->next,
+ lockdep_is_held(lock));
+ if (head) {
+ /* Put first entry in the sublist into the
+ * table
+ */
+ err = rhashtable_replace_fast(
+ &ilan->rhash_table, &ila->node,
+ &head->node, rht_params);
+ if (err)
+ goto out;
+ } else {
+ /* Entry no longer used */
+ err = rhashtable_remove_fast(&ilan->rhash_table,
+ &ila->node,
+ rht_params);
+ }
+ }
+
+ ila_release(ila);
+
+ break;
+ }
+
+out:
+ spin_unlock(lock);
+
+ return err;
+}
+
+static int ila_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct ila_xlat_params p;
+ int err;
+
+ err = parse_nl_config(info, &p);
+ if (err)
+ return err;
+
+ return ila_add_mapping(net, &p);
+}
+
+static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct ila_xlat_params p;
+ int err;
+
+ err = parse_nl_config(info, &p);
+ if (err)
+ return err;
+
+ ila_del_mapping(net, &p);
+
+ return 0;
+}
+
+static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg)
+{
+ if (nla_put_u64(msg, ILA_ATTR_IDENTIFIER,
+ (__force u64)ila->p.identifier) ||
+ nla_put_u64(msg, ILA_ATTR_LOCATOR,
+ (__force u64)ila->p.ip.locator) ||
+ nla_put_u64(msg, ILA_ATTR_LOCATOR_MATCH,
+ (__force u64)ila->p.ip.locator_match) ||
+ nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->p.ifindex) ||
+ nla_put_u32(msg, ILA_ATTR_DIR, ila->p.dir))
+ return -1;
+
+ return 0;
+}
+
+static int ila_dump_info(struct ila_map *ila,
+ u32 portid, u32 seq, u32 flags,
+ struct sk_buff *skb, u8 cmd)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(skb, portid, seq, &ila_nl_family, flags, cmd);
+ if (!hdr)
+ return -ENOMEM;
+
+ if (ila_fill_info(ila, skb) < 0)
+ goto nla_put_failure;
+
+ genlmsg_end(skb, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct ila_net *ilan = net_generic(net, ila_net_id);
+ struct sk_buff *msg;
+ struct ila_xlat_params p;
+ struct ila_map *ila;
+ int ret;
+
+ ret = parse_nl_config(info, &p);
+ if (ret)
+ return ret;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ rcu_read_lock();
+
+ ila = ila_lookup_by_params(&p, ilan);
+ if (ila) {
+ ret = ila_dump_info(ila,
+ info->snd_portid,
+ info->snd_seq, 0, msg,
+ info->genlhdr->cmd);
+ }
+
+ rcu_read_unlock();
+
+ if (ret < 0)
+ goto out_free;
+
+ return genlmsg_reply(msg, info);
+
+out_free:
+ nlmsg_free(msg);
+ return ret;
+}
+
+struct ila_dump_iter {
+ struct rhashtable_iter rhiter;
+};
+
+static int ila_nl_dump_start(struct netlink_callback *cb)
+{
+ struct net *net = sock_net(cb->skb->sk);
+ struct ila_net *ilan = net_generic(net, ila_net_id);
+ struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args;
+
+ return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter);
+}
+
+static int ila_nl_dump_done(struct netlink_callback *cb)
+{
+ struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args;
+
+ rhashtable_walk_exit(&iter->rhiter);
+
+ return 0;
+}
+
+static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args;
+ struct rhashtable_iter *rhiter = &iter->rhiter;
+ struct ila_map *ila;
+ int ret;
+
+ ret = rhashtable_walk_start(rhiter);
+ if (ret && ret != -EAGAIN)
+ goto done;
+
+ for (;;) {
+ ila = rhashtable_walk_next(rhiter);
+
+ if (IS_ERR(ila)) {
+ if (PTR_ERR(ila) == -EAGAIN)
+ continue;
+ ret = PTR_ERR(ila);
+ goto done;
+ } else if (!ila) {
+ break;
+ }
+
+ while (ila) {
+ ret = ila_dump_info(ila, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ skb, ILA_CMD_GET);
+ if (ret)
+ goto done;
+
+ ila = rcu_access_pointer(ila->next);
+ }
+ }
+
+ ret = skb->len;
+
+done:
+ rhashtable_walk_stop(rhiter);
+ return ret;
+}
+
+static const struct genl_ops ila_nl_ops[] = {
+ {
+ .cmd = ILA_CMD_ADD,
+ .doit = ila_nl_cmd_add_mapping,
+ .policy = ila_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = ILA_CMD_DEL,
+ .doit = ila_nl_cmd_del_mapping,
+ .policy = ila_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = ILA_CMD_GET,
+ .doit = ila_nl_cmd_get_mapping,
+ .start = ila_nl_dump_start,
+ .dumpit = ila_nl_dump,
+ .done = ila_nl_dump_done,
+ .policy = ila_nl_policy,
+ },
+};
+
+#define ILA_HASH_TABLE_SIZE 1024
+
+static __net_init int ila_init_net(struct net *net)
+{
+ int err;
+ struct ila_net *ilan = net_generic(net, ila_net_id);
+
+ err = alloc_ila_locks(ilan);
+ if (err)
+ return err;
+
+ rhashtable_init(&ilan->rhash_table, &rht_params);
+
+ return 0;
+}
+
+static __net_exit void ila_exit_net(struct net *net)
+{
+ struct ila_net *ilan = net_generic(net, ila_net_id);
+
+ rhashtable_free_and_destroy(&ilan->rhash_table, ila_free_cb, NULL);
+
+ kvfree(ilan->locks);
+
+ if (ilan->hooks_registered)
+ nf_unregister_net_hooks(net, ila_nf_hook_ops,
+ ARRAY_SIZE(ila_nf_hook_ops));
+}
+
+static struct pernet_operations ila_net_ops = {
+ .init = ila_init_net,
+ .exit = ila_exit_net,
+ .id = &ila_net_id,
+ .size = sizeof(struct ila_net),
+};
+
+static int ila_xlat_addr(struct sk_buff *skb, int dir)
+{
+ struct ila_map *ila;
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct net *net = dev_net(skb->dev);
+ struct ila_net *ilan = net_generic(net, ila_net_id);
+ __be64 identifier, locator_match;
+ size_t nhoff;
+
+ /* Assumes skb contains a valid IPv6 header that is pulled */
+
+ identifier = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[8];
+ locator_match = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[0];
+ nhoff = sizeof(struct ipv6hdr);
+
+ rcu_read_lock();
+
+ ila = ila_lookup_wildcards(identifier, locator_match,
+ skb->dev->ifindex, dir, ilan);
+ if (ila)
+ update_ipv6_locator(skb, &ila->p.ip);
+
+ rcu_read_unlock();
+
+ return 0;
+}
+
+int ila_xlat_incoming(struct sk_buff *skb)
+{
+ return ila_xlat_addr(skb, ILA_DIR_IN);
+}
+EXPORT_SYMBOL(ila_xlat_incoming);
+
+int ila_xlat_outgoing(struct sk_buff *skb)
+{
+ return ila_xlat_addr(skb, ILA_DIR_OUT);
+}
+EXPORT_SYMBOL(ila_xlat_outgoing);
+
+int ila_xlat_init(void)
+{
+ int ret;
+
+ ret = register_pernet_device(&ila_net_ops);
+ if (ret)
+ goto exit;
+
+ ret = genl_register_family_with_ops(&ila_nl_family,
+ ila_nl_ops);
+ if (ret < 0)
+ goto unregister;
+
+ return 0;
+
+unregister:
+ unregister_pernet_device(&ila_net_ops);
+exit:
+ return ret;
+}
+
+void ila_xlat_fini(void)
+{
+ genl_unregister_family(&ila_nl_family);
+ unregister_pernet_device(&ila_net_ops);
+}
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 5d1c7cee2cb2..36c3f0155010 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -51,12 +51,12 @@ int inet6_csk_bind_conflict(const struct sock *sk,
(sk2->sk_state != TCP_TIME_WAIT &&
!uid_eq(uid,
sock_i_uid((struct sock *)sk2))))) {
- if (ipv6_rcv_saddr_equal(sk, sk2))
+ if (ipv6_rcv_saddr_equal(sk, sk2, true))
break;
}
if (!relax && reuse && sk2->sk_reuse &&
sk2->sk_state != TCP_LISTEN &&
- ipv6_rcv_saddr_equal(sk, sk2))
+ ipv6_rcv_saddr_equal(sk, sk2, true))
break;
}
}
@@ -78,7 +78,9 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
memset(fl6, 0, sizeof(*fl6));
fl6->flowi6_proto = proto;
fl6->daddr = ireq->ir_v6_rmt_addr;
- final_p = fl6_update_dst(fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();
fl6->saddr = ireq->ir_v6_loc_addr;
fl6->flowi6_oif = ireq->ir_iif;
fl6->flowi6_mark = ireq->ir_mark;
@@ -109,14 +111,6 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
static inline
-void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr)
-{
- __ip6_dst_store(sk, dst, daddr, saddr);
-}
-
-static inline
struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
{
return __sk_dst_check(sk, cookie);
@@ -142,14 +136,16 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
fl6->fl6_dport = inet->inet_dport;
security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
- final_p = fl6_update_dst(fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();
dst = __inet6_csk_dst_check(sk, np->dst_cookie);
if (!dst) {
dst = ip6_dst_lookup_flow(sk, fl6, final_p);
if (!IS_ERR(dst))
- __inet6_csk_dst_store(sk, dst, NULL, NULL);
+ ip6_dst_store(sk, dst, NULL, NULL);
}
return dst;
}
@@ -175,7 +171,8 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
/* Restore final destination back after routing done */
fl6.daddr = sk->sk_v6_daddr;
- res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+ res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+ np->tclass);
rcu_read_unlock();
return res;
}
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 1f9ebe3cbb4a..dc2db4f7b182 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -540,12 +540,13 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
}
spin_lock_bh(&ip6_sk_fl_lock);
for (sflp = &np->ipv6_fl_list;
- (sfl = rcu_dereference(*sflp)) != NULL;
+ (sfl = rcu_dereference_protected(*sflp,
+ lockdep_is_held(&ip6_sk_fl_lock))) != NULL;
sflp = &sfl->next) {
if (sfl->fl->label == freq.flr_label) {
if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
np->flow_label &= ~IPV6_FLOWLABEL_MASK;
- *sflp = rcu_dereference(sfl->next);
+ *sflp = sfl->next;
spin_unlock_bh(&ip6_sk_fl_lock);
fl_release(sfl->fl);
kfree_rcu(sfl, rcu);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3c7b9310b33f..c0d4dc1c5ea4 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -24,7 +24,6 @@
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
-#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
@@ -778,6 +777,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
__u32 mtu;
int err;
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
encap_limit = t->parms.encap_limit;
@@ -1513,6 +1514,7 @@ static void ip6gre_tap_setup(struct net_device *dev)
dev->destructor = ip6gre_dev_free;
dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
}
static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
@@ -1571,13 +1573,11 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
return -EEXIST;
} else {
t = nt;
-
- ip6gre_tunnel_unlink(ign, t);
- ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
- ip6gre_tunnel_link(ign, t);
- netdev_state_change(dev);
}
+ ip6gre_tunnel_unlink(ign, t);
+ ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
+ ip6gre_tunnel_link(ign, t);
return 0;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index e6a7bd15b9b7..a163102f1803 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -909,6 +909,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
struct rt6_info *rt;
#endif
int err;
+ int flags = 0;
/* The correct way to handle this would be to do
* ip6_route_get_saddr, and then ip6_route_output; however,
@@ -940,10 +941,13 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
dst_release(*dst);
*dst = NULL;
}
+
+ if (fl6->flowi6_oif)
+ flags |= RT6_LOOKUP_F_IFACE;
}
if (!*dst)
- *dst = ip6_route_output(net, sk, fl6);
+ *dst = ip6_route_output_flags(net, sk, fl6, flags);
err = (*dst)->error;
if (err)
@@ -1322,7 +1326,7 @@ emsgsize:
headersize == sizeof(struct ipv6hdr) &&
length < mtu - headersize &&
!(flags & MSG_MORE) &&
- rt->dst.dev->features & NETIF_F_V6_CSUM)
+ rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
csummode = CHECKSUM_PARTIAL;
if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
@@ -1353,7 +1357,7 @@ emsgsize:
(skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) &&
- (sk->sk_type == SOCK_DGRAM)) {
+ (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen,
transhdrlen, mtu, flags, fl6);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index eabffbb89795..6c5dfec7a377 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -177,7 +177,7 @@ void ip6_tnl_dst_reset(struct ip6_tnl *t)
int i;
for_each_possible_cpu(i)
- ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), NULL);
+ ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
@@ -1180,6 +1180,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
u8 tproto;
int err;
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
tproto = ACCESS_ONCE(t->parms.proto);
if (tproto != IPPROTO_IPIP && tproto != 0)
return -1;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index ad19136086dd..a10e77103c88 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -118,7 +118,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
int cmd);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt);
+static void mroute_clean_tables(struct mr6_table *mrt, bool all);
static void ipmr_expire_process(unsigned long arg);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
@@ -334,7 +334,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
static void ip6mr_free_table(struct mr6_table *mrt)
{
del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt);
+ mroute_clean_tables(mrt, true);
kfree(mrt);
}
@@ -765,10 +765,6 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
return dev;
failure:
- /* allow the register to be completed before unregistering. */
- rtnl_unlock();
- rtnl_lock();
-
unregister_netdevice(dev);
return NULL;
}
@@ -1542,7 +1538,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct mr6_table *mrt)
+static void mroute_clean_tables(struct mr6_table *mrt, bool all)
{
int i;
LIST_HEAD(list);
@@ -1552,8 +1548,9 @@ static void mroute_clean_tables(struct mr6_table *mrt)
* Shut down all active vif entries
*/
for (i = 0; i < mrt->maxvif; i++) {
- if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
- mif6_delete(mrt, i, &list);
+ if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+ continue;
+ mif6_delete(mrt, i, &list);
}
unregister_netdevice_many(&list);
@@ -1562,7 +1559,7 @@ static void mroute_clean_tables(struct mr6_table *mrt)
*/
for (i = 0; i < MFC6_LINES; i++) {
list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
- if (c->mfc_flags & MFC_STATIC)
+ if (!all && (c->mfc_flags & MFC_STATIC))
continue;
write_lock_bh(&mrt_lock);
list_del(&c->list);
@@ -1625,7 +1622,7 @@ int ip6mr_sk_done(struct sock *sk)
net->ipv6.devconf_all);
write_unlock_bh(&mrt_lock);
- mroute_clean_tables(mrt);
+ mroute_clean_tables(mrt, false);
err = 0;
break;
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 63e6956917c9..4449ad1f8114 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -111,7 +111,8 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
}
- opt = xchg(&inet6_sk(sk)->opt, opt);
+ opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt,
+ opt);
sk_dst_reset(sk);
return opt;
@@ -231,9 +232,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
sk->sk_socket->ops = &inet_dgram_ops;
sk->sk_family = PF_INET;
}
- opt = xchg(&np->opt, NULL);
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ opt = xchg((__force struct ipv6_txoptions **)&np->opt,
+ NULL);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
pktopt = xchg(&np->pktoptions, NULL);
kfree_skb(pktopt);
@@ -403,7 +407,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
break;
- opt = ipv6_renew_options(sk, np->opt, optname,
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ opt = ipv6_renew_options(sk, opt, optname,
(struct ipv6_opt_hdr __user *)optval,
optlen);
if (IS_ERR(opt)) {
@@ -432,8 +437,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
retv = 0;
opt = ipv6_update_options(sk, opt);
sticky_done:
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
break;
}
@@ -486,6 +493,7 @@ sticky_done:
break;
memset(opt, 0, sizeof(*opt));
+ atomic_set(&opt->refcnt, 1);
opt->tot_len = sizeof(*opt) + optlen;
retv = -EFAULT;
if (copy_from_user(opt+1, optval, optlen))
@@ -502,8 +510,10 @@ update:
retv = 0;
opt = ipv6_update_options(sk, opt);
done:
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
break;
}
case IPV6_UNICAST_HOPS:
@@ -1110,10 +1120,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
case IPV6_RTHDR:
case IPV6_DSTOPTS:
{
+ struct ipv6_txoptions *opt;
lock_sock(sk);
- len = ipv6_getsockopt_sticky(sk, np->opt,
- optname, optval, len);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len);
release_sock(sk);
/* check if ipv6_getsockopt_sticky() returns err code */
if (len < 0)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 124338a39e29..d64ee7e83664 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1574,9 +1574,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
return NULL;
skb->priority = TC_PRIO_CONTROL;
- skb->reserved_tailroom = skb_end_offset(skb) -
- min(mtu, skb_end_offset(skb));
skb_reserve(skb, hlen);
+ skb_tailroom_reserve(skb, mtu, tlen);
if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
@@ -1651,7 +1650,6 @@ out:
if (!err) {
ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
} else {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
}
@@ -2015,7 +2013,6 @@ out:
if (!err) {
ICMP6MSGOUT_INC_STATS(net, idev, type);
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len);
} else
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 3e0f855e1bea..84afb9a77278 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -556,8 +556,7 @@ static void ndisc_send_unsol_na(struct net_device *dev)
}
void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
- const struct in6_addr *daddr, const struct in6_addr *saddr,
- struct sk_buff *oskb)
+ const struct in6_addr *daddr, const struct in6_addr *saddr)
{
struct sk_buff *skb;
struct in6_addr addr_buf;
@@ -593,9 +592,6 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
dev->dev_addr);
- if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE) && oskb)
- skb_dst_copy(skb, oskb);
-
ndisc_send_skb(skb, daddr, saddr);
}
@@ -682,12 +678,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
"%s: trying to ucast probe in NUD_INVALID: %pI6\n",
__func__, target);
}
- ndisc_send_ns(dev, target, target, saddr, skb);
+ ndisc_send_ns(dev, target, target, saddr);
} else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
neigh_app_ns(neigh);
} else {
addrconf_addr_solict_mult(target, &mcaddr);
- ndisc_send_ns(dev, target, &mcaddr, saddr, skb);
+ ndisc_send_ns(dev, target, &mcaddr, saddr);
}
}
@@ -1187,7 +1183,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
*/
if (!in6_dev->cnf.accept_ra_from_local &&
ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
- NULL, 0)) {
+ in6_dev->dev, 0)) {
ND_PRINTK(2, info,
"RA from local address detected on dev: %s: default router ignored\n",
skb->dev->name);
@@ -1341,7 +1337,7 @@ skip_linkparms:
#ifdef CONFIG_IPV6_ROUTE_INFO
if (!in6_dev->cnf.accept_ra_from_local &&
ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
- NULL, 0)) {
+ in6_dev->dev, 0)) {
ND_PRINTK(2, info,
"RA from local address detected on dev: %s: router info ignored.\n",
skb->dev->name);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index f6a024e141e5..e10a04c9cdc7 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -49,6 +49,7 @@ config NFT_REJECT_IPV6
config NFT_DUP_IPV6
tristate "IPv6 nf_tables packet duplication support"
+ depends on !NF_CONNTRACK || NF_CONNTRACK
select NF_DUP_IPV6
help
This module enables IPv6 packet duplication support for nf_tables.
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index d5efeb87350e..e4347aeb2e65 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -56,7 +56,6 @@ struct nf_ct_frag6_skb_cb
{
struct inet6_skb_parm h;
int offset;
- struct sk_buff *orig;
};
#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb))
@@ -170,12 +169,6 @@ static unsigned int nf_hashfn(const struct inet_frag_queue *q)
return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
}
-static void nf_skb_free(struct sk_buff *skb)
-{
- if (NFCT_FRAG6_CB(skb)->orig)
- kfree_skb(NFCT_FRAG6_CB(skb)->orig);
-}
-
static void nf_ct_frag6_expire(unsigned long data)
{
struct frag_queue *fq;
@@ -190,7 +183,7 @@ static void nf_ct_frag6_expire(unsigned long data)
/* Creation primitives. */
static inline struct frag_queue *fq_find(struct net *net, __be32 id,
u32 user, struct in6_addr *src,
- struct in6_addr *dst, u8 ecn)
+ struct in6_addr *dst, int iif, u8 ecn)
{
struct inet_frag_queue *q;
struct ip6_create_arg arg;
@@ -200,6 +193,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
arg.user = user;
arg.src = src;
arg.dst = dst;
+ arg.iif = iif;
arg.ecn = ecn;
local_bh_disable();
@@ -368,17 +362,18 @@ err:
/*
* Check if this packet is complete.
- * Returns NULL on failure by any reason, and pointer
- * to current nexthdr field in reassembled frame.
*
* It is called with locked fq, and caller must check that
* queue is eligible for reassembly i.e. it is not COMPLETE,
* the last and the first frames arrived and all the bits are here.
+ *
+ * returns true if *prev skb has been transformed into the reassembled
+ * skb, false otherwise.
*/
-static struct sk_buff *
-nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
+static bool
+nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev)
{
- struct sk_buff *fp, *op, *head = fq->q.fragments;
+ struct sk_buff *fp, *head = fq->q.fragments;
int payload_len;
u8 ecn;
@@ -389,22 +384,21 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
- goto out_fail;
+ return false;
/* Unfragmented part is taken from the first segment. */
payload_len = ((head->data - skb_network_header(head)) -
sizeof(struct ipv6hdr) + fq->q.len -
sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN) {
- pr_debug("payload len is too large.\n");
- goto out_oversize;
+ net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
+ payload_len);
+ return false;
}
/* Head of list must not be cloned. */
- if (skb_unclone(head, GFP_ATOMIC)) {
- pr_debug("skb is cloned but can't expand head");
- goto out_oom;
- }
+ if (skb_unclone(head, GFP_ATOMIC))
+ return false;
/* If the first fragment is fragmented itself, we split
* it to two chunks: the first with data and paged part
@@ -415,7 +409,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
clone = alloc_skb(0, GFP_ATOMIC);
if (clone == NULL)
- goto out_oom;
+ return false;
clone->next = head->next;
head->next = clone;
@@ -429,10 +423,41 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
clone->csum = 0;
clone->ip_summed = head->ip_summed;
- NFCT_FRAG6_CB(clone)->orig = NULL;
add_frag_mem_limit(fq->q.net, clone->truesize);
}
+ /* morph head into last received skb: prev.
+ *
+ * This allows callers of ipv6 conntrack defrag to continue
+ * to use the last skb(frag) passed into the reasm engine.
+ * The last skb frag 'silently' turns into the full reassembled skb.
+ *
+ * Since prev is also part of q->fragments we have to clone it first.
+ */
+ if (head != prev) {
+ struct sk_buff *iter;
+
+ fp = skb_clone(prev, GFP_ATOMIC);
+ if (!fp)
+ return false;
+
+ fp->next = prev->next;
+
+ iter = head;
+ while (iter) {
+ if (iter->next == prev) {
+ iter->next = fp;
+ break;
+ }
+ iter = iter->next;
+ }
+
+ skb_morph(prev, head);
+ prev->next = head->next;
+ consume_skb(head);
+ head = prev;
+ }
+
/* We have to remove fragment header from datagram and to relocate
* header in order to calculate ICV correctly. */
skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
@@ -473,31 +498,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
fq->q.fragments = NULL;
fq->q.fragments_tail = NULL;
- /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
- fp = skb_shinfo(head)->frag_list;
- if (fp && NFCT_FRAG6_CB(fp)->orig == NULL)
- /* at above code, head skb is divided into two skbs. */
- fp = fp->next;
-
- op = NFCT_FRAG6_CB(head)->orig;
- for (; fp; fp = fp->next) {
- struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig;
-
- op->next = orig;
- op = orig;
- NFCT_FRAG6_CB(fp)->orig = NULL;
- }
-
- return head;
-
-out_oversize:
- net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
- payload_len);
- goto out_fail;
-out_oom:
- net_dbg_ratelimited("nf_ct_frag6_reasm: no memory for reassembly\n");
-out_fail:
- return NULL;
+ return true;
}
/*
@@ -563,89 +564,61 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
return 0;
}
-struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
+int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
{
- struct sk_buff *clone;
struct net_device *dev = skb->dev;
+ int fhoff, nhoff, ret;
struct frag_hdr *fhdr;
struct frag_queue *fq;
struct ipv6hdr *hdr;
- int fhoff, nhoff;
u8 prevhdr;
- struct sk_buff *ret_skb = NULL;
/* Jumbo payload inhibits frag. header */
if (ipv6_hdr(skb)->payload_len == 0) {
pr_debug("payload len = 0\n");
- return skb;
+ return -EINVAL;
}
if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
- return skb;
+ return -EINVAL;
- clone = skb_clone(skb, GFP_ATOMIC);
- if (clone == NULL) {
- pr_debug("Can't clone skb\n");
- return skb;
- }
+ if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr)))
+ return -ENOMEM;
- NFCT_FRAG6_CB(clone)->orig = skb;
-
- if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) {
- pr_debug("message is too short.\n");
- goto ret_orig;
- }
-
- skb_set_transport_header(clone, fhoff);
- hdr = ipv6_hdr(clone);
- fhdr = (struct frag_hdr *)skb_transport_header(clone);
+ skb_set_transport_header(skb, fhoff);
+ hdr = ipv6_hdr(skb);
+ fhdr = (struct frag_hdr *)skb_transport_header(skb);
fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
- ip6_frag_ecn(hdr));
+ skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
if (fq == NULL) {
pr_debug("Can't find and can't create new queue\n");
- goto ret_orig;
+ return -ENOMEM;
}
spin_lock_bh(&fq->q.lock);
- if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
- spin_unlock_bh(&fq->q.lock);
- pr_debug("Can't insert skb to queue\n");
- inet_frag_put(&fq->q, &nf_frags);
- goto ret_orig;
+ if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) {
+ ret = -EINVAL;
+ goto out_unlock;
}
+ /* after queue has assumed skb ownership, only 0 or -EINPROGRESS
+ * must be returned.
+ */
+ ret = -EINPROGRESS;
if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
- fq->q.meat == fq->q.len) {
- ret_skb = nf_ct_frag6_reasm(fq, dev);
- if (ret_skb == NULL)
- pr_debug("Can't reassemble fragmented packets\n");
- }
- spin_unlock_bh(&fq->q.lock);
+ fq->q.meat == fq->q.len &&
+ nf_ct_frag6_reasm(fq, skb, dev))
+ ret = 0;
+out_unlock:
+ spin_unlock_bh(&fq->q.lock);
inet_frag_put(&fq->q, &nf_frags);
- return ret_skb;
-
-ret_orig:
- kfree_skb(clone);
- return skb;
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
-void nf_ct_frag6_consume_orig(struct sk_buff *skb)
-{
- struct sk_buff *s, *s2;
-
- for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
- s2 = s->next;
- s->next = NULL;
- consume_skb(s);
- s = s2;
- }
-}
-EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig);
-
static int nf_ct_net_init(struct net *net)
{
int res;
@@ -680,7 +653,6 @@ int nf_ct_frag6_init(void)
nf_frags.hashfn = nf_hashfn;
nf_frags.constructor = ip6_frag_init;
nf_frags.destructor = NULL;
- nf_frags.skb_free = nf_skb_free;
nf_frags.qsize = sizeof(struct frag_queue);
nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire;
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index 4fdbed5ebfb6..f7aab5ab93a5 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -55,7 +55,7 @@ static unsigned int ipv6_defrag(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- struct sk_buff *reasm;
+ int err;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* Previously seen (loopback)? */
@@ -63,23 +63,13 @@ static unsigned int ipv6_defrag(void *priv,
return NF_ACCEPT;
#endif
- reasm = nf_ct_frag6_gather(state->net, skb,
- nf_ct6_defrag_user(state->hook, skb));
+ err = nf_ct_frag6_gather(state->net, skb,
+ nf_ct6_defrag_user(state->hook, skb));
/* queued */
- if (reasm == NULL)
+ if (err == -EINPROGRESS)
return NF_STOLEN;
- /* error occurred or not fragmented */
- if (reasm == skb)
- return NF_ACCEPT;
-
- nf_ct_frag6_consume_orig(reasm);
-
- NF_HOOK_THRESH(NFPROTO_IPV6, state->hook, state->net, state->sk, reasm,
- state->in, state->out,
- state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
-
- return NF_STOLEN;
+ return NF_ACCEPT;
}
static struct nf_hook_ops ipv6_defrag_ops[] = {
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 238e70c3f7b7..6ce309928841 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -136,7 +136,8 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
if (skb->ip_summed != CHECKSUM_PARTIAL) {
if (!(rt->rt6i_flags & RTF_LOCAL) &&
- (!skb->dev || skb->dev->features & NETIF_F_V6_CSUM)) {
+ (!skb->dev || skb->dev->features &
+ (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))) {
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_headroom(skb) +
skb_network_offset(skb) +
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 31ba7ca19757..051b6a6bfff6 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -21,6 +21,10 @@
#include <net/ipv6.h>
#include <net/netfilter/ipv6/nf_nat_masquerade.h>
+#define MAX_WORK_COUNT 16
+
+static atomic_t v6_worker_count;
+
unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
const struct net_device *out)
@@ -78,14 +82,78 @@ static struct notifier_block masq_dev_notifier = {
.notifier_call = masq_device_event,
};
+struct masq_dev_work {
+ struct work_struct work;
+ struct net *net;
+ int ifindex;
+};
+
+static void iterate_cleanup_work(struct work_struct *work)
+{
+ struct masq_dev_work *w;
+ long index;
+
+ w = container_of(work, struct masq_dev_work, work);
+
+ index = w->ifindex;
+ nf_ct_iterate_cleanup(w->net, device_cmp, (void *)index, 0, 0);
+
+ put_net(w->net);
+ kfree(w);
+ atomic_dec(&v6_worker_count);
+ module_put(THIS_MODULE);
+}
+
+/* ipv6 inet notifier is an atomic notifier, i.e. we cannot
+ * schedule.
+ *
+ * Unfortunately, nf_ct_iterate_cleanup can run for a long
+ * time if there are lots of conntracks and the system
+ * handles high softirq load, so it frequently calls cond_resched
+ * while iterating the conntrack table.
+ *
+ * So we defer nf_ct_iterate_cleanup walk to the system workqueue.
+ *
+ * As we can have 'a lot' of inet_events (depending on amount
+ * of ipv6 addresses being deleted), we also need to add an upper
+ * limit to the number of queued work items.
+ */
static int masq_inet_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct inet6_ifaddr *ifa = ptr;
- struct netdev_notifier_info info;
+ const struct net_device *dev;
+ struct masq_dev_work *w;
+ struct net *net;
+
+ if (event != NETDEV_DOWN ||
+ atomic_read(&v6_worker_count) >= MAX_WORK_COUNT)
+ return NOTIFY_DONE;
+
+ dev = ifa->idev->dev;
+ net = maybe_get_net(dev_net(dev));
+ if (!net)
+ return NOTIFY_DONE;
- netdev_notifier_info_init(&info, ifa->idev->dev);
- return masq_device_event(this, event, &info);
+ if (!try_module_get(THIS_MODULE))
+ goto err_module;
+
+ w = kmalloc(sizeof(*w), GFP_ATOMIC);
+ if (w) {
+ atomic_inc(&v6_worker_count);
+
+ INIT_WORK(&w->work, iterate_cleanup_work);
+ w->ifindex = dev->ifindex;
+ w->net = net;
+ schedule_work(&w->work);
+
+ return NOTIFY_DONE;
+ }
+
+ module_put(THIS_MODULE);
+ err_module:
+ put_net(net);
+ return NOTIFY_DONE;
}
static struct notifier_block masq_inet_notifier = {
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index e0f922b777e3..4709f657b7b6 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -14,7 +14,6 @@
#include <net/netfilter/ipv6/nf_reject.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
-#include <net/netfilter/ipv6/nf_reject.h>
const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb,
struct tcphdr *otcph,
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
index 120ea9131be0..30b22f4dff55 100644
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -77,7 +77,7 @@ err:
static void nf_tables_ipv6_exit_net(struct net *net)
{
- nft_unregister_afinfo(net->nft.ipv6);
+ nft_unregister_afinfo(net, net->nft.ipv6);
kfree(net->nft.ipv6);
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index dc65ec198f7c..fa59dd7a427e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -733,6 +733,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd,
static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
+ struct ipv6_txoptions *opt_to_free = NULL;
struct ipv6_txoptions opt_space;
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
@@ -839,8 +840,10 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!(opt->opt_nflen|opt->opt_flen))
opt = NULL;
}
- if (!opt)
- opt = np->opt;
+ if (!opt) {
+ opt = txopt_get(np);
+ opt_to_free = opt;
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -906,6 +909,7 @@ done:
dst_release(dst);
out:
fl6_sock_release(flowlabel);
+ txopt_put(opt_to_free);
return err < 0 ? err : len;
do_confirm:
dst_confirm(dst);
@@ -968,6 +972,11 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
switch (optname) {
+ case IPV6_HDRINCL:
+ if (sk->sk_type != SOCK_RAW)
+ return -EINVAL;
+ inet_sk(sk)->hdrincl = !!val;
+ return 0;
case IPV6_CHECKSUM:
if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 &&
level == IPPROTO_IPV6) {
@@ -1012,7 +1021,8 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
return -EOPNOTSUPP;
return rawv6_seticmpfilter(sk, level, optname, optval, optlen);
case SOL_IPV6:
- if (optname == IPV6_CHECKSUM)
+ if (optname == IPV6_CHECKSUM ||
+ optname == IPV6_HDRINCL)
break;
default:
return ipv6_setsockopt(sk, level, optname, optval, optlen);
@@ -1033,7 +1043,8 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
return -EOPNOTSUPP;
return rawv6_seticmpfilter(sk, level, optname, optval, optlen);
case SOL_IPV6:
- if (optname == IPV6_CHECKSUM)
+ if (optname == IPV6_CHECKSUM ||
+ optname == IPV6_HDRINCL)
break;
default:
return compat_ipv6_setsockopt(sk, level, optname,
@@ -1053,6 +1064,9 @@ static int do_rawv6_getsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
switch (optname) {
+ case IPV6_HDRINCL:
+ val = inet_sk(sk)->hdrincl;
+ break;
case IPV6_CHECKSUM:
/*
* We allow getsockopt() for IPPROTO_IPV6-level
@@ -1090,7 +1104,8 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
return -EOPNOTSUPP;
return rawv6_geticmpfilter(sk, level, optname, optval, optlen);
case SOL_IPV6:
- if (optname == IPV6_CHECKSUM)
+ if (optname == IPV6_CHECKSUM ||
+ optname == IPV6_HDRINCL)
break;
default:
return ipv6_getsockopt(sk, level, optname, optval, optlen);
@@ -1111,7 +1126,8 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
return -EOPNOTSUPP;
return rawv6_geticmpfilter(sk, level, optname, optval, optlen);
case SOL_IPV6:
- if (optname == IPV6_CHECKSUM)
+ if (optname == IPV6_CHECKSUM ||
+ optname == IPV6_HDRINCL)
break;
default:
return compat_ipv6_getsockopt(sk, level, optname,
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 44e21a03cfc3..18f3498a6c80 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -108,7 +108,10 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
return fq->id == arg->id &&
fq->user == arg->user &&
ipv6_addr_equal(&fq->saddr, arg->src) &&
- ipv6_addr_equal(&fq->daddr, arg->dst);
+ ipv6_addr_equal(&fq->daddr, arg->dst) &&
+ (arg->iif == fq->iif ||
+ !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST |
+ IPV6_ADDR_LINKLOCAL)));
}
EXPORT_SYMBOL(ip6_frag_match);
@@ -180,7 +183,7 @@ static void ip6_frag_expire(unsigned long data)
static struct frag_queue *
fq_find(struct net *net, __be32 id, const struct in6_addr *src,
- const struct in6_addr *dst, u8 ecn)
+ const struct in6_addr *dst, int iif, u8 ecn)
{
struct inet_frag_queue *q;
struct ip6_create_arg arg;
@@ -190,6 +193,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src,
arg.user = IP6_DEFRAG_LOCAL_DELIVER;
arg.src = src;
arg.dst = dst;
+ arg.iif = iif;
arg.ecn = ecn;
hash = inet6_hash_frag(id, src, dst);
@@ -551,7 +555,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
}
fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
- ip6_frag_ecn(hdr));
+ skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
if (fq) {
int ret;
@@ -751,7 +755,6 @@ int __init ipv6_frag_init(void)
ip6_frags.hashfn = ip6_hashfn;
ip6_frags.constructor = ip6_frag_init;
ip6_frags.destructor = NULL;
- ip6_frags.skb_free = NULL;
ip6_frags.qsize = sizeof(struct frag_queue);
ip6_frags.match = ip6_frag_match;
ip6_frags.frag_expire = ip6_frag_expire;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c8bc9b4ac328..ed446639219c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -62,6 +62,7 @@
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
+#include <trace/events/fib6.h>
#include <asm/uaccess.h>
@@ -404,6 +405,14 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
}
}
+static bool __rt6_check_expired(const struct rt6_info *rt)
+{
+ if (rt->rt6i_flags & RTF_EXPIRES)
+ return time_after(jiffies, rt->dst.expires);
+ else
+ return false;
+}
+
static bool rt6_check_expired(const struct rt6_info *rt)
{
if (rt->rt6i_flags & RTF_EXPIRES) {
@@ -515,7 +524,7 @@ static void rt6_probe_deferred(struct work_struct *w)
container_of(w, struct __rt6_probe_work, work);
addrconf_addr_solict_mult(&work->target, &mcaddr);
- ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL);
+ ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
dev_put(work->dev);
kfree(work);
}
@@ -857,6 +866,9 @@ restart:
}
dst_use(&rt->dst, jiffies);
read_unlock_bh(&table->tb6_lock);
+
+ trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
+
return rt;
}
@@ -1070,6 +1082,8 @@ redo_rt6_select:
read_unlock_bh(&table->tb6_lock);
rt6_dst_from_metrics_check(rt);
+
+ trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
return rt;
} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
!(rt->rt6i_flags & RTF_GATEWAY))) {
@@ -1093,6 +1107,8 @@ redo_rt6_select:
uncached_rt = net->ipv6.ip6_null_entry;
dst_hold(&uncached_rt->dst);
+
+ trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
return uncached_rt;
} else {
@@ -1117,6 +1133,7 @@ redo_rt6_select:
dst_release(&rt->dst);
}
+ trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
return pcpu_rt;
}
@@ -1166,11 +1183,10 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}
-struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
- struct flowi6 *fl6)
+struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
+ struct flowi6 *fl6, int flags)
{
struct dst_entry *dst;
- int flags = 0;
bool any_src;
dst = l3mdev_rt6_dst_by_oif(net, fl6);
@@ -1191,7 +1207,7 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
-EXPORT_SYMBOL(ip6_route_output);
+EXPORT_SYMBOL_GPL(ip6_route_output_flags);
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
@@ -1252,7 +1268,8 @@ static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
{
- if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
+ if (!__rt6_check_expired(rt) &&
+ rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
rt6_check((struct rt6_info *)(rt->dst.from), cookie))
return &rt->dst;
else
@@ -1272,7 +1289,8 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
rt6_dst_from_metrics_check(rt);
- if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
+ if (rt->rt6i_flags & RTF_PCPU ||
+ (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
return rt6_dst_from_check(rt, cookie);
else
return rt6_check(rt, cookie);
@@ -1322,6 +1340,12 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
}
+static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
+{
+ return !(rt->rt6i_flags & RTF_CACHE) &&
+ (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
+}
+
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
const struct ipv6hdr *iph, u32 mtu)
{
@@ -1335,7 +1359,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (mtu >= dst_mtu(dst))
return;
- if (rt6->rt6i_flags & RTF_CACHE) {
+ if (!rt6_cache_allowed_for_pmtu(rt6)) {
rt6_do_update_pmtu(rt6, mtu);
} else {
const struct in6_addr *daddr, *saddr;
@@ -1458,6 +1482,7 @@ out:
read_unlock_bh(&table->tb6_lock);
+ trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
return rt;
};
@@ -2683,6 +2708,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_PREF] = { .type = NLA_U8 },
[RTA_ENCAP_TYPE] = { .type = NLA_U16 },
[RTA_ENCAP] = { .type = NLA_NESTED },
+ [RTA_EXPIRES] = { .type = NLA_U32 },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2783,6 +2809,15 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[RTA_ENCAP_TYPE])
cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
+ if (tb[RTA_EXPIRES]) {
+ unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
+
+ if (addrconf_finite_timeout(timeout)) {
+ cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
+ cfg->fc_flags |= RTF_EXPIRES;
+ }
+ }
+
err = 0;
errout:
return err;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index dcccae86190f..2066d1c25a11 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -201,14 +201,14 @@ static int ipip6_tunnel_create(struct net_device *dev)
if ((__force u16)t->parms.i_flags & SIT_ISATAP)
dev->priv_flags |= IFF_ISATAP;
+ dev->rtnl_link_ops = &sit_link_ops;
+
err = register_netdevice(dev);
if (err < 0)
goto out;
ipip6_tunnel_clone_6rd(dev, sitn);
- dev->rtnl_link_ops = &sit_link_ops;
-
dev_hold(dev);
ipip6_tunnel_link(sitn, t);
@@ -820,7 +820,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
const struct in6_addr *addr6;
int addr_type;
u8 ttl;
- int err;
u8 protocol = IPPROTO_IPV6;
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
@@ -983,10 +982,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
skb_set_inner_ipproto(skb, IPPROTO_IPV6);
- err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr,
- protocol, tos, ttl, df,
- !net_eq(tunnel->net, dev_net(dev)));
- iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
+ iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
+ df, !net_eq(tunnel->net, dev_net(dev)));
return NETDEV_TX_OK;
tx_error_icmp:
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bb8f2fa1c7fb..aae3e5ca63ea 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -41,8 +41,7 @@ static __u16 const msstab[] = {
9000 - 60,
};
-static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
- ipv6_cookie_scratch);
+static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv6_cookie_scratch);
static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr,
__be16 sport, __be16 dport, u32 count, int c)
@@ -193,7 +192,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->pktopts = skb;
}
- ireq->ir_iif = sk->sk_bound_dev_if;
+ ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
@@ -222,9 +221,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_TCP;
fl6.daddr = ireq->ir_v6_rmt_addr;
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
fl6.saddr = ireq->ir_v6_loc_addr;
- fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.flowi6_oif = ireq->ir_iif;
fl6.flowi6_mark = ireq->ir_mark;
fl6.fl6_dport = ireq->ir_rmt_port;
fl6.fl6_sport = inet_sk(sk)->inet_sport;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ea2f4d5440b5..3447859bdc57 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -61,13 +61,12 @@
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
-#include <net/tcp_memcontrol.h>
#include <net/busy_poll.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
-#include <linux/crypto.h>
+#include <crypto/hash.h>
#include <linux/scatterlist.h>
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
@@ -93,10 +92,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- if (dst) {
+ if (dst && dst_hold_safe(dst)) {
const struct rt6_info *rt = (const struct rt6_info *)dst;
- dst_hold(dst);
sk->sk_rx_dst = dst;
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
@@ -120,6 +118,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
+ struct ipv6_txoptions *opt;
struct flowi6 fl6;
struct dst_entry *dst;
int addr_type;
@@ -235,7 +234,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.fl6_dport = usin->sin6_port;
fl6.fl6_sport = inet->inet_sport;
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ final_p = fl6_update_dst(&fl6, opt, &final);
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
@@ -255,7 +255,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
inet->inet_rcv_saddr = LOOPBACK4_IPV6;
sk->sk_gso_type = SKB_GSO_TCPV6;
- __ip6_dst_store(sk, dst, NULL, NULL);
+ ip6_dst_store(sk, dst, NULL, NULL);
if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp &&
@@ -263,9 +263,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
tcp_fetch_timewait_stamp(sk, dst);
icsk->icsk_ext_hdr_len = 0;
- if (np->opt)
- icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
- np->opt->opt_nflen);
+ if (opt)
+ icsk->icsk_ext_hdr_len = opt->opt_flen +
+ opt->opt_nflen;
tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
@@ -327,6 +327,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct tcp_sock *tp;
__u32 seq, snd_una;
struct sock *sk;
+ bool fatal;
int err;
sk = __inet6_lookup_established(net, &tcp_hashinfo,
@@ -345,8 +346,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return;
}
seq = ntohl(th->seq);
+ fatal = icmpv6_err_convert(type, code, &err);
if (sk->sk_state == TCP_NEW_SYN_RECV)
- return tcp_req_err(sk, seq);
+ return tcp_req_err(sk, seq, fatal);
bh_lock_sock(sk);
if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
@@ -400,7 +402,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
goto out;
}
- icmpv6_err_convert(type, code, &err);
/* Might be for an request_sock */
switch (sk->sk_state) {
@@ -461,7 +462,10 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
if (np->repflow && ireq->pktopts)
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
- err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
+ rcu_read_lock();
+ err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt),
+ np->tclass);
+ rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -537,7 +541,8 @@ static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
bp->len = cpu_to_be32(nbytes);
sg_init_one(&sg, bp, sizeof(*bp));
- return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
+ ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp));
+ return crypto_ahash_update(hp->md5_req);
}
static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
@@ -545,14 +550,14 @@ static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
const struct tcphdr *th)
{
struct tcp_md5sig_pool *hp;
- struct hash_desc *desc;
+ struct ahash_request *req;
hp = tcp_get_md5sig_pool();
if (!hp)
goto clear_hash_noput;
- desc = &hp->md5_desc;
+ req = hp->md5_req;
- if (crypto_hash_init(desc))
+ if (crypto_ahash_init(req))
goto clear_hash;
if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
goto clear_hash;
@@ -560,7 +565,8 @@ static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
goto clear_hash;
if (tcp_md5_hash_key(hp, key))
goto clear_hash;
- if (crypto_hash_final(desc, md5_hash))
+ ahash_request_set_crypt(req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(req))
goto clear_hash;
tcp_put_md5sig_pool();
@@ -580,7 +586,7 @@ static int tcp_v6_md5_hash_skb(char *md5_hash,
{
const struct in6_addr *saddr, *daddr;
struct tcp_md5sig_pool *hp;
- struct hash_desc *desc;
+ struct ahash_request *req;
const struct tcphdr *th = tcp_hdr(skb);
if (sk) { /* valid for establish/request sockets */
@@ -595,9 +601,9 @@ static int tcp_v6_md5_hash_skb(char *md5_hash,
hp = tcp_get_md5sig_pool();
if (!hp)
goto clear_hash_noput;
- desc = &hp->md5_desc;
+ req = hp->md5_req;
- if (crypto_hash_init(desc))
+ if (crypto_ahash_init(req))
goto clear_hash;
if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
@@ -608,7 +614,8 @@ static int tcp_v6_md5_hash_skb(char *md5_hash,
goto clear_hash;
if (tcp_md5_hash_key(hp, key))
goto clear_hash;
- if (crypto_hash_final(desc, md5_hash))
+ ahash_request_set_crypt(req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(req))
goto clear_hash;
tcp_put_md5sig_pool();
@@ -852,7 +859,9 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
#ifdef CONFIG_TCP_MD5SIG
hash_location = tcp_parse_md5sig_option(th);
- if (!sk && hash_location) {
+ if (sk && sk_fullsock(sk)) {
+ key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
+ } else if (hash_location) {
/*
* active side is lost. Try to find listening socket through
* source port, and then find md5 key through listening socket.
@@ -875,8 +884,6 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
if (genhash || memcmp(hash_location, newhash, 16) != 0)
goto release_sk1;
- } else {
- key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
}
#endif
@@ -972,6 +979,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
struct inet_request_sock *ireq;
struct ipv6_pinfo *newnp;
const struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
struct tcp6_sock *newtcp6sk;
struct inet_sock *newinet;
struct tcp_sock *newtp;
@@ -1056,7 +1064,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
*/
newsk->sk_gso_type = SKB_GSO_TCPV6;
- __ip6_dst_store(newsk, dst, NULL, NULL);
+ ip6_dst_store(newsk, dst, NULL, NULL);
inet6_sk_rx_dst_set(newsk, skb);
newtcp6sk = (struct tcp6_sock *)newsk;
@@ -1098,13 +1106,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
but we make one more one thing there: reattach optmem
to newsk.
*/
- if (np->opt)
- newnp->opt = ipv6_dup_options(newsk, np->opt);
-
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ RCU_INIT_POINTER(newnp->opt, opt);
+ }
inet_csk(newsk)->icsk_ext_hdr_len = 0;
- if (newnp->opt)
- inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
- newnp->opt->opt_flen);
+ if (opt)
+ inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
+ opt->opt_flen;
tcp_ca_openreq_child(newsk, dst);
@@ -1130,7 +1140,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
*/
tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
AF_INET6, key->key, key->keylen,
- sk_gfp_atomic(sk, GFP_ATOMIC));
+ sk_gfp_mask(sk, GFP_ATOMIC));
}
#endif
@@ -1140,14 +1150,18 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
goto out;
}
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
- /* Clone pktoptions received with SYN, if we own the req */
- if (*own_req && ireq->pktopts) {
- newnp->pktoptions = skb_clone(ireq->pktopts,
- sk_gfp_atomic(sk, GFP_ATOMIC));
- consume_skb(ireq->pktopts);
- ireq->pktopts = NULL;
- if (newnp->pktoptions)
- skb_set_owner_r(newnp->pktoptions, newsk);
+ if (*own_req) {
+ tcp_move_syn(newtp, req);
+
+ /* Clone pktoptions received with SYN, if we own the req */
+ if (ireq->pktopts) {
+ newnp->pktoptions = skb_clone(ireq->pktopts,
+ sk_gfp_mask(sk, GFP_ATOMIC));
+ consume_skb(ireq->pktopts);
+ ireq->pktopts = NULL;
+ if (newnp->pktoptions)
+ skb_set_owner_r(newnp->pktoptions, newsk);
+ }
}
return newsk;
@@ -1208,7 +1222,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
--ANK (980728)
*/
if (np->rxopt.all)
- opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));
+ opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
struct dst_entry *dst = sk->sk_rx_dst;
@@ -1376,7 +1390,7 @@ process:
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk = NULL;
+ struct sock *nsk;
sk = req->rsk_listener;
tcp_v6_fill_cb(skb, hdr, th);
@@ -1384,24 +1398,24 @@ process:
reqsk_put(req);
goto discard_it;
}
- if (likely(sk->sk_state == TCP_LISTEN)) {
- nsk = tcp_check_req(sk, skb, req, false);
- } else {
+ if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
+ sock_hold(sk);
+ nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
- goto discard_it;
+ goto discard_and_relse;
}
if (nsk == sk) {
- sock_hold(sk);
reqsk_put(req);
tcp_v6_restore_cb(skb);
} else if (tcp_child_process(sk, nsk, skb)) {
tcp_v6_send_reset(nsk, skb);
- goto discard_it;
+ goto discard_and_relse;
} else {
+ sock_put(sk);
return 0;
}
}
@@ -1507,7 +1521,9 @@ do_time_wait:
break;
case TCP_TW_RST:
tcp_v6_restore_cb(skb);
- goto no_tcp_socket;
+ tcp_v6_send_reset(sk, skb);
+ inet_twsk_deschedule_put(inet_twsk(sk));
+ goto discard_it;
case TCP_TW_SUCCESS:
;
}
@@ -1686,6 +1702,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
const struct tcp_sock *tp = tcp_sk(sp);
const struct inet_connection_sock *icsk = inet_csk(sp);
const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
+ int rx_queue;
+ int state;
dest = &sp->sk_v6_daddr;
src = &sp->sk_v6_rcv_saddr;
@@ -1706,6 +1724,15 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
timer_expires = jiffies;
}
+ state = sk_state_load(sp);
+ if (state == TCP_LISTEN)
+ rx_queue = sp->sk_ack_backlog;
+ else
+ /* Because we don't lock the socket,
+ * we might find a transient negative value.
+ */
+ rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
+
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
"%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
@@ -1714,9 +1741,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
src->s6_addr32[2], src->s6_addr32[3], srcp,
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3], destp,
- sp->sk_state,
- tp->write_seq-tp->snd_una,
- (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
+ state,
+ tp->write_seq - tp->snd_una,
+ rx_queue,
timer_active,
jiffies_delta_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits,
@@ -1728,7 +1755,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
tp->snd_cwnd,
- sp->sk_state == TCP_LISTEN ?
+ state == TCP_LISTEN ?
fastopenq->max_qlen :
(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
);
@@ -1865,10 +1892,8 @@ struct proto tcpv6_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
-#ifdef CONFIG_MEMCG_KMEM
- .proto_cgroup = tcp_proto_cgroup,
-#endif
.clear_sk = tcp_v6_clear_sk,
+ .diag_destroy = tcp_abort,
};
static const struct inet6_protocol tcpv6_protocol = {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 01bcb49619ee..422dd014aa2c 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -47,6 +47,7 @@
#include <net/xfrm.h>
#include <net/inet6_hashtables.h>
#include <net/busy_poll.h>
+#include <net/sock_reuseport.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -76,7 +77,14 @@ static u32 udp6_ehashfn(const struct net *net,
udp_ipv6_hash_secret + net_hash_mix(net));
}
-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
+/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
+ * only, and any IPv4 addresses if not IPv6 only
+ * match_wildcard == false: addresses must be exactly the same, i.e.
+ * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
+ * and 0.0.0.0 equals to 0.0.0.0 only
+ */
+int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard)
{
const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
int sk2_ipv6only = inet_v6_ipv6only(sk2);
@@ -84,16 +92,24 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
/* if both are mapped, treat as IPv4 */
- if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
- return (!sk2_ipv6only &&
- (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr ||
- sk->sk_rcv_saddr == sk2->sk_rcv_saddr));
+ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
+ if (!sk2_ipv6only) {
+ if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
+ return 1;
+ if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
+ return match_wildcard;
+ }
+ return 0;
+ }
- if (addr_type2 == IPV6_ADDR_ANY &&
+ if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
+ return 1;
+
+ if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
!(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
return 1;
- if (addr_type == IPV6_ADDR_ANY &&
+ if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
!(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
return 1;
@@ -235,11 +251,13 @@ static inline int compute_score2(struct sock *sk, struct net *net,
static struct sock *udp6_lib_lookup2(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned int hnum, int dif,
- struct udp_hslot *hslot2, unsigned int slot2)
+ struct udp_hslot *hslot2, unsigned int slot2,
+ struct sk_buff *skb)
{
struct sock *sk, *result;
struct hlist_nulls_node *node;
int score, badness, matches = 0, reuseport = 0;
+ bool select_ok = true;
u32 hash = 0;
begin:
@@ -255,6 +273,17 @@ begin:
if (reuseport) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
+ if (select_ok) {
+ struct sock *sk2;
+
+ sk2 = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (sk2) {
+ result = sk2;
+ select_ok = false;
+ goto found;
+ }
+ }
matches = 1;
}
} else if (score == badness && reuseport) {
@@ -273,6 +302,7 @@ begin:
goto begin;
if (result) {
+found:
if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
result = NULL;
else if (unlikely(compute_score2(result, net, saddr, sport,
@@ -287,7 +317,8 @@ begin:
struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
- int dif, struct udp_table *udptable)
+ int dif, struct udp_table *udptable,
+ struct sk_buff *skb)
{
struct sock *sk, *result;
struct hlist_nulls_node *node;
@@ -295,6 +326,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
int score, badness, matches = 0, reuseport = 0;
+ bool select_ok = true;
u32 hash = 0;
rcu_read_lock();
@@ -307,7 +339,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
result = udp6_lib_lookup2(net, saddr, sport,
daddr, hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
if (!result) {
hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
slot2 = hash2 & udptable->mask;
@@ -317,7 +349,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
result = udp6_lib_lookup2(net, saddr, sport,
&in6addr_any, hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
}
rcu_read_unlock();
return result;
@@ -334,6 +366,17 @@ begin:
if (reuseport) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
+ if (select_ok) {
+ struct sock *sk2;
+
+ sk2 = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (sk2) {
+ result = sk2;
+ select_ok = false;
+ goto found;
+ }
+ }
matches = 1;
}
} else if (score == badness && reuseport) {
@@ -352,6 +395,7 @@ begin:
goto begin;
if (result) {
+found:
if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
result = NULL;
else if (unlikely(compute_score(result, net, hnum, saddr, sport,
@@ -377,13 +421,13 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
return sk;
return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport,
&iph->daddr, dport, inet6_iif(skb),
- udptable);
+ udptable, skb);
}
struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport, int dif)
{
- return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table);
+ return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL);
}
EXPORT_SYMBOL_GPL(udp6_lib_lookup);
@@ -402,6 +446,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int peeked, off = 0;
int err;
int is_udplite = IS_UDPLITE(sk);
+ bool checksum_valid = false;
int is_udp4;
bool slow;
@@ -433,11 +478,12 @@ try_again:
*/
if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
- if (udp_lib_checksum_complete(skb))
+ checksum_valid = !udp_lib_checksum_complete(skb);
+ if (!checksum_valid)
goto csum_copy_err;
}
- if (skb_csum_unnecessary(skb))
+ if (checksum_valid || skb_csum_unnecessary(skb))
err = skb_copy_datagram_msg(skb, sizeof(struct udphdr),
msg, copied);
else {
@@ -547,8 +593,8 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
int err;
struct net *net = dev_net(skb->dev);
- sk = __udp6_lib_lookup(net, daddr, uh->dest,
- saddr, uh->source, inet6_iif(skb), udptable);
+ sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
+ inet6_iif(skb), udptable, skb);
if (!sk) {
ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
ICMP6_MIB_INERRORS);
@@ -916,11 +962,9 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
ret = udpv6_queue_rcv_skb(sk, skb);
sock_put(sk);
- /* a return value > 0 means to resubmit the input, but
- * it wants the return to be -protocol, or 0
- */
+ /* a return value > 0 means to resubmit the input */
if (ret > 0)
- return -ret;
+ return ret;
return 0;
}
@@ -1110,6 +1154,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
struct ipv6_txoptions *opt = NULL;
+ struct ipv6_txoptions *opt_to_free = NULL;
struct ip6_flowlabel *flowlabel = NULL;
struct flowi6 fl6;
struct dst_entry *dst;
@@ -1263,8 +1308,10 @@ do_udp_sendmsg:
opt = NULL;
connected = 0;
}
- if (!opt)
- opt = np->opt;
+ if (!opt) {
+ opt = txopt_get(np);
+ opt_to_free = opt;
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -1373,6 +1420,7 @@ release_dst:
out:
dst_release(dst);
fl6_sock_release(flowlabel);
+ txopt_put(opt_to_free);
if (!err)
return len;
/*
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index f7fbdbabe50e..372855eeaf42 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -23,7 +23,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
- IP6_ECN_set_ce(inner_iph);
+ IP6_ECN_set_ce(skb, inner_iph);
}
/* Add encapsulation header.
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 5643423fe67a..c074771a10f7 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -279,7 +279,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
xfrm_dst_ifdown(dst, dev);
}
-static struct dst_ops xfrm6_dst_ops = {
+static struct dst_ops xfrm6_dst_ops_template = {
.family = AF_INET6,
.gc = xfrm6_garbage_collect,
.update_pmtu = xfrm6_update_pmtu,
@@ -293,7 +293,7 @@ static struct dst_ops xfrm6_dst_ops = {
static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
.family = AF_INET6,
- .dst_ops = &xfrm6_dst_ops,
+ .dst_ops = &xfrm6_dst_ops_template,
.dst_lookup = xfrm6_dst_lookup,
.get_saddr = xfrm6_get_saddr,
.decode_session = _decode_session6,
@@ -325,7 +325,7 @@ static struct ctl_table xfrm6_policy_table[] = {
{ }
};
-static int __net_init xfrm6_net_init(struct net *net)
+static int __net_init xfrm6_net_sysctl_init(struct net *net)
{
struct ctl_table *table;
struct ctl_table_header *hdr;
@@ -353,7 +353,7 @@ err_alloc:
return -ENOMEM;
}
-static void __net_exit xfrm6_net_exit(struct net *net)
+static void __net_exit xfrm6_net_sysctl_exit(struct net *net)
{
struct ctl_table *table;
@@ -365,24 +365,52 @@ static void __net_exit xfrm6_net_exit(struct net *net)
if (!net_eq(net, &init_net))
kfree(table);
}
+#else /* CONFIG_SYSCTL */
+static int inline xfrm6_net_sysctl_init(struct net *net)
+{
+ return 0;
+}
+
+static void inline xfrm6_net_sysctl_exit(struct net *net)
+{
+}
+#endif
+
+static int __net_init xfrm6_net_init(struct net *net)
+{
+ int ret;
+
+ memcpy(&net->xfrm.xfrm6_dst_ops, &xfrm6_dst_ops_template,
+ sizeof(xfrm6_dst_ops_template));
+ ret = dst_entries_init(&net->xfrm.xfrm6_dst_ops);
+ if (ret)
+ return ret;
+
+ ret = xfrm6_net_sysctl_init(net);
+ if (ret)
+ dst_entries_destroy(&net->xfrm.xfrm6_dst_ops);
+
+ return ret;
+}
+
+static void __net_exit xfrm6_net_exit(struct net *net)
+{
+ xfrm6_net_sysctl_exit(net);
+ dst_entries_destroy(&net->xfrm.xfrm6_dst_ops);
+}
static struct pernet_operations xfrm6_net_ops = {
.init = xfrm6_net_init,
.exit = xfrm6_net_exit,
};
-#endif
int __init xfrm6_init(void)
{
int ret;
- dst_entries_init(&xfrm6_dst_ops);
-
ret = xfrm6_policy_init();
- if (ret) {
- dst_entries_destroy(&xfrm6_dst_ops);
+ if (ret)
goto out;
- }
ret = xfrm6_state_init();
if (ret)
goto out_policy;
@@ -391,9 +419,7 @@ int __init xfrm6_init(void)
if (ret)
goto out_state;
-#ifdef CONFIG_SYSCTL
register_pernet_subsys(&xfrm6_net_ops);
-#endif
out:
return ret;
out_state:
@@ -405,11 +431,8 @@ out_policy:
void xfrm6_fini(void)
{
-#ifdef CONFIG_SYSCTL
unregister_pernet_subsys(&xfrm6_net_ops);
-#endif
xfrm6_protocol_fini();
xfrm6_policy_fini();
xfrm6_state_fini();
- dst_entries_destroy(&xfrm6_dst_ops);
}
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index e6aa48b5395c..923abd6b3064 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1086,6 +1086,9 @@ static int irda_create(struct net *net, struct socket *sock, int protocol,
struct sock *sk;
struct irda_sock *self;
+ if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+ return -EINVAL;
+
if (net != &init_net)
return -EAFNOSUPPORT;
diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
index 3c4caa60c926..5728e76ca6d5 100644
--- a/net/irda/ircomm/ircomm_param.c
+++ b/net/irda/ircomm/ircomm_param.c
@@ -134,11 +134,10 @@ int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush)
return -1;
}
skb_put(skb, count);
+ pr_debug("%s(), skb->len=%d\n", __func__, skb->len);
spin_unlock_irqrestore(&self->spinlock, flags);
- pr_debug("%s(), skb->len=%d\n", __func__ , skb->len);
-
if (flush) {
/* ircomm_tty_do_softint will take care of the rest */
schedule_work(&self->tqueue);
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index a4237707f79d..da126ee6d218 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -287,14 +287,14 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self,
if (filp->f_flags & O_NONBLOCK) {
/* nonblock mode is set */
- if (tty->termios.c_cflag & CBAUD)
+ if (C_BAUD(tty))
tty_port_raise_dtr_rts(port);
port->flags |= ASYNC_NORMAL_ACTIVE;
pr_debug("%s(), O_NONBLOCK requested!\n", __func__);
return 0;
}
- if (tty->termios.c_cflag & CLOCAL) {
+ if (C_CLOCAL(tty)) {
pr_debug("%s(), doing CLOCAL!\n", __func__);
do_clocal = 1;
}
@@ -806,7 +806,7 @@ static void ircomm_tty_throttle(struct tty_struct *tty)
ircomm_tty_send_xchar(tty, STOP_CHAR(tty));
/* Hardware flow control? */
- if (tty->termios.c_cflag & CRTSCTS) {
+ if (C_CRTSCTS(tty)) {
self->settings.dte &= ~IRCOMM_RTS;
self->settings.dte |= IRCOMM_DELTA_RTS;
@@ -831,12 +831,11 @@ static void ircomm_tty_unthrottle(struct tty_struct *tty)
IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
/* Using software flow control? */
- if (I_IXOFF(tty)) {
+ if (I_IXOFF(tty))
ircomm_tty_send_xchar(tty, START_CHAR(tty));
- }
/* Using hardware flow control? */
- if (tty->termios.c_cflag & CRTSCTS) {
+ if (C_CRTSCTS(tty)) {
self->settings.dte |= (IRCOMM_RTS|IRCOMM_DELTA_RTS);
ircomm_param_request(self, IRCOMM_DTE, TRUE);
@@ -1268,10 +1267,6 @@ static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m)
seq_printf(m, "%cASYNC_LOW_LATENCY", sep);
sep = '|';
}
- if (self->port.flags & ASYNC_CLOSING) {
- seq_printf(m, "%cASYNC_CLOSING", sep);
- sep = '|';
- }
if (self->port.flags & ASYNC_NORMAL_ACTIVE) {
seq_printf(m, "%cASYNC_NORMAL_ACTIVE", sep);
sep = '|';
diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c
index 75ccdbd0728e..d3687aaa23de 100644
--- a/net/irda/ircomm/ircomm_tty_ioctl.c
+++ b/net/irda/ircomm/ircomm_tty_ioctl.c
@@ -158,26 +158,21 @@ void ircomm_tty_set_termios(struct tty_struct *tty,
ircomm_tty_change_speed(self, tty);
/* Handle transition to B0 status */
- if ((old_termios->c_cflag & CBAUD) &&
- !(cflag & CBAUD)) {
+ if ((old_termios->c_cflag & CBAUD) && !(cflag & CBAUD)) {
self->settings.dte &= ~(IRCOMM_DTR|IRCOMM_RTS);
ircomm_param_request(self, IRCOMM_DTE, TRUE);
}
/* Handle transition away from B0 status */
- if (!(old_termios->c_cflag & CBAUD) &&
- (cflag & CBAUD)) {
+ if (!(old_termios->c_cflag & CBAUD) && (cflag & CBAUD)) {
self->settings.dte |= IRCOMM_DTR;
- if (!(tty->termios.c_cflag & CRTSCTS) ||
- !test_bit(TTY_THROTTLED, &tty->flags)) {
+ if (!C_CRTSCTS(tty) || !test_bit(TTY_THROTTLED, &tty->flags))
self->settings.dte |= IRCOMM_RTS;
- }
ircomm_param_request(self, IRCOMM_DTE, TRUE);
}
/* Handle turning off CRTSCTS */
- if ((old_termios->c_cflag & CRTSCTS) &&
- !(tty->termios.c_cflag & CRTSCTS))
+ if ((old_termios->c_cflag & CRTSCTS) && !C_CRTSCTS(tty))
{
tty->hw_stopped = 0;
ircomm_tty_start(tty);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index fcb2752419c6..fc3598a922b0 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -303,7 +303,7 @@ static void iucv_sock_wake_msglim(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
- if (wq_has_sleeper(wq))
+ if (skwq_has_sleeper(wq))
wake_up_interruptible_all(&wq->wait);
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
@@ -708,6 +708,9 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
if (!addr || addr->sa_family != AF_IUCV)
return -EINVAL;
+ if (addr_len < sizeof(struct sockaddr_iucv))
+ return -EINVAL;
+
lock_sock(sk);
if (sk->sk_state != IUCV_OPEN) {
err = -EBADFD;
@@ -1031,7 +1034,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
struct sock *sk = sock->sk;
struct iucv_sock *iucv = iucv_sk(sk);
struct sk_buff *skb;
- struct iucv_message txmsg;
+ struct iucv_message txmsg = {0};
struct cmsghdr *cmsg;
int cmsg_done;
long timeo;
@@ -1483,7 +1486,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
if (sock_writeable(sk) && iucv_below_msglim(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
return mask;
}
@@ -2084,11 +2087,7 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb)
return NET_RX_SUCCESS;
}
- /* write stuff from iucv_msg to skb cb */
- if (skb->len < sizeof(struct af_iucv_trans_hdr)) {
- kfree_skb(skb);
- return NET_RX_SUCCESS;
- }
+ /* write stuff from iucv_msg to skb cb */
skb_pull(skb, sizeof(struct af_iucv_trans_hdr));
skb_reset_transport_header(skb);
skb_reset_network_header(skb);
@@ -2119,6 +2118,20 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
char nullstring[8];
int err = 0;
+ if (skb->len < (ETH_HLEN + sizeof(struct af_iucv_trans_hdr))) {
+ WARN_ONCE(1, "AF_IUCV too short skb, len=%d, min=%d",
+ (int)skb->len,
+ (int)(ETH_HLEN + sizeof(struct af_iucv_trans_hdr)));
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+ if (skb_headlen(skb) < (ETH_HLEN + sizeof(struct af_iucv_trans_hdr)))
+ if (skb_linearize(skb)) {
+ WARN_ONCE(1, "AF_IUCV skb_linearize failed, len=%d",
+ (int)skb->len);
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
skb_pull(skb, ETH_HLEN);
trans_hdr = (struct af_iucv_trans_hdr *)skb->data;
EBCASC(trans_hdr->destAppName, sizeof(trans_hdr->destAppName));
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index aca38d8aed8e..a2c8747d2936 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -486,6 +486,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt_to_free = NULL;
struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL;
struct dst_entry *dst = NULL;
@@ -575,8 +576,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
opt = NULL;
}
- if (opt == NULL)
- opt = np->opt;
+ if (!opt) {
+ opt = txopt_get(np);
+ opt_to_free = opt;
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -631,6 +634,7 @@ done:
dst_release(dst);
out:
fl6_sock_release(flowlabel);
+ txopt_put(opt_to_free);
return err < 0 ? err : len;
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index f93c5be612a7..2caaa84ce92d 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -124,8 +124,13 @@ static int l2tp_tunnel_notify(struct genl_family *family,
ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq,
NLM_F_ACK, tunnel, cmd);
- if (ret >= 0)
- return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+ if (ret >= 0) {
+ ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+ /* We don't care if no one is listening */
+ if (ret == -ESRCH)
+ ret = 0;
+ return ret;
+ }
nlmsg_free(msg);
@@ -147,8 +152,13 @@ static int l2tp_session_notify(struct genl_family *family,
ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq,
NLM_F_ACK, session, cmd);
- if (ret >= 0)
- return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+ if (ret >= 0) {
+ ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+ /* We don't care if no one is listening */
+ if (ret == -ESRCH)
+ ret = 0;
+ return ret;
+ }
nlmsg_free(msg);
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 1ad18c55064c..652c250b9a3b 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -230,26 +230,11 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
if (sk->sk_state & PPPOX_BOUND) {
struct pppox_sock *po;
+
l2tp_dbg(session, PPPOL2TP_MSG_DATA,
"%s: recv %d byte data frame, passing to ppp\n",
session->name, data_len);
- /* We need to forget all info related to the L2TP packet
- * gathered in the skb as we are going to reuse the same
- * skb for the inner packet.
- * Namely we need to:
- * - reset xfrm (IPSec) information as it applies to
- * the outer L2TP packet and not to the inner one
- * - release the dst to force a route lookup on the inner
- * IP packet since skb->dst currently points to the dst
- * of the UDP tunnel
- * - reset netfilter information as it doesn't apply
- * to the inner packet either
- */
- secpath_reset(skb);
- skb_dst_drop(skb);
- nf_reset(skb);
-
po = pppox_sk(sk);
ppp_input(&po->chan, skb);
} else {
@@ -1862,5 +1847,5 @@ MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
MODULE_DESCRIPTION("PPP over L2TP over UDP");
MODULE_LICENSE("GPL");
MODULE_VERSION(PPPOL2TP_DRV_VERSION);
-MODULE_ALIAS("pppox-proto-" __stringify(PX_PROTO_OL2TP));
+MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OL2TP);
MODULE_ALIAS_L2TP_PWTYPE(11);
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 10ad4ac1fa0b..367784be5df2 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -291,7 +291,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
}
/* prepare A-MPDU MLME for Rx aggregation */
- tid_agg_rx = kmalloc(sizeof(struct tid_ampdu_rx), GFP_KERNEL);
+ tid_agg_rx = kzalloc(sizeof(*tid_agg_rx), GFP_KERNEL);
if (!tid_agg_rx)
goto end;
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index a758eb84e8f0..ff757181b0a8 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -500,7 +500,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
/* send AddBA request */
ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
tid_tx->dialog_token, start_seq_num,
- local->hw.max_tx_aggregation_subframes,
+ IEEE80211_MAX_AMPDU_BUF,
tid_tx->timeout);
}
@@ -926,6 +926,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK;
tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
+ buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes);
mutex_lock(&sta->ampdu_mlme.mtx);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index c2bd1b6a6922..166a29fe6c35 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1169,8 +1169,7 @@ static int sta_apply_parameters(struct ieee80211_local *local,
* rc isn't initialized here yet, so ignore it
*/
__ieee80211_vht_handle_opmode(sdata, sta,
- params->opmode_notif,
- band, false);
+ params->opmode_notif, band);
}
if (ieee80211_vif_is_mesh(&sdata->vif))
@@ -1216,16 +1215,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
if (!sta)
return -ENOMEM;
- /*
- * defaults -- if userspace wants something else we'll
- * change it accordingly in sta_apply_parameters()
- */
- if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) &&
- !(params->sta_flags_set & (BIT(NL80211_STA_FLAG_AUTHENTICATED) |
- BIT(NL80211_STA_FLAG_ASSOCIATED)))) {
- sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
- sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
- }
if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
sta->sta.tdls = true;
@@ -1994,6 +1983,11 @@ static int ieee80211_scan(struct wiphy *wiphy,
return ieee80211_request_scan(sdata, req);
}
+static void ieee80211_abort_scan(struct wiphy *wiphy, struct wireless_dev *wdev)
+{
+ ieee80211_scan_cancel(wiphy_priv(wiphy));
+}
+
static int
ieee80211_sched_scan_start(struct wiphy *wiphy,
struct net_device *dev,
@@ -2509,294 +2503,6 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
return 0;
}
-static bool ieee80211_coalesce_started_roc(struct ieee80211_local *local,
- struct ieee80211_roc_work *new_roc,
- struct ieee80211_roc_work *cur_roc)
-{
- unsigned long now = jiffies;
- unsigned long remaining = cur_roc->hw_start_time +
- msecs_to_jiffies(cur_roc->duration) -
- now;
-
- if (WARN_ON(!cur_roc->started || !cur_roc->hw_begun))
- return false;
-
- /* if it doesn't fit entirely, schedule a new one */
- if (new_roc->duration > jiffies_to_msecs(remaining))
- return false;
-
- ieee80211_handle_roc_started(new_roc);
-
- /* add to dependents so we send the expired event properly */
- list_add_tail(&new_roc->list, &cur_roc->dependents);
- return true;
-}
-
-static u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local)
-{
- lockdep_assert_held(&local->mtx);
-
- local->roc_cookie_counter++;
-
- /* wow, you wrapped 64 bits ... more likely a bug */
- if (WARN_ON(local->roc_cookie_counter == 0))
- local->roc_cookie_counter++;
-
- return local->roc_cookie_counter;
-}
-
-static int ieee80211_start_roc_work(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct ieee80211_channel *channel,
- unsigned int duration, u64 *cookie,
- struct sk_buff *txskb,
- enum ieee80211_roc_type type)
-{
- struct ieee80211_roc_work *roc, *tmp;
- bool queued = false;
- int ret;
-
- lockdep_assert_held(&local->mtx);
-
- if (local->use_chanctx && !local->ops->remain_on_channel)
- return -EOPNOTSUPP;
-
- roc = kzalloc(sizeof(*roc), GFP_KERNEL);
- if (!roc)
- return -ENOMEM;
-
- /*
- * If the duration is zero, then the driver
- * wouldn't actually do anything. Set it to
- * 10 for now.
- *
- * TODO: cancel the off-channel operation
- * when we get the SKB's TX status and
- * the wait time was zero before.
- */
- if (!duration)
- duration = 10;
-
- roc->chan = channel;
- roc->duration = duration;
- roc->req_duration = duration;
- roc->frame = txskb;
- roc->type = type;
- roc->sdata = sdata;
- INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work);
- INIT_LIST_HEAD(&roc->dependents);
-
- /*
- * cookie is either the roc cookie (for normal roc)
- * or the SKB (for mgmt TX)
- */
- if (!txskb) {
- roc->cookie = ieee80211_mgmt_tx_cookie(local);
- *cookie = roc->cookie;
- } else {
- roc->mgmt_tx_cookie = *cookie;
- }
-
- /* if there's one pending or we're scanning, queue this one */
- if (!list_empty(&local->roc_list) ||
- local->scanning || ieee80211_is_radar_required(local))
- goto out_check_combine;
-
- /* if not HW assist, just queue & schedule work */
- if (!local->ops->remain_on_channel) {
- ieee80211_queue_delayed_work(&local->hw, &roc->work, 0);
- goto out_queue;
- }
-
- /* otherwise actually kick it off here (for error handling) */
-
- ret = drv_remain_on_channel(local, sdata, channel, duration, type);
- if (ret) {
- kfree(roc);
- return ret;
- }
-
- roc->started = true;
- goto out_queue;
-
- out_check_combine:
- list_for_each_entry(tmp, &local->roc_list, list) {
- if (tmp->chan != channel || tmp->sdata != sdata)
- continue;
-
- /*
- * Extend this ROC if possible:
- *
- * If it hasn't started yet, just increase the duration
- * and add the new one to the list of dependents.
- * If the type of the new ROC has higher priority, modify the
- * type of the previous one to match that of the new one.
- */
- if (!tmp->started) {
- list_add_tail(&roc->list, &tmp->dependents);
- tmp->duration = max(tmp->duration, roc->duration);
- tmp->type = max(tmp->type, roc->type);
- queued = true;
- break;
- }
-
- /* If it has already started, it's more difficult ... */
- if (local->ops->remain_on_channel) {
- /*
- * In the offloaded ROC case, if it hasn't begun, add
- * this new one to the dependent list to be handled
- * when the master one begins. If it has begun,
- * check if it fits entirely within the existing one,
- * in which case it will just be dependent as well.
- * Otherwise, schedule it by itself.
- */
- if (!tmp->hw_begun) {
- list_add_tail(&roc->list, &tmp->dependents);
- queued = true;
- break;
- }
-
- if (ieee80211_coalesce_started_roc(local, roc, tmp))
- queued = true;
- } else if (del_timer_sync(&tmp->work.timer)) {
- unsigned long new_end;
-
- /*
- * In the software ROC case, cancel the timer, if
- * that fails then the finish work is already
- * queued/pending and thus we queue the new ROC
- * normally, if that succeeds then we can extend
- * the timer duration and TX the frame (if any.)
- */
-
- list_add_tail(&roc->list, &tmp->dependents);
- queued = true;
-
- new_end = jiffies + msecs_to_jiffies(roc->duration);
-
- /* ok, it was started & we canceled timer */
- if (time_after(new_end, tmp->work.timer.expires))
- mod_timer(&tmp->work.timer, new_end);
- else
- add_timer(&tmp->work.timer);
-
- ieee80211_handle_roc_started(roc);
- }
- break;
- }
-
- out_queue:
- if (!queued)
- list_add_tail(&roc->list, &local->roc_list);
-
- return 0;
-}
-
-static int ieee80211_remain_on_channel(struct wiphy *wiphy,
- struct wireless_dev *wdev,
- struct ieee80211_channel *chan,
- unsigned int duration,
- u64 *cookie)
-{
- struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
- struct ieee80211_local *local = sdata->local;
- int ret;
-
- mutex_lock(&local->mtx);
- ret = ieee80211_start_roc_work(local, sdata, chan,
- duration, cookie, NULL,
- IEEE80211_ROC_TYPE_NORMAL);
- mutex_unlock(&local->mtx);
-
- return ret;
-}
-
-static int ieee80211_cancel_roc(struct ieee80211_local *local,
- u64 cookie, bool mgmt_tx)
-{
- struct ieee80211_roc_work *roc, *tmp, *found = NULL;
- int ret;
-
- mutex_lock(&local->mtx);
- list_for_each_entry_safe(roc, tmp, &local->roc_list, list) {
- struct ieee80211_roc_work *dep, *tmp2;
-
- list_for_each_entry_safe(dep, tmp2, &roc->dependents, list) {
- if (!mgmt_tx && dep->cookie != cookie)
- continue;
- else if (mgmt_tx && dep->mgmt_tx_cookie != cookie)
- continue;
- /* found dependent item -- just remove it */
- list_del(&dep->list);
- mutex_unlock(&local->mtx);
-
- ieee80211_roc_notify_destroy(dep, true);
- return 0;
- }
-
- if (!mgmt_tx && roc->cookie != cookie)
- continue;
- else if (mgmt_tx && roc->mgmt_tx_cookie != cookie)
- continue;
-
- found = roc;
- break;
- }
-
- if (!found) {
- mutex_unlock(&local->mtx);
- return -ENOENT;
- }
-
- /*
- * We found the item to cancel, so do that. Note that it
- * may have dependents, which we also cancel (and send
- * the expired signal for.) Not doing so would be quite
- * tricky here, but we may need to fix it later.
- */
-
- if (local->ops->remain_on_channel) {
- if (found->started) {
- ret = drv_cancel_remain_on_channel(local);
- if (WARN_ON_ONCE(ret)) {
- mutex_unlock(&local->mtx);
- return ret;
- }
- }
-
- list_del(&found->list);
-
- if (found->started)
- ieee80211_start_next_roc(local);
- mutex_unlock(&local->mtx);
-
- ieee80211_roc_notify_destroy(found, true);
- } else {
- /* work may be pending so use it all the time */
- found->abort = true;
- ieee80211_queue_delayed_work(&local->hw, &found->work, 0);
-
- mutex_unlock(&local->mtx);
-
- /* work will clean up etc */
- flush_delayed_work(&found->work);
- WARN_ON(!found->to_be_freed);
- kfree(found);
- }
-
- return 0;
-}
-
-static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy,
- struct wireless_dev *wdev,
- u64 cookie)
-{
- struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
- struct ieee80211_local *local = sdata->local;
-
- return ieee80211_cancel_roc(local, cookie, false);
-}
-
static int ieee80211_start_radar_detection(struct wiphy *wiphy,
struct net_device *dev,
struct cfg80211_chan_def *chandef,
@@ -3267,9 +2973,21 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
return err;
}
-static struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local,
- struct sk_buff *skb, u64 *cookie,
- gfp_t gfp)
+u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local)
+{
+ lockdep_assert_held(&local->mtx);
+
+ local->roc_cookie_counter++;
+
+ /* wow, you wrapped 64 bits ... more likely a bug */
+ if (WARN_ON(local->roc_cookie_counter == 0))
+ local->roc_cookie_counter++;
+
+ return local->roc_cookie_counter;
+}
+
+int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb,
+ u64 *cookie, gfp_t gfp)
{
unsigned long spin_flags;
struct sk_buff *ack_skb;
@@ -3277,7 +2995,7 @@ static struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local,
ack_skb = skb_copy(skb, gfp);
if (!ack_skb)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
spin_lock_irqsave(&local->ack_status_lock, spin_flags);
id = idr_alloc(&local->ack_status_frames, ack_skb,
@@ -3286,7 +3004,7 @@ static struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local,
if (id < 0) {
kfree_skb(ack_skb);
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
}
IEEE80211_SKB_CB(skb)->ack_frame_id = id;
@@ -3294,200 +3012,7 @@ static struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local,
*cookie = ieee80211_mgmt_tx_cookie(local);
IEEE80211_SKB_CB(ack_skb)->ack.cookie = *cookie;
- return ack_skb;
-}
-
-static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
- struct cfg80211_mgmt_tx_params *params,
- u64 *cookie)
-{
- struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
- struct ieee80211_local *local = sdata->local;
- struct sk_buff *skb, *ack_skb;
- struct sta_info *sta;
- const struct ieee80211_mgmt *mgmt = (void *)params->buf;
- bool need_offchan = false;
- u32 flags;
- int ret;
- u8 *data;
-
- if (params->dont_wait_for_ack)
- flags = IEEE80211_TX_CTL_NO_ACK;
- else
- flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX |
- IEEE80211_TX_CTL_REQ_TX_STATUS;
-
- if (params->no_cck)
- flags |= IEEE80211_TX_CTL_NO_CCK_RATE;
-
- switch (sdata->vif.type) {
- case NL80211_IFTYPE_ADHOC:
- if (!sdata->vif.bss_conf.ibss_joined)
- need_offchan = true;
- /* fall through */
-#ifdef CONFIG_MAC80211_MESH
- case NL80211_IFTYPE_MESH_POINT:
- if (ieee80211_vif_is_mesh(&sdata->vif) &&
- !sdata->u.mesh.mesh_id_len)
- need_offchan = true;
- /* fall through */
-#endif
- case NL80211_IFTYPE_AP:
- case NL80211_IFTYPE_AP_VLAN:
- case NL80211_IFTYPE_P2P_GO:
- if (sdata->vif.type != NL80211_IFTYPE_ADHOC &&
- !ieee80211_vif_is_mesh(&sdata->vif) &&
- !rcu_access_pointer(sdata->bss->beacon))
- need_offchan = true;
- if (!ieee80211_is_action(mgmt->frame_control) ||
- mgmt->u.action.category == WLAN_CATEGORY_PUBLIC ||
- mgmt->u.action.category == WLAN_CATEGORY_SELF_PROTECTED ||
- mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT)
- break;
- rcu_read_lock();
- sta = sta_info_get(sdata, mgmt->da);
- rcu_read_unlock();
- if (!sta)
- return -ENOLINK;
- break;
- case NL80211_IFTYPE_STATION:
- case NL80211_IFTYPE_P2P_CLIENT:
- sdata_lock(sdata);
- if (!sdata->u.mgd.associated ||
- (params->offchan && params->wait &&
- local->ops->remain_on_channel &&
- memcmp(sdata->u.mgd.associated->bssid,
- mgmt->bssid, ETH_ALEN)))
- need_offchan = true;
- sdata_unlock(sdata);
- break;
- case NL80211_IFTYPE_P2P_DEVICE:
- need_offchan = true;
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- /* configurations requiring offchan cannot work if no channel has been
- * specified
- */
- if (need_offchan && !params->chan)
- return -EINVAL;
-
- mutex_lock(&local->mtx);
-
- /* Check if the operating channel is the requested channel */
- if (!need_offchan) {
- struct ieee80211_chanctx_conf *chanctx_conf;
-
- rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
-
- if (chanctx_conf) {
- need_offchan = params->chan &&
- (params->chan !=
- chanctx_conf->def.chan);
- } else if (!params->chan) {
- ret = -EINVAL;
- rcu_read_unlock();
- goto out_unlock;
- } else {
- need_offchan = true;
- }
- rcu_read_unlock();
- }
-
- if (need_offchan && !params->offchan) {
- ret = -EBUSY;
- goto out_unlock;
- }
-
- skb = dev_alloc_skb(local->hw.extra_tx_headroom + params->len);
- if (!skb) {
- ret = -ENOMEM;
- goto out_unlock;
- }
- skb_reserve(skb, local->hw.extra_tx_headroom);
-
- data = skb_put(skb, params->len);
- memcpy(data, params->buf, params->len);
-
- /* Update CSA counters */
- if (sdata->vif.csa_active &&
- (sdata->vif.type == NL80211_IFTYPE_AP ||
- sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
- sdata->vif.type == NL80211_IFTYPE_ADHOC) &&
- params->n_csa_offsets) {
- int i;
- struct beacon_data *beacon = NULL;
-
- rcu_read_lock();
-
- if (sdata->vif.type == NL80211_IFTYPE_AP)
- beacon = rcu_dereference(sdata->u.ap.beacon);
- else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
- beacon = rcu_dereference(sdata->u.ibss.presp);
- else if (ieee80211_vif_is_mesh(&sdata->vif))
- beacon = rcu_dereference(sdata->u.mesh.beacon);
-
- if (beacon)
- for (i = 0; i < params->n_csa_offsets; i++)
- data[params->csa_offsets[i]] =
- beacon->csa_current_counter;
-
- rcu_read_unlock();
- }
-
- IEEE80211_SKB_CB(skb)->flags = flags;
-
- skb->dev = sdata->dev;
-
- if (!params->dont_wait_for_ack) {
- /* make a copy to preserve the frame contents
- * in case of encryption.
- */
- ack_skb = ieee80211_make_ack_skb(local, skb, cookie,
- GFP_KERNEL);
- if (IS_ERR(ack_skb)) {
- ret = PTR_ERR(ack_skb);
- kfree_skb(skb);
- goto out_unlock;
- }
- } else {
- /* for cookie below */
- ack_skb = skb;
- }
-
- if (!need_offchan) {
- ieee80211_tx_skb(sdata, skb);
- ret = 0;
- goto out_unlock;
- }
-
- IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN |
- IEEE80211_TX_INTFL_OFFCHAN_TX_OK;
- if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL))
- IEEE80211_SKB_CB(skb)->hw_queue =
- local->hw.offchannel_tx_hw_queue;
-
- /* This will handle all kinds of coalescing and immediate TX */
- ret = ieee80211_start_roc_work(local, sdata, params->chan,
- params->wait, cookie, skb,
- IEEE80211_ROC_TYPE_MGMT_TX);
- if (ret)
- kfree_skb(skb);
- out_unlock:
- mutex_unlock(&local->mtx);
- return ret;
-}
-
-static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy,
- struct wireless_dev *wdev,
- u64 cookie)
-{
- struct ieee80211_local *local = wiphy_priv(wiphy);
-
- return ieee80211_cancel_roc(local, cookie, true);
+ return 0;
}
static void ieee80211_mgmt_frame_register(struct wiphy *wiphy,
@@ -3565,7 +3090,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
struct ieee80211_qos_hdr *nullfunc;
- struct sk_buff *skb, *ack_skb;
+ struct sk_buff *skb;
int size = sizeof(*nullfunc);
__le16 fc;
bool qos;
@@ -3633,10 +3158,9 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
if (qos)
nullfunc->qos_ctrl = cpu_to_le16(7);
- ack_skb = ieee80211_make_ack_skb(local, skb, cookie, GFP_ATOMIC);
- if (IS_ERR(ack_skb)) {
+ ret = ieee80211_attach_ack_skb(local, skb, cookie, GFP_ATOMIC);
+ if (ret) {
kfree_skb(skb);
- ret = PTR_ERR(ack_skb);
goto unlock;
}
@@ -3838,6 +3362,7 @@ const struct cfg80211_ops mac80211_config_ops = {
.suspend = ieee80211_suspend,
.resume = ieee80211_resume,
.scan = ieee80211_scan,
+ .abort_scan = ieee80211_abort_scan,
.sched_scan_start = ieee80211_sched_scan_start,
.sched_scan_stop = ieee80211_sched_scan_stop,
.auth = ieee80211_auth,
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 4d2aaebd4f97..3e24d0ddb51b 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -91,7 +91,7 @@ static const struct file_operations reset_ops = {
};
#endif
-static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = {
+static const char *hw_flag_names[] = {
#define FLAG(F) [IEEE80211_HW_##F] = #F
FLAG(HAS_RATE_CONTROL),
FLAG(RX_INCLUDES_FCS),
@@ -125,9 +125,7 @@ static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = {
FLAG(TDLS_WIDER_BW),
FLAG(SUPPORTS_AMSDU_IN_AMPDU),
FLAG(BEACON_TX_STATUS),
-
- /* keep last for the build bug below */
- (void *)0x1
+ FLAG(NEEDS_UNIQUE_STA_ADDR),
#undef FLAG
};
@@ -147,7 +145,7 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf,
/* fail compilation if somebody adds or removes
* a flag without updating the name array above
*/
- BUILD_BUG_ON(hw_flag_names[NUM_IEEE80211_HW_FLAGS] != (void *)0x1);
+ BUILD_BUG_ON(ARRAY_SIZE(hw_flag_names) != NUM_IEEE80211_HW_FLAGS);
for (i = 0; i < NUM_IEEE80211_HW_FLAGS; i++) {
if (test_bit(i, local->hw.flags))
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 337bb5d78003..978d3bc31df7 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -428,6 +428,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
chandef.width = sdata->u.ibss.chandef.width;
break;
case NL80211_CHAN_WIDTH_80:
+ case NL80211_CHAN_WIDTH_80P80:
case NL80211_CHAN_WIDTH_160:
chandef = sdata->u.ibss.chandef;
chandef.chan = cbss->channel;
@@ -1732,7 +1733,6 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local)
if (sdata->vif.type != NL80211_IFTYPE_ADHOC)
continue;
sdata->u.ibss.last_scan_completed = jiffies;
- ieee80211_queue_work(&local->hw, &sdata->work);
}
mutex_unlock(&local->iflist_mtx);
}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index d832bd59236b..f006f4a44c0e 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -92,7 +92,7 @@ struct ieee80211_fragment_entry {
u16 extra_len;
u16 last_frag;
u8 rx_queue;
- bool ccmp; /* Whether fragments were encrypted with CCMP */
+ bool check_sequential_pn; /* needed for CCMP/GCMP */
u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
};
@@ -325,19 +325,15 @@ struct mesh_preq_queue {
struct ieee80211_roc_work {
struct list_head list;
- struct list_head dependents;
-
- struct delayed_work work;
struct ieee80211_sub_if_data *sdata;
struct ieee80211_channel *chan;
bool started, abort, hw_begun, notified;
- bool to_be_freed;
bool on_channel;
- unsigned long hw_start_time;
+ unsigned long start_time;
u32 duration, req_duration;
struct sk_buff *frame;
@@ -1335,6 +1331,7 @@ struct ieee80211_local {
/*
* Remain-on-channel support
*/
+ struct delayed_work roc_work;
struct list_head roc_list;
struct work_struct hw_roc_start, hw_roc_done;
unsigned long hw_roc_start_time;
@@ -1483,6 +1480,10 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
void ieee80211_configure_filter(struct ieee80211_local *local);
u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata);
+u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local);
+int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb,
+ u64 *cookie, gfp_t gfp);
+
/* STA code */
void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata);
int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
@@ -1577,16 +1578,22 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_local *local);
void ieee80211_sched_scan_end(struct ieee80211_local *local);
void ieee80211_sched_scan_stopped_work(struct work_struct *work);
-/* off-channel helpers */
+/* off-channel/mgmt-tx */
void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local);
void ieee80211_offchannel_return(struct ieee80211_local *local);
void ieee80211_roc_setup(struct ieee80211_local *local);
void ieee80211_start_next_roc(struct ieee80211_local *local);
void ieee80211_roc_purge(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata);
-void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free);
-void ieee80211_sw_roc_work(struct work_struct *work);
-void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc);
+int ieee80211_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev,
+ struct ieee80211_channel *chan,
+ unsigned int duration, u64 *cookie);
+int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy,
+ struct wireless_dev *wdev, u64 cookie);
+int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
+ struct cfg80211_mgmt_tx_params *params, u64 *cookie);
+int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy,
+ struct wireless_dev *wdev, u64 cookie);
/* channel switch handling */
void ieee80211_csa_finalize_work(struct work_struct *work);
@@ -1709,10 +1716,10 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta);
void ieee80211_sta_set_rx_nss(struct sta_info *sta);
u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta, u8 opmode,
- enum ieee80211_band band, bool nss_only);
+ enum ieee80211_band band);
void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta, u8 opmode,
- enum ieee80211_band band, bool nss_only);
+ enum ieee80211_band band);
void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata,
struct ieee80211_sta_vht_cap *vht_cap);
void ieee80211_get_vht_mask_from_cap(__le16 vht_cap,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index d0dc1bfaeec2..c9e325d2e120 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -76,7 +76,8 @@ bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata)
void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata,
bool update_bss)
{
- if (__ieee80211_recalc_txpower(sdata) || update_bss)
+ if (__ieee80211_recalc_txpower(sdata) ||
+ (update_bss && ieee80211_sdata_running(sdata)))
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER);
}
@@ -1861,6 +1862,7 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata)
unregister_netdevice(sdata->dev);
} else {
cfg80211_unregister_wdev(&sdata->wdev);
+ ieee80211_teardown_sdata(sdata);
kfree(sdata);
}
}
@@ -1870,7 +1872,6 @@ void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata)
if (WARN_ON_ONCE(!test_bit(SDATA_STATE_RUNNING, &sdata->state)))
return;
ieee80211_do_stop(sdata, true);
- ieee80211_teardown_sdata(sdata);
}
void ieee80211_remove_interfaces(struct ieee80211_local *local)
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 44388d6a1d8e..5e5bc599da4c 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -4,6 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright 2015 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -320,7 +321,7 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
return;
if (new)
- list_add_tail(&new->list, &sdata->key_list);
+ list_add_tail_rcu(&new->list, &sdata->key_list);
WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx);
@@ -368,7 +369,7 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
}
if (old)
- list_del(&old->list);
+ list_del_rcu(&old->list);
}
struct ieee80211_key *
@@ -592,8 +593,8 @@ static void ieee80211_key_destroy(struct ieee80211_key *key,
return;
/*
- * Synchronize so the TX path can no longer be using
- * this key before we free/remove it.
+ * Synchronize so the TX path and rcu key iterators
+ * can no longer be using this key before we free/remove it.
*/
synchronize_net();
@@ -744,6 +745,53 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw,
}
EXPORT_SYMBOL(ieee80211_iter_keys);
+static void
+_ieee80211_iter_keys_rcu(struct ieee80211_hw *hw,
+ struct ieee80211_sub_if_data *sdata,
+ void (*iter)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta,
+ struct ieee80211_key_conf *key,
+ void *data),
+ void *iter_data)
+{
+ struct ieee80211_key *key;
+
+ list_for_each_entry_rcu(key, &sdata->key_list, list) {
+ /* skip keys of station in removal process */
+ if (key->sta && key->sta->removed)
+ continue;
+ if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
+ continue;
+
+ iter(hw, &sdata->vif,
+ key->sta ? &key->sta->sta : NULL,
+ &key->conf, iter_data);
+ }
+}
+
+void ieee80211_iter_keys_rcu(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ void (*iter)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta,
+ struct ieee80211_key_conf *key,
+ void *data),
+ void *iter_data)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct ieee80211_sub_if_data *sdata;
+
+ if (vif) {
+ sdata = vif_to_sdata(vif);
+ _ieee80211_iter_keys_rcu(hw, sdata, iter, iter_data);
+ } else {
+ list_for_each_entry_rcu(sdata, &local->interfaces, list)
+ _ieee80211_iter_keys_rcu(hw, sdata, iter, iter_data);
+ }
+}
+EXPORT_SYMBOL(ieee80211_iter_keys_rcu);
+
static void ieee80211_free_keys_iface(struct ieee80211_sub_if_data *sdata,
struct list_head *keys)
{
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 858f6b1cb149..8190bf27ebff 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -248,6 +248,7 @@ static void ieee80211_restart_work(struct work_struct *work)
/* wait for scan work complete */
flush_workqueue(local->workqueue);
+ flush_work(&local->sched_scan_stopped_work);
WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
"%s called with hardware scan in progress\n", __func__);
@@ -256,6 +257,11 @@ static void ieee80211_restart_work(struct work_struct *work)
list_for_each_entry(sdata, &local->interfaces, list)
flush_delayed_work(&sdata->dec_tailroom_needed_wk);
ieee80211_scan_cancel(local);
+
+ /* make sure any new ROC will consider local->in_reconfig */
+ flush_delayed_work(&local->roc_work);
+ flush_work(&local->hw_roc_done);
+
ieee80211_reconfig(local);
rtnl_unlock();
}
@@ -1149,6 +1155,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
rtnl_unlock();
+ cancel_delayed_work_sync(&local->roc_work);
cancel_work_sync(&local->restart_work);
cancel_work_sync(&local->reconfig_filter);
cancel_work_sync(&local->tdls_chsw_work);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index fa28500f28fd..6f85b6ab8e51 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1370,17 +1370,6 @@ out:
sdata_unlock(sdata);
}
-void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local)
-{
- struct ieee80211_sub_if_data *sdata;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list)
- if (ieee80211_vif_is_mesh(&sdata->vif) &&
- ieee80211_sdata_running(sdata))
- ieee80211_queue_work(&local->hw, &sdata->work);
- rcu_read_unlock();
-}
void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
{
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index a1596344c3ba..4a8019f79fb2 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -362,14 +362,10 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
return sdata->u.mesh.mesh_pp_id == IEEE80211_PATH_PROTOCOL_HWMP;
}
-void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local);
-
void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata);
void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata);
void ieee80211s_stop(void);
#else
-static inline void
-ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {}
static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
{ return false; }
static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index b890e225a8f1..dadf8dc6f1cf 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -779,10 +779,8 @@ void mesh_plink_broken(struct sta_info *sta)
static void mesh_path_node_reclaim(struct rcu_head *rp)
{
struct mpath_node *node = container_of(rp, struct mpath_node, rcu);
- struct ieee80211_sub_if_data *sdata = node->mpath->sdata;
del_timer_sync(&node->mpath->timer);
- atomic_dec(&sdata->u.mesh.mpaths);
kfree(node->mpath);
kfree(node);
}
@@ -790,8 +788,9 @@ static void mesh_path_node_reclaim(struct rcu_head *rp)
/* needs to be called with the corresponding hashwlock taken */
static void __mesh_path_del(struct mesh_table *tbl, struct mpath_node *node)
{
- struct mesh_path *mpath;
- mpath = node->mpath;
+ struct mesh_path *mpath = node->mpath;
+ struct ieee80211_sub_if_data *sdata = node->mpath->sdata;
+
spin_lock(&mpath->state_lock);
mpath->flags |= MESH_PATH_RESOLVING;
if (mpath->is_gate)
@@ -799,6 +798,7 @@ static void __mesh_path_del(struct mesh_table *tbl, struct mpath_node *node)
hlist_del_rcu(&node->list);
call_rcu(&node->rcu, mesh_path_node_reclaim);
spin_unlock(&mpath->state_lock);
+ atomic_dec(&sdata->u.mesh.mpaths);
atomic_dec(&tbl->entries);
}
@@ -968,8 +968,8 @@ int mesh_path_send_to_gates(struct mesh_path *mpath)
copy = true;
} else {
mpath_dbg(sdata,
- "Not forwarding %p (flags %#x)\n",
- gate->mpath, gate->mpath->flags);
+ "Not forwarding to %pM (flags %#x)\n",
+ gate->mpath->dst, gate->mpath->flags);
}
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b140cc6651f4..bfbb1acafdd1 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1379,21 +1379,26 @@ static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
*/
if (has_80211h_pwr &&
(!has_cisco_pwr || pwr_level_80211h <= pwr_level_cisco)) {
+ new_ap_level = pwr_level_80211h;
+
+ if (sdata->ap_power_level == new_ap_level)
+ return 0;
+
sdata_dbg(sdata,
"Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n",
pwr_level_80211h, chan_pwr, pwr_reduction_80211h,
sdata->u.mgd.bssid);
- new_ap_level = pwr_level_80211h;
} else { /* has_cisco_pwr is always true here. */
+ new_ap_level = pwr_level_cisco;
+
+ if (sdata->ap_power_level == new_ap_level)
+ return 0;
+
sdata_dbg(sdata,
"Limiting TX power to %d dBm as advertised by %pM\n",
pwr_level_cisco, sdata->u.mgd.bssid);
- new_ap_level = pwr_level_cisco;
}
- if (sdata->ap_power_level == new_ap_level)
- return 0;
-
sdata->ap_power_level = new_ap_level;
if (__ieee80211_recalc_txpower(sdata))
return BSS_CHANGED_TXPOWER;
@@ -1930,7 +1935,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
sdata->u.mgd.flags |= IEEE80211_STA_RESET_SIGNAL_AVE;
- if (sdata->vif.p2p) {
+ if (sdata->vif.p2p ||
+ sdata->vif.driver_flags & IEEE80211_VIF_GET_NOA_UPDATE) {
const struct cfg80211_bss_ies *ies;
rcu_read_lock();
@@ -3458,7 +3464,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
}
}
- if (sdata->vif.p2p) {
+ if (sdata->vif.p2p ||
+ sdata->vif.driver_flags & IEEE80211_VIF_GET_NOA_UPDATE) {
struct ieee80211_p2p_noa_attr noa = {};
int ret;
@@ -3575,7 +3582,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
if (sta && elems.opmode_notif)
ieee80211_vht_handle_opmode(sdata, sta, *elems.opmode_notif,
- rx_status->band, true);
+ rx_status->band);
mutex_unlock(&local->sta_mtx);
changed |= ieee80211_handle_pwr_constr(sdata, chan, mgmt,
@@ -3998,8 +4005,6 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
if (!ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR))
ieee80211_queue_work(&sdata->local->hw,
&sdata->u.mgd.monitor_work);
- /* and do all the other regular work too */
- ieee80211_queue_work(&sdata->local->hw, &sdata->work);
}
}
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 04401037140e..55a9c5b94ce1 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -187,11 +187,77 @@ void ieee80211_offchannel_return(struct ieee80211_local *local)
false);
}
-void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc)
+static void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc)
{
- if (roc->notified)
+ /* was never transmitted */
+ if (roc->frame) {
+ cfg80211_mgmt_tx_status(&roc->sdata->wdev, roc->mgmt_tx_cookie,
+ roc->frame->data, roc->frame->len,
+ false, GFP_KERNEL);
+ ieee80211_free_txskb(&roc->sdata->local->hw, roc->frame);
+ }
+
+ if (!roc->mgmt_tx_cookie)
+ cfg80211_remain_on_channel_expired(&roc->sdata->wdev,
+ roc->cookie, roc->chan,
+ GFP_KERNEL);
+
+ list_del(&roc->list);
+ kfree(roc);
+}
+
+static unsigned long ieee80211_end_finished_rocs(struct ieee80211_local *local,
+ unsigned long now)
+{
+ struct ieee80211_roc_work *roc, *tmp;
+ long remaining_dur_min = LONG_MAX;
+
+ lockdep_assert_held(&local->mtx);
+
+ list_for_each_entry_safe(roc, tmp, &local->roc_list, list) {
+ long remaining;
+
+ if (!roc->started)
+ break;
+
+ remaining = roc->start_time +
+ msecs_to_jiffies(roc->duration) -
+ now;
+
+ /* In case of HW ROC, it is possible that the HW finished the
+ * ROC session before the actual requested time. In such a case
+ * end the ROC session (disregarding the remaining time).
+ */
+ if (roc->abort || roc->hw_begun || remaining <= 0)
+ ieee80211_roc_notify_destroy(roc);
+ else
+ remaining_dur_min = min(remaining_dur_min, remaining);
+ }
+
+ return remaining_dur_min;
+}
+
+static bool ieee80211_recalc_sw_work(struct ieee80211_local *local,
+ unsigned long now)
+{
+ long dur = ieee80211_end_finished_rocs(local, now);
+
+ if (dur == LONG_MAX)
+ return false;
+
+ mod_delayed_work(local->workqueue, &local->roc_work, dur);
+ return true;
+}
+
+static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc,
+ unsigned long start_time)
+{
+ if (WARN_ON(roc->notified))
return;
+ roc->start_time = start_time;
+ roc->started = true;
+
if (roc->mgmt_tx_cookie) {
if (!WARN_ON(!roc->frame)) {
ieee80211_tx_skb_tid_band(roc->sdata, roc->frame, 7,
@@ -211,34 +277,18 @@ static void ieee80211_hw_roc_start(struct work_struct *work)
{
struct ieee80211_local *local =
container_of(work, struct ieee80211_local, hw_roc_start);
- struct ieee80211_roc_work *roc, *dep, *tmp;
+ struct ieee80211_roc_work *roc;
mutex_lock(&local->mtx);
- if (list_empty(&local->roc_list))
- goto out_unlock;
+ list_for_each_entry(roc, &local->roc_list, list) {
+ if (!roc->started)
+ break;
- roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work,
- list);
-
- if (!roc->started)
- goto out_unlock;
-
- roc->hw_begun = true;
- roc->hw_start_time = local->hw_roc_start_time;
-
- ieee80211_handle_roc_started(roc);
- list_for_each_entry_safe(dep, tmp, &roc->dependents, list) {
- ieee80211_handle_roc_started(dep);
-
- if (dep->duration > roc->duration) {
- u32 dur = dep->duration;
- dep->duration = dur - roc->duration;
- roc->duration = dur;
- list_move(&dep->list, &roc->list);
- }
+ roc->hw_begun = true;
+ ieee80211_handle_roc_started(roc, local->hw_roc_start_time);
}
- out_unlock:
+
mutex_unlock(&local->mtx);
}
@@ -254,34 +304,40 @@ void ieee80211_ready_on_channel(struct ieee80211_hw *hw)
}
EXPORT_SYMBOL_GPL(ieee80211_ready_on_channel);
-void ieee80211_start_next_roc(struct ieee80211_local *local)
+static void _ieee80211_start_next_roc(struct ieee80211_local *local)
{
- struct ieee80211_roc_work *roc;
+ struct ieee80211_roc_work *roc, *tmp;
+ enum ieee80211_roc_type type;
+ u32 min_dur, max_dur;
lockdep_assert_held(&local->mtx);
- if (list_empty(&local->roc_list)) {
- ieee80211_run_deferred_scan(local);
+ if (WARN_ON(list_empty(&local->roc_list)))
return;
- }
roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work,
list);
- if (WARN_ON_ONCE(roc->started))
+ if (WARN_ON(roc->started))
return;
- if (local->ops->remain_on_channel) {
- int ret, duration = roc->duration;
-
- /* XXX: duplicated, see ieee80211_start_roc_work() */
- if (!duration)
- duration = 10;
+ min_dur = roc->duration;
+ max_dur = roc->duration;
+ type = roc->type;
- ret = drv_remain_on_channel(local, roc->sdata, roc->chan,
- duration, roc->type);
+ list_for_each_entry(tmp, &local->roc_list, list) {
+ if (tmp == roc)
+ continue;
+ if (tmp->sdata != roc->sdata || tmp->chan != roc->chan)
+ break;
+ max_dur = max(tmp->duration, max_dur);
+ min_dur = min(tmp->duration, min_dur);
+ type = max(tmp->type, type);
+ }
- roc->started = true;
+ if (local->ops->remain_on_channel) {
+ int ret = drv_remain_on_channel(local, roc->sdata, roc->chan,
+ max_dur, type);
if (ret) {
wiphy_warn(local->hw.wiphy,
@@ -290,74 +346,24 @@ void ieee80211_start_next_roc(struct ieee80211_local *local)
* queue the work struct again to avoid recursion
* when multiple failures occur
*/
- ieee80211_remain_on_channel_expired(&local->hw);
+ list_for_each_entry(tmp, &local->roc_list, list) {
+ if (tmp->sdata != roc->sdata ||
+ tmp->chan != roc->chan)
+ break;
+ tmp->started = true;
+ tmp->abort = true;
+ }
+ ieee80211_queue_work(&local->hw, &local->hw_roc_done);
+ return;
}
- } else {
- /* delay it a bit */
- ieee80211_queue_delayed_work(&local->hw, &roc->work,
- round_jiffies_relative(HZ/2));
- }
-}
-
-void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free)
-{
- struct ieee80211_roc_work *dep, *tmp;
-
- if (WARN_ON(roc->to_be_freed))
- return;
-
- /* was never transmitted */
- if (roc->frame) {
- cfg80211_mgmt_tx_status(&roc->sdata->wdev,
- (unsigned long)roc->frame,
- roc->frame->data, roc->frame->len,
- false, GFP_KERNEL);
- kfree_skb(roc->frame);
- }
-
- if (!roc->mgmt_tx_cookie)
- cfg80211_remain_on_channel_expired(&roc->sdata->wdev,
- roc->cookie, roc->chan,
- GFP_KERNEL);
-
- list_for_each_entry_safe(dep, tmp, &roc->dependents, list)
- ieee80211_roc_notify_destroy(dep, true);
-
- if (free)
- kfree(roc);
- else
- roc->to_be_freed = true;
-}
-
-void ieee80211_sw_roc_work(struct work_struct *work)
-{
- struct ieee80211_roc_work *roc =
- container_of(work, struct ieee80211_roc_work, work.work);
- struct ieee80211_sub_if_data *sdata = roc->sdata;
- struct ieee80211_local *local = sdata->local;
- bool started, on_channel;
-
- mutex_lock(&local->mtx);
-
- if (roc->to_be_freed)
- goto out_unlock;
-
- if (roc->abort)
- goto finish;
-
- if (WARN_ON(list_empty(&local->roc_list)))
- goto out_unlock;
-
- if (WARN_ON(roc != list_first_entry(&local->roc_list,
- struct ieee80211_roc_work,
- list)))
- goto out_unlock;
-
- if (!roc->started) {
- struct ieee80211_roc_work *dep;
-
- WARN_ON(local->use_chanctx);
+ /* we'll notify about the start once the HW calls back */
+ list_for_each_entry(tmp, &local->roc_list, list) {
+ if (tmp->sdata != roc->sdata || tmp->chan != roc->chan)
+ break;
+ tmp->started = true;
+ }
+ } else {
/* If actually operating on the desired channel (with at least
* 20 MHz channel width) don't stop all the operations but still
* treat it as though the ROC operation started properly, so
@@ -377,27 +383,76 @@ void ieee80211_sw_roc_work(struct work_struct *work)
ieee80211_hw_config(local, 0);
}
- /* tell userspace or send frame */
- ieee80211_handle_roc_started(roc);
- list_for_each_entry(dep, &roc->dependents, list)
- ieee80211_handle_roc_started(dep);
+ ieee80211_queue_delayed_work(&local->hw, &local->roc_work,
+ msecs_to_jiffies(min_dur));
+
+ /* tell userspace or send frame(s) */
+ list_for_each_entry(tmp, &local->roc_list, list) {
+ if (tmp->sdata != roc->sdata || tmp->chan != roc->chan)
+ break;
- /* if it was pure TX, just finish right away */
- if (!roc->duration)
- goto finish;
+ tmp->on_channel = roc->on_channel;
+ ieee80211_handle_roc_started(tmp, jiffies);
+ }
+ }
+}
- roc->started = true;
- ieee80211_queue_delayed_work(&local->hw, &roc->work,
- msecs_to_jiffies(roc->duration));
+void ieee80211_start_next_roc(struct ieee80211_local *local)
+{
+ struct ieee80211_roc_work *roc;
+
+ lockdep_assert_held(&local->mtx);
+
+ if (list_empty(&local->roc_list)) {
+ ieee80211_run_deferred_scan(local);
+ return;
+ }
+
+ /* defer roc if driver is not started (i.e. during reconfig) */
+ if (local->in_reconfig)
+ return;
+
+ roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work,
+ list);
+
+ if (WARN_ON_ONCE(roc->started))
+ return;
+
+ if (local->ops->remain_on_channel) {
+ _ieee80211_start_next_roc(local);
+ } else {
+ /* delay it a bit */
+ ieee80211_queue_delayed_work(&local->hw, &local->roc_work,
+ round_jiffies_relative(HZ/2));
+ }
+}
+
+static void __ieee80211_roc_work(struct ieee80211_local *local)
+{
+ struct ieee80211_roc_work *roc;
+ bool on_channel;
+
+ lockdep_assert_held(&local->mtx);
+
+ if (WARN_ON(local->ops->remain_on_channel))
+ return;
+
+ roc = list_first_entry_or_null(&local->roc_list,
+ struct ieee80211_roc_work, list);
+ if (!roc)
+ return;
+
+ if (!roc->started) {
+ WARN_ON(local->use_chanctx);
+ _ieee80211_start_next_roc(local);
} else {
- /* finish this ROC */
- finish:
- list_del(&roc->list);
- started = roc->started;
on_channel = roc->on_channel;
- ieee80211_roc_notify_destroy(roc, !roc->abort);
+ if (ieee80211_recalc_sw_work(local, jiffies))
+ return;
+
+ /* careful - roc pointer became invalid during recalc */
- if (started && !on_channel) {
+ if (!on_channel) {
ieee80211_flush_queues(local, NULL, false);
local->tmp_channel = NULL;
@@ -407,14 +462,17 @@ void ieee80211_sw_roc_work(struct work_struct *work)
}
ieee80211_recalc_idle(local);
-
- if (started)
- ieee80211_start_next_roc(local);
- else if (list_empty(&local->roc_list))
- ieee80211_run_deferred_scan(local);
+ ieee80211_start_next_roc(local);
}
+}
- out_unlock:
+static void ieee80211_roc_work(struct work_struct *work)
+{
+ struct ieee80211_local *local =
+ container_of(work, struct ieee80211_local, roc_work.work);
+
+ mutex_lock(&local->mtx);
+ __ieee80211_roc_work(local);
mutex_unlock(&local->mtx);
}
@@ -422,27 +480,14 @@ static void ieee80211_hw_roc_done(struct work_struct *work)
{
struct ieee80211_local *local =
container_of(work, struct ieee80211_local, hw_roc_done);
- struct ieee80211_roc_work *roc;
mutex_lock(&local->mtx);
- if (list_empty(&local->roc_list))
- goto out_unlock;
-
- roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work,
- list);
-
- if (!roc->started)
- goto out_unlock;
-
- list_del(&roc->list);
-
- ieee80211_roc_notify_destroy(roc, true);
+ ieee80211_end_finished_rocs(local, jiffies);
/* if there's another roc, start it now */
ieee80211_start_next_roc(local);
- out_unlock:
mutex_unlock(&local->mtx);
}
@@ -456,47 +501,503 @@ void ieee80211_remain_on_channel_expired(struct ieee80211_hw *hw)
}
EXPORT_SYMBOL_GPL(ieee80211_remain_on_channel_expired);
-void ieee80211_roc_setup(struct ieee80211_local *local)
+static bool
+ieee80211_coalesce_hw_started_roc(struct ieee80211_local *local,
+ struct ieee80211_roc_work *new_roc,
+ struct ieee80211_roc_work *cur_roc)
{
- INIT_WORK(&local->hw_roc_start, ieee80211_hw_roc_start);
- INIT_WORK(&local->hw_roc_done, ieee80211_hw_roc_done);
- INIT_LIST_HEAD(&local->roc_list);
+ unsigned long now = jiffies;
+ unsigned long remaining;
+
+ if (WARN_ON(!cur_roc->started))
+ return false;
+
+ /* if it was scheduled in the hardware, but not started yet,
+ * we can only combine if the older one had a longer duration
+ */
+ if (!cur_roc->hw_begun && new_roc->duration > cur_roc->duration)
+ return false;
+
+ remaining = cur_roc->start_time +
+ msecs_to_jiffies(cur_roc->duration) -
+ now;
+
+ /* if it doesn't fit entirely, schedule a new one */
+ if (new_roc->duration > jiffies_to_msecs(remaining))
+ return false;
+
+ /* add just after the current one so we combine their finish later */
+ list_add(&new_roc->list, &cur_roc->list);
+
+ /* if the existing one has already begun then let this one also
+ * begin, otherwise they'll both be marked properly by the work
+ * struct that runs once the driver notifies us of the beginning
+ */
+ if (cur_roc->hw_begun) {
+ new_roc->hw_begun = true;
+ ieee80211_handle_roc_started(new_roc, now);
+ }
+
+ return true;
}
-void ieee80211_roc_purge(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
+static int ieee80211_start_roc_work(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_channel *channel,
+ unsigned int duration, u64 *cookie,
+ struct sk_buff *txskb,
+ enum ieee80211_roc_type type)
{
struct ieee80211_roc_work *roc, *tmp;
- LIST_HEAD(tmp_list);
+ bool queued = false, combine_started = true;
+ int ret;
+
+ lockdep_assert_held(&local->mtx);
+
+ if (local->use_chanctx && !local->ops->remain_on_channel)
+ return -EOPNOTSUPP;
+
+ roc = kzalloc(sizeof(*roc), GFP_KERNEL);
+ if (!roc)
+ return -ENOMEM;
+
+ /*
+ * If the duration is zero, then the driver
+ * wouldn't actually do anything. Set it to
+ * 10 for now.
+ *
+ * TODO: cancel the off-channel operation
+ * when we get the SKB's TX status and
+ * the wait time was zero before.
+ */
+ if (!duration)
+ duration = 10;
+
+ roc->chan = channel;
+ roc->duration = duration;
+ roc->req_duration = duration;
+ roc->frame = txskb;
+ roc->type = type;
+ roc->sdata = sdata;
+
+ /*
+ * cookie is either the roc cookie (for normal roc)
+ * or the SKB (for mgmt TX)
+ */
+ if (!txskb) {
+ roc->cookie = ieee80211_mgmt_tx_cookie(local);
+ *cookie = roc->cookie;
+ } else {
+ roc->mgmt_tx_cookie = *cookie;
+ }
+
+ /* if there's no need to queue, handle it immediately */
+ if (list_empty(&local->roc_list) &&
+ !local->scanning && !ieee80211_is_radar_required(local)) {
+ /* if not HW assist, just queue & schedule work */
+ if (!local->ops->remain_on_channel) {
+ list_add_tail(&roc->list, &local->roc_list);
+ ieee80211_queue_delayed_work(&local->hw,
+ &local->roc_work, 0);
+ } else {
+ /* otherwise actually kick it off here
+ * (for error handling)
+ */
+ ret = drv_remain_on_channel(local, sdata, channel,
+ duration, type);
+ if (ret) {
+ kfree(roc);
+ return ret;
+ }
+ roc->started = true;
+ list_add_tail(&roc->list, &local->roc_list);
+ }
+
+ return 0;
+ }
+
+ /* otherwise handle queueing */
+
+ list_for_each_entry(tmp, &local->roc_list, list) {
+ if (tmp->chan != channel || tmp->sdata != sdata)
+ continue;
+
+ /*
+ * Extend this ROC if possible: If it hasn't started, add
+ * just after the new one to combine.
+ */
+ if (!tmp->started) {
+ list_add(&roc->list, &tmp->list);
+ queued = true;
+ break;
+ }
+
+ if (!combine_started)
+ continue;
+
+ if (!local->ops->remain_on_channel) {
+ /* If there's no hardware remain-on-channel, and
+ * doing so won't push us over the maximum r-o-c
+ * we allow, then we can just add the new one to
+ * the list and mark it as having started now.
+ * If it would push over the limit, don't try to
+ * combine with other started ones (that haven't
+ * been running as long) but potentially sort it
+ * with others that had the same fate.
+ */
+ unsigned long now = jiffies;
+ u32 elapsed = jiffies_to_msecs(now - tmp->start_time);
+ struct wiphy *wiphy = local->hw.wiphy;
+ u32 max_roc = wiphy->max_remain_on_channel_duration;
+
+ if (elapsed + roc->duration > max_roc) {
+ combine_started = false;
+ continue;
+ }
+
+ list_add(&roc->list, &tmp->list);
+ queued = true;
+ roc->on_channel = tmp->on_channel;
+ ieee80211_handle_roc_started(roc, now);
+ ieee80211_recalc_sw_work(local, now);
+ break;
+ }
+
+ queued = ieee80211_coalesce_hw_started_roc(local, roc, tmp);
+ if (queued)
+ break;
+ /* if it wasn't queued, perhaps it can be combined with
+ * another that also couldn't get combined previously,
+ * but no need to check for already started ones, since
+ * that can't work.
+ */
+ combine_started = false;
+ }
+
+ if (!queued)
+ list_add_tail(&roc->list, &local->roc_list);
+
+ return 0;
+}
+
+int ieee80211_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev,
+ struct ieee80211_channel *chan,
+ unsigned int duration, u64 *cookie)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+ struct ieee80211_local *local = sdata->local;
+ int ret;
+
+ mutex_lock(&local->mtx);
+ ret = ieee80211_start_roc_work(local, sdata, chan,
+ duration, cookie, NULL,
+ IEEE80211_ROC_TYPE_NORMAL);
+ mutex_unlock(&local->mtx);
+
+ return ret;
+}
+
+static int ieee80211_cancel_roc(struct ieee80211_local *local,
+ u64 cookie, bool mgmt_tx)
+{
+ struct ieee80211_roc_work *roc, *tmp, *found = NULL;
+ int ret;
+
+ if (!cookie)
+ return -ENOENT;
mutex_lock(&local->mtx);
list_for_each_entry_safe(roc, tmp, &local->roc_list, list) {
- if (sdata && roc->sdata != sdata)
+ if (!mgmt_tx && roc->cookie != cookie)
+ continue;
+ else if (mgmt_tx && roc->mgmt_tx_cookie != cookie)
continue;
- if (roc->started && local->ops->remain_on_channel) {
- /* can race, so ignore return value */
- drv_cancel_remain_on_channel(local);
+ found = roc;
+ break;
+ }
+
+ if (!found) {
+ mutex_unlock(&local->mtx);
+ return -ENOENT;
+ }
+
+ if (!found->started) {
+ ieee80211_roc_notify_destroy(found);
+ goto out_unlock;
+ }
+
+ if (local->ops->remain_on_channel) {
+ ret = drv_cancel_remain_on_channel(local);
+ if (WARN_ON_ONCE(ret)) {
+ mutex_unlock(&local->mtx);
+ return ret;
}
- list_move_tail(&roc->list, &tmp_list);
- roc->abort = true;
+ /* TODO:
+ * if multiple items were combined here then we really shouldn't
+ * cancel them all - we should wait for as much time as needed
+ * for the longest remaining one, and only then cancel ...
+ */
+ list_for_each_entry_safe(roc, tmp, &local->roc_list, list) {
+ if (!roc->started)
+ break;
+ if (roc == found)
+ found = NULL;
+ ieee80211_roc_notify_destroy(roc);
+ }
+
+ /* that really must not happen - it was started */
+ WARN_ON(found);
+
+ ieee80211_start_next_roc(local);
+ } else {
+ /* go through work struct to return to the operating channel */
+ found->abort = true;
+ mod_delayed_work(local->workqueue, &local->roc_work, 0);
}
+
+ out_unlock:
mutex_unlock(&local->mtx);
- list_for_each_entry_safe(roc, tmp, &tmp_list, list) {
- if (local->ops->remain_on_channel) {
- list_del(&roc->list);
- ieee80211_roc_notify_destroy(roc, true);
+ return 0;
+}
+
+int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy,
+ struct wireless_dev *wdev, u64 cookie)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+ struct ieee80211_local *local = sdata->local;
+
+ return ieee80211_cancel_roc(local, cookie, false);
+}
+
+int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
+ struct cfg80211_mgmt_tx_params *params, u64 *cookie)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+ struct ieee80211_local *local = sdata->local;
+ struct sk_buff *skb;
+ struct sta_info *sta;
+ const struct ieee80211_mgmt *mgmt = (void *)params->buf;
+ bool need_offchan = false;
+ u32 flags;
+ int ret;
+ u8 *data;
+
+ if (params->dont_wait_for_ack)
+ flags = IEEE80211_TX_CTL_NO_ACK;
+ else
+ flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX |
+ IEEE80211_TX_CTL_REQ_TX_STATUS;
+
+ if (params->no_cck)
+ flags |= IEEE80211_TX_CTL_NO_CCK_RATE;
+
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_ADHOC:
+ if (!sdata->vif.bss_conf.ibss_joined)
+ need_offchan = true;
+ /* fall through */
+#ifdef CONFIG_MAC80211_MESH
+ case NL80211_IFTYPE_MESH_POINT:
+ if (ieee80211_vif_is_mesh(&sdata->vif) &&
+ !sdata->u.mesh.mesh_id_len)
+ need_offchan = true;
+ /* fall through */
+#endif
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_AP_VLAN:
+ case NL80211_IFTYPE_P2P_GO:
+ if (sdata->vif.type != NL80211_IFTYPE_ADHOC &&
+ !ieee80211_vif_is_mesh(&sdata->vif) &&
+ !rcu_access_pointer(sdata->bss->beacon))
+ need_offchan = true;
+ if (!ieee80211_is_action(mgmt->frame_control) ||
+ mgmt->u.action.category == WLAN_CATEGORY_PUBLIC ||
+ mgmt->u.action.category == WLAN_CATEGORY_SELF_PROTECTED ||
+ mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT)
+ break;
+ rcu_read_lock();
+ sta = sta_info_get(sdata, mgmt->da);
+ rcu_read_unlock();
+ if (!sta)
+ return -ENOLINK;
+ break;
+ case NL80211_IFTYPE_STATION:
+ case NL80211_IFTYPE_P2P_CLIENT:
+ sdata_lock(sdata);
+ if (!sdata->u.mgd.associated ||
+ (params->offchan && params->wait &&
+ local->ops->remain_on_channel &&
+ memcmp(sdata->u.mgd.associated->bssid,
+ mgmt->bssid, ETH_ALEN)))
+ need_offchan = true;
+ sdata_unlock(sdata);
+ break;
+ case NL80211_IFTYPE_P2P_DEVICE:
+ need_offchan = true;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ /* configurations requiring offchan cannot work if no channel has been
+ * specified
+ */
+ if (need_offchan && !params->chan)
+ return -EINVAL;
+
+ mutex_lock(&local->mtx);
+
+ /* Check if the operating channel is the requested channel */
+ if (!need_offchan) {
+ struct ieee80211_chanctx_conf *chanctx_conf;
+
+ rcu_read_lock();
+ chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+
+ if (chanctx_conf) {
+ need_offchan = params->chan &&
+ (params->chan !=
+ chanctx_conf->def.chan);
+ } else if (!params->chan) {
+ ret = -EINVAL;
+ rcu_read_unlock();
+ goto out_unlock;
} else {
- ieee80211_queue_delayed_work(&local->hw, &roc->work, 0);
+ need_offchan = true;
+ }
+ rcu_read_unlock();
+ }
+
+ if (need_offchan && !params->offchan) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
+ skb = dev_alloc_skb(local->hw.extra_tx_headroom + params->len);
+ if (!skb) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+ skb_reserve(skb, local->hw.extra_tx_headroom);
+
+ data = skb_put(skb, params->len);
+ memcpy(data, params->buf, params->len);
+
+ /* Update CSA counters */
+ if (sdata->vif.csa_active &&
+ (sdata->vif.type == NL80211_IFTYPE_AP ||
+ sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
+ sdata->vif.type == NL80211_IFTYPE_ADHOC) &&
+ params->n_csa_offsets) {
+ int i;
+ struct beacon_data *beacon = NULL;
+
+ rcu_read_lock();
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP)
+ beacon = rcu_dereference(sdata->u.ap.beacon);
+ else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
+ beacon = rcu_dereference(sdata->u.ibss.presp);
+ else if (ieee80211_vif_is_mesh(&sdata->vif))
+ beacon = rcu_dereference(sdata->u.mesh.beacon);
+
+ if (beacon)
+ for (i = 0; i < params->n_csa_offsets; i++)
+ data[params->csa_offsets[i]] =
+ beacon->csa_current_counter;
+
+ rcu_read_unlock();
+ }
- /* work will clean up etc */
- flush_delayed_work(&roc->work);
- WARN_ON(!roc->to_be_freed);
- kfree(roc);
+ IEEE80211_SKB_CB(skb)->flags = flags;
+
+ skb->dev = sdata->dev;
+
+ if (!params->dont_wait_for_ack) {
+ /* make a copy to preserve the frame contents
+ * in case of encryption.
+ */
+ ret = ieee80211_attach_ack_skb(local, skb, cookie, GFP_KERNEL);
+ if (ret) {
+ kfree_skb(skb);
+ goto out_unlock;
}
+ } else {
+ /* Assign a dummy non-zero cookie, it's not sent to
+ * userspace in this case but we rely on its value
+ * internally in the need_offchan case to distinguish
+ * mgmt-tx from remain-on-channel.
+ */
+ *cookie = 0xffffffff;
}
- WARN_ON_ONCE(!list_empty(&tmp_list));
+ if (!need_offchan) {
+ ieee80211_tx_skb(sdata, skb);
+ ret = 0;
+ goto out_unlock;
+ }
+
+ IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN |
+ IEEE80211_TX_INTFL_OFFCHAN_TX_OK;
+ if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL))
+ IEEE80211_SKB_CB(skb)->hw_queue =
+ local->hw.offchannel_tx_hw_queue;
+
+ /* This will handle all kinds of coalescing and immediate TX */
+ ret = ieee80211_start_roc_work(local, sdata, params->chan,
+ params->wait, cookie, skb,
+ IEEE80211_ROC_TYPE_MGMT_TX);
+ if (ret)
+ ieee80211_free_txskb(&local->hw, skb);
+ out_unlock:
+ mutex_unlock(&local->mtx);
+ return ret;
+}
+
+int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy,
+ struct wireless_dev *wdev, u64 cookie)
+{
+ struct ieee80211_local *local = wiphy_priv(wiphy);
+
+ return ieee80211_cancel_roc(local, cookie, true);
+}
+
+void ieee80211_roc_setup(struct ieee80211_local *local)
+{
+ INIT_WORK(&local->hw_roc_start, ieee80211_hw_roc_start);
+ INIT_WORK(&local->hw_roc_done, ieee80211_hw_roc_done);
+ INIT_DELAYED_WORK(&local->roc_work, ieee80211_roc_work);
+ INIT_LIST_HEAD(&local->roc_list);
+}
+
+void ieee80211_roc_purge(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_roc_work *roc, *tmp;
+ bool work_to_do = false;
+
+ mutex_lock(&local->mtx);
+ list_for_each_entry_safe(roc, tmp, &local->roc_list, list) {
+ if (sdata && roc->sdata != sdata)
+ continue;
+
+ if (roc->started) {
+ if (local->ops->remain_on_channel) {
+ /* can race, so ignore return value */
+ drv_cancel_remain_on_channel(local);
+ ieee80211_roc_notify_destroy(roc);
+ } else {
+ roc->abort = true;
+ work_to_do = true;
+ }
+ } else {
+ ieee80211_roc_notify_destroy(roc);
+ }
+ }
+ if (work_to_do)
+ __ieee80211_roc_work(local);
+ mutex_unlock(&local->mtx);
}
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 3ece7d1034c8..b54f398cda5d 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -711,7 +711,7 @@ static u32 minstrel_get_expected_throughput(void *priv_sta)
* computing cur_tp
*/
tmp_mrs = &mi->r[idx].stats;
- tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma);
+ tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma) * 10;
tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024;
return tmp_cur_tp;
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 3928dbd24e25..370d677b547b 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -414,15 +414,16 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
(max_tp_group != MINSTREL_CCK_GROUP))
return;
+ max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES;
+ max_gpr_idx = mg->max_group_prob_rate % MCS_GROUP_RATES;
+ max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_ewma;
+
if (mrs->prob_ewma > MINSTREL_FRAC(75, 100)) {
cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx,
mrs->prob_ewma);
if (cur_tp_avg > tmp_tp_avg)
mi->max_prob_rate = index;
- max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES;
- max_gpr_idx = mg->max_group_prob_rate % MCS_GROUP_RATES;
- max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_ewma;
max_gpr_tp_avg = minstrel_ht_get_tp_avg(mi, max_gpr_group,
max_gpr_idx,
max_gpr_prob);
@@ -431,7 +432,7 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
} else {
if (mrs->prob_ewma > tmp_prob)
mi->max_prob_rate = index;
- if (mrs->prob_ewma > mg->rates[mg->max_group_prob_rate].prob_ewma)
+ if (mrs->prob_ewma > max_gpr_prob)
mg->max_group_prob_rate = index;
}
}
@@ -691,7 +692,7 @@ minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb)
if (likely(sta->ampdu_mlme.tid_tx[tid]))
return;
- ieee80211_start_tx_ba_session(pubsta, tid, 5000);
+ ieee80211_start_tx_ba_session(pubsta, tid, 0);
}
static void
@@ -871,7 +872,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
* - if station is in dynamic SMPS (and streams > 1)
* - for fallback rates, to increase chances of getting through
*/
- if (offset > 0 &&
+ if (offset > 0 ||
(mi->sta->smps_mode == IEEE80211_SMPS_DYNAMIC &&
group->streams > 1)) {
ratetbl->rate[offset].count = ratetbl->rate[offset].count_rts;
@@ -1334,7 +1335,8 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
prob = mi->groups[i].rates[j].prob_ewma;
/* convert tp_avg from pkt per second in kbps */
- tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * AVG_PKT_SIZE * 8 / 1024;
+ tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * 10;
+ tp_avg = tp_avg * AVG_PKT_SIZE * 8 / 1024;
return tp_avg;
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 8bae5de0dc44..60d093f40f1d 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -661,8 +661,7 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
static void ieee80211_verify_alignment(struct ieee80211_rx_data *rx)
{
#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
- WARN_ONCE((unsigned long)rx->skb->data & 1,
- "unaligned packet at 0x%p\n", rx->skb->data);
+ WARN_ON_ONCE((unsigned long)rx->skb->data & 1);
#endif
}
@@ -1754,7 +1753,7 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
entry->seq = seq;
entry->rx_queue = rx_queue;
entry->last_frag = frag;
- entry->ccmp = 0;
+ entry->check_sequential_pn = false;
entry->extra_len = 0;
return entry;
@@ -1850,15 +1849,27 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
rx->seqno_idx, &(rx->skb));
if (rx->key &&
(rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP ||
- rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256) &&
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 ||
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP ||
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) &&
ieee80211_has_protected(fc)) {
int queue = rx->security_idx;
- /* Store CCMP PN so that we can verify that the next
- * fragment has a sequential PN value. */
- entry->ccmp = 1;
+
+ /* Store CCMP/GCMP PN so that we can verify that the
+ * next fragment has a sequential PN value.
+ */
+ entry->check_sequential_pn = true;
memcpy(entry->last_pn,
rx->key->u.ccmp.rx_pn[queue],
IEEE80211_CCMP_PN_LEN);
+ BUILD_BUG_ON(offsetof(struct ieee80211_key,
+ u.ccmp.rx_pn) !=
+ offsetof(struct ieee80211_key,
+ u.gcmp.rx_pn));
+ BUILD_BUG_ON(sizeof(rx->key->u.ccmp.rx_pn[queue]) !=
+ sizeof(rx->key->u.gcmp.rx_pn[queue]));
+ BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
+ IEEE80211_GCMP_PN_LEN);
}
return RX_QUEUED;
}
@@ -1873,15 +1884,21 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
return RX_DROP_MONITOR;
}
- /* Verify that MPDUs within one MSDU have sequential PN values.
- * (IEEE 802.11i, 8.3.3.4.5) */
- if (entry->ccmp) {
+ /* "The receiver shall discard MSDUs and MMPDUs whose constituent
+ * MPDU PN values are not incrementing in steps of 1."
+ * see IEEE P802.11-REVmc/D5.0, 12.5.3.4.4, item d (for CCMP)
+ * and IEEE P802.11-REVmc/D5.0, 12.5.5.4.4, item d (for GCMP)
+ */
+ if (entry->check_sequential_pn) {
int i;
u8 pn[IEEE80211_CCMP_PN_LEN], *rpn;
int queue;
+
if (!rx->key ||
(rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP &&
- rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256))
+ rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256 &&
+ rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP &&
+ rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP_256))
return RX_DROP_UNUSABLE;
memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN);
for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) {
@@ -2736,8 +2753,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode;
ieee80211_vht_handle_opmode(rx->sdata, rx->sta,
- opmode, status->band,
- false);
+ opmode, status->band);
goto handled;
}
default:
@@ -3368,6 +3384,7 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
return false;
/* ignore action frames to TDLS-peers */
if (ieee80211_is_action(hdr->frame_control) &&
+ !is_broadcast_ether_addr(bssid) &&
!ether_addr_equal(bssid, hdr->addr1))
return false;
}
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 4aeca4b0c3cb..ae980ce8daff 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -314,6 +314,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
bool was_scanning = local->scanning;
struct cfg80211_scan_request *scan_req;
struct ieee80211_sub_if_data *scan_sdata;
+ struct ieee80211_sub_if_data *sdata;
lockdep_assert_held(&local->mtx);
@@ -373,7 +374,16 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
ieee80211_mlme_notify_scan_completed(local);
ieee80211_ibss_notify_scan_completed(local);
- ieee80211_mesh_notify_scan_completed(local);
+
+ /* Requeue all the work that might have been ignored while
+ * the scan was in progress; if there was none this will
+ * just be a no-op for the particular interface.
+ */
+ list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ if (ieee80211_sdata_running(sdata))
+ ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ }
+
if (was_scanning)
ieee80211_start_next_roc(local);
}
@@ -597,8 +607,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
/* We need to ensure power level is at max for scanning. */
ieee80211_hw_config(local, 0);
- if ((req->channels[0]->flags &
- IEEE80211_CHAN_NO_IR) ||
+ if ((req->channels[0]->flags & (IEEE80211_CHAN_NO_IR |
+ IEEE80211_CHAN_RADAR)) ||
!req->n_ssids) {
next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
} else {
@@ -645,7 +655,7 @@ ieee80211_scan_get_channel_time(struct ieee80211_channel *chan)
* TODO: channel switching also consumes quite some time,
* add that delay as well to get a better estimation
*/
- if (chan->flags & IEEE80211_CHAN_NO_IR)
+ if (chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR))
return IEEE80211_PASSIVE_CHANNEL_TIME;
return IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME;
}
@@ -777,7 +787,8 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local,
*
* In any case, it is not necessary for a passive scan.
*/
- if (chan->flags & IEEE80211_CHAN_NO_IR || !scan_req->n_ssids) {
+ if ((chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) ||
+ !scan_req->n_ssids) {
*next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
local->next_scan_state = SCAN_DECISION;
return;
@@ -1212,6 +1223,14 @@ void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw)
trace_api_sched_scan_stopped(local);
+ /*
+ * this shouldn't really happen, so for simplicity
+ * simply ignore it, and let mac80211 reconfigure
+ * the sched scan later on.
+ */
+ if (local->in_reconfig)
+ return;
+
schedule_work(&local->sched_scan_stopped_work);
}
EXPORT_SYMBOL(ieee80211_sched_scan_stopped);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index f91d1873218c..a4a4f89d3ba0 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -2,6 +2,7 @@
* Copyright 2002-2005, Instant802 Networks, Inc.
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright (C) 2015 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -435,6 +436,19 @@ static int sta_info_insert_check(struct sta_info *sta)
is_multicast_ether_addr(sta->sta.addr)))
return -EINVAL;
+ /* Strictly speaking this isn't necessary as we hold the mutex, but
+ * the rhashtable code can't really deal with that distinction. We
+ * do require the mutex for correctness though.
+ */
+ rcu_read_lock();
+ lockdep_assert_held(&sdata->local->sta_mtx);
+ if (ieee80211_hw_check(&sdata->local->hw, NEEDS_UNIQUE_STA_ADDR) &&
+ ieee80211_find_sta_by_ifaddr(&sdata->local->hw, sta->addr, NULL)) {
+ rcu_read_unlock();
+ return -ENOTUNIQ;
+ }
+ rcu_read_unlock();
+
return 0;
}
@@ -554,14 +568,15 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
might_sleep();
+ mutex_lock(&local->sta_mtx);
+
err = sta_info_insert_check(sta);
if (err) {
+ mutex_unlock(&local->sta_mtx);
rcu_read_lock();
goto out_free;
}
- mutex_lock(&local->sta_mtx);
-
err = sta_info_insert_finish(sta);
if (err)
goto out_free;
@@ -868,6 +883,7 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta)
}
list_del_rcu(&sta->list);
+ sta->removed = true;
drv_sta_pre_rcu_remove(local, sta->sdata, sta);
@@ -1230,11 +1246,11 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
ieee80211_check_fast_xmit(sta);
}
-static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta, int tid,
+static void ieee80211_send_null_response(struct sta_info *sta, int tid,
enum ieee80211_frame_release_type reason,
- bool call_driver)
+ bool call_driver, bool more_data)
{
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_qos_hdr *nullfunc;
struct sk_buff *skb;
@@ -1274,9 +1290,13 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata,
if (qos) {
nullfunc->qos_ctrl = cpu_to_le16(tid);
- if (reason == IEEE80211_FRAME_RELEASE_UAPSD)
+ if (reason == IEEE80211_FRAME_RELEASE_UAPSD) {
nullfunc->qos_ctrl |=
cpu_to_le16(IEEE80211_QOS_CTL_EOSP);
+ if (more_data)
+ nullfunc->frame_control |=
+ cpu_to_le16(IEEE80211_FCTL_MOREDATA);
+ }
}
info = IEEE80211_SKB_CB(skb);
@@ -1323,22 +1343,48 @@ static int find_highest_prio_tid(unsigned long tids)
return fls(tids) - 1;
}
+/* Indicates if the MORE_DATA bit should be set in the last
+ * frame obtained by ieee80211_sta_ps_get_frames.
+ * Note that driver_release_tids is relevant only if
+ * reason = IEEE80211_FRAME_RELEASE_PSPOLL
+ */
+static bool
+ieee80211_sta_ps_more_data(struct sta_info *sta, u8 ignored_acs,
+ enum ieee80211_frame_release_type reason,
+ unsigned long driver_release_tids)
+{
+ int ac;
+
+ /* If the driver has data on more than one TID then
+ * certainly there's more data if we release just a
+ * single frame now (from a single TID). This will
+ * only happen for PS-Poll.
+ */
+ if (reason == IEEE80211_FRAME_RELEASE_PSPOLL &&
+ hweight16(driver_release_tids) > 1)
+ return true;
+
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ if (ignored_acs & BIT(ac))
+ continue;
+
+ if (!skb_queue_empty(&sta->tx_filtered[ac]) ||
+ !skb_queue_empty(&sta->ps_tx_buf[ac]))
+ return true;
+ }
+
+ return false;
+}
+
static void
-ieee80211_sta_ps_deliver_response(struct sta_info *sta,
- int n_frames, u8 ignored_acs,
- enum ieee80211_frame_release_type reason)
+ieee80211_sta_ps_get_frames(struct sta_info *sta, int n_frames, u8 ignored_acs,
+ enum ieee80211_frame_release_type reason,
+ struct sk_buff_head *frames,
+ unsigned long *driver_release_tids)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local;
- bool more_data = false;
int ac;
- unsigned long driver_release_tids = 0;
- struct sk_buff_head frames;
-
- /* Service or PS-Poll period starts */
- set_sta_flag(sta, WLAN_STA_SP);
-
- __skb_queue_head_init(&frames);
/* Get response frame(s) and more data bit for the last one. */
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
@@ -1352,26 +1398,13 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
/* if we already have frames from software, then we can't also
* release from hardware queues
*/
- if (skb_queue_empty(&frames)) {
- driver_release_tids |= sta->driver_buffered_tids & tids;
- driver_release_tids |= sta->txq_buffered_tids & tids;
+ if (skb_queue_empty(frames)) {
+ *driver_release_tids |=
+ sta->driver_buffered_tids & tids;
+ *driver_release_tids |= sta->txq_buffered_tids & tids;
}
- if (driver_release_tids) {
- /* If the driver has data on more than one TID then
- * certainly there's more data if we release just a
- * single frame now (from a single TID). This will
- * only happen for PS-Poll.
- */
- if (reason == IEEE80211_FRAME_RELEASE_PSPOLL &&
- hweight16(driver_release_tids) > 1) {
- more_data = true;
- driver_release_tids =
- BIT(find_highest_prio_tid(
- driver_release_tids));
- break;
- }
- } else {
+ if (!*driver_release_tids) {
struct sk_buff *skb;
while (n_frames > 0) {
@@ -1385,20 +1418,44 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
if (!skb)
break;
n_frames--;
- __skb_queue_tail(&frames, skb);
+ __skb_queue_tail(frames, skb);
}
}
- /* If we have more frames buffered on this AC, then set the
- * more-data bit and abort the loop since we can't send more
- * data from other ACs before the buffered frames from this.
+ /* If we have more frames buffered on this AC, then abort the
+ * loop since we can't send more data from other ACs before
+ * the buffered frames from this.
*/
if (!skb_queue_empty(&sta->tx_filtered[ac]) ||
- !skb_queue_empty(&sta->ps_tx_buf[ac])) {
- more_data = true;
+ !skb_queue_empty(&sta->ps_tx_buf[ac]))
break;
- }
}
+}
+
+static void
+ieee80211_sta_ps_deliver_response(struct sta_info *sta,
+ int n_frames, u8 ignored_acs,
+ enum ieee80211_frame_release_type reason)
+{
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct ieee80211_local *local = sdata->local;
+ unsigned long driver_release_tids = 0;
+ struct sk_buff_head frames;
+ bool more_data;
+
+ /* Service or PS-Poll period starts */
+ set_sta_flag(sta, WLAN_STA_SP);
+
+ __skb_queue_head_init(&frames);
+
+ ieee80211_sta_ps_get_frames(sta, n_frames, ignored_acs, reason,
+ &frames, &driver_release_tids);
+
+ more_data = ieee80211_sta_ps_more_data(sta, ignored_acs, reason, driver_release_tids);
+
+ if (driver_release_tids && reason == IEEE80211_FRAME_RELEASE_PSPOLL)
+ driver_release_tids =
+ BIT(find_highest_prio_tid(driver_release_tids));
if (skb_queue_empty(&frames) && !driver_release_tids) {
int tid;
@@ -1421,7 +1478,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
/* This will evaluate to 1, 3, 5 or 7. */
tid = 7 - ((ffs(~ignored_acs) - 1) << 1);
- ieee80211_send_null_response(sdata, sta, tid, reason, true);
+ ieee80211_send_null_response(sta, tid, reason, true, false);
} else if (!driver_release_tids) {
struct sk_buff_head pending;
struct sk_buff *skb;
@@ -1521,8 +1578,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
if (need_null)
ieee80211_send_null_response(
- sdata, sta, find_highest_prio_tid(tids),
- reason, false);
+ sta, find_highest_prio_tid(tids),
+ reason, false, false);
sta_info_recalc_tim(sta);
} else {
@@ -1660,6 +1717,22 @@ void ieee80211_sta_eosp(struct ieee80211_sta *pubsta)
}
EXPORT_SYMBOL(ieee80211_sta_eosp);
+void ieee80211_send_eosp_nullfunc(struct ieee80211_sta *pubsta, int tid)
+{
+ struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
+ enum ieee80211_frame_release_type reason;
+ bool more_data;
+
+ trace_api_send_eosp_nullfunc(sta->local, pubsta, tid);
+
+ reason = IEEE80211_FRAME_RELEASE_UAPSD;
+ more_data = ieee80211_sta_ps_more_data(sta, ~sta->sta.uapsd_queues,
+ reason, 0);
+
+ ieee80211_send_null_response(sta, tid, reason, false, more_data);
+}
+EXPORT_SYMBOL(ieee80211_send_eosp_nullfunc);
+
void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta,
u8 tid, bool buffered)
{
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 2cafb21b422f..d6051629ed15 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -367,6 +367,7 @@ DECLARE_EWMA(signal, 1024, 8)
* @mesh: mesh STA information
* @debugfs: debug filesystem info
* @dead: set to true when sta is unlinked
+ * @removed: set to true when sta is being removed from sta_list
* @uploaded: set to true when sta is uploaded to the driver
* @sta: station information we share with the driver
* @sta_state: duplicates information about station state (for debug)
@@ -412,6 +413,7 @@ struct sta_info {
u16 listen_interval;
bool dead;
+ bool removed;
bool uploaded;
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 5bad05e9af90..6101deb805a8 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -51,6 +51,11 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
struct ieee80211_hdr *hdr = (void *)skb->data;
int ac;
+ if (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER) {
+ ieee80211_free_txskb(&local->hw, skb);
+ return;
+ }
+
/*
* This skb 'survived' a round-trip through the driver, and
* hopefully the driver didn't mangle it too badly. However,
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 56c6d6cfa5a1..a6b4442776a0 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -2027,6 +2027,31 @@ TRACE_EVENT(api_eosp,
)
);
+TRACE_EVENT(api_send_eosp_nullfunc,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sta *sta,
+ u8 tid),
+
+ TP_ARGS(local, sta, tid),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ STA_ENTRY
+ __field(u8, tid)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ STA_ASSIGN;
+ __entry->tid = tid;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT STA_PR_FMT " tid:%d",
+ LOCAL_PR_ARG, STA_PR_ARG, __entry->tid
+ )
+);
+
TRACE_EVENT(api_sta_set_buffered,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sta *sta,
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index bdc224d5053a..3311ce0f3d6c 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1431,7 +1431,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
info->hw_queue =
vif->hw_queue[skb_get_queue_mapping(skb)];
} else if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) {
- dev_kfree_skb(skb);
+ ieee80211_purge_tx_queue(&local->hw, skbs);
return true;
} else
vif = NULL;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 74058020b7d6..58f58bd5202f 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -288,10 +288,13 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue,
if (!test_bit(reason, &local->queue_stop_reasons[queue]))
return;
- if (!refcounted)
+ if (!refcounted) {
local->q_stop_reasons[queue][reason] = 0;
- else
+ } else {
local->q_stop_reasons[queue][reason]--;
+ if (WARN_ON(local->q_stop_reasons[queue][reason] < 0))
+ local->q_stop_reasons[queue][reason] = 0;
+ }
if (local->q_stop_reasons[queue][reason] == 0)
__clear_bit(reason, &local->queue_stop_reasons[queue]);
@@ -1641,6 +1644,29 @@ void ieee80211_stop_device(struct ieee80211_local *local)
drv_stop(local);
}
+static void ieee80211_flush_completed_scan(struct ieee80211_local *local,
+ bool aborted)
+{
+ /* It's possible that we don't handle the scan completion in
+ * time during suspend, so if it's still marked as completed
+ * here, queue the work and flush it to clean things up.
+ * Instead of calling the worker function directly here, we
+ * really queue it to avoid potential races with other flows
+ * scheduling the same work.
+ */
+ if (test_bit(SCAN_COMPLETED, &local->scanning)) {
+ /* If coming from reconfiguration failure, abort the scan so
+ * we don't attempt to continue a partial HW scan - which is
+ * possible otherwise if (e.g.) the 2.4 GHz portion was the
+ * completed scan, and a 5 GHz portion is still pending.
+ */
+ if (aborted)
+ set_bit(SCAN_ABORTED, &local->scanning);
+ ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0);
+ flush_delayed_work(&local->scan_work);
+ }
+}
+
static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
@@ -1660,6 +1686,8 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local)
local->suspended = false;
local->in_reconfig = false;
+ ieee80211_flush_completed_scan(local, true);
+
/* scheduled scan clearly can't be running any more, but tell
* cfg80211 and clear local state
*/
@@ -1698,6 +1726,27 @@ static void ieee80211_assign_chanctx(struct ieee80211_local *local,
mutex_unlock(&local->chanctx_mtx);
}
+static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct sta_info *sta;
+
+ /* add STAs back */
+ mutex_lock(&local->sta_mtx);
+ list_for_each_entry(sta, &local->sta_list, list) {
+ enum ieee80211_sta_state state;
+
+ if (!sta->uploaded || sta->sdata != sdata)
+ continue;
+
+ for (state = IEEE80211_STA_NOTEXIST;
+ state < sta->sta_state; state++)
+ WARN_ON(drv_sta_state(local, sta->sdata, sta, state,
+ state + 1));
+ }
+ mutex_unlock(&local->sta_mtx);
+}
+
int ieee80211_reconfig(struct ieee80211_local *local)
{
struct ieee80211_hw *hw = &local->hw;
@@ -1833,50 +1882,11 @@ int ieee80211_reconfig(struct ieee80211_local *local)
WARN_ON(drv_add_chanctx(local, ctx));
mutex_unlock(&local->chanctx_mtx);
- list_for_each_entry(sdata, &local->interfaces, list) {
- if (!ieee80211_sdata_running(sdata))
- continue;
- ieee80211_assign_chanctx(local, sdata);
- }
-
sdata = rtnl_dereference(local->monitor_sdata);
if (sdata && ieee80211_sdata_running(sdata))
ieee80211_assign_chanctx(local, sdata);
}
- /* add STAs back */
- mutex_lock(&local->sta_mtx);
- list_for_each_entry(sta, &local->sta_list, list) {
- enum ieee80211_sta_state state;
-
- if (!sta->uploaded)
- continue;
-
- /* AP-mode stations will be added later */
- if (sta->sdata->vif.type == NL80211_IFTYPE_AP)
- continue;
-
- for (state = IEEE80211_STA_NOTEXIST;
- state < sta->sta_state; state++)
- WARN_ON(drv_sta_state(local, sta->sdata, sta, state,
- state + 1));
- }
- mutex_unlock(&local->sta_mtx);
-
- /* reconfigure tx conf */
- if (hw->queues >= IEEE80211_NUM_ACS) {
- list_for_each_entry(sdata, &local->interfaces, list) {
- if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
- sdata->vif.type == NL80211_IFTYPE_MONITOR ||
- !ieee80211_sdata_running(sdata))
- continue;
-
- for (i = 0; i < IEEE80211_NUM_ACS; i++)
- drv_conf_tx(local, sdata, i,
- &sdata->tx_conf[i]);
- }
- }
-
/* reconfigure hardware */
ieee80211_hw_config(local, ~0);
@@ -1889,6 +1899,22 @@ int ieee80211_reconfig(struct ieee80211_local *local)
if (!ieee80211_sdata_running(sdata))
continue;
+ ieee80211_assign_chanctx(local, sdata);
+
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_AP_VLAN:
+ case NL80211_IFTYPE_MONITOR:
+ break;
+ default:
+ ieee80211_reconfig_stations(sdata);
+ /* fall through */
+ case NL80211_IFTYPE_AP: /* AP stations are handled later */
+ for (i = 0; i < IEEE80211_NUM_ACS; i++)
+ drv_conf_tx(local, sdata, i,
+ &sdata->tx_conf[i]);
+ break;
+ }
+
/* common change flags for all interface types */
changed = BSS_CHANGED_ERP_CTS_PROT |
BSS_CHANGED_ERP_PREAMBLE |
@@ -2017,16 +2043,26 @@ int ieee80211_reconfig(struct ieee80211_local *local)
*/
if (sched_scan_req->n_scan_plans > 1 ||
__ieee80211_request_sched_scan_start(sched_scan_sdata,
- sched_scan_req))
+ sched_scan_req)) {
+ RCU_INIT_POINTER(local->sched_scan_sdata, NULL);
+ RCU_INIT_POINTER(local->sched_scan_req, NULL);
sched_scan_stopped = true;
+ }
mutex_unlock(&local->mtx);
if (sched_scan_stopped)
cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy);
wake_up:
- local->in_reconfig = false;
- barrier();
+ if (local->in_reconfig) {
+ local->in_reconfig = false;
+ barrier();
+
+ /* Restart deferred ROCs */
+ mutex_lock(&local->mtx);
+ ieee80211_start_next_roc(local);
+ mutex_unlock(&local->mtx);
+ }
if (local->monitors == local->open_count && local->monitors > 0)
ieee80211_add_virtual_monitor(local);
@@ -2074,17 +2110,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
mb();
local->resuming = false;
- /* It's possible that we don't handle the scan completion in
- * time during suspend, so if it's still marked as completed
- * here, queue the work and flush it to clean things up.
- * Instead of calling the worker function directly here, we
- * really queue it to avoid potential races with other flows
- * scheduling the same work.
- */
- if (test_bit(SCAN_COMPLETED, &local->scanning)) {
- ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0);
- flush_delayed_work(&local->scan_work);
- }
+ ieee80211_flush_completed_scan(local, false);
if (local->open_count && !reconfig_due_to_wowlan)
drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_SUSPEND);
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index ff1c798921a6..c38b2f07a919 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -378,7 +378,7 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta)
u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta, u8 opmode,
- enum ieee80211_band band, bool nss_only)
+ enum ieee80211_band band)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
@@ -401,9 +401,6 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
changed |= IEEE80211_RC_NSS_CHANGED;
}
- if (nss_only)
- return changed;
-
switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) {
case IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ:
sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_20;
@@ -430,13 +427,12 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta, u8 opmode,
- enum ieee80211_band band, bool nss_only)
+ enum ieee80211_band band)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
- u32 changed = __ieee80211_vht_handle_opmode(sdata, sta, opmode,
- band, nss_only);
+ u32 changed = __ieee80211_vht_handle_opmode(sdata, sta, opmode, band);
if (changed > 0)
rate_control_rate_update(local, sband, sta, changed);
diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h
index 0550f3365e33..fd9daf2ecec9 100644
--- a/net/mac802154/driver-ops.h
+++ b/net/mac802154/driver-ops.h
@@ -18,9 +18,6 @@ drv_xmit_async(struct ieee802154_local *local, struct sk_buff *skb)
static inline int
drv_xmit_sync(struct ieee802154_local *local, struct sk_buff *skb)
{
- /* don't allow other operations while sync xmit */
- ASSERT_RTNL();
-
might_sleep();
return local->ops->xmit_sync(&local->hw, skb);
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index a13d02b7cee4..6a3e1c2181d3 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -17,9 +17,9 @@
#include <linux/err.h>
#include <linux/bug.h>
#include <linux/completion.h>
-#include <linux/crypto.h>
#include <linux/ieee802154.h>
#include <crypto/aead.h>
+#include <crypto/skcipher.h>
#include "ieee802154_i.h"
#include "llsec.h"
@@ -144,18 +144,18 @@ llsec_key_alloc(const struct ieee802154_llsec_key *template)
goto err_tfm;
}
- key->tfm0 = crypto_alloc_blkcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC);
+ key->tfm0 = crypto_alloc_skcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(key->tfm0))
goto err_tfm;
- if (crypto_blkcipher_setkey(key->tfm0, template->key,
- IEEE802154_LLSEC_KEY_SIZE))
+ if (crypto_skcipher_setkey(key->tfm0, template->key,
+ IEEE802154_LLSEC_KEY_SIZE))
goto err_tfm0;
return key;
err_tfm0:
- crypto_free_blkcipher(key->tfm0);
+ crypto_free_skcipher(key->tfm0);
err_tfm:
for (i = 0; i < ARRAY_SIZE(key->tfm); i++)
if (key->tfm[i])
@@ -175,7 +175,7 @@ static void llsec_key_release(struct kref *ref)
for (i = 0; i < ARRAY_SIZE(key->tfm); i++)
crypto_free_aead(key->tfm[i]);
- crypto_free_blkcipher(key->tfm0);
+ crypto_free_skcipher(key->tfm0);
kzfree(key);
}
@@ -620,15 +620,17 @@ llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
{
u8 iv[16];
struct scatterlist src;
- struct blkcipher_desc req = {
- .tfm = key->tfm0,
- .info = iv,
- .flags = 0,
- };
+ SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
+ int err;
llsec_geniv(iv, sec->params.hwaddr, &hdr->sec);
sg_init_one(&src, skb->data, skb->len);
- return crypto_blkcipher_encrypt_iv(&req, &src, &src, skb->len);
+ skcipher_request_set_tfm(req, key->tfm0);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &src, &src, skb->len, iv);
+ err = crypto_skcipher_encrypt(req);
+ skcipher_request_zero(req);
+ return err;
}
static struct crypto_aead*
@@ -830,11 +832,8 @@ llsec_do_decrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
unsigned char *data;
int datalen;
struct scatterlist src;
- struct blkcipher_desc req = {
- .tfm = key->tfm0,
- .info = iv,
- .flags = 0,
- };
+ SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
+ int err;
llsec_geniv(iv, dev_addr, &hdr->sec);
data = skb_mac_header(skb) + skb->mac_len;
@@ -842,7 +841,13 @@ llsec_do_decrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
sg_init_one(&src, data, datalen);
- return crypto_blkcipher_decrypt_iv(&req, &src, &src, datalen);
+ skcipher_request_set_tfm(req, key->tfm0);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &src, &src, datalen, iv);
+
+ err = crypto_skcipher_decrypt(req);
+ skcipher_request_zero(req);
+ return err;
}
static int
diff --git a/net/mac802154/llsec.h b/net/mac802154/llsec.h
index 950578e1d7be..6f3b658e3279 100644
--- a/net/mac802154/llsec.h
+++ b/net/mac802154/llsec.h
@@ -19,7 +19,6 @@
#include <linux/slab.h>
#include <linux/hashtable.h>
-#include <linux/crypto.h>
#include <linux/kref.h>
#include <linux/spinlock.h>
#include <net/af_ieee802154.h>
@@ -30,7 +29,7 @@ struct mac802154_llsec_key {
/* one tfm for each authsize (4/8/16) */
struct crypto_aead *tfm[3];
- struct crypto_blkcipher *tfm0;
+ struct crypto_skcipher *tfm0;
struct kref ref;
};
diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c
index 8606da459ff3..3db16346cab3 100644
--- a/net/mac802154/mac_cmd.c
+++ b/net/mac802154/mac_cmd.c
@@ -126,7 +126,7 @@ static void mac802154_get_mac_params(struct net_device *dev,
params->lbt = wpan_dev->lbt;
}
-static struct ieee802154_llsec_ops mac802154_llsec_ops = {
+static const struct ieee802154_llsec_ops mac802154_llsec_ops = {
.get_params = mac802154_get_params,
.set_params = mac802154_set_params,
.add_key = mac802154_add_key,
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index 42e96729dae6..446e1300383e 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -217,8 +217,7 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local,
break;
}
- if (skb)
- kfree_skb(skb);
+ kfree_skb(skb);
}
static void
diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index 3827f359b336..7e253455f9dd 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -38,12 +38,6 @@ void ieee802154_xmit_worker(struct work_struct *work)
struct net_device *dev = skb->dev;
int res;
- rtnl_lock();
-
- /* check if ifdown occurred while schedule */
- if (!netif_running(dev))
- goto err_tx;
-
res = drv_xmit_sync(local, skb);
if (res)
goto err_tx;
@@ -53,14 +47,11 @@ void ieee802154_xmit_worker(struct work_struct *work)
dev->stats.tx_packets++;
dev->stats.tx_bytes += skb->len;
- rtnl_unlock();
-
return;
err_tx:
/* Restart the netif queue on each sub_if_data object. */
ieee802154_wake_queue(&local->hw);
- rtnl_unlock();
kfree_skb(skb);
netdev_dbg(dev, "transmission failed\n");
}
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index c70d750148b6..b18c5ed42d95 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -27,6 +27,8 @@
*/
#define MAX_MP_SELECT_LABELS 4
+#define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)
+
static int zero = 0;
static int label_limit = (1 << 20) - 1;
@@ -96,22 +98,15 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
}
EXPORT_SYMBOL_GPL(mpls_pkt_too_big);
-static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
- struct sk_buff *skb, bool bos)
+static u32 mpls_multipath_hash(struct mpls_route *rt,
+ struct sk_buff *skb, bool bos)
{
struct mpls_entry_decoded dec;
struct mpls_shim_hdr *hdr;
bool eli_seen = false;
int label_index;
- int nh_index = 0;
u32 hash = 0;
- /* No need to look further into packet if there's only
- * one path
- */
- if (rt->rt_nhn == 1)
- goto out;
-
for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos;
label_index++) {
if (!pskb_may_pull(skb, sizeof(*hdr) * label_index))
@@ -165,7 +160,38 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
}
}
- nh_index = hash % rt->rt_nhn;
+ return hash;
+}
+
+static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
+ struct sk_buff *skb, bool bos)
+{
+ int alive = ACCESS_ONCE(rt->rt_nhn_alive);
+ u32 hash = 0;
+ int nh_index = 0;
+ int n = 0;
+
+ /* No need to look further into packet if there's only
+ * one path
+ */
+ if (rt->rt_nhn == 1)
+ goto out;
+
+ if (alive <= 0)
+ return NULL;
+
+ hash = mpls_multipath_hash(rt, skb, bos);
+ nh_index = hash % alive;
+ if (alive == rt->rt_nhn)
+ goto out;
+ for_nexthops(rt) {
+ if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
+ continue;
+ if (n == nh_index)
+ return nh;
+ n++;
+ } endfor_nexthops(rt);
+
out:
return &rt->rt_nh[nh_index];
}
@@ -317,7 +343,13 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
}
}
- err = neigh_xmit(nh->nh_via_table, out_dev, mpls_nh_via(rt, nh), skb);
+ /* If via wasn't specified then send out using device address */
+ if (nh->nh_via_table == MPLS_NEIGH_TABLE_UNSPEC)
+ err = neigh_xmit(NEIGH_LINK_TABLE, out_dev,
+ out_dev->dev_addr, skb);
+ else
+ err = neigh_xmit(nh->nh_via_table, out_dev,
+ mpls_nh_via(rt, nh), skb);
if (err)
net_dbg_ratelimited("%s: packet transmission failed: %d\n",
__func__, err);
@@ -365,6 +397,7 @@ static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen)
GFP_KERNEL);
if (rt) {
rt->rt_nhn = num_nh;
+ rt->rt_nhn_alive = num_nh;
rt->rt_max_alen = max_alen_aligned;
}
@@ -534,8 +567,22 @@ static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt,
if (!mpls_dev_get(dev))
goto errout;
+ if ((nh->nh_via_table == NEIGH_LINK_TABLE) &&
+ (dev->addr_len != nh->nh_via_alen))
+ goto errout;
+
RCU_INIT_POINTER(nh->nh_dev, dev);
+ if (!(dev->flags & IFF_UP)) {
+ nh->nh_flags |= RTNH_F_DEAD;
+ } else {
+ unsigned int flags;
+
+ flags = dev_get_flags(dev);
+ if (!(flags & (IFF_RUNNING | IFF_LOWER_UP)))
+ nh->nh_flags |= RTNH_F_LINKDOWN;
+ }
+
return 0;
errout:
@@ -570,6 +617,9 @@ static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
if (err)
goto errout;
+ if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
+ rt->rt_nhn_alive--;
+
return 0;
errout:
@@ -577,8 +627,8 @@ errout:
}
static int mpls_nh_build(struct net *net, struct mpls_route *rt,
- struct mpls_nh *nh, int oif,
- struct nlattr *via, struct nlattr *newdst)
+ struct mpls_nh *nh, int oif, struct nlattr *via,
+ struct nlattr *newdst)
{
int err = -ENOMEM;
@@ -592,10 +642,14 @@ static int mpls_nh_build(struct net *net, struct mpls_route *rt,
goto errout;
}
- err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table,
- __mpls_nh_via(rt, nh));
- if (err)
- goto errout;
+ if (via) {
+ err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table,
+ __mpls_nh_via(rt, nh));
+ if (err)
+ goto errout;
+ } else {
+ nh->nh_via_table = MPLS_NEIGH_TABLE_UNSPEC;
+ }
err = mpls_nh_assign_dev(net, rt, nh, oif);
if (err)
@@ -677,15 +731,14 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg,
nla_newdst = nla_find(attrs, attrlen, RTA_NEWDST);
}
- if (!nla_via)
- goto errout;
-
err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh,
- rtnh->rtnh_ifindex, nla_via,
- nla_newdst);
+ rtnh->rtnh_ifindex, nla_via, nla_newdst);
if (err)
goto errout;
+ if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
+ rt->rt_nhn_alive--;
+
rtnh = rtnh_next(rtnh, &remaining);
nhs++;
} endfor_nexthops(rt);
@@ -875,34 +928,74 @@ free:
return ERR_PTR(err);
}
-static void mpls_ifdown(struct net_device *dev)
+static void mpls_ifdown(struct net_device *dev, int event)
{
struct mpls_route __rcu **platform_label;
struct net *net = dev_net(dev);
- struct mpls_dev *mdev;
unsigned index;
platform_label = rtnl_dereference(net->mpls.platform_label);
for (index = 0; index < net->mpls.platform_labels; index++) {
struct mpls_route *rt = rtnl_dereference(platform_label[index]);
+
if (!rt)
continue;
- for_nexthops(rt) {
+
+ change_nexthops(rt) {
if (rtnl_dereference(nh->nh_dev) != dev)
continue;
- nh->nh_dev = NULL;
+ switch (event) {
+ case NETDEV_DOWN:
+ case NETDEV_UNREGISTER:
+ nh->nh_flags |= RTNH_F_DEAD;
+ /* fall through */
+ case NETDEV_CHANGE:
+ nh->nh_flags |= RTNH_F_LINKDOWN;
+ ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1;
+ break;
+ }
+ if (event == NETDEV_UNREGISTER)
+ RCU_INIT_POINTER(nh->nh_dev, NULL);
} endfor_nexthops(rt);
}
- mdev = mpls_dev_get(dev);
- if (!mdev)
- return;
- mpls_dev_sysctl_unregister(mdev);
+ return;
+}
+
+static void mpls_ifup(struct net_device *dev, unsigned int nh_flags)
+{
+ struct mpls_route __rcu **platform_label;
+ struct net *net = dev_net(dev);
+ unsigned index;
+ int alive;
+
+ platform_label = rtnl_dereference(net->mpls.platform_label);
+ for (index = 0; index < net->mpls.platform_labels; index++) {
+ struct mpls_route *rt = rtnl_dereference(platform_label[index]);
+
+ if (!rt)
+ continue;
+
+ alive = 0;
+ change_nexthops(rt) {
+ struct net_device *nh_dev =
+ rtnl_dereference(nh->nh_dev);
- RCU_INIT_POINTER(dev->mpls_ptr, NULL);
+ if (!(nh->nh_flags & nh_flags)) {
+ alive++;
+ continue;
+ }
+ if (nh_dev != dev)
+ continue;
+ alive++;
+ nh->nh_flags &= ~nh_flags;
+ } endfor_nexthops(rt);
+
+ ACCESS_ONCE(rt->rt_nhn_alive) = alive;
+ }
- kfree_rcu(mdev, rcu);
+ return;
}
static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
@@ -910,9 +1003,9 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct mpls_dev *mdev;
+ unsigned int flags;
- switch(event) {
- case NETDEV_REGISTER:
+ if (event == NETDEV_REGISTER) {
/* For now just support ethernet devices */
if ((dev->type == ARPHRD_ETHER) ||
(dev->type == ARPHRD_LOOPBACK)) {
@@ -920,10 +1013,39 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
if (IS_ERR(mdev))
return notifier_from_errno(PTR_ERR(mdev));
}
- break;
+ return NOTIFY_OK;
+ }
+
+ mdev = mpls_dev_get(dev);
+ if (!mdev)
+ return NOTIFY_OK;
+ switch (event) {
+ case NETDEV_DOWN:
+ mpls_ifdown(dev, event);
+ break;
+ case NETDEV_UP:
+ flags = dev_get_flags(dev);
+ if (flags & (IFF_RUNNING | IFF_LOWER_UP))
+ mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN);
+ else
+ mpls_ifup(dev, RTNH_F_DEAD);
+ break;
+ case NETDEV_CHANGE:
+ flags = dev_get_flags(dev);
+ if (flags & (IFF_RUNNING | IFF_LOWER_UP))
+ mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN);
+ else
+ mpls_ifdown(dev, event);
+ break;
case NETDEV_UNREGISTER:
- mpls_ifdown(dev);
+ mpls_ifdown(dev, event);
+ mdev = mpls_dev_get(dev);
+ if (mdev) {
+ mpls_dev_sysctl_unregister(mdev);
+ RCU_INIT_POINTER(dev->mpls_ptr, NULL);
+ kfree_rcu(mdev, rcu);
+ }
break;
case NETDEV_CHANGENAME:
mdev = mpls_dev_get(dev);
@@ -1118,6 +1240,7 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->rc_label = LABEL_NOT_SPECIFIED;
cfg->rc_protocol = rtm->rtm_protocol;
+ cfg->rc_via_table = MPLS_NEIGH_TABLE_UNSPEC;
cfg->rc_nlflags = nlh->nlmsg_flags;
cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid;
cfg->rc_nlinfo.nlh = nlh;
@@ -1231,15 +1354,22 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
nla_put_labels(skb, RTA_NEWDST, nh->nh_labels,
nh->nh_label))
goto nla_put_failure;
- if (nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
+ if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
+ nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
nh->nh_via_alen))
goto nla_put_failure;
dev = rtnl_dereference(nh->nh_dev);
if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
goto nla_put_failure;
+ if (nh->nh_flags & RTNH_F_LINKDOWN)
+ rtm->rtm_flags |= RTNH_F_LINKDOWN;
+ if (nh->nh_flags & RTNH_F_DEAD)
+ rtm->rtm_flags |= RTNH_F_DEAD;
} else {
struct rtnexthop *rtnh;
struct nlattr *mp;
+ int dead = 0;
+ int linkdown = 0;
mp = nla_nest_start(skb, RTA_MULTIPATH);
if (!mp)
@@ -1253,11 +1383,21 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
dev = rtnl_dereference(nh->nh_dev);
if (dev)
rtnh->rtnh_ifindex = dev->ifindex;
+ if (nh->nh_flags & RTNH_F_LINKDOWN) {
+ rtnh->rtnh_flags |= RTNH_F_LINKDOWN;
+ linkdown++;
+ }
+ if (nh->nh_flags & RTNH_F_DEAD) {
+ rtnh->rtnh_flags |= RTNH_F_DEAD;
+ dead++;
+ }
+
if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST,
nh->nh_labels,
nh->nh_label))
goto nla_put_failure;
- if (nla_put_via(skb, nh->nh_via_table,
+ if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
+ nla_put_via(skb, nh->nh_via_table,
mpls_nh_via(rt, nh),
nh->nh_via_alen))
goto nla_put_failure;
@@ -1266,6 +1406,11 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
} endfor_nexthops(rt);
+ if (linkdown == rt->rt_nhn)
+ rtm->rtm_flags |= RTNH_F_LINKDOWN;
+ if (dead == rt->rt_nhn)
+ rtm->rtm_flags |= RTNH_F_DEAD;
+
nla_nest_end(skb, mp);
}
@@ -1319,7 +1464,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
if (nh->nh_dev)
payload += nla_total_size(4); /* RTA_OIF */
- payload += nla_total_size(2 + nh->nh_via_alen); /* RTA_VIA */
+ if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC) /* RTA_VIA */
+ payload += nla_total_size(2 + nh->nh_via_alen);
if (nh->nh_labels) /* RTA_NEWDST */
payload += nla_total_size(nh->nh_labels * 4);
} else {
@@ -1328,7 +1474,9 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
for_nexthops(rt) {
nhsize += nla_total_size(sizeof(struct rtnexthop));
- nhsize += nla_total_size(2 + nh->nh_via_alen);
+ /* RTA_VIA */
+ if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC)
+ nhsize += nla_total_size(2 + nh->nh_via_alen);
if (nh->nh_labels)
nhsize += nla_total_size(nh->nh_labels * 4);
} endfor_nexthops(rt);
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index bde52ce88c94..732a5c17e986 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -41,6 +41,7 @@ enum mpls_payload_type {
struct mpls_nh { /* next hop label forwarding entry */
struct net_device __rcu *nh_dev;
+ unsigned int nh_flags;
u32 nh_label[MAX_NEW_LABELS];
u8 nh_labels;
u8 nh_via_alen;
@@ -74,6 +75,7 @@ struct mpls_route { /* next hop label forwarding entry */
u8 rt_payload_type;
u8 rt_max_alen;
unsigned int rt_nhn;
+ unsigned int rt_nhn_alive;
struct mpls_nh rt_nh[0];
};
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 67591aef9cae..fb31aa87de81 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -37,7 +37,7 @@ static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en)
return en->labels * sizeof(struct mpls_shim_hdr);
}
-int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct mpls_iptunnel_encap *tun_encap_info;
struct mpls_shim_hdr *hdr;
@@ -54,10 +54,10 @@ int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb)
unsigned int ttl;
/* Obtain the ttl */
- if (skb->protocol == htons(ETH_P_IP)) {
+ if (dst->ops->family == AF_INET) {
ttl = ip_hdr(skb)->ttl;
rt = (struct rtable *)dst;
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ } else if (dst->ops->family == AF_INET6) {
ttl = ipv6_hdr(skb)->hop_limit;
rt6 = (struct rt6_info *)dst;
} else {
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e22349ea7256..95e757c377f9 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -563,6 +563,28 @@ config NFT_COMPAT
x_tables match/target extensions over the nf_tables
framework.
+if NF_TABLES_NETDEV
+
+config NF_DUP_NETDEV
+ tristate "Netfilter packet duplication support"
+ help
+ This option enables the generic packet duplication infrastructure
+ for Netfilter.
+
+config NFT_DUP_NETDEV
+ tristate "Netfilter nf_tables netdev packet duplication support"
+ select NF_DUP_NETDEV
+ help
+ This option enables packet duplication for the "netdev" family.
+
+config NFT_FWD_NETDEV
+ tristate "Netfilter nf_tables netdev packet forwarding support"
+ select NF_DUP_NETDEV
+ help
+ This option enables packet forwarding for the "netdev" family.
+
+endif # NF_TABLES_NETDEV
+
endif # NF_TABLES
config NETFILTER_XTABLES
@@ -869,7 +891,7 @@ config NETFILTER_XT_TARGET_TEE
depends on IPV6 || IPV6=n
depends on !NF_CONNTRACK || NF_CONNTRACK
select NF_DUP_IPV4
- select NF_DUP_IPV6 if IP6_NF_IPTABLES
+ select NF_DUP_IPV6 if IPV6
---help---
This option adds a "TEE" target with which a packet can be cloned and
this clone be rerouted to another nexthop.
@@ -882,7 +904,7 @@ config NETFILTER_XT_TARGET_TPROXY
depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
depends on IP_NF_MANGLE
select NF_DEFRAG_IPV4
- select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
+ select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
help
This option adds a `TPROXY' target, which is somewhat similar to
REDIRECT. It can only be used in the mangle table and is useful
@@ -1375,7 +1397,7 @@ config NETFILTER_XT_MATCH_SOCKET
depends on IPV6 || IPV6=n
depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
select NF_DEFRAG_IPV4
- select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
+ select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
help
This option adds a `socket' match, which can be used to match
packets for which a TCP or UDP socket lookup finds a valid socket.
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 7638c36b498c..69134541d65b 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -66,8 +66,11 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
# SYNPROXY
obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
+# generic packet duplication from netdev family
+obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o
+
# nf_tables
-nf_tables-objs += nf_tables_core.o nf_tables_api.o
+nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o
nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o
nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
@@ -90,6 +93,10 @@ obj-$(CONFIG_NFT_LOG) += nft_log.o
obj-$(CONFIG_NFT_MASQ) += nft_masq.o
obj-$(CONFIG_NFT_REDIR) += nft_redir.o
+# nf_tables netdev
+obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
+obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
+
# generic X tables
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index d05e759ed0fa..b0bc475f641e 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -33,7 +33,7 @@
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
#define mtype MTYPE
-#define get_ext(set, map, id) ((map)->extensions + (set)->dsize * (id))
+#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
static void
mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
@@ -67,12 +67,9 @@ mtype_destroy(struct ip_set *set)
del_timer_sync(&map->gc);
ip_set_free(map->members);
- if (set->dsize) {
- if (set->extensions & IPSET_EXT_DESTROY)
- mtype_ext_cleanup(set);
- ip_set_free(map->extensions);
- }
- kfree(map);
+ if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
+ mtype_ext_cleanup(set);
+ ip_set_free(map);
set->data = NULL;
}
@@ -92,16 +89,14 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
{
const struct mtype *map = set->data;
struct nlattr *nested;
+ size_t memsize = sizeof(*map) + map->memsize;
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested)
goto nla_put_failure;
if (mtype_do_head(skb, map) ||
nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
- nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
- htonl(sizeof(*map) +
- map->memsize +
- set->dsize * map->elements)))
+ nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
goto nla_put_failure;
if (unlikely(ip_set_put_flags(skb, set)))
goto nla_put_failure;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 64a564334418..4783efff0bde 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -41,7 +41,6 @@ MODULE_ALIAS("ip_set_bitmap:ip");
/* Type structure */
struct bitmap_ip {
void *members; /* the set members */
- void *extensions; /* data extensions */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -49,6 +48,8 @@ struct bitmap_ip {
size_t memsize; /* members size */
u8 netmask; /* subnet netmask */
struct timer_list gc; /* garbage collection */
+ unsigned char extensions[0] /* data extensions */
+ __aligned(__alignof__(u64));
};
/* ADT structure for generic function args */
@@ -224,13 +225,6 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
map->members = ip_set_alloc(map->memsize);
if (!map->members)
return false;
- if (set->dsize) {
- map->extensions = ip_set_alloc(set->dsize * elements);
- if (!map->extensions) {
- kfree(map->members);
- return false;
- }
- }
map->first_ip = first_ip;
map->last_ip = last_ip;
map->elements = elements;
@@ -316,13 +310,13 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
pr_debug("hosts %u, elements %llu\n",
hosts, (unsigned long long)elements);
- map = kzalloc(sizeof(*map), GFP_KERNEL);
+ set->dsize = ip_set_elem_len(set, tb, 0, 0);
+ map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
if (!map)
return -ENOMEM;
map->memsize = bitmap_bytes(0, elements - 1);
set->variant = &bitmap_ip;
- set->dsize = ip_set_elem_len(set, tb, 0);
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 1430535118fb..29dde208381d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -47,24 +47,26 @@ enum {
/* Type structure */
struct bitmap_ipmac {
void *members; /* the set members */
- void *extensions; /* MAC + data extensions */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
size_t memsize; /* members size */
struct timer_list gc; /* garbage collector */
+ unsigned char extensions[0] /* MAC + data extensions */
+ __aligned(__alignof__(u64));
};
/* ADT structure for generic function args */
struct bitmap_ipmac_adt_elem {
+ unsigned char ether[ETH_ALEN] __aligned(2);
u16 id;
- unsigned char *ether;
+ u16 add_mac;
};
struct bitmap_ipmac_elem {
unsigned char ether[ETH_ALEN];
unsigned char filled;
-} __attribute__ ((aligned));
+} __aligned(__alignof__(u64));
static inline u32
ip_to_id(const struct bitmap_ipmac *m, u32 ip)
@@ -72,11 +74,11 @@ ip_to_id(const struct bitmap_ipmac *m, u32 ip)
return ip - m->first_ip;
}
-static inline struct bitmap_ipmac_elem *
-get_elem(void *extensions, u16 id, size_t dsize)
-{
- return (struct bitmap_ipmac_elem *)(extensions + id * dsize);
-}
+#define get_elem(extensions, id, dsize) \
+ (struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
+
+#define get_const_elem(extensions, id, dsize) \
+ (const struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
/* Common functions */
@@ -88,10 +90,9 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
if (!test_bit(e->id, map->members))
return 0;
- elem = get_elem(map->extensions, e->id, dsize);
- if (elem->filled == MAC_FILLED)
- return !e->ether ||
- ether_addr_equal(e->ether, elem->ether);
+ elem = get_const_elem(map->extensions, e->id, dsize);
+ if (e->add_mac && elem->filled == MAC_FILLED)
+ return ether_addr_equal(e->ether, elem->ether);
/* Trigger kernel to fill out the ethernet address */
return -EAGAIN;
}
@@ -103,7 +104,7 @@ bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
if (!test_bit(id, map->members))
return 0;
- elem = get_elem(map->extensions, id, dsize);
+ elem = get_const_elem(map->extensions, id, dsize);
/* Timer not started for the incomplete elements */
return elem->filled == MAC_FILLED;
}
@@ -133,7 +134,7 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
* and we can reuse it later when MAC is filled out,
* possibly by the kernel
*/
- if (e->ether)
+ if (e->add_mac)
ip_set_timeout_set(timeout, t);
else
*timeout = t;
@@ -150,7 +151,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
elem = get_elem(map->extensions, e->id, dsize);
if (test_bit(e->id, map->members)) {
if (elem->filled == MAC_FILLED) {
- if (e->ether &&
+ if (e->add_mac &&
(flags & IPSET_FLAG_EXIST) &&
!ether_addr_equal(e->ether, elem->ether)) {
/* memcpy isn't atomic */
@@ -159,7 +160,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
ether_addr_copy(elem->ether, e->ether);
}
return IPSET_ADD_FAILED;
- } else if (!e->ether)
+ } else if (!e->add_mac)
/* Already added without ethernet address */
return IPSET_ADD_FAILED;
/* Fill the MAC address and trigger the timer activation */
@@ -168,7 +169,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
ether_addr_copy(elem->ether, e->ether);
elem->filled = MAC_FILLED;
return IPSET_ADD_START_STORED_TIMEOUT;
- } else if (e->ether) {
+ } else if (e->add_mac) {
/* We can store MAC too */
ether_addr_copy(elem->ether, e->ether);
elem->filled = MAC_FILLED;
@@ -191,7 +192,7 @@ bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
u32 id, size_t dsize)
{
const struct bitmap_ipmac_elem *elem =
- get_elem(map->extensions, id, dsize);
+ get_const_elem(map->extensions, id, dsize);
return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
htonl(map->first_ip + id)) ||
@@ -213,7 +214,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
{
struct bitmap_ipmac *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct bitmap_ipmac_adt_elem e = { .id = 0 };
+ struct bitmap_ipmac_adt_elem e = { .id = 0, .add_mac = 1 };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
u32 ip;
@@ -231,7 +232,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
return -EINVAL;
e.id = ip_to_id(map, ip);
- e.ether = eth_hdr(skb)->h_source;
+ memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
@@ -265,11 +266,10 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
return -IPSET_ERR_BITMAP_RANGE;
e.id = ip_to_id(map, ip);
- if (tb[IPSET_ATTR_ETHER])
- e.ether = nla_data(tb[IPSET_ATTR_ETHER]);
- else
- e.ether = NULL;
-
+ if (tb[IPSET_ATTR_ETHER]) {
+ memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+ e.add_mac = 1;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
return ip_set_eexist(ret, flags) ? 0 : ret;
@@ -300,13 +300,6 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
map->members = ip_set_alloc(map->memsize);
if (!map->members)
return false;
- if (set->dsize) {
- map->extensions = ip_set_alloc(set->dsize * elements);
- if (!map->extensions) {
- kfree(map->members);
- return false;
- }
- }
map->first_ip = first_ip;
map->last_ip = last_ip;
map->elements = elements;
@@ -361,14 +354,15 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (elements > IPSET_BITMAP_MAX_RANGE + 1)
return -IPSET_ERR_BITMAP_RANGE_SIZE;
- map = kzalloc(sizeof(*map), GFP_KERNEL);
+ set->dsize = ip_set_elem_len(set, tb,
+ sizeof(struct bitmap_ipmac_elem),
+ __alignof__(struct bitmap_ipmac_elem));
+ map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
if (!map)
return -ENOMEM;
map->memsize = bitmap_bytes(0, elements - 1);
set->variant = &bitmap_ipmac;
- set->dsize = ip_set_elem_len(set, tb,
- sizeof(struct bitmap_ipmac_elem));
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 5338ccd5da46..7f0c733358a4 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -35,12 +35,13 @@ MODULE_ALIAS("ip_set_bitmap:port");
/* Type structure */
struct bitmap_port {
void *members; /* the set members */
- void *extensions; /* data extensions */
u16 first_port; /* host byte order, included in range */
u16 last_port; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
size_t memsize; /* members size */
struct timer_list gc; /* garbage collection */
+ unsigned char extensions[0] /* data extensions */
+ __aligned(__alignof__(u64));
};
/* ADT structure for generic function args */
@@ -209,13 +210,6 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
map->members = ip_set_alloc(map->memsize);
if (!map->members)
return false;
- if (set->dsize) {
- map->extensions = ip_set_alloc(set->dsize * map->elements);
- if (!map->extensions) {
- kfree(map->members);
- return false;
- }
- }
map->first_port = first_port;
map->last_port = last_port;
set->timeout = IPSET_NO_TIMEOUT;
@@ -232,6 +226,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
{
struct bitmap_port *map;
u16 first_port, last_port;
+ u32 elements;
if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
@@ -248,14 +243,15 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
last_port = tmp;
}
- map = kzalloc(sizeof(*map), GFP_KERNEL);
+ elements = last_port - first_port + 1;
+ set->dsize = ip_set_elem_len(set, tb, 0, 0);
+ map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
if (!map)
return -ENOMEM;
- map->elements = last_port - first_port + 1;
+ map->elements = elements;
map->memsize = bitmap_bytes(0, map->elements);
set->variant = &bitmap_port;
- set->dsize = ip_set_elem_len(set, tb, 0);
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 69ab9c2634e1..95db43fc0303 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -364,25 +364,27 @@ add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
}
size_t
-ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
+ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len,
+ size_t align)
{
enum ip_set_ext_id id;
- size_t offset = len;
u32 cadt_flags = 0;
if (tb[IPSET_ATTR_CADT_FLAGS])
cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
set->flags |= IPSET_CREATE_FLAG_FORCEADD;
+ if (!align)
+ align = 1;
for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
if (!add_extension(id, cadt_flags, tb))
continue;
- offset = ALIGN(offset, ip_set_extensions[id].align);
- set->offset[id] = offset;
+ len = ALIGN(len, ip_set_extensions[id].align);
+ set->offset[id] = len;
set->extensions |= ip_set_extensions[id].type;
- offset += ip_set_extensions[id].len;
+ len += ip_set_extensions[id].len;
}
- return offset;
+ return ALIGN(len, align);
}
EXPORT_SYMBOL_GPL(ip_set_elem_len);
@@ -823,20 +825,17 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
return 0;
}
-static int
-ip_set_none(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_none(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
return -EOPNOTSUPP;
}
-static int
-ip_set_create(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_create(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
- struct net *net = sock_net(ctnl);
struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set, *clash = NULL;
ip_set_id_t index = IPSET_INVALID_ID;
@@ -974,12 +973,11 @@ ip_set_destroy_set(struct ip_set *set)
kfree(set);
}
-static int
-ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_destroy(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
+ struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *s;
ip_set_id_t i;
int ret = 0;
@@ -1050,12 +1048,11 @@ ip_set_flush_set(struct ip_set *set)
spin_unlock_bh(&set->lock);
}
-static int
-ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
+ struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *s;
ip_set_id_t i;
@@ -1090,12 +1087,11 @@ ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
.len = IPSET_MAXNAMELEN - 1 },
};
-static int
-ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_rename(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
+ struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set, *s;
const char *name2;
ip_set_id_t i;
@@ -1140,12 +1136,11 @@ out:
* so the ip_set_list always contains valid pointers to the sets.
*/
-static int
-ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
+ struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *from, *to;
ip_set_id_t from_id, to_id;
char from_name[IPSET_MAXNAMELEN];
@@ -1411,10 +1406,9 @@ out:
return ret < 0 ? ret : skb->len;
}
-static int
-ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
if (unlikely(protocol_failed(attr)))
return -IPSET_ERR_PROTOCOL;
@@ -1498,12 +1492,11 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
return ret;
}
-static int
-ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
+ struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set;
struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
const struct nlattr *nla;
@@ -1553,12 +1546,11 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
return ret;
}
-static int
-ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
+ struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set;
struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
const struct nlattr *nla;
@@ -1608,12 +1600,11 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
return ret;
}
-static int
-ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
+ struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set;
struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
int ret = 0;
@@ -1644,12 +1635,11 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
/* Get headed data of a set */
-static int
-ip_set_header(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_header(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
+ struct ip_set_net *inst = ip_set_pernet(net);
const struct ip_set *set;
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
@@ -1701,10 +1691,9 @@ static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
[IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
};
-static int
-ip_set_type(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
@@ -1760,10 +1749,9 @@ ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
[IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
};
-static int
-ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_protocol(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 691b54fcaf2a..e5336ab36d67 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -72,8 +72,9 @@ struct hbucket {
DECLARE_BITMAP(used, AHASH_MAX_TUNED);
u8 size; /* size of the array */
u8 pos; /* position of the first free entry */
- unsigned char value[0]; /* the array of the values */
-} __attribute__ ((aligned));
+ unsigned char value[0] /* the array of the values */
+ __aligned(__alignof__(u64));
+};
/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
@@ -475,7 +476,7 @@ static void
mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
{
struct htable *t;
- struct hbucket *n;
+ struct hbucket *n, *tmp;
struct mtype_elem *data;
u32 i, j, d;
#ifdef IP_SET_HASH_WITH_NETS
@@ -510,9 +511,14 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
}
}
if (d >= AHASH_INIT_SIZE) {
- struct hbucket *tmp = kzalloc(sizeof(*tmp) +
- (n->size - AHASH_INIT_SIZE) * dsize,
- GFP_ATOMIC);
+ if (d >= n->size) {
+ rcu_assign_pointer(hbucket(t, i), NULL);
+ kfree_rcu(n, rcu);
+ continue;
+ }
+ tmp = kzalloc(sizeof(*tmp) +
+ (n->size - AHASH_INIT_SIZE) * dsize,
+ GFP_ATOMIC);
if (!tmp)
/* Still try to delete expired elements */
continue;
@@ -522,7 +528,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
continue;
data = ahash_data(n, j, dsize);
memcpy(tmp->value + d * dsize, data, dsize);
- set_bit(j, tmp->used);
+ set_bit(d, tmp->used);
d++;
}
tmp->pos = d;
@@ -1323,12 +1329,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
#endif
set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
set->dsize = ip_set_elem_len(set, tb,
- sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
+ sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
+ __alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
#ifndef IP_SET_PROTO_UNDEF
} else {
set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
set->dsize = ip_set_elem_len(set, tb,
- sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)));
+ sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
+ __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
}
#endif
if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 43d8c9896fa3..f0f688db6213 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -164,8 +164,6 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- if (e.cidr == 0)
- return -EINVAL;
if (adt == IPSET_TEST)
e.cidr = HOST_MASK;
@@ -377,8 +375,6 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- if (e.cidr == 0)
- return -EINVAL;
if (adt == IPSET_TEST)
e.cidr = HOST_MASK;
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 5a30ce6e8c90..bbede95c9f68 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -31,7 +31,7 @@ struct set_elem {
struct rcu_head rcu;
struct list_head list;
ip_set_id_t id;
-};
+} __aligned(__alignof__(u64));
struct set_adt_elem {
ip_set_id_t id;
@@ -618,7 +618,8 @@ list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
size = IP_SET_LIST_MIN_SIZE;
set->variant = &set_variant;
- set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem));
+ set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem),
+ __alignof__(struct set_elem));
if (!init_list_set(net, set, size))
return -ENOMEM;
if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 1e24fff53e4b..f57b4dcdb233 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1176,6 +1176,7 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
+ struct sock *sk;
EnterFunction(11);
@@ -1183,13 +1184,12 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
if (skb->ipvs_property)
return NF_ACCEPT;
+ sk = skb_to_full_sk(skb);
/* Bad... Do not break raw sockets */
- if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+ if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
- struct sock *sk = skb->sk;
- struct inet_sock *inet = inet_sk(skb->sk);
- if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+ if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
return NF_ACCEPT;
}
@@ -1681,6 +1681,7 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
struct ip_vs_conn *cp;
int ret, pkts;
int conn_reuse_mode;
+ struct sock *sk;
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
@@ -1708,12 +1709,11 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
ip_vs_fill_iph_skb(af, skb, false, &iph);
/* Bad... Do not break raw sockets */
- if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+ sk = skb_to_full_sk(skb);
+ if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
- struct sock *sk = skb->sk;
- struct inet_sock *inet = inet_sk(skb->sk);
- if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+ if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
return NF_ACCEPT;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 010ddeec135f..d952d67f904d 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -169,7 +169,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
/* Only update csum if we really have to */
if (sctph->dest != cp->dport || payload_csum ||
(skb->ip_summed == CHECKSUM_PARTIAL &&
- !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) {
+ !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) {
sctph->dest = cp->dport;
sctp_nat_csum(skb, sctph, sctphoff);
} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3cb3cb831591..f60b4fdeeb8c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -66,6 +66,21 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
+static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
+static __read_mostly bool nf_conntrack_locks_all;
+
+void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
+{
+ spin_lock(lock);
+ while (unlikely(nf_conntrack_locks_all)) {
+ spin_unlock(lock);
+ spin_lock(&nf_conntrack_locks_all_lock);
+ spin_unlock(&nf_conntrack_locks_all_lock);
+ spin_lock(lock);
+ }
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_lock);
+
static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
{
h1 %= CONNTRACK_LOCKS;
@@ -82,12 +97,12 @@ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
h1 %= CONNTRACK_LOCKS;
h2 %= CONNTRACK_LOCKS;
if (h1 <= h2) {
- spin_lock(&nf_conntrack_locks[h1]);
+ nf_conntrack_lock(&nf_conntrack_locks[h1]);
if (h1 != h2)
spin_lock_nested(&nf_conntrack_locks[h2],
SINGLE_DEPTH_NESTING);
} else {
- spin_lock(&nf_conntrack_locks[h2]);
+ nf_conntrack_lock(&nf_conntrack_locks[h2]);
spin_lock_nested(&nf_conntrack_locks[h1],
SINGLE_DEPTH_NESTING);
}
@@ -102,16 +117,19 @@ static void nf_conntrack_all_lock(void)
{
int i;
- for (i = 0; i < CONNTRACK_LOCKS; i++)
- spin_lock_nested(&nf_conntrack_locks[i], i);
+ spin_lock(&nf_conntrack_locks_all_lock);
+ nf_conntrack_locks_all = true;
+
+ for (i = 0; i < CONNTRACK_LOCKS; i++) {
+ spin_lock(&nf_conntrack_locks[i]);
+ spin_unlock(&nf_conntrack_locks[i]);
+ }
}
static void nf_conntrack_all_unlock(void)
{
- int i;
-
- for (i = 0; i < CONNTRACK_LOCKS; i++)
- spin_unlock(&nf_conntrack_locks[i]);
+ nf_conntrack_locks_all = false;
+ spin_unlock(&nf_conntrack_locks_all_lock);
}
unsigned int nf_conntrack_htable_size __read_mostly;
@@ -757,7 +775,7 @@ restart:
hash = hash_bucket(_hash, net);
for (; i < net->ct.htable_size; i++) {
lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
- spin_lock(lockp);
+ nf_conntrack_lock(lockp);
if (read_seqcount_retry(&net->ct.generation, sequence)) {
spin_unlock(lockp);
goto restart;
@@ -1382,7 +1400,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
for (; *bucket < net->ct.htable_size; (*bucket)++) {
lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
local_bh_disable();
- spin_lock(lockp);
+ nf_conntrack_lock(lockp);
if (*bucket < net->ct.htable_size) {
hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
@@ -1394,6 +1412,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
}
spin_unlock(lockp);
local_bh_enable();
+ cond_resched();
}
for_each_possible_cpu(cpu) {
@@ -1406,6 +1425,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
set_bit(IPS_DYING_BIT, &ct->status);
}
spin_unlock_bh(&pcpu->lock);
+ cond_resched();
}
return NULL;
found:
@@ -1422,6 +1442,8 @@ void nf_ct_iterate_cleanup(struct net *net,
struct nf_conn *ct;
unsigned int bucket = 0;
+ might_sleep();
+
while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
/* Time to push up daises... */
if (del_timer(&ct->timeout))
@@ -1430,6 +1452,7 @@ void nf_ct_iterate_cleanup(struct net *net,
/* ... else the timer will get him soon. */
nf_ct_put(ct);
+ cond_resched();
}
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index acf5c7b3f378..278927ab0948 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -596,11 +596,18 @@ static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
struct proc_dir_entry *proc;
+ kuid_t root_uid;
+ kgid_t root_gid;
proc = proc_create("nf_conntrack_expect", 0440, net->proc_net,
&exp_file_ops);
if (!proc)
return -ENOMEM;
+
+ root_uid = make_kuid(net->user_ns, 0);
+ root_gid = make_kgid(net->user_ns, 0);
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
return 0;
}
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index b666959f17c0..883c691ec8d0 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -10,6 +10,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/netfilter.h>
@@ -505,11 +507,11 @@ skip_nl_seq:
different IP address. Simply don't record it for
NAT. */
if (cmd.l3num == PF_INET) {
- pr_debug("conntrack_ftp: NOT RECORDING: %pI4 != %pI4\n",
+ pr_debug("NOT RECORDING: %pI4 != %pI4\n",
&cmd.u3.ip,
&ct->tuplehash[dir].tuple.src.u3.ip);
} else {
- pr_debug("conntrack_ftp: NOT RECORDING: %pI6 != %pI6\n",
+ pr_debug("NOT RECORDING: %pI6 != %pI6\n",
cmd.u3.ip6,
ct->tuplehash[dir].tuple.src.u3.ip6);
}
@@ -586,8 +588,7 @@ static void nf_conntrack_ftp_fini(void)
if (ftp[i][j].me == NULL)
continue;
- pr_debug("nf_ct_ftp: unregistering helper for pf: %d "
- "port: %d\n",
+ pr_debug("unregistering helper for pf: %d port: %d\n",
ftp[i][j].tuple.src.l3num, ports[i]);
nf_conntrack_helper_unregister(&ftp[i][j]);
}
@@ -625,14 +626,12 @@ static int __init nf_conntrack_ftp_init(void)
else
sprintf(ftp[i][j].name, "ftp-%d", ports[i]);
- pr_debug("nf_ct_ftp: registering helper for pf: %d "
- "port: %d\n",
+ pr_debug("registering helper for pf: %d port: %d\n",
ftp[i][j].tuple.src.l3num, ports[i]);
ret = nf_conntrack_helper_register(&ftp[i][j]);
if (ret) {
- printk(KERN_ERR "nf_ct_ftp: failed to register"
- " helper for pf: %d port: %d\n",
- ftp[i][j].tuple.src.l3num, ports[i]);
+ pr_err("failed to register helper for pf: %d port: %d\n",
+ ftp[i][j].tuple.src.l3num, ports[i]);
nf_conntrack_ftp_fini();
return ret;
}
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index bd9d31537905..3b40ec575cd5 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -425,7 +425,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
}
local_bh_disable();
for (i = 0; i < net->ct.htable_size; i++) {
- spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+ nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
if (i < net->ct.htable_size) {
hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
unhelp(h, me);
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 0fd2976db7ee..8b6da2719600 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/skbuff.h>
@@ -237,7 +239,7 @@ static int __init nf_conntrack_irc_init(void)
int i, ret;
if (max_dcc_channels < 1) {
- printk(KERN_ERR "nf_ct_irc: max_dcc_channels must not be zero\n");
+ pr_err("max_dcc_channels must not be zero\n");
return -EINVAL;
}
@@ -267,8 +269,7 @@ static int __init nf_conntrack_irc_init(void)
ret = nf_conntrack_helper_register(&irc[i]);
if (ret) {
- printk(KERN_ERR "nf_ct_irc: failed to register helper "
- "for pf: %u port: %u\n",
+ pr_err("failed to register helper for pf: %u port: %u\n",
irc[i].tuple.src.l3num, ports[i]);
nf_conntrack_irc_fini();
return ret;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 9f5272968abb..355e8552fd5b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -840,7 +840,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
restart:
lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
- spin_lock(lockp);
+ nf_conntrack_lock(lockp);
if (cb->args[0] >= net->ct.htable_size) {
spin_unlock(lockp);
goto out;
@@ -1113,12 +1113,11 @@ static int ctnetlink_flush_conntrack(struct net *net,
return 0;
}
-static int
-ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
- struct net *net = sock_net(ctnl);
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
struct nf_conn *ct;
@@ -1168,12 +1167,11 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
return 0;
}
-static int
-ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
- struct net *net = sock_net(ctnl);
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
struct nf_conn *ct;
@@ -1330,10 +1328,10 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
return ctnetlink_dump_list(skb, cb, true);
}
-static int
-ctnetlink_get_ct_dying(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_get_ct_dying(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
@@ -1352,10 +1350,10 @@ ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
return ctnetlink_dump_list(skb, cb, false);
}
-static int
-ctnetlink_get_ct_unconfirmed(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_get_ct_unconfirmed(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
@@ -1865,12 +1863,11 @@ err1:
return ERR_PTR(err);
}
-static int
-ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
- struct net *net = sock_net(ctnl);
struct nf_conntrack_tuple otuple, rtuple;
struct nf_conntrack_tuple_hash *h = NULL;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -2034,10 +2031,10 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-static int
-ctnetlink_stat_ct_cpu(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_stat_ct_cpu(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
@@ -2080,10 +2077,9 @@ nlmsg_failure:
return -1;
}
-static int
-ctnetlink_stat_ct(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_stat_ct(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
struct sk_buff *skb2;
int err;
@@ -2729,12 +2725,12 @@ out:
return skb->len;
}
-static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
+static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[])
{
int err;
- struct net *net = sock_net(ctnl);
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
struct nf_conntrack_tuple tuple;
@@ -2768,12 +2764,10 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
return err;
}
-static int
-ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
- struct net *net = sock_net(ctnl);
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
struct sk_buff *skb2;
@@ -2784,7 +2778,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
if (cda[CTA_EXPECT_MASTER])
- return ctnetlink_dump_exp_ct(ctnl, skb, nlh, cda);
+ return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda);
else {
struct netlink_dump_control c = {
.dump = ctnetlink_exp_dump_table,
@@ -2850,12 +2844,10 @@ out:
return err == -EAGAIN ? -ENOBUFS : err;
}
-static int
-ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
- struct net *net = sock_net(ctnl);
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple tuple;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -3136,12 +3128,10 @@ err_ct:
return err;
}
-static int
-ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_new_expect(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
- struct net *net = sock_net(ctnl);
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -3242,10 +3232,10 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-static int
-ctnetlink_stat_exp_cpu(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_stat_exp_cpu(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index 4a2134fd3fcb..7523a575f6d1 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -17,6 +17,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/netfilter.h>
@@ -120,14 +122,14 @@ static int help(struct sk_buff *skb,
ct_sane_info->state = SANE_STATE_NORMAL;
if (datalen < sizeof(struct sane_reply_net_start)) {
- pr_debug("nf_ct_sane: NET_START reply too short\n");
+ pr_debug("NET_START reply too short\n");
goto out;
}
reply = sb_ptr;
if (reply->status != htonl(SANE_STATUS_SUCCESS)) {
/* saned refused the command */
- pr_debug("nf_ct_sane: unsuccessful SANE_STATUS = %u\n",
+ pr_debug("unsuccessful SANE_STATUS = %u\n",
ntohl(reply->status));
goto out;
}
@@ -148,7 +150,7 @@ static int help(struct sk_buff *skb,
&tuple->src.u3, &tuple->dst.u3,
IPPROTO_TCP, NULL, &reply->port);
- pr_debug("nf_ct_sane: expect: ");
+ pr_debug("expect: ");
nf_ct_dump_tuple(&exp->tuple);
/* Can't expect this? Best to drop packet now. */
@@ -178,8 +180,7 @@ static void nf_conntrack_sane_fini(void)
for (i = 0; i < ports_c; i++) {
for (j = 0; j < 2; j++) {
- pr_debug("nf_ct_sane: unregistering helper for pf: %d "
- "port: %d\n",
+ pr_debug("unregistering helper for pf: %d port: %d\n",
sane[i][j].tuple.src.l3num, ports[i]);
nf_conntrack_helper_unregister(&sane[i][j]);
}
@@ -216,14 +217,12 @@ static int __init nf_conntrack_sane_init(void)
else
sprintf(sane[i][j].name, "sane-%d", ports[i]);
- pr_debug("nf_ct_sane: registering helper for pf: %d "
- "port: %d\n",
+ pr_debug("registering helper for pf: %d port: %d\n",
sane[i][j].tuple.src.l3num, ports[i]);
ret = nf_conntrack_helper_register(&sane[i][j]);
if (ret) {
- printk(KERN_ERR "nf_ct_sane: failed to "
- "register helper for pf: %d port: %d\n",
- sane[i][j].tuple.src.l3num, ports[i]);
+ pr_err("failed to register helper for pf: %d port: %d\n",
+ sane[i][j].tuple.src.l3num, ports[i]);
nf_conntrack_sane_fini();
return ret;
}
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 885b4aba3695..3e06402739e0 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -10,6 +10,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/skbuff.h>
@@ -1665,8 +1667,7 @@ static int __init nf_conntrack_sip_init(void)
ret = nf_conntrack_helper_register(&sip[i][j]);
if (ret) {
- printk(KERN_ERR "nf_ct_sip: failed to register"
- " helper for pf: %u port: %u\n",
+ pr_err("failed to register helper for pf: %u port: %u\n",
sip[i][j].tuple.src.l3num, ports[i]);
nf_conntrack_sip_fini();
return ret;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 1fb3cacc04e1..0f1a45bcacb2 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -392,11 +392,18 @@ static const struct file_operations ct_cpu_seq_fops = {
static int nf_conntrack_standalone_init_proc(struct net *net)
{
struct proc_dir_entry *pde;
+ kuid_t root_uid;
+ kgid_t root_gid;
pde = proc_create("nf_conntrack", 0440, net->proc_net, &ct_file_ops);
if (!pde)
goto out_nf_conntrack;
+ root_uid = make_kuid(net->user_ns, 0);
+ root_gid = make_kgid(net->user_ns, 0);
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(pde, root_uid, root_gid);
+
pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat,
&ct_cpu_seq_fops);
if (!pde)
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index e68ab4fbd71f..36f964066461 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -5,6 +5,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/in.h>
@@ -138,9 +140,8 @@ static int __init nf_conntrack_tftp_init(void)
ret = nf_conntrack_helper_register(&tftp[i][j]);
if (ret) {
- printk(KERN_ERR "nf_ct_tftp: failed to register"
- " helper for pf: %u port: %u\n",
- tftp[i][j].tuple.src.l3num, ports[i]);
+ pr_err("failed to register helper for pf: %u port: %u\n",
+ tftp[i][j].tuple.src.l3num, ports[i]);
nf_conntrack_tftp_fini();
return ret;
}
diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c
index 93da609d9d29..26e742006c48 100644
--- a/net/netfilter/nf_conntrack_timeout.c
+++ b/net/netfilter/nf_conntrack_timeout.c
@@ -25,7 +25,7 @@
#include <net/netfilter/nf_conntrack_timeout.h>
struct ctnl_timeout *
-(*nf_ct_timeout_find_get_hook)(const char *name) __read_mostly;
+(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name) __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_timeout_find_get_hook);
void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout) __read_mostly;
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
new file mode 100644
index 000000000000..8414ee1a0319
--- /dev/null
+++ b/net/netfilter/nf_dup_netdev.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif)
+{
+ struct net_device *dev;
+ struct sk_buff *skb;
+
+ dev = dev_get_by_index_rcu(pkt->net, oif);
+ if (dev == NULL)
+ return;
+
+ skb = skb_clone(pkt->skb, GFP_ATOMIC);
+ if (skb == NULL)
+ return;
+
+ if (skb_mac_header_was_set(skb))
+ skb_push(skb, skb->mac_len);
+
+ skb->dev = dev;
+ skb_sender_cpu_clear(skb);
+ dev_queue_xmit(skb);
+}
+EXPORT_SYMBOL_GPL(nf_dup_netdev_egress);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index 97b75f9bfbcd..d43869879fcf 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -55,7 +55,7 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
rcu_read_lock();
indev = __in_dev_get_rcu(skb->dev);
- if (indev != NULL) {
+ if (indev && indev->ifa_list) {
ifa = indev->ifa_list;
newdst = ifa->ifa_local;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 93cc4737018f..2011977cd79d 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -41,6 +41,8 @@ int nft_register_afinfo(struct net *net, struct nft_af_info *afi)
}
EXPORT_SYMBOL_GPL(nft_register_afinfo);
+static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi);
+
/**
* nft_unregister_afinfo - unregister nf_tables address family info
*
@@ -48,9 +50,10 @@ EXPORT_SYMBOL_GPL(nft_register_afinfo);
*
* Unregister the address family for use with nf_tables.
*/
-void nft_unregister_afinfo(struct nft_af_info *afi)
+void nft_unregister_afinfo(struct net *net, struct nft_af_info *afi)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ __nft_release_afinfo(net, afi);
list_del_rcu(&afi->list);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
@@ -89,6 +92,7 @@ nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
}
static void nft_ctx_init(struct nft_ctx *ctx,
+ struct net *net,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
struct nft_af_info *afi,
@@ -96,7 +100,7 @@ static void nft_ctx_init(struct nft_ctx *ctx,
struct nft_chain *chain,
const struct nlattr * const *nla)
{
- ctx->net = sock_net(skb->sk);
+ ctx->net = net;
ctx->afi = afi;
ctx->table = table;
ctx->chain = chain;
@@ -127,8 +131,8 @@ static void nft_trans_destroy(struct nft_trans *trans)
kfree(trans);
}
-int nft_register_basechain(struct nft_base_chain *basechain,
- unsigned int hook_nops)
+static int nft_register_basechain(struct nft_base_chain *basechain,
+ unsigned int hook_nops)
{
struct net *net = read_pnet(&basechain->pnet);
@@ -137,10 +141,9 @@ int nft_register_basechain(struct nft_base_chain *basechain,
return nf_register_net_hooks(net, basechain->ops, hook_nops);
}
-EXPORT_SYMBOL_GPL(nft_register_basechain);
-void nft_unregister_basechain(struct nft_base_chain *basechain,
- unsigned int hook_nops)
+static void nft_unregister_basechain(struct nft_base_chain *basechain,
+ unsigned int hook_nops)
{
struct net *net = read_pnet(&basechain->pnet);
@@ -149,7 +152,6 @@ void nft_unregister_basechain(struct nft_base_chain *basechain,
nf_unregister_net_hooks(net, basechain->ops, hook_nops);
}
-EXPORT_SYMBOL_GPL(nft_unregister_basechain);
static int nf_tables_register_hooks(const struct nft_table *table,
struct nft_chain *chain,
@@ -541,15 +543,14 @@ done:
return skb->len;
}
-static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_gettable(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nft_af_info *afi;
const struct nft_table *table;
struct sk_buff *skb2;
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
int err;
@@ -672,15 +673,14 @@ err:
return ret;
}
-static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newtable(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nlattr *name;
struct nft_af_info *afi;
struct nft_table *table;
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
u32 flags = 0;
struct nft_ctx ctx;
@@ -706,7 +706,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
return nf_tables_updtable(&ctx);
}
@@ -730,7 +730,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
INIT_LIST_HEAD(&table->sets);
table->flags = flags;
- nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
if (err < 0)
goto err3;
@@ -810,18 +810,17 @@ out:
return err;
}
-static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_deltable(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
struct nft_table *table;
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
- nft_ctx_init(&ctx, skb, nlh, NULL, NULL, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, NULL, NULL, NULL, nla);
if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL)
return nft_flush(&ctx, family);
@@ -832,8 +831,6 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
if (IS_ERR(table))
return PTR_ERR(table);
- if (table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
ctx.afi = afi;
ctx.table = table;
@@ -1099,8 +1096,8 @@ done:
return skb->len;
}
-static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_getchain(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -1108,7 +1105,6 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
const struct nft_table *table;
const struct nft_chain *chain;
struct sk_buff *skb2;
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
int err;
@@ -1221,8 +1217,8 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
}
}
-static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newchain(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -1232,7 +1228,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
struct nft_chain *chain;
struct nft_base_chain *basechain = NULL;
struct nlattr *ha[NFTA_HOOK_MAX + 1];
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct net_device *dev = NULL;
u8 policy = NF_ACCEPT;
@@ -1313,7 +1308,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
return PTR_ERR(stats);
}
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN,
sizeof(struct nft_trans_chain));
if (trans == NULL) {
@@ -1461,7 +1456,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
if (err < 0)
goto err1;
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
if (err < 0)
goto err2;
@@ -1476,15 +1471,14 @@ err1:
return err;
}
-static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_delchain(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
@@ -1495,18 +1489,14 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
- if (table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
if (IS_ERR(chain))
return PTR_ERR(chain);
- if (chain->flags & NFT_CHAIN_INACTIVE)
- return -ENOENT;
if (chain->use > 0)
return -EBUSY;
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
return nft_delchain(&ctx);
}
@@ -1931,8 +1921,8 @@ done:
return skb->len;
}
-static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_getrule(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -1941,7 +1931,6 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
const struct nft_chain *chain;
const struct nft_rule *rule;
struct sk_buff *skb2;
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
int err;
@@ -2010,13 +1999,12 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
static struct nft_expr_info *info;
-static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newrule(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
- struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_chain *chain;
struct nft_rule *rule, *old_rule = NULL;
@@ -2075,7 +2063,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
return PTR_ERR(old_rule);
}
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
n = 0;
size = 0;
@@ -2176,13 +2164,12 @@ err1:
return err;
}
-static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_delrule(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
- struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_chain *chain = NULL;
struct nft_rule *rule;
@@ -2196,8 +2183,6 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
- if (table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
if (nla[NFTA_RULE_CHAIN]) {
chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
@@ -2205,7 +2190,7 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
return PTR_ERR(chain);
}
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
if (chain) {
if (nla[NFTA_RULE_HANDLE]) {
@@ -2338,18 +2323,19 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_ID] = { .type = NLA_U32 },
[NFTA_SET_TIMEOUT] = { .type = NLA_U64 },
[NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 },
+ [NFTA_SET_USERDATA] = { .type = NLA_BINARY,
+ .len = NFT_USERDATA_MAXLEN },
};
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
[NFTA_SET_DESC_SIZE] = { .type = NLA_U32 },
};
-static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
+static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
- struct net *net = sock_net(skb->sk);
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi = NULL;
struct nft_table *table = NULL;
@@ -2367,11 +2353,9 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
- if (table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
}
- nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla);
return 0;
}
@@ -2500,6 +2484,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
goto nla_put_failure;
}
+ if (nla_put(skb, NFTA_SET_USERDATA, set->udlen, set->udata))
+ goto nla_put_failure;
+
desc = nla_nest_start(skb, NFTA_SET_DESC);
if (desc == NULL)
goto nla_put_failure;
@@ -2619,8 +2606,8 @@ static int nf_tables_dump_sets_done(struct netlink_callback *cb)
return 0;
}
-static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_getset(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nft_set *set;
@@ -2630,7 +2617,7 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
int err;
/* Verify existence before starting dump */
- err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla);
if (err < 0)
return err;
@@ -2693,14 +2680,13 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
return 0;
}
-static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newset(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nft_set_ops *ops;
struct nft_af_info *afi;
- struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_set *set;
struct nft_ctx ctx;
@@ -2710,6 +2696,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
u64 timeout;
u32 ktype, dtype, flags, policy, gc_int;
struct nft_set_desc desc;
+ unsigned char *udata;
+ u16 udlen;
int err;
if (nla[NFTA_SET_TABLE] == NULL ||
@@ -2798,7 +2786,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (IS_ERR(table))
return PTR_ERR(table);
- nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]);
if (IS_ERR(set)) {
@@ -2822,12 +2810,16 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (IS_ERR(ops))
return PTR_ERR(ops);
+ udlen = 0;
+ if (nla[NFTA_SET_USERDATA])
+ udlen = nla_len(nla[NFTA_SET_USERDATA]);
+
size = 0;
if (ops->privsize != NULL)
size = ops->privsize(nla);
err = -ENOMEM;
- set = kzalloc(sizeof(*set) + size, GFP_KERNEL);
+ set = kzalloc(sizeof(*set) + size + udlen, GFP_KERNEL);
if (set == NULL)
goto err1;
@@ -2836,6 +2828,12 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (err < 0)
goto err2;
+ udata = NULL;
+ if (udlen) {
+ udata = set->data + size;
+ nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen);
+ }
+
INIT_LIST_HEAD(&set->bindings);
write_pnet(&set->pnet, net);
set->ops = ops;
@@ -2846,6 +2844,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
set->flags = flags;
set->size = desc.size;
set->policy = policy;
+ set->udlen = udlen;
+ set->udata = udata;
set->timeout = timeout;
set->gc_int = gc_int;
@@ -2882,8 +2882,8 @@ static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set
nft_set_destroy(set);
}
-static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_delset(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -2896,15 +2896,13 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
if (nla[NFTA_SET_TABLE] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla);
if (err < 0)
return err;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
if (IS_ERR(set))
return PTR_ERR(set);
- if (set->flags & NFT_SET_INACTIVE)
- return -ENOENT;
if (!list_empty(&set->bindings))
return -EBUSY;
@@ -3024,16 +3022,14 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
[NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 },
};
-static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
+static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- bool trans)
+ const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
struct nft_table *table;
- struct net *net = sock_net(skb->sk);
afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
if (IS_ERR(afi))
@@ -3042,10 +3038,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
- if (!trans && (table->flags & NFT_TABLE_INACTIVE))
- return -ENOENT;
- nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla);
return 0;
}
@@ -3135,6 +3129,7 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = sock_net(skb->sk);
const struct nft_set *set;
struct nft_set_dump_args args;
struct nft_ctx ctx;
@@ -3150,10 +3145,12 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
if (err < 0)
return err;
- err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla,
- false);
+ err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh,
+ (void *)nla);
if (err < 0)
return err;
+ if (ctx.table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
if (IS_ERR(set))
@@ -3208,17 +3205,19 @@ nla_put_failure:
return -ENOSPC;
}
-static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nft_set *set;
struct nft_ctx ctx;
int err;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla);
if (err < 0)
return err;
+ if (ctx.table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
if (IS_ERR(set))
@@ -3528,11 +3527,10 @@ err1:
return err;
}
-static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
- struct net *net = sock_net(skb->sk);
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
@@ -3541,7 +3539,7 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true);
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla);
if (err < 0)
return err;
@@ -3623,8 +3621,8 @@ err1:
return err;
}
-static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nlattr *attr;
@@ -3635,7 +3633,7 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla);
if (err < 0)
return err;
@@ -3739,11 +3737,10 @@ err:
return err;
}
-static int nf_tables_getgen(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_getgen(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
- struct net *net = sock_net(skb->sk);
struct sk_buff *skb2;
int err;
@@ -3887,9 +3884,8 @@ static void nf_tables_commit_release(struct nft_trans *trans)
kfree(trans);
}
-static int nf_tables_commit(struct sk_buff *skb)
+static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
- struct net *net = sock_net(skb->sk);
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
@@ -4024,13 +4020,13 @@ static void nf_tables_abort_release(struct nft_trans *trans)
kfree(trans);
}
-static int nf_tables_abort(struct sk_buff *skb)
+static int nf_tables_abort(struct net *net, struct sk_buff *skb)
{
- struct net *net = sock_net(skb->sk);
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
- list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list,
+ list) {
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
@@ -4446,22 +4442,22 @@ static void nft_verdict_uninit(const struct nft_data *data)
}
}
-static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data)
+int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v)
{
struct nlattr *nest;
- nest = nla_nest_start(skb, NFTA_DATA_VERDICT);
+ nest = nla_nest_start(skb, type);
if (!nest)
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict.code)))
+ if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(v->code)))
goto nla_put_failure;
- switch (data->verdict.code) {
+ switch (v->code) {
case NFT_JUMP:
case NFT_GOTO:
if (nla_put_string(skb, NFTA_VERDICT_CHAIN,
- data->verdict.chain->name))
+ v->chain->name))
goto nla_put_failure;
}
nla_nest_end(skb, nest);
@@ -4572,7 +4568,7 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
err = nft_value_dump(skb, data, len);
break;
case NFT_DATA_VERDICT:
- err = nft_verdict_dump(skb, data);
+ err = nft_verdict_dump(skb, NFTA_DATA_VERDICT, &data->verdict);
break;
default:
err = -EINVAL;
@@ -4584,7 +4580,7 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
}
EXPORT_SYMBOL_GPL(nft_data_dump);
-static int nf_tables_init_net(struct net *net)
+static int __net_init nf_tables_init_net(struct net *net)
{
INIT_LIST_HEAD(&net->nft.af_info);
INIT_LIST_HEAD(&net->nft.commit_list);
@@ -4592,6 +4588,67 @@ static int nf_tables_init_net(struct net *net)
return 0;
}
+int __nft_release_basechain(struct nft_ctx *ctx)
+{
+ struct nft_rule *rule, *nr;
+
+ BUG_ON(!(ctx->chain->flags & NFT_BASE_CHAIN));
+
+ nf_tables_unregister_hooks(ctx->chain->table, ctx->chain,
+ ctx->afi->nops);
+ list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
+ list_del(&rule->list);
+ ctx->chain->use--;
+ nf_tables_rule_destroy(ctx, rule);
+ }
+ list_del(&ctx->chain->list);
+ ctx->table->use--;
+ nf_tables_chain_destroy(ctx->chain);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__nft_release_basechain);
+
+/* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */
+static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
+{
+ struct nft_table *table, *nt;
+ struct nft_chain *chain, *nc;
+ struct nft_rule *rule, *nr;
+ struct nft_set *set, *ns;
+ struct nft_ctx ctx = {
+ .net = net,
+ .afi = afi,
+ };
+
+ list_for_each_entry_safe(table, nt, &afi->tables, list) {
+ list_for_each_entry(chain, &table->chains, list)
+ nf_tables_unregister_hooks(table, chain, afi->nops);
+ /* No packets are walking on these chains anymore. */
+ ctx.table = table;
+ list_for_each_entry(chain, &table->chains, list) {
+ ctx.chain = chain;
+ list_for_each_entry_safe(rule, nr, &chain->rules, list) {
+ list_del(&rule->list);
+ chain->use--;
+ nf_tables_rule_destroy(&ctx, rule);
+ }
+ }
+ list_for_each_entry_safe(set, ns, &table->sets, list) {
+ list_del(&set->list);
+ table->use--;
+ nft_set_destroy(set);
+ }
+ list_for_each_entry_safe(chain, nc, &table->chains, list) {
+ list_del(&chain->list);
+ table->use--;
+ nf_tables_chain_destroy(chain);
+ }
+ list_del(&table->list);
+ nf_tables_table_destroy(&ctx);
+ }
+}
+
static struct pernet_operations nf_tables_net_ops = {
.init = nf_tables_init_net,
};
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index f3695a497408..e9f8dffcc244 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -16,22 +16,17 @@
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
+#include <linux/static_key.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_log.h>
-enum nft_trace {
- NFT_TRACE_RULE,
- NFT_TRACE_RETURN,
- NFT_TRACE_POLICY,
-};
-
-static const char *const comments[] = {
- [NFT_TRACE_RULE] = "rule",
- [NFT_TRACE_RETURN] = "return",
- [NFT_TRACE_POLICY] = "policy",
+static const char *const comments[__NFT_TRACETYPE_MAX] = {
+ [NFT_TRACETYPE_POLICY] = "policy",
+ [NFT_TRACETYPE_RETURN] = "return",
+ [NFT_TRACETYPE_RULE] = "rule",
};
static struct nf_loginfo trace_loginfo = {
@@ -44,22 +39,36 @@ static struct nf_loginfo trace_loginfo = {
},
};
-static void __nft_trace_packet(const struct nft_pktinfo *pkt,
- const struct nft_chain *chain,
- int rulenum, enum nft_trace type)
+static noinline void __nft_trace_packet(struct nft_traceinfo *info,
+ const struct nft_chain *chain,
+ int rulenum, enum nft_trace_types type)
{
+ const struct nft_pktinfo *pkt = info->pkt;
+
+ if (!info->trace || !pkt->skb->nf_trace)
+ return;
+
+ info->chain = chain;
+ info->type = type;
+
+ nft_trace_notify(info);
+
nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in,
pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
chain->table->name, chain->name, comments[type],
rulenum);
}
-static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
+static inline void nft_trace_packet(struct nft_traceinfo *info,
const struct nft_chain *chain,
- int rulenum, enum nft_trace type)
+ const struct nft_rule *rule,
+ int rulenum,
+ enum nft_trace_types type)
{
- if (unlikely(pkt->skb->nf_trace))
- __nft_trace_packet(pkt, chain, rulenum, type);
+ if (static_branch_unlikely(&nft_trace_enabled)) {
+ info->rule = rule;
+ __nft_trace_packet(info, chain, rulenum, type);
+ }
}
static void nft_cmp_fast_eval(const struct nft_expr *expr,
@@ -121,7 +130,11 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
struct nft_stats *stats;
int rulenum;
unsigned int gencursor = nft_genmask_cur(net);
+ struct nft_traceinfo info;
+ info.trace = false;
+ if (static_branch_unlikely(&nft_trace_enabled))
+ nft_trace_init(&info, pkt, &regs.verdict, basechain);
do_chain:
rulenum = 0;
rule = list_entry(&chain->rules, struct nft_rule, list);
@@ -151,7 +164,8 @@ next_rule:
regs.verdict.code = NFT_CONTINUE;
continue;
case NFT_CONTINUE:
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+ nft_trace_packet(&info, chain, rule,
+ rulenum, NFT_TRACETYPE_RULE);
continue;
}
break;
@@ -161,7 +175,8 @@ next_rule:
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+ nft_trace_packet(&info, chain, rule,
+ rulenum, NFT_TRACETYPE_RULE);
return regs.verdict.code;
}
@@ -174,7 +189,8 @@ next_rule:
stackptr++;
/* fall through */
case NFT_GOTO:
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+ nft_trace_packet(&info, chain, rule,
+ rulenum, NFT_TRACETYPE_RULE);
chain = regs.verdict.chain;
goto do_chain;
@@ -182,7 +198,8 @@ next_rule:
rulenum++;
/* fall through */
case NFT_RETURN:
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
+ nft_trace_packet(&info, chain, rule,
+ rulenum, NFT_TRACETYPE_RETURN);
break;
default:
WARN_ON(1);
@@ -196,7 +213,8 @@ next_rule:
goto next_rule;
}
- nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY);
+ nft_trace_packet(&info, basechain, NULL, -1,
+ NFT_TRACETYPE_POLICY);
rcu_read_lock_bh();
stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c
index 9dd2d216cfc1..6b5f76295d3d 100644
--- a/net/netfilter/nf_tables_inet.c
+++ b/net/netfilter/nf_tables_inet.c
@@ -57,7 +57,7 @@ err:
static void __net_exit nf_tables_inet_exit_net(struct net *net)
{
- nft_unregister_afinfo(net->nft.inet);
+ nft_unregister_afinfo(net, net->nft.inet);
kfree(net->nft.inet);
}
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index 7b9c053ba750..5eefe4a355c6 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -94,7 +94,7 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb,
{
struct nft_pktinfo pkt;
- switch (eth_hdr(skb)->h_proto) {
+ switch (skb->protocol) {
case htons(ETH_P_IP):
nft_netdev_set_pktinfo_ipv4(&pkt, skb, state);
break;
@@ -139,7 +139,7 @@ err:
static void nf_tables_netdev_exit_net(struct net *net)
{
- nft_unregister_afinfo(net->nft.netdev);
+ nft_unregister_afinfo(net, net->nft.netdev);
kfree(net->nft.netdev);
}
@@ -156,35 +156,17 @@ static const struct nf_chain_type nft_filter_chain_netdev = {
.hook_mask = (1 << NF_NETDEV_INGRESS),
};
-static void nft_netdev_event(unsigned long event, struct nft_af_info *afi,
- struct net_device *dev, struct nft_table *table,
- struct nft_base_chain *basechain)
+static void nft_netdev_event(unsigned long event, struct net_device *dev,
+ struct nft_ctx *ctx)
{
- switch (event) {
- case NETDEV_REGISTER:
- if (strcmp(basechain->dev_name, dev->name) != 0)
- return;
-
- BUG_ON(!(basechain->flags & NFT_BASECHAIN_DISABLED));
+ struct nft_base_chain *basechain = nft_base_chain(ctx->chain);
- dev_hold(dev);
- basechain->ops[0].dev = dev;
- basechain->flags &= ~NFT_BASECHAIN_DISABLED;
- if (!(table->flags & NFT_TABLE_F_DORMANT))
- nft_register_basechain(basechain, afi->nops);
- break;
+ switch (event) {
case NETDEV_UNREGISTER:
if (strcmp(basechain->dev_name, dev->name) != 0)
return;
- BUG_ON(basechain->flags & NFT_BASECHAIN_DISABLED);
-
- if (!(table->flags & NFT_TABLE_F_DORMANT))
- nft_unregister_basechain(basechain, afi->nops);
-
- dev_put(basechain->ops[0].dev);
- basechain->ops[0].dev = NULL;
- basechain->flags |= NFT_BASECHAIN_DISABLED;
+ __nft_release_basechain(ctx);
break;
case NETDEV_CHANGENAME:
if (dev->ifindex != basechain->ops[0].dev->ifindex)
@@ -201,20 +183,29 @@ static int nf_tables_netdev_event(struct notifier_block *this,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct nft_af_info *afi;
struct nft_table *table;
- struct nft_chain *chain;
+ struct nft_chain *chain, *nr;
+ struct nft_ctx ctx = {
+ .net = dev_net(dev),
+ };
+
+ if (event != NETDEV_UNREGISTER &&
+ event != NETDEV_CHANGENAME)
+ return NOTIFY_DONE;
nfnl_lock(NFNL_SUBSYS_NFTABLES);
list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) {
+ ctx.afi = afi;
if (afi->family != NFPROTO_NETDEV)
continue;
list_for_each_entry(table, &afi->tables, list) {
- list_for_each_entry(chain, &table->chains, list) {
+ ctx.table = table;
+ list_for_each_entry_safe(chain, nr, &table->chains, list) {
if (!(chain->flags & NFT_BASE_CHAIN))
continue;
- nft_netdev_event(event, afi, dev, table,
- nft_base_chain(chain));
+ ctx.chain = chain;
+ nft_netdev_event(event, dev, &ctx);
}
}
}
@@ -233,12 +224,12 @@ static int __init nf_tables_netdev_init(void)
nft_register_chain_type(&nft_filter_chain_netdev);
ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
- if (ret < 0)
+ if (ret < 0) {
nft_unregister_chain_type(&nft_filter_chain_netdev);
-
+ return ret;
+ }
register_netdevice_notifier(&nf_tables_netdev_notifier);
-
- return ret;
+ return 0;
}
static void __exit nf_tables_netdev_exit(void)
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
new file mode 100644
index 000000000000..e9e959f65d91
--- /dev/null
+++ b/net/netfilter/nf_tables_trace.c
@@ -0,0 +1,275 @@
+/*
+ * (C) 2015 Red Hat GmbH
+ * Author: Florian Westphal <fw@strlen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/static_key.h>
+#include <linux/hash.h>
+#include <linux/jhash.h>
+#include <linux/if_vlan.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+#define NFT_TRACETYPE_LL_HSIZE 20
+#define NFT_TRACETYPE_NETWORK_HSIZE 40
+#define NFT_TRACETYPE_TRANSPORT_HSIZE 20
+
+DEFINE_STATIC_KEY_FALSE(nft_trace_enabled);
+EXPORT_SYMBOL_GPL(nft_trace_enabled);
+
+static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb)
+{
+ __be32 id;
+
+ /* using skb address as ID results in a limited number of
+ * values (and quick reuse).
+ *
+ * So we attempt to use as many skb members that will not
+ * change while skb is with netfilter.
+ */
+ id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb),
+ skb->skb_iif);
+
+ return nla_put_be32(nlskb, NFTA_TRACE_ID, id);
+}
+
+static int trace_fill_header(struct sk_buff *nlskb, u16 type,
+ const struct sk_buff *skb,
+ int off, unsigned int len)
+{
+ struct nlattr *nla;
+
+ if (len == 0)
+ return 0;
+
+ nla = nla_reserve(nlskb, type, len);
+ if (!nla || skb_copy_bits(skb, off, nla_data(nla), len))
+ return -1;
+
+ return 0;
+}
+
+static int nf_trace_fill_ll_header(struct sk_buff *nlskb,
+ const struct sk_buff *skb)
+{
+ struct vlan_ethhdr veth;
+ int off;
+
+ BUILD_BUG_ON(sizeof(veth) > NFT_TRACETYPE_LL_HSIZE);
+
+ off = skb_mac_header(skb) - skb->data;
+ if (off != -ETH_HLEN)
+ return -1;
+
+ if (skb_copy_bits(skb, off, &veth, ETH_HLEN))
+ return -1;
+
+ veth.h_vlan_proto = skb->vlan_proto;
+ veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
+ veth.h_vlan_encapsulated_proto = skb->protocol;
+
+ return nla_put(nlskb, NFTA_TRACE_LL_HEADER, sizeof(veth), &veth);
+}
+
+static int nf_trace_fill_dev_info(struct sk_buff *nlskb,
+ const struct net_device *indev,
+ const struct net_device *outdev)
+{
+ if (indev) {
+ if (nla_put_be32(nlskb, NFTA_TRACE_IIF,
+ htonl(indev->ifindex)))
+ return -1;
+
+ if (nla_put_be16(nlskb, NFTA_TRACE_IIFTYPE,
+ htons(indev->type)))
+ return -1;
+ }
+
+ if (outdev) {
+ if (nla_put_be32(nlskb, NFTA_TRACE_OIF,
+ htonl(outdev->ifindex)))
+ return -1;
+
+ if (nla_put_be16(nlskb, NFTA_TRACE_OIFTYPE,
+ htons(outdev->type)))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int nf_trace_fill_pkt_info(struct sk_buff *nlskb,
+ const struct nft_pktinfo *pkt)
+{
+ const struct sk_buff *skb = pkt->skb;
+ unsigned int len = min_t(unsigned int,
+ pkt->xt.thoff - skb_network_offset(skb),
+ NFT_TRACETYPE_NETWORK_HSIZE);
+ int off = skb_network_offset(skb);
+
+ if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len))
+ return -1;
+
+ len = min_t(unsigned int, skb->len - pkt->xt.thoff,
+ NFT_TRACETYPE_TRANSPORT_HSIZE);
+
+ if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb,
+ pkt->xt.thoff, len))
+ return -1;
+
+ if (!skb_mac_header_was_set(skb))
+ return 0;
+
+ if (skb_vlan_tag_get(skb))
+ return nf_trace_fill_ll_header(nlskb, skb);
+
+ off = skb_mac_header(skb) - skb->data;
+ len = min_t(unsigned int, -off, NFT_TRACETYPE_LL_HSIZE);
+ return trace_fill_header(nlskb, NFTA_TRACE_LL_HEADER,
+ skb, off, len);
+}
+
+static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
+ const struct nft_traceinfo *info)
+{
+ if (!info->rule)
+ return 0;
+
+ /* a continue verdict with ->type == RETURN means that this is
+ * an implicit return (end of chain reached).
+ *
+ * Since no rule matched, the ->rule pointer is invalid.
+ */
+ if (info->type == NFT_TRACETYPE_RETURN &&
+ info->verdict->code == NFT_CONTINUE)
+ return 0;
+
+ return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE,
+ cpu_to_be64(info->rule->handle));
+}
+
+void nft_trace_notify(struct nft_traceinfo *info)
+{
+ const struct nft_pktinfo *pkt = info->pkt;
+ struct nfgenmsg *nfmsg;
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+ unsigned int size;
+ int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE;
+
+ if (!nfnetlink_has_listeners(pkt->net, NFNLGRP_NFTRACE))
+ return;
+
+ size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
+ nla_total_size(NFT_TABLE_MAXNAMELEN) +
+ nla_total_size(NFT_CHAIN_MAXNAMELEN) +
+ nla_total_size(sizeof(__be64)) + /* rule handle */
+ nla_total_size(sizeof(__be32)) + /* trace type */
+ nla_total_size(0) + /* VERDICT, nested */
+ nla_total_size(sizeof(u32)) + /* verdict code */
+ nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */
+ nla_total_size(sizeof(u32)) + /* id */
+ nla_total_size(NFT_TRACETYPE_LL_HSIZE) +
+ nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) +
+ nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE) +
+ nla_total_size(sizeof(u32)) + /* iif */
+ nla_total_size(sizeof(__be16)) + /* iiftype */
+ nla_total_size(sizeof(u32)) + /* oif */
+ nla_total_size(sizeof(__be16)) + /* oiftype */
+ nla_total_size(sizeof(u32)) + /* mark */
+ nla_total_size(sizeof(u32)) + /* nfproto */
+ nla_total_size(sizeof(u32)); /* policy */
+
+ skb = nlmsg_new(size, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0);
+ if (!nlh)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = info->basechain->type->family;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(pkt->pf)))
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type)))
+ goto nla_put_failure;
+
+ if (trace_fill_id(skb, pkt->skb))
+ goto nla_put_failure;
+
+ if (info->chain) {
+ if (nla_put_string(skb, NFTA_TRACE_CHAIN,
+ info->chain->name))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_TRACE_TABLE,
+ info->chain->table->name))
+ goto nla_put_failure;
+ }
+
+ if (nf_trace_fill_rule_info(skb, info))
+ goto nla_put_failure;
+
+ switch (info->type) {
+ case NFT_TRACETYPE_UNSPEC:
+ case __NFT_TRACETYPE_MAX:
+ break;
+ case NFT_TRACETYPE_RETURN:
+ case NFT_TRACETYPE_RULE:
+ if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict))
+ goto nla_put_failure;
+ break;
+ case NFT_TRACETYPE_POLICY:
+ if (nla_put_be32(skb, NFTA_TRACE_POLICY,
+ info->basechain->policy))
+ goto nla_put_failure;
+ break;
+ }
+
+ if (pkt->skb->mark &&
+ nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark)))
+ goto nla_put_failure;
+
+ if (!info->packet_dumped) {
+ if (nf_trace_fill_dev_info(skb, pkt->in, pkt->out))
+ goto nla_put_failure;
+
+ if (nf_trace_fill_pkt_info(skb, pkt))
+ goto nla_put_failure;
+ info->packet_dumped = true;
+ }
+
+ nlmsg_end(skb, nlh);
+ nfnetlink_send(skb, pkt->net, 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC);
+ return;
+
+ nla_put_failure:
+ WARN_ON_ONCE(1);
+ kfree_skb(skb);
+}
+
+void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
+ const struct nft_verdict *verdict,
+ const struct nft_chain *chain)
+{
+ info->basechain = nft_base_chain(chain);
+ info->trace = true;
+ info->packet_dumped = false;
+ info->pkt = pkt;
+ info->verdict = verdict;
+}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index f1d9e887f5b1..857ae89633af 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -33,6 +33,10 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
+#define nfnl_dereference_protected(id) \
+ rcu_dereference_protected(table[(id)].subsys, \
+ lockdep_nfnl_is_held((id)))
+
static char __initdata nfversion[] = "0.30";
static struct {
@@ -49,6 +53,7 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = {
[NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP,
[NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES,
[NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT,
+ [NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES,
};
void nfnl_lock(__u8 subsys_id)
@@ -201,19 +206,18 @@ replay:
}
if (nc->call_rcu) {
- err = nc->call_rcu(net->nfnl, skb, nlh,
+ err = nc->call_rcu(net, net->nfnl, skb, nlh,
(const struct nlattr **)cda);
rcu_read_unlock();
} else {
rcu_read_unlock();
nfnl_lock(subsys_id);
- if (rcu_dereference_protected(table[subsys_id].subsys,
- lockdep_is_held(&table[subsys_id].mutex)) != ss ||
+ if (nfnl_dereference_protected(subsys_id) != ss ||
nfnetlink_find_client(type, ss) != nc)
err = -EAGAIN;
else if (nc->call)
- err = nc->call(net->nfnl, skb, nlh,
- (const struct nlattr **)cda);
+ err = nc->call(net, net->nfnl, skb, nlh,
+ (const struct nlattr **)cda);
else
err = -EINVAL;
nfnl_unlock(subsys_id);
@@ -295,30 +299,26 @@ replay:
if (!skb)
return netlink_ack(oskb, nlh, -ENOMEM);
- skb->sk = oskb->sk;
-
nfnl_lock(subsys_id);
- ss = rcu_dereference_protected(table[subsys_id].subsys,
- lockdep_is_held(&table[subsys_id].mutex));
+ ss = nfnl_dereference_protected(subsys_id);
if (!ss) {
#ifdef CONFIG_MODULES
nfnl_unlock(subsys_id);
request_module("nfnetlink-subsys-%d", subsys_id);
nfnl_lock(subsys_id);
- ss = rcu_dereference_protected(table[subsys_id].subsys,
- lockdep_is_held(&table[subsys_id].mutex));
+ ss = nfnl_dereference_protected(subsys_id);
if (!ss)
#endif
{
nfnl_unlock(subsys_id);
- netlink_ack(skb, nlh, -EOPNOTSUPP);
+ netlink_ack(oskb, nlh, -EOPNOTSUPP);
return kfree_skb(skb);
}
}
if (!ss->commit || !ss->abort) {
nfnl_unlock(subsys_id);
- netlink_ack(skb, nlh, -EOPNOTSUPP);
+ netlink_ack(oskb, nlh, -EOPNOTSUPP);
return kfree_skb(skb);
}
@@ -328,10 +328,12 @@ replay:
nlh = nlmsg_hdr(skb);
err = 0;
- if (nlmsg_len(nlh) < sizeof(struct nfgenmsg) ||
- skb->len < nlh->nlmsg_len) {
- err = -EINVAL;
- goto ack;
+ if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+ skb->len < nlh->nlmsg_len ||
+ nlmsg_len(nlh) < sizeof(struct nfgenmsg)) {
+ nfnl_err_reset(&err_list);
+ status |= NFNL_BATCH_FAILURE;
+ goto done;
}
/* Only requests are handled by the kernel */
@@ -381,7 +383,7 @@ replay:
goto ack;
if (nc->call_batch) {
- err = nc->call_batch(net->nfnl, skb, nlh,
+ err = nc->call_batch(net, net->nfnl, skb, nlh,
(const struct nlattr **)cda);
}
@@ -406,7 +408,7 @@ ack:
* pointing to the batch header.
*/
nfnl_err_reset(&err_list);
- netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM);
+ netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM);
status |= NFNL_BATCH_FAILURE;
goto done;
}
@@ -425,15 +427,15 @@ next:
}
done:
if (status & NFNL_BATCH_REPLAY) {
- ss->abort(oskb);
+ ss->abort(net, oskb);
nfnl_err_reset(&err_list);
nfnl_unlock(subsys_id);
kfree_skb(skb);
goto replay;
} else if (status == NFNL_BATCH_DONE) {
- ss->commit(oskb);
+ ss->commit(net, oskb);
} else {
- ss->abort(oskb);
+ ss->abort(net, oskb);
}
nfnl_err_deliver(&err_list, oskb);
@@ -492,7 +494,7 @@ static int nfnetlink_bind(struct net *net, int group)
type = nfnl_group2type[group];
rcu_read_lock();
- ss = nfnetlink_get_subsys(type);
+ ss = nfnetlink_get_subsys(type << 8);
rcu_read_unlock();
if (!ss)
request_module("nfnetlink-subsys-%d", type);
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index fefbf5f0b28d..5274b04c42a6 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -46,12 +46,11 @@ struct nfacct_filter {
#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
#define NFACCT_OVERQUOTA_BIT 2 /* NFACCT_F_OVERQUOTA */
-static int
-nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+static int nfnl_acct_new(struct net *net, struct sock *nfnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const tb[])
{
struct nf_acct *nfacct, *matching = NULL;
- struct net *net = sock_net(nfnl);
char *acct_name;
unsigned int size = 0;
u32 flags = 0;
@@ -253,11 +252,10 @@ nfacct_filter_alloc(const struct nlattr * const attr)
return filter;
}
-static int
-nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+static int nfnl_acct_get(struct net *net, struct sock *nfnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const tb[])
{
- struct net *net = sock_net(nfnl);
int ret = -ENOENT;
struct nf_acct *cur;
char *acct_name;
@@ -333,11 +331,10 @@ static int nfnl_acct_try_del(struct nf_acct *cur)
return ret;
}
-static int
-nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+static int nfnl_acct_del(struct net *net, struct sock *nfnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const tb[])
{
- struct net *net = sock_net(nfnl);
char *acct_name;
struct nf_acct *cur;
int ret = -ENOENT;
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 54330fb5efaf..e924e95fcc7f 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -286,9 +286,9 @@ nfnl_cthelper_update(const struct nlattr * const tb[],
return 0;
}
-static int
-nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const tb[])
{
const char *helper_name;
struct nf_conntrack_helper *cur, *helper = NULL;
@@ -498,9 +498,9 @@ out:
return skb->len;
}
-static int
-nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const tb[])
{
int ret = -ENOENT, i;
struct nf_conntrack_helper *cur;
@@ -570,9 +570,9 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb,
return ret;
}
-static int
-nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const tb[])
{
char *helper_name = NULL;
struct nf_conntrack_helper *cur;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index c7a2d0e1c462..2671b9deb103 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -38,8 +38,6 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning");
-static LIST_HEAD(cttimeout_list);
-
static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
[CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING,
.len = CTNL_TIMEOUT_NAME_MAX - 1},
@@ -67,16 +65,15 @@ ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto,
return ret;
}
-static int
-cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
__u16 l3num;
__u8 l4num;
struct nf_conntrack_l4proto *l4proto;
struct ctnl_timeout *timeout, *matching = NULL;
- struct net *net = sock_net(skb->sk);
char *name;
int ret;
@@ -90,7 +87,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb,
l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
- list_for_each_entry(timeout, &cttimeout_list, head) {
+ list_for_each_entry(timeout, &net->nfct_timeout_list, head) {
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
@@ -145,7 +142,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb,
timeout->l3num = l3num;
timeout->l4proto = l4proto;
atomic_set(&timeout->refcnt, 1);
- list_add_tail_rcu(&timeout->head, &cttimeout_list);
+ list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list);
return 0;
err:
@@ -209,6 +206,7 @@ nla_put_failure:
static int
ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = sock_net(skb->sk);
struct ctnl_timeout *cur, *last;
if (cb->args[2])
@@ -219,7 +217,7 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[1] = 0;
rcu_read_lock();
- list_for_each_entry_rcu(cur, &cttimeout_list, head) {
+ list_for_each_entry_rcu(cur, &net->nfct_timeout_list, head) {
if (last) {
if (cur != last)
continue;
@@ -240,10 +238,10 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-static int
-cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int cttimeout_get_timeout(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
int ret = -ENOENT;
char *name;
@@ -260,7 +258,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb,
return -EINVAL;
name = nla_data(cda[CTA_TIMEOUT_NAME]);
- list_for_each_entry(cur, &cttimeout_list, head) {
+ list_for_each_entry(cur, &net->nfct_timeout_list, head) {
struct sk_buff *skb2;
if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
@@ -301,17 +299,17 @@ static void untimeout(struct nf_conntrack_tuple_hash *i,
RCU_INIT_POINTER(timeout_ext->timeout, NULL);
}
-static void ctnl_untimeout(struct ctnl_timeout *timeout)
+static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
{
struct nf_conntrack_tuple_hash *h;
const struct hlist_nulls_node *nn;
int i;
local_bh_disable();
- for (i = 0; i < init_net.ct.htable_size; i++) {
- spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
- if (i < init_net.ct.htable_size) {
- hlist_nulls_for_each_entry(h, nn, &init_net.ct.hash[i], hnnode)
+ for (i = 0; i < net->ct.htable_size; i++) {
+ nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+ if (i < net->ct.htable_size) {
+ hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
untimeout(h, timeout);
}
spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
@@ -320,7 +318,7 @@ static void ctnl_untimeout(struct ctnl_timeout *timeout)
}
/* try to delete object, fail if it is still in use. */
-static int ctnl_timeout_try_del(struct ctnl_timeout *timeout)
+static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
{
int ret = 0;
@@ -329,7 +327,7 @@ static int ctnl_timeout_try_del(struct ctnl_timeout *timeout)
/* We are protected by nfnl mutex. */
list_del_rcu(&timeout->head);
nf_ct_l4proto_put(timeout->l4proto);
- ctnl_untimeout(timeout);
+ ctnl_untimeout(net, timeout);
kfree_rcu(timeout, rcu_head);
} else {
/* still in use, restore reference counter. */
@@ -339,28 +337,28 @@ static int ctnl_timeout_try_del(struct ctnl_timeout *timeout)
return ret;
}
-static int
-cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
- char *name;
struct ctnl_timeout *cur;
int ret = -ENOENT;
+ char *name;
if (!cda[CTA_TIMEOUT_NAME]) {
- list_for_each_entry(cur, &cttimeout_list, head)
- ctnl_timeout_try_del(cur);
+ list_for_each_entry(cur, &net->nfct_timeout_list, head)
+ ctnl_timeout_try_del(net, cur);
return 0;
}
name = nla_data(cda[CTA_TIMEOUT_NAME]);
- list_for_each_entry(cur, &cttimeout_list, head) {
+ list_for_each_entry(cur, &net->nfct_timeout_list, head) {
if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
- ret = ctnl_timeout_try_del(cur);
+ ret = ctnl_timeout_try_del(net, cur);
if (ret < 0)
return ret;
@@ -369,15 +367,14 @@ cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb,
return ret;
}
-static int
-cttimeout_default_set(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int cttimeout_default_set(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
__u16 l3num;
__u8 l4num;
struct nf_conntrack_l4proto *l4proto;
- struct net *net = sock_net(skb->sk);
unsigned int *timeouts;
int ret;
@@ -459,14 +456,14 @@ nla_put_failure:
return -1;
}
-static int cttimeout_default_get(struct sock *ctnl, struct sk_buff *skb,
+static int cttimeout_default_get(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[])
{
__u16 l3num;
__u8 l4num;
struct nf_conntrack_l4proto *l4proto;
- struct net *net = sock_net(skb->sk);
struct sk_buff *skb2;
int ret, err;
@@ -511,12 +508,13 @@ err:
}
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-static struct ctnl_timeout *ctnl_timeout_find_get(const char *name)
+static struct ctnl_timeout *
+ctnl_timeout_find_get(struct net *net, const char *name)
{
struct ctnl_timeout *timeout, *matching = NULL;
rcu_read_lock();
- list_for_each_entry_rcu(timeout, &cttimeout_list, head) {
+ list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) {
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
@@ -569,10 +567,39 @@ static const struct nfnetlink_subsystem cttimeout_subsys = {
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT);
+static int __net_init cttimeout_net_init(struct net *net)
+{
+ INIT_LIST_HEAD(&net->nfct_timeout_list);
+
+ return 0;
+}
+
+static void __net_exit cttimeout_net_exit(struct net *net)
+{
+ struct ctnl_timeout *cur, *tmp;
+
+ ctnl_untimeout(net, NULL);
+
+ list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
+ list_del_rcu(&cur->head);
+ nf_ct_l4proto_put(cur->l4proto);
+ kfree_rcu(cur, rcu_head);
+ }
+}
+
+static struct pernet_operations cttimeout_ops = {
+ .init = cttimeout_net_init,
+ .exit = cttimeout_net_exit,
+};
+
static int __init cttimeout_init(void)
{
int ret;
+ ret = register_pernet_subsys(&cttimeout_ops);
+ if (ret < 0)
+ return ret;
+
ret = nfnetlink_subsys_register(&cttimeout_subsys);
if (ret < 0) {
pr_err("cttimeout_init: cannot register cttimeout with "
@@ -586,28 +613,17 @@ static int __init cttimeout_init(void)
return 0;
err_out:
+ unregister_pernet_subsys(&cttimeout_ops);
return ret;
}
static void __exit cttimeout_exit(void)
{
- struct ctnl_timeout *cur, *tmp;
-
pr_info("cttimeout: unregistering from nfnetlink.\n");
nfnetlink_subsys_unregister(&cttimeout_subsys);
- /* Make sure no conntrack objects refer to custom timeouts anymore. */
- ctnl_untimeout(NULL);
-
- list_for_each_entry_safe(cur, tmp, &cttimeout_list, head) {
- list_del_rcu(&cur->head);
- /* We are sure that our objects have no clients at this point,
- * it's safe to release them all without checking refcnt.
- */
- nf_ct_l4proto_put(cur->l4proto);
- kfree_rcu(cur, rcu_head);
- }
+ unregister_pernet_subsys(&cttimeout_ops);
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 06eb48fceb42..8ca932057c13 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -293,24 +293,20 @@ nfulnl_set_nlbufsiz(struct nfulnl_instance *inst, u_int32_t nlbufsiz)
return status;
}
-static int
+static void
nfulnl_set_timeout(struct nfulnl_instance *inst, u_int32_t timeout)
{
spin_lock_bh(&inst->lock);
inst->flushtimeout = timeout;
spin_unlock_bh(&inst->lock);
-
- return 0;
}
-static int
+static void
nfulnl_set_qthresh(struct nfulnl_instance *inst, u_int32_t qthresh)
{
spin_lock_bh(&inst->lock);
inst->qthreshold = qthresh;
spin_unlock_bh(&inst->lock);
-
- return 0;
}
static int
@@ -789,10 +785,9 @@ static struct notifier_block nfulnl_rtnl_notifier = {
.notifier_call = nfulnl_rcv_nl_event,
};
-static int
-nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[])
+static int nfulnl_recv_unsupp(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const nfqa[])
{
return -ENOTSUPP;
}
@@ -813,19 +808,17 @@ static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = {
[NFULA_CFG_FLAGS] = { .type = NLA_U16 },
};
-static int
-nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nfula[])
+static int nfulnl_recv_config(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const nfula[])
{
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int16_t group_num = ntohs(nfmsg->res_id);
struct nfulnl_instance *inst;
struct nfulnl_msg_config_cmd *cmd = NULL;
- struct net *net = sock_net(ctnl);
struct nfnl_log_net *log = nfnl_log_pernet(net);
int ret = 0;
- u16 flags;
+ u16 flags = 0;
if (nfula[NFULA_CFG_CMD]) {
u_int8_t pf = nfmsg->nfgen_family;
@@ -895,7 +888,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
goto out_put;
default:
ret = -ENOTSUPP;
- break;
+ goto out_put;
}
} else if (!inst) {
ret = -ENODEV;
@@ -1064,15 +1057,26 @@ static int __net_init nfnl_log_net_init(struct net *net)
{
unsigned int i;
struct nfnl_log_net *log = nfnl_log_pernet(net);
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *proc;
+ kuid_t root_uid;
+ kgid_t root_gid;
+#endif
for (i = 0; i < INSTANCE_BUCKETS; i++)
INIT_HLIST_HEAD(&log->instance_table[i]);
spin_lock_init(&log->instances_lock);
#ifdef CONFIG_PROC_FS
- if (!proc_create("nfnetlink_log", 0440,
- net->nf.proc_netfilter, &nful_file_ops))
+ proc = proc_create("nfnetlink_log", 0440,
+ net->nf.proc_netfilter, &nful_file_ops);
+ if (!proc)
return -ENOMEM;
+
+ root_uid = make_kuid(net->user_ns, 0);
+ root_gid = make_kgid(net->user_ns, 0);
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
#endif
return 0;
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 7d81d280cb4f..1d3936587ace 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -365,8 +365,9 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
break;
}
+ nfnl_ct = rcu_dereference(nfnl_ct_hook);
+
if (queue->flags & NFQA_CFG_F_CONNTRACK) {
- nfnl_ct = rcu_dereference(nfnl_ct_hook);
if (nfnl_ct != NULL) {
ct = nfnl_ct->get_ct(entskb, &ctinfo);
if (ct != NULL)
@@ -956,10 +957,10 @@ static int nfq_id_after(unsigned int id, unsigned int max)
return (int)(id - max) > 0;
}
-static int
-nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[])
+static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nfqa[])
{
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nf_queue_entry *entry, *tmp;
@@ -968,8 +969,6 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,
struct nfqnl_instance *queue;
LIST_HEAD(batch_list);
u16 queue_num = ntohs(nfmsg->res_id);
-
- struct net *net = sock_net(ctnl);
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
queue = verdict_instance_lookup(q, queue_num,
@@ -1028,14 +1027,13 @@ static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct,
return ct;
}
-static int
-nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[])
+static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nfqa[])
{
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int16_t queue_num = ntohs(nfmsg->res_id);
-
struct nfqnl_msg_verdict_hdr *vhdr;
struct nfqnl_instance *queue;
unsigned int verdict;
@@ -1043,8 +1041,6 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
enum ip_conntrack_info uninitialized_var(ctinfo);
struct nfnl_ct_hook *nfnl_ct;
struct nf_conn *ct = NULL;
-
- struct net *net = sock_net(ctnl);
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
queue = instance_lookup(q, queue_num);
@@ -1064,9 +1060,10 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
if (entry == NULL)
return -ENOENT;
+ /* rcu lock already held from nfnl->call_rcu. */
+ nfnl_ct = rcu_dereference(nfnl_ct_hook);
+
if (nfqa[NFQA_CT]) {
- /* rcu lock already held from nfnl->call_rcu. */
- nfnl_ct = rcu_dereference(nfnl_ct_hook);
if (nfnl_ct != NULL)
ct = nfqnl_ct_parse(nfnl_ct, nlh, nfqa, entry, &ctinfo);
}
@@ -1090,10 +1087,9 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
return 0;
}
-static int
-nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[])
+static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const nfqa[])
{
return -ENOTSUPP;
}
@@ -1108,17 +1104,16 @@ static const struct nf_queue_handler nfqh = {
.nf_hook_drop = &nfqnl_nf_hook_drop,
};
-static int
-nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[])
+static int nfqnl_recv_config(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const nfqa[])
{
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int16_t queue_num = ntohs(nfmsg->res_id);
struct nfqnl_instance *queue;
struct nfqnl_msg_config_cmd *cmd = NULL;
- struct net *net = sock_net(ctnl);
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+ __u32 flags = 0, mask = 0;
int ret = 0;
if (nfqa[NFQA_CFG_CMD]) {
@@ -1131,6 +1126,40 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
}
}
+ /* Check if we support these flags in first place, dependencies should
+ * be there too not to break atomicity.
+ */
+ if (nfqa[NFQA_CFG_FLAGS]) {
+ if (!nfqa[NFQA_CFG_MASK]) {
+ /* A mask is needed to specify which flags are being
+ * changed.
+ */
+ return -EINVAL;
+ }
+
+ flags = ntohl(nla_get_be32(nfqa[NFQA_CFG_FLAGS]));
+ mask = ntohl(nla_get_be32(nfqa[NFQA_CFG_MASK]));
+
+ if (flags >= NFQA_CFG_F_MAX)
+ return -EOPNOTSUPP;
+
+#if !IS_ENABLED(CONFIG_NETWORK_SECMARK)
+ if (flags & mask & NFQA_CFG_F_SECCTX)
+ return -EOPNOTSUPP;
+#endif
+ if ((flags & mask & NFQA_CFG_F_CONNTRACK) &&
+ !rcu_access_pointer(nfnl_ct_hook)) {
+#ifdef CONFIG_MODULES
+ nfnl_unlock(NFNL_SUBSYS_QUEUE);
+ request_module("ip_conntrack_netlink");
+ nfnl_lock(NFNL_SUBSYS_QUEUE);
+ if (rcu_access_pointer(nfnl_ct_hook))
+ return -EAGAIN;
+#endif
+ return -EOPNOTSUPP;
+ }
+ }
+
rcu_read_lock();
queue = instance_lookup(q, queue_num);
if (queue && queue->peer_portid != NETLINK_CB(skb).portid) {
@@ -1158,70 +1187,38 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
goto err_out_unlock;
}
instance_destroy(q, queue);
- break;
+ goto err_out_unlock;
case NFQNL_CFG_CMD_PF_BIND:
case NFQNL_CFG_CMD_PF_UNBIND:
break;
default:
ret = -ENOTSUPP;
- break;
+ goto err_out_unlock;
}
}
+ if (!queue) {
+ ret = -ENODEV;
+ goto err_out_unlock;
+ }
+
if (nfqa[NFQA_CFG_PARAMS]) {
- struct nfqnl_msg_config_params *params;
+ struct nfqnl_msg_config_params *params =
+ nla_data(nfqa[NFQA_CFG_PARAMS]);
- if (!queue) {
- ret = -ENODEV;
- goto err_out_unlock;
- }
- params = nla_data(nfqa[NFQA_CFG_PARAMS]);
nfqnl_set_mode(queue, params->copy_mode,
ntohl(params->copy_range));
}
if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) {
- __be32 *queue_maxlen;
+ __be32 *queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]);
- if (!queue) {
- ret = -ENODEV;
- goto err_out_unlock;
- }
- queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]);
spin_lock_bh(&queue->lock);
queue->queue_maxlen = ntohl(*queue_maxlen);
spin_unlock_bh(&queue->lock);
}
if (nfqa[NFQA_CFG_FLAGS]) {
- __u32 flags, mask;
-
- if (!queue) {
- ret = -ENODEV;
- goto err_out_unlock;
- }
-
- if (!nfqa[NFQA_CFG_MASK]) {
- /* A mask is needed to specify which flags are being
- * changed.
- */
- ret = -EINVAL;
- goto err_out_unlock;
- }
-
- flags = ntohl(nla_get_be32(nfqa[NFQA_CFG_FLAGS]));
- mask = ntohl(nla_get_be32(nfqa[NFQA_CFG_MASK]));
-
- if (flags >= NFQA_CFG_F_MAX) {
- ret = -EOPNOTSUPP;
- goto err_out_unlock;
- }
-#if !IS_ENABLED(CONFIG_NETWORK_SECMARK)
- if (flags & mask & NFQA_CFG_F_SECCTX) {
- ret = -EOPNOTSUPP;
- goto err_out_unlock;
- }
-#endif
spin_lock_bh(&queue->lock);
queue->flags &= ~mask;
queue->flags |= flags & mask;
@@ -1417,6 +1414,7 @@ static int __init nfnetlink_queue_init(void)
cleanup_netlink_notifier:
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
+ unregister_pernet_subsys(&nfnl_queue_net_ops);
out:
return status;
}
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index fde5145f2e36..b78c28ba465f 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -8,6 +8,7 @@
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
+#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -39,6 +40,25 @@ static void nft_byteorder_eval(const struct nft_expr *expr,
d = (void *)dst;
switch (priv->size) {
+ case 8: {
+ u64 src64;
+
+ switch (priv->op) {
+ case NFT_BYTEORDER_NTOH:
+ for (i = 0; i < priv->len / 8; i++) {
+ src64 = get_unaligned((u64 *)&src[i]);
+ put_unaligned_be64(src64, &dst[i]);
+ }
+ break;
+ case NFT_BYTEORDER_HTON:
+ for (i = 0; i < priv->len / 8; i++) {
+ src64 = get_unaligned_be64(&src[i]);
+ put_unaligned(src64, (u64 *)&dst[i]);
+ }
+ break;
+ }
+ break;
+ }
case 4:
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
@@ -101,6 +121,7 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
switch (priv->size) {
case 2:
case 4:
+ case 8:
break;
default:
return -EINVAL;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 9c8fab00164b..454841baa4d0 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -519,9 +519,9 @@ nla_put_failure:
return -1;
}
-static int
-nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+static int nfnl_compat_get(struct net *net, struct sock *nfnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const tb[])
{
int ret = 0, target;
struct nfgenmsg *nfmsg;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 1067fb4c1ffa..c9743f78f219 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -47,27 +47,34 @@ static void nft_counter_eval(const struct nft_expr *expr,
local_bh_enable();
}
-static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter,
+ struct nft_counter *total)
{
- struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
- struct nft_counter_percpu *cpu_stats;
- struct nft_counter total;
+ const struct nft_counter_percpu *cpu_stats;
u64 bytes, packets;
unsigned int seq;
int cpu;
- memset(&total, 0, sizeof(total));
+ memset(total, 0, sizeof(*total));
for_each_possible_cpu(cpu) {
- cpu_stats = per_cpu_ptr(priv->counter, cpu);
+ cpu_stats = per_cpu_ptr(counter, cpu);
do {
seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
bytes = cpu_stats->counter.bytes;
packets = cpu_stats->counter.packets;
} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
- total.packets += packets;
- total.bytes += bytes;
+ total->packets += packets;
+ total->bytes += bytes;
}
+}
+
+static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+ struct nft_counter total;
+
+ nft_counter_fetch(priv->counter, &total);
if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) ||
nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets)))
@@ -93,7 +100,7 @@ static int nft_counter_init(const struct nft_ctx *ctx,
cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu);
if (cpu_stats == NULL)
- return ENOMEM;
+ return -ENOMEM;
preempt_disable();
this_cpu = this_cpu_ptr(cpu_stats);
@@ -118,6 +125,31 @@ static void nft_counter_destroy(const struct nft_ctx *ctx,
free_percpu(priv->counter);
}
+static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
+ struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
+ struct nft_counter_percpu __percpu *cpu_stats;
+ struct nft_counter_percpu *this_cpu;
+ struct nft_counter total;
+
+ nft_counter_fetch(priv->counter, &total);
+
+ cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu,
+ GFP_ATOMIC);
+ if (cpu_stats == NULL)
+ return -ENOMEM;
+
+ preempt_disable();
+ this_cpu = this_cpu_ptr(cpu_stats);
+ this_cpu->counter.packets = total.packets;
+ this_cpu->counter.bytes = total.bytes;
+ preempt_enable();
+
+ priv_clone->counter = cpu_stats;
+ return 0;
+}
+
static struct nft_expr_type nft_counter_type;
static const struct nft_expr_ops nft_counter_ops = {
.type = &nft_counter_type,
@@ -126,6 +158,7 @@ static const struct nft_expr_ops nft_counter_ops = {
.init = nft_counter_init,
.destroy = nft_counter_destroy,
.dump = nft_counter_dump,
+ .clone = nft_counter_clone,
};
static struct nft_expr_type nft_counter_type __read_mostly = {
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 8cbca3432f90..d4a4619fcebc 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -16,6 +16,7 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_ecache.h>
@@ -30,6 +31,18 @@ struct nft_ct {
};
};
+static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c,
+ enum nft_ct_keys k,
+ enum ip_conntrack_dir d)
+{
+ if (d < IP_CT_DIR_MAX)
+ return k == NFT_CT_BYTES ? atomic64_read(&c[d].bytes) :
+ atomic64_read(&c[d].packets);
+
+ return nft_ct_get_eval_counter(c, k, IP_CT_DIR_ORIGINAL) +
+ nft_ct_get_eval_counter(c, k, IP_CT_DIR_REPLY);
+}
+
static void nft_ct_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -115,6 +128,17 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
return;
}
#endif
+ case NFT_CT_BYTES: /* fallthrough */
+ case NFT_CT_PKTS: {
+ const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
+ u64 count = 0;
+
+ if (acct)
+ count = nft_ct_get_eval_counter(acct->counter,
+ priv->key, priv->dir);
+ memcpy(dest, &count, sizeof(count));
+ return;
+ }
default:
break;
}
@@ -291,6 +315,13 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
return -EINVAL;
len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u.all);
break;
+ case NFT_CT_BYTES:
+ case NFT_CT_PKTS:
+ /* no direction? return sum of original + reply */
+ if (tb[NFTA_CT_DIRECTION] == NULL)
+ priv->dir = IP_CT_DIR_MAX;
+ len = sizeof(u64);
+ break;
default:
return -EOPNOTSUPP;
}
@@ -366,6 +397,7 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
goto nla_put_failure;
switch (priv->key) {
+ case NFT_CT_L3PROTOCOL:
case NFT_CT_PROTOCOL:
case NFT_CT_SRC:
case NFT_CT_DST:
@@ -373,6 +405,13 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
case NFT_CT_PROTO_DST:
if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
goto nla_put_failure;
+ break;
+ case NFT_CT_BYTES:
+ case NFT_CT_PKTS:
+ if (priv->dir < IP_CT_DIR_MAX &&
+ nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
+ goto nla_put_failure;
+ break;
default:
break;
}
diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
new file mode 100644
index 000000000000..2cc1e0ef56e8
--- /dev/null
+++ b/net/netfilter/nft_dup_netdev.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_dup_netdev.h>
+
+struct nft_dup_netdev {
+ enum nft_registers sreg_dev:8;
+};
+
+static void nft_dup_netdev_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_dup_netdev *priv = nft_expr_priv(expr);
+ int oif = regs->data[priv->sreg_dev];
+
+ nf_dup_netdev_egress(pkt, oif);
+}
+
+static const struct nla_policy nft_dup_netdev_policy[NFTA_DUP_MAX + 1] = {
+ [NFTA_DUP_SREG_DEV] = { .type = NLA_U32 },
+};
+
+static int nft_dup_netdev_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_dup_netdev *priv = nft_expr_priv(expr);
+
+ if (tb[NFTA_DUP_SREG_DEV] == NULL)
+ return -EINVAL;
+
+ priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]);
+ return nft_validate_register_load(priv->sreg_dev, sizeof(int));
+}
+
+static const struct nft_expr_ops nft_dup_netdev_ingress_ops;
+
+static int nft_dup_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_dup_netdev *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_dup_netdev_type;
+static const struct nft_expr_ops nft_dup_netdev_ops = {
+ .type = &nft_dup_netdev_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_dup_netdev)),
+ .eval = nft_dup_netdev_eval,
+ .init = nft_dup_netdev_init,
+ .dump = nft_dup_netdev_dump,
+};
+
+static struct nft_expr_type nft_dup_netdev_type __read_mostly = {
+ .family = NFPROTO_NETDEV,
+ .name = "dup",
+ .ops = &nft_dup_netdev_ops,
+ .policy = nft_dup_netdev_policy,
+ .maxattr = NFTA_DUP_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_dup_netdev_module_init(void)
+{
+ return nft_register_expr(&nft_dup_netdev_type);
+}
+
+static void __exit nft_dup_netdev_module_exit(void)
+{
+ nft_unregister_expr(&nft_dup_netdev_type);
+}
+
+module_init(nft_dup_netdev_module_init);
+module_exit(nft_dup_netdev_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_AF_EXPR(5, "dup");
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 513a8ef60a59..9dec3bd1b63c 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -50,8 +50,9 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
}
ext = nft_set_elem_ext(set, elem);
- if (priv->expr != NULL)
- nft_expr_clone(nft_set_ext_expr(ext), priv->expr);
+ if (priv->expr != NULL &&
+ nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0)
+ return NULL;
return elem;
}
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
new file mode 100644
index 000000000000..763ebc3e0b2b
--- /dev/null
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_dup_netdev.h>
+
+struct nft_fwd_netdev {
+ enum nft_registers sreg_dev:8;
+};
+
+static void nft_fwd_netdev_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_fwd_netdev *priv = nft_expr_priv(expr);
+ int oif = regs->data[priv->sreg_dev];
+
+ nf_dup_netdev_egress(pkt, oif);
+ regs->verdict.code = NF_DROP;
+}
+
+static const struct nla_policy nft_fwd_netdev_policy[NFTA_FWD_MAX + 1] = {
+ [NFTA_FWD_SREG_DEV] = { .type = NLA_U32 },
+};
+
+static int nft_fwd_netdev_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_fwd_netdev *priv = nft_expr_priv(expr);
+
+ if (tb[NFTA_FWD_SREG_DEV] == NULL)
+ return -EINVAL;
+
+ priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]);
+ return nft_validate_register_load(priv->sreg_dev, sizeof(int));
+}
+
+static const struct nft_expr_ops nft_fwd_netdev_ingress_ops;
+
+static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_fwd_netdev *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_FWD_SREG_DEV, priv->sreg_dev))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_fwd_netdev_type;
+static const struct nft_expr_ops nft_fwd_netdev_ops = {
+ .type = &nft_fwd_netdev_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fwd_netdev)),
+ .eval = nft_fwd_netdev_eval,
+ .init = nft_fwd_netdev_init,
+ .dump = nft_fwd_netdev_dump,
+};
+
+static struct nft_expr_type nft_fwd_netdev_type __read_mostly = {
+ .family = NFPROTO_NETDEV,
+ .name = "fwd",
+ .ops = &nft_fwd_netdev_ops,
+ .policy = nft_fwd_netdev_policy,
+ .maxattr = NFTA_FWD_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_fwd_netdev_module_init(void)
+{
+ return nft_register_expr(&nft_fwd_netdev_type);
+}
+
+static void __exit nft_fwd_netdev_module_exit(void)
+{
+ nft_unregister_expr(&nft_fwd_netdev_type);
+}
+
+module_init(nft_fwd_netdev_module_init);
+module_exit(nft_fwd_netdev_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_AF_EXPR(5, "fwd");
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 5d67938f8b2f..99d18578afc6 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -26,6 +26,7 @@ struct nft_limit {
u64 rate;
u64 nsecs;
u32 burst;
+ bool invert;
};
static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
@@ -44,11 +45,11 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
if (delta >= 0) {
limit->tokens = delta;
spin_unlock_bh(&limit_lock);
- return false;
+ return limit->invert;
}
limit->tokens = tokens;
spin_unlock_bh(&limit_lock);
- return true;
+ return !limit->invert;
}
static int nft_limit_init(struct nft_limit *limit,
@@ -78,6 +79,12 @@ static int nft_limit_init(struct nft_limit *limit,
limit->rate = rate;
}
+ if (tb[NFTA_LIMIT_FLAGS]) {
+ u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS]));
+
+ if (flags & NFT_LIMIT_F_INV)
+ limit->invert = true;
+ }
limit->last = ktime_get_ns();
return 0;
@@ -86,13 +93,15 @@ static int nft_limit_init(struct nft_limit *limit,
static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit,
enum nft_limit_type type)
{
+ u32 flags = limit->invert ? NFT_LIMIT_F_INV : 0;
u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC);
u64 rate = limit->rate - limit->burst;
if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate)) ||
nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs)) ||
nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(limit->burst)) ||
- nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type)))
+ nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type)) ||
+ nla_put_be32(skb, NFTA_LIMIT_FLAGS, htonl(flags)))
goto nla_put_failure;
return 0;
@@ -120,6 +129,7 @@ static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
[NFTA_LIMIT_UNIT] = { .type = NLA_U64 },
[NFTA_LIMIT_BURST] = { .type = NLA_U32 },
[NFTA_LIMIT_TYPE] = { .type = NLA_U32 },
+ [NFTA_LIMIT_FLAGS] = { .type = NLA_U32 },
};
static int nft_limit_pkts_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index e4ad2c24bc41..fe885bf271c5 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -18,12 +18,16 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/smp.h>
+#include <linux/static_key.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nft_meta.h>
+#include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */
+
void nft_meta_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -31,6 +35,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
const struct nft_meta *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
const struct net_device *in = pkt->in, *out = pkt->out;
+ struct sock *sk;
u32 *dest = &regs->data[priv->dreg];
switch (priv->key) {
@@ -86,33 +91,35 @@ void nft_meta_get_eval(const struct nft_expr *expr,
*(u16 *)dest = out->type;
break;
case NFT_META_SKUID:
- if (skb->sk == NULL || !sk_fullsock(skb->sk))
+ sk = skb_to_full_sk(skb);
+ if (!sk || !sk_fullsock(sk))
goto err;
- read_lock_bh(&skb->sk->sk_callback_lock);
- if (skb->sk->sk_socket == NULL ||
- skb->sk->sk_socket->file == NULL) {
- read_unlock_bh(&skb->sk->sk_callback_lock);
+ read_lock_bh(&sk->sk_callback_lock);
+ if (sk->sk_socket == NULL ||
+ sk->sk_socket->file == NULL) {
+ read_unlock_bh(&sk->sk_callback_lock);
goto err;
}
*dest = from_kuid_munged(&init_user_ns,
- skb->sk->sk_socket->file->f_cred->fsuid);
- read_unlock_bh(&skb->sk->sk_callback_lock);
+ sk->sk_socket->file->f_cred->fsuid);
+ read_unlock_bh(&sk->sk_callback_lock);
break;
case NFT_META_SKGID:
- if (skb->sk == NULL || !sk_fullsock(skb->sk))
+ sk = skb_to_full_sk(skb);
+ if (!sk || !sk_fullsock(sk))
goto err;
- read_lock_bh(&skb->sk->sk_callback_lock);
- if (skb->sk->sk_socket == NULL ||
- skb->sk->sk_socket->file == NULL) {
- read_unlock_bh(&skb->sk->sk_callback_lock);
+ read_lock_bh(&sk->sk_callback_lock);
+ if (sk->sk_socket == NULL ||
+ sk->sk_socket->file == NULL) {
+ read_unlock_bh(&sk->sk_callback_lock);
goto err;
}
*dest = from_kgid_munged(&init_user_ns,
- skb->sk->sk_socket->file->f_cred->fsgid);
- read_unlock_bh(&skb->sk->sk_callback_lock);
+ sk->sk_socket->file->f_cred->fsgid);
+ read_unlock_bh(&sk->sk_callback_lock);
break;
#ifdef CONFIG_IP_ROUTE_CLASSID
case NFT_META_RTCLASSID: {
@@ -168,9 +175,10 @@ void nft_meta_get_eval(const struct nft_expr *expr,
break;
#ifdef CONFIG_CGROUP_NET_CLASSID
case NFT_META_CGROUP:
- if (skb->sk == NULL || !sk_fullsock(skb->sk))
+ sk = skb_to_full_sk(skb);
+ if (!sk || !sk_fullsock(sk))
goto err;
- *dest = skb->sk->sk_classid;
+ *dest = sock_cgroup_classid(&sk->sk_cgrp_data);
break;
#endif
default:
@@ -184,6 +192,13 @@ err:
}
EXPORT_SYMBOL_GPL(nft_meta_get_eval);
+/* don't change or set _LOOPBACK, _USER, etc. */
+static bool pkt_type_ok(u32 p)
+{
+ return p == PACKET_HOST || p == PACKET_BROADCAST ||
+ p == PACKET_MULTICAST || p == PACKET_OTHERHOST;
+}
+
void nft_meta_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -199,6 +214,11 @@ void nft_meta_set_eval(const struct nft_expr *expr,
case NFT_META_PRIORITY:
skb->priority = value;
break;
+ case NFT_META_PKTTYPE:
+ if (skb->pkt_type != value &&
+ pkt_type_ok(value) && pkt_type_ok(skb->pkt_type))
+ skb->pkt_type = value;
+ break;
case NFT_META_NFTRACE:
skb->nf_trace = 1;
break;
@@ -267,6 +287,24 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
}
EXPORT_SYMBOL_GPL(nft_meta_get_init);
+static int nft_meta_set_init_pkttype(const struct nft_ctx *ctx)
+{
+ unsigned int hooks;
+
+ switch (ctx->afi->family) {
+ case NFPROTO_BRIDGE:
+ hooks = 1 << NF_BR_PRE_ROUTING;
+ break;
+ case NFPROTO_NETDEV:
+ hooks = 1 << NF_NETDEV_INGRESS;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+
int nft_meta_set_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -284,6 +322,12 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
case NFT_META_NFTRACE:
len = sizeof(u8);
break;
+ case NFT_META_PKTTYPE:
+ err = nft_meta_set_init_pkttype(ctx);
+ if (err)
+ return err;
+ len = sizeof(u8);
+ break;
default:
return -EOPNOTSUPP;
}
@@ -293,6 +337,9 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
+ if (priv->key == NFT_META_NFTRACE)
+ static_branch_inc(&nft_trace_enabled);
+
return 0;
}
EXPORT_SYMBOL_GPL(nft_meta_set_init);
@@ -330,6 +377,16 @@ nla_put_failure:
}
EXPORT_SYMBOL_GPL(nft_meta_set_dump);
+void nft_meta_set_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ const struct nft_meta *priv = nft_expr_priv(expr);
+
+ if (priv->key == NFT_META_NFTRACE)
+ static_branch_dec(&nft_trace_enabled);
+}
+EXPORT_SYMBOL_GPL(nft_meta_set_destroy);
+
static struct nft_expr_type nft_meta_type;
static const struct nft_expr_ops nft_meta_get_ops = {
.type = &nft_meta_type,
@@ -344,6 +401,7 @@ static const struct nft_expr_ops nft_meta_set_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_set_eval,
.init = nft_meta_set_init,
+ .destroy = nft_meta_set_destroy,
.dump = nft_meta_set_dump,
};
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 09b4b07eb676..12cd4bf16d17 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -107,10 +107,13 @@ err:
}
static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
- [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 },
- [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 },
- [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 },
- [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_SREG] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 },
};
static int nft_payload_init(const struct nft_ctx *ctx,
@@ -160,6 +163,118 @@ const struct nft_expr_ops nft_payload_fast_ops = {
.dump = nft_payload_dump,
};
+static void nft_payload_set_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_payload_set *priv = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+ const u32 *src = &regs->data[priv->sreg];
+ int offset, csum_offset;
+ __wsum fsum, tsum;
+ __sum16 sum;
+
+ switch (priv->base) {
+ case NFT_PAYLOAD_LL_HEADER:
+ if (!skb_mac_header_was_set(skb))
+ goto err;
+ offset = skb_mac_header(skb) - skb->data;
+ break;
+ case NFT_PAYLOAD_NETWORK_HEADER:
+ offset = skb_network_offset(skb);
+ break;
+ case NFT_PAYLOAD_TRANSPORT_HEADER:
+ offset = pkt->xt.thoff;
+ break;
+ default:
+ BUG();
+ }
+
+ csum_offset = offset + priv->csum_offset;
+ offset += priv->offset;
+
+ if (priv->csum_type == NFT_PAYLOAD_CSUM_INET &&
+ (priv->base != NFT_PAYLOAD_TRANSPORT_HEADER ||
+ skb->ip_summed != CHECKSUM_PARTIAL)) {
+ if (skb_copy_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
+ goto err;
+
+ fsum = skb_checksum(skb, offset, priv->len, 0);
+ tsum = csum_partial(src, priv->len, 0);
+ sum = csum_fold(csum_add(csum_sub(~csum_unfold(sum), fsum),
+ tsum));
+ if (sum == 0)
+ sum = CSUM_MANGLED_0;
+
+ if (!skb_make_writable(skb, csum_offset + sizeof(sum)) ||
+ skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
+ goto err;
+ }
+
+ if (!skb_make_writable(skb, max(offset + priv->len, 0)) ||
+ skb_store_bits(skb, offset, src, priv->len) < 0)
+ goto err;
+
+ return;
+err:
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_payload_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_payload_set *priv = nft_expr_priv(expr);
+
+ priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
+ priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+ priv->sreg = nft_parse_register(tb[NFTA_PAYLOAD_SREG]);
+
+ if (tb[NFTA_PAYLOAD_CSUM_TYPE])
+ priv->csum_type =
+ ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
+ if (tb[NFTA_PAYLOAD_CSUM_OFFSET])
+ priv->csum_offset =
+ ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET]));
+
+ switch (priv->csum_type) {
+ case NFT_PAYLOAD_CSUM_NONE:
+ case NFT_PAYLOAD_CSUM_INET:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_validate_register_load(priv->sreg, priv->len);
+}
+
+static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_payload_set *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_PAYLOAD_SREG, priv->sreg) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_CSUM_TYPE, htonl(priv->csum_type)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_CSUM_OFFSET,
+ htonl(priv->csum_offset)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static const struct nft_expr_ops nft_payload_set_ops = {
+ .type = &nft_payload_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_payload_set)),
+ .eval = nft_payload_set_eval,
+ .init = nft_payload_set_init,
+ .dump = nft_payload_set_dump,
+};
+
static const struct nft_expr_ops *
nft_payload_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -167,8 +282,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
enum nft_payload_bases base;
unsigned int offset, len;
- if (tb[NFTA_PAYLOAD_DREG] == NULL ||
- tb[NFTA_PAYLOAD_BASE] == NULL ||
+ if (tb[NFTA_PAYLOAD_BASE] == NULL ||
tb[NFTA_PAYLOAD_OFFSET] == NULL ||
tb[NFTA_PAYLOAD_LEN] == NULL)
return ERR_PTR(-EINVAL);
@@ -183,6 +297,15 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
return ERR_PTR(-EOPNOTSUPP);
}
+ if (tb[NFTA_PAYLOAD_SREG] != NULL) {
+ if (tb[NFTA_PAYLOAD_DREG] != NULL)
+ return ERR_PTR(-EINVAL);
+ return &nft_payload_set_ops;
+ }
+
+ if (tb[NFTA_PAYLOAD_DREG] == NULL)
+ return ERR_PTR(-EINVAL);
+
offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index d4aaad747ea9..c8a0b7da5ff4 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -26,6 +26,7 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/audit.h>
+#include <linux/user_namespace.h>
#include <net/net_namespace.h>
#include <linux/netfilter/x_tables.h>
@@ -1226,6 +1227,8 @@ int xt_proto_init(struct net *net, u_int8_t af)
#ifdef CONFIG_PROC_FS
char buf[XT_FUNCTION_MAXNAMELEN];
struct proc_dir_entry *proc;
+ kuid_t root_uid;
+ kgid_t root_gid;
#endif
if (af >= ARRAY_SIZE(xt_prefix))
@@ -1233,12 +1236,17 @@ int xt_proto_init(struct net *net, u_int8_t af)
#ifdef CONFIG_PROC_FS
+ root_uid = make_kuid(net->user_ns, 0);
+ root_gid = make_kgid(net->user_ns, 0);
+
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TABLES, sizeof(buf));
proc = proc_create_data(buf, 0440, net->proc_net, &xt_table_ops,
(void *)(unsigned long)af);
if (!proc)
goto out;
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_MATCHES, sizeof(buf));
@@ -1246,6 +1254,8 @@ int xt_proto_init(struct net *net, u_int8_t af)
(void *)(unsigned long)af);
if (!proc)
goto out_remove_tables;
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TARGETS, sizeof(buf));
@@ -1253,6 +1263,8 @@ int xt_proto_init(struct net *net, u_int8_t af)
(void *)(unsigned long)af);
if (!proc)
goto out_remove_matches;
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
#endif
return 0;
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index e7ac07e53b59..6669e68d589e 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -143,7 +143,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
goto out;
}
- timeout = timeout_find_get(timeout_name);
+ timeout = timeout_find_get(par->net, timeout_name);
if (timeout == NULL) {
ret = -ENOENT;
pr_info("No such timeout policy \"%s\"\n", timeout_name);
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index b7c43def0dc6..e118397254af 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -228,7 +228,7 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
u8 nexthdr;
- __be16 frag_off;
+ __be16 frag_off, oldlen, newlen;
int tcphoff;
int ret;
@@ -244,7 +244,12 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
return NF_DROP;
if (ret > 0) {
ipv6h = ipv6_hdr(skb);
- ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
+ oldlen = ipv6h->payload_len;
+ newlen = htons(ntohs(oldlen) + ret);
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_add(csum_sub(skb->csum, oldlen),
+ newlen);
+ ipv6h->payload_len = newlen;
}
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 899b06115fc5..6e57a3966dc5 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -31,19 +31,21 @@ static unsigned int
tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tee_tginfo *info = par->targinfo;
+ int oif = info->priv ? info->priv->oif : 0;
- nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, info->priv->oif);
+ nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, oif);
return XT_CONTINUE;
}
-#if IS_ENABLED(CONFIG_NF_DUP_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
static unsigned int
tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tee_tginfo *info = par->targinfo;
+ int oif = info->priv ? info->priv->oif : 0;
- nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, info->priv->oif);
+ nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, oif);
return XT_CONTINUE;
}
@@ -129,7 +131,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
.destroy = tee_tg_destroy,
.me = THIS_MODULE,
},
-#if IS_ENABLED(CONFIG_NF_DUP_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
{
.name = "TEE",
.revision = 1,
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index a1d126f29463..a086a914865f 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -24,9 +24,9 @@ MODULE_DESCRIPTION("Xtables: process control group matching");
MODULE_ALIAS("ipt_cgroup");
MODULE_ALIAS("ip6t_cgroup");
-static int cgroup_mt_check(const struct xt_mtchk_param *par)
+static int cgroup_mt_check_v0(const struct xt_mtchk_param *par)
{
- struct xt_cgroup_info *info = par->matchinfo;
+ struct xt_cgroup_info_v0 *info = par->matchinfo;
if (info->invert & ~1)
return -EINVAL;
@@ -34,38 +34,110 @@ static int cgroup_mt_check(const struct xt_mtchk_param *par)
return 0;
}
+static int cgroup_mt_check_v1(const struct xt_mtchk_param *par)
+{
+ struct xt_cgroup_info_v1 *info = par->matchinfo;
+ struct cgroup *cgrp;
+
+ if ((info->invert_path & ~1) || (info->invert_classid & ~1))
+ return -EINVAL;
+
+ if (!info->has_path && !info->has_classid) {
+ pr_info("xt_cgroup: no path or classid specified\n");
+ return -EINVAL;
+ }
+
+ if (info->has_path && info->has_classid) {
+ pr_info("xt_cgroup: both path and classid specified\n");
+ return -EINVAL;
+ }
+
+ if (info->has_path) {
+ cgrp = cgroup_get_from_path(info->path);
+ if (IS_ERR(cgrp)) {
+ pr_info("xt_cgroup: invalid path, errno=%ld\n",
+ PTR_ERR(cgrp));
+ return -EINVAL;
+ }
+ info->priv = cgrp;
+ }
+
+ return 0;
+}
+
static bool
-cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
+cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
{
- const struct xt_cgroup_info *info = par->matchinfo;
+ const struct xt_cgroup_info_v0 *info = par->matchinfo;
if (skb->sk == NULL || !sk_fullsock(skb->sk))
return false;
- return (info->id == skb->sk->sk_classid) ^ info->invert;
+ return (info->id == sock_cgroup_classid(&skb->sk->sk_cgrp_data)) ^
+ info->invert;
+}
+
+static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_cgroup_info_v1 *info = par->matchinfo;
+ struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data;
+ struct cgroup *ancestor = info->priv;
+
+ if (!skb->sk || !sk_fullsock(skb->sk))
+ return false;
+
+ if (ancestor)
+ return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^
+ info->invert_path;
+ else
+ return (info->classid == sock_cgroup_classid(skcd)) ^
+ info->invert_classid;
+}
+
+static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par)
+{
+ struct xt_cgroup_info_v1 *info = par->matchinfo;
+
+ if (info->priv)
+ cgroup_put(info->priv);
}
-static struct xt_match cgroup_mt_reg __read_mostly = {
- .name = "cgroup",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .checkentry = cgroup_mt_check,
- .match = cgroup_mt,
- .matchsize = sizeof(struct xt_cgroup_info),
- .me = THIS_MODULE,
- .hooks = (1 << NF_INET_LOCAL_OUT) |
- (1 << NF_INET_POST_ROUTING) |
- (1 << NF_INET_LOCAL_IN),
+static struct xt_match cgroup_mt_reg[] __read_mostly = {
+ {
+ .name = "cgroup",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = cgroup_mt_check_v0,
+ .match = cgroup_mt_v0,
+ .matchsize = sizeof(struct xt_cgroup_info_v0),
+ .me = THIS_MODULE,
+ .hooks = (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ },
+ {
+ .name = "cgroup",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = cgroup_mt_check_v1,
+ .match = cgroup_mt_v1,
+ .matchsize = sizeof(struct xt_cgroup_info_v1),
+ .destroy = cgroup_mt_destroy_v1,
+ .me = THIS_MODULE,
+ .hooks = (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ },
};
static int __init cgroup_mt_init(void)
{
- return xt_register_match(&cgroup_mt_reg);
+ return xt_register_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg));
}
static void __exit cgroup_mt_exit(void)
{
- xt_unregister_match(&cgroup_mt_reg);
+ xt_unregister_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg));
}
module_init(cgroup_mt_init);
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index df8801e02a32..4e3c3affd285 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -61,8 +61,8 @@ static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = {
[OSF_ATTR_FINGER] = { .len = sizeof(struct xt_osf_user_finger) },
};
-static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const osf_attrs[])
{
struct xt_osf_user_finger *f;
@@ -104,7 +104,8 @@ static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb,
return err;
}
-static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb,
+static int xt_osf_remove_callback(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const osf_attrs[])
{
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index ca2e577ed8ac..1302b475abcb 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -14,6 +14,7 @@
#include <linux/skbuff.h>
#include <linux/file.h>
#include <net/sock.h>
+#include <net/inet_sock.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_owner.h>
@@ -33,8 +34,9 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_owner_match_info *info = par->matchinfo;
const struct file *filp;
+ struct sock *sk = skb_to_full_sk(skb);
- if (skb->sk == NULL || skb->sk->sk_socket == NULL)
+ if (sk == NULL || sk->sk_socket == NULL)
return (info->match ^ info->invert) == 0;
else if (info->match & info->invert & XT_OWNER_SOCKET)
/*
@@ -43,7 +45,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
*/
return false;
- filp = skb->sk->sk_socket->file;
+ filp = sk->sk_socket->file;
if (filp == NULL)
return ((info->match ^ info->invert) &
(XT_OWNER_UID | XT_OWNER_GID)) == 0;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index fafe33bdb619..f1ffb34e253f 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2116,7 +2116,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid
consume_skb(info.skb2);
if (info.delivered) {
- if (info.congested && (allocation & __GFP_WAIT))
+ if (info.congested && gfpflags_allow_blocking(allocation))
yield();
return 0;
}
@@ -2831,7 +2831,8 @@ static int netlink_dump(struct sock *sk)
* reasonable static buffer based on the expected largest dump of a
* single netdev. The outcome is MSG_TRUNC error.
*/
- skb_reserve(skb, skb_tailroom(skb) - alloc_size);
+ if (!netlink_rx_is_mmaped(sk))
+ skb_reserve(skb, skb_tailroom(skb) - alloc_size);
netlink_skb_set_owner_r(skb, sk);
len = cb->dump(skb, cb);
@@ -2915,6 +2916,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
cb = &nlk->cb;
memset(cb, 0, sizeof(*cb));
+ cb->start = control->start;
cb->dump = control->dump;
cb->done = control->done;
cb->nlh = nlh;
@@ -2927,6 +2929,9 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
mutex_unlock(nlk->cb_mutex);
+ if (cb->start)
+ cb->start(cb);
+
ret = netlink_dump(sk);
sock_put(sk);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index bc0e504f33a6..f830326b3b1d 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -185,7 +185,7 @@ static int genl_allocate_reserve_groups(int n_groups, int *first_id)
}
}
- if (id >= mc_groups_longs * BITS_PER_LONG) {
+ if (id + n_groups > mc_groups_longs * BITS_PER_LONG) {
unsigned long new_longs = mc_groups_longs +
BITS_TO_LONGS(n_groups);
size_t nlen = new_longs * sizeof(unsigned long);
@@ -513,6 +513,20 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
}
EXPORT_SYMBOL(genlmsg_put);
+static int genl_lock_start(struct netlink_callback *cb)
+{
+ /* our ops are always const - netlink API doesn't propagate that */
+ const struct genl_ops *ops = cb->data;
+ int rc = 0;
+
+ if (ops->start) {
+ genl_lock();
+ rc = ops->start(cb);
+ genl_unlock();
+ }
+ return rc;
+}
+
static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
/* our ops are always const - netlink API doesn't propagate that */
@@ -577,6 +591,7 @@ static int genl_family_rcv_msg(struct genl_family *family,
.module = family->module,
/* we have const, but the netlink API doesn't */
.data = (void *)ops,
+ .start = genl_lock_start,
.dump = genl_lock_dumpit,
.done = genl_lock_done,
};
@@ -588,6 +603,7 @@ static int genl_family_rcv_msg(struct genl_family *family,
} else {
struct netlink_dump_control c = {
.module = family->module,
+ .start = ops->start,
.dump = ops->dumpit,
.done = ops->done,
};
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 1fe3d3b362c0..122bb81da918 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -953,6 +953,19 @@ out:
}
EXPORT_SYMBOL(nfc_se_transaction);
+int nfc_se_connectivity(struct nfc_dev *dev, u8 se_idx)
+{
+ int rc;
+
+ pr_debug("connectivity: %x\n", se_idx);
+
+ device_lock(&dev->dev);
+ rc = nfc_genl_se_connectivity(dev, se_idx);
+ device_unlock(&dev->dev);
+ return rc;
+}
+EXPORT_SYMBOL(nfc_se_connectivity);
+
static void nfc_release(struct device *d)
{
struct nfc_dev *dev = to_nfc_dev(d);
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index 23c2a118ac9f..dd9003f38822 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -20,7 +20,8 @@
#include "digital.h"
#define DIGITAL_PROTO_NFCA_RF_TECH \
- (NFC_PROTO_JEWEL_MASK | NFC_PROTO_MIFARE_MASK | NFC_PROTO_NFC_DEP_MASK)
+ (NFC_PROTO_JEWEL_MASK | NFC_PROTO_MIFARE_MASK | \
+ NFC_PROTO_NFC_DEP_MASK | NFC_PROTO_ISO14443_MASK)
#define DIGITAL_PROTO_NFCB_RF_TECH NFC_PROTO_ISO14443_B_MASK
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index b7de0da46acd..ecf0a0196f18 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -572,7 +572,7 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock,
if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED)
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
pr_debug("mask 0x%x\n", mask);
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 10c99a578421..fbb7a2b57b44 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -610,14 +610,14 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type,
struct nci_core_conn_create_cmd *cmd;
struct core_conn_create_data data;
+ if (!number_destination_params)
+ return -EINVAL;
+
data.length = params_len + sizeof(struct nci_core_conn_create_cmd);
cmd = kzalloc(data.length, GFP_KERNEL);
if (!cmd)
return -ENOMEM;
- if (!number_destination_params)
- return -EINVAL;
-
cmd->destination_type = destination_type;
cmd->number_destination_params = number_destination_params;
memcpy(cmd->params, params, params_len);
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index 2aedac15cb59..a0ab26d535dc 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -676,7 +676,7 @@ int nci_hci_connect_gate(struct nci_dev *ndev,
break;
default:
pipe = nci_hci_create_pipe(ndev, dest_host, dest_gate, &r);
- if (pipe < 0)
+ if (pipe == NCI_HCI_INVALID_PIPE)
return r;
pipe_created = true;
break;
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index 21d8875673a4..c468eabd6943 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -171,14 +171,7 @@ static int nci_uart_tty_open(struct tty_struct *tty)
tty->disc_data = NULL;
tty->receive_room = 65536;
- /* Flush any pending characters in the driver and line discipline. */
-
- /* FIXME: why is this needed. Note don't use ldisc_ref here as the
- * open path is before the ldisc is referencable.
- */
-
- if (tty->ldisc->ops->flush_buffer)
- tty->ldisc->ops->flush_buffer(tty);
+ /* Flush any pending characters in the driver */
tty_driver_flush_buffer(tty);
return 0;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index f58c1fba1026..ea023b35f1c2 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -552,6 +552,43 @@ free_msg:
return -EMSGSIZE;
}
+int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx)
+{
+ struct nfc_se *se;
+ struct sk_buff *msg;
+ void *hdr;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
+ NFC_EVENT_SE_CONNECTIVITY);
+ if (!hdr)
+ goto free_msg;
+
+ se = nfc_find_se(dev, se_idx);
+ if (!se)
+ goto free_msg;
+
+ if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
+ nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) ||
+ nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+free_msg:
+ nlmsg_free(msg);
+ return -EMSGSIZE;
+}
+
static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
u32 portid, u32 seq,
struct netlink_callback *cb,
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index c20b784ad720..6c6f76b370b1 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -105,6 +105,7 @@ int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type);
int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx);
int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx,
struct nfc_evt_transaction *evt_transaction);
+int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx);
struct nfc_dev *nfc_get_device(unsigned int idx);
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index c88d0f2d3e01..2d59df521915 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1160,17 +1160,26 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
const struct sw_flow_actions *acts,
struct sw_flow_key *key)
{
- int level = this_cpu_read(exec_actions_level);
- int err;
+ static const int ovs_recursion_limit = 5;
+ int err, level;
+
+ level = __this_cpu_inc_return(exec_actions_level);
+ if (unlikely(level > ovs_recursion_limit)) {
+ net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
+ ovs_dp_name(dp));
+ kfree_skb(skb);
+ err = -ENETDOWN;
+ goto out;
+ }
- this_cpu_inc(exec_actions_level);
err = do_execute_actions(dp, skb, key,
acts->actions, acts->actions_len);
- if (!level)
+ if (level == 1)
process_deferred_actions(dp);
- this_cpu_dec(exec_actions_level);
+out:
+ __this_cpu_dec(exec_actions_level);
return err;
}
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index c2cc11168fd5..ee6ff8ffc12d 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -53,6 +53,8 @@ struct ovs_conntrack_info {
struct md_labels labels;
};
+static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info);
+
static u16 key_to_nfproto(const struct sw_flow_key *key)
{
switch (ntohs(key->eth.type)) {
@@ -141,6 +143,7 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
* previously sent the packet to conntrack via the ct action.
*/
static void ovs_ct_update_key(const struct sk_buff *skb,
+ const struct ovs_conntrack_info *info,
struct sw_flow_key *key, bool post_ct)
{
const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
@@ -158,13 +161,15 @@ static void ovs_ct_update_key(const struct sk_buff *skb,
zone = nf_ct_zone(ct);
} else if (post_ct) {
state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
+ if (info)
+ zone = &info->zone;
}
__ovs_ct_update_key(key, state, zone, ct);
}
void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
{
- ovs_ct_update_key(skb, key, false);
+ ovs_ct_update_key(skb, NULL, key, false);
}
int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
@@ -300,10 +305,10 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,
u16 zone, struct sk_buff *skb)
{
struct ovs_skb_cb ovs_cb = *OVS_CB(skb);
+ int err;
if (key->eth.type == htons(ETH_P_IP)) {
enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
- int err;
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
err = ip_defrag(net, skb, user);
@@ -314,28 +319,13 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
} else if (key->eth.type == htons(ETH_P_IPV6)) {
enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
- struct sk_buff *reasm;
memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
- reasm = nf_ct_frag6_gather(net, skb, user);
- if (!reasm)
- return -EINPROGRESS;
-
- if (skb == reasm) {
- kfree_skb(skb);
- return -EINVAL;
- }
-
- /* Don't free 'skb' even though it is one of the original
- * fragments, as we're going to morph it into the head.
- */
- skb_get(skb);
- nf_ct_frag6_consume_orig(reasm);
+ err = nf_ct_frag6_gather(net, skb, user);
+ if (err)
+ return err;
- key->ip.proto = ipv6_hdr(reasm)->nexthdr;
- skb_morph(skb, reasm);
- skb->next = reasm->next;
- consume_skb(reasm);
+ key->ip.proto = ipv6_hdr(skb)->nexthdr;
ovs_cb.mru = IP6CB(skb)->frag_max_size;
#endif
} else {
@@ -418,7 +408,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
}
}
- ovs_ct_update_key(skb, key, true);
+ ovs_ct_update_key(skb, info, key, true);
return 0;
}
@@ -693,6 +683,10 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
OVS_NLERR(log, "Failed to allocate conntrack template");
return -ENOMEM;
}
+
+ __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
+ nf_conntrack_get(&ct_info.ct->ct_general);
+
if (helper) {
err = ovs_ct_add_helper(&ct_info, helper, key, log);
if (err)
@@ -704,11 +698,9 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
if (err)
goto err_free_ct;
- __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
- nf_conntrack_get(&ct_info.ct->ct_general);
return 0;
err_free_ct:
- nf_conntrack_free(ct_info.ct);
+ __ovs_ct_free_action(&ct_info);
return err;
}
@@ -750,6 +742,11 @@ void ovs_ct_free_action(const struct nlattr *a)
{
struct ovs_conntrack_info *ct_info = nla_data(a);
+ __ovs_ct_free_action(ct_info);
+}
+
+static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
+{
if (ct_info->helper)
module_put(ct_info->helper->me);
if (ct_info->ct)
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 91a8b004dc51..deadfdab1bc3 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -336,12 +336,10 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
unsigned short gso_type = skb_shinfo(skb)->gso_type;
struct sw_flow_key later_key;
struct sk_buff *segs, *nskb;
- struct ovs_skb_cb ovs_cb;
int err;
- ovs_cb = *OVS_CB(skb);
+ BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET);
segs = __skb_gso_segment(skb, NETIF_F_SG, false);
- *OVS_CB(skb) = ovs_cb;
if (IS_ERR(segs))
return PTR_ERR(segs);
if (segs == NULL)
@@ -359,7 +357,6 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
/* Queue all of the segments. */
skb = segs;
do {
- *OVS_CB(skb) = ovs_cb;
if (gso_type & SKB_GSO_UDP && skb != segs)
key = &later_key;
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index a7a80a6b77b0..653d073bae45 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -58,7 +58,7 @@ void ovs_dp_notify_wq(struct work_struct *work)
struct hlist_node *n;
hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
- if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
+ if (vport->ops->type == OVS_VPORT_TYPE_INTERNAL)
continue;
if (!(vport->dev->priv_flags & IFF_OVS_DATAPATH))
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 907d6fd28ede..d1bd4a45ca2d 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2434,7 +2434,10 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
if (!start)
return -EMSGSIZE;
- err = ovs_nla_put_tunnel_info(skb, tun_info);
+ err = ip_tun_to_nlattr(skb, &tun_info->key,
+ ip_tunnel_info_opts(tun_info),
+ tun_info->options_len,
+ ip_tunnel_info_af(tun_info));
if (err)
return err;
nla_nest_end(skb, start);
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 69f1de58a3b4..1a1fcec88695 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -34,7 +34,7 @@ static struct vport_ops ovs_geneve_vport_ops;
* @dst_port: destination port.
*/
struct geneve_port {
- u16 port_no;
+ u16 dst_port;
};
static inline struct geneve_port *geneve_vport(const struct vport *vport)
@@ -47,7 +47,7 @@ static int geneve_get_options(const struct vport *vport,
{
struct geneve_port *geneve_port = geneve_vport(vport);
- if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, geneve_port->port_no))
+ if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, geneve_port->dst_port))
return -EMSGSIZE;
return 0;
}
@@ -83,7 +83,7 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms)
return vport;
geneve_port = geneve_vport(vport);
- geneve_port->port_no = dst_port;
+ geneve_port->dst_port = dst_port;
rtnl_lock();
dev = geneve_dev_create_fb(net, parms->name, NET_NAME_USER, dst_port);
@@ -117,7 +117,6 @@ static struct vport_ops ovs_geneve_vport_ops = {
.destroy = ovs_netdev_tunnel_destroy,
.get_options = geneve_get_options,
.send = dev_queue_xmit,
- .owner = THIS_MODULE,
};
static int __init ovs_geneve_tnl_init(void)
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index c3257d78d3d2..7f8897f33a67 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -89,7 +89,6 @@ static struct vport_ops ovs_gre_vport_ops = {
.create = gre_create,
.send = dev_queue_xmit,
.destroy = ovs_netdev_tunnel_destroy,
- .owner = THIS_MODULE,
};
static int __init ovs_gre_tnl_init(void)
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index b327368a3848..6a6adf314363 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -105,7 +105,7 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name)
rtnl_lock();
err = netdev_master_upper_dev_link(vport->dev,
- get_dpdev(vport->dp));
+ get_dpdev(vport->dp), NULL, NULL);
if (err)
goto error_unlock;
@@ -180,9 +180,13 @@ void ovs_netdev_tunnel_destroy(struct vport *vport)
if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
ovs_netdev_detach_dev(vport);
- /* Early release so we can unregister the device */
+ /* We can be invoked by both explicit vport deletion and
+ * underlying netdev deregistration; delete the link only
+ * if it's not already shutting down.
+ */
+ if (vport->dev->reg_state == NETREG_REGISTERED)
+ rtnl_delete_link(vport->dev);
dev_put(vport->dev);
- rtnl_delete_link(vport->dev);
vport->dev = NULL;
rtnl_unlock();
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 1605691d9414..5eb7694348b5 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -90,7 +90,9 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
int err;
struct vxlan_config conf = {
.no_share = true,
- .flags = VXLAN_F_COLLECT_METADATA,
+ .flags = VXLAN_F_COLLECT_METADATA | VXLAN_F_UDP_ZERO_CSUM6_RX,
+ /* Don't restrict the packets that can be sent by MTU */
+ .mtu = IP_MAX_MTU,
};
if (!options) {
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 0ac0fd004d7e..31cbc8c5c7db 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -71,7 +71,7 @@ static struct hlist_head *hash_bucket(const struct net *net, const char *name)
return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
}
-int ovs_vport_ops_register(struct vport_ops *ops)
+int __ovs_vport_ops_register(struct vport_ops *ops)
{
int err = -EEXIST;
struct vport_ops *o;
@@ -87,7 +87,7 @@ errout:
ovs_unlock();
return err;
}
-EXPORT_SYMBOL_GPL(ovs_vport_ops_register);
+EXPORT_SYMBOL_GPL(__ovs_vport_ops_register);
void ovs_vport_ops_unregister(struct vport_ops *ops)
{
@@ -256,8 +256,8 @@ int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
*
* @vport: vport to delete.
*
- * Detaches @vport from its datapath and destroys it. It is possible to fail
- * for reasons such as lack of memory. ovs_mutex must be held.
+ * Detaches @vport from its datapath and destroys it. ovs_mutex must
+ * be held.
*/
void ovs_vport_del(struct vport *vport)
{
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index bdfd82a7c064..c10899cb9040 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -70,7 +70,7 @@ struct vport_portids {
/**
* struct vport - one port within a datapath
- * @rcu: RCU callback head for deferred destruction.
+ * @dev: Pointer to net_device.
* @dp: Datapath to which this port belongs.
* @upcall_portids: RCU protected 'struct vport_portids'.
* @port_no: Index into @dp's @ports array.
@@ -78,6 +78,7 @@ struct vport_portids {
* @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
* @ops: Class structure.
* @detach_list: list used for detaching vport in net-exit call.
+ * @rcu: RCU callback head for deferred destruction.
*/
struct vport {
struct net_device *dev;
@@ -196,28 +197,14 @@ static inline const char *ovs_vport_name(struct vport *vport)
return vport->dev->name;
}
-int ovs_vport_ops_register(struct vport_ops *ops);
-void ovs_vport_ops_unregister(struct vport_ops *ops);
-
-static inline struct rtable *ovs_tunnel_route_lookup(struct net *net,
- const struct ip_tunnel_key *key,
- u32 mark,
- struct flowi4 *fl,
- u8 protocol)
-{
- struct rtable *rt;
-
- memset(fl, 0, sizeof(*fl));
- fl->daddr = key->u.ipv4.dst;
- fl->saddr = key->u.ipv4.src;
- fl->flowi4_tos = RT_TOS(key->tos);
- fl->flowi4_mark = mark;
- fl->flowi4_proto = protocol;
-
- rt = ip_route_output_key(net, fl);
- return rt;
-}
+int __ovs_vport_ops_register(struct vport_ops *ops);
+#define ovs_vport_ops_register(ops) \
+ ({ \
+ (ops)->owner = THIS_MODULE; \
+ __ovs_vport_ops_register(ops); \
+ })
+void ovs_vport_ops_unregister(struct vport_ops *ops);
void ovs_vport_send(struct vport *vport, struct sk_buff *skb);
#endif /* vport.h */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 691660b9b7ef..992396aa635c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1741,6 +1741,20 @@ static void fanout_release(struct sock *sk)
kfree_rcu(po->rollover, rcu);
}
+static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
+ struct sk_buff *skb)
+{
+ /* Earlier code assumed this would be a VLAN pkt, double-check
+ * this now that we have the actual packet in hand. We can only
+ * do this check on Ethernet devices.
+ */
+ if (unlikely(dev->type != ARPHRD_ETHER))
+ return false;
+
+ skb_reset_mac_header(skb);
+ return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
+}
+
static const struct proto_ops packet_ops;
static const struct proto_ops packet_ops_spkt;
@@ -1902,18 +1916,10 @@ retry:
goto retry;
}
- if (len > (dev->mtu + dev->hard_header_len + extra_len)) {
- /* Earlier code assumed this would be a VLAN pkt,
- * double-check this now that we have the actual
- * packet in hand.
- */
- struct ethhdr *ehdr;
- skb_reset_mac_header(skb);
- ehdr = eth_hdr(skb);
- if (ehdr->h_proto != htons(ETH_P_8021Q)) {
- err = -EMSGSIZE;
- goto out_unlock;
- }
+ if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
+ !packet_extra_vlan_len_allowed(dev, skb)) {
+ err = -EMSGSIZE;
+ goto out_unlock;
}
skb->protocol = proto;
@@ -2323,8 +2329,8 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
static bool ll_header_truncated(const struct net_device *dev, int len)
{
/* net device doesn't like empty head */
- if (unlikely(len <= dev->hard_header_len)) {
- net_warn_ratelimited("%s: packet size is too short (%d <= %d)\n",
+ if (unlikely(len < dev->hard_header_len)) {
+ net_warn_ratelimited("%s: packet size is too short (%d < %d)\n",
current->comm, len, dev->hard_header_len);
return true;
}
@@ -2332,6 +2338,15 @@ static bool ll_header_truncated(const struct net_device *dev, int len)
return false;
}
+static void tpacket_set_protocol(const struct net_device *dev,
+ struct sk_buff *skb)
+{
+ if (dev->type == ARPHRD_ETHER) {
+ skb_reset_mac_header(skb);
+ skb->protocol = eth_hdr(skb)->h_proto;
+ }
+}
+
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
void *frame, struct net_device *dev, int size_max,
__be16 proto, unsigned char *addr, int hlen)
@@ -2368,8 +2383,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb_reserve(skb, hlen);
skb_reset_network_header(skb);
- if (!packet_use_direct_xmit(po))
- skb_probe_transport_header(skb, 0);
if (unlikely(po->tp_tx_has_off)) {
int off_min, off_max, off;
off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
@@ -2415,6 +2428,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
dev->hard_header_len);
if (unlikely(err))
return err;
+ if (!skb->protocol)
+ tpacket_set_protocol(dev, skb);
data += dev->hard_header_len;
to_write -= dev->hard_header_len;
@@ -2449,6 +2464,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
len = ((to_write > len_max) ? len_max : to_write);
}
+ skb_probe_transport_header(skb, 0);
+
return tp_len;
}
@@ -2493,12 +2510,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
if (unlikely(!(dev->flags & IFF_UP)))
goto out_put;
- reserve = dev->hard_header_len + VLAN_HLEN;
+ if (po->sk.sk_socket->type == SOCK_RAW)
+ reserve = dev->hard_header_len;
size_max = po->tx_ring.frame_size
- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
- if (size_max > dev->mtu + reserve)
- size_max = dev->mtu + reserve;
+ if (size_max > dev->mtu + reserve + VLAN_HLEN)
+ size_max = dev->mtu + reserve + VLAN_HLEN;
do {
ph = packet_current_frame(po, &po->tx_ring,
@@ -2525,18 +2543,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
addr, hlen);
if (likely(tp_len >= 0) &&
- tp_len > dev->mtu + dev->hard_header_len) {
- struct ethhdr *ehdr;
- /* Earlier code assumed this would be a VLAN pkt,
- * double-check this now that we have the actual
- * packet in hand.
- */
+ tp_len > dev->mtu + reserve &&
+ !packet_extra_vlan_len_allowed(dev, skb))
+ tp_len = -EMSGSIZE;
- skb_reset_mac_header(skb);
- ehdr = eth_hdr(skb);
- if (ehdr->h_proto != htons(ETH_P_8021Q))
- tp_len = -EMSGSIZE;
- }
if (unlikely(tp_len < 0)) {
if (po->tp_loss) {
__packet_set_status(po, ph,
@@ -2765,18 +2775,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
- if (!gso_type && (len > dev->mtu + reserve + extra_len)) {
- /* Earlier code assumed this would be a VLAN pkt,
- * double-check this now that we have the actual
- * packet in hand.
- */
- struct ethhdr *ehdr;
- skb_reset_mac_header(skb);
- ehdr = eth_hdr(skb);
- if (ehdr->h_proto != htons(ETH_P_8021Q)) {
- err = -EMSGSIZE;
- goto out_free;
- }
+ if (!gso_type && (len > dev->mtu + reserve + extra_len) &&
+ !packet_extra_vlan_len_allowed(dev, skb)) {
+ err = -EMSGSIZE;
+ goto out_free;
}
skb->protocol = proto;
@@ -2807,8 +2809,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
len += vnet_hdr_len;
}
- if (!packet_use_direct_xmit(po))
- skb_probe_transport_header(skb, reserve);
+ skb_probe_transport_header(skb, reserve);
+
if (unlikely(extra_len == 4))
skb->no_fcs = 1;
@@ -2911,22 +2913,40 @@ static int packet_release(struct socket *sock)
* Attach a packet hook.
*/
-static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
+static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
+ __be16 proto)
{
struct packet_sock *po = pkt_sk(sk);
struct net_device *dev_curr;
__be16 proto_curr;
bool need_rehook;
+ struct net_device *dev = NULL;
+ int ret = 0;
+ bool unlisted = false;
- if (po->fanout) {
- if (dev)
- dev_put(dev);
-
+ if (po->fanout)
return -EINVAL;
- }
lock_sock(sk);
spin_lock(&po->bind_lock);
+ rcu_read_lock();
+
+ if (name) {
+ dev = dev_get_by_name_rcu(sock_net(sk), name);
+ if (!dev) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+ } else if (ifindex) {
+ dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
+ if (!dev) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+ }
+
+ if (dev)
+ dev_hold(dev);
proto_curr = po->prot_hook.type;
dev_curr = po->prot_hook.dev;
@@ -2934,14 +2954,29 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
need_rehook = proto_curr != proto || dev_curr != dev;
if (need_rehook) {
- unregister_prot_hook(sk, true);
+ if (po->running) {
+ rcu_read_unlock();
+ __unregister_prot_hook(sk, true);
+ rcu_read_lock();
+ dev_curr = po->prot_hook.dev;
+ if (dev)
+ unlisted = !dev_get_by_index_rcu(sock_net(sk),
+ dev->ifindex);
+ }
po->num = proto;
po->prot_hook.type = proto;
- po->prot_hook.dev = dev;
- po->ifindex = dev ? dev->ifindex : 0;
- packet_cached_dev_assign(po, dev);
+ if (unlikely(unlisted)) {
+ dev_put(dev);
+ po->prot_hook.dev = NULL;
+ po->ifindex = -1;
+ packet_cached_dev_reset(po);
+ } else {
+ po->prot_hook.dev = dev;
+ po->ifindex = dev ? dev->ifindex : 0;
+ packet_cached_dev_assign(po, dev);
+ }
}
if (dev_curr)
dev_put(dev_curr);
@@ -2949,7 +2984,7 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
if (proto == 0 || !need_rehook)
goto out_unlock;
- if (!dev || (dev->flags & IFF_UP)) {
+ if (!unlisted && (!dev || (dev->flags & IFF_UP))) {
register_prot_hook(sk);
} else {
sk->sk_err = ENETDOWN;
@@ -2958,9 +2993,10 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
}
out_unlock:
+ rcu_read_unlock();
spin_unlock(&po->bind_lock);
release_sock(sk);
- return 0;
+ return ret;
}
/*
@@ -2972,8 +3008,6 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
{
struct sock *sk = sock->sk;
char name[15];
- struct net_device *dev;
- int err = -ENODEV;
/*
* Check legality
@@ -2983,19 +3017,13 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
return -EINVAL;
strlcpy(name, uaddr->sa_data, sizeof(name));
- dev = dev_get_by_name(sock_net(sk), name);
- if (dev)
- err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
- return err;
+ return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
}
static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
struct sock *sk = sock->sk;
- struct net_device *dev = NULL;
- int err;
-
/*
* Check legality
@@ -3006,16 +3034,8 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len
if (sll->sll_family != AF_PACKET)
return -EINVAL;
- if (sll->sll_ifindex) {
- err = -ENODEV;
- dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
- if (dev == NULL)
- goto out;
- }
- err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
-
-out:
- return err;
+ return packet_do_bind(sk, NULL, sll->sll_ifindex,
+ sll->sll_protocol ? : pkt_sk(sk)->num);
}
static struct proto packet_proto = {
@@ -4089,7 +4109,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
err = -EINVAL;
if (unlikely((int)req->tp_block_size <= 0))
goto out;
- if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
+ if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
goto out;
if (po->tp_version >= TPACKET_V3 &&
(int)(req->tp_block_size -
@@ -4101,8 +4121,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
goto out;
- rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
- if (unlikely(rb->frames_per_block <= 0))
+ rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
+ if (unlikely(rb->frames_per_block == 0))
goto out;
if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
req->tp_frame_nr))
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 10d42f3220ab..f925753668a7 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -377,6 +377,10 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
struct sockaddr_pn sa;
u16 len;
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (!skb)
+ return NET_RX_DROP;
+
/* check we have at least a full Phonet header */
if (!pskb_pull(skb, sizeof(struct phonethdr)))
goto out;
diff --git a/net/rds/connection.c b/net/rds/connection.c
index d4564036a339..e3b118cae81d 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -186,12 +186,6 @@ static struct rds_connection *__rds_conn_create(struct net *net,
}
}
- if (trans == NULL) {
- kmem_cache_free(rds_conn_slab, conn);
- conn = ERR_PTR(-ENODEV);
- goto out;
- }
-
conn->c_trans = trans;
ret = trans->conn_alloc(conn, gfp);
diff --git a/net/rds/ib.c b/net/rds/ib.c
index a833ab7898fe..9481d55ff6cb 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -122,44 +122,34 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
static void rds_ib_add_one(struct ib_device *device)
{
struct rds_ib_device *rds_ibdev;
- struct ib_device_attr *dev_attr;
/* Only handle IB (no iWARP) devices */
if (device->node_type != RDMA_NODE_IB_CA)
return;
- dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
- if (!dev_attr)
- return;
-
- if (ib_query_device(device, dev_attr)) {
- rdsdebug("Query device failed for %s\n", device->name);
- goto free_attr;
- }
-
rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL,
ibdev_to_node(device));
if (!rds_ibdev)
- goto free_attr;
+ return;
spin_lock_init(&rds_ibdev->spinlock);
atomic_set(&rds_ibdev->refcount, 1);
INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
- rds_ibdev->max_wrs = dev_attr->max_qp_wr;
- rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE);
+ rds_ibdev->max_wrs = device->attrs.max_qp_wr;
+ rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE);
- rds_ibdev->fmr_max_remaps = dev_attr->max_map_per_fmr?: 32;
- rds_ibdev->max_1m_fmrs = dev_attr->max_mr ?
- min_t(unsigned int, (dev_attr->max_mr / 2),
+ rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
+ rds_ibdev->max_1m_fmrs = device->attrs.max_mr ?
+ min_t(unsigned int, (device->attrs.max_mr / 2),
rds_ib_fmr_1m_pool_size) : rds_ib_fmr_1m_pool_size;
- rds_ibdev->max_8k_fmrs = dev_attr->max_mr ?
- min_t(unsigned int, ((dev_attr->max_mr / 2) * RDS_MR_8K_SCALE),
+ rds_ibdev->max_8k_fmrs = device->attrs.max_mr ?
+ min_t(unsigned int, ((device->attrs.max_mr / 2) * RDS_MR_8K_SCALE),
rds_ib_fmr_8k_pool_size) : rds_ib_fmr_8k_pool_size;
- rds_ibdev->max_initiator_depth = dev_attr->max_qp_init_rd_atom;
- rds_ibdev->max_responder_resources = dev_attr->max_qp_rd_atom;
+ rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom;
+ rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom;
rds_ibdev->dev = device;
rds_ibdev->pd = ib_alloc_pd(device);
@@ -183,7 +173,7 @@ static void rds_ib_add_one(struct ib_device *device)
}
rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_fmrs = %d, max_8k_fmrs = %d\n",
- dev_attr->max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
+ device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_fmrs,
rds_ibdev->max_8k_fmrs);
@@ -202,8 +192,6 @@ static void rds_ib_add_one(struct ib_device *device)
put_dev:
rds_ib_dev_put(rds_ibdev);
-free_attr:
- kfree(dev_attr);
}
/*
@@ -336,7 +324,7 @@ static int rds_ib_laddr_check(struct net *net, __be32 addr)
/* Create a CMA ID and try to bind it. This catches both
* IB and iWARP capable NICs.
*/
- cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
+ cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);
diff --git a/net/rds/ib.h b/net/rds/ib.h
index f17d09567890..b3fdebb57460 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -75,7 +75,11 @@ struct rds_ib_connect_private {
struct rds_ib_send_work {
void *s_op;
- struct ib_send_wr s_wr;
+ union {
+ struct ib_send_wr s_wr;
+ struct ib_rdma_wr s_rdma_wr;
+ struct ib_atomic_wr s_atomic_wr;
+ };
struct ib_sge s_sge[RDS_IB_MAX_SGE];
unsigned long s_queued;
};
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 2b2370e7f356..da5a7fb98c77 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -668,7 +668,7 @@ int rds_ib_conn_connect(struct rds_connection *conn)
/* XXX I wonder what affect the port space has */
/* delegate cm event handler to rdma_transport */
- ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
+ ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(ic->i_cm_id)) {
ret = PTR_ERR(ic->i_cm_id);
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 96744b75db93..977fb86065b7 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -305,7 +305,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,
gfp_t slab_mask = GFP_NOWAIT;
gfp_t page_mask = GFP_NOWAIT;
- if (gfp & __GFP_WAIT) {
+ if (gfp & __GFP_DIRECT_RECLAIM) {
slab_mask = GFP_KERNEL;
page_mask = GFP_HIGHUSER;
}
@@ -379,7 +379,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
struct ib_recv_wr *failed_wr;
unsigned int posted = 0;
int ret = 0;
- bool can_wait = !!(gfp & __GFP_WAIT);
+ bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM);
u32 pos;
/* the goal here is to just make sure that someone, somewhere
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 670882c752e9..eac30bf486d7 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -777,23 +777,23 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
send->s_queued = jiffies;
if (op->op_type == RDS_ATOMIC_TYPE_CSWP) {
- send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
- send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare;
- send->s_wr.wr.atomic.swap = op->op_m_cswp.swap;
- send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask;
- send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask;
+ send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
+ send->s_atomic_wr.compare_add = op->op_m_cswp.compare;
+ send->s_atomic_wr.swap = op->op_m_cswp.swap;
+ send->s_atomic_wr.compare_add_mask = op->op_m_cswp.compare_mask;
+ send->s_atomic_wr.swap_mask = op->op_m_cswp.swap_mask;
} else { /* FADD */
- send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
- send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add;
- send->s_wr.wr.atomic.swap = 0;
- send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask;
- send->s_wr.wr.atomic.swap_mask = 0;
+ send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
+ send->s_atomic_wr.compare_add = op->op_m_fadd.add;
+ send->s_atomic_wr.swap = 0;
+ send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask;
+ send->s_atomic_wr.swap_mask = 0;
}
nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
- send->s_wr.num_sge = 1;
- send->s_wr.next = NULL;
- send->s_wr.wr.atomic.remote_addr = op->op_remote_addr;
- send->s_wr.wr.atomic.rkey = op->op_rkey;
+ send->s_atomic_wr.wr.num_sge = 1;
+ send->s_atomic_wr.wr.next = NULL;
+ send->s_atomic_wr.remote_addr = op->op_remote_addr;
+ send->s_atomic_wr.rkey = op->op_rkey;
send->s_op = op;
rds_message_addref(container_of(send->s_op, struct rds_message, atomic));
@@ -818,11 +818,11 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
if (nr_sig)
atomic_add(nr_sig, &ic->i_signaled_sends);
- failed_wr = &send->s_wr;
- ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr);
+ failed_wr = &send->s_atomic_wr.wr;
+ ret = ib_post_send(ic->i_cm_id->qp, &send->s_atomic_wr.wr, &failed_wr);
rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic,
- send, &send->s_wr, ret, failed_wr);
- BUG_ON(failed_wr != &send->s_wr);
+ send, &send->s_atomic_wr, ret, failed_wr);
+ BUG_ON(failed_wr != &send->s_atomic_wr.wr);
if (ret) {
printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 "
"returned %d\n", &conn->c_faddr, ret);
@@ -831,9 +831,9 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
goto out;
}
- if (unlikely(failed_wr != &send->s_wr)) {
+ if (unlikely(failed_wr != &send->s_atomic_wr.wr)) {
printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
- BUG_ON(failed_wr != &send->s_wr);
+ BUG_ON(failed_wr != &send->s_atomic_wr.wr);
}
out:
@@ -904,22 +904,23 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify);
send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
- send->s_wr.wr.rdma.remote_addr = remote_addr;
- send->s_wr.wr.rdma.rkey = op->op_rkey;
+ send->s_rdma_wr.remote_addr = remote_addr;
+ send->s_rdma_wr.rkey = op->op_rkey;
if (num_sge > max_sge) {
- send->s_wr.num_sge = max_sge;
+ send->s_rdma_wr.wr.num_sge = max_sge;
num_sge -= max_sge;
} else {
- send->s_wr.num_sge = num_sge;
+ send->s_rdma_wr.wr.num_sge = num_sge;
}
- send->s_wr.next = NULL;
+ send->s_rdma_wr.wr.next = NULL;
if (prev)
- prev->s_wr.next = &send->s_wr;
+ prev->s_rdma_wr.wr.next = &send->s_rdma_wr.wr;
- for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
+ for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
+ scat != &op->op_sg[op->op_count]; j++) {
len = ib_sg_dma_len(ic->i_cm_id->device, scat);
send->s_sge[j].addr =
ib_sg_dma_address(ic->i_cm_id->device, scat);
@@ -934,7 +935,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
}
rdsdebug("send %p wr %p num_sge %u next %p\n", send,
- &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
+ &send->s_rdma_wr.wr,
+ send->s_rdma_wr.wr.num_sge,
+ send->s_rdma_wr.wr.next);
prev = send;
if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
@@ -955,11 +958,11 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
if (nr_sig)
atomic_add(nr_sig, &ic->i_signaled_sends);
- failed_wr = &first->s_wr;
- ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
+ failed_wr = &first->s_rdma_wr.wr;
+ ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr);
rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
- first, &first->s_wr, ret, failed_wr);
- BUG_ON(failed_wr != &first->s_wr);
+ first, &first->s_rdma_wr.wr, ret, failed_wr);
+ BUG_ON(failed_wr != &first->s_rdma_wr.wr);
if (ret) {
printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 "
"returned %d\n", &conn->c_faddr, ret);
@@ -968,9 +971,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
goto out;
}
- if (unlikely(failed_wr != &first->s_wr)) {
+ if (unlikely(failed_wr != &first->s_rdma_wr.wr)) {
printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
- BUG_ON(failed_wr != &first->s_wr);
+ BUG_ON(failed_wr != &first->s_rdma_wr.wr);
}
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 3df0295c6659..f4a9fff829e0 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -60,30 +60,20 @@ LIST_HEAD(iw_nodev_conns);
static void rds_iw_add_one(struct ib_device *device)
{
struct rds_iw_device *rds_iwdev;
- struct ib_device_attr *dev_attr;
/* Only handle iwarp devices */
if (device->node_type != RDMA_NODE_RNIC)
return;
- dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
- if (!dev_attr)
- return;
-
- if (ib_query_device(device, dev_attr)) {
- rdsdebug("Query device failed for %s\n", device->name);
- goto free_attr;
- }
-
rds_iwdev = kmalloc(sizeof *rds_iwdev, GFP_KERNEL);
if (!rds_iwdev)
- goto free_attr;
+ return;
spin_lock_init(&rds_iwdev->spinlock);
- rds_iwdev->dma_local_lkey = !!(dev_attr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
- rds_iwdev->max_wrs = dev_attr->max_qp_wr;
- rds_iwdev->max_sge = min(dev_attr->max_sge, RDS_IW_MAX_SGE);
+ rds_iwdev->dma_local_lkey = !!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
+ rds_iwdev->max_wrs = device->attrs.max_qp_wr;
+ rds_iwdev->max_sge = min(device->attrs.max_sge, RDS_IW_MAX_SGE);
rds_iwdev->dev = device;
rds_iwdev->pd = ib_alloc_pd(device);
@@ -111,8 +101,7 @@ static void rds_iw_add_one(struct ib_device *device)
list_add_tail(&rds_iwdev->list, &rds_iw_devices);
ib_set_client_data(device, &rds_iw_client, rds_iwdev);
-
- goto free_attr;
+ return;
err_mr:
if (rds_iwdev->mr)
@@ -121,8 +110,6 @@ err_pd:
ib_dealloc_pd(rds_iwdev->pd);
free_dev:
kfree(rds_iwdev);
-free_attr:
- kfree(dev_attr);
}
static void rds_iw_remove_one(struct ib_device *device, void *client_data)
@@ -223,7 +210,7 @@ static int rds_iw_laddr_check(struct net *net, __be32 addr)
/* Create a CMA ID and try to bind it. This catches both
* IB and iWARP capable NICs.
*/
- cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
+ cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);
diff --git a/net/rds/iw.h b/net/rds/iw.h
index cbe6674e31ee..5af01d1758b3 100644
--- a/net/rds/iw.h
+++ b/net/rds/iw.h
@@ -74,10 +74,13 @@ struct rds_iw_send_work {
struct rm_rdma_op *s_op;
struct rds_iw_mapping *s_mapping;
struct ib_mr *s_mr;
- struct ib_fast_reg_page_list *s_page_list;
unsigned char s_remap_count;
- struct ib_send_wr s_wr;
+ union {
+ struct ib_send_wr s_send_wr;
+ struct ib_rdma_wr s_rdma_wr;
+ struct ib_reg_wr s_reg_wr;
+ };
struct ib_sge s_sge[RDS_IW_MAX_SGE];
unsigned long s_queued;
};
@@ -195,7 +198,7 @@ struct rds_iw_device {
/* Magic WR_ID for ACKs */
#define RDS_IW_ACK_WR_ID ((u64)0xffffffffffffffffULL)
-#define RDS_IW_FAST_REG_WR_ID ((u64)0xefefefefefefefefULL)
+#define RDS_IW_REG_WR_ID ((u64)0xefefefefefefefefULL)
#define RDS_IW_LOCAL_INV_WR_ID ((u64)0xdfdfdfdfdfdfdfdfULL)
struct rds_iw_statistics {
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index a6553a6fb2bc..aea4c911bc76 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -524,7 +524,7 @@ int rds_iw_conn_connect(struct rds_connection *conn)
/* XXX I wonder what affect the port space has */
/* delegate cm event handler to rdma_transport */
- ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
+ ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(ic->i_cm_id)) {
ret = PTR_ERR(ic->i_cm_id);
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index d3d4454ffc84..b09a40c1adce 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -47,7 +47,6 @@ struct rds_iw_mr {
struct rdma_cm_id *cm_id;
struct ib_mr *mr;
- struct ib_fast_reg_page_list *page_list;
struct rds_iw_mapping mapping;
unsigned char remap_count;
@@ -77,8 +76,8 @@ struct rds_iw_mr_pool {
static void rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all);
static void rds_iw_mr_pool_flush_worker(struct work_struct *work);
-static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
-static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
+static int rds_iw_init_reg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
+static int rds_iw_map_reg(struct rds_iw_mr_pool *pool,
struct rds_iw_mr *ibmr,
struct scatterlist *sg, unsigned int nents);
static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
@@ -258,19 +257,18 @@ static void rds_iw_set_scatterlist(struct rds_iw_scatterlist *sg,
sg->bytes = 0;
}
-static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
- struct rds_iw_scatterlist *sg)
+static int rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
+ struct rds_iw_scatterlist *sg)
{
struct ib_device *dev = rds_iwdev->dev;
- u64 *dma_pages = NULL;
- int i, j, ret;
+ int i, ret;
WARN_ON(sg->dma_len);
sg->dma_len = ib_dma_map_sg(dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
if (unlikely(!sg->dma_len)) {
printk(KERN_WARNING "RDS/IW: dma_map_sg failed!\n");
- return ERR_PTR(-EBUSY);
+ return -EBUSY;
}
sg->bytes = 0;
@@ -303,31 +301,14 @@ static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
if (sg->dma_npages > fastreg_message_size)
goto out_unmap;
- dma_pages = kmalloc(sizeof(u64) * sg->dma_npages, GFP_ATOMIC);
- if (!dma_pages) {
- ret = -ENOMEM;
- goto out_unmap;
- }
- for (i = j = 0; i < sg->dma_len; ++i) {
- unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]);
- u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]);
- u64 end_addr;
- end_addr = dma_addr + dma_len;
- dma_addr &= ~PAGE_MASK;
- for (; dma_addr < end_addr; dma_addr += PAGE_SIZE)
- dma_pages[j++] = dma_addr;
- BUG_ON(j > sg->dma_npages);
- }
-
- return dma_pages;
+ return 0;
out_unmap:
ib_dma_unmap_sg(rds_iwdev->dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
sg->dma_len = 0;
- kfree(dma_pages);
- return ERR_PTR(ret);
+ return ret;
}
@@ -440,7 +421,7 @@ static struct rds_iw_mr *rds_iw_alloc_mr(struct rds_iw_device *rds_iwdev)
INIT_LIST_HEAD(&ibmr->mapping.m_list);
ibmr->mapping.m_mr = ibmr;
- err = rds_iw_init_fastreg(pool, ibmr);
+ err = rds_iw_init_reg(pool, ibmr);
if (err)
goto out_no_cigar;
@@ -620,7 +601,7 @@ void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
ibmr->cm_id = cm_id;
ibmr->device = rds_iwdev;
- ret = rds_iw_map_fastreg(rds_iwdev->mr_pool, ibmr, sg, nents);
+ ret = rds_iw_map_reg(rds_iwdev->mr_pool, ibmr, sg, nents);
if (ret == 0)
*key_ret = ibmr->mr->rkey;
else
@@ -636,7 +617,7 @@ out:
}
/*
- * iWARP fastreg handling
+ * iWARP reg handling
*
* The life cycle of a fastreg registration is a bit different from
* FMRs.
@@ -648,7 +629,7 @@ out:
* This creates a bit of a problem for us, as we do not have the destination
* IP in GET_MR, so the connection must be setup prior to the GET_MR call for
* RDMA to be correctly setup. If a fastreg request is present, rds_iw_xmit
- * will try to queue a LOCAL_INV (if needed) and a FAST_REG_MR work request
+ * will try to queue a LOCAL_INV (if needed) and a REG_MR work request
* before queuing the SEND. When completions for these arrive, they are
* dispatched to the MR has a bit set showing that RDMa can be performed.
*
@@ -657,11 +638,10 @@ out:
* The expectation there is that this invalidation step includes ALL
* PREVIOUSLY FREED MRs.
*/
-static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool,
- struct rds_iw_mr *ibmr)
+static int rds_iw_init_reg(struct rds_iw_mr_pool *pool,
+ struct rds_iw_mr *ibmr)
{
struct rds_iw_device *rds_iwdev = pool->device;
- struct ib_fast_reg_page_list *page_list = NULL;
struct ib_mr *mr;
int err;
@@ -674,55 +654,44 @@ static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool,
return err;
}
- /* FIXME - this is overkill, but mapping->m_sg.dma_len/mapping->m_sg.dma_npages
- * is not filled in.
- */
- page_list = ib_alloc_fast_reg_page_list(rds_iwdev->dev, pool->max_message_size);
- if (IS_ERR(page_list)) {
- err = PTR_ERR(page_list);
-
- printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed (err=%d)\n", err);
- ib_dereg_mr(mr);
- return err;
- }
-
- ibmr->page_list = page_list;
ibmr->mr = mr;
return 0;
}
-static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping)
+static int rds_iw_rdma_reg_mr(struct rds_iw_mapping *mapping)
{
struct rds_iw_mr *ibmr = mapping->m_mr;
- struct ib_send_wr f_wr, *failed_wr;
- int ret;
+ struct rds_iw_scatterlist *m_sg = &mapping->m_sg;
+ struct ib_reg_wr reg_wr;
+ struct ib_send_wr *failed_wr;
+ int ret, n;
+
+ n = ib_map_mr_sg_zbva(ibmr->mr, m_sg->list, m_sg->len, PAGE_SIZE);
+ if (unlikely(n != m_sg->len))
+ return n < 0 ? n : -EINVAL;
+
+ reg_wr.wr.next = NULL;
+ reg_wr.wr.opcode = IB_WR_REG_MR;
+ reg_wr.wr.wr_id = RDS_IW_REG_WR_ID;
+ reg_wr.wr.num_sge = 0;
+ reg_wr.mr = ibmr->mr;
+ reg_wr.key = mapping->m_rkey;
+ reg_wr.access = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE;
/*
- * Perform a WR for the fast_reg_mr. Each individual page
+ * Perform a WR for the reg_mr. Each individual page
* in the sg list is added to the fast reg page list and placed
- * inside the fast_reg_mr WR. The key used is a rolling 8bit
+ * inside the reg_mr WR. The key used is a rolling 8bit
* counter, which should guarantee uniqueness.
*/
ib_update_fast_reg_key(ibmr->mr, ibmr->remap_count++);
mapping->m_rkey = ibmr->mr->rkey;
- memset(&f_wr, 0, sizeof(f_wr));
- f_wr.wr_id = RDS_IW_FAST_REG_WR_ID;
- f_wr.opcode = IB_WR_FAST_REG_MR;
- f_wr.wr.fast_reg.length = mapping->m_sg.bytes;
- f_wr.wr.fast_reg.rkey = mapping->m_rkey;
- f_wr.wr.fast_reg.page_list = ibmr->page_list;
- f_wr.wr.fast_reg.page_list_len = mapping->m_sg.dma_len;
- f_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
- f_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_READ |
- IB_ACCESS_REMOTE_WRITE;
- f_wr.wr.fast_reg.iova_start = 0;
- f_wr.send_flags = IB_SEND_SIGNALED;
-
- failed_wr = &f_wr;
- ret = ib_post_send(ibmr->cm_id->qp, &f_wr, &failed_wr);
- BUG_ON(failed_wr != &f_wr);
+ failed_wr = &reg_wr.wr;
+ ret = ib_post_send(ibmr->cm_id->qp, &reg_wr.wr, &failed_wr);
+ BUG_ON(failed_wr != &reg_wr.wr);
if (ret)
printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n",
__func__, __LINE__, ret);
@@ -754,21 +723,20 @@ out:
return ret;
}
-static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
- struct rds_iw_mr *ibmr,
- struct scatterlist *sg,
- unsigned int sg_len)
+static int rds_iw_map_reg(struct rds_iw_mr_pool *pool,
+ struct rds_iw_mr *ibmr,
+ struct scatterlist *sg,
+ unsigned int sg_len)
{
struct rds_iw_device *rds_iwdev = pool->device;
struct rds_iw_mapping *mapping = &ibmr->mapping;
u64 *dma_pages;
- int i, ret = 0;
+ int ret = 0;
rds_iw_set_scatterlist(&mapping->m_sg, sg, sg_len);
- dma_pages = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg);
- if (IS_ERR(dma_pages)) {
- ret = PTR_ERR(dma_pages);
+ ret = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg);
+ if (ret) {
dma_pages = NULL;
goto out;
}
@@ -778,10 +746,7 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
goto out;
}
- for (i = 0; i < mapping->m_sg.dma_npages; ++i)
- ibmr->page_list->page_list[i] = dma_pages[i];
-
- ret = rds_iw_rdma_build_fastreg(mapping);
+ ret = rds_iw_rdma_reg_mr(mapping);
if (ret)
goto out;
@@ -867,8 +832,6 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool,
struct rds_iw_mr *ibmr)
{
- if (ibmr->page_list)
- ib_free_fast_reg_page_list(ibmr->page_list);
if (ibmr->mr)
ib_dereg_mr(ibmr->mr);
}
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 86152ec3b887..e20bd503f4bd 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -137,13 +137,13 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic)
send->s_op = NULL;
send->s_mapping = NULL;
- send->s_wr.next = NULL;
- send->s_wr.wr_id = i;
- send->s_wr.sg_list = send->s_sge;
- send->s_wr.num_sge = 1;
- send->s_wr.opcode = IB_WR_SEND;
- send->s_wr.send_flags = 0;
- send->s_wr.ex.imm_data = 0;
+ send->s_send_wr.next = NULL;
+ send->s_send_wr.wr_id = i;
+ send->s_send_wr.sg_list = send->s_sge;
+ send->s_send_wr.num_sge = 1;
+ send->s_send_wr.opcode = IB_WR_SEND;
+ send->s_send_wr.send_flags = 0;
+ send->s_send_wr.ex.imm_data = 0;
sge = rds_iw_data_sge(ic, send->s_sge);
sge->lkey = 0;
@@ -159,13 +159,6 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic)
printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed\n");
break;
}
-
- send->s_page_list = ib_alloc_fast_reg_page_list(
- ic->i_cm_id->device, fastreg_message_size);
- if (IS_ERR(send->s_page_list)) {
- printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed\n");
- break;
- }
}
}
@@ -177,9 +170,7 @@ void rds_iw_send_clear_ring(struct rds_iw_connection *ic)
for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
BUG_ON(!send->s_mr);
ib_dereg_mr(send->s_mr);
- BUG_ON(!send->s_page_list);
- ib_free_fast_reg_page_list(send->s_page_list);
- if (send->s_wr.opcode == 0xdead)
+ if (send->s_send_wr.opcode == 0xdead)
continue;
if (send->s_rm)
rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
@@ -227,7 +218,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
continue;
}
- if (wc.opcode == IB_WC_FAST_REG_MR && wc.wr_id == RDS_IW_FAST_REG_WR_ID) {
+ if (wc.opcode == IB_WC_REG_MR && wc.wr_id == RDS_IW_REG_WR_ID) {
ic->i_fastreg_posted = 1;
continue;
}
@@ -247,12 +238,12 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
send = &ic->i_sends[oldest];
/* In the error case, wc.opcode sometimes contains garbage */
- switch (send->s_wr.opcode) {
+ switch (send->s_send_wr.opcode) {
case IB_WR_SEND:
if (send->s_rm)
rds_iw_send_unmap_rm(ic, send, wc.status);
break;
- case IB_WR_FAST_REG_MR:
+ case IB_WR_REG_MR:
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_READ:
case IB_WR_RDMA_READ_WITH_INV:
@@ -262,12 +253,12 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
default:
printk_ratelimited(KERN_NOTICE
"RDS/IW: %s: unexpected opcode 0x%x in WR!\n",
- __func__, send->s_wr.opcode);
+ __func__, send->s_send_wr.opcode);
break;
}
- send->s_wr.opcode = 0xdead;
- send->s_wr.num_sge = 1;
+ send->s_send_wr.opcode = 0xdead;
+ send->s_send_wr.num_sge = 1;
if (time_after(jiffies, send->s_queued + HZ/2))
rds_iw_stats_inc(s_iw_tx_stalled);
@@ -455,10 +446,10 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
WARN_ON(pos != send - ic->i_sends);
- send->s_wr.send_flags = send_flags;
- send->s_wr.opcode = IB_WR_SEND;
- send->s_wr.num_sge = 2;
- send->s_wr.next = NULL;
+ send->s_send_wr.send_flags = send_flags;
+ send->s_send_wr.opcode = IB_WR_SEND;
+ send->s_send_wr.num_sge = 2;
+ send->s_send_wr.next = NULL;
send->s_queued = jiffies;
send->s_op = NULL;
@@ -472,7 +463,7 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
} else {
/* We're sending a packet with no payload. There is only
* one SGE */
- send->s_wr.num_sge = 1;
+ send->s_send_wr.num_sge = 1;
sge = &send->s_sge[0];
}
@@ -672,23 +663,23 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
*/
if (ic->i_unsignaled_wrs-- == 0) {
ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
- send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
}
ic->i_unsignaled_bytes -= len;
if (ic->i_unsignaled_bytes <= 0) {
ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
- send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
}
/*
* Always signal the last one if we're stopping due to flow control.
*/
if (flow_controlled && i == (work_alloc-1))
- send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
rdsdebug("send %p wr %p num_sge %u next %p\n", send,
- &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
+ &send->s_send_wr, send->s_send_wr.num_sge, send->s_send_wr.next);
sent += len;
rm->data.op_dmaoff += len;
@@ -722,7 +713,7 @@ add_header:
}
if (prev)
- prev->s_wr.next = &send->s_wr;
+ prev->s_send_wr.next = &send->s_send_wr;
prev = send;
pos = (pos + 1) % ic->i_send_ring.w_nr;
@@ -736,7 +727,7 @@ add_header:
/* if we finished the message then send completion owns it */
if (scat == &rm->data.op_sg[rm->data.op_count]) {
prev->s_rm = ic->i_rm;
- prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ prev->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
ic->i_rm = NULL;
}
@@ -748,11 +739,11 @@ add_header:
rds_iw_send_add_credits(conn, credit_alloc - i);
/* XXX need to worry about failed_wr and partial sends. */
- failed_wr = &first->s_wr;
- ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
+ failed_wr = &first->s_send_wr;
+ ret = ib_post_send(ic->i_cm_id->qp, &first->s_send_wr, &failed_wr);
rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
- first, &first->s_wr, ret, failed_wr);
- BUG_ON(failed_wr != &first->s_wr);
+ first, &first->s_send_wr, ret, failed_wr);
+ BUG_ON(failed_wr != &first->s_send_wr);
if (ret) {
printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 "
"returned %d\n", &conn->c_faddr, ret);
@@ -770,24 +761,26 @@ out:
return ret;
}
-static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rds_iw_connection *ic, struct rds_iw_send_work *send, int nent, int len, u64 sg_addr)
+static int rds_iw_build_send_reg(struct rds_iw_send_work *send,
+ struct scatterlist *sg,
+ int sg_nents)
{
- BUG_ON(nent > send->s_page_list->max_page_list_len);
- /*
- * Perform a WR for the fast_reg_mr. Each individual page
- * in the sg list is added to the fast reg page list and placed
- * inside the fast_reg_mr WR.
- */
- send->s_wr.opcode = IB_WR_FAST_REG_MR;
- send->s_wr.wr.fast_reg.length = len;
- send->s_wr.wr.fast_reg.rkey = send->s_mr->rkey;
- send->s_wr.wr.fast_reg.page_list = send->s_page_list;
- send->s_wr.wr.fast_reg.page_list_len = nent;
- send->s_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
- send->s_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE;
- send->s_wr.wr.fast_reg.iova_start = sg_addr;
+ int n;
+
+ n = ib_map_mr_sg(send->s_mr, sg, sg_nents, PAGE_SIZE);
+ if (unlikely(n != sg_nents))
+ return n < 0 ? n : -EINVAL;
+
+ send->s_reg_wr.wr.opcode = IB_WR_REG_MR;
+ send->s_reg_wr.wr.wr_id = 0;
+ send->s_reg_wr.wr.num_sge = 0;
+ send->s_reg_wr.mr = send->s_mr;
+ send->s_reg_wr.key = send->s_mr->rkey;
+ send->s_reg_wr.access = IB_ACCESS_REMOTE_WRITE;
ib_update_fast_reg_key(send->s_mr, send->s_remap_count++);
+
+ return 0;
}
int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
@@ -808,6 +801,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
int sent;
int ret;
int num_sge;
+ int sg_nents;
rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
@@ -861,9 +855,10 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
scat = &op->op_sg[0];
sent = 0;
num_sge = op->op_count;
+ sg_nents = 0;
for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
- send->s_wr.send_flags = 0;
+ send->s_rdma_wr.wr.send_flags = 0;
send->s_queued = jiffies;
/*
@@ -872,7 +867,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
*/
if (ic->i_unsignaled_wrs-- == 0) {
ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
- send->s_wr.send_flags = IB_SEND_SIGNALED;
+ send->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
}
/* To avoid the need to have the plumbing to invalidate the fastreg_mr used
@@ -880,30 +875,31 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
* IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed.
*/
if (op->op_write)
- send->s_wr.opcode = IB_WR_RDMA_WRITE;
+ send->s_rdma_wr.wr.opcode = IB_WR_RDMA_WRITE;
else
- send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
+ send->s_rdma_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
- send->s_wr.wr.rdma.remote_addr = remote_addr;
- send->s_wr.wr.rdma.rkey = op->op_rkey;
+ send->s_rdma_wr.remote_addr = remote_addr;
+ send->s_rdma_wr.rkey = op->op_rkey;
send->s_op = op;
if (num_sge > rds_iwdev->max_sge) {
- send->s_wr.num_sge = rds_iwdev->max_sge;
+ send->s_rdma_wr.wr.num_sge = rds_iwdev->max_sge;
num_sge -= rds_iwdev->max_sge;
} else
- send->s_wr.num_sge = num_sge;
+ send->s_rdma_wr.wr.num_sge = num_sge;
- send->s_wr.next = NULL;
+ send->s_rdma_wr.wr.next = NULL;
if (prev)
- prev->s_wr.next = &send->s_wr;
+ prev->s_send_wr.next = &send->s_rdma_wr.wr;
- for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
+ for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
+ scat != &op->op_sg[op->op_count]; j++) {
len = ib_sg_dma_len(ic->i_cm_id->device, scat);
- if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV)
- send->s_page_list->page_list[j] = ib_sg_dma_address(ic->i_cm_id->device, scat);
+ if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV)
+ sg_nents++;
else {
send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat);
send->s_sge[j].length = len;
@@ -917,15 +913,17 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
scat++;
}
- if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) {
- send->s_wr.num_sge = 1;
+ if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV) {
+ send->s_rdma_wr.wr.num_sge = 1;
send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr;
send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes;
send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey;
}
rdsdebug("send %p wr %p num_sge %u next %p\n", send,
- &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
+ &send->s_rdma_wr,
+ send->s_rdma_wr.wr.num_sge,
+ send->s_rdma_wr.wr.next);
prev = send;
if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
@@ -934,7 +932,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
/* if we finished the message then send completion owns it */
if (scat == &op->op_sg[op->op_count])
- first->s_wr.send_flags = IB_SEND_SIGNALED;
+ first->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
if (i < work_alloc) {
rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i);
@@ -948,16 +946,20 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
* fastreg_mr (or possibly a dma_mr)
*/
if (!op->op_write) {
- rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos],
- op->op_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr);
+ ret = rds_iw_build_send_reg(&ic->i_sends[fr_pos],
+ &op->op_sg[0], sg_nents);
+ if (ret) {
+ printk(KERN_WARNING "RDS/IW: failed to reg send mem\n");
+ goto out;
+ }
work_alloc++;
}
- failed_wr = &first->s_wr;
- ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
+ failed_wr = &first->s_rdma_wr.wr;
+ ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr);
rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
- first, &first->s_wr, ret, failed_wr);
- BUG_ON(failed_wr != &first->s_wr);
+ first, &first->s_rdma_wr, ret, failed_wr);
+ BUG_ON(failed_wr != &first->s_rdma_wr.wr);
if (ret) {
printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 "
"returned %d\n", &conn->c_faddr, ret);
diff --git a/net/rds/page.c b/net/rds/page.c
index 9005a2c920ee..616f21f4e7d7 100644
--- a/net/rds/page.c
+++ b/net/rds/page.c
@@ -42,8 +42,8 @@ struct rds_page_remainder {
unsigned long r_offset;
};
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder,
- rds_page_remainders);
+static
+DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders);
/*
* returns 0 on success or -errno on failure.
@@ -179,37 +179,18 @@ out:
}
EXPORT_SYMBOL_GPL(rds_page_remainder_alloc);
-static int rds_page_remainder_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+void rds_page_exit(void)
{
- struct rds_page_remainder *rem;
- long cpu = (long)hcpu;
+ unsigned int cpu;
- rem = &per_cpu(rds_page_remainders, cpu);
+ for_each_possible_cpu(cpu) {
+ struct rds_page_remainder *rem;
- rdsdebug("cpu %ld action 0x%lx\n", cpu, action);
+ rem = &per_cpu(rds_page_remainders, cpu);
+ rdsdebug("cpu %u\n", cpu);
- switch (action) {
- case CPU_DEAD:
if (rem->r_page)
__free_page(rem->r_page);
rem->r_page = NULL;
- break;
}
-
- return 0;
-}
-
-static struct notifier_block rds_page_remainder_nb = {
- .notifier_call = rds_page_remainder_cpu_notify,
-};
-
-void rds_page_exit(void)
-{
- int i;
-
- for_each_possible_cpu(i)
- rds_page_remainder_cpu_notify(&rds_page_remainder_nb,
- (unsigned long)CPU_DEAD,
- (void *)(long)i);
}
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index b9b40af5345b..9c1fed81bf0f 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -142,8 +142,8 @@ static int rds_rdma_listen_init(void)
struct rdma_cm_id *cm_id;
int ret;
- cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP,
- IB_QPT_RC);
+ cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, NULL,
+ RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cm_id)) {
ret = PTR_ERR(cm_id);
printk(KERN_ERR "RDS/RDMA: failed to setup listener, "
diff --git a/net/rds/send.c b/net/rds/send.c
index 827155c2ead1..c9cdb358ea88 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1013,11 +1013,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
release_sock(sk);
}
- /* racing with another thread binding seems ok here */
+ lock_sock(sk);
if (daddr == 0 || rs->rs_bound_addr == 0) {
+ release_sock(sk);
ret = -ENOTCONN; /* XXX not a great errno */
goto out;
}
+ release_sock(sk);
if (payload_len > rds_sk_sndbuf(rs)) {
ret = -EMSGSIZE;
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index b41e9ea2ffff..cf5b69ab1829 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -49,7 +49,6 @@
struct rfkill {
spinlock_t lock;
- const char *name;
enum rfkill_type type;
unsigned long state;
@@ -73,6 +72,7 @@ struct rfkill {
struct delayed_work poll_work;
struct work_struct uevent_work;
struct work_struct sync_work;
+ char name[];
};
#define to_rfkill(d) container_of(d, struct rfkill, dev)
@@ -876,14 +876,14 @@ struct rfkill * __must_check rfkill_alloc(const char *name,
if (WARN_ON(type == RFKILL_TYPE_ALL || type >= NUM_RFKILL_TYPES))
return NULL;
- rfkill = kzalloc(sizeof(*rfkill), GFP_KERNEL);
+ rfkill = kzalloc(sizeof(*rfkill) + strlen(name) + 1, GFP_KERNEL);
if (!rfkill)
return NULL;
spin_lock_init(&rfkill->lock);
INIT_LIST_HEAD(&rfkill->node);
rfkill->type = type;
- rfkill->name = name;
+ strcpy(rfkill->name, name);
rfkill->ops = ops;
rfkill->data = ops_data;
@@ -1095,17 +1095,6 @@ static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait)
return res;
}
-static bool rfkill_readable(struct rfkill_data *data)
-{
- bool r;
-
- mutex_lock(&data->mtx);
- r = !list_empty(&data->events);
- mutex_unlock(&data->mtx);
-
- return r;
-}
-
static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
size_t count, loff_t *pos)
{
@@ -1122,8 +1111,11 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
goto out;
}
mutex_unlock(&data->mtx);
+ /* since we re-check and it just compares pointers,
+ * using !list_empty() without locking isn't a problem
+ */
ret = wait_event_interruptible(data->read_wait,
- rfkill_readable(data));
+ !list_empty(&data->events));
mutex_lock(&data->mtx);
if (ret)
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 93127220cb54..4b1e3f35f06c 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -163,10 +163,6 @@ static int rfkill_gpio_remove(struct platform_device *pdev)
#ifdef CONFIG_ACPI
static const struct acpi_device_id rfkill_acpi_match[] = {
- { "BCM2E1A", RFKILL_TYPE_BLUETOOTH },
- { "BCM2E3D", RFKILL_TYPE_BLUETOOTH },
- { "BCM2E40", RFKILL_TYPE_BLUETOOTH },
- { "BCM2E64", RFKILL_TYPE_BLUETOOTH },
{ "BCM4752", RFKILL_TYPE_GPS },
{ "LNV4752", RFKILL_TYPE_GPS },
{ },
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 1f8a144a5dc2..7e2d1057d8bc 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -67,7 +67,7 @@ static void rxrpc_write_space(struct sock *sk)
if (rxrpc_writable(sk)) {
struct socket_wq *wq = rcu_dereference(sk->sk_wq);
- if (wq_has_sleeper(wq))
+ if (skwq_has_sleeper(wq))
wake_up_interruptible(&wq->wait);
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index e0547f521f20..adc555e0323d 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -723,8 +723,10 @@ process_further:
if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY ||
call->state == RXRPC_CALL_SERVER_AWAIT_ACK) &&
- hard > tx)
+ hard > tx) {
+ call->acks_hard = tx;
goto all_acked;
+ }
smp_rmb();
rxrpc_rotate_tx_window(call, hard - 1);
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index 692b3e67fb54..6c71ed1caf16 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -500,7 +500,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
if (bundle->num_conns >= 20) {
_debug("too many conns");
- if (!(gfp & __GFP_WAIT)) {
+ if (!gfpflags_allow_blocking(gfp)) {
_leave(" = -EAGAIN");
return -EAGAIN;
}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 2934a73a5981..71598f5b11b7 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -252,7 +252,7 @@ struct rxrpc_connection {
struct rxrpc_security *security; /* applied security module */
struct key *key; /* security for this connection (client) */
struct key *server_key; /* security for this service */
- struct crypto_blkcipher *cipher; /* encryption handle */
+ struct crypto_skcipher *cipher; /* encryption handle */
struct rxrpc_crypt csum_iv; /* packet checksum base */
unsigned long events;
#define RXRPC_CONN_CHALLENGE 0 /* send challenge packet */
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index da3cc09f683e..3fb492eedeb9 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -12,11 +12,11 @@
* "afs@CAMBRIDGE.REDHAT.COM>
*/
+#include <crypto/skcipher.h>
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/key-type.h>
-#include <linux/crypto.h>
#include <linux/ctype.h>
#include <linux/slab.h>
#include <net/sock.h>
@@ -824,7 +824,7 @@ static void rxrpc_free_preparse(struct key_preparsed_payload *prep)
*/
static int rxrpc_preparse_s(struct key_preparsed_payload *prep)
{
- struct crypto_blkcipher *ci;
+ struct crypto_skcipher *ci;
_enter("%zu", prep->datalen);
@@ -833,13 +833,13 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep)
memcpy(&prep->payload.data[2], prep->data, 8);
- ci = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC);
+ ci = crypto_alloc_skcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(ci)) {
_leave(" = %ld", PTR_ERR(ci));
return PTR_ERR(ci);
}
- if (crypto_blkcipher_setkey(ci, prep->data, 8) < 0)
+ if (crypto_skcipher_setkey(ci, prep->data, 8) < 0)
BUG();
prep->payload.data[0] = ci;
@@ -853,7 +853,7 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep)
static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep)
{
if (prep->payload.data[0])
- crypto_free_blkcipher(prep->payload.data[0]);
+ crypto_free_skcipher(prep->payload.data[0]);
}
/*
@@ -870,7 +870,7 @@ static void rxrpc_destroy(struct key *key)
static void rxrpc_destroy_s(struct key *key)
{
if (key->payload.data[0]) {
- crypto_free_blkcipher(key->payload.data[0]);
+ crypto_free_skcipher(key->payload.data[0]);
key->payload.data[0] = NULL;
}
}
@@ -896,15 +896,9 @@ int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen)
if (optlen <= 0 || optlen > PAGE_SIZE - 1)
return -EINVAL;
- description = kmalloc(optlen + 1, GFP_KERNEL);
- if (!description)
- return -ENOMEM;
-
- if (copy_from_user(description, optval, optlen)) {
- kfree(description);
- return -EFAULT;
- }
- description[optlen] = 0;
+ description = memdup_user_nul(optval, optlen);
+ if (IS_ERR(description))
+ return PTR_ERR(description);
key = request_key(&key_type_rxrpc, description, NULL);
if (IS_ERR(key)) {
@@ -933,15 +927,9 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval,
if (optlen <= 0 || optlen > PAGE_SIZE - 1)
return -EINVAL;
- description = kmalloc(optlen + 1, GFP_KERNEL);
- if (!description)
- return -ENOMEM;
-
- if (copy_from_user(description, optval, optlen)) {
- kfree(description);
- return -EFAULT;
- }
- description[optlen] = 0;
+ description = memdup_user_nul(optval, optlen);
+ if (IS_ERR(description))
+ return PTR_ERR(description);
key = request_key(&key_type_keyring, description, NULL);
if (IS_ERR(key)) {
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index a40d3afe93b7..14c4e12c47b0 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -531,7 +531,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
/* this should be in poll */
- clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
return -EPIPE;
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index d7a9ab5a9d9c..0d96b48a6492 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -9,11 +9,11 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <crypto/skcipher.h>
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/udp.h>
-#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/ctype.h>
#include <linux/slab.h>
@@ -53,7 +53,7 @@ MODULE_LICENSE("GPL");
* alloc routine, but since we have it to hand, we use it to decrypt RESPONSE
* packets
*/
-static struct crypto_blkcipher *rxkad_ci;
+static struct crypto_skcipher *rxkad_ci;
static DEFINE_MUTEX(rxkad_ci_mutex);
/*
@@ -61,7 +61,7 @@ static DEFINE_MUTEX(rxkad_ci_mutex);
*/
static int rxkad_init_connection_security(struct rxrpc_connection *conn)
{
- struct crypto_blkcipher *ci;
+ struct crypto_skcipher *ci;
struct rxrpc_key_token *token;
int ret;
@@ -70,15 +70,15 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn)
token = conn->key->payload.data[0];
conn->security_ix = token->security_index;
- ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
+ ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(ci)) {
_debug("no cipher");
ret = PTR_ERR(ci);
goto error;
}
- if (crypto_blkcipher_setkey(ci, token->kad->session_key,
- sizeof(token->kad->session_key)) < 0)
+ if (crypto_skcipher_setkey(ci, token->kad->session_key,
+ sizeof(token->kad->session_key)) < 0)
BUG();
switch (conn->security_level) {
@@ -113,7 +113,7 @@ error:
static void rxkad_prime_packet_security(struct rxrpc_connection *conn)
{
struct rxrpc_key_token *token;
- struct blkcipher_desc desc;
+ SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
struct scatterlist sg[2];
struct rxrpc_crypt iv;
struct {
@@ -128,10 +128,6 @@ static void rxkad_prime_packet_security(struct rxrpc_connection *conn)
token = conn->key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
- desc.tfm = conn->cipher;
- desc.info = iv.x;
- desc.flags = 0;
-
tmpbuf.x[0] = conn->epoch;
tmpbuf.x[1] = conn->cid;
tmpbuf.x[2] = 0;
@@ -139,7 +135,13 @@ static void rxkad_prime_packet_security(struct rxrpc_connection *conn)
sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
- crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+ skcipher_request_set_tfm(req, conn->cipher);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
+
+ crypto_skcipher_encrypt(req);
+ skcipher_request_zero(req);
memcpy(&conn->csum_iv, &tmpbuf.x[2], sizeof(conn->csum_iv));
ASSERTCMP(conn->csum_iv.n[0], ==, tmpbuf.x[2]);
@@ -156,7 +158,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
void *sechdr)
{
struct rxrpc_skb_priv *sp;
- struct blkcipher_desc desc;
+ SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_crypt iv;
struct scatterlist sg[2];
struct {
@@ -177,13 +179,16 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
/* start the encryption afresh */
memset(&iv, 0, sizeof(iv));
- desc.tfm = call->conn->cipher;
- desc.info = iv.x;
- desc.flags = 0;
sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
- crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+ skcipher_request_set_tfm(req, call->conn->cipher);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
+
+ crypto_skcipher_encrypt(req);
+ skcipher_request_zero(req);
memcpy(sechdr, &tmpbuf, sizeof(tmpbuf));
@@ -203,13 +208,14 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
struct rxkad_level2_hdr rxkhdr
__attribute__((aligned(8))); /* must be all on one page */
struct rxrpc_skb_priv *sp;
- struct blkcipher_desc desc;
+ SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_crypt iv;
struct scatterlist sg[16];
struct sk_buff *trailer;
unsigned int len;
u16 check;
int nsg;
+ int err;
sp = rxrpc_skb(skb);
@@ -223,28 +229,38 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
/* encrypt from the session key */
token = call->conn->key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
- desc.tfm = call->conn->cipher;
- desc.info = iv.x;
- desc.flags = 0;
sg_init_one(&sg[0], sechdr, sizeof(rxkhdr));
sg_init_one(&sg[1], &rxkhdr, sizeof(rxkhdr));
- crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(rxkhdr));
+
+ skcipher_request_set_tfm(req, call->conn->cipher);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(rxkhdr), iv.x);
+
+ crypto_skcipher_encrypt(req);
/* we want to encrypt the skbuff in-place */
nsg = skb_cow_data(skb, 0, &trailer);
+ err = -ENOMEM;
if (nsg < 0 || nsg > 16)
- return -ENOMEM;
+ goto out;
len = data_size + call->conn->size_align - 1;
len &= ~(call->conn->size_align - 1);
sg_init_table(sg, nsg);
skb_to_sgvec(skb, sg, 0, len);
- crypto_blkcipher_encrypt_iv(&desc, sg, sg, len);
+
+ skcipher_request_set_crypt(req, sg, sg, len, iv.x);
+
+ crypto_skcipher_encrypt(req);
_leave(" = 0");
- return 0;
+ err = 0;
+
+out:
+ skcipher_request_zero(req);
+ return err;
}
/*
@@ -256,7 +272,7 @@ static int rxkad_secure_packet(const struct rxrpc_call *call,
void *sechdr)
{
struct rxrpc_skb_priv *sp;
- struct blkcipher_desc desc;
+ SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_crypt iv;
struct scatterlist sg[2];
struct {
@@ -281,9 +297,6 @@ static int rxkad_secure_packet(const struct rxrpc_call *call,
/* continue encrypting from where we left off */
memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
- desc.tfm = call->conn->cipher;
- desc.info = iv.x;
- desc.flags = 0;
/* calculate the security checksum */
x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
@@ -293,7 +306,13 @@ static int rxkad_secure_packet(const struct rxrpc_call *call,
sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
- crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+ skcipher_request_set_tfm(req, call->conn->cipher);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
+
+ crypto_skcipher_encrypt(req);
+ skcipher_request_zero(req);
y = ntohl(tmpbuf.x[1]);
y = (y >> 16) & 0xffff;
@@ -330,7 +349,7 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
{
struct rxkad_level1_hdr sechdr;
struct rxrpc_skb_priv *sp;
- struct blkcipher_desc desc;
+ SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_crypt iv;
struct scatterlist sg[16];
struct sk_buff *trailer;
@@ -352,11 +371,13 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
/* start the decryption afresh */
memset(&iv, 0, sizeof(iv));
- desc.tfm = call->conn->cipher;
- desc.info = iv.x;
- desc.flags = 0;
- crypto_blkcipher_decrypt_iv(&desc, sg, sg, 8);
+ skcipher_request_set_tfm(req, call->conn->cipher);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, sg, sg, 8, iv.x);
+
+ crypto_skcipher_decrypt(req);
+ skcipher_request_zero(req);
/* remove the decrypted packet length */
if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
@@ -405,7 +426,7 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
const struct rxrpc_key_token *token;
struct rxkad_level2_hdr sechdr;
struct rxrpc_skb_priv *sp;
- struct blkcipher_desc desc;
+ SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_crypt iv;
struct scatterlist _sg[4], *sg;
struct sk_buff *trailer;
@@ -435,11 +456,13 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
/* decrypt from the session key */
token = call->conn->key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
- desc.tfm = call->conn->cipher;
- desc.info = iv.x;
- desc.flags = 0;
- crypto_blkcipher_decrypt_iv(&desc, sg, sg, skb->len);
+ skcipher_request_set_tfm(req, call->conn->cipher);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, sg, sg, skb->len, iv.x);
+
+ crypto_skcipher_decrypt(req);
+ skcipher_request_zero(req);
if (sg != _sg)
kfree(sg);
@@ -487,7 +510,7 @@ static int rxkad_verify_packet(const struct rxrpc_call *call,
struct sk_buff *skb,
u32 *_abort_code)
{
- struct blkcipher_desc desc;
+ SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_skb_priv *sp;
struct rxrpc_crypt iv;
struct scatterlist sg[2];
@@ -516,9 +539,6 @@ static int rxkad_verify_packet(const struct rxrpc_call *call,
/* continue encrypting from where we left off */
memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
- desc.tfm = call->conn->cipher;
- desc.info = iv.x;
- desc.flags = 0;
/* validate the security checksum */
x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
@@ -528,7 +548,13 @@ static int rxkad_verify_packet(const struct rxrpc_call *call,
sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
- crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+ skcipher_request_set_tfm(req, call->conn->cipher);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
+
+ crypto_skcipher_encrypt(req);
+ skcipher_request_zero(req);
y = ntohl(tmpbuf.x[1]);
y = (y >> 16) & 0xffff;
@@ -718,18 +744,21 @@ static void rxkad_encrypt_response(struct rxrpc_connection *conn,
struct rxkad_response *resp,
const struct rxkad_key *s2)
{
- struct blkcipher_desc desc;
+ SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
struct rxrpc_crypt iv;
struct scatterlist sg[2];
/* continue encrypting from where we left off */
memcpy(&iv, s2->session_key, sizeof(iv));
- desc.tfm = conn->cipher;
- desc.info = iv.x;
- desc.flags = 0;
rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted));
- crypto_blkcipher_encrypt_iv(&desc, sg, sg, sizeof(resp->encrypted));
+
+ skcipher_request_set_tfm(req, conn->cipher);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
+
+ crypto_skcipher_encrypt(req);
+ skcipher_request_zero(req);
}
/*
@@ -822,7 +851,7 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
time_t *_expiry,
u32 *_abort_code)
{
- struct blkcipher_desc desc;
+ struct skcipher_request *req;
struct rxrpc_crypt iv, key;
struct scatterlist sg[1];
struct in_addr addr;
@@ -853,12 +882,21 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
memcpy(&iv, &conn->server_key->payload.data[2], sizeof(iv));
- desc.tfm = conn->server_key->payload.data[0];
- desc.info = iv.x;
- desc.flags = 0;
+ req = skcipher_request_alloc(conn->server_key->payload.data[0],
+ GFP_NOFS);
+ if (!req) {
+ *_abort_code = RXKADNOAUTH;
+ ret = -ENOMEM;
+ goto error;
+ }
sg_init_one(&sg[0], ticket, ticket_len);
- crypto_blkcipher_decrypt_iv(&desc, sg, sg, ticket_len);
+
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, sg, sg, ticket_len, iv.x);
+
+ crypto_skcipher_decrypt(req);
+ skcipher_request_free(req);
p = ticket;
end = p + ticket_len;
@@ -966,7 +1004,7 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
struct rxkad_response *resp,
const struct rxrpc_crypt *session_key)
{
- struct blkcipher_desc desc;
+ SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci);
struct scatterlist sg[2];
struct rxrpc_crypt iv;
@@ -976,17 +1014,21 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
ASSERT(rxkad_ci != NULL);
mutex_lock(&rxkad_ci_mutex);
- if (crypto_blkcipher_setkey(rxkad_ci, session_key->x,
- sizeof(*session_key)) < 0)
+ if (crypto_skcipher_setkey(rxkad_ci, session_key->x,
+ sizeof(*session_key)) < 0)
BUG();
memcpy(&iv, session_key, sizeof(iv));
- desc.tfm = rxkad_ci;
- desc.info = iv.x;
- desc.flags = 0;
rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted));
- crypto_blkcipher_decrypt_iv(&desc, sg, sg, sizeof(resp->encrypted));
+
+ skcipher_request_set_tfm(req, rxkad_ci);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
+
+ crypto_skcipher_decrypt(req);
+ skcipher_request_zero(req);
+
mutex_unlock(&rxkad_ci_mutex);
_leave("");
@@ -1115,7 +1157,7 @@ static void rxkad_clear(struct rxrpc_connection *conn)
_enter("");
if (conn->cipher)
- crypto_free_blkcipher(conn->cipher);
+ crypto_free_skcipher(conn->cipher);
}
/*
@@ -1141,7 +1183,7 @@ static __init int rxkad_init(void)
/* pin the cipher we need so that the crypto layer doesn't invoke
* keventd to go get it */
- rxkad_ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
+ rxkad_ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(rxkad_ci))
return PTR_ERR(rxkad_ci);
@@ -1155,7 +1197,7 @@ static __exit void rxkad_exit(void)
_enter("");
rxrpc_unregister_security(&rxkad);
- crypto_free_blkcipher(rxkad_ci);
+ crypto_free_skcipher(rxkad_ci);
}
module_exit(rxkad_exit);
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index daa33432b716..82830824fb1f 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -310,15 +310,21 @@ config NET_SCH_PIE
If unsure, say N.
config NET_SCH_INGRESS
- tristate "Ingress Qdisc"
+ tristate "Ingress/classifier-action Qdisc"
depends on NET_CLS_ACT
select NET_INGRESS
+ select NET_EGRESS
---help---
- Say Y here if you want to use classifiers for incoming packets.
+ Say Y here if you want to use classifiers for incoming and/or outgoing
+ packets. This qdisc doesn't do anything else besides running classifiers,
+ which can also have actions attached to them. In case of outgoing packets,
+ classifiers that this qdisc holds are executed in the transmit path
+ before real enqueuing to an egress qdisc happens.
+
If unsure, say Y.
- To compile this code as a module, choose M here: the
- module will be called sch_ingress.
+ To compile this code as a module, choose M here: the module will be
+ called sch_ingress with alias of sch_clsact.
config NET_SCH_PLUG
tristate "Plug network traffic until release (PLUG)"
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index d05869646515..6b70399ab781 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -62,6 +62,7 @@ static void ipt_destroy_target(struct xt_entry_target *t)
struct xt_tgdtor_param par = {
.target = t->u.kernel.target,
.targinfo = t->data,
+ .family = NFPROTO_IPV4,
};
if (par.target->destroy != NULL)
par.target->destroy(&par);
@@ -195,6 +196,7 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
par.hooknum = ipt->tcfi_hook;
par.target = ipt->tcfi_t->u.kernel.target;
par.targinfo = ipt->tcfi_t->data;
+ par.family = NFPROTO_IPV4;
ret = par.target->target(skb, &par);
switch (ret) {
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 5faaa5425f7b..8dc84300ee79 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -79,12 +79,8 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
struct cls_bpf_head *head = rcu_dereference_bh(tp->root);
+ bool at_ingress = skb_at_tc_ingress(skb);
struct cls_bpf_prog *prog;
-#ifdef CONFIG_NET_CLS_ACT
- bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
-#else
- bool at_ingress = false;
-#endif
int ret = -1;
if (unlikely(!skb_mac_header_was_set(skb)))
@@ -295,7 +291,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
prog->bpf_name = name;
prog->filter = fp;
- if (fp->dst_needed)
+ if (fp->dst_needed && !(tp->q->flags & TCQ_F_INGRESS))
netif_keep_dst(qdisc_dev(tp->q));
return 0;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 536838b657bf..fbfec6a18839 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -22,6 +22,7 @@
#include <linux/if_vlan.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <net/inet_sock.h>
#include <net/pkt_cls.h>
#include <net/ip.h>
@@ -197,8 +198,11 @@ static u32 flow_get_rtclassid(const struct sk_buff *skb)
static u32 flow_get_skuid(const struct sk_buff *skb)
{
- if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) {
- kuid_t skuid = skb->sk->sk_socket->file->f_cred->fsuid;
+ struct sock *sk = skb_to_full_sk(skb);
+
+ if (sk && sk->sk_socket && sk->sk_socket->file) {
+ kuid_t skuid = sk->sk_socket->file->f_cred->fsuid;
+
return from_kuid(&init_user_ns, skuid);
}
return 0;
@@ -206,8 +210,11 @@ static u32 flow_get_skuid(const struct sk_buff *skb)
static u32 flow_get_skgid(const struct sk_buff *skb)
{
- if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) {
- kgid_t skgid = skb->sk->sk_socket->file->f_cred->fsgid;
+ struct sock *sk = skb_to_full_sk(skb);
+
+ if (sk && sk->sk_socket && sk->sk_socket->file) {
+ kgid_t skgid = sk->sk_socket->file->f_cred->fsgid;
+
return from_kgid(&init_user_ns, skgid);
}
return 0;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 57692947ebbe..95b021243233 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -252,23 +252,28 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
sizeof(key->eth.src));
+
fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
&mask->basic.n_proto, TCA_FLOWER_UNSPEC,
sizeof(key->basic.n_proto));
+
if (key->basic.n_proto == htons(ETH_P_IP) ||
key->basic.n_proto == htons(ETH_P_IPV6)) {
fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
&mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
sizeof(key->basic.ip_proto));
}
- if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+
+ if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
&mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
sizeof(key->ipv4.src));
fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
&mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
sizeof(key->ipv4.dst));
- } else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ } else if (tb[TCA_FLOWER_KEY_IPV6_SRC] || tb[TCA_FLOWER_KEY_IPV6_DST]) {
+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
&mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
sizeof(key->ipv6.src));
@@ -276,6 +281,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
&mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
sizeof(key->ipv6.dst));
}
+
if (key->basic.ip_proto == IPPROTO_TCP) {
fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
&mask->tp.src, TCA_FLOWER_UNSPEC,
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index b5294ce20cd4..f2aabc0089da 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -343,119 +343,145 @@ META_COLLECTOR(int_sk_refcnt)
META_COLLECTOR(int_sk_rcvbuf)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_rcvbuf;
+ dst->value = sk->sk_rcvbuf;
}
META_COLLECTOR(int_sk_shutdown)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_shutdown;
+ dst->value = sk->sk_shutdown;
}
META_COLLECTOR(int_sk_proto)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_protocol;
+ dst->value = sk->sk_protocol;
}
META_COLLECTOR(int_sk_type)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_type;
+ dst->value = sk->sk_type;
}
META_COLLECTOR(int_sk_rmem_alloc)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = sk_rmem_alloc_get(skb->sk);
+ dst->value = sk_rmem_alloc_get(sk);
}
META_COLLECTOR(int_sk_wmem_alloc)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = sk_wmem_alloc_get(skb->sk);
+ dst->value = sk_wmem_alloc_get(sk);
}
META_COLLECTOR(int_sk_omem_alloc)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = atomic_read(&skb->sk->sk_omem_alloc);
+ dst->value = atomic_read(&sk->sk_omem_alloc);
}
META_COLLECTOR(int_sk_rcv_qlen)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_receive_queue.qlen;
+ dst->value = sk->sk_receive_queue.qlen;
}
META_COLLECTOR(int_sk_snd_qlen)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_write_queue.qlen;
+ dst->value = sk->sk_write_queue.qlen;
}
META_COLLECTOR(int_sk_wmem_queued)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_wmem_queued;
+ dst->value = sk->sk_wmem_queued;
}
META_COLLECTOR(int_sk_fwd_alloc)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_forward_alloc;
+ dst->value = sk->sk_forward_alloc;
}
META_COLLECTOR(int_sk_sndbuf)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_sndbuf;
+ dst->value = sk->sk_sndbuf;
}
META_COLLECTOR(int_sk_alloc)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = (__force int) skb->sk->sk_allocation;
+ dst->value = (__force int) sk->sk_allocation;
}
META_COLLECTOR(int_sk_hash)
@@ -469,92 +495,112 @@ META_COLLECTOR(int_sk_hash)
META_COLLECTOR(int_sk_lingertime)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_lingertime / HZ;
+ dst->value = sk->sk_lingertime / HZ;
}
META_COLLECTOR(int_sk_err_qlen)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_error_queue.qlen;
+ dst->value = sk->sk_error_queue.qlen;
}
META_COLLECTOR(int_sk_ack_bl)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_ack_backlog;
+ dst->value = sk->sk_ack_backlog;
}
META_COLLECTOR(int_sk_max_ack_bl)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_max_ack_backlog;
+ dst->value = sk->sk_max_ack_backlog;
}
META_COLLECTOR(int_sk_prio)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_priority;
+ dst->value = sk->sk_priority;
}
META_COLLECTOR(int_sk_rcvlowat)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_rcvlowat;
+ dst->value = sk->sk_rcvlowat;
}
META_COLLECTOR(int_sk_rcvtimeo)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_rcvtimeo / HZ;
+ dst->value = sk->sk_rcvtimeo / HZ;
}
META_COLLECTOR(int_sk_sndtimeo)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_sndtimeo / HZ;
+ dst->value = sk->sk_sndtimeo / HZ;
}
META_COLLECTOR(int_sk_sendmsg_off)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_frag.offset;
+ dst->value = sk->sk_frag.offset;
}
META_COLLECTOR(int_sk_write_pend)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_write_pending;
+ dst->value = sk->sk_write_pending;
}
/**************************************************************************
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index f43c8f33f09e..af1acf009866 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -253,7 +253,8 @@ int qdisc_set_default(const char *name)
}
/* We know handle. Find qdisc among all qdisc's attached to device
- (root qdisc, all its children, children of children etc.)
+ * (root qdisc, all its children, children of children etc.)
+ * Note: caller either uses rtnl or rcu_read_lock()
*/
static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
@@ -264,7 +265,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
root->handle == handle)
return root;
- list_for_each_entry(q, &root->list, list) {
+ list_for_each_entry_rcu(q, &root->list, list) {
if (q->handle == handle)
return q;
}
@@ -277,15 +278,18 @@ void qdisc_list_add(struct Qdisc *q)
struct Qdisc *root = qdisc_dev(q)->qdisc;
WARN_ON_ONCE(root == &noop_qdisc);
- list_add_tail(&q->list, &root->list);
+ ASSERT_RTNL();
+ list_add_tail_rcu(&q->list, &root->list);
}
}
EXPORT_SYMBOL(qdisc_list_add);
void qdisc_list_del(struct Qdisc *q)
{
- if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
- list_del(&q->list);
+ if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
+ ASSERT_RTNL();
+ list_del_rcu(&q->list);
+ }
}
EXPORT_SYMBOL(qdisc_list_del);
@@ -750,14 +754,18 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
if (n == 0)
return;
drops = max_t(int, n, 0);
+ rcu_read_lock();
while ((parentid = sch->parent)) {
if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
- return;
+ break;
+ if (sch->flags & TCQ_F_NOPARENT)
+ break;
+ /* TODO: perform the search on a per txq basis */
sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
- WARN_ON(parentid != TC_H_ROOT);
- return;
+ WARN_ON_ONCE(parentid != TC_H_ROOT);
+ break;
}
cops = sch->ops->cl_ops;
if (cops->qlen_notify) {
@@ -768,6 +776,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
sch->q.qlen -= n;
__qdisc_qstats_drop(sch, drops);
}
+ rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
@@ -1843,6 +1852,7 @@ reset:
}
tp = old_tp;
+ protocol = tc_skb_protocol(skb);
goto reclassify;
#endif
}
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index f26bdea875c1..a1cd778240cd 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -403,6 +403,8 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
if (len <= cl->deficit) {
cl->deficit -= len;
skb = qdisc_dequeue_peeked(cl->qdisc);
+ if (unlikely(skb == NULL))
+ goto out;
if (cl->qdisc->q.qlen == 0)
list_del(&cl->alist);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index cb5d4ad32946..16bc83b2842a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -658,8 +658,10 @@ static void qdisc_rcu_free(struct rcu_head *head)
{
struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
- if (qdisc_is_percpu_stats(qdisc))
+ if (qdisc_is_percpu_stats(qdisc)) {
free_percpu(qdisc->cpu_bstats);
+ free_percpu(qdisc->cpu_qstats);
+ }
kfree((char *) qdisc - qdisc->padded);
}
@@ -737,7 +739,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
return;
}
if (!netif_is_multiqueue(dev))
- qdisc->flags |= TCQ_F_ONETXQUEUE;
+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
dev_queue->qdisc_sleeping = qdisc;
}
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index e7c648fa9dc3..10adbc617905 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -1,4 +1,5 @@
-/* net/sched/sch_ingress.c - Ingress qdisc
+/* net/sched/sch_ingress.c - Ingress and clsact qdisc
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -98,17 +99,100 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
.owner = THIS_MODULE,
};
+static unsigned long clsact_get(struct Qdisc *sch, u32 classid)
+{
+ switch (TC_H_MIN(classid)) {
+ case TC_H_MIN(TC_H_MIN_INGRESS):
+ case TC_H_MIN(TC_H_MIN_EGRESS):
+ return TC_H_MIN(classid);
+ default:
+ return 0;
+ }
+}
+
+static unsigned long clsact_bind_filter(struct Qdisc *sch,
+ unsigned long parent, u32 classid)
+{
+ return clsact_get(sch, classid);
+}
+
+static struct tcf_proto __rcu **clsact_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
+{
+ struct net_device *dev = qdisc_dev(sch);
+
+ switch (cl) {
+ case TC_H_MIN(TC_H_MIN_INGRESS):
+ return &dev->ingress_cl_list;
+ case TC_H_MIN(TC_H_MIN_EGRESS):
+ return &dev->egress_cl_list;
+ default:
+ return NULL;
+ }
+}
+
+static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ net_inc_ingress_queue();
+ net_inc_egress_queue();
+
+ sch->flags |= TCQ_F_CPUSTATS;
+
+ return 0;
+}
+
+static void clsact_destroy(struct Qdisc *sch)
+{
+ struct net_device *dev = qdisc_dev(sch);
+
+ tcf_destroy_chain(&dev->ingress_cl_list);
+ tcf_destroy_chain(&dev->egress_cl_list);
+
+ net_dec_ingress_queue();
+ net_dec_egress_queue();
+}
+
+static const struct Qdisc_class_ops clsact_class_ops = {
+ .leaf = ingress_leaf,
+ .get = clsact_get,
+ .put = ingress_put,
+ .walk = ingress_walk,
+ .tcf_chain = clsact_find_tcf,
+ .bind_tcf = clsact_bind_filter,
+ .unbind_tcf = ingress_put,
+};
+
+static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
+ .cl_ops = &clsact_class_ops,
+ .id = "clsact",
+ .init = clsact_init,
+ .destroy = clsact_destroy,
+ .dump = ingress_dump,
+ .owner = THIS_MODULE,
+};
+
static int __init ingress_module_init(void)
{
- return register_qdisc(&ingress_qdisc_ops);
+ int ret;
+
+ ret = register_qdisc(&ingress_qdisc_ops);
+ if (!ret) {
+ ret = register_qdisc(&clsact_qdisc_ops);
+ if (ret)
+ unregister_qdisc(&ingress_qdisc_ops);
+ }
+
+ return ret;
}
static void __exit ingress_module_exit(void)
{
unregister_qdisc(&ingress_qdisc_ops);
+ unregister_qdisc(&clsact_qdisc_ops);
}
module_init(ingress_module_init);
module_exit(ingress_module_exit);
+MODULE_ALIAS("sch_clsact");
MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index f3cbaecd283a..3e82f047caaf 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -63,7 +63,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
if (qdisc == NULL)
goto err;
priv->qdiscs[ntx] = qdisc;
- qdisc->flags |= TCQ_F_ONETXQUEUE;
+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
sch->flags |= TCQ_F_MQROOT;
@@ -156,7 +156,7 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
*old = dev_graft_qdisc(dev_queue, new);
if (new)
- new->flags |= TCQ_F_ONETXQUEUE;
+ new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
if (dev->flags & IFF_UP)
dev_activate(dev);
return 0;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 3811a745452c..ad70ecf57ce7 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -132,7 +132,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
goto err;
}
priv->qdiscs[i] = qdisc;
- qdisc->flags |= TCQ_F_ONETXQUEUE;
+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
/* If the mqprio options indicate that hardware should own
@@ -209,7 +209,7 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
*old = dev_graft_qdisc(dev_queue, new);
if (new)
- new->flags |= TCQ_F_ONETXQUEUE;
+ new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
if (dev->flags & IFF_UP)
dev_activate(dev);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index b00f1f9611d6..2bf8ec92dde4 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -383,6 +383,7 @@ void sctp_association_free(struct sctp_association *asoc)
list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) {
transport = list_entry(pos, struct sctp_transport, transports);
list_del_rcu(pos);
+ sctp_unhash_transport(transport);
sctp_transport_free(transport);
}
@@ -500,6 +501,8 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
/* Remove this peer from the list. */
list_del_rcu(&peer->transports);
+ /* Remove this peer from the transport hashtable */
+ sctp_unhash_transport(peer);
/* Get the first transport of asoc. */
pos = asoc->peer.transport_addr_list.next;
@@ -699,6 +702,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
/* Attach the remote transport to our asoc. */
list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list);
asoc->peer.transport_count++;
+ /* Add this peer into the transport hashtable */
+ sctp_hash_transport(peer);
/* If we do not yet have a primary path, set one. */
if (!asoc->peer.primary_path) {
@@ -1590,7 +1595,7 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc,
/* Set an association id for a given association */
int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp)
{
- bool preload = !!(gfp & __GFP_WAIT);
+ bool preload = gfpflags_allow_blocking(gfp);
int ret;
/* If the id is already assigned, keep it. */
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 4f15b7d730e1..912eb1685a5d 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -27,9 +27,9 @@
* Vlad Yasevich <vladislav.yasevich@hp.com>
*/
+#include <crypto/hash.h>
#include <linux/slab.h>
#include <linux/types.h>
-#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <net/sctp/sctp.h>
#include <net/sctp/auth.h>
@@ -448,7 +448,7 @@ struct sctp_shared_key *sctp_auth_get_shkey(
*/
int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
{
- struct crypto_hash *tfm = NULL;
+ struct crypto_shash *tfm = NULL;
__u16 id;
/* If AUTH extension is disabled, we are done */
@@ -462,9 +462,8 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
return 0;
/* Allocated the array of pointers to transorms */
- ep->auth_hmacs = kzalloc(
- sizeof(struct crypto_hash *) * SCTP_AUTH_NUM_HMACS,
- gfp);
+ ep->auth_hmacs = kzalloc(sizeof(struct crypto_shash *) *
+ SCTP_AUTH_NUM_HMACS, gfp);
if (!ep->auth_hmacs)
return -ENOMEM;
@@ -483,8 +482,7 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
continue;
/* Allocate the ID */
- tfm = crypto_alloc_hash(sctp_hmac_list[id].hmac_name, 0,
- CRYPTO_ALG_ASYNC);
+ tfm = crypto_alloc_shash(sctp_hmac_list[id].hmac_name, 0, 0);
if (IS_ERR(tfm))
goto out_err;
@@ -500,7 +498,7 @@ out_err:
}
/* Destroy the hmac tfm array */
-void sctp_auth_destroy_hmacs(struct crypto_hash *auth_hmacs[])
+void sctp_auth_destroy_hmacs(struct crypto_shash *auth_hmacs[])
{
int i;
@@ -508,8 +506,7 @@ void sctp_auth_destroy_hmacs(struct crypto_hash *auth_hmacs[])
return;
for (i = 0; i < SCTP_AUTH_NUM_HMACS; i++) {
- if (auth_hmacs[i])
- crypto_free_hash(auth_hmacs[i]);
+ crypto_free_shash(auth_hmacs[i]);
}
kfree(auth_hmacs);
}
@@ -709,8 +706,7 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
struct sctp_auth_chunk *auth,
gfp_t gfp)
{
- struct scatterlist sg;
- struct hash_desc desc;
+ struct crypto_shash *tfm;
struct sctp_auth_bytes *asoc_key;
__u16 key_id, hmac_id;
__u8 *digest;
@@ -742,16 +738,22 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
/* set up scatter list */
end = skb_tail_pointer(skb);
- sg_init_one(&sg, auth, end - (unsigned char *)auth);
- desc.tfm = asoc->ep->auth_hmacs[hmac_id];
- desc.flags = 0;
+ tfm = asoc->ep->auth_hmacs[hmac_id];
digest = auth->auth_hdr.hmac;
- if (crypto_hash_setkey(desc.tfm, &asoc_key->data[0], asoc_key->len))
+ if (crypto_shash_setkey(tfm, &asoc_key->data[0], asoc_key->len))
goto free;
- crypto_hash_digest(&desc, &sg, sg.length, digest);
+ {
+ SHASH_DESC_ON_STACK(desc, tfm);
+
+ desc->tfm = tfm;
+ desc->flags = 0;
+ crypto_shash_digest(desc, (u8 *)auth,
+ end - (unsigned char *)auth, digest);
+ shash_desc_zero(desc);
+ }
free:
if (free_key)
@@ -809,8 +811,8 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
if (!has_sha1)
return -EINVAL;
- memcpy(ep->auth_hmacs_list->hmac_ids, &hmacs->shmac_idents[0],
- hmacs->shmac_num_idents * sizeof(__u16));
+ for (i = 0; i < hmacs->shmac_num_idents; i++)
+ ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]);
ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) +
hmacs->shmac_num_idents * sizeof(__u16));
return 0;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 9da76ba4d10f..9d494e35e7f9 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -42,7 +42,6 @@
#include <linux/slab.h>
#include <linux/in.h>
#include <linux/random.h> /* get_random_bytes() */
-#include <linux/crypto.h>
#include <net/sock.h>
#include <net/ipv6.h>
#include <net/sctp/sctp.h>
@@ -314,21 +313,16 @@ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep,
}
/* Find the association that goes with this chunk.
- * We do a linear search of the associations for this endpoint.
- * We return the matching transport address too.
+ * We lookup the transport from hashtable at first, then get association
+ * through t->assoc.
*/
-static struct sctp_association *__sctp_endpoint_lookup_assoc(
+struct sctp_association *sctp_endpoint_lookup_assoc(
const struct sctp_endpoint *ep,
const union sctp_addr *paddr,
struct sctp_transport **transport)
{
struct sctp_association *asoc = NULL;
- struct sctp_association *tmp;
- struct sctp_transport *t = NULL;
- struct sctp_hashbucket *head;
- struct sctp_ep_common *epb;
- int hash;
- int rport;
+ struct sctp_transport *t;
*transport = NULL;
@@ -337,45 +331,16 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc(
*/
if (!ep->base.bind_addr.port)
goto out;
+ t = sctp_epaddr_lookup_transport(ep, paddr);
+ if (!t)
+ goto out;
- rport = ntohs(paddr->v4.sin_port);
-
- hash = sctp_assoc_hashfn(sock_net(ep->base.sk), ep->base.bind_addr.port,
- rport);
- head = &sctp_assoc_hashtable[hash];
- read_lock(&head->lock);
- sctp_for_each_hentry(epb, &head->chain) {
- tmp = sctp_assoc(epb);
- if (tmp->ep != ep || rport != tmp->peer.port)
- continue;
-
- t = sctp_assoc_lookup_paddr(tmp, paddr);
- if (t) {
- asoc = tmp;
- *transport = t;
- break;
- }
- }
- read_unlock(&head->lock);
+ *transport = t;
+ asoc = t->asoc;
out:
return asoc;
}
-/* Lookup association on an endpoint based on a peer address. BH-safe. */
-struct sctp_association *sctp_endpoint_lookup_assoc(
- const struct sctp_endpoint *ep,
- const union sctp_addr *paddr,
- struct sctp_transport **transport)
-{
- struct sctp_association *asoc;
-
- local_bh_disable();
- asoc = __sctp_endpoint_lookup_assoc(ep, paddr, transport);
- local_bh_enable();
-
- return asoc;
-}
-
/* Look for any peeled off association from the endpoint that matches the
* given peer address.
*/
diff --git a/net/sctp/input.c b/net/sctp/input.c
index b6493b3f11a9..49d2cc751386 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -782,65 +782,149 @@ hit:
return ep;
}
-/* Insert association into the hash table. */
-static void __sctp_hash_established(struct sctp_association *asoc)
+/* rhashtable for transport */
+struct sctp_hash_cmp_arg {
+ const struct sctp_endpoint *ep;
+ const union sctp_addr *laddr;
+ const union sctp_addr *paddr;
+ const struct net *net;
+};
+
+static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
{
- struct net *net = sock_net(asoc->base.sk);
- struct sctp_ep_common *epb;
- struct sctp_hashbucket *head;
+ const struct sctp_hash_cmp_arg *x = arg->key;
+ const struct sctp_transport *t = ptr;
+ struct sctp_association *asoc = t->asoc;
+ const struct net *net = x->net;
- epb = &asoc->base;
+ if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr))
+ return 1;
+ if (!net_eq(sock_net(asoc->base.sk), net))
+ return 1;
+ if (x->ep) {
+ if (x->ep != asoc->ep)
+ return 1;
+ } else {
+ if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port))
+ return 1;
+ if (!sctp_bind_addr_match(&asoc->base.bind_addr,
+ x->laddr, sctp_sk(asoc->base.sk)))
+ return 1;
+ }
- /* Calculate which chain this entry will belong to. */
- epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port,
- asoc->peer.port);
+ return 0;
+}
- head = &sctp_assoc_hashtable[epb->hashent];
+static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed)
+{
+ const struct sctp_transport *t = data;
+ const union sctp_addr *paddr = &t->ipaddr;
+ const struct net *net = sock_net(t->asoc->base.sk);
+ u16 lport = htons(t->asoc->base.bind_addr.port);
+ u32 addr;
+
+ if (paddr->sa.sa_family == AF_INET6)
+ addr = jhash(&paddr->v6.sin6_addr, 16, seed);
+ else
+ addr = paddr->v4.sin_addr.s_addr;
- write_lock(&head->lock);
- hlist_add_head(&epb->node, &head->chain);
- write_unlock(&head->lock);
+ return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 |
+ (__force __u32)lport, net_hash_mix(net), seed);
}
-/* Add an association to the hash. Local BH-safe. */
-void sctp_hash_established(struct sctp_association *asoc)
+static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed)
{
- if (asoc->temp)
- return;
+ const struct sctp_hash_cmp_arg *x = data;
+ const union sctp_addr *paddr = x->paddr;
+ const struct net *net = x->net;
+ u16 lport;
+ u32 addr;
+
+ lport = x->ep ? htons(x->ep->base.bind_addr.port) :
+ x->laddr->v4.sin_port;
+ if (paddr->sa.sa_family == AF_INET6)
+ addr = jhash(&paddr->v6.sin6_addr, 16, seed);
+ else
+ addr = paddr->v4.sin_addr.s_addr;
- local_bh_disable();
- __sctp_hash_established(asoc);
- local_bh_enable();
+ return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 |
+ (__force __u32)lport, net_hash_mix(net), seed);
}
-/* Remove association from the hash table. */
-static void __sctp_unhash_established(struct sctp_association *asoc)
+static const struct rhashtable_params sctp_hash_params = {
+ .head_offset = offsetof(struct sctp_transport, node),
+ .hashfn = sctp_hash_key,
+ .obj_hashfn = sctp_hash_obj,
+ .obj_cmpfn = sctp_hash_cmp,
+ .automatic_shrinking = true,
+};
+
+int sctp_transport_hashtable_init(void)
{
- struct net *net = sock_net(asoc->base.sk);
- struct sctp_hashbucket *head;
- struct sctp_ep_common *epb;
+ return rhashtable_init(&sctp_transport_hashtable, &sctp_hash_params);
+}
- epb = &asoc->base;
+void sctp_transport_hashtable_destroy(void)
+{
+ rhashtable_destroy(&sctp_transport_hashtable);
+}
- epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port,
- asoc->peer.port);
+void sctp_hash_transport(struct sctp_transport *t)
+{
+ struct sctp_hash_cmp_arg arg;
- head = &sctp_assoc_hashtable[epb->hashent];
+ if (t->asoc->temp)
+ return;
- write_lock(&head->lock);
- hlist_del_init(&epb->node);
- write_unlock(&head->lock);
+ arg.ep = t->asoc->ep;
+ arg.paddr = &t->ipaddr;
+ arg.net = sock_net(t->asoc->base.sk);
+
+reinsert:
+ if (rhashtable_lookup_insert_key(&sctp_transport_hashtable, &arg,
+ &t->node, sctp_hash_params) == -EBUSY)
+ goto reinsert;
}
-/* Remove association from the hash table. Local BH-safe. */
-void sctp_unhash_established(struct sctp_association *asoc)
+void sctp_unhash_transport(struct sctp_transport *t)
{
- if (asoc->temp)
+ if (t->asoc->temp)
return;
- local_bh_disable();
- __sctp_unhash_established(asoc);
- local_bh_enable();
+ rhashtable_remove_fast(&sctp_transport_hashtable, &t->node,
+ sctp_hash_params);
+}
+
+struct sctp_transport *sctp_addrs_lookup_transport(
+ struct net *net,
+ const union sctp_addr *laddr,
+ const union sctp_addr *paddr)
+{
+ struct sctp_hash_cmp_arg arg = {
+ .ep = NULL,
+ .laddr = laddr,
+ .paddr = paddr,
+ .net = net,
+ };
+
+ return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg,
+ sctp_hash_params);
+}
+
+struct sctp_transport *sctp_epaddr_lookup_transport(
+ const struct sctp_endpoint *ep,
+ const union sctp_addr *paddr)
+{
+ struct net *net = sock_net(ep->base.sk);
+ struct sctp_hash_cmp_arg arg = {
+ .ep = ep,
+ .paddr = paddr,
+ .net = net,
+ };
+
+ return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg,
+ sctp_hash_params);
}
/* Look up an association. */
@@ -850,38 +934,26 @@ static struct sctp_association *__sctp_lookup_association(
const union sctp_addr *peer,
struct sctp_transport **pt)
{
- struct sctp_hashbucket *head;
- struct sctp_ep_common *epb;
- struct sctp_association *asoc;
- struct sctp_transport *transport;
- int hash;
+ struct sctp_transport *t;
+ struct sctp_association *asoc = NULL;
- /* Optimize here for direct hit, only listening connections can
- * have wildcards anyways.
- */
- hash = sctp_assoc_hashfn(net, ntohs(local->v4.sin_port),
- ntohs(peer->v4.sin_port));
- head = &sctp_assoc_hashtable[hash];
- read_lock(&head->lock);
- sctp_for_each_hentry(epb, &head->chain) {
- asoc = sctp_assoc(epb);
- transport = sctp_assoc_is_match(asoc, net, local, peer);
- if (transport)
- goto hit;
- }
+ rcu_read_lock();
+ t = sctp_addrs_lookup_transport(net, local, peer);
+ if (!t || !sctp_transport_hold(t))
+ goto out;
- read_unlock(&head->lock);
+ asoc = t->asoc;
+ sctp_association_hold(asoc);
+ *pt = t;
- return NULL;
+ sctp_transport_put(t);
-hit:
- *pt = transport;
- sctp_association_hold(asoc);
- read_unlock(&head->lock);
+out:
+ rcu_read_unlock();
return asoc;
}
-/* Look up an association. BH-safe. */
+/* Look up an association. protected by RCU read lock */
static
struct sctp_association *sctp_lookup_association(struct net *net,
const union sctp_addr *laddr,
@@ -890,9 +962,7 @@ struct sctp_association *sctp_lookup_association(struct net *net,
{
struct sctp_association *asoc;
- local_bh_disable();
asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
- local_bh_enable();
return asoc;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e917d27328ea..ce46f1c7f133 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -209,6 +209,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
struct sock *sk = skb->sk;
struct ipv6_pinfo *np = inet6_sk(sk);
struct flowi6 *fl6 = &transport->fl.u.ip6;
+ int res;
pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb,
skb->len, &fl6->saddr, &fl6->daddr);
@@ -220,7 +221,10 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
- return ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
+ rcu_read_lock();
+ res = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), np->tclass);
+ rcu_read_unlock();
+ return res;
}
/* Returns the dst cache entry for the given source and destination ip
@@ -262,7 +266,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
pr_debug("src=%pI6 - ", &fl6->saddr);
}
- final_p = fl6_update_dst(fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();
+
dst = ip6_dst_lookup_flow(sk, fl6, final_p);
if (!asoc || saddr)
goto out;
@@ -316,14 +323,13 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
}
}
}
- rcu_read_unlock();
-
if (baddr) {
fl6->saddr = baddr->v6.sin6_addr;
fl6->fl6_sport = baddr->v6.sin6_port;
- final_p = fl6_update_dst(fl6, np->opt, &final);
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
dst = ip6_dst_lookup_flow(sk, fl6, final_p);
}
+ rcu_read_unlock();
out:
if (!IS_ERR_OR_NULL(dst)) {
@@ -520,6 +526,8 @@ static int sctp_v6_cmp_addr(const union sctp_addr *addr1,
}
return 0;
}
+ if (addr1->v6.sin6_port != addr2->v6.sin6_port)
+ return 0;
if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr))
return 0;
/* If this is a linklocal address, compare the scope_id. */
@@ -635,6 +643,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
struct sock *newsk;
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
struct sctp6_sock *newsctp6sk;
+ struct ipv6_txoptions *opt;
newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0);
if (!newsk)
@@ -654,6 +663,13 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ if (opt)
+ opt = ipv6_dup_options(newsk, opt);
+ RCU_INIT_POINTER(newnp->opt, opt);
+ rcu_read_unlock();
+
/* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname()
* and getpeername().
*/
diff --git a/net/sctp/output.c b/net/sctp/output.c
index abe7c2db2412..9d610eddd19e 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -534,7 +534,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
* by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
*/
if (!sctp_checksum_disable) {
- if (!(dst->dev->features & NETIF_F_SCTP_CSUM) ||
+ if (!(dst->dev->features & NETIF_F_SCTP_CRC) ||
(dst_xfrm(dst) != NULL) || packet->ipfragok) {
sh->checksum = sctp_compute_cksum(nskb, 0);
} else {
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 7e8f0a117106..c0380cfb16ae 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -324,6 +324,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
"illegal chunk");
+ sctp_chunk_hold(chunk);
sctp_outq_tail_data(q, chunk);
if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS);
@@ -1251,6 +1252,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
*/
sack_a_rwnd = ntohl(sack->a_rwnd);
+ asoc->peer.zero_window_announced = !sack_a_rwnd;
outstanding = q->outstanding_bytes;
if (outstanding < sack_a_rwnd)
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 0697eda5aed8..963dffcc2618 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -165,8 +165,6 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa
list_for_each_entry_rcu(transport, &assoc->peer.transport_addr_list,
transports) {
addr = &transport->ipaddr;
- if (transport->dead)
- continue;
af = sctp_get_af_specific(addr->sa.sa_family);
if (af->cmp_addr(addr, primary)) {
@@ -281,88 +279,140 @@ void sctp_eps_proc_exit(struct net *net)
remove_proc_entry("eps", net->sctp.proc_net_sctp);
}
+struct sctp_ht_iter {
+ struct seq_net_private p;
+ struct rhashtable_iter hti;
+};
-static void *sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos)
+static struct sctp_transport *sctp_transport_get_next(struct seq_file *seq)
{
- if (*pos >= sctp_assoc_hashsize)
- return NULL;
+ struct sctp_ht_iter *iter = seq->private;
+ struct sctp_transport *t;
- if (*pos < 0)
- *pos = 0;
+ t = rhashtable_walk_next(&iter->hti);
+ for (; t; t = rhashtable_walk_next(&iter->hti)) {
+ if (IS_ERR(t)) {
+ if (PTR_ERR(t) == -EAGAIN)
+ continue;
+ break;
+ }
- if (*pos == 0)
- seq_printf(seq, " ASSOC SOCK STY SST ST HBKT "
- "ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT "
- "RPORT LADDRS <-> RADDRS "
- "HBINT INS OUTS MAXRT T1X T2X RTXC "
- "wmema wmemq sndbuf rcvbuf\n");
+ if (net_eq(sock_net(t->asoc->base.sk), seq_file_net(seq)) &&
+ t->asoc->peer.primary_path == t)
+ break;
+ }
- return (void *)pos;
+ return t;
}
-static void sctp_assocs_seq_stop(struct seq_file *seq, void *v)
+static struct sctp_transport *sctp_transport_get_idx(struct seq_file *seq,
+ loff_t pos)
{
+ void *obj = SEQ_START_TOKEN;
+
+ while (pos && (obj = sctp_transport_get_next(seq)) && !IS_ERR(obj))
+ pos--;
+
+ return obj;
}
+static int sctp_transport_walk_start(struct seq_file *seq)
+{
+ struct sctp_ht_iter *iter = seq->private;
+ int err;
+
+ err = rhashtable_walk_init(&sctp_transport_hashtable, &iter->hti);
+ if (err)
+ return err;
+
+ err = rhashtable_walk_start(&iter->hti);
+
+ return err == -EAGAIN ? 0 : err;
+}
+
+static void sctp_transport_walk_stop(struct seq_file *seq)
+{
+ struct sctp_ht_iter *iter = seq->private;
+
+ rhashtable_walk_stop(&iter->hti);
+ rhashtable_walk_exit(&iter->hti);
+}
+
+static void *sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ int err = sctp_transport_walk_start(seq);
+
+ if (err)
+ return ERR_PTR(err);
+
+ return sctp_transport_get_idx(seq, *pos);
+}
+
+static void sctp_assocs_seq_stop(struct seq_file *seq, void *v)
+{
+ sctp_transport_walk_stop(seq);
+}
static void *sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- if (++*pos >= sctp_assoc_hashsize)
- return NULL;
+ ++*pos;
- return pos;
+ return sctp_transport_get_next(seq);
}
/* Display sctp associations (/proc/net/sctp/assocs). */
static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
{
- struct sctp_hashbucket *head;
- struct sctp_ep_common *epb;
+ struct sctp_transport *transport;
struct sctp_association *assoc;
+ struct sctp_ep_common *epb;
struct sock *sk;
- int hash = *(loff_t *)v;
-
- if (hash >= sctp_assoc_hashsize)
- return -ENOMEM;
- head = &sctp_assoc_hashtable[hash];
- local_bh_disable();
- read_lock(&head->lock);
- sctp_for_each_hentry(epb, &head->chain) {
- assoc = sctp_assoc(epb);
- sk = epb->sk;
- if (!net_eq(sock_net(sk), seq_file_net(seq)))
- continue;
- seq_printf(seq,
- "%8pK %8pK %-3d %-3d %-2d %-4d "
- "%4d %8d %8d %7u %5lu %-5d %5d ",
- assoc, sk, sctp_sk(sk)->type, sk->sk_state,
- assoc->state, hash,
- assoc->assoc_id,
- assoc->sndbuf_used,
- atomic_read(&assoc->rmem_alloc),
- from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
- sock_i_ino(sk),
- epb->bind_addr.port,
- assoc->peer.port);
- seq_printf(seq, " ");
- sctp_seq_dump_local_addrs(seq, epb);
- seq_printf(seq, "<-> ");
- sctp_seq_dump_remote_addrs(seq, assoc);
- seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d "
- "%8d %8d %8d %8d",
- assoc->hbinterval, assoc->c.sinit_max_instreams,
- assoc->c.sinit_num_ostreams, assoc->max_retrans,
- assoc->init_retries, assoc->shutdown_retries,
- assoc->rtx_data_chunks,
- atomic_read(&sk->sk_wmem_alloc),
- sk->sk_wmem_queued,
- sk->sk_sndbuf,
- sk->sk_rcvbuf);
- seq_printf(seq, "\n");
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(seq, " ASSOC SOCK STY SST ST HBKT "
+ "ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT "
+ "RPORT LADDRS <-> RADDRS "
+ "HBINT INS OUTS MAXRT T1X T2X RTXC "
+ "wmema wmemq sndbuf rcvbuf\n");
+ return 0;
}
- read_unlock(&head->lock);
- local_bh_enable();
+
+ transport = (struct sctp_transport *)v;
+ if (!sctp_transport_hold(transport))
+ return 0;
+ assoc = transport->asoc;
+ epb = &assoc->base;
+ sk = epb->sk;
+
+ seq_printf(seq,
+ "%8pK %8pK %-3d %-3d %-2d %-4d "
+ "%4d %8d %8d %7u %5lu %-5d %5d ",
+ assoc, sk, sctp_sk(sk)->type, sk->sk_state,
+ assoc->state, 0,
+ assoc->assoc_id,
+ assoc->sndbuf_used,
+ atomic_read(&assoc->rmem_alloc),
+ from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
+ sock_i_ino(sk),
+ epb->bind_addr.port,
+ assoc->peer.port);
+ seq_printf(seq, " ");
+ sctp_seq_dump_local_addrs(seq, epb);
+ seq_printf(seq, "<-> ");
+ sctp_seq_dump_remote_addrs(seq, assoc);
+ seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d "
+ "%8d %8d %8d %8d",
+ assoc->hbinterval, assoc->c.sinit_max_instreams,
+ assoc->c.sinit_num_ostreams, assoc->max_retrans,
+ assoc->init_retries, assoc->shutdown_retries,
+ assoc->rtx_data_chunks,
+ atomic_read(&sk->sk_wmem_alloc),
+ sk->sk_wmem_queued,
+ sk->sk_sndbuf,
+ sk->sk_rcvbuf);
+ seq_printf(seq, "\n");
+
+ sctp_transport_put(transport);
return 0;
}
@@ -378,7 +428,7 @@ static const struct seq_operations sctp_assoc_ops = {
static int sctp_assocs_seq_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &sctp_assoc_ops,
- sizeof(struct seq_net_private));
+ sizeof(struct sctp_ht_iter));
}
static const struct file_operations sctp_assocs_seq_fops = {
@@ -409,112 +459,96 @@ void sctp_assocs_proc_exit(struct net *net)
static void *sctp_remaddr_seq_start(struct seq_file *seq, loff_t *pos)
{
- if (*pos >= sctp_assoc_hashsize)
- return NULL;
+ int err = sctp_transport_walk_start(seq);
- if (*pos < 0)
- *pos = 0;
-
- if (*pos == 0)
- seq_printf(seq, "ADDR ASSOC_ID HB_ACT RTO MAX_PATH_RTX "
- "REM_ADDR_RTX START STATE\n");
+ if (err)
+ return ERR_PTR(err);
- return (void *)pos;
+ return sctp_transport_get_idx(seq, *pos);
}
static void *sctp_remaddr_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- if (++*pos >= sctp_assoc_hashsize)
- return NULL;
+ ++*pos;
- return pos;
+ return sctp_transport_get_next(seq);
}
static void sctp_remaddr_seq_stop(struct seq_file *seq, void *v)
{
+ sctp_transport_walk_stop(seq);
}
static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
{
- struct sctp_hashbucket *head;
- struct sctp_ep_common *epb;
struct sctp_association *assoc;
- struct sctp_transport *tsp;
- int hash = *(loff_t *)v;
+ struct sctp_transport *transport, *tsp;
- if (hash >= sctp_assoc_hashsize)
- return -ENOMEM;
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(seq, "ADDR ASSOC_ID HB_ACT RTO MAX_PATH_RTX "
+ "REM_ADDR_RTX START STATE\n");
+ return 0;
+ }
- head = &sctp_assoc_hashtable[hash];
- local_bh_disable();
- read_lock(&head->lock);
- rcu_read_lock();
- sctp_for_each_hentry(epb, &head->chain) {
- if (!net_eq(sock_net(epb->sk), seq_file_net(seq)))
- continue;
- assoc = sctp_assoc(epb);
- list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list,
- transports) {
- if (tsp->dead)
- continue;
+ transport = (struct sctp_transport *)v;
+ if (!sctp_transport_hold(transport))
+ return 0;
+ assoc = transport->asoc;
+
+ list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list,
+ transports) {
+ /*
+ * The remote address (ADDR)
+ */
+ tsp->af_specific->seq_dump_addr(seq, &tsp->ipaddr);
+ seq_printf(seq, " ");
+ /*
+ * The association ID (ASSOC_ID)
+ */
+ seq_printf(seq, "%d ", tsp->asoc->assoc_id);
+
+ /*
+ * If the Heartbeat is active (HB_ACT)
+ * Note: 1 = Active, 0 = Inactive
+ */
+ seq_printf(seq, "%d ", timer_pending(&tsp->hb_timer));
+
+ /*
+ * Retransmit time out (RTO)
+ */
+ seq_printf(seq, "%lu ", tsp->rto);
+
+ /*
+ * Maximum path retransmit count (PATH_MAX_RTX)
+ */
+ seq_printf(seq, "%d ", tsp->pathmaxrxt);
+
+ /*
+ * remote address retransmit count (REM_ADDR_RTX)
+ * Note: We don't have a way to tally this at the moment
+ * so lets just leave it as zero for the moment
+ */
+ seq_puts(seq, "0 ");
+
+ /*
+ * remote address start time (START). This is also not
+ * currently implemented, but we can record it with a
+ * jiffies marker in a subsequent patch
+ */
+ seq_puts(seq, "0 ");
+
+ /*
+ * The current state of this destination. I.e.
+ * SCTP_ACTIVE, SCTP_INACTIVE, ...
+ */
+ seq_printf(seq, "%d", tsp->state);
- /*
- * The remote address (ADDR)
- */
- tsp->af_specific->seq_dump_addr(seq, &tsp->ipaddr);
- seq_printf(seq, " ");
-
- /*
- * The association ID (ASSOC_ID)
- */
- seq_printf(seq, "%d ", tsp->asoc->assoc_id);
-
- /*
- * If the Heartbeat is active (HB_ACT)
- * Note: 1 = Active, 0 = Inactive
- */
- seq_printf(seq, "%d ", timer_pending(&tsp->hb_timer));
-
- /*
- * Retransmit time out (RTO)
- */
- seq_printf(seq, "%lu ", tsp->rto);
-
- /*
- * Maximum path retransmit count (PATH_MAX_RTX)
- */
- seq_printf(seq, "%d ", tsp->pathmaxrxt);
-
- /*
- * remote address retransmit count (REM_ADDR_RTX)
- * Note: We don't have a way to tally this at the moment
- * so lets just leave it as zero for the moment
- */
- seq_puts(seq, "0 ");
-
- /*
- * remote address start time (START). This is also not
- * currently implemented, but we can record it with a
- * jiffies marker in a subsequent patch
- */
- seq_puts(seq, "0 ");
-
- /*
- * The current state of this destination. I.e.
- * SCTP_ACTIVE, SCTP_INACTIVE, ...
- */
- seq_printf(seq, "%d", tsp->state);
-
- seq_printf(seq, "\n");
- }
+ seq_printf(seq, "\n");
}
- rcu_read_unlock();
- read_unlock(&head->lock);
- local_bh_enable();
+ sctp_transport_put(transport);
return 0;
-
}
static const struct seq_operations sctp_remaddr_ops = {
@@ -533,7 +567,7 @@ void sctp_remaddr_proc_exit(struct net *net)
static int sctp_remaddr_seq_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &sctp_remaddr_ops,
- sizeof(struct seq_net_private));
+ sizeof(struct sctp_ht_iter));
}
static const struct file_operations sctp_remaddr_seq_fops = {
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 3d9ea9a48289..1099e99a53c4 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -60,6 +60,8 @@
#include <net/inet_common.h>
#include <net/inet_ecn.h>
+#define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024)
+
/* Global data structures. */
struct sctp_globals sctp_globals __read_mostly;
@@ -1223,6 +1225,9 @@ static int __net_init sctp_defaults_init(struct net *net)
/* Max.Burst - 4 */
net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST;
+ /* Enable pf state by default */
+ net->sctp.pf_enable = 1;
+
/* Association.Max.Retrans - 10 attempts
* Path.Max.Retrans - 5 attempts (per destination address)
* Max.Init.Retransmits - 8 attempts
@@ -1352,6 +1357,8 @@ static __init int sctp_init(void)
unsigned long limit;
int max_share;
int order;
+ int num_entries;
+ int max_entry_order;
sock_skb_cb_check_size(sizeof(struct sctp_ulpevent));
@@ -1404,32 +1411,24 @@ static __init int sctp_init(void)
/* Size and allocate the association hash table.
* The methodology is similar to that of the tcp hash tables.
+ * Though not identical. Start by getting a goal size
*/
if (totalram_pages >= (128 * 1024))
goal = totalram_pages >> (22 - PAGE_SHIFT);
else
goal = totalram_pages >> (24 - PAGE_SHIFT);
- for (order = 0; (1UL << order) < goal; order++)
- ;
+ /* Then compute the page order for said goal */
+ order = get_order(goal);
- do {
- sctp_assoc_hashsize = (1UL << order) * PAGE_SIZE /
- sizeof(struct sctp_hashbucket);
- if ((sctp_assoc_hashsize > (64 * 1024)) && order > 0)
- continue;
- sctp_assoc_hashtable = (struct sctp_hashbucket *)
- __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order);
- } while (!sctp_assoc_hashtable && --order > 0);
- if (!sctp_assoc_hashtable) {
- pr_err("Failed association hash alloc\n");
- status = -ENOMEM;
- goto err_ahash_alloc;
- }
- for (i = 0; i < sctp_assoc_hashsize; i++) {
- rwlock_init(&sctp_assoc_hashtable[i].lock);
- INIT_HLIST_HEAD(&sctp_assoc_hashtable[i].chain);
- }
+ /* Now compute the required page order for the maximum sized table we
+ * want to create
+ */
+ max_entry_order = get_order(MAX_SCTP_PORT_HASH_ENTRIES *
+ sizeof(struct sctp_bind_hashbucket));
+
+ /* Limit the page order by that maximum hash table size */
+ order = min(order, max_entry_order);
/* Allocate and initialize the endpoint hash table. */
sctp_ep_hashsize = 64;
@@ -1445,27 +1444,45 @@ static __init int sctp_init(void)
INIT_HLIST_HEAD(&sctp_ep_hashtable[i].chain);
}
- /* Allocate and initialize the SCTP port hash table. */
+ /* Allocate and initialize the SCTP port hash table.
+ * Note that order is initalized to start at the max sized
+ * table we want to support. If we can't get that many pages
+ * reduce the order and try again
+ */
do {
- sctp_port_hashsize = (1UL << order) * PAGE_SIZE /
- sizeof(struct sctp_bind_hashbucket);
- if ((sctp_port_hashsize > (64 * 1024)) && order > 0)
- continue;
sctp_port_hashtable = (struct sctp_bind_hashbucket *)
- __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order);
+ __get_free_pages(GFP_KERNEL | __GFP_NOWARN, order);
} while (!sctp_port_hashtable && --order > 0);
+
if (!sctp_port_hashtable) {
pr_err("Failed bind hash alloc\n");
status = -ENOMEM;
goto err_bhash_alloc;
}
+
+ /* Now compute the number of entries that will fit in the
+ * port hash space we allocated
+ */
+ num_entries = (1UL << order) * PAGE_SIZE /
+ sizeof(struct sctp_bind_hashbucket);
+
+ /* And finish by rounding it down to the nearest power of two
+ * this wastes some memory of course, but its needed because
+ * the hash function operates based on the assumption that
+ * that the number of entries is a power of two
+ */
+ sctp_port_hashsize = rounddown_pow_of_two(num_entries);
+
for (i = 0; i < sctp_port_hashsize; i++) {
spin_lock_init(&sctp_port_hashtable[i].lock);
INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain);
}
- pr_info("Hash tables configured (established %d bind %d)\n",
- sctp_assoc_hashsize, sctp_port_hashsize);
+ if (sctp_transport_hashtable_init())
+ goto err_thash_alloc;
+
+ pr_info("Hash tables configured (bind %d/%d)\n", sctp_port_hashsize,
+ num_entries);
sctp_sysctl_register();
@@ -1518,12 +1535,10 @@ err_register_defaults:
get_order(sctp_port_hashsize *
sizeof(struct sctp_bind_hashbucket)));
err_bhash_alloc:
+ sctp_transport_hashtable_destroy();
+err_thash_alloc:
kfree(sctp_ep_hashtable);
err_ehash_alloc:
- free_pages((unsigned long)sctp_assoc_hashtable,
- get_order(sctp_assoc_hashsize *
- sizeof(struct sctp_hashbucket)));
-err_ahash_alloc:
percpu_counter_destroy(&sctp_sockets_allocated);
err_percpu_counter_init:
kmem_cache_destroy(sctp_chunk_cachep);
@@ -1557,13 +1572,11 @@ static __exit void sctp_exit(void)
sctp_sysctl_unregister();
- free_pages((unsigned long)sctp_assoc_hashtable,
- get_order(sctp_assoc_hashsize *
- sizeof(struct sctp_hashbucket)));
- kfree(sctp_ep_hashtable);
free_pages((unsigned long)sctp_port_hashtable,
get_order(sctp_port_hashsize *
sizeof(struct sctp_bind_hashbucket)));
+ kfree(sctp_ep_hashtable);
+ sctp_transport_hashtable_destroy();
percpu_counter_destroy(&sctp_sockets_allocated);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 763e06a55155..1296e555fe29 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -45,6 +45,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <crypto/hash.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/ip.h>
@@ -52,7 +53,6 @@
#include <linux/net.h>
#include <linux/inet.h>
#include <linux/scatterlist.h>
-#include <linux/crypto.h>
#include <linux/slab.h>
#include <net/sock.h>
@@ -1606,7 +1606,6 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
{
sctp_cookie_param_t *retval;
struct sctp_signed_cookie *cookie;
- struct scatterlist sg;
int headersize, bodysize;
/* Header size is static data prior to the actual cookie, including
@@ -1652,7 +1651,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
/* Set an expiration time for the cookie. */
cookie->c.expiration = ktime_add(asoc->cookie_life,
- ktime_get());
+ ktime_get_real());
/* Copy the peer's init packet. */
memcpy(&cookie->c.peer_init[0], init_chunk->chunk_hdr,
@@ -1663,16 +1662,19 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
ntohs(init_chunk->chunk_hdr->length), raw_addrs, addrs_len);
if (sctp_sk(ep->base.sk)->hmac) {
- struct hash_desc desc;
+ SHASH_DESC_ON_STACK(desc, sctp_sk(ep->base.sk)->hmac);
+ int err;
/* Sign the message. */
- sg_init_one(&sg, &cookie->c, bodysize);
- desc.tfm = sctp_sk(ep->base.sk)->hmac;
- desc.flags = 0;
-
- if (crypto_hash_setkey(desc.tfm, ep->secret_key,
- sizeof(ep->secret_key)) ||
- crypto_hash_digest(&desc, &sg, bodysize, cookie->signature))
+ desc->tfm = sctp_sk(ep->base.sk)->hmac;
+ desc->flags = 0;
+
+ err = crypto_shash_setkey(desc->tfm, ep->secret_key,
+ sizeof(ep->secret_key)) ?:
+ crypto_shash_digest(desc, (u8 *)&cookie->c, bodysize,
+ cookie->signature);
+ shash_desc_zero(desc);
+ if (err)
goto free_cookie;
}
@@ -1697,12 +1699,10 @@ struct sctp_association *sctp_unpack_cookie(
struct sctp_cookie *bear_cookie;
int headersize, bodysize, fixed_size;
__u8 *digest = ep->digest;
- struct scatterlist sg;
unsigned int len;
sctp_scope_t scope;
struct sk_buff *skb = chunk->skb;
ktime_t kt;
- struct hash_desc desc;
/* Header size is static data prior to the actual cookie, including
* any padding.
@@ -1733,16 +1733,23 @@ struct sctp_association *sctp_unpack_cookie(
goto no_hmac;
/* Check the signature. */
- sg_init_one(&sg, bear_cookie, bodysize);
- desc.tfm = sctp_sk(ep->base.sk)->hmac;
- desc.flags = 0;
-
- memset(digest, 0x00, SCTP_SIGNATURE_SIZE);
- if (crypto_hash_setkey(desc.tfm, ep->secret_key,
- sizeof(ep->secret_key)) ||
- crypto_hash_digest(&desc, &sg, bodysize, digest)) {
- *error = -SCTP_IERROR_NOMEM;
- goto fail;
+ {
+ SHASH_DESC_ON_STACK(desc, sctp_sk(ep->base.sk)->hmac);
+ int err;
+
+ desc->tfm = sctp_sk(ep->base.sk)->hmac;
+ desc->flags = 0;
+
+ err = crypto_shash_setkey(desc->tfm, ep->secret_key,
+ sizeof(ep->secret_key)) ?:
+ crypto_shash_digest(desc, (u8 *)bear_cookie, bodysize,
+ digest);
+ shash_desc_zero(desc);
+
+ if (err) {
+ *error = -SCTP_IERROR_NOMEM;
+ goto fail;
+ }
}
if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) {
@@ -1780,7 +1787,7 @@ no_hmac:
if (sock_flag(ep->base.sk, SOCK_TIMESTAMP))
kt = skb_get_ktime(skb);
else
- kt = ktime_get();
+ kt = ktime_get_real();
if (!asoc && ktime_before(bear_cookie->expiration, kt)) {
/*
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 6098d4c42fa9..b5327bb77458 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -63,7 +63,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
sctp_state_t state,
struct sctp_endpoint *ep,
- struct sctp_association *asoc,
+ struct sctp_association **asoc,
void *event_arg,
sctp_disposition_t status,
sctp_cmd_seq_t *commands,
@@ -259,12 +259,6 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
goto out_unlock;
}
- /* Is this transport really dead and just waiting around for
- * the timer to let go of the reference?
- */
- if (transport->dead)
- goto out_unlock;
-
/* Run through the state machine. */
error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_T3_RTX),
@@ -380,12 +374,6 @@ void sctp_generate_heartbeat_event(unsigned long data)
goto out_unlock;
}
- /* Is this structure just waiting around for us to actually
- * get destroyed?
- */
- if (transport->dead)
- goto out_unlock;
-
error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT),
asoc->state, asoc->ep, asoc,
@@ -477,6 +465,8 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
struct sctp_transport *transport,
int is_hb)
{
+ struct net *net = sock_net(asoc->base.sk);
+
/* The check for association's overall error counter exceeding the
* threshold is done in the state function.
*/
@@ -503,7 +493,8 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
* is SCTP_ACTIVE, then mark this transport as Partially Failed,
* see SCTP Quick Failover Draft, section 5.1
*/
- if ((transport->state == SCTP_ACTIVE) &&
+ if (net->sctp.pf_enable &&
+ (transport->state == SCTP_ACTIVE) &&
(asoc->pf_retrans < transport->pathmaxrxt) &&
(transport->error_count > asoc->pf_retrans)) {
@@ -863,7 +854,6 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds,
(!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK))
return;
- sctp_unhash_established(asoc);
sctp_association_free(asoc);
}
@@ -1123,7 +1113,7 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
debug_post_sfn();
error = sctp_side_effects(event_type, subtype, state,
- ep, asoc, event_arg, status,
+ ep, &asoc, event_arg, status,
&commands, gfp);
debug_post_sfx();
@@ -1136,7 +1126,7 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
sctp_state_t state,
struct sctp_endpoint *ep,
- struct sctp_association *asoc,
+ struct sctp_association **asoc,
void *event_arg,
sctp_disposition_t status,
sctp_cmd_seq_t *commands,
@@ -1151,7 +1141,7 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
* disposition SCTP_DISPOSITION_CONSUME.
*/
if (0 != (error = sctp_cmd_interpreter(event_type, subtype, state,
- ep, asoc,
+ ep, *asoc,
event_arg, status,
commands, gfp)))
goto bail;
@@ -1174,11 +1164,12 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
break;
case SCTP_DISPOSITION_DELETE_TCB:
+ case SCTP_DISPOSITION_ABORT:
/* This should now be a command. */
+ *asoc = NULL;
break;
case SCTP_DISPOSITION_CONSUME:
- case SCTP_DISPOSITION_ABORT:
/*
* We should no longer have much work to do here as the
* real work has been done as explicit commands above.
@@ -1266,7 +1257,6 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
asoc = cmd->obj.asoc;
BUG_ON(asoc->peer.primary_path == NULL);
sctp_endpoint_add_asoc(ep, asoc);
- sctp_hash_established(asoc);
break;
case SCTP_CMD_UPDATE_ASSOC:
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 6f46aa16cb76..f1f08c8f277b 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2976,7 +2976,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
SCTP_INC_STATS(net, SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
goto discard_force;
case SCTP_IERROR_NO_DATA:
- goto consume;
+ return SCTP_DISPOSITION_ABORT;
case SCTP_IERROR_PROTO_VIOLATION:
return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
(u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t));
@@ -3043,9 +3043,6 @@ discard_noforce:
sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, force);
return SCTP_DISPOSITION_DISCARD;
-consume:
- return SCTP_DISPOSITION_CONSUME;
-
}
/*
@@ -3093,7 +3090,7 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
case SCTP_IERROR_BAD_STREAM:
break;
case SCTP_IERROR_NO_DATA:
- goto consume;
+ return SCTP_DISPOSITION_ABORT;
case SCTP_IERROR_PROTO_VIOLATION:
return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
(u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t));
@@ -3119,7 +3116,6 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
}
-consume:
return SCTP_DISPOSITION_CONSUME;
}
@@ -4825,11 +4821,9 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
* if necessary to fill gaps.
*/
struct sctp_chunk *abort = arg;
- sctp_disposition_t retval;
- retval = SCTP_DISPOSITION_CONSUME;
-
- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+ if (abort)
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
/* Even if we can't send the ABORT due to low memory delete the
* TCB. This is a departure from our typical NOMEM handling.
@@ -4844,7 +4838,7 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
- return retval;
+ return SCTP_DISPOSITION_ABORT;
}
/* We tried an illegal operation on an association which is closed. */
@@ -4959,14 +4953,13 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
sctp_cmd_seq_t *commands)
{
struct sctp_chunk *abort = arg;
- sctp_disposition_t retval;
/* Stop T1-init timer */
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
- retval = SCTP_DISPOSITION_CONSUME;
- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+ if (abort)
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_CLOSED));
@@ -4983,7 +4976,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
SCTP_PERR(SCTP_ERROR_USER_ABORT));
- return retval;
+ return SCTP_DISPOSITION_ABORT;
}
/*
@@ -5412,7 +5405,8 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net,
SCTP_INC_STATS(net, SCTP_MIB_T3_RTX_EXPIREDS);
if (asoc->overall_error_count >= asoc->max_retrans) {
- if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) {
+ if (asoc->peer.zero_window_announced &&
+ asoc->state == SCTP_STATE_SHUTDOWN_PENDING) {
/*
* We are here likely because the receiver had its rwnd
* closed for a while and we have not been able to
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 897c01c029ca..de8eabf03eed 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -52,6 +52,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <crypto/hash.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/wait.h>
@@ -61,7 +62,6 @@
#include <linux/fcntl.h>
#include <linux/poll.h>
#include <linux/init.h>
-#include <linux/crypto.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/compat.h>
@@ -972,7 +972,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
return -EFAULT;
/* Alloc space for the address array in kernel memory. */
- kaddrs = kmalloc(addrs_size, GFP_KERNEL);
+ kaddrs = kmalloc(addrs_size, GFP_USER | __GFP_NOWARN);
if (unlikely(!kaddrs))
return -ENOMEM;
@@ -1228,7 +1228,6 @@ out_free:
* To the hash table, try to unhash it, just in case, its a noop
* if it wasn't hashed so we're safe
*/
- sctp_unhash_established(asoc);
sctp_association_free(asoc);
}
return err;
@@ -1301,8 +1300,9 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
int addrs_size,
sctp_assoc_t *assoc_id)
{
- int err = 0;
struct sockaddr *kaddrs;
+ gfp_t gfp = GFP_KERNEL;
+ int err = 0;
pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n",
__func__, sk, addrs, addrs_size);
@@ -1315,7 +1315,9 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
return -EFAULT;
/* Alloc space for the address array in kernel memory. */
- kaddrs = kmalloc(addrs_size, GFP_KERNEL);
+ if (sk->sk_socket->file)
+ gfp = GFP_USER | __GFP_NOWARN;
+ kaddrs = kmalloc(addrs_size, gfp);
if (unlikely(!kaddrs))
return -ENOMEM;
@@ -1501,7 +1503,6 @@ static void sctp_close(struct sock *sk, long timeout)
* ABORT or SHUTDOWN based on the linger options.
*/
if (sctp_state(asoc, CLOSED)) {
- sctp_unhash_established(asoc);
sctp_association_free(asoc);
continue;
}
@@ -1513,8 +1514,7 @@ static void sctp_close(struct sock *sk, long timeout)
struct sctp_chunk *chunk;
chunk = sctp_make_abort_user(asoc, NULL, 0);
- if (chunk)
- sctp_primitive_ABORT(net, asoc, chunk);
+ sctp_primitive_ABORT(net, asoc, chunk);
} else
sctp_primitive_SHUTDOWN(net, asoc, NULL);
}
@@ -1952,8 +1952,6 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
/* Now send the (possibly) fragmented message. */
list_for_each_entry(chunk, &datamsg->chunks, frag_list) {
- sctp_chunk_hold(chunk);
-
/* Do accounting for the write space. */
sctp_set_owner_w(chunk);
@@ -1966,15 +1964,13 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
* breaks.
*/
err = sctp_primitive_SEND(net, asoc, datamsg);
+ sctp_datamsg_put(datamsg);
/* Did the lower layer accept the chunk? */
- if (err) {
- sctp_datamsg_free(datamsg);
+ if (err)
goto out_free;
- }
pr_debug("%s: we sent primitively\n", __func__);
- sctp_datamsg_put(datamsg);
err = msg_len;
if (unlikely(wait_connect)) {
@@ -1988,10 +1984,8 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
goto out_unlock;
out_free:
- if (new_asoc) {
- sctp_unhash_established(asoc);
+ if (new_asoc)
sctp_association_free(asoc);
- }
out_unlock:
release_sock(sk);
@@ -4166,7 +4160,7 @@ static void sctp_destruct_sock(struct sock *sk)
struct sctp_sock *sp = sctp_sk(sk);
/* Free up the HMAC transform. */
- crypto_free_hash(sp->hmac);
+ crypto_free_shash(sp->hmac);
inet_sock_destruct(sk);
}
@@ -4928,7 +4922,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
to = optval + offsetof(struct sctp_getaddrs, addrs);
space_left = len - offsetof(struct sctp_getaddrs, addrs);
- addrs = kmalloc(space_left, GFP_KERNEL);
+ addrs = kmalloc(space_left, GFP_USER | __GFP_NOWARN);
if (!addrs)
return -ENOMEM;
@@ -5544,6 +5538,7 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
struct sctp_hmac_algo_param *hmacs;
__u16 data_len = 0;
u32 num_idents;
+ int i;
if (!ep->auth_enable)
return -EACCES;
@@ -5561,8 +5556,12 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
return -EFAULT;
if (put_user(num_idents, &p->shmac_num_idents))
return -EFAULT;
- if (copy_to_user(p->shmac_idents, hmacs->hmac_ids, data_len))
- return -EFAULT;
+ for (i = 0; i < num_idents; i++) {
+ __u16 hmacid = ntohs(hmacs->hmac_ids[i]);
+
+ if (copy_to_user(&p->shmac_idents[i], &hmacid, sizeof(__u16)))
+ return -EFAULT;
+ }
return 0;
}
@@ -5777,7 +5776,7 @@ static int sctp_getsockopt_assoc_ids(struct sock *sk, int len,
len = sizeof(struct sctp_assoc_ids) + sizeof(sctp_assoc_t) * num;
- ids = kmalloc(len, GFP_KERNEL);
+ ids = kmalloc(len, GFP_USER | __GFP_NOWARN);
if (unlikely(!ids))
return -ENOMEM;
@@ -6305,13 +6304,13 @@ static int sctp_listen_start(struct sock *sk, int backlog)
{
struct sctp_sock *sp = sctp_sk(sk);
struct sctp_endpoint *ep = sp->ep;
- struct crypto_hash *tfm = NULL;
+ struct crypto_shash *tfm = NULL;
char alg[32];
/* Allocate HMAC for generating cookie. */
if (!sp->hmac && sp->sctp_hmac_alg) {
sprintf(alg, "hmac(%s)", sp->sctp_hmac_alg);
- tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
+ tfm = crypto_alloc_shash(alg, 0, 0);
if (IS_ERR(tfm)) {
net_info_ratelimited("failed to load transform for %s: %ld\n",
sp->sctp_hmac_alg, PTR_ERR(tfm));
@@ -6458,7 +6457,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
if (sctp_writeable(sk)) {
mask |= POLLOUT | POLLWRNORM;
} else {
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
/*
* Since the socket is not locked, the buffer
* might be made available after the writeable check and
@@ -6642,6 +6641,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
if (cmsgs->srinfo->sinfo_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+ SCTP_SACK_IMMEDIATELY |
SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
@@ -6665,6 +6665,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
if (cmsgs->sinfo->snd_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+ SCTP_SACK_IMMEDIATELY |
SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
@@ -6801,26 +6802,30 @@ no_packet:
static void __sctp_write_space(struct sctp_association *asoc)
{
struct sock *sk = asoc->base.sk;
- struct socket *sock = sk->sk_socket;
- if ((sctp_wspace(asoc) > 0) && sock) {
- if (waitqueue_active(&asoc->wait))
- wake_up_interruptible(&asoc->wait);
+ if (sctp_wspace(asoc) <= 0)
+ return;
+
+ if (waitqueue_active(&asoc->wait))
+ wake_up_interruptible(&asoc->wait);
- if (sctp_writeable(sk)) {
- wait_queue_head_t *wq = sk_sleep(sk);
+ if (sctp_writeable(sk)) {
+ struct socket_wq *wq;
- if (wq && waitqueue_active(wq))
- wake_up_interruptible(wq);
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ if (wq) {
+ if (waitqueue_active(&wq->wait))
+ wake_up_interruptible(&wq->wait);
/* Note that we try to include the Async I/O support
* here by modeling from the current TCP/UDP code.
* We have not tested with it yet.
*/
if (!(sk->sk_shutdown & SEND_SHUTDOWN))
- sock_wake_async(sock,
- SOCK_WAKE_SPACE, POLL_OUT);
+ sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
}
+ rcu_read_unlock();
}
}
@@ -6978,7 +6983,7 @@ void sctp_data_ready(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
- if (wq_has_sleeper(wq))
+ if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
POLLRDNORM | POLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
@@ -7163,6 +7168,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
newsk->sk_type = sk->sk_type;
newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
newsk->sk_flags = sk->sk_flags;
+ newsk->sk_tsflags = sk->sk_tsflags;
newsk->sk_no_check_tx = sk->sk_no_check_tx;
newsk->sk_no_check_rx = sk->sk_no_check_rx;
newsk->sk_reuse = sk->sk_reuse;
@@ -7195,6 +7201,11 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
newinet->mc_ttl = 1;
newinet->mc_index = 0;
newinet->mc_list = NULL;
+
+ if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
+ net_enable_timestamp();
+
+ security_sk_clone(sk, newsk);
}
static inline void sctp_copy_descendant(struct sock *sk_to,
@@ -7375,6 +7386,13 @@ struct proto sctp_prot = {
#if IS_ENABLED(CONFIG_IPV6)
+#include <net/transp_v6.h>
+static void sctp_v6_destroy_sock(struct sock *sk)
+{
+ sctp_destroy_sock(sk);
+ inet6_destroy_sock(sk);
+}
+
struct proto sctpv6_prot = {
.name = "SCTPv6",
.owner = THIS_MODULE,
@@ -7384,7 +7402,7 @@ struct proto sctpv6_prot = {
.accept = sctp_accept,
.ioctl = sctp_ioctl,
.init = sctp_init_sock,
- .destroy = sctp_destroy_sock,
+ .destroy = sctp_v6_destroy_sock,
.shutdown = sctp_shutdown,
.setsockopt = sctp_setsockopt,
.getsockopt = sctp_getsockopt,
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 26d50c565f54..daf8554fd42a 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -308,6 +308,13 @@ static struct ctl_table sctp_net_table[] = {
.extra1 = &max_autoclose_min,
.extra2 = &max_autoclose_max,
},
+ {
+ .procname = "pf_enable",
+ .data = &init_net.sctp.pf_enable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
{ /* sentinel */ }
};
@@ -320,7 +327,7 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write,
struct ctl_table tbl;
bool changed = false;
char *none = "none";
- char tmp[8];
+ char tmp[8] = {0};
int ret;
memset(&tbl, 0, sizeof(struct ctl_table));
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index a0a431824f63..a431c14044a4 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -132,8 +132,6 @@ fail:
*/
void sctp_transport_free(struct sctp_transport *transport)
{
- transport->dead = 1;
-
/* Try to delete the heartbeat timer. */
if (del_timer(&transport->hb_timer))
sctp_transport_put(transport);
@@ -169,7 +167,7 @@ static void sctp_transport_destroy_rcu(struct rcu_head *head)
*/
static void sctp_transport_destroy(struct sctp_transport *transport)
{
- if (unlikely(!transport->dead)) {
+ if (unlikely(atomic_read(&transport->refcnt))) {
WARN(1, "Attempt to destroy undead transport %p!\n", transport);
return;
}
@@ -296,9 +294,9 @@ void sctp_transport_route(struct sctp_transport *transport,
}
/* Hold a reference to a transport. */
-void sctp_transport_hold(struct sctp_transport *transport)
+int sctp_transport_hold(struct sctp_transport *transport)
{
- atomic_inc(&transport->refcnt);
+ return atomic_add_unless(&transport->refcnt, 1, 0);
}
/* Release a reference to a transport and clean up
@@ -331,7 +329,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
* 1/8, rto_alpha would be expressed as 3.
*/
tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta)
- + (((__u32)abs64((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta);
+ + (((__u32)abs((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta);
tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha)
+ (rtt >> net->sctp.rto_alpha);
} else {
diff --git a/net/socket.c b/net/socket.c
index dd2c247c99e3..c044d1e8508c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -257,6 +257,7 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
}
init_waitqueue_head(&wq->wait);
wq->fasync_list = NULL;
+ wq->flags = 0;
RCU_INIT_POINTER(ei->socket.wq, wq);
ei->socket.state = SS_UNCONNECTED;
@@ -293,7 +294,7 @@ static int init_inodecache(void)
0,
(SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD | SLAB_ACCOUNT),
init_once);
if (sock_inode_cachep == NULL)
return -ENOMEM;
@@ -1056,27 +1057,20 @@ static int sock_fasync(int fd, struct file *filp, int on)
return 0;
}
-/* This function may be called only under socket lock or callback_lock or rcu_lock */
+/* This function may be called only under rcu_lock */
-int sock_wake_async(struct socket *sock, int how, int band)
+int sock_wake_async(struct socket_wq *wq, int how, int band)
{
- struct socket_wq *wq;
-
- if (!sock)
- return -1;
- rcu_read_lock();
- wq = rcu_dereference(sock->wq);
- if (!wq || !wq->fasync_list) {
- rcu_read_unlock();
+ if (!wq || !wq->fasync_list)
return -1;
- }
+
switch (how) {
case SOCK_WAKE_WAITD:
- if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
+ if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
break;
goto call_kill;
case SOCK_WAKE_SPACE:
- if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
+ if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
break;
/* fall through */
case SOCK_WAKE_IO:
@@ -1086,7 +1080,7 @@ call_kill:
case SOCK_WAKE_URG:
kill_fasync(&wq->fasync_list, SIGURG, band);
}
- rcu_read_unlock();
+
return 0;
}
EXPORT_SYMBOL(sock_wake_async);
@@ -1702,6 +1696,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
/* We assume all kernel code knows the size of sockaddr_storage */
msg.msg_namelen = 0;
+ msg.msg_iocb = NULL;
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags);
@@ -2046,6 +2041,7 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
if (err)
break;
++datagrams;
+ cond_resched();
}
fput_light(sock->file, fput_needed);
@@ -2241,6 +2237,7 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
/* Out of band data, return right away */
if (msg_sys.msg_flags & MSG_OOB)
break;
+ cond_resched();
}
out_put:
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index dace13d7638e..cabf586f47d7 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -740,7 +740,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
default:
printk(KERN_CRIT "%s: bad return from "
"gss_fill_context: %zd\n", __func__, err);
- BUG();
+ gss_msg->msg.errno = -EIO;
}
goto err_release_msg;
}
@@ -1411,17 +1411,16 @@ gss_key_timeout(struct rpc_cred *rc)
{
struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
struct gss_cl_ctx *ctx;
- unsigned long now = jiffies;
- unsigned long expire;
+ unsigned long timeout = jiffies + (gss_key_expire_timeo * HZ);
+ int ret = 0;
rcu_read_lock();
ctx = rcu_dereference(gss_cred->gc_ctx);
- if (ctx)
- expire = ctx->gc_expiry - (gss_key_expire_timeo * HZ);
+ if (!ctx || time_after(timeout, ctx->gc_expiry))
+ ret = -EACCES;
rcu_read_unlock();
- if (!ctx || time_after(now, expire))
- return -EACCES;
- return 0;
+
+ return ret;
}
static int
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index fee3c15a4b52..d94a8e1e9f05 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -34,11 +34,12 @@
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
+#include <crypto/hash.h>
+#include <crypto/skcipher.h>
#include <linux/err.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
-#include <linux/crypto.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/random.h>
@@ -51,7 +52,7 @@
u32
krb5_encrypt(
- struct crypto_blkcipher *tfm,
+ struct crypto_skcipher *tfm,
void * iv,
void * in,
void * out,
@@ -60,24 +61,28 @@ krb5_encrypt(
u32 ret = -EINVAL;
struct scatterlist sg[1];
u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
- struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
+ SKCIPHER_REQUEST_ON_STACK(req, tfm);
- if (length % crypto_blkcipher_blocksize(tfm) != 0)
+ if (length % crypto_skcipher_blocksize(tfm) != 0)
goto out;
- if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
+ if (crypto_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
dprintk("RPC: gss_k5encrypt: tfm iv size too large %d\n",
- crypto_blkcipher_ivsize(tfm));
+ crypto_skcipher_ivsize(tfm));
goto out;
}
if (iv)
- memcpy(local_iv, iv, crypto_blkcipher_ivsize(tfm));
+ memcpy(local_iv, iv, crypto_skcipher_ivsize(tfm));
memcpy(out, in, length);
sg_init_one(sg, out, length);
- ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, sg, sg, length, local_iv);
+
+ ret = crypto_skcipher_encrypt(req);
+ skcipher_request_zero(req);
out:
dprintk("RPC: krb5_encrypt returns %d\n", ret);
return ret;
@@ -85,7 +90,7 @@ out:
u32
krb5_decrypt(
- struct crypto_blkcipher *tfm,
+ struct crypto_skcipher *tfm,
void * iv,
void * in,
void * out,
@@ -94,23 +99,27 @@ krb5_decrypt(
u32 ret = -EINVAL;
struct scatterlist sg[1];
u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
- struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
+ SKCIPHER_REQUEST_ON_STACK(req, tfm);
- if (length % crypto_blkcipher_blocksize(tfm) != 0)
+ if (length % crypto_skcipher_blocksize(tfm) != 0)
goto out;
- if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
+ if (crypto_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
dprintk("RPC: gss_k5decrypt: tfm iv size too large %d\n",
- crypto_blkcipher_ivsize(tfm));
+ crypto_skcipher_ivsize(tfm));
goto out;
}
if (iv)
- memcpy(local_iv,iv, crypto_blkcipher_ivsize(tfm));
+ memcpy(local_iv,iv, crypto_skcipher_ivsize(tfm));
memcpy(out, in, length);
sg_init_one(sg, out, length);
- ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, sg, sg, length, local_iv);
+
+ ret = crypto_skcipher_decrypt(req);
+ skcipher_request_zero(req);
out:
dprintk("RPC: gss_k5decrypt returns %d\n",ret);
return ret;
@@ -119,9 +128,11 @@ out:
static int
checksummer(struct scatterlist *sg, void *data)
{
- struct hash_desc *desc = data;
+ struct ahash_request *req = data;
+
+ ahash_request_set_crypt(req, sg, NULL, sg->length);
- return crypto_hash_update(desc, sg, sg->length);
+ return crypto_ahash_update(req);
}
static int
@@ -152,13 +163,13 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
struct xdr_buf *body, int body_offset, u8 *cksumkey,
unsigned int usage, struct xdr_netobj *cksumout)
{
- struct hash_desc desc;
struct scatterlist sg[1];
int err;
u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
u8 rc4salt[4];
- struct crypto_hash *md5;
- struct crypto_hash *hmac_md5;
+ struct crypto_ahash *md5;
+ struct crypto_ahash *hmac_md5;
+ struct ahash_request *req;
if (cksumkey == NULL)
return GSS_S_FAILURE;
@@ -174,61 +185,79 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
return GSS_S_FAILURE;
}
- md5 = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+ md5 = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(md5))
return GSS_S_FAILURE;
- hmac_md5 = crypto_alloc_hash(kctx->gk5e->cksum_name, 0,
- CRYPTO_ALG_ASYNC);
+ hmac_md5 = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0,
+ CRYPTO_ALG_ASYNC);
if (IS_ERR(hmac_md5)) {
- crypto_free_hash(md5);
+ crypto_free_ahash(md5);
+ return GSS_S_FAILURE;
+ }
+
+ req = ahash_request_alloc(md5, GFP_KERNEL);
+ if (!req) {
+ crypto_free_ahash(hmac_md5);
+ crypto_free_ahash(md5);
return GSS_S_FAILURE;
}
- desc.tfm = md5;
- desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
- err = crypto_hash_init(&desc);
+ err = crypto_ahash_init(req);
if (err)
goto out;
sg_init_one(sg, rc4salt, 4);
- err = crypto_hash_update(&desc, sg, 4);
+ ahash_request_set_crypt(req, sg, NULL, 4);
+ err = crypto_ahash_update(req);
if (err)
goto out;
sg_init_one(sg, header, hdrlen);
- err = crypto_hash_update(&desc, sg, hdrlen);
+ ahash_request_set_crypt(req, sg, NULL, hdrlen);
+ err = crypto_ahash_update(req);
if (err)
goto out;
err = xdr_process_buf(body, body_offset, body->len - body_offset,
- checksummer, &desc);
+ checksummer, req);
if (err)
goto out;
- err = crypto_hash_final(&desc, checksumdata);
+ ahash_request_set_crypt(req, NULL, checksumdata, 0);
+ err = crypto_ahash_final(req);
if (err)
goto out;
- desc.tfm = hmac_md5;
- desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+ ahash_request_free(req);
+ req = ahash_request_alloc(hmac_md5, GFP_KERNEL);
+ if (!req) {
+ crypto_free_ahash(hmac_md5);
+ crypto_free_ahash(md5);
+ return GSS_S_FAILURE;
+ }
+
+ ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
- err = crypto_hash_init(&desc);
+ err = crypto_ahash_init(req);
if (err)
goto out;
- err = crypto_hash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength);
+ err = crypto_ahash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength);
if (err)
goto out;
- sg_init_one(sg, checksumdata, crypto_hash_digestsize(md5));
- err = crypto_hash_digest(&desc, sg, crypto_hash_digestsize(md5),
- checksumdata);
+ sg_init_one(sg, checksumdata, crypto_ahash_digestsize(md5));
+ ahash_request_set_crypt(req, sg, checksumdata,
+ crypto_ahash_digestsize(md5));
+ err = crypto_ahash_digest(req);
if (err)
goto out;
memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
cksumout->len = kctx->gk5e->cksumlength;
out:
- crypto_free_hash(md5);
- crypto_free_hash(hmac_md5);
+ ahash_request_free(req);
+ crypto_free_ahash(md5);
+ crypto_free_ahash(hmac_md5);
return err ? GSS_S_FAILURE : 0;
}
@@ -242,7 +271,8 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
struct xdr_buf *body, int body_offset, u8 *cksumkey,
unsigned int usage, struct xdr_netobj *cksumout)
{
- struct hash_desc desc;
+ struct crypto_ahash *tfm;
+ struct ahash_request *req;
struct scatterlist sg[1];
int err;
u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
@@ -259,32 +289,41 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
return GSS_S_FAILURE;
}
- desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(desc.tfm))
+ tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm))
return GSS_S_FAILURE;
- desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
- checksumlen = crypto_hash_digestsize(desc.tfm);
+ req = ahash_request_alloc(tfm, GFP_KERNEL);
+ if (!req) {
+ crypto_free_ahash(tfm);
+ return GSS_S_FAILURE;
+ }
+
+ ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
+
+ checksumlen = crypto_ahash_digestsize(tfm);
if (cksumkey != NULL) {
- err = crypto_hash_setkey(desc.tfm, cksumkey,
- kctx->gk5e->keylength);
+ err = crypto_ahash_setkey(tfm, cksumkey,
+ kctx->gk5e->keylength);
if (err)
goto out;
}
- err = crypto_hash_init(&desc);
+ err = crypto_ahash_init(req);
if (err)
goto out;
sg_init_one(sg, header, hdrlen);
- err = crypto_hash_update(&desc, sg, hdrlen);
+ ahash_request_set_crypt(req, sg, NULL, hdrlen);
+ err = crypto_ahash_update(req);
if (err)
goto out;
err = xdr_process_buf(body, body_offset, body->len - body_offset,
- checksummer, &desc);
+ checksummer, req);
if (err)
goto out;
- err = crypto_hash_final(&desc, checksumdata);
+ ahash_request_set_crypt(req, NULL, checksumdata, 0);
+ err = crypto_ahash_final(req);
if (err)
goto out;
@@ -307,7 +346,8 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
}
cksumout->len = kctx->gk5e->cksumlength;
out:
- crypto_free_hash(desc.tfm);
+ ahash_request_free(req);
+ crypto_free_ahash(tfm);
return err ? GSS_S_FAILURE : 0;
}
@@ -323,7 +363,8 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
struct xdr_buf *body, int body_offset, u8 *cksumkey,
unsigned int usage, struct xdr_netobj *cksumout)
{
- struct hash_desc desc;
+ struct crypto_ahash *tfm;
+ struct ahash_request *req;
struct scatterlist sg[1];
int err;
u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
@@ -340,31 +381,39 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
return GSS_S_FAILURE;
}
- desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0,
- CRYPTO_ALG_ASYNC);
- if (IS_ERR(desc.tfm))
+ tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm))
return GSS_S_FAILURE;
- checksumlen = crypto_hash_digestsize(desc.tfm);
- desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+ checksumlen = crypto_ahash_digestsize(tfm);
+
+ req = ahash_request_alloc(tfm, GFP_KERNEL);
+ if (!req) {
+ crypto_free_ahash(tfm);
+ return GSS_S_FAILURE;
+ }
- err = crypto_hash_setkey(desc.tfm, cksumkey, kctx->gk5e->keylength);
+ ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
+
+ err = crypto_ahash_setkey(tfm, cksumkey, kctx->gk5e->keylength);
if (err)
goto out;
- err = crypto_hash_init(&desc);
+ err = crypto_ahash_init(req);
if (err)
goto out;
err = xdr_process_buf(body, body_offset, body->len - body_offset,
- checksummer, &desc);
+ checksummer, req);
if (err)
goto out;
if (header != NULL) {
sg_init_one(sg, header, hdrlen);
- err = crypto_hash_update(&desc, sg, hdrlen);
+ ahash_request_set_crypt(req, sg, NULL, hdrlen);
+ err = crypto_ahash_update(req);
if (err)
goto out;
}
- err = crypto_hash_final(&desc, checksumdata);
+ ahash_request_set_crypt(req, NULL, checksumdata, 0);
+ err = crypto_ahash_final(req);
if (err)
goto out;
@@ -381,13 +430,14 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
break;
}
out:
- crypto_free_hash(desc.tfm);
+ ahash_request_free(req);
+ crypto_free_ahash(tfm);
return err ? GSS_S_FAILURE : 0;
}
struct encryptor_desc {
u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
- struct blkcipher_desc desc;
+ struct skcipher_request *req;
int pos;
struct xdr_buf *outbuf;
struct page **pages;
@@ -402,6 +452,7 @@ encryptor(struct scatterlist *sg, void *data)
{
struct encryptor_desc *desc = data;
struct xdr_buf *outbuf = desc->outbuf;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(desc->req);
struct page *in_page;
int thislen = desc->fraglen + sg->length;
int fraglen, ret;
@@ -427,7 +478,7 @@ encryptor(struct scatterlist *sg, void *data)
desc->fraglen += sg->length;
desc->pos += sg->length;
- fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1);
+ fraglen = thislen & (crypto_skcipher_blocksize(tfm) - 1);
thislen -= fraglen;
if (thislen == 0)
@@ -436,8 +487,10 @@ encryptor(struct scatterlist *sg, void *data)
sg_mark_end(&desc->infrags[desc->fragno - 1]);
sg_mark_end(&desc->outfrags[desc->fragno - 1]);
- ret = crypto_blkcipher_encrypt_iv(&desc->desc, desc->outfrags,
- desc->infrags, thislen);
+ skcipher_request_set_crypt(desc->req, desc->infrags, desc->outfrags,
+ thislen, desc->iv);
+
+ ret = crypto_skcipher_encrypt(desc->req);
if (ret)
return ret;
@@ -459,18 +512,20 @@ encryptor(struct scatterlist *sg, void *data)
}
int
-gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
+gss_encrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *buf,
int offset, struct page **pages)
{
int ret;
struct encryptor_desc desc;
+ SKCIPHER_REQUEST_ON_STACK(req, tfm);
+
+ BUG_ON((buf->len - offset) % crypto_skcipher_blocksize(tfm) != 0);
- BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0);
+ skcipher_request_set_tfm(req, tfm);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
memset(desc.iv, 0, sizeof(desc.iv));
- desc.desc.tfm = tfm;
- desc.desc.info = desc.iv;
- desc.desc.flags = 0;
+ desc.req = req;
desc.pos = offset;
desc.outbuf = buf;
desc.pages = pages;
@@ -481,12 +536,13 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
sg_init_table(desc.outfrags, 4);
ret = xdr_process_buf(buf, offset, buf->len - offset, encryptor, &desc);
+ skcipher_request_zero(req);
return ret;
}
struct decryptor_desc {
u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
- struct blkcipher_desc desc;
+ struct skcipher_request *req;
struct scatterlist frags[4];
int fragno;
int fraglen;
@@ -497,6 +553,7 @@ decryptor(struct scatterlist *sg, void *data)
{
struct decryptor_desc *desc = data;
int thislen = desc->fraglen + sg->length;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(desc->req);
int fraglen, ret;
/* Worst case is 4 fragments: head, end of page 1, start
@@ -507,7 +564,7 @@ decryptor(struct scatterlist *sg, void *data)
desc->fragno++;
desc->fraglen += sg->length;
- fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1);
+ fraglen = thislen & (crypto_skcipher_blocksize(tfm) - 1);
thislen -= fraglen;
if (thislen == 0)
@@ -515,8 +572,10 @@ decryptor(struct scatterlist *sg, void *data)
sg_mark_end(&desc->frags[desc->fragno - 1]);
- ret = crypto_blkcipher_decrypt_iv(&desc->desc, desc->frags,
- desc->frags, thislen);
+ skcipher_request_set_crypt(desc->req, desc->frags, desc->frags,
+ thislen, desc->iv);
+
+ ret = crypto_skcipher_decrypt(desc->req);
if (ret)
return ret;
@@ -535,24 +594,29 @@ decryptor(struct scatterlist *sg, void *data)
}
int
-gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
+gss_decrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *buf,
int offset)
{
+ int ret;
struct decryptor_desc desc;
+ SKCIPHER_REQUEST_ON_STACK(req, tfm);
/* XXXJBF: */
- BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0);
+ BUG_ON((buf->len - offset) % crypto_skcipher_blocksize(tfm) != 0);
+
+ skcipher_request_set_tfm(req, tfm);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
memset(desc.iv, 0, sizeof(desc.iv));
- desc.desc.tfm = tfm;
- desc.desc.info = desc.iv;
- desc.desc.flags = 0;
+ desc.req = req;
desc.fragno = 0;
desc.fraglen = 0;
sg_init_table(desc.frags, 4);
- return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
+ ret = xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
+ skcipher_request_zero(req);
+ return ret;
}
/*
@@ -594,12 +658,12 @@ xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen)
}
static u32
-gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf,
+gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf,
u32 offset, u8 *iv, struct page **pages, int encrypt)
{
u32 ret;
struct scatterlist sg[1];
- struct blkcipher_desc desc = { .tfm = cipher, .info = iv };
+ SKCIPHER_REQUEST_ON_STACK(req, cipher);
u8 data[GSS_KRB5_MAX_BLOCKSIZE * 2];
struct page **save_pages;
u32 len = buf->len - offset;
@@ -625,10 +689,16 @@ gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf,
sg_init_one(sg, data, len);
+ skcipher_request_set_tfm(req, cipher);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, sg, sg, len, iv);
+
if (encrypt)
- ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, len);
+ ret = crypto_skcipher_encrypt(req);
else
- ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, len);
+ ret = crypto_skcipher_decrypt(req);
+
+ skcipher_request_zero(req);
if (ret)
goto out;
@@ -647,7 +717,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
struct xdr_netobj hmac;
u8 *cksumkey;
u8 *ecptr;
- struct crypto_blkcipher *cipher, *aux_cipher;
+ struct crypto_skcipher *cipher, *aux_cipher;
int blocksize;
struct page **save_pages;
int nblocks, nbytes;
@@ -666,7 +736,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
cksumkey = kctx->acceptor_integ;
usage = KG_USAGE_ACCEPTOR_SEAL;
}
- blocksize = crypto_blkcipher_blocksize(cipher);
+ blocksize = crypto_skcipher_blocksize(cipher);
/* hide the gss token header and insert the confounder */
offset += GSS_KRB5_TOK_HDR_LEN;
@@ -719,20 +789,24 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
memset(desc.iv, 0, sizeof(desc.iv));
if (cbcbytes) {
+ SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
+
desc.pos = offset + GSS_KRB5_TOK_HDR_LEN;
desc.fragno = 0;
desc.fraglen = 0;
desc.pages = pages;
desc.outbuf = buf;
- desc.desc.info = desc.iv;
- desc.desc.flags = 0;
- desc.desc.tfm = aux_cipher;
+ desc.req = req;
+
+ skcipher_request_set_tfm(req, aux_cipher);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
sg_init_table(desc.infrags, 4);
sg_init_table(desc.outfrags, 4);
err = xdr_process_buf(buf, offset + GSS_KRB5_TOK_HDR_LEN,
cbcbytes, encryptor, &desc);
+ skcipher_request_zero(req);
if (err)
goto out_err;
}
@@ -763,7 +837,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
struct xdr_buf subbuf;
u32 ret = 0;
u8 *cksum_key;
- struct crypto_blkcipher *cipher, *aux_cipher;
+ struct crypto_skcipher *cipher, *aux_cipher;
struct xdr_netobj our_hmac_obj;
u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN];
u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN];
@@ -782,7 +856,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
cksum_key = kctx->initiator_integ;
usage = KG_USAGE_INITIATOR_SEAL;
}
- blocksize = crypto_blkcipher_blocksize(cipher);
+ blocksize = crypto_skcipher_blocksize(cipher);
/* create a segment skipping the header and leaving out the checksum */
@@ -799,15 +873,19 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
memset(desc.iv, 0, sizeof(desc.iv));
if (cbcbytes) {
+ SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
+
desc.fragno = 0;
desc.fraglen = 0;
- desc.desc.info = desc.iv;
- desc.desc.flags = 0;
- desc.desc.tfm = aux_cipher;
+ desc.req = req;
+
+ skcipher_request_set_tfm(req, aux_cipher);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
sg_init_table(desc.frags, 4);
ret = xdr_process_buf(&subbuf, 0, cbcbytes, decryptor, &desc);
+ skcipher_request_zero(req);
if (ret)
goto out_err;
}
@@ -850,61 +928,62 @@ out_err:
* Set the key of the given cipher.
*/
int
-krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
+krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
unsigned char *cksum)
{
- struct crypto_hash *hmac;
- struct hash_desc desc;
- struct scatterlist sg[1];
+ struct crypto_shash *hmac;
+ struct shash_desc *desc;
u8 Kseq[GSS_KRB5_MAX_KEYLEN];
u32 zeroconstant = 0;
int err;
dprintk("%s: entered\n", __func__);
- hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
+ hmac = crypto_alloc_shash(kctx->gk5e->cksum_name, 0, 0);
if (IS_ERR(hmac)) {
dprintk("%s: error %ld, allocating hash '%s'\n",
__func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
return PTR_ERR(hmac);
}
- desc.tfm = hmac;
- desc.flags = 0;
+ desc = kmalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc) {
+ dprintk("%s: failed to allocate shash descriptor for '%s'\n",
+ __func__, kctx->gk5e->cksum_name);
+ crypto_free_shash(hmac);
+ return -ENOMEM;
+ }
- err = crypto_hash_init(&desc);
- if (err)
- goto out_err;
+ desc->tfm = hmac;
+ desc->flags = 0;
/* Compute intermediate Kseq from session key */
- err = crypto_hash_setkey(hmac, kctx->Ksess, kctx->gk5e->keylength);
+ err = crypto_shash_setkey(hmac, kctx->Ksess, kctx->gk5e->keylength);
if (err)
goto out_err;
- sg_init_one(sg, &zeroconstant, 4);
- err = crypto_hash_digest(&desc, sg, 4, Kseq);
+ err = crypto_shash_digest(desc, (u8 *)&zeroconstant, 4, Kseq);
if (err)
goto out_err;
/* Compute final Kseq from the checksum and intermediate Kseq */
- err = crypto_hash_setkey(hmac, Kseq, kctx->gk5e->keylength);
+ err = crypto_shash_setkey(hmac, Kseq, kctx->gk5e->keylength);
if (err)
goto out_err;
- sg_set_buf(sg, cksum, 8);
-
- err = crypto_hash_digest(&desc, sg, 8, Kseq);
+ err = crypto_shash_digest(desc, cksum, 8, Kseq);
if (err)
goto out_err;
- err = crypto_blkcipher_setkey(cipher, Kseq, kctx->gk5e->keylength);
+ err = crypto_skcipher_setkey(cipher, Kseq, kctx->gk5e->keylength);
if (err)
goto out_err;
err = 0;
out_err:
- crypto_free_hash(hmac);
+ kzfree(desc);
+ crypto_free_shash(hmac);
dprintk("%s: returning %d\n", __func__, err);
return err;
}
@@ -914,12 +993,11 @@ out_err:
* Set the key of cipher kctx->enc.
*/
int
-krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
+krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
s32 seqnum)
{
- struct crypto_hash *hmac;
- struct hash_desc desc;
- struct scatterlist sg[1];
+ struct crypto_shash *hmac;
+ struct shash_desc *desc;
u8 Kcrypt[GSS_KRB5_MAX_KEYLEN];
u8 zeroconstant[4] = {0};
u8 seqnumarray[4];
@@ -927,35 +1005,38 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
dprintk("%s: entered, seqnum %u\n", __func__, seqnum);
- hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
+ hmac = crypto_alloc_shash(kctx->gk5e->cksum_name, 0, 0);
if (IS_ERR(hmac)) {
dprintk("%s: error %ld, allocating hash '%s'\n",
__func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
return PTR_ERR(hmac);
}
- desc.tfm = hmac;
- desc.flags = 0;
+ desc = kmalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc) {
+ dprintk("%s: failed to allocate shash descriptor for '%s'\n",
+ __func__, kctx->gk5e->cksum_name);
+ crypto_free_shash(hmac);
+ return -ENOMEM;
+ }
- err = crypto_hash_init(&desc);
- if (err)
- goto out_err;
+ desc->tfm = hmac;
+ desc->flags = 0;
/* Compute intermediate Kcrypt from session key */
for (i = 0; i < kctx->gk5e->keylength; i++)
Kcrypt[i] = kctx->Ksess[i] ^ 0xf0;
- err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
+ err = crypto_shash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
if (err)
goto out_err;
- sg_init_one(sg, zeroconstant, 4);
- err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
+ err = crypto_shash_digest(desc, zeroconstant, 4, Kcrypt);
if (err)
goto out_err;
/* Compute final Kcrypt from the seqnum and intermediate Kcrypt */
- err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
+ err = crypto_shash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
if (err)
goto out_err;
@@ -964,20 +1045,19 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
seqnumarray[2] = (unsigned char) ((seqnum >> 8) & 0xff);
seqnumarray[3] = (unsigned char) ((seqnum >> 0) & 0xff);
- sg_set_buf(sg, seqnumarray, 4);
-
- err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
+ err = crypto_shash_digest(desc, seqnumarray, 4, Kcrypt);
if (err)
goto out_err;
- err = crypto_blkcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength);
+ err = crypto_skcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength);
if (err)
goto out_err;
err = 0;
out_err:
- crypto_free_hash(hmac);
+ kzfree(desc);
+ crypto_free_shash(hmac);
dprintk("%s: returning %d\n", __func__, err);
return err;
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index 234fa8d0fd9b..870133146026 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -54,9 +54,9 @@
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
+#include <crypto/skcipher.h>
#include <linux/err.h>
#include <linux/types.h>
-#include <linux/crypto.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/sunrpc/xdr.h>
#include <linux/lcm.h>
@@ -147,7 +147,7 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
size_t blocksize, keybytes, keylength, n;
unsigned char *inblockdata, *outblockdata, *rawkey;
struct xdr_netobj inblock, outblock;
- struct crypto_blkcipher *cipher;
+ struct crypto_skcipher *cipher;
u32 ret = EINVAL;
blocksize = gk5e->blocksize;
@@ -157,11 +157,11 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
if ((inkey->len != keylength) || (outkey->len != keylength))
goto err_return;
- cipher = crypto_alloc_blkcipher(gk5e->encrypt_name, 0,
- CRYPTO_ALG_ASYNC);
+ cipher = crypto_alloc_skcipher(gk5e->encrypt_name, 0,
+ CRYPTO_ALG_ASYNC);
if (IS_ERR(cipher))
goto err_return;
- if (crypto_blkcipher_setkey(cipher, inkey->data, inkey->len))
+ if (crypto_skcipher_setkey(cipher, inkey->data, inkey->len))
goto err_return;
/* allocate and set up buffers */
@@ -238,7 +238,7 @@ err_free_in:
memset(inblockdata, 0, blocksize);
kfree(inblockdata);
err_free_cipher:
- crypto_free_blkcipher(cipher);
+ crypto_free_skcipher(cipher);
err_return:
return ret;
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 28db442a0034..71341ccb9890 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -34,6 +34,8 @@
*
*/
+#include <crypto/hash.h>
+#include <crypto/skcipher.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -42,7 +44,6 @@
#include <linux/sunrpc/auth.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/sunrpc/xdr.h>
-#include <linux/crypto.h>
#include <linux/sunrpc/gss_krb5_enctypes.h>
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -217,7 +218,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
static inline const void *
get_key(const void *p, const void *end,
- struct krb5_ctx *ctx, struct crypto_blkcipher **res)
+ struct krb5_ctx *ctx, struct crypto_skcipher **res)
{
struct xdr_netobj key;
int alg;
@@ -245,7 +246,7 @@ get_key(const void *p, const void *end,
if (IS_ERR(p))
goto out_err;
- *res = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
+ *res = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(*res)) {
printk(KERN_WARNING "gss_kerberos_mech: unable to initialize "
@@ -253,7 +254,7 @@ get_key(const void *p, const void *end,
*res = NULL;
goto out_err_free_key;
}
- if (crypto_blkcipher_setkey(*res, key.data, key.len)) {
+ if (crypto_skcipher_setkey(*res, key.data, key.len)) {
printk(KERN_WARNING "gss_kerberos_mech: error setting key for "
"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
goto out_err_free_tfm;
@@ -263,7 +264,7 @@ get_key(const void *p, const void *end,
return p;
out_err_free_tfm:
- crypto_free_blkcipher(*res);
+ crypto_free_skcipher(*res);
out_err_free_key:
kfree(key.data);
p = ERR_PTR(-EINVAL);
@@ -335,30 +336,30 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
return 0;
out_err_free_key2:
- crypto_free_blkcipher(ctx->seq);
+ crypto_free_skcipher(ctx->seq);
out_err_free_key1:
- crypto_free_blkcipher(ctx->enc);
+ crypto_free_skcipher(ctx->enc);
out_err_free_mech:
kfree(ctx->mech_used.data);
out_err:
return PTR_ERR(p);
}
-static struct crypto_blkcipher *
+static struct crypto_skcipher *
context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key)
{
- struct crypto_blkcipher *cp;
+ struct crypto_skcipher *cp;
- cp = crypto_alloc_blkcipher(cname, 0, CRYPTO_ALG_ASYNC);
+ cp = crypto_alloc_skcipher(cname, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(cp)) {
dprintk("gss_kerberos_mech: unable to initialize "
"crypto algorithm %s\n", cname);
return NULL;
}
- if (crypto_blkcipher_setkey(cp, key, ctx->gk5e->keylength)) {
+ if (crypto_skcipher_setkey(cp, key, ctx->gk5e->keylength)) {
dprintk("gss_kerberos_mech: error setting key for "
"crypto algorithm %s\n", cname);
- crypto_free_blkcipher(cp);
+ crypto_free_skcipher(cp);
return NULL;
}
return cp;
@@ -412,9 +413,9 @@ context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask)
return 0;
out_free_enc:
- crypto_free_blkcipher(ctx->enc);
+ crypto_free_skcipher(ctx->enc);
out_free_seq:
- crypto_free_blkcipher(ctx->seq);
+ crypto_free_skcipher(ctx->seq);
out_err:
return -EINVAL;
}
@@ -427,18 +428,17 @@ out_err:
static int
context_derive_keys_rc4(struct krb5_ctx *ctx)
{
- struct crypto_hash *hmac;
+ struct crypto_shash *hmac;
char sigkeyconstant[] = "signaturekey";
int slen = strlen(sigkeyconstant) + 1; /* include null terminator */
- struct hash_desc desc;
- struct scatterlist sg[1];
+ struct shash_desc *desc;
int err;
dprintk("RPC: %s: entered\n", __func__);
/*
* derive cksum (aka Ksign) key
*/
- hmac = crypto_alloc_hash(ctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
+ hmac = crypto_alloc_shash(ctx->gk5e->cksum_name, 0, 0);
if (IS_ERR(hmac)) {
dprintk("%s: error %ld allocating hash '%s'\n",
__func__, PTR_ERR(hmac), ctx->gk5e->cksum_name);
@@ -446,37 +446,40 @@ context_derive_keys_rc4(struct krb5_ctx *ctx)
goto out_err;
}
- err = crypto_hash_setkey(hmac, ctx->Ksess, ctx->gk5e->keylength);
+ err = crypto_shash_setkey(hmac, ctx->Ksess, ctx->gk5e->keylength);
if (err)
goto out_err_free_hmac;
- sg_init_table(sg, 1);
- sg_set_buf(sg, sigkeyconstant, slen);
- desc.tfm = hmac;
- desc.flags = 0;
-
- err = crypto_hash_init(&desc);
- if (err)
+ desc = kmalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc) {
+ dprintk("%s: failed to allocate hash descriptor for '%s'\n",
+ __func__, ctx->gk5e->cksum_name);
+ err = -ENOMEM;
goto out_err_free_hmac;
+ }
+
+ desc->tfm = hmac;
+ desc->flags = 0;
- err = crypto_hash_digest(&desc, sg, slen, ctx->cksum);
+ err = crypto_shash_digest(desc, sigkeyconstant, slen, ctx->cksum);
+ kzfree(desc);
if (err)
goto out_err_free_hmac;
/*
- * allocate hash, and blkciphers for data and seqnum encryption
+ * allocate hash, and skciphers for data and seqnum encryption
*/
- ctx->enc = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
- CRYPTO_ALG_ASYNC);
+ ctx->enc = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
+ CRYPTO_ALG_ASYNC);
if (IS_ERR(ctx->enc)) {
err = PTR_ERR(ctx->enc);
goto out_err_free_hmac;
}
- ctx->seq = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
- CRYPTO_ALG_ASYNC);
+ ctx->seq = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
+ CRYPTO_ALG_ASYNC);
if (IS_ERR(ctx->seq)) {
- crypto_free_blkcipher(ctx->enc);
+ crypto_free_skcipher(ctx->enc);
err = PTR_ERR(ctx->seq);
goto out_err_free_hmac;
}
@@ -486,7 +489,7 @@ context_derive_keys_rc4(struct krb5_ctx *ctx)
err = 0;
out_err_free_hmac:
- crypto_free_hash(hmac);
+ crypto_free_shash(hmac);
out_err:
dprintk("RPC: %s: returning %d\n", __func__, err);
return err;
@@ -588,7 +591,7 @@ context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
context_v2_alloc_cipher(ctx, "cbc(aes)",
ctx->acceptor_seal);
if (ctx->acceptor_enc_aux == NULL) {
- crypto_free_blkcipher(ctx->initiator_enc_aux);
+ crypto_free_skcipher(ctx->initiator_enc_aux);
goto out_free_acceptor_enc;
}
}
@@ -596,9 +599,9 @@ context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
return 0;
out_free_acceptor_enc:
- crypto_free_blkcipher(ctx->acceptor_enc);
+ crypto_free_skcipher(ctx->acceptor_enc);
out_free_initiator_enc:
- crypto_free_blkcipher(ctx->initiator_enc);
+ crypto_free_skcipher(ctx->initiator_enc);
out_err:
return -EINVAL;
}
@@ -710,12 +713,12 @@ static void
gss_delete_sec_context_kerberos(void *internal_ctx) {
struct krb5_ctx *kctx = internal_ctx;
- crypto_free_blkcipher(kctx->seq);
- crypto_free_blkcipher(kctx->enc);
- crypto_free_blkcipher(kctx->acceptor_enc);
- crypto_free_blkcipher(kctx->initiator_enc);
- crypto_free_blkcipher(kctx->acceptor_enc_aux);
- crypto_free_blkcipher(kctx->initiator_enc_aux);
+ crypto_free_skcipher(kctx->seq);
+ crypto_free_skcipher(kctx->enc);
+ crypto_free_skcipher(kctx->acceptor_enc);
+ crypto_free_skcipher(kctx->initiator_enc);
+ crypto_free_skcipher(kctx->acceptor_enc_aux);
+ crypto_free_skcipher(kctx->initiator_enc_aux);
kfree(kctx->mech_used.data);
kfree(kctx);
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index 20d55c793eb6..c8b9082f4a9d 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -31,9 +31,9 @@
* PERFORMANCE OF THIS SOFTWARE.
*/
+#include <crypto/skcipher.h>
#include <linux/types.h>
#include <linux/sunrpc/gss_krb5.h>
-#include <linux/crypto.h>
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
@@ -43,13 +43,13 @@ static s32
krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
unsigned char *cksum, unsigned char *buf)
{
- struct crypto_blkcipher *cipher;
+ struct crypto_skcipher *cipher;
unsigned char plain[8];
s32 code;
dprintk("RPC: %s:\n", __func__);
- cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
- CRYPTO_ALG_ASYNC);
+ cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
+ CRYPTO_ALG_ASYNC);
if (IS_ERR(cipher))
return PTR_ERR(cipher);
@@ -68,12 +68,12 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
code = krb5_encrypt(cipher, cksum, plain, buf, 8);
out:
- crypto_free_blkcipher(cipher);
+ crypto_free_skcipher(cipher);
return code;
}
s32
krb5_make_seq_num(struct krb5_ctx *kctx,
- struct crypto_blkcipher *key,
+ struct crypto_skcipher *key,
int direction,
u32 seqnum,
unsigned char *cksum, unsigned char *buf)
@@ -101,13 +101,13 @@ static s32
krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
unsigned char *buf, int *direction, s32 *seqnum)
{
- struct crypto_blkcipher *cipher;
+ struct crypto_skcipher *cipher;
unsigned char plain[8];
s32 code;
dprintk("RPC: %s:\n", __func__);
- cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
- CRYPTO_ALG_ASYNC);
+ cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
+ CRYPTO_ALG_ASYNC);
if (IS_ERR(cipher))
return PTR_ERR(cipher);
@@ -130,7 +130,7 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
*seqnum = ((plain[0] << 24) | (plain[1] << 16) |
(plain[2] << 8) | (plain[3]));
out:
- crypto_free_blkcipher(cipher);
+ crypto_free_skcipher(cipher);
return code;
}
@@ -142,7 +142,7 @@ krb5_get_seq_num(struct krb5_ctx *kctx,
{
s32 code;
unsigned char plain[8];
- struct crypto_blkcipher *key = kctx->seq;
+ struct crypto_skcipher *key = kctx->seq;
dprintk("RPC: krb5_get_seq_num:\n");
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index ca7e92a32f84..765088e4ad84 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -28,12 +28,12 @@
* SUCH DAMAGES.
*/
+#include <crypto/skcipher.h>
#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/random.h>
#include <linux/pagemap.h>
-#include <linux/crypto.h>
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
@@ -174,7 +174,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
now = get_seconds();
- blocksize = crypto_blkcipher_blocksize(kctx->enc);
+ blocksize = crypto_skcipher_blocksize(kctx->enc);
gss_krb5_add_padding(buf, offset, blocksize);
BUG_ON((buf->len - offset) % blocksize);
plainlen = conflen + buf->len - offset;
@@ -239,10 +239,10 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
return GSS_S_FAILURE;
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
- struct crypto_blkcipher *cipher;
+ struct crypto_skcipher *cipher;
int err;
- cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
- CRYPTO_ALG_ASYNC);
+ cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
+ CRYPTO_ALG_ASYNC);
if (IS_ERR(cipher))
return GSS_S_FAILURE;
@@ -250,7 +250,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
err = gss_encrypt_xdr_buf(cipher, buf,
offset + headlen - conflen, pages);
- crypto_free_blkcipher(cipher);
+ crypto_free_skcipher(cipher);
if (err)
return GSS_S_FAILURE;
} else {
@@ -327,18 +327,18 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
return GSS_S_BAD_SIG;
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
- struct crypto_blkcipher *cipher;
+ struct crypto_skcipher *cipher;
int err;
- cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
- CRYPTO_ALG_ASYNC);
+ cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
+ CRYPTO_ALG_ASYNC);
if (IS_ERR(cipher))
return GSS_S_FAILURE;
krb5_rc4_setup_enc_key(kctx, cipher, seqnum);
err = gss_decrypt_xdr_buf(cipher, buf, crypt_offset);
- crypto_free_blkcipher(cipher);
+ crypto_free_skcipher(cipher);
if (err)
return GSS_S_DEFECTIVE_TOKEN;
} else {
@@ -371,7 +371,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
/* Copy the data back to the right position. XXX: Would probably be
* better to copy and encrypt at the same time. */
- blocksize = crypto_blkcipher_blocksize(kctx->enc);
+ blocksize = crypto_skcipher_blocksize(kctx->enc);
data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) +
conflen;
orig_start = buf->head[0].iov_base + offset;
@@ -473,7 +473,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
*ptr++ = 0xff;
be16ptr = (__be16 *)ptr;
- blocksize = crypto_blkcipher_blocksize(kctx->acceptor_enc);
+ blocksize = crypto_skcipher_blocksize(kctx->acceptor_enc);
*be16ptr++ = 0;
/* "inner" token header always uses 0 for RRC */
*be16ptr++ = 0;
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index 59eeed43eda2..f0c6a8c78a56 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -326,6 +326,9 @@ int gssp_accept_sec_context_upcall(struct net *net,
if (data->found_creds && client_name.data != NULL) {
char *c;
+ data->creds.cr_raw_principal = kstrndup(client_name.data,
+ client_name.len, GFP_KERNEL);
+
data->creds.cr_principal = kstrndup(client_name.data,
client_name.len, GFP_KERNEL);
if (data->creds.cr_principal) {
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 6255d141133b..229956bf8457 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -138,6 +138,14 @@ out_free:
*/
int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
{
+ if (!xprt->ops->bc_setup)
+ return 0;
+ return xprt->ops->bc_setup(xprt, min_reqs);
+}
+EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
+
+int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs)
+{
struct rpc_rqst *req;
struct list_head tmp_list;
int i;
@@ -192,7 +200,6 @@ out_free:
dprintk("RPC: setup backchannel transport failed\n");
return -ENOMEM;
}
-EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
/**
* xprt_destroy_backchannel - Destroys the backchannel preallocated structures.
@@ -205,6 +212,13 @@ EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
*/
void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs)
{
+ if (xprt->ops->bc_destroy)
+ xprt->ops->bc_destroy(xprt, max_reqs);
+}
+EXPORT_SYMBOL_GPL(xprt_destroy_backchannel);
+
+void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs)
+{
struct rpc_rqst *req = NULL, *tmp = NULL;
dprintk("RPC: destroy backchannel transport\n");
@@ -227,7 +241,6 @@ out:
dprintk("RPC: backchannel list empty= %s\n",
list_empty(&xprt->bc_pa_list) ? "true" : "false");
}
-EXPORT_SYMBOL_GPL(xprt_destroy_backchannel);
static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
{
@@ -264,6 +277,13 @@ void xprt_free_bc_request(struct rpc_rqst *req)
{
struct rpc_xprt *xprt = req->rq_xprt;
+ xprt->ops->bc_free_rqst(req);
+}
+
+void xprt_free_bc_rqst(struct rpc_rqst *req)
+{
+ struct rpc_xprt *xprt = req->rq_xprt;
+
dprintk("RPC: free backchannel req=%p\n", req);
req->rq_connect_cookie = xprt->connect_cookie - 1;
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 4a2340a54401..273bc3a35425 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -41,13 +41,16 @@
static bool cache_defer_req(struct cache_req *req, struct cache_head *item);
static void cache_revisit_request(struct cache_head *item);
-static void cache_init(struct cache_head *h)
+static void cache_init(struct cache_head *h, struct cache_detail *detail)
{
time_t now = seconds_since_boot();
INIT_HLIST_NODE(&h->cache_list);
h->flags = 0;
kref_init(&h->ref);
h->expiry_time = now + CACHE_NEW_EXPIRY;
+ if (now <= detail->flush_time)
+ /* ensure it isn't already expired */
+ now = detail->flush_time + 1;
h->last_refresh = now;
}
@@ -81,7 +84,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
* we might get lose if we need to
* cache_put it soon.
*/
- cache_init(new);
+ cache_init(new, detail);
detail->init(new, key);
write_lock(&detail->hash_lock);
@@ -116,10 +119,15 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
-static void cache_fresh_locked(struct cache_head *head, time_t expiry)
+static void cache_fresh_locked(struct cache_head *head, time_t expiry,
+ struct cache_detail *detail)
{
+ time_t now = seconds_since_boot();
+ if (now <= detail->flush_time)
+ /* ensure it isn't immediately treated as expired */
+ now = detail->flush_time + 1;
head->expiry_time = expiry;
- head->last_refresh = seconds_since_boot();
+ head->last_refresh = now;
smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */
set_bit(CACHE_VALID, &head->flags);
}
@@ -149,7 +157,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
set_bit(CACHE_NEGATIVE, &old->flags);
else
detail->update(old, new);
- cache_fresh_locked(old, new->expiry_time);
+ cache_fresh_locked(old, new->expiry_time, detail);
write_unlock(&detail->hash_lock);
cache_fresh_unlocked(old, detail);
return old;
@@ -162,7 +170,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
cache_put(old, detail);
return NULL;
}
- cache_init(tmp);
+ cache_init(tmp, detail);
detail->init(tmp, old);
write_lock(&detail->hash_lock);
@@ -173,8 +181,8 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]);
detail->entries++;
cache_get(tmp);
- cache_fresh_locked(tmp, new->expiry_time);
- cache_fresh_locked(old, 0);
+ cache_fresh_locked(tmp, new->expiry_time, detail);
+ cache_fresh_locked(old, 0, detail);
write_unlock(&detail->hash_lock);
cache_fresh_unlocked(tmp, detail);
cache_fresh_unlocked(old, detail);
@@ -219,7 +227,8 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h
rv = cache_is_valid(h);
if (rv == -EAGAIN) {
set_bit(CACHE_NEGATIVE, &h->flags);
- cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
+ cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY,
+ detail);
rv = -ENOENT;
}
write_unlock(&detail->hash_lock);
@@ -487,10 +496,13 @@ EXPORT_SYMBOL_GPL(cache_flush);
void cache_purge(struct cache_detail *detail)
{
- detail->flush_time = LONG_MAX;
+ time_t now = seconds_since_boot();
+ if (detail->flush_time >= now)
+ now = detail->flush_time + 1;
+ /* 'now' is the maximum value any 'last_refresh' can have */
+ detail->flush_time = now;
detail->nextcheck = seconds_since_boot();
cache_flush();
- detail->flush_time = 1;
}
EXPORT_SYMBOL_GPL(cache_purge);
@@ -759,7 +771,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
if (count == 0)
return 0;
- mutex_lock(&inode->i_mutex); /* protect against multiple concurrent
+ inode_lock(inode); /* protect against multiple concurrent
* readers on this file */
again:
spin_lock(&queue_lock);
@@ -772,7 +784,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
}
if (rp->q.list.next == &cd->queue) {
spin_unlock(&queue_lock);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
WARN_ON_ONCE(rp->offset);
return 0;
}
@@ -826,7 +838,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
}
if (err == -EAGAIN)
goto again;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err ? err : count;
}
@@ -897,9 +909,9 @@ static ssize_t cache_write(struct file *filp, const char __user *buf,
if (!cd->cache_parse)
goto out;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = cache_downcall(mapping, buf, count, cd);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
out:
return ret;
}
@@ -1213,7 +1225,7 @@ int qword_get(char **bpp, char *dest, int bufsize)
if (bp[0] == '\\' && bp[1] == 'x') {
/* HEX STRING */
bp += 2;
- while (len < bufsize) {
+ while (len < bufsize - 1) {
int h, l;
h = hex_to_bin(bp[0]);
@@ -1436,6 +1448,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
{
char tbuf[20];
char *bp, *ep;
+ time_t then, now;
if (*ppos || count > sizeof(tbuf)-1)
return -EINVAL;
@@ -1447,8 +1460,22 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
return -EINVAL;
bp = tbuf;
- cd->flush_time = get_expiry(&bp);
- cd->nextcheck = seconds_since_boot();
+ then = get_expiry(&bp);
+ now = seconds_since_boot();
+ cd->nextcheck = now;
+ /* Can only set flush_time to 1 second beyond "now", or
+ * possibly 1 second beyond flushtime. This is because
+ * flush_time never goes backwards so it mustn't get too far
+ * ahead of time.
+ */
+ if (then >= now) {
+ /* Want to flush everything, so behave like cache_purge() */
+ if (cd->flush_time >= now)
+ now = cd->flush_time + 1;
+ then = now;
+ }
+
+ cd->flush_time = then;
cache_flush();
*ppos += count;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 23608eb0ded2..b7f21044f4d8 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1217,6 +1217,7 @@ static int rpc_anyaddr(int family, struct sockaddr *buf, size_t buflen)
return -EINVAL;
memcpy(buf, &rpc_in6addr_loopback,
sizeof(rpc_in6addr_loopback));
+ break;
default:
dprintk("RPC: %s: address family not supported\n",
__func__);
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index d81186d34558..31789ef3e614 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -172,7 +172,7 @@ rpc_close_pipes(struct inode *inode)
int need_release;
LIST_HEAD(free_list);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
spin_lock(&pipe->lock);
need_release = pipe->nreaders != 0 || pipe->nwriters != 0;
pipe->nreaders = 0;
@@ -188,7 +188,7 @@ rpc_close_pipes(struct inode *inode)
cancel_delayed_work_sync(&pipe->queue_timeout);
rpc_inode_setowner(inode, NULL);
RPC_I(inode)->pipe = NULL;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
static struct inode *
@@ -221,7 +221,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp)
int first_open;
int res = -ENXIO;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL)
goto out;
@@ -237,7 +237,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp)
pipe->nwriters++;
res = 0;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return res;
}
@@ -248,7 +248,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
struct rpc_pipe_msg *msg;
int last_close;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL)
goto out;
@@ -278,7 +278,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
if (last_close && pipe->ops->release_pipe)
pipe->ops->release_pipe(inode);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return 0;
}
@@ -290,7 +290,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
struct rpc_pipe_msg *msg;
int res = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL) {
res = -EPIPE;
@@ -322,7 +322,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
pipe->ops->destroy_msg(msg);
}
out_unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return res;
}
@@ -332,11 +332,11 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of
struct inode *inode = file_inode(filp);
int res;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
res = -EPIPE;
if (RPC_I(inode)->pipe != NULL)
res = RPC_I(inode)->pipe->ops->downcall(filp, buf, len);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return res;
}
@@ -349,12 +349,12 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
poll_wait(filp, &rpci->waitq, wait);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (rpci->pipe == NULL)
mask |= POLLERR | POLLHUP;
else if (filp->private_data || !list_empty(&rpci->pipe->pipe))
mask |= POLLIN | POLLRDNORM;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return mask;
}
@@ -367,10 +367,10 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
switch (cmd) {
case FIONREAD:
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return -EPIPE;
}
spin_lock(&pipe->lock);
@@ -381,7 +381,7 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
len += msg->len - msg->copied;
}
spin_unlock(&pipe->lock);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return put_user(len, (int __user *)arg);
default:
return -EINVAL;
@@ -617,9 +617,9 @@ int rpc_rmdir(struct dentry *dentry)
parent = dget_parent(dentry);
dir = d_inode(parent);
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
error = __rpc_rmdir(dir, dentry);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
dput(parent);
return error;
}
@@ -701,9 +701,9 @@ static void rpc_depopulate(struct dentry *parent,
{
struct inode *dir = d_inode(parent);
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(dir, I_MUTEX_CHILD);
__rpc_depopulate(parent, files, start, eof);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
}
static int rpc_populate(struct dentry *parent,
@@ -715,7 +715,7 @@ static int rpc_populate(struct dentry *parent,
struct dentry *dentry;
int i, err;
- mutex_lock(&dir->i_mutex);
+ inode_lock(dir);
for (i = start; i < eof; i++) {
dentry = __rpc_lookup_create_exclusive(parent, files[i].name);
err = PTR_ERR(dentry);
@@ -739,11 +739,11 @@ static int rpc_populate(struct dentry *parent,
if (err != 0)
goto out_bad;
}
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
return 0;
out_bad:
__rpc_depopulate(parent, files, start, eof);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
printk(KERN_WARNING "%s: %s failed to populate directory %pd\n",
__FILE__, __func__, parent);
return err;
@@ -757,7 +757,7 @@ static struct dentry *rpc_mkdir_populate(struct dentry *parent,
struct inode *dir = d_inode(parent);
int error;
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
dentry = __rpc_lookup_create_exclusive(parent, name);
if (IS_ERR(dentry))
goto out;
@@ -770,7 +770,7 @@ static struct dentry *rpc_mkdir_populate(struct dentry *parent,
goto err_rmdir;
}
out:
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
return dentry;
err_rmdir:
__rpc_rmdir(dir, dentry);
@@ -788,11 +788,11 @@ static int rpc_rmdir_depopulate(struct dentry *dentry,
parent = dget_parent(dentry);
dir = d_inode(parent);
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
if (depopulate != NULL)
depopulate(dentry);
error = __rpc_rmdir(dir, dentry);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
dput(parent);
return error;
}
@@ -828,7 +828,7 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
if (pipe->ops->downcall == NULL)
umode &= ~S_IWUGO;
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
dentry = __rpc_lookup_create_exclusive(parent, name);
if (IS_ERR(dentry))
goto out;
@@ -837,7 +837,7 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
if (err)
goto out_err;
out:
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
return dentry;
out_err:
dentry = ERR_PTR(err);
@@ -865,9 +865,9 @@ rpc_unlink(struct dentry *dentry)
parent = dget_parent(dentry);
dir = d_inode(parent);
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
error = __rpc_rmpipe(dir, dentry);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
dput(parent);
return error;
}
@@ -1500,7 +1500,7 @@ int register_rpc_pipefs(void)
rpc_inode_cachep = kmem_cache_create("rpc_inode_cache",
sizeof(struct rpc_inode),
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
if (!rpc_inode_cachep)
return -ENOMEM;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index f14f24ee9983..73ad57a59989 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -250,11 +250,11 @@ void rpc_destroy_wait_queue(struct rpc_wait_queue *queue)
}
EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
-static int rpc_wait_bit_killable(struct wait_bit_key *key)
+static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode)
{
- if (fatal_signal_pending(current))
- return -ERESTARTSYS;
freezable_schedule_unsafe();
+ if (signal_pending_state(mode, current))
+ return -ERESTARTSYS;
return 0;
}
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index a8f579df14d8..cc9852897395 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1364,14 +1364,22 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg));
memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res));
+ /* Adjust the argument buffer length */
+ rqstp->rq_arg.len = req->rq_private_buf.len;
+ if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
+ rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
+ rqstp->rq_arg.page_len = 0;
+ } else if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len +
+ rqstp->rq_arg.page_len)
+ rqstp->rq_arg.page_len = rqstp->rq_arg.len -
+ rqstp->rq_arg.head[0].iov_len;
+ else
+ rqstp->rq_arg.len = rqstp->rq_arg.head[0].iov_len +
+ rqstp->rq_arg.page_len;
+
/* reset result send buffer "put" position */
resv->iov_len = 0;
- if (rqstp->rq_prot != IPPROTO_TCP) {
- printk(KERN_ERR "No support for Non-TCP transports!\n");
- BUG();
- }
-
/*
* Skip the next two words because they've already been
* processed in the transport
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index a6cbb2104667..7422f28818b2 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -10,11 +10,13 @@
#include <linux/kthread.h>
#include <linux/slab.h>
#include <net/sock.h>
+#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svc_xprt.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/xprt.h>
#include <linux/module.h>
+#include <linux/netdevice.h>
#include <trace/events/sunrpc.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
@@ -938,6 +940,49 @@ static void svc_age_temp_xprts(unsigned long closure)
mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
}
+/* Close temporary transports whose xpt_local matches server_addr immediately
+ * instead of waiting for them to be picked up by the timer.
+ *
+ * This is meant to be called from a notifier_block that runs when an ip
+ * address is deleted.
+ */
+void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr)
+{
+ struct svc_xprt *xprt;
+ struct svc_sock *svsk;
+ struct socket *sock;
+ struct list_head *le, *next;
+ LIST_HEAD(to_be_closed);
+ struct linger no_linger = {
+ .l_onoff = 1,
+ .l_linger = 0,
+ };
+
+ spin_lock_bh(&serv->sv_lock);
+ list_for_each_safe(le, next, &serv->sv_tempsocks) {
+ xprt = list_entry(le, struct svc_xprt, xpt_list);
+ if (rpc_cmp_addr(server_addr, (struct sockaddr *)
+ &xprt->xpt_local)) {
+ dprintk("svc_age_temp_xprts_now: found %p\n", xprt);
+ list_move(le, &to_be_closed);
+ }
+ }
+ spin_unlock_bh(&serv->sv_lock);
+
+ while (!list_empty(&to_be_closed)) {
+ le = to_be_closed.next;
+ list_del_init(le);
+ xprt = list_entry(le, struct svc_xprt, xpt_list);
+ dprintk("svc_age_temp_xprts_now: closing %p\n", xprt);
+ svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ sock = svsk->sk_sock;
+ kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
+ (char *)&no_linger, sizeof(no_linger));
+ svc_close_xprt(xprt);
+ }
+}
+EXPORT_SYMBOL_GPL(svc_age_temp_xprts_now);
+
static void call_xpt_users(struct svc_xprt *xprt)
{
struct svc_xpt_user *u;
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 79c0f3459b5c..69841db1f533 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -55,6 +55,7 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
spin_unlock(&authtab_lock);
rqstp->rq_auth_slack = 0;
+ init_svc_cred(&rqstp->rq_cred);
rqstp->rq_authop = aops;
return aops->accept(rqstp, authp);
@@ -63,6 +64,7 @@ EXPORT_SYMBOL_GPL(svc_authenticate);
int svc_set_client(struct svc_rqst *rqstp)
{
+ rqstp->rq_client = NULL;
return rqstp->rq_authop->set_client(rqstp);
}
EXPORT_SYMBOL_GPL(svc_set_client);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 621ca7b4a155..dfacdc95b3f5 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -728,10 +728,6 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
struct kvec *resv = &rqstp->rq_res.head[0];
struct svc_cred *cred = &rqstp->rq_cred;
- cred->cr_group_info = NULL;
- cred->cr_principal = NULL;
- rqstp->rq_client = NULL;
-
if (argv->iov_len < 3*4)
return SVC_GARBAGE;
@@ -794,10 +790,6 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
u32 slen, i;
int len = argv->iov_len;
- cred->cr_group_info = NULL;
- cred->cr_principal = NULL;
- rqstp->rq_client = NULL;
-
if ((len -= 3*4) < 0)
return SVC_GARBAGE;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 0c8120229a03..1413cdcc131c 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -181,7 +181,7 @@ int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
struct page **ppage = xdr->pages;
size_t base = xdr->page_base;
unsigned int pglen = xdr->page_len;
- unsigned int flags = MSG_MORE;
+ unsigned int flags = MSG_MORE | MSG_SENDPAGE_NOTLAST;
int slen;
int len = 0;
@@ -399,6 +399,31 @@ static int svc_sock_secure_port(struct svc_rqst *rqstp)
return svc_port_is_privileged(svc_addr(rqstp));
}
+static bool sunrpc_waitqueue_active(wait_queue_head_t *wq)
+{
+ if (!wq)
+ return false;
+ /*
+ * There should normally be a memory * barrier here--see
+ * wq_has_sleeper().
+ *
+ * It appears that isn't currently necessary, though, basically
+ * because callers all appear to have sufficient memory barriers
+ * between the time the relevant change is made and the
+ * time they call these callbacks.
+ *
+ * The nfsd code itself doesn't actually explicitly wait on
+ * these waitqueues, but it may wait on them for example in
+ * sendpage() or sendmsg() calls. (And those may be the only
+ * places, since it it uses nonblocking reads.)
+ *
+ * Maybe we should add the memory barriers anyway, but these are
+ * hot paths so we'd need to be convinced there's no sigificant
+ * penalty.
+ */
+ return waitqueue_active(wq);
+}
+
/*
* INET callback when data has been received on the socket.
*/
@@ -414,7 +439,7 @@ static void svc_udp_data_ready(struct sock *sk)
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
}
- if (wq && waitqueue_active(wq))
+ if (sunrpc_waitqueue_active(wq))
wake_up_interruptible(wq);
}
@@ -432,7 +457,7 @@ static void svc_write_space(struct sock *sk)
svc_xprt_enqueue(&svsk->sk_xprt);
}
- if (wq && waitqueue_active(wq)) {
+ if (sunrpc_waitqueue_active(wq)) {
dprintk("RPC svc_write_space: someone sleeping on %p\n",
svsk);
wake_up_interruptible(wq);
@@ -787,7 +812,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
}
wq = sk_sleep(sk);
- if (wq && waitqueue_active(wq))
+ if (sunrpc_waitqueue_active(wq))
wake_up_interruptible_all(wq);
}
@@ -808,7 +833,7 @@ static void svc_tcp_state_change(struct sock *sk)
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
}
- if (wq && waitqueue_active(wq))
+ if (sunrpc_waitqueue_active(wq))
wake_up_interruptible_all(wq);
}
@@ -823,7 +848,7 @@ static void svc_tcp_data_ready(struct sock *sk)
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
}
- if (wq && waitqueue_active(wq))
+ if (sunrpc_waitqueue_active(wq))
wake_up_interruptible(wq);
}
@@ -1367,7 +1392,6 @@ EXPORT_SYMBOL_GPL(svc_sock_update_bufs);
/*
* Initialize socket for RPC use and create svc_sock struct
- * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
*/
static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
struct socket *sock,
@@ -1594,7 +1618,7 @@ static void svc_sock_detach(struct svc_xprt *xprt)
sk->sk_write_space = svsk->sk_owspace;
wq = sk_sleep(sk);
- if (wq && waitqueue_active(wq))
+ if (sunrpc_waitqueue_active(wq))
wake_up_interruptible(wq);
}
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 887f0183b4c6..c88d9bc06f5c 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -76,7 +76,7 @@ static int
proc_dodebug(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- char tmpbuf[20], c, *s;
+ char tmpbuf[20], c, *s = NULL;
char __user *p;
unsigned int value;
size_t left, len;
@@ -103,23 +103,24 @@ proc_dodebug(struct ctl_table *table, int write,
return -EFAULT;
tmpbuf[left] = '\0';
- for (s = tmpbuf, value = 0; '0' <= *s && *s <= '9'; s++, left--)
- value = 10 * value + (*s - '0');
- if (*s && !isspace(*s))
- return -EINVAL;
- while (left && isspace(*s))
- left--, s++;
+ value = simple_strtol(tmpbuf, &s, 0);
+ if (s) {
+ left -= (s - tmpbuf);
+ if (left && !isspace(*s))
+ return -EINVAL;
+ while (left && isspace(*s))
+ left--, s++;
+ } else
+ left = 0;
*(unsigned int *) table->data = value;
/* Display the RPC tasks on writing to rpc_debug */
if (strcmp(table->procname, "rpc_debug") == 0)
rpc_show_tasks(&init_net);
} else {
- if (!access_ok(VERIFY_WRITE, buffer, left))
- return -EFAULT;
- len = sprintf(tmpbuf, "%d", *(unsigned int *) table->data);
+ len = sprintf(tmpbuf, "0x%04x", *(unsigned int *) table->data);
if (len > left)
len = left;
- if (__copy_to_user(buffer, tmpbuf, len))
+ if (copy_to_user(buffer, tmpbuf, len))
return -EFAULT;
if ((left -= len) > 0) {
if (put_user('\n', (char __user *)buffer + len))
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 2e98f4a243e5..37edea6fa92d 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1425,3 +1425,4 @@ void xprt_put(struct rpc_xprt *xprt)
if (atomic_dec_and_test(&xprt->count))
xprt_destroy(xprt);
}
+EXPORT_SYMBOL_GPL(xprt_put);
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 48913de240bd..dc9f3b513a05 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -2,6 +2,7 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
rpcrdma-y := transport.o rpc_rdma.o verbs.o \
fmr_ops.o frwr_ops.o physical_ops.o \
- svc_rdma.o svc_rdma_transport.o \
+ svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
module.o
+rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
new file mode 100644
index 000000000000..2dcd7640eeb5
--- /dev/null
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2015 Oracle. All rights reserved.
+ *
+ * Support for backward direction RPCs on RPC/RDMA.
+ */
+
+#include <linux/module.h>
+#include <linux/sunrpc/xprt.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svc_xprt.h>
+
+#include "xprt_rdma.h"
+
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+# define RPCDBG_FACILITY RPCDBG_TRANS
+#endif
+
+#undef RPCRDMA_BACKCHANNEL_DEBUG
+
+static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
+ struct rpc_rqst *rqst)
+{
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+
+ spin_lock(&buf->rb_reqslock);
+ list_del(&req->rl_all);
+ spin_unlock(&buf->rb_reqslock);
+
+ rpcrdma_destroy_req(&r_xprt->rx_ia, req);
+
+ kfree(rqst);
+}
+
+static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
+ struct rpc_rqst *rqst)
+{
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ struct rpcrdma_regbuf *rb;
+ struct rpcrdma_req *req;
+ struct xdr_buf *buf;
+ size_t size;
+
+ req = rpcrdma_create_req(r_xprt);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ req->rl_backchannel = true;
+
+ size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst);
+ rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL);
+ if (IS_ERR(rb))
+ goto out_fail;
+ req->rl_rdmabuf = rb;
+
+ size += RPCRDMA_INLINE_READ_THRESHOLD(rqst);
+ rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL);
+ if (IS_ERR(rb))
+ goto out_fail;
+ rb->rg_owner = req;
+ req->rl_sendbuf = rb;
+ /* so that rpcr_to_rdmar works when receiving a request */
+ rqst->rq_buffer = (void *)req->rl_sendbuf->rg_base;
+
+ buf = &rqst->rq_snd_buf;
+ buf->head[0].iov_base = rqst->rq_buffer;
+ buf->head[0].iov_len = 0;
+ buf->tail[0].iov_base = NULL;
+ buf->tail[0].iov_len = 0;
+ buf->page_len = 0;
+ buf->len = 0;
+ buf->buflen = size;
+
+ return 0;
+
+out_fail:
+ rpcrdma_bc_free_rqst(r_xprt, rqst);
+ return -ENOMEM;
+}
+
+/* Allocate and add receive buffers to the rpcrdma_buffer's
+ * existing list of rep's. These are released when the
+ * transport is destroyed.
+ */
+static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
+ unsigned int count)
+{
+ struct rpcrdma_rep *rep;
+ int rc = 0;
+
+ while (count--) {
+ rep = rpcrdma_create_rep(r_xprt);
+ if (IS_ERR(rep)) {
+ pr_err("RPC: %s: reply buffer alloc failed\n",
+ __func__);
+ rc = PTR_ERR(rep);
+ break;
+ }
+
+ rpcrdma_recv_buffer_put(rep);
+ }
+
+ return rc;
+}
+
+/**
+ * xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests
+ * @xprt: transport associated with these backchannel resources
+ * @reqs: number of concurrent incoming requests to expect
+ *
+ * Returns 0 on success; otherwise a negative errno
+ */
+int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
+ struct rpc_rqst *rqst;
+ unsigned int i;
+ int rc;
+
+ /* The backchannel reply path returns each rpc_rqst to the
+ * bc_pa_list _after_ the reply is sent. If the server is
+ * faster than the client, it can send another backward
+ * direction request before the rpc_rqst is returned to the
+ * list. The client rejects the request in this case.
+ *
+ * Twice as many rpc_rqsts are prepared to ensure there is
+ * always an rpc_rqst available as soon as a reply is sent.
+ */
+ if (reqs > RPCRDMA_BACKWARD_WRS >> 1)
+ goto out_err;
+
+ for (i = 0; i < (reqs << 1); i++) {
+ rqst = kzalloc(sizeof(*rqst), GFP_KERNEL);
+ if (!rqst) {
+ pr_err("RPC: %s: Failed to create bc rpc_rqst\n",
+ __func__);
+ goto out_free;
+ }
+ dprintk("RPC: %s: new rqst %p\n", __func__, rqst);
+
+ rqst->rq_xprt = &r_xprt->rx_xprt;
+ INIT_LIST_HEAD(&rqst->rq_list);
+ INIT_LIST_HEAD(&rqst->rq_bc_list);
+
+ if (rpcrdma_bc_setup_rqst(r_xprt, rqst))
+ goto out_free;
+
+ spin_lock_bh(&xprt->bc_pa_lock);
+ list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
+ spin_unlock_bh(&xprt->bc_pa_lock);
+ }
+
+ rc = rpcrdma_bc_setup_reps(r_xprt, reqs);
+ if (rc)
+ goto out_free;
+
+ rc = rpcrdma_ep_post_extra_recv(r_xprt, reqs);
+ if (rc)
+ goto out_free;
+
+ buffer->rb_bc_srv_max_requests = reqs;
+ request_module("svcrdma");
+
+ return 0;
+
+out_free:
+ xprt_rdma_bc_destroy(xprt, reqs);
+
+out_err:
+ pr_err("RPC: %s: setup backchannel transport failed\n", __func__);
+ return -ENOMEM;
+}
+
+/**
+ * xprt_rdma_bc_up - Create transport endpoint for backchannel service
+ * @serv: server endpoint
+ * @net: network namespace
+ *
+ * The "xprt" is an implied argument: it supplies the name of the
+ * backchannel transport class.
+ *
+ * Returns zero on success, negative errno on failure
+ */
+int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net)
+{
+ int ret;
+
+ ret = svc_create_xprt(serv, "rdma-bc", net, PF_INET, 0, 0);
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+
+/**
+ * rpcrdma_bc_marshal_reply - Send backwards direction reply
+ * @rqst: buffer containing RPC reply data
+ *
+ * Returns zero on success.
+ */
+int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
+{
+ struct rpc_xprt *xprt = rqst->rq_xprt;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ struct rpcrdma_msg *headerp;
+ size_t rpclen;
+
+ headerp = rdmab_to_msg(req->rl_rdmabuf);
+ headerp->rm_xid = rqst->rq_xid;
+ headerp->rm_vers = rpcrdma_version;
+ headerp->rm_credit =
+ cpu_to_be32(r_xprt->rx_buf.rb_bc_srv_max_requests);
+ headerp->rm_type = rdma_msg;
+ headerp->rm_body.rm_chunks[0] = xdr_zero;
+ headerp->rm_body.rm_chunks[1] = xdr_zero;
+ headerp->rm_body.rm_chunks[2] = xdr_zero;
+
+ rpclen = rqst->rq_svec[0].iov_len;
+
+#ifdef RPCRDMA_BACKCHANNEL_DEBUG
+ pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n",
+ __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf));
+ pr_info("RPC: %s: RPC/RDMA: %*ph\n",
+ __func__, (int)RPCRDMA_HDRLEN_MIN, headerp);
+ pr_info("RPC: %s: RPC: %*ph\n",
+ __func__, (int)rpclen, rqst->rq_svec[0].iov_base);
+#endif
+
+ req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf);
+ req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN;
+ req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf);
+
+ req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf);
+ req->rl_send_iov[1].length = rpclen;
+ req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf);
+
+ req->rl_niovs = 2;
+ return 0;
+}
+
+/**
+ * xprt_rdma_bc_destroy - Release resources for handling backchannel requests
+ * @xprt: transport associated with these backchannel resources
+ * @reqs: number of incoming requests to destroy; ignored
+ */
+void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct rpc_rqst *rqst, *tmp;
+
+ spin_lock_bh(&xprt->bc_pa_lock);
+ list_for_each_entry_safe(rqst, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
+ list_del(&rqst->rq_bc_pa_list);
+ spin_unlock_bh(&xprt->bc_pa_lock);
+
+ rpcrdma_bc_free_rqst(r_xprt, rqst);
+
+ spin_lock_bh(&xprt->bc_pa_lock);
+ }
+ spin_unlock_bh(&xprt->bc_pa_lock);
+}
+
+/**
+ * xprt_rdma_bc_free_rqst - Release a backchannel rqst
+ * @rqst: request to release
+ */
+void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
+{
+ struct rpc_xprt *xprt = rqst->rq_xprt;
+
+ dprintk("RPC: %s: freeing rqst %p (req %p)\n",
+ __func__, rqst, rpcr_to_rdmar(rqst));
+
+ smp_mb__before_atomic();
+ WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
+ clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
+ smp_mb__after_atomic();
+
+ spin_lock_bh(&xprt->bc_pa_lock);
+ list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
+ spin_unlock_bh(&xprt->bc_pa_lock);
+}
+
+/**
+ * rpcrdma_bc_receive_call - Handle a backward direction call
+ * @xprt: transport receiving the call
+ * @rep: receive buffer containing the call
+ *
+ * Called in the RPC reply handler, which runs in a tasklet.
+ * Be quick about it.
+ *
+ * Operational assumptions:
+ * o Backchannel credits are ignored, just as the NFS server
+ * forechannel currently does
+ * o The ULP manages a replay cache (eg, NFSv4.1 sessions).
+ * No replay detection is done at the transport level
+ */
+void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_rep *rep)
+{
+ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+ struct rpcrdma_msg *headerp;
+ struct svc_serv *bc_serv;
+ struct rpcrdma_req *req;
+ struct rpc_rqst *rqst;
+ struct xdr_buf *buf;
+ size_t size;
+ __be32 *p;
+
+ headerp = rdmab_to_msg(rep->rr_rdmabuf);
+#ifdef RPCRDMA_BACKCHANNEL_DEBUG
+ pr_info("RPC: %s: callback XID %08x, length=%u\n",
+ __func__, be32_to_cpu(headerp->rm_xid), rep->rr_len);
+ pr_info("RPC: %s: %*ph\n", __func__, rep->rr_len, headerp);
+#endif
+
+ /* Sanity check:
+ * Need at least enough bytes for RPC/RDMA header, as code
+ * here references the header fields by array offset. Also,
+ * backward calls are always inline, so ensure there
+ * are some bytes beyond the RPC/RDMA header.
+ */
+ if (rep->rr_len < RPCRDMA_HDRLEN_MIN + 24)
+ goto out_short;
+ p = (__be32 *)((unsigned char *)headerp + RPCRDMA_HDRLEN_MIN);
+ size = rep->rr_len - RPCRDMA_HDRLEN_MIN;
+
+ /* Grab a free bc rqst */
+ spin_lock(&xprt->bc_pa_lock);
+ if (list_empty(&xprt->bc_pa_list)) {
+ spin_unlock(&xprt->bc_pa_lock);
+ goto out_overflow;
+ }
+ rqst = list_first_entry(&xprt->bc_pa_list,
+ struct rpc_rqst, rq_bc_pa_list);
+ list_del(&rqst->rq_bc_pa_list);
+ spin_unlock(&xprt->bc_pa_lock);
+ dprintk("RPC: %s: using rqst %p\n", __func__, rqst);
+
+ /* Prepare rqst */
+ rqst->rq_reply_bytes_recvd = 0;
+ rqst->rq_bytes_sent = 0;
+ rqst->rq_xid = headerp->rm_xid;
+
+ rqst->rq_private_buf.len = size;
+ set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
+
+ buf = &rqst->rq_rcv_buf;
+ memset(buf, 0, sizeof(*buf));
+ buf->head[0].iov_base = p;
+ buf->head[0].iov_len = size;
+ buf->len = size;
+
+ /* The receive buffer has to be hooked to the rpcrdma_req
+ * so that it can be reposted after the server is done
+ * parsing it but just before sending the backward
+ * direction reply.
+ */
+ req = rpcr_to_rdmar(rqst);
+ dprintk("RPC: %s: attaching rep %p to req %p\n",
+ __func__, rep, req);
+ req->rl_reply = rep;
+
+ /* Defeat the retransmit detection logic in send_request */
+ req->rl_connect_cookie = 0;
+
+ /* Queue rqst for ULP's callback service */
+ bc_serv = xprt->bc_serv;
+ spin_lock(&bc_serv->sv_cb_lock);
+ list_add(&rqst->rq_bc_list, &bc_serv->sv_cb_list);
+ spin_unlock(&bc_serv->sv_cb_lock);
+
+ wake_up(&bc_serv->sv_cb_waitq);
+
+ r_xprt->rx_stats.bcall_count++;
+ return;
+
+out_overflow:
+ pr_warn("RPC/RDMA backchannel overflow\n");
+ xprt_disconnect_done(xprt);
+ /* This receive buffer gets reposted automatically
+ * when the connection is re-established.
+ */
+ return;
+
+out_short:
+ pr_warn("RPC/RDMA short backward direction call\n");
+
+ if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
+ xprt_disconnect_done(xprt);
+ else
+ pr_warn("RPC: %s: reposting rep %p\n",
+ __func__, rep);
+}
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index f1e8dafbd507..c14f3a4bff68 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -179,6 +179,69 @@ out_maperr:
return rc;
}
+static void
+__fmr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
+{
+ struct ib_device *device = r_xprt->rx_ia.ri_device;
+ struct rpcrdma_mw *mw = seg->rl_mw;
+ int nsegs = seg->mr_nsegs;
+
+ seg->rl_mw = NULL;
+
+ while (nsegs--)
+ rpcrdma_unmap_one(device, seg++);
+
+ rpcrdma_put_mw(r_xprt, mw);
+}
+
+/* Invalidate all memory regions that were registered for "req".
+ *
+ * Sleeps until it is safe for the host CPU to access the
+ * previously mapped memory regions.
+ */
+static void
+fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+ struct rpcrdma_mr_seg *seg;
+ unsigned int i, nchunks;
+ struct rpcrdma_mw *mw;
+ LIST_HEAD(unmap_list);
+ int rc;
+
+ dprintk("RPC: %s: req %p\n", __func__, req);
+
+ /* ORDER: Invalidate all of the req's MRs first
+ *
+ * ib_unmap_fmr() is slow, so use a single call instead
+ * of one call per mapped MR.
+ */
+ for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
+ seg = &req->rl_segments[i];
+ mw = seg->rl_mw;
+
+ list_add(&mw->r.fmr.fmr->list, &unmap_list);
+
+ i += seg->mr_nsegs;
+ }
+ rc = ib_unmap_fmr(&unmap_list);
+ if (rc)
+ pr_warn("%s: ib_unmap_fmr failed (%i)\n", __func__, rc);
+
+ /* ORDER: Now DMA unmap all of the req's MRs, and return
+ * them to the free MW list.
+ */
+ for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
+ seg = &req->rl_segments[i];
+
+ __fmr_dma_unmap(r_xprt, seg);
+
+ i += seg->mr_nsegs;
+ seg->mr_nsegs = 0;
+ }
+
+ req->rl_nchunks = 0;
+}
+
/* Use the ib_unmap_fmr() verb to prevent further remote
* access via RDMA READ or RDMA WRITE.
*/
@@ -231,6 +294,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
.ro_map = fmr_op_map,
+ .ro_unmap_sync = fmr_op_unmap_sync,
.ro_unmap = fmr_op_unmap,
.ro_open = fmr_op_open,
.ro_maxpages = fmr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 5318951b3b53..e16567389e28 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -151,9 +151,13 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
if (IS_ERR(f->fr_mr))
goto out_mr_err;
- f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth);
- if (IS_ERR(f->fr_pgl))
+
+ f->sg = kcalloc(depth, sizeof(*f->sg), GFP_KERNEL);
+ if (!f->sg)
goto out_list_err;
+
+ sg_init_table(f->sg, depth);
+
return 0;
out_mr_err:
@@ -163,9 +167,9 @@ out_mr_err:
return rc;
out_list_err:
- rc = PTR_ERR(f->fr_pgl);
- dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n",
- __func__, rc);
+ rc = -ENOMEM;
+ dprintk("RPC: %s: sg allocation failure\n",
+ __func__);
ib_dereg_mr(f->fr_mr);
return rc;
}
@@ -179,19 +183,18 @@ __frwr_release(struct rpcrdma_mw *r)
if (rc)
dprintk("RPC: %s: ib_dereg_mr status %i\n",
__func__, rc);
- ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+ kfree(r->r.frmr.sg);
}
static int
frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata)
{
- struct ib_device_attr *devattr = &ia->ri_devattr;
int depth, delta;
ia->ri_max_frmr_depth =
min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
- devattr->max_fast_reg_page_list_len);
+ ia->ri_device->attrs.max_fast_reg_page_list_len);
dprintk("RPC: %s: device's max FR page list len = %u\n",
__func__, ia->ri_max_frmr_depth);
@@ -218,8 +221,8 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
}
ep->rep_attr.cap.max_send_wr *= depth;
- if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
- cdata->max_requests = devattr->max_qp_wr / depth;
+ if (ep->rep_attr.cap.max_send_wr > ia->ri_device->attrs.max_qp_wr) {
+ cdata->max_requests = ia->ri_device->attrs.max_qp_wr / depth;
if (!cdata->max_requests)
return -EINVAL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests *
@@ -241,22 +244,41 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
}
-/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs to be reset. */
+/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs
+ * to be reset.
+ *
+ * WARNING: Only wr_id and status are reliable at this point
+ */
static void
-frwr_sendcompletion(struct ib_wc *wc)
+__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_mw *r)
{
- struct rpcrdma_mw *r;
-
if (likely(wc->status == IB_WC_SUCCESS))
return;
/* WARNING: Only wr_id and status are reliable at this point */
r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
- pr_warn("RPC: %s: frmr %p flushed, status %s (%d)\n",
- __func__, r, ib_wc_status_msg(wc->status), wc->status);
+ if (wc->status == IB_WC_WR_FLUSH_ERR)
+ dprintk("RPC: %s: frmr %p flushed\n", __func__, r);
+ else
+ pr_warn("RPC: %s: frmr %p error, status %s (%d)\n",
+ __func__, r, ib_wc_status_msg(wc->status), wc->status);
+
r->r.frmr.fr_state = FRMR_IS_STALE;
}
+static void
+frwr_sendcompletion(struct ib_wc *wc)
+{
+ struct rpcrdma_mw *r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
+ struct rpcrdma_frmr *f = &r->r.frmr;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS))
+ __frwr_sendcompletion_flush(wc, r);
+
+ if (f->fr_waiter)
+ complete(&f->fr_linv_done);
+}
+
static int
frwr_op_init(struct rpcrdma_xprt *r_xprt)
{
@@ -312,13 +334,10 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
struct rpcrdma_mw *mw;
struct rpcrdma_frmr *frmr;
struct ib_mr *mr;
- struct ib_send_wr fastreg_wr, *bad_wr;
+ struct ib_reg_wr *reg_wr;
+ struct ib_send_wr *bad_wr;
+ int rc, i, n, dma_nents;
u8 key;
- int len, pageoff;
- int i, rc;
- int seg_len;
- u64 pa;
- int page_no;
mw = seg1->rl_mw;
seg1->rl_mw = NULL;
@@ -331,68 +350,196 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
} while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
frmr = &mw->r.frmr;
frmr->fr_state = FRMR_IS_VALID;
+ frmr->fr_waiter = false;
+ mr = frmr->fr_mr;
+ reg_wr = &frmr->fr_regwr;
- pageoff = offset_in_page(seg1->mr_offset);
- seg1->mr_offset -= pageoff; /* start of page */
- seg1->mr_len += pageoff;
- len = -pageoff;
if (nsegs > ia->ri_max_frmr_depth)
nsegs = ia->ri_max_frmr_depth;
- for (page_no = i = 0; i < nsegs;) {
- rpcrdma_map_one(device, seg, direction);
- pa = seg->mr_dma;
- for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
- frmr->fr_pgl->page_list[page_no++] = pa;
- pa += PAGE_SIZE;
- }
- len += seg->mr_len;
+ for (i = 0; i < nsegs;) {
+ if (seg->mr_page)
+ sg_set_page(&frmr->sg[i],
+ seg->mr_page,
+ seg->mr_len,
+ offset_in_page(seg->mr_offset));
+ else
+ sg_set_buf(&frmr->sg[i], seg->mr_offset,
+ seg->mr_len);
+
++seg;
++i;
+
/* Check for holes */
if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
- dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n",
- __func__, mw, i, len);
-
- memset(&fastreg_wr, 0, sizeof(fastreg_wr));
- fastreg_wr.wr_id = (unsigned long)(void *)mw;
- fastreg_wr.opcode = IB_WR_FAST_REG_MR;
- fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff;
- fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
- fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
- fastreg_wr.wr.fast_reg.page_list_len = page_no;
- fastreg_wr.wr.fast_reg.length = len;
- fastreg_wr.wr.fast_reg.access_flags = writing ?
- IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
- IB_ACCESS_REMOTE_READ;
- mr = frmr->fr_mr;
+ frmr->sg_nents = i;
+
+ dma_nents = ib_dma_map_sg(device, frmr->sg, frmr->sg_nents, direction);
+ if (!dma_nents) {
+ pr_err("RPC: %s: failed to dma map sg %p sg_nents %u\n",
+ __func__, frmr->sg, frmr->sg_nents);
+ return -ENOMEM;
+ }
+
+ n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
+ if (unlikely(n != frmr->sg_nents)) {
+ pr_err("RPC: %s: failed to map mr %p (%u/%u)\n",
+ __func__, frmr->fr_mr, n, frmr->sg_nents);
+ rc = n < 0 ? n : -EINVAL;
+ goto out_senderr;
+ }
+
+ dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n",
+ __func__, mw, frmr->sg_nents, mr->length);
+
key = (u8)(mr->rkey & 0x000000FF);
ib_update_fast_reg_key(mr, ++key);
- fastreg_wr.wr.fast_reg.rkey = mr->rkey;
+
+ reg_wr->wr.next = NULL;
+ reg_wr->wr.opcode = IB_WR_REG_MR;
+ reg_wr->wr.wr_id = (uintptr_t)mw;
+ reg_wr->wr.num_sge = 0;
+ reg_wr->wr.send_flags = 0;
+ reg_wr->mr = mr;
+ reg_wr->key = mr->rkey;
+ reg_wr->access = writing ?
+ IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
+ IB_ACCESS_REMOTE_READ;
DECR_CQCOUNT(&r_xprt->rx_ep);
- rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
+ rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
if (rc)
goto out_senderr;
+ seg1->mr_dir = direction;
seg1->rl_mw = mw;
seg1->mr_rkey = mr->rkey;
- seg1->mr_base = seg1->mr_dma + pageoff;
- seg1->mr_nsegs = i;
- seg1->mr_len = len;
- return i;
+ seg1->mr_base = mr->iova;
+ seg1->mr_nsegs = frmr->sg_nents;
+ seg1->mr_len = mr->length;
+
+ return frmr->sg_nents;
out_senderr:
dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc);
- while (i--)
- rpcrdma_unmap_one(device, --seg);
+ ib_dma_unmap_sg(device, frmr->sg, dma_nents, direction);
__frwr_queue_recovery(mw);
return rc;
}
+static struct ib_send_wr *
+__frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg)
+{
+ struct rpcrdma_mw *mw = seg->rl_mw;
+ struct rpcrdma_frmr *f = &mw->r.frmr;
+ struct ib_send_wr *invalidate_wr;
+
+ f->fr_waiter = false;
+ f->fr_state = FRMR_IS_INVALID;
+ invalidate_wr = &f->fr_invwr;
+
+ memset(invalidate_wr, 0, sizeof(*invalidate_wr));
+ invalidate_wr->wr_id = (unsigned long)(void *)mw;
+ invalidate_wr->opcode = IB_WR_LOCAL_INV;
+ invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey;
+
+ return invalidate_wr;
+}
+
+static void
+__frwr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+ int rc)
+{
+ struct ib_device *device = r_xprt->rx_ia.ri_device;
+ struct rpcrdma_mw *mw = seg->rl_mw;
+ struct rpcrdma_frmr *f = &mw->r.frmr;
+
+ seg->rl_mw = NULL;
+
+ ib_dma_unmap_sg(device, f->sg, f->sg_nents, seg->mr_dir);
+
+ if (!rc)
+ rpcrdma_put_mw(r_xprt, mw);
+ else
+ __frwr_queue_recovery(mw);
+}
+
+/* Invalidate all memory regions that were registered for "req".
+ *
+ * Sleeps until it is safe for the host CPU to access the
+ * previously mapped memory regions.
+ */
+static void
+frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+ struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr;
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ struct rpcrdma_mr_seg *seg;
+ unsigned int i, nchunks;
+ struct rpcrdma_frmr *f;
+ int rc;
+
+ dprintk("RPC: %s: req %p\n", __func__, req);
+
+ /* ORDER: Invalidate all of the req's MRs first
+ *
+ * Chain the LOCAL_INV Work Requests and post them with
+ * a single ib_post_send() call.
+ */
+ invalidate_wrs = pos = prev = NULL;
+ seg = NULL;
+ for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
+ seg = &req->rl_segments[i];
+
+ pos = __frwr_prepare_linv_wr(seg);
+
+ if (!invalidate_wrs)
+ invalidate_wrs = pos;
+ else
+ prev->next = pos;
+ prev = pos;
+
+ i += seg->mr_nsegs;
+ }
+ f = &seg->rl_mw->r.frmr;
+
+ /* Strong send queue ordering guarantees that when the
+ * last WR in the chain completes, all WRs in the chain
+ * are complete.
+ */
+ f->fr_invwr.send_flags = IB_SEND_SIGNALED;
+ f->fr_waiter = true;
+ init_completion(&f->fr_linv_done);
+ INIT_CQCOUNT(&r_xprt->rx_ep);
+
+ /* Transport disconnect drains the receive CQ before it
+ * replaces the QP. The RPC reply handler won't call us
+ * unless ri_id->qp is a valid pointer.
+ */
+ rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
+ if (rc)
+ pr_warn("%s: ib_post_send failed %i\n", __func__, rc);
+
+ wait_for_completion(&f->fr_linv_done);
+
+ /* ORDER: Now DMA unmap all of the req's MRs, and return
+ * them to the free MW list.
+ */
+ for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
+ seg = &req->rl_segments[i];
+
+ __frwr_dma_unmap(r_xprt, seg, rc);
+
+ i += seg->mr_nsegs;
+ seg->mr_nsegs = 0;
+ }
+
+ req->rl_nchunks = 0;
+}
+
/* Post a LOCAL_INV Work Request to prevent further remote access
* via RDMA READ or RDMA WRITE.
*/
@@ -402,24 +549,25 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
struct rpcrdma_mr_seg *seg1 = seg;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_mw *mw = seg1->rl_mw;
- struct ib_send_wr invalidate_wr, *bad_wr;
+ struct rpcrdma_frmr *frmr = &mw->r.frmr;
+ struct ib_send_wr *invalidate_wr, *bad_wr;
int rc, nsegs = seg->mr_nsegs;
dprintk("RPC: %s: FRMR %p\n", __func__, mw);
seg1->rl_mw = NULL;
- mw->r.frmr.fr_state = FRMR_IS_INVALID;
+ frmr->fr_state = FRMR_IS_INVALID;
+ invalidate_wr = &mw->r.frmr.fr_invwr;
- memset(&invalidate_wr, 0, sizeof(invalidate_wr));
- invalidate_wr.wr_id = (unsigned long)(void *)mw;
- invalidate_wr.opcode = IB_WR_LOCAL_INV;
- invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey;
+ memset(invalidate_wr, 0, sizeof(*invalidate_wr));
+ invalidate_wr->wr_id = (uintptr_t)mw;
+ invalidate_wr->opcode = IB_WR_LOCAL_INV;
+ invalidate_wr->ex.invalidate_rkey = frmr->fr_mr->rkey;
DECR_CQCOUNT(&r_xprt->rx_ep);
- while (seg1->mr_nsegs--)
- rpcrdma_unmap_one(ia->ri_device, seg++);
+ ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);
read_lock(&ia->ri_qplock);
- rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
+ rc = ib_post_send(ia->ri_id->qp, invalidate_wr, &bad_wr);
read_unlock(&ia->ri_qplock);
if (rc)
goto out_err;
@@ -451,6 +599,7 @@ frwr_op_destroy(struct rpcrdma_buffer *buf)
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map,
+ .ro_unmap_sync = frwr_op_unmap_sync,
.ro_unmap = frwr_op_unmap,
.ro_open = frwr_op_open,
.ro_maxpages = frwr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index 617b76f22154..dbb302ecf590 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -83,6 +83,18 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
return 1;
}
+/* DMA unmap all memory regions that were mapped for "req".
+ */
+static void
+physical_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+ struct ib_device *device = r_xprt->rx_ia.ri_device;
+ unsigned int i;
+
+ for (i = 0; req->rl_nchunks; --req->rl_nchunks)
+ rpcrdma_unmap_one(device, &req->rl_segments[i++]);
+}
+
static void
physical_op_destroy(struct rpcrdma_buffer *buf)
{
@@ -90,6 +102,7 @@ physical_op_destroy(struct rpcrdma_buffer *buf)
const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
.ro_map = physical_op_map,
+ .ro_unmap_sync = physical_op_unmap_sync,
.ro_unmap = physical_op_unmap,
.ro_open = physical_op_open,
.ro_maxpages = physical_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index bc8bd6577467..0f28f2d743ed 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -441,6 +441,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
enum rpcrdma_chunktype rtype, wtype;
struct rpcrdma_msg *headerp;
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state))
+ return rpcrdma_bc_marshal_reply(rqst);
+#endif
+
/*
* rpclen gets amount of data in first buffer, which is the
* pre-registered buffer.
@@ -711,6 +716,37 @@ rpcrdma_connect_worker(struct work_struct *work)
spin_unlock_bh(&xprt->transport_lock);
}
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+/* By convention, backchannel calls arrive via rdma_msg type
+ * messages, and never populate the chunk lists. This makes
+ * the RPC/RDMA header small and fixed in size, so it is
+ * straightforward to check the RPC header's direction field.
+ */
+static bool
+rpcrdma_is_bcall(struct rpcrdma_msg *headerp)
+{
+ __be32 *p = (__be32 *)headerp;
+
+ if (headerp->rm_type != rdma_msg)
+ return false;
+ if (headerp->rm_body.rm_chunks[0] != xdr_zero)
+ return false;
+ if (headerp->rm_body.rm_chunks[1] != xdr_zero)
+ return false;
+ if (headerp->rm_body.rm_chunks[2] != xdr_zero)
+ return false;
+
+ /* sanity */
+ if (p[7] != headerp->rm_xid)
+ return false;
+ /* call direction */
+ if (p[8] != cpu_to_be32(RPC_CALL))
+ return false;
+
+ return true;
+}
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+
/*
* This function is called when an async event is posted to
* the connection which changes the connection state. All it
@@ -723,8 +759,8 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep)
schedule_delayed_work(&ep->rep_connect_worker, 0);
}
-/*
- * Called as a tasklet to do req/reply match and complete a request
+/* Process received RPC/RDMA messages.
+ *
* Errors must result in the RPC task either being awakened, or
* allowed to timeout, to discover the errors at that time.
*/
@@ -741,53 +777,38 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
unsigned long cwnd;
u32 credits;
- /* Check status. If bad, signal disconnect and return rep to pool */
- if (rep->rr_len == ~0U) {
- rpcrdma_recv_buffer_put(rep);
- if (r_xprt->rx_ep.rep_connected == 1) {
- r_xprt->rx_ep.rep_connected = -EIO;
- rpcrdma_conn_func(&r_xprt->rx_ep);
- }
- return;
- }
- if (rep->rr_len < RPCRDMA_HDRLEN_MIN) {
- dprintk("RPC: %s: short/invalid reply\n", __func__);
- goto repost;
- }
+ dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
+
+ if (rep->rr_len == RPCRDMA_BAD_LEN)
+ goto out_badstatus;
+ if (rep->rr_len < RPCRDMA_HDRLEN_MIN)
+ goto out_shortreply;
+
headerp = rdmab_to_msg(rep->rr_rdmabuf);
- if (headerp->rm_vers != rpcrdma_version) {
- dprintk("RPC: %s: invalid version %d\n",
- __func__, be32_to_cpu(headerp->rm_vers));
- goto repost;
- }
+ if (headerp->rm_vers != rpcrdma_version)
+ goto out_badversion;
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ if (rpcrdma_is_bcall(headerp))
+ goto out_bcall;
+#endif
- /* Get XID and try for a match. */
- spin_lock(&xprt->transport_lock);
+ /* Match incoming rpcrdma_rep to an rpcrdma_req to
+ * get context for handling any incoming chunks.
+ */
+ spin_lock_bh(&xprt->transport_lock);
rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
- if (rqst == NULL) {
- spin_unlock(&xprt->transport_lock);
- dprintk("RPC: %s: reply 0x%p failed "
- "to match any request xid 0x%08x len %d\n",
- __func__, rep, be32_to_cpu(headerp->rm_xid),
- rep->rr_len);
-repost:
- r_xprt->rx_stats.bad_reply_count++;
- if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
- rpcrdma_recv_buffer_put(rep);
+ if (!rqst)
+ goto out_nomatch;
- return;
- }
-
- /* get request object */
req = rpcr_to_rdmar(rqst);
- if (req->rl_reply) {
- spin_unlock(&xprt->transport_lock);
- dprintk("RPC: %s: duplicate reply 0x%p to RPC "
- "request 0x%p: xid 0x%08x\n", __func__, rep, req,
- be32_to_cpu(headerp->rm_xid));
- goto repost;
- }
+ if (req->rl_reply)
+ goto out_duplicate;
+ /* Sanity checking has passed. We are now committed
+ * to complete this transaction.
+ */
+ list_del_init(&rqst->rq_list);
+ spin_unlock_bh(&xprt->transport_lock);
dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
" RPC request 0x%p xid 0x%08x\n",
__func__, rep, req, rqst,
@@ -872,19 +893,72 @@ badheader:
break;
}
+ /* Invalidate and flush the data payloads before waking the
+ * waiting application. This guarantees the memory region is
+ * properly fenced from the server before the application
+ * accesses the data. It also ensures proper send flow
+ * control: waking the next RPC waits until this RPC has
+ * relinquished all its Send Queue entries.
+ */
+ if (req->rl_nchunks)
+ r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);
+
credits = be32_to_cpu(headerp->rm_credit);
if (credits == 0)
credits = 1; /* don't deadlock */
else if (credits > r_xprt->rx_buf.rb_max_requests)
credits = r_xprt->rx_buf.rb_max_requests;
+ spin_lock_bh(&xprt->transport_lock);
cwnd = xprt->cwnd;
xprt->cwnd = credits << RPC_CWNDSHIFT;
if (xprt->cwnd > cwnd)
xprt_release_rqst_cong(rqst->rq_task);
+ xprt_complete_rqst(rqst->rq_task, status);
+ spin_unlock_bh(&xprt->transport_lock);
dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
__func__, xprt, rqst, status);
- xprt_complete_rqst(rqst->rq_task, status);
- spin_unlock(&xprt->transport_lock);
+ return;
+
+out_badstatus:
+ rpcrdma_recv_buffer_put(rep);
+ if (r_xprt->rx_ep.rep_connected == 1) {
+ r_xprt->rx_ep.rep_connected = -EIO;
+ rpcrdma_conn_func(&r_xprt->rx_ep);
+ }
+ return;
+
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+out_bcall:
+ rpcrdma_bc_receive_call(r_xprt, rep);
+ return;
+#endif
+
+out_shortreply:
+ dprintk("RPC: %s: short/invalid reply\n", __func__);
+ goto repost;
+
+out_badversion:
+ dprintk("RPC: %s: invalid version %d\n",
+ __func__, be32_to_cpu(headerp->rm_vers));
+ goto repost;
+
+out_nomatch:
+ spin_unlock_bh(&xprt->transport_lock);
+ dprintk("RPC: %s: no match for incoming xid 0x%08x len %d\n",
+ __func__, be32_to_cpu(headerp->rm_xid),
+ rep->rr_len);
+ goto repost;
+
+out_duplicate:
+ spin_unlock_bh(&xprt->transport_lock);
+ dprintk("RPC: %s: "
+ "duplicate reply %p to RPC request %p: xid 0x%08x\n",
+ __func__, rep, req, be32_to_cpu(headerp->rm_xid));
+
+repost:
+ r_xprt->rx_stats.bad_reply_count++;
+ if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
+ rpcrdma_recv_buffer_put(rep);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 2cd252f023a5..c846ca9f1eba 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -55,6 +55,7 @@ unsigned int svcrdma_ord = RPCRDMA_ORD;
static unsigned int min_ord = 1;
static unsigned int max_ord = 4096;
unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
+unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS;
static unsigned int min_max_requests = 4;
static unsigned int max_max_requests = 16384;
unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
@@ -71,10 +72,6 @@ atomic_t rdma_stat_rq_prod;
atomic_t rdma_stat_sq_poll;
atomic_t rdma_stat_sq_prod;
-/* Temporary NFS request map and context caches */
-struct kmem_cache *svc_rdma_map_cachep;
-struct kmem_cache *svc_rdma_ctxt_cachep;
-
struct workqueue_struct *svc_rdma_wq;
/*
@@ -239,18 +236,20 @@ void svc_rdma_cleanup(void)
unregister_sysctl_table(svcrdma_table_header);
svcrdma_table_header = NULL;
}
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ svc_unreg_xprt_class(&svc_rdma_bc_class);
+#endif
svc_unreg_xprt_class(&svc_rdma_class);
- kmem_cache_destroy(svc_rdma_map_cachep);
- kmem_cache_destroy(svc_rdma_ctxt_cachep);
}
int svc_rdma_init(void)
{
dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
- dprintk("\tmax_requests : %d\n", svcrdma_max_requests);
- dprintk("\tsq_depth : %d\n",
+ dprintk("\tmax_requests : %u\n", svcrdma_max_requests);
+ dprintk("\tsq_depth : %u\n",
svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
+ dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests);
dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
@@ -261,36 +260,10 @@ int svc_rdma_init(void)
svcrdma_table_header =
register_sysctl_table(svcrdma_root_table);
- /* Create the temporary map cache */
- svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache",
- sizeof(struct svc_rdma_req_map),
- 0,
- SLAB_HWCACHE_ALIGN,
- NULL);
- if (!svc_rdma_map_cachep) {
- printk(KERN_INFO "Could not allocate map cache.\n");
- goto err0;
- }
-
- /* Create the temporary context cache */
- svc_rdma_ctxt_cachep =
- kmem_cache_create("svc_rdma_ctxt_cache",
- sizeof(struct svc_rdma_op_ctxt),
- 0,
- SLAB_HWCACHE_ALIGN,
- NULL);
- if (!svc_rdma_ctxt_cachep) {
- printk(KERN_INFO "Could not allocate WR ctxt cache.\n");
- goto err1;
- }
-
/* Register RDMA with the SVC transport switch */
svc_reg_xprt_class(&svc_rdma_class);
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ svc_reg_xprt_class(&svc_rdma_bc_class);
+#endif
return 0;
- err1:
- kmem_cache_destroy(svc_rdma_map_cachep);
- err0:
- unregister_sysctl_table(svcrdma_table_header);
- destroy_workqueue(svc_rdma_wq);
- return -ENOMEM;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
new file mode 100644
index 000000000000..65a7c232a345
--- /dev/null
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2015 Oracle. All rights reserved.
+ *
+ * Support for backward direction RPCs on RPC/RDMA (server-side).
+ */
+
+#include <linux/sunrpc/svc_rdma.h>
+#include "xprt_rdma.h"
+
+#define RPCDBG_FACILITY RPCDBG_SVCXPRT
+
+#undef SVCRDMA_BACKCHANNEL_DEBUG
+
+int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp,
+ struct xdr_buf *rcvbuf)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct kvec *dst, *src = &rcvbuf->head[0];
+ struct rpc_rqst *req;
+ unsigned long cwnd;
+ u32 credits;
+ size_t len;
+ __be32 xid;
+ __be32 *p;
+ int ret;
+
+ p = (__be32 *)src->iov_base;
+ len = src->iov_len;
+ xid = rmsgp->rm_xid;
+
+#ifdef SVCRDMA_BACKCHANNEL_DEBUG
+ pr_info("%s: xid=%08x, length=%zu\n",
+ __func__, be32_to_cpu(xid), len);
+ pr_info("%s: RPC/RDMA: %*ph\n",
+ __func__, (int)RPCRDMA_HDRLEN_MIN, rmsgp);
+ pr_info("%s: RPC: %*ph\n",
+ __func__, (int)len, p);
+#endif
+
+ ret = -EAGAIN;
+ if (src->iov_len < 24)
+ goto out_shortreply;
+
+ spin_lock_bh(&xprt->transport_lock);
+ req = xprt_lookup_rqst(xprt, xid);
+ if (!req)
+ goto out_notfound;
+
+ dst = &req->rq_private_buf.head[0];
+ memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
+ if (dst->iov_len < len)
+ goto out_unlock;
+ memcpy(dst->iov_base, p, len);
+
+ credits = be32_to_cpu(rmsgp->rm_credit);
+ if (credits == 0)
+ credits = 1; /* don't deadlock */
+ else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
+ credits = r_xprt->rx_buf.rb_bc_max_requests;
+
+ cwnd = xprt->cwnd;
+ xprt->cwnd = credits << RPC_CWNDSHIFT;
+ if (xprt->cwnd > cwnd)
+ xprt_release_rqst_cong(req->rq_task);
+
+ ret = 0;
+ xprt_complete_rqst(req->rq_task, rcvbuf->len);
+ rcvbuf->len = 0;
+
+out_unlock:
+ spin_unlock_bh(&xprt->transport_lock);
+out:
+ return ret;
+
+out_shortreply:
+ dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n",
+ xprt, src->iov_len);
+ goto out;
+
+out_notfound:
+ dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n",
+ xprt, be32_to_cpu(xid));
+
+ goto out_unlock;
+}
+
+/* Send a backwards direction RPC call.
+ *
+ * Caller holds the connection's mutex and has already marshaled
+ * the RPC/RDMA request.
+ *
+ * This is similar to svc_rdma_reply, but takes an rpc_rqst
+ * instead, does not support chunks, and avoids blocking memory
+ * allocation.
+ *
+ * XXX: There is still an opportunity to block in svc_rdma_send()
+ * if there are no SQ entries to post the Send. This may occur if
+ * the adapter has a small maximum SQ depth.
+ */
+static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
+ struct rpc_rqst *rqst)
+{
+ struct xdr_buf *sndbuf = &rqst->rq_snd_buf;
+ struct svc_rdma_op_ctxt *ctxt;
+ struct svc_rdma_req_map *vec;
+ struct ib_send_wr send_wr;
+ int ret;
+
+ vec = svc_rdma_get_req_map(rdma);
+ ret = svc_rdma_map_xdr(rdma, sndbuf, vec);
+ if (ret)
+ goto out_err;
+
+ /* Post a recv buffer to handle the reply for this request. */
+ ret = svc_rdma_post_recv(rdma, GFP_NOIO);
+ if (ret) {
+ pr_err("svcrdma: Failed to post bc receive buffer, err=%d.\n",
+ ret);
+ pr_err("svcrdma: closing transport %p.\n", rdma);
+ set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+ ret = -ENOTCONN;
+ goto out_err;
+ }
+
+ ctxt = svc_rdma_get_context(rdma);
+ ctxt->pages[0] = virt_to_page(rqst->rq_buffer);
+ ctxt->count = 1;
+
+ ctxt->wr_op = IB_WR_SEND;
+ ctxt->direction = DMA_TO_DEVICE;
+ ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
+ ctxt->sge[0].length = sndbuf->len;
+ ctxt->sge[0].addr =
+ ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0,
+ sndbuf->len, DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) {
+ ret = -EIO;
+ goto out_unmap;
+ }
+ atomic_inc(&rdma->sc_dma_used);
+
+ memset(&send_wr, 0, sizeof(send_wr));
+ send_wr.wr_id = (unsigned long)ctxt;
+ send_wr.sg_list = ctxt->sge;
+ send_wr.num_sge = 1;
+ send_wr.opcode = IB_WR_SEND;
+ send_wr.send_flags = IB_SEND_SIGNALED;
+
+ ret = svc_rdma_send(rdma, &send_wr);
+ if (ret) {
+ ret = -EIO;
+ goto out_unmap;
+ }
+
+out_err:
+ svc_rdma_put_req_map(rdma, vec);
+ dprintk("svcrdma: %s returns %d\n", __func__, ret);
+ return ret;
+
+out_unmap:
+ svc_rdma_unmap_dma(ctxt);
+ svc_rdma_put_context(ctxt, 1);
+ goto out_err;
+}
+
+/* Server-side transport endpoint wants a whole page for its send
+ * buffer. The client RPC code constructs the RPC header in this
+ * buffer before it invokes ->send_request.
+ *
+ * Returns NULL if there was a temporary allocation failure.
+ */
+static void *
+xprt_rdma_bc_allocate(struct rpc_task *task, size_t size)
+{
+ struct rpc_rqst *rqst = task->tk_rqstp;
+ struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
+ struct svcxprt_rdma *rdma;
+ struct page *page;
+
+ rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
+
+ /* Prevent an infinite loop: try to make this case work */
+ if (size > PAGE_SIZE)
+ WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n",
+ size);
+
+ page = alloc_page(RPCRDMA_DEF_GFP);
+ if (!page)
+ return NULL;
+
+ return page_address(page);
+}
+
+static void
+xprt_rdma_bc_free(void *buffer)
+{
+ /* No-op: ctxt and page have already been freed. */
+}
+
+static int
+rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
+{
+ struct rpc_xprt *xprt = rqst->rq_xprt;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)rqst->rq_buffer;
+ int rc;
+
+ /* Space in the send buffer for an RPC/RDMA header is reserved
+ * via xprt->tsh_size.
+ */
+ headerp->rm_xid = rqst->rq_xid;
+ headerp->rm_vers = rpcrdma_version;
+ headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
+ headerp->rm_type = rdma_msg;
+ headerp->rm_body.rm_chunks[0] = xdr_zero;
+ headerp->rm_body.rm_chunks[1] = xdr_zero;
+ headerp->rm_body.rm_chunks[2] = xdr_zero;
+
+#ifdef SVCRDMA_BACKCHANNEL_DEBUG
+ pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
+#endif
+
+ rc = svc_rdma_bc_sendto(rdma, rqst);
+ if (rc)
+ goto drop_connection;
+ return rc;
+
+drop_connection:
+ dprintk("svcrdma: failed to send bc call\n");
+ xprt_disconnect_done(xprt);
+ return -ENOTCONN;
+}
+
+/* Send an RPC call on the passive end of a transport
+ * connection.
+ */
+static int
+xprt_rdma_bc_send_request(struct rpc_task *task)
+{
+ struct rpc_rqst *rqst = task->tk_rqstp;
+ struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
+ struct svcxprt_rdma *rdma;
+ int ret;
+
+ dprintk("svcrdma: sending bc call with xid: %08x\n",
+ be32_to_cpu(rqst->rq_xid));
+
+ if (!mutex_trylock(&sxprt->xpt_mutex)) {
+ rpc_sleep_on(&sxprt->xpt_bc_pending, task, NULL);
+ if (!mutex_trylock(&sxprt->xpt_mutex))
+ return -EAGAIN;
+ rpc_wake_up_queued_task(&sxprt->xpt_bc_pending, task);
+ }
+
+ ret = -ENOTCONN;
+ rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
+ if (!test_bit(XPT_DEAD, &sxprt->xpt_flags))
+ ret = rpcrdma_bc_send_request(rdma, rqst);
+
+ mutex_unlock(&sxprt->xpt_mutex);
+
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+
+static void
+xprt_rdma_bc_close(struct rpc_xprt *xprt)
+{
+ dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
+}
+
+static void
+xprt_rdma_bc_put(struct rpc_xprt *xprt)
+{
+ dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
+
+ xprt_free(xprt);
+ module_put(THIS_MODULE);
+}
+
+static struct rpc_xprt_ops xprt_rdma_bc_procs = {
+ .reserve_xprt = xprt_reserve_xprt_cong,
+ .release_xprt = xprt_release_xprt_cong,
+ .alloc_slot = xprt_alloc_slot,
+ .release_request = xprt_release_rqst_cong,
+ .buf_alloc = xprt_rdma_bc_allocate,
+ .buf_free = xprt_rdma_bc_free,
+ .send_request = xprt_rdma_bc_send_request,
+ .set_retrans_timeout = xprt_set_retrans_timeout_def,
+ .close = xprt_rdma_bc_close,
+ .destroy = xprt_rdma_bc_put,
+ .print_stats = xprt_rdma_print_stats
+};
+
+static const struct rpc_timeout xprt_rdma_bc_timeout = {
+ .to_initval = 60 * HZ,
+ .to_maxval = 60 * HZ,
+};
+
+/* It shouldn't matter if the number of backchannel session slots
+ * doesn't match the number of RPC/RDMA credits. That just means
+ * one or the other will have extra slots that aren't used.
+ */
+static struct rpc_xprt *
+xprt_setup_rdma_bc(struct xprt_create *args)
+{
+ struct rpc_xprt *xprt;
+ struct rpcrdma_xprt *new_xprt;
+
+ if (args->addrlen > sizeof(xprt->addr)) {
+ dprintk("RPC: %s: address too large\n", __func__);
+ return ERR_PTR(-EBADF);
+ }
+
+ xprt = xprt_alloc(args->net, sizeof(*new_xprt),
+ RPCRDMA_MAX_BC_REQUESTS,
+ RPCRDMA_MAX_BC_REQUESTS);
+ if (!xprt) {
+ dprintk("RPC: %s: couldn't allocate rpc_xprt\n",
+ __func__);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ xprt->timeout = &xprt_rdma_bc_timeout;
+ xprt_set_bound(xprt);
+ xprt_set_connected(xprt);
+ xprt->bind_timeout = RPCRDMA_BIND_TO;
+ xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
+ xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
+
+ xprt->prot = XPRT_TRANSPORT_BC_RDMA;
+ xprt->tsh_size = RPCRDMA_HDRLEN_MIN / sizeof(__be32);
+ xprt->ops = &xprt_rdma_bc_procs;
+
+ memcpy(&xprt->addr, args->dstaddr, args->addrlen);
+ xprt->addrlen = args->addrlen;
+ xprt_rdma_format_addresses(xprt, (struct sockaddr *)&xprt->addr);
+ xprt->resvport = 0;
+
+ xprt->max_payload = xprt_rdma_max_inline_read;
+
+ new_xprt = rpcx_to_rdmax(xprt);
+ new_xprt->rx_buf.rb_bc_max_requests = xprt->max_reqs;
+
+ xprt_get(xprt);
+ args->bc_xprt->xpt_bc_xprt = xprt;
+ xprt->bc_xprt = args->bc_xprt;
+
+ if (!try_module_get(THIS_MODULE))
+ goto out_fail;
+
+ /* Final put for backchannel xprt is in __svc_rdma_free */
+ xprt_get(xprt);
+ return xprt;
+
+out_fail:
+ xprt_rdma_free_addresses(xprt);
+ args->bc_xprt->xpt_bc_xprt = NULL;
+ xprt_put(xprt);
+ xprt_free(xprt);
+ return ERR_PTR(-EINVAL);
+}
+
+struct xprt_class xprt_rdma_bc = {
+ .list = LIST_HEAD_INIT(xprt_rdma_bc.list),
+ .name = "rdma backchannel",
+ .owner = THIS_MODULE,
+ .ident = XPRT_TRANSPORT_BC_RDMA,
+ .setup = xprt_setup_rdma_bc,
+};
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index f0c3ff67ca98..c8b8a8b4181e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -126,7 +126,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
u64 rs_offset,
bool last)
{
- struct ib_send_wr read_wr;
+ struct ib_rdma_wr read_wr;
int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
int ret, read, pno;
@@ -144,6 +144,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
head->arg.page_len += len;
+
head->arg.len += len;
if (!pg_off)
head->count++;
@@ -160,8 +161,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
goto err;
atomic_inc(&xprt->sc_dma_used);
- /* The lkey here is either a local dma lkey or a dma_mr lkey */
- ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
+ ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->sge[pno].length = len;
ctxt->count++;
@@ -180,16 +180,16 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
memset(&read_wr, 0, sizeof(read_wr));
- read_wr.wr_id = (unsigned long)ctxt;
- read_wr.opcode = IB_WR_RDMA_READ;
- ctxt->wr_op = read_wr.opcode;
- read_wr.send_flags = IB_SEND_SIGNALED;
- read_wr.wr.rdma.rkey = rs_handle;
- read_wr.wr.rdma.remote_addr = rs_offset;
- read_wr.sg_list = ctxt->sge;
- read_wr.num_sge = pages_needed;
-
- ret = svc_rdma_send(xprt, &read_wr);
+ read_wr.wr.wr_id = (unsigned long)ctxt;
+ read_wr.wr.opcode = IB_WR_RDMA_READ;
+ ctxt->wr_op = read_wr.wr.opcode;
+ read_wr.wr.send_flags = IB_SEND_SIGNALED;
+ read_wr.rkey = rs_handle;
+ read_wr.remote_addr = rs_offset;
+ read_wr.wr.sg_list = ctxt->sge;
+ read_wr.wr.num_sge = pages_needed;
+
+ ret = svc_rdma_send(xprt, &read_wr.wr);
if (ret) {
pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -219,14 +219,14 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
u64 rs_offset,
bool last)
{
- struct ib_send_wr read_wr;
+ struct ib_rdma_wr read_wr;
struct ib_send_wr inv_wr;
- struct ib_send_wr fastreg_wr;
+ struct ib_reg_wr reg_wr;
u8 key;
- int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
+ int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
- int ret, read, pno;
+ int ret, read, pno, dma_nents, n;
u32 pg_off = *page_offset;
u32 pg_no = *page_no;
@@ -235,17 +235,14 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
ctxt->direction = DMA_FROM_DEVICE;
ctxt->frmr = frmr;
- pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len);
- read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset,
- rs_length);
+ nents = min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len);
+ read = min_t(int, (nents << PAGE_SHIFT) - *page_offset, rs_length);
- frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
frmr->direction = DMA_FROM_DEVICE;
frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
- frmr->map_len = pages_needed << PAGE_SHIFT;
- frmr->page_list_len = pages_needed;
+ frmr->sg_nents = nents;
- for (pno = 0; pno < pages_needed; pno++) {
+ for (pno = 0; pno < nents; pno++) {
int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
@@ -253,17 +250,12 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
head->arg.len += len;
if (!pg_off)
head->count++;
+
+ sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no],
+ len, pg_off);
+
rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
rqstp->rq_next_page = rqstp->rq_respages + 1;
- frmr->page_list->page_list[pno] =
- ib_dma_map_page(xprt->sc_cm_id->device,
- head->arg.pages[pg_no], 0,
- PAGE_SIZE, DMA_FROM_DEVICE);
- ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
- frmr->page_list->page_list[pno]);
- if (ret)
- goto err;
- atomic_inc(&xprt->sc_dma_used);
/* adjust offset and wrap to next page if needed */
pg_off += len;
@@ -279,43 +271,57 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
else
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+ dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device,
+ frmr->sg, frmr->sg_nents,
+ frmr->direction);
+ if (!dma_nents) {
+ pr_err("svcrdma: failed to dma map sg %p\n",
+ frmr->sg);
+ return -ENOMEM;
+ }
+ atomic_inc(&xprt->sc_dma_used);
+
+ n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
+ if (unlikely(n != frmr->sg_nents)) {
+ pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n",
+ frmr->mr, n, frmr->sg_nents);
+ return n < 0 ? n : -EINVAL;
+ }
+
/* Bump the key */
key = (u8)(frmr->mr->lkey & 0x000000FF);
ib_update_fast_reg_key(frmr->mr, ++key);
- ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset;
+ ctxt->sge[0].addr = frmr->mr->iova;
ctxt->sge[0].lkey = frmr->mr->lkey;
- ctxt->sge[0].length = read;
+ ctxt->sge[0].length = frmr->mr->length;
ctxt->count = 1;
ctxt->read_hdr = head;
- /* Prepare FASTREG WR */
- memset(&fastreg_wr, 0, sizeof(fastreg_wr));
- fastreg_wr.opcode = IB_WR_FAST_REG_MR;
- fastreg_wr.send_flags = IB_SEND_SIGNALED;
- fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
- fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
- fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
- fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
- fastreg_wr.wr.fast_reg.length = frmr->map_len;
- fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
- fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
- fastreg_wr.next = &read_wr;
+ /* Prepare REG WR */
+ reg_wr.wr.opcode = IB_WR_REG_MR;
+ reg_wr.wr.wr_id = 0;
+ reg_wr.wr.send_flags = IB_SEND_SIGNALED;
+ reg_wr.wr.num_sge = 0;
+ reg_wr.mr = frmr->mr;
+ reg_wr.key = frmr->mr->lkey;
+ reg_wr.access = frmr->access_flags;
+ reg_wr.wr.next = &read_wr.wr;
/* Prepare RDMA_READ */
memset(&read_wr, 0, sizeof(read_wr));
- read_wr.send_flags = IB_SEND_SIGNALED;
- read_wr.wr.rdma.rkey = rs_handle;
- read_wr.wr.rdma.remote_addr = rs_offset;
- read_wr.sg_list = ctxt->sge;
- read_wr.num_sge = 1;
+ read_wr.wr.send_flags = IB_SEND_SIGNALED;
+ read_wr.rkey = rs_handle;
+ read_wr.remote_addr = rs_offset;
+ read_wr.wr.sg_list = ctxt->sge;
+ read_wr.wr.num_sge = 1;
if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
- read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
- read_wr.wr_id = (unsigned long)ctxt;
- read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
+ read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
+ read_wr.wr.wr_id = (unsigned long)ctxt;
+ read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
} else {
- read_wr.opcode = IB_WR_RDMA_READ;
- read_wr.next = &inv_wr;
+ read_wr.wr.opcode = IB_WR_RDMA_READ;
+ read_wr.wr.next = &inv_wr;
/* Prepare invalidate */
memset(&inv_wr, 0, sizeof(inv_wr));
inv_wr.wr_id = (unsigned long)ctxt;
@@ -323,10 +329,10 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
}
- ctxt->wr_op = read_wr.opcode;
+ ctxt->wr_op = read_wr.wr.opcode;
/* Post the chain */
- ret = svc_rdma_send(xprt, &fastreg_wr);
+ ret = svc_rdma_send(xprt, &reg_wr.wr);
if (ret) {
pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -340,7 +346,8 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
atomic_inc(&rdma_stat_read);
return ret;
err:
- svc_rdma_unmap_dma(ctxt);
+ ib_dma_unmap_sg(xprt->sc_cm_id->device,
+ frmr->sg, frmr->sg_nents, frmr->direction);
svc_rdma_put_context(ctxt, 0);
svc_rdma_put_frmr(xprt, frmr);
return ret;
@@ -560,6 +567,38 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
return ret;
}
+/* By convention, backchannel calls arrive via rdma_msg type
+ * messages, and never populate the chunk lists. This makes
+ * the RPC/RDMA header small and fixed in size, so it is
+ * straightforward to check the RPC header's direction field.
+ */
+static bool
+svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, struct rpcrdma_msg *rmsgp)
+{
+ __be32 *p = (__be32 *)rmsgp;
+
+ if (!xprt->xpt_bc_xprt)
+ return false;
+
+ if (rmsgp->rm_type != rdma_msg)
+ return false;
+ if (rmsgp->rm_body.rm_chunks[0] != xdr_zero)
+ return false;
+ if (rmsgp->rm_body.rm_chunks[1] != xdr_zero)
+ return false;
+ if (rmsgp->rm_body.rm_chunks[2] != xdr_zero)
+ return false;
+
+ /* sanity */
+ if (p[7] != rmsgp->rm_xid)
+ return false;
+ /* call direction */
+ if (p[8] == cpu_to_be32(RPC_CALL))
+ return false;
+
+ return true;
+}
+
/*
* Set up the rqstp thread context to point to the RQ buffer. If
* necessary, pull additional data from the client with an RDMA_READ
@@ -625,6 +664,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
goto close_out;
}
+ if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) {
+ ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp,
+ &rqstp->rq_arg);
+ svc_rdma_put_context(ctxt, 0);
+ if (ret)
+ goto repost;
+ return ret;
+ }
+
/* Read read-list data. */
ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
if (ret > 0) {
@@ -661,4 +709,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
set_bit(XPT_CLOSE, &xprt->xpt_flags);
defer:
return 0;
+
+repost:
+ ret = svc_rdma_post_recv(rdma_xprt, GFP_KERNEL);
+ if (ret) {
+ pr_err("svcrdma: could not post a receive buffer, err=%d.\n",
+ ret);
+ pr_err("svcrdma: closing transport %p.\n", rdma_xprt);
+ set_bit(XPT_CLOSE, &rdma_xprt->sc_xprt.xpt_flags);
+ ret = -ENOTCONN;
+ }
+ return ret;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 1dfae8317065..df57f3ce6cd2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -50,9 +50,9 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
-static int map_xdr(struct svcxprt_rdma *xprt,
- struct xdr_buf *xdr,
- struct svc_rdma_req_map *vec)
+int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
+ struct xdr_buf *xdr,
+ struct svc_rdma_req_map *vec)
{
int sge_no;
u32 sge_bytes;
@@ -62,7 +62,7 @@ static int map_xdr(struct svcxprt_rdma *xprt,
if (xdr->len !=
(xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) {
- pr_err("svcrdma: map_xdr: XDR buffer length error\n");
+ pr_err("svcrdma: %s: XDR buffer length error\n", __func__);
return -EIO;
}
@@ -97,9 +97,9 @@ static int map_xdr(struct svcxprt_rdma *xprt,
sge_no++;
}
- dprintk("svcrdma: map_xdr: sge_no %d page_no %d "
+ dprintk("svcrdma: %s: sge_no %d page_no %d "
"page_base %u page_len %u head_len %zu tail_len %zu\n",
- sge_no, page_no, xdr->page_base, xdr->page_len,
+ __func__, sge_no, page_no, xdr->page_base, xdr->page_len,
xdr->head[0].iov_len, xdr->tail[0].iov_len);
vec->count = sge_no;
@@ -217,7 +217,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
u32 xdr_off, int write_len,
struct svc_rdma_req_map *vec)
{
- struct ib_send_wr write_wr;
+ struct ib_rdma_wr write_wr;
struct ib_sge *sge;
int xdr_sge_no;
int sge_no;
@@ -265,7 +265,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
sge[sge_no].addr))
goto err;
atomic_inc(&xprt->sc_dma_used);
- sge[sge_no].lkey = xprt->sc_dma_lkey;
+ sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->count++;
sge_off = 0;
sge_no++;
@@ -282,17 +282,17 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
/* Prepare WRITE WR */
memset(&write_wr, 0, sizeof write_wr);
ctxt->wr_op = IB_WR_RDMA_WRITE;
- write_wr.wr_id = (unsigned long)ctxt;
- write_wr.sg_list = &sge[0];
- write_wr.num_sge = sge_no;
- write_wr.opcode = IB_WR_RDMA_WRITE;
- write_wr.send_flags = IB_SEND_SIGNALED;
- write_wr.wr.rdma.rkey = rmr;
- write_wr.wr.rdma.remote_addr = to;
+ write_wr.wr.wr_id = (unsigned long)ctxt;
+ write_wr.wr.sg_list = &sge[0];
+ write_wr.wr.num_sge = sge_no;
+ write_wr.wr.opcode = IB_WR_RDMA_WRITE;
+ write_wr.wr.send_flags = IB_SEND_SIGNALED;
+ write_wr.rkey = rmr;
+ write_wr.remote_addr = to;
/* Post It */
atomic_inc(&rdma_stat_write);
- if (svc_rdma_send(xprt, &write_wr))
+ if (svc_rdma_send(xprt, &write_wr.wr))
goto err;
return write_len - bc;
err:
@@ -465,7 +465,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
int ret;
/* Post a recv buffer to handle another request. */
- ret = svc_rdma_post_recv(rdma);
+ ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
if (ret) {
printk(KERN_INFO
"svcrdma: could not post a receive buffer, err=%d."
@@ -480,7 +480,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->count = 1;
/* Prepare the SGE for the RPCRDMA Header */
- ctxt->sge[0].lkey = rdma->sc_dma_lkey;
+ ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
ctxt->sge[0].addr =
ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
@@ -504,7 +504,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[sge_no].addr))
goto err;
atomic_inc(&rdma->sc_dma_used);
- ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
+ ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
ctxt->sge[sge_no].length = sge_bytes;
}
if (byte_count != 0) {
@@ -591,14 +591,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
/* Build an req vec for the XDR */
ctxt = svc_rdma_get_context(rdma);
ctxt->direction = DMA_TO_DEVICE;
- vec = svc_rdma_get_req_map();
- ret = map_xdr(rdma, &rqstp->rq_res, vec);
+ vec = svc_rdma_get_req_map(rdma);
+ ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec);
if (ret)
goto err0;
inline_bytes = rqstp->rq_res.len;
/* Create the RDMA response header */
- res_page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+ ret = -ENOMEM;
+ res_page = alloc_page(GFP_KERNEL);
+ if (!res_page)
+ goto err0;
rdma_resp = page_address(res_page);
reply_ary = svc_rdma_get_reply_array(rdma_argp);
if (reply_ary)
@@ -630,14 +633,14 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
inline_bytes);
- svc_rdma_put_req_map(vec);
+ svc_rdma_put_req_map(rdma, vec);
dprintk("svcrdma: send_reply returns %d\n", ret);
return ret;
err1:
put_page(res_page);
err0:
- svc_rdma_put_req_map(vec);
+ svc_rdma_put_req_map(rdma, vec);
svc_rdma_put_context(ctxt, 0);
return ret;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index fcc3eb80c265..5763825d09bf 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -56,6 +56,7 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
+static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *, int);
static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
struct net *net,
struct sockaddr *sa, int salen,
@@ -95,18 +96,133 @@ struct svc_xprt_class svc_rdma_class = {
.xcl_ident = XPRT_TRANSPORT_RDMA,
};
-struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *,
+ struct sockaddr *, int, int);
+static void svc_rdma_bc_detach(struct svc_xprt *);
+static void svc_rdma_bc_free(struct svc_xprt *);
+
+static struct svc_xprt_ops svc_rdma_bc_ops = {
+ .xpo_create = svc_rdma_bc_create,
+ .xpo_detach = svc_rdma_bc_detach,
+ .xpo_free = svc_rdma_bc_free,
+ .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
+ .xpo_secure_port = svc_rdma_secure_port,
+};
+
+struct svc_xprt_class svc_rdma_bc_class = {
+ .xcl_name = "rdma-bc",
+ .xcl_owner = THIS_MODULE,
+ .xcl_ops = &svc_rdma_bc_ops,
+ .xcl_max_payload = (1024 - RPCRDMA_HDRLEN_MIN)
+};
+
+static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv,
+ struct net *net,
+ struct sockaddr *sa, int salen,
+ int flags)
+{
+ struct svcxprt_rdma *cma_xprt;
+ struct svc_xprt *xprt;
+
+ cma_xprt = rdma_create_xprt(serv, 0);
+ if (!cma_xprt)
+ return ERR_PTR(-ENOMEM);
+ xprt = &cma_xprt->sc_xprt;
+
+ svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv);
+ serv->sv_bc_xprt = xprt;
+
+ dprintk("svcrdma: %s(%p)\n", __func__, xprt);
+ return xprt;
+}
+
+static void svc_rdma_bc_detach(struct svc_xprt *xprt)
+{
+ dprintk("svcrdma: %s(%p)\n", __func__, xprt);
+}
+
+static void svc_rdma_bc_free(struct svc_xprt *xprt)
+{
+ struct svcxprt_rdma *rdma =
+ container_of(xprt, struct svcxprt_rdma, sc_xprt);
+
+ dprintk("svcrdma: %s(%p)\n", __func__, xprt);
+ if (xprt)
+ kfree(rdma);
+}
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+
+static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt,
+ gfp_t flags)
{
struct svc_rdma_op_ctxt *ctxt;
- ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep,
- GFP_KERNEL | __GFP_NOFAIL);
- ctxt->xprt = xprt;
- INIT_LIST_HEAD(&ctxt->dto_q);
+ ctxt = kmalloc(sizeof(*ctxt), flags);
+ if (ctxt) {
+ ctxt->xprt = xprt;
+ INIT_LIST_HEAD(&ctxt->free);
+ INIT_LIST_HEAD(&ctxt->dto_q);
+ }
+ return ctxt;
+}
+
+static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
+{
+ unsigned int i;
+
+ /* Each RPC/RDMA credit can consume a number of send
+ * and receive WQEs. One ctxt is allocated for each.
+ */
+ i = xprt->sc_sq_depth + xprt->sc_rq_depth;
+
+ while (i--) {
+ struct svc_rdma_op_ctxt *ctxt;
+
+ ctxt = alloc_ctxt(xprt, GFP_KERNEL);
+ if (!ctxt) {
+ dprintk("svcrdma: No memory for RDMA ctxt\n");
+ return false;
+ }
+ list_add(&ctxt->free, &xprt->sc_ctxts);
+ }
+ return true;
+}
+
+struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
+{
+ struct svc_rdma_op_ctxt *ctxt = NULL;
+
+ spin_lock_bh(&xprt->sc_ctxt_lock);
+ xprt->sc_ctxt_used++;
+ if (list_empty(&xprt->sc_ctxts))
+ goto out_empty;
+
+ ctxt = list_first_entry(&xprt->sc_ctxts,
+ struct svc_rdma_op_ctxt, free);
+ list_del_init(&ctxt->free);
+ spin_unlock_bh(&xprt->sc_ctxt_lock);
+
+out:
ctxt->count = 0;
ctxt->frmr = NULL;
- atomic_inc(&xprt->sc_ctxt_used);
return ctxt;
+
+out_empty:
+ /* Either pre-allocation missed the mark, or send
+ * queue accounting is broken.
+ */
+ spin_unlock_bh(&xprt->sc_ctxt_lock);
+
+ ctxt = alloc_ctxt(xprt, GFP_NOIO);
+ if (ctxt)
+ goto out;
+
+ spin_lock_bh(&xprt->sc_ctxt_lock);
+ xprt->sc_ctxt_used--;
+ spin_unlock_bh(&xprt->sc_ctxt_lock);
+ WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n");
+ return NULL;
}
void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
@@ -116,11 +232,11 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
/*
* Unmap the DMA addr in the SGE if the lkey matches
- * the sc_dma_lkey, otherwise, ignore it since it is
+ * the local_dma_lkey, otherwise, ignore it since it is
* an FRMR lkey and will be unmapped later when the
* last WR that uses it completes.
*/
- if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
+ if (ctxt->sge[i].lkey == xprt->sc_pd->local_dma_lkey) {
atomic_dec(&xprt->sc_dma_used);
ib_dma_unmap_page(xprt->sc_cm_id->device,
ctxt->sge[i].addr,
@@ -132,35 +248,108 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
{
- struct svcxprt_rdma *xprt;
+ struct svcxprt_rdma *xprt = ctxt->xprt;
int i;
- xprt = ctxt->xprt;
if (free_pages)
for (i = 0; i < ctxt->count; i++)
put_page(ctxt->pages[i]);
- kmem_cache_free(svc_rdma_ctxt_cachep, ctxt);
- atomic_dec(&xprt->sc_ctxt_used);
+ spin_lock_bh(&xprt->sc_ctxt_lock);
+ xprt->sc_ctxt_used--;
+ list_add(&ctxt->free, &xprt->sc_ctxts);
+ spin_unlock_bh(&xprt->sc_ctxt_lock);
}
-/*
- * Temporary NFS req mappings are shared across all transport
- * instances. These are short lived and should be bounded by the number
- * of concurrent server threads * depth of the SQ.
- */
-struct svc_rdma_req_map *svc_rdma_get_req_map(void)
+static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
+{
+ while (!list_empty(&xprt->sc_ctxts)) {
+ struct svc_rdma_op_ctxt *ctxt;
+
+ ctxt = list_first_entry(&xprt->sc_ctxts,
+ struct svc_rdma_op_ctxt, free);
+ list_del(&ctxt->free);
+ kfree(ctxt);
+ }
+}
+
+static struct svc_rdma_req_map *alloc_req_map(gfp_t flags)
{
struct svc_rdma_req_map *map;
- map = kmem_cache_alloc(svc_rdma_map_cachep,
- GFP_KERNEL | __GFP_NOFAIL);
+
+ map = kmalloc(sizeof(*map), flags);
+ if (map)
+ INIT_LIST_HEAD(&map->free);
+ return map;
+}
+
+static bool svc_rdma_prealloc_maps(struct svcxprt_rdma *xprt)
+{
+ unsigned int i;
+
+ /* One for each receive buffer on this connection. */
+ i = xprt->sc_max_requests;
+
+ while (i--) {
+ struct svc_rdma_req_map *map;
+
+ map = alloc_req_map(GFP_KERNEL);
+ if (!map) {
+ dprintk("svcrdma: No memory for request map\n");
+ return false;
+ }
+ list_add(&map->free, &xprt->sc_maps);
+ }
+ return true;
+}
+
+struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *xprt)
+{
+ struct svc_rdma_req_map *map = NULL;
+
+ spin_lock(&xprt->sc_map_lock);
+ if (list_empty(&xprt->sc_maps))
+ goto out_empty;
+
+ map = list_first_entry(&xprt->sc_maps,
+ struct svc_rdma_req_map, free);
+ list_del_init(&map->free);
+ spin_unlock(&xprt->sc_map_lock);
+
+out:
map->count = 0;
return map;
+
+out_empty:
+ spin_unlock(&xprt->sc_map_lock);
+
+ /* Pre-allocation amount was incorrect */
+ map = alloc_req_map(GFP_NOIO);
+ if (map)
+ goto out;
+
+ WARN_ONCE(1, "svcrdma: empty request map list?\n");
+ return NULL;
+}
+
+void svc_rdma_put_req_map(struct svcxprt_rdma *xprt,
+ struct svc_rdma_req_map *map)
+{
+ spin_lock(&xprt->sc_map_lock);
+ list_add(&map->free, &xprt->sc_maps);
+ spin_unlock(&xprt->sc_map_lock);
}
-void svc_rdma_put_req_map(struct svc_rdma_req_map *map)
+static void svc_rdma_destroy_maps(struct svcxprt_rdma *xprt)
{
- kmem_cache_free(svc_rdma_map_cachep, map);
+ while (!list_empty(&xprt->sc_maps)) {
+ struct svc_rdma_req_map *map;
+
+ map = list_first_entry(&xprt->sc_maps,
+ struct svc_rdma_req_map, free);
+ list_del(&map->free);
+ kfree(map);
+ }
}
/* ib_cq event handler */
@@ -328,46 +517,44 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
static void process_context(struct svcxprt_rdma *xprt,
struct svc_rdma_op_ctxt *ctxt)
{
+ struct svc_rdma_op_ctxt *read_hdr;
+ int free_pages = 0;
+
svc_rdma_unmap_dma(ctxt);
switch (ctxt->wr_op) {
case IB_WR_SEND:
- if (ctxt->frmr)
- pr_err("svcrdma: SEND: ctxt->frmr != NULL\n");
- svc_rdma_put_context(ctxt, 1);
+ free_pages = 1;
break;
case IB_WR_RDMA_WRITE:
- if (ctxt->frmr)
- pr_err("svcrdma: WRITE: ctxt->frmr != NULL\n");
- svc_rdma_put_context(ctxt, 0);
break;
case IB_WR_RDMA_READ:
case IB_WR_RDMA_READ_WITH_INV:
svc_rdma_put_frmr(xprt, ctxt->frmr);
- if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
- struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
- if (read_hdr) {
- spin_lock_bh(&xprt->sc_rq_dto_lock);
- set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
- list_add_tail(&read_hdr->dto_q,
- &xprt->sc_read_complete_q);
- spin_unlock_bh(&xprt->sc_rq_dto_lock);
- } else {
- pr_err("svcrdma: ctxt->read_hdr == NULL\n");
- }
- svc_xprt_enqueue(&xprt->sc_xprt);
- }
+
+ if (!test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags))
+ break;
+
+ read_hdr = ctxt->read_hdr;
svc_rdma_put_context(ctxt, 0);
- break;
+
+ spin_lock_bh(&xprt->sc_rq_dto_lock);
+ set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+ list_add_tail(&read_hdr->dto_q,
+ &xprt->sc_read_complete_q);
+ spin_unlock_bh(&xprt->sc_rq_dto_lock);
+ svc_xprt_enqueue(&xprt->sc_xprt);
+ return;
default:
- printk(KERN_ERR "svcrdma: unexpected completion type, "
- "opcode=%d\n",
- ctxt->wr_op);
+ dprintk("svcrdma: unexpected completion opcode=%d\n",
+ ctxt->wr_op);
break;
}
+
+ svc_rdma_put_context(ctxt, free_pages);
}
/*
@@ -465,19 +652,15 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
+ INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
+ INIT_LIST_HEAD(&cma_xprt->sc_maps);
init_waitqueue_head(&cma_xprt->sc_send_wait);
spin_lock_init(&cma_xprt->sc_lock);
spin_lock_init(&cma_xprt->sc_rq_dto_lock);
spin_lock_init(&cma_xprt->sc_frmr_q_lock);
-
- cma_xprt->sc_ord = svcrdma_ord;
-
- cma_xprt->sc_max_req_size = svcrdma_max_req_size;
- cma_xprt->sc_max_requests = svcrdma_max_requests;
- cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT;
- atomic_set(&cma_xprt->sc_sq_count, 0);
- atomic_set(&cma_xprt->sc_ctxt_used, 0);
+ spin_lock_init(&cma_xprt->sc_ctxt_lock);
+ spin_lock_init(&cma_xprt->sc_map_lock);
if (listener)
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
@@ -485,7 +668,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
return cma_xprt;
}
-int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
+int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
{
struct ib_recv_wr recv_wr, *bad_recv_wr;
struct svc_rdma_op_ctxt *ctxt;
@@ -503,7 +686,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
pr_err("svcrdma: Too many sges (%d)\n", sge_no);
goto err_put_ctxt;
}
- page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+ page = alloc_page(flags);
+ if (!page)
+ goto err_put_ctxt;
ctxt->pages[sge_no] = page;
pa = ib_dma_map_page(xprt->sc_cm_id->device,
page, 0, PAGE_SIZE,
@@ -513,7 +698,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
atomic_inc(&xprt->sc_dma_used);
ctxt->sge[sge_no].addr = pa;
ctxt->sge[sge_no].length = PAGE_SIZE;
- ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
+ ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->count = sge_no + 1;
buflen += PAGE_SIZE;
}
@@ -692,8 +877,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
if (!cma_xprt)
return ERR_PTR(-ENOMEM);
- listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP,
- IB_QPT_RC);
+ listen_id = rdma_create_id(&init_net, rdma_listen_handler, cma_xprt,
+ RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(listen_id)) {
ret = PTR_ERR(listen_id);
dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
@@ -732,7 +917,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
{
struct ib_mr *mr;
- struct ib_fast_reg_page_list *pl;
+ struct scatterlist *sg;
struct svc_rdma_fastreg_mr *frmr;
u32 num_sg;
@@ -745,13 +930,14 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
if (IS_ERR(mr))
goto err_free_frmr;
- pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
- num_sg);
- if (IS_ERR(pl))
+ sg = kcalloc(RPCSVC_MAXPAGES, sizeof(*sg), GFP_KERNEL);
+ if (!sg)
goto err_free_mr;
+ sg_init_table(sg, RPCSVC_MAXPAGES);
+
frmr->mr = mr;
- frmr->page_list = pl;
+ frmr->sg = sg;
INIT_LIST_HEAD(&frmr->frmr_list);
return frmr;
@@ -771,8 +957,8 @@ static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
frmr = list_entry(xprt->sc_frmr_q.next,
struct svc_rdma_fastreg_mr, frmr_list);
list_del_init(&frmr->frmr_list);
+ kfree(frmr->sg);
ib_dereg_mr(frmr->mr);
- ib_free_fast_reg_page_list(frmr->page_list);
kfree(frmr);
}
}
@@ -786,8 +972,7 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
frmr = list_entry(rdma->sc_frmr_q.next,
struct svc_rdma_fastreg_mr, frmr_list);
list_del_init(&frmr->frmr_list);
- frmr->map_len = 0;
- frmr->page_list_len = 0;
+ frmr->sg_nents = 0;
}
spin_unlock_bh(&rdma->sc_frmr_q_lock);
if (frmr)
@@ -796,25 +981,13 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
return rdma_alloc_frmr(rdma);
}
-static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
- struct svc_rdma_fastreg_mr *frmr)
-{
- int page_no;
- for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
- dma_addr_t addr = frmr->page_list->page_list[page_no];
- if (ib_dma_mapping_error(frmr->mr->device, addr))
- continue;
- atomic_dec(&xprt->sc_dma_used);
- ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE,
- frmr->direction);
- }
-}
-
void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
struct svc_rdma_fastreg_mr *frmr)
{
if (frmr) {
- frmr_unmap_dma(rdma, frmr);
+ ib_dma_unmap_sg(rdma->sc_cm_id->device,
+ frmr->sg, frmr->sg_nents, frmr->direction);
+ atomic_dec(&rdma->sc_dma_used);
spin_lock_bh(&rdma->sc_frmr_q_lock);
WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
@@ -840,11 +1013,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct rdma_conn_param conn_param;
struct ib_cq_init_attr cq_attr = {};
struct ib_qp_init_attr qp_attr;
- struct ib_device_attr devattr;
- int uninitialized_var(dma_mr_acc);
- int need_dma_mr = 0;
- int ret;
- int i;
+ struct ib_device *dev;
+ unsigned int i;
+ int ret = 0;
listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
clear_bit(XPT_CONN, &xprt->xpt_flags);
@@ -864,37 +1035,42 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n",
newxprt, newxprt->sc_cm_id);
- ret = ib_query_device(newxprt->sc_cm_id->device, &devattr);
- if (ret) {
- dprintk("svcrdma: could not query device attributes on "
- "device %p, rc=%d\n", newxprt->sc_cm_id->device, ret);
- goto errout;
- }
+ dev = newxprt->sc_cm_id->device;
/* Qualify the transport resource defaults with the
* capabilities of this particular device */
- newxprt->sc_max_sge = min((size_t)devattr.max_sge,
+ newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge,
(size_t)RPCSVC_MAXPAGES);
- newxprt->sc_max_sge_rd = min_t(size_t, devattr.max_sge_rd,
+ newxprt->sc_max_sge_rd = min_t(size_t, dev->attrs.max_sge_rd,
RPCSVC_MAXPAGES);
- newxprt->sc_max_requests = min((size_t)devattr.max_qp_wr,
- (size_t)svcrdma_max_requests);
- newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests;
+ newxprt->sc_max_req_size = svcrdma_max_req_size;
+ newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
+ svcrdma_max_requests);
+ newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
+ svcrdma_max_bc_requests);
+ newxprt->sc_rq_depth = newxprt->sc_max_requests +
+ newxprt->sc_max_bc_requests;
+ newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth;
+
+ if (!svc_rdma_prealloc_ctxts(newxprt))
+ goto errout;
+ if (!svc_rdma_prealloc_maps(newxprt))
+ goto errout;
/*
* Limit ORD based on client limit, local device limit, and
* configured svcrdma limit.
*/
- newxprt->sc_ord = min_t(size_t, devattr.max_qp_rd_atom, newxprt->sc_ord);
+ newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord);
newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);
- newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device);
+ newxprt->sc_pd = ib_alloc_pd(dev);
if (IS_ERR(newxprt->sc_pd)) {
dprintk("svcrdma: error creating PD for connect request\n");
goto errout;
}
cq_attr.cqe = newxprt->sc_sq_depth;
- newxprt->sc_sq_cq = ib_create_cq(newxprt->sc_cm_id->device,
+ newxprt->sc_sq_cq = ib_create_cq(dev,
sq_comp_handler,
cq_event_handler,
newxprt,
@@ -903,8 +1079,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk("svcrdma: error creating SQ CQ for connect request\n");
goto errout;
}
- cq_attr.cqe = newxprt->sc_max_requests;
- newxprt->sc_rq_cq = ib_create_cq(newxprt->sc_cm_id->device,
+ cq_attr.cqe = newxprt->sc_rq_depth;
+ newxprt->sc_rq_cq = ib_create_cq(dev,
rq_comp_handler,
cq_event_handler,
newxprt,
@@ -918,7 +1094,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
qp_attr.event_handler = qp_event_handler;
qp_attr.qp_context = &newxprt->sc_xprt;
qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
- qp_attr.cap.max_recv_wr = newxprt->sc_max_requests;
+ qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
qp_attr.cap.max_recv_sge = newxprt->sc_max_sge;
qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -932,7 +1108,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
" cap.max_send_sge = %d\n"
" cap.max_recv_sge = %d\n",
newxprt->sc_cm_id, newxprt->sc_pd,
- newxprt->sc_cm_id->device, newxprt->sc_pd->device,
+ dev, newxprt->sc_pd->device,
qp_attr.cap.max_send_wr,
qp_attr.cap.max_recv_wr,
qp_attr.cap.max_send_sge,
@@ -968,9 +1144,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
* of an RDMA_READ. IB does not.
*/
newxprt->sc_reader = rdma_read_chunk_lcl;
- if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+ if (dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
newxprt->sc_frmr_pg_list_len =
- devattr.max_fast_reg_page_list_len;
+ dev->attrs.max_fast_reg_page_list_len;
newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
newxprt->sc_reader = rdma_read_chunk_frmr;
}
@@ -978,44 +1154,16 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
/*
* Determine if a DMA MR is required and if so, what privs are required
*/
- if (!rdma_protocol_iwarp(newxprt->sc_cm_id->device,
- newxprt->sc_cm_id->port_num) &&
- !rdma_ib_or_roce(newxprt->sc_cm_id->device,
- newxprt->sc_cm_id->port_num))
+ if (!rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num) &&
+ !rdma_ib_or_roce(dev, newxprt->sc_cm_id->port_num))
goto errout;
- if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) ||
- !(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
- need_dma_mr = 1;
- dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
- if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
- newxprt->sc_cm_id->port_num) &&
- !(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG))
- dma_mr_acc |= IB_ACCESS_REMOTE_WRITE;
- }
-
- if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
- newxprt->sc_cm_id->port_num))
+ if (rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num))
newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
- /* Create the DMA MR if needed, otherwise, use the DMA LKEY */
- if (need_dma_mr) {
- /* Register all of physical memory */
- newxprt->sc_phys_mr =
- ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc);
- if (IS_ERR(newxprt->sc_phys_mr)) {
- dprintk("svcrdma: Failed to create DMA MR ret=%d\n",
- ret);
- goto errout;
- }
- newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey;
- } else
- newxprt->sc_dma_lkey =
- newxprt->sc_cm_id->device->local_dma_lkey;
-
/* Post receive buffers */
- for (i = 0; i < newxprt->sc_max_requests; i++) {
- ret = svc_rdma_post_recv(newxprt);
+ for (i = 0; i < newxprt->sc_rq_depth; i++) {
+ ret = svc_rdma_post_recv(newxprt, GFP_KERNEL);
if (ret) {
dprintk("svcrdma: failure posting receive buffers\n");
goto errout;
@@ -1114,12 +1262,14 @@ static void __svc_rdma_free(struct work_struct *work)
{
struct svcxprt_rdma *rdma =
container_of(work, struct svcxprt_rdma, sc_work);
- dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
+ struct svc_xprt *xprt = &rdma->sc_xprt;
+
+ dprintk("svcrdma: %s(%p)\n", __func__, rdma);
/* We should only be called from kref_put */
- if (atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0)
+ if (atomic_read(&xprt->xpt_ref.refcount) != 0)
pr_err("svcrdma: sc_xprt still in use? (%d)\n",
- atomic_read(&rdma->sc_xprt.xpt_ref.refcount));
+ atomic_read(&xprt->xpt_ref.refcount));
/*
* Destroy queued, but not processed read completions. Note
@@ -1147,15 +1297,22 @@ static void __svc_rdma_free(struct work_struct *work)
}
/* Warn if we leaked a resource or under-referenced */
- if (atomic_read(&rdma->sc_ctxt_used) != 0)
+ if (rdma->sc_ctxt_used != 0)
pr_err("svcrdma: ctxt still in use? (%d)\n",
- atomic_read(&rdma->sc_ctxt_used));
+ rdma->sc_ctxt_used);
if (atomic_read(&rdma->sc_dma_used) != 0)
pr_err("svcrdma: dma still in use? (%d)\n",
atomic_read(&rdma->sc_dma_used));
- /* De-allocate fastreg mr */
+ /* Final put of backchannel client transport */
+ if (xprt->xpt_bc_xprt) {
+ xprt_put(xprt->xpt_bc_xprt);
+ xprt->xpt_bc_xprt = NULL;
+ }
+
rdma_dealloc_frmr_q(rdma);
+ svc_rdma_destroy_ctxts(rdma);
+ svc_rdma_destroy_maps(rdma);
/* Destroy the QP if present (not a listener) */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
@@ -1167,9 +1324,6 @@ static void __svc_rdma_free(struct work_struct *work)
if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq))
ib_destroy_cq(rdma->sc_rq_cq);
- if (rdma->sc_phys_mr && !IS_ERR(rdma->sc_phys_mr))
- ib_dereg_mr(rdma->sc_phys_mr);
-
if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
ib_dealloc_pd(rdma->sc_pd);
@@ -1275,7 +1429,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
int length;
int ret;
- p = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+ p = alloc_page(GFP_KERNEL);
+ if (!p)
+ return;
va = page_address(p);
/* XDR encode error */
@@ -1295,7 +1451,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
return;
}
atomic_inc(&xprt->sc_dma_used);
- ctxt->sge[0].lkey = xprt->sc_dma_lkey;
+ ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->sge[0].length = length;
/* Prepare SEND WR */
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 41e452bc580c..b1b009f10ea3 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -63,7 +63,7 @@
*/
static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
-static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
+unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_inline_write_padding;
static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
@@ -143,12 +143,7 @@ static struct ctl_table sunrpc_table[] = {
#endif
-#define RPCRDMA_BIND_TO (60U * HZ)
-#define RPCRDMA_INIT_REEST_TO (5U * HZ)
-#define RPCRDMA_MAX_REEST_TO (30U * HZ)
-#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ)
-
-static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */
+static struct rpc_xprt_ops xprt_rdma_procs; /*forward reference */
static void
xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
@@ -174,7 +169,7 @@ xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap)
xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6;
}
-static void
+void
xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap)
{
char buf[128];
@@ -203,7 +198,7 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap)
xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
}
-static void
+void
xprt_rdma_free_addresses(struct rpc_xprt *xprt)
{
unsigned int i;
@@ -499,7 +494,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
if (req == NULL)
return NULL;
- flags = GFP_NOIO | __GFP_NOWARN;
+ flags = RPCRDMA_DEF_GFP;
if (RPC_IS_SWAPPER(task))
flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
@@ -576,6 +571,9 @@ xprt_rdma_free(void *buffer)
rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]);
req = rb->rg_owner;
+ if (req->rl_backchannel)
+ return;
+
r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
@@ -639,7 +637,7 @@ drop_connection:
return -ENOTCONN; /* implies disconnect */
}
-static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
+void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
long idle_time = 0;
@@ -676,7 +674,7 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
static int
xprt_rdma_enable_swap(struct rpc_xprt *xprt)
{
- return -EINVAL;
+ return 0;
}
static void
@@ -705,7 +703,13 @@ static struct rpc_xprt_ops xprt_rdma_procs = {
.print_stats = xprt_rdma_print_stats,
.enable_swap = xprt_rdma_enable_swap,
.disable_swap = xprt_rdma_disable_swap,
- .inject_disconnect = xprt_rdma_inject_disconnect
+ .inject_disconnect = xprt_rdma_inject_disconnect,
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ .bc_setup = xprt_rdma_bc_setup,
+ .bc_up = xprt_rdma_bc_up,
+ .bc_free_rqst = xprt_rdma_bc_free_rqst,
+ .bc_destroy = xprt_rdma_bc_destroy,
+#endif
};
static struct xprt_class xprt_rdma = {
@@ -732,7 +736,13 @@ void xprt_rdma_cleanup(void)
dprintk("RPC: %s: xprt_unregister returned %i\n",
__func__, rc);
+ rpcrdma_destroy_wq();
frwr_destroy_recovery_wq();
+
+ rc = xprt_unregister_transport(&xprt_rdma_bc);
+ if (rc)
+ dprintk("RPC: %s: xprt_unregister(bc) returned %i\n",
+ __func__, rc);
}
int xprt_rdma_init(void)
@@ -743,8 +753,23 @@ int xprt_rdma_init(void)
if (rc)
return rc;
+ rc = rpcrdma_alloc_wq();
+ if (rc) {
+ frwr_destroy_recovery_wq();
+ return rc;
+ }
+
rc = xprt_register_transport(&xprt_rdma);
if (rc) {
+ rpcrdma_destroy_wq();
+ frwr_destroy_recovery_wq();
+ return rc;
+ }
+
+ rc = xprt_register_transport(&xprt_rdma_bc);
+ if (rc) {
+ xprt_unregister_transport(&xprt_rdma);
+ rpcrdma_destroy_wq();
frwr_destroy_recovery_wq();
return rc;
}
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 5502d4dade74..878f1bfb1db9 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -68,47 +68,33 @@
* internal functions
*/
-/*
- * handle replies in tasklet context, using a single, global list
- * rdma tasklet function -- just turn around and call the func
- * for all replies on the list
- */
-
-static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
-static LIST_HEAD(rpcrdma_tasklets_g);
+static struct workqueue_struct *rpcrdma_receive_wq;
-static void
-rpcrdma_run_tasklet(unsigned long data)
+int
+rpcrdma_alloc_wq(void)
{
- struct rpcrdma_rep *rep;
- unsigned long flags;
-
- data = data;
- spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
- while (!list_empty(&rpcrdma_tasklets_g)) {
- rep = list_entry(rpcrdma_tasklets_g.next,
- struct rpcrdma_rep, rr_list);
- list_del(&rep->rr_list);
- spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+ struct workqueue_struct *recv_wq;
- rpcrdma_reply_handler(rep);
+ recv_wq = alloc_workqueue("xprtrdma_receive",
+ WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_HIGHPRI,
+ 0);
+ if (!recv_wq)
+ return -ENOMEM;
- spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
- }
- spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+ rpcrdma_receive_wq = recv_wq;
+ return 0;
}
-static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
-
-static void
-rpcrdma_schedule_tasklet(struct list_head *sched_list)
+void
+rpcrdma_destroy_wq(void)
{
- unsigned long flags;
+ struct workqueue_struct *wq;
- spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
- list_splice_tail(sched_list, &rpcrdma_tasklets_g);
- spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
- tasklet_schedule(&rpcrdma_tasklet_g);
+ if (rpcrdma_receive_wq) {
+ wq = rpcrdma_receive_wq;
+ rpcrdma_receive_wq = NULL;
+ destroy_workqueue(wq);
+ }
}
static void
@@ -158,63 +144,54 @@ rpcrdma_sendcq_process_wc(struct ib_wc *wc)
}
}
-static int
-rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
+/* The common case is a single send completion is waiting. By
+ * passing two WC entries to ib_poll_cq, a return code of 1
+ * means there is exactly one WC waiting and no more. We don't
+ * have to invoke ib_poll_cq again to know that the CQ has been
+ * properly drained.
+ */
+static void
+rpcrdma_sendcq_poll(struct ib_cq *cq)
{
- struct ib_wc *wcs;
- int budget, count, rc;
+ struct ib_wc *pos, wcs[2];
+ int count, rc;
- budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
do {
- wcs = ep->rep_send_wcs;
+ pos = wcs;
- rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
- if (rc <= 0)
- return rc;
+ rc = ib_poll_cq(cq, ARRAY_SIZE(wcs), pos);
+ if (rc < 0)
+ break;
count = rc;
while (count-- > 0)
- rpcrdma_sendcq_process_wc(wcs++);
- } while (rc == RPCRDMA_POLLSIZE && --budget);
- return 0;
+ rpcrdma_sendcq_process_wc(pos++);
+ } while (rc == ARRAY_SIZE(wcs));
+ return;
}
-/*
- * Handle send, fast_reg_mr, and local_inv completions.
- *
- * Send events are typically suppressed and thus do not result
- * in an upcall. Occasionally one is signaled, however. This
- * prevents the provider's completion queue from wrapping and
- * losing a completion.
+/* Handle provider send completion upcalls.
*/
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
- struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
- int rc;
-
- rc = rpcrdma_sendcq_poll(cq, ep);
- if (rc) {
- dprintk("RPC: %s: ib_poll_cq failed: %i\n",
- __func__, rc);
- return;
- }
+ do {
+ rpcrdma_sendcq_poll(cq);
+ } while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
+ IB_CQ_REPORT_MISSED_EVENTS) > 0);
+}
- rc = ib_req_notify_cq(cq,
- IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
- if (rc == 0)
- return;
- if (rc < 0) {
- dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
- __func__, rc);
- return;
- }
+static void
+rpcrdma_receive_worker(struct work_struct *work)
+{
+ struct rpcrdma_rep *rep =
+ container_of(work, struct rpcrdma_rep, rr_work);
- rpcrdma_sendcq_poll(cq, ep);
+ rpcrdma_reply_handler(rep);
}
static void
-rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
+rpcrdma_recvcq_process_wc(struct ib_wc *wc)
{
struct rpcrdma_rep *rep =
(struct rpcrdma_rep *)(unsigned long)wc->wr_id;
@@ -237,91 +214,60 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
prefetch(rdmab_to_msg(rep->rr_rdmabuf));
out_schedule:
- list_add_tail(&rep->rr_list, sched_list);
+ queue_work(rpcrdma_receive_wq, &rep->rr_work);
return;
+
out_fail:
if (wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("RPC: %s: rep %p: %s\n",
__func__, rep, ib_wc_status_msg(wc->status));
- rep->rr_len = ~0U;
+ rep->rr_len = RPCRDMA_BAD_LEN;
goto out_schedule;
}
-static int
-rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
+/* The wc array is on stack: automatic memory is always CPU-local.
+ *
+ * struct ib_wc is 64 bytes, making the poll array potentially
+ * large. But this is at the bottom of the call chain. Further
+ * substantial work is done in another thread.
+ */
+static void
+rpcrdma_recvcq_poll(struct ib_cq *cq)
{
- struct list_head sched_list;
- struct ib_wc *wcs;
- int budget, count, rc;
+ struct ib_wc *pos, wcs[4];
+ int count, rc;
- INIT_LIST_HEAD(&sched_list);
- budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
do {
- wcs = ep->rep_recv_wcs;
+ pos = wcs;
- rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
- if (rc <= 0)
- goto out_schedule;
+ rc = ib_poll_cq(cq, ARRAY_SIZE(wcs), pos);
+ if (rc < 0)
+ break;
count = rc;
while (count-- > 0)
- rpcrdma_recvcq_process_wc(wcs++, &sched_list);
- } while (rc == RPCRDMA_POLLSIZE && --budget);
- rc = 0;
-
-out_schedule:
- rpcrdma_schedule_tasklet(&sched_list);
- return rc;
+ rpcrdma_recvcq_process_wc(pos++);
+ } while (rc == ARRAY_SIZE(wcs));
}
-/*
- * Handle receive completions.
- *
- * It is reentrant but processes single events in order to maintain
- * ordering of receives to keep server credits.
- *
- * It is the responsibility of the scheduled tasklet to return
- * recv buffers to the pool. NOTE: this affects synchronization of
- * connection shutdown. That is, the structures required for
- * the completion of the reply handler must remain intact until
- * all memory has been reclaimed.
+/* Handle provider receive completion upcalls.
*/
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
- struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
- int rc;
-
- rc = rpcrdma_recvcq_poll(cq, ep);
- if (rc) {
- dprintk("RPC: %s: ib_poll_cq failed: %i\n",
- __func__, rc);
- return;
- }
-
- rc = ib_req_notify_cq(cq,
- IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
- if (rc == 0)
- return;
- if (rc < 0) {
- dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
- __func__, rc);
- return;
- }
-
- rpcrdma_recvcq_poll(cq, ep);
+ do {
+ rpcrdma_recvcq_poll(cq);
+ } while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
+ IB_CQ_REPORT_MISSED_EVENTS) > 0);
}
static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
struct ib_wc wc;
- LIST_HEAD(sched_list);
while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
- rpcrdma_recvcq_process_wc(&wc, &sched_list);
- if (!list_empty(&sched_list))
- rpcrdma_schedule_tasklet(&sched_list);
+ rpcrdma_recvcq_process_wc(&wc);
while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
rpcrdma_sendcq_process_wc(&wc);
}
@@ -432,7 +378,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
init_completion(&ia->ri_done);
- id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
+ id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP,
+ IB_QPT_RC);
if (IS_ERR(id)) {
rc = PTR_ERR(id);
dprintk("RPC: %s: rdma_create_id() failed %i\n",
@@ -515,7 +462,6 @@ int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
struct rpcrdma_ia *ia = &xprt->rx_ia;
- struct ib_device_attr *devattr = &ia->ri_devattr;
int rc;
ia->ri_dma_mr = NULL;
@@ -535,16 +481,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
goto out2;
}
- rc = ib_query_device(ia->ri_device, devattr);
- if (rc) {
- dprintk("RPC: %s: ib_query_device failed %d\n",
- __func__, rc);
- goto out3;
- }
-
if (memreg == RPCRDMA_FRMR) {
- if (!(devattr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) ||
- (devattr->max_fast_reg_page_list_len == 0)) {
+ if (!(ia->ri_device->attrs.device_cap_flags &
+ IB_DEVICE_MEM_MGT_EXTENSIONS) ||
+ (ia->ri_device->attrs.max_fast_reg_page_list_len == 0)) {
dprintk("RPC: %s: FRMR registration "
"not supported by HCA\n", __func__);
memreg = RPCRDMA_MTHCAFMR;
@@ -619,29 +559,38 @@ int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
struct rpcrdma_create_data_internal *cdata)
{
- struct ib_device_attr *devattr = &ia->ri_devattr;
struct ib_cq *sendcq, *recvcq;
struct ib_cq_init_attr cq_attr = {};
+ unsigned int max_qp_wr;
int rc, err;
- if (devattr->max_sge < RPCRDMA_MAX_IOVS) {
+ if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) {
dprintk("RPC: %s: insufficient sge's available\n",
__func__);
return -ENOMEM;
}
+ if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
+ dprintk("RPC: %s: insufficient wqe's available\n",
+ __func__);
+ return -ENOMEM;
+ }
+ max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS;
+
/* check provider's send/recv wr limits */
- if (cdata->max_requests > devattr->max_qp_wr)
- cdata->max_requests = devattr->max_qp_wr;
+ if (cdata->max_requests > max_qp_wr)
+ cdata->max_requests = max_qp_wr;
ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
ep->rep_attr.qp_context = ep;
ep->rep_attr.srq = NULL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests;
+ ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
rc = ia->ri_ops->ro_open(ia, ep, cdata);
if (rc)
return rc;
ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
+ ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS;
ep->rep_attr.cap.max_recv_sge = 1;
ep->rep_attr.cap.max_inline_data = 0;
@@ -659,17 +608,15 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
/* set trigger for requesting send completion */
ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
- if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
- ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
- else if (ep->rep_cqinit <= 2)
- ep->rep_cqinit = 0;
+ if (ep->rep_cqinit <= 2)
+ ep->rep_cqinit = 0; /* always signal? */
INIT_CQCOUNT(ep);
init_waitqueue_head(&ep->rep_connect_wait);
INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1;
sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall,
- rpcrdma_cq_async_error_upcall, ep, &cq_attr);
+ rpcrdma_cq_async_error_upcall, NULL, &cq_attr);
if (IS_ERR(sendcq)) {
rc = PTR_ERR(sendcq);
dprintk("RPC: %s: failed to create send CQ: %i\n",
@@ -686,7 +633,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1;
recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall,
- rpcrdma_cq_async_error_upcall, ep, &cq_attr);
+ rpcrdma_cq_async_error_upcall, NULL, &cq_attr);
if (IS_ERR(recvcq)) {
rc = PTR_ERR(recvcq);
dprintk("RPC: %s: failed to create recv CQ: %i\n",
@@ -713,11 +660,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
/* Client offers RDMA Read but does not initiate */
ep->rep_remote_cma.initiator_depth = 0;
- if (devattr->max_qp_rd_atom > 32) /* arbitrary but <= 255 */
+ if (ia->ri_device->attrs.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
ep->rep_remote_cma.responder_resources = 32;
else
ep->rep_remote_cma.responder_resources =
- devattr->max_qp_rd_atom;
+ ia->ri_device->attrs.max_qp_rd_atom;
ep->rep_remote_cma.retry_count = 7;
ep->rep_remote_cma.flow_control = 0;
@@ -885,7 +832,22 @@ retry:
}
rc = ep->rep_connected;
} else {
+ struct rpcrdma_xprt *r_xprt;
+ unsigned int extras;
+
dprintk("RPC: %s: connected\n", __func__);
+
+ r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
+ extras = r_xprt->rx_buf.rb_bc_srv_max_requests;
+
+ if (extras) {
+ rc = rpcrdma_ep_post_extra_recv(r_xprt, extras);
+ if (rc) {
+ pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n",
+ __func__, rc);
+ rc = 0;
+ }
+ }
}
out:
@@ -922,20 +884,25 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
}
}
-static struct rpcrdma_req *
+struct rpcrdma_req *
rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
{
+ struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
struct rpcrdma_req *req;
req = kzalloc(sizeof(*req), GFP_KERNEL);
if (req == NULL)
return ERR_PTR(-ENOMEM);
+ INIT_LIST_HEAD(&req->rl_free);
+ spin_lock(&buffer->rb_reqslock);
+ list_add(&req->rl_all, &buffer->rb_allreqs);
+ spin_unlock(&buffer->rb_reqslock);
req->rl_buffer = &r_xprt->rx_buf;
return req;
}
-static struct rpcrdma_rep *
+struct rpcrdma_rep *
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
@@ -957,6 +924,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
rep->rr_device = ia->ri_device;
rep->rr_rxprt = r_xprt;
+ INIT_WORK(&rep->rr_work, rpcrdma_receive_worker);
return rep;
out_free:
@@ -970,44 +938,21 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
- char *p;
- size_t len;
int i, rc;
- buf->rb_max_requests = cdata->max_requests;
+ buf->rb_max_requests = r_xprt->rx_data.max_requests;
+ buf->rb_bc_srv_max_requests = 0;
spin_lock_init(&buf->rb_lock);
- /* Need to allocate:
- * 1. arrays for send and recv pointers
- * 2. arrays of struct rpcrdma_req to fill in pointers
- * 3. array of struct rpcrdma_rep for replies
- * Send/recv buffers in req/rep need to be registered
- */
- len = buf->rb_max_requests *
- (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
-
- p = kzalloc(len, GFP_KERNEL);
- if (p == NULL) {
- dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
- __func__, len);
- rc = -ENOMEM;
- goto out;
- }
- buf->rb_pool = p; /* for freeing it later */
-
- buf->rb_send_bufs = (struct rpcrdma_req **) p;
- p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
- buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
- p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
-
rc = ia->ri_ops->ro_init(r_xprt);
if (rc)
goto out;
+ INIT_LIST_HEAD(&buf->rb_send_bufs);
+ INIT_LIST_HEAD(&buf->rb_allreqs);
+ spin_lock_init(&buf->rb_reqslock);
for (i = 0; i < buf->rb_max_requests; i++) {
struct rpcrdma_req *req;
- struct rpcrdma_rep *rep;
req = rpcrdma_create_req(r_xprt);
if (IS_ERR(req)) {
@@ -1016,7 +961,13 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
rc = PTR_ERR(req);
goto out;
}
- buf->rb_send_bufs[i] = req;
+ req->rl_backchannel = false;
+ list_add(&req->rl_free, &buf->rb_send_bufs);
+ }
+
+ INIT_LIST_HEAD(&buf->rb_recv_bufs);
+ for (i = 0; i < buf->rb_max_requests + 2; i++) {
+ struct rpcrdma_rep *rep;
rep = rpcrdma_create_rep(r_xprt);
if (IS_ERR(rep)) {
@@ -1025,7 +976,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
rc = PTR_ERR(rep);
goto out;
}
- buf->rb_recv_bufs[i] = rep;
+ list_add(&rep->rr_list, &buf->rb_recv_bufs);
}
return 0;
@@ -1034,22 +985,38 @@ out:
return rc;
}
+static struct rpcrdma_req *
+rpcrdma_buffer_get_req_locked(struct rpcrdma_buffer *buf)
+{
+ struct rpcrdma_req *req;
+
+ req = list_first_entry(&buf->rb_send_bufs,
+ struct rpcrdma_req, rl_free);
+ list_del(&req->rl_free);
+ return req;
+}
+
+static struct rpcrdma_rep *
+rpcrdma_buffer_get_rep_locked(struct rpcrdma_buffer *buf)
+{
+ struct rpcrdma_rep *rep;
+
+ rep = list_first_entry(&buf->rb_recv_bufs,
+ struct rpcrdma_rep, rr_list);
+ list_del(&rep->rr_list);
+ return rep;
+}
+
static void
rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep)
{
- if (!rep)
- return;
-
rpcrdma_free_regbuf(ia, rep->rr_rdmabuf);
kfree(rep);
}
-static void
+void
rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
{
- if (!req)
- return;
-
rpcrdma_free_regbuf(ia, req->rl_sendbuf);
rpcrdma_free_regbuf(ia, req->rl_rdmabuf);
kfree(req);
@@ -1059,25 +1026,29 @@ void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
struct rpcrdma_ia *ia = rdmab_to_ia(buf);
- int i;
- /* clean up in reverse order from create
- * 1. recv mr memory (mr free, then kfree)
- * 2. send mr memory (mr free, then kfree)
- * 3. MWs
- */
- dprintk("RPC: %s: entering\n", __func__);
+ while (!list_empty(&buf->rb_recv_bufs)) {
+ struct rpcrdma_rep *rep;
- for (i = 0; i < buf->rb_max_requests; i++) {
- if (buf->rb_recv_bufs)
- rpcrdma_destroy_rep(ia, buf->rb_recv_bufs[i]);
- if (buf->rb_send_bufs)
- rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]);
+ rep = rpcrdma_buffer_get_rep_locked(buf);
+ rpcrdma_destroy_rep(ia, rep);
}
- ia->ri_ops->ro_destroy(buf);
+ spin_lock(&buf->rb_reqslock);
+ while (!list_empty(&buf->rb_allreqs)) {
+ struct rpcrdma_req *req;
+
+ req = list_first_entry(&buf->rb_allreqs,
+ struct rpcrdma_req, rl_all);
+ list_del(&req->rl_all);
+
+ spin_unlock(&buf->rb_reqslock);
+ rpcrdma_destroy_req(ia, req);
+ spin_lock(&buf->rb_reqslock);
+ }
+ spin_unlock(&buf->rb_reqslock);
- kfree(buf->rb_pool);
+ ia->ri_ops->ro_destroy(buf);
}
struct rpcrdma_mw *
@@ -1109,53 +1080,34 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
spin_unlock(&buf->rb_mwlock);
}
-static void
-rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
-{
- buf->rb_send_bufs[--buf->rb_send_index] = req;
- req->rl_niovs = 0;
- if (req->rl_reply) {
- buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
- req->rl_reply = NULL;
- }
-}
-
/*
* Get a set of request/reply buffers.
*
- * Reply buffer (if needed) is attached to send buffer upon return.
- * Rule:
- * rb_send_index and rb_recv_index MUST always be pointing to the
- * *next* available buffer (non-NULL). They are incremented after
- * removing buffers, and decremented *before* returning them.
+ * Reply buffer (if available) is attached to send buffer upon return.
*/
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
struct rpcrdma_req *req;
- unsigned long flags;
-
- spin_lock_irqsave(&buffers->rb_lock, flags);
-
- if (buffers->rb_send_index == buffers->rb_max_requests) {
- spin_unlock_irqrestore(&buffers->rb_lock, flags);
- dprintk("RPC: %s: out of request buffers\n", __func__);
- return ((struct rpcrdma_req *)NULL);
- }
- req = buffers->rb_send_bufs[buffers->rb_send_index];
- if (buffers->rb_send_index < buffers->rb_recv_index) {
- dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
- __func__,
- buffers->rb_recv_index - buffers->rb_send_index);
- req->rl_reply = NULL;
- } else {
- req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
- buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
- }
- buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
+ spin_lock(&buffers->rb_lock);
+ if (list_empty(&buffers->rb_send_bufs))
+ goto out_reqbuf;
+ req = rpcrdma_buffer_get_req_locked(buffers);
+ if (list_empty(&buffers->rb_recv_bufs))
+ goto out_repbuf;
+ req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers);
+ spin_unlock(&buffers->rb_lock);
+ return req;
- spin_unlock_irqrestore(&buffers->rb_lock, flags);
+out_reqbuf:
+ spin_unlock(&buffers->rb_lock);
+ pr_warn("RPC: %s: out of request buffers\n", __func__);
+ return NULL;
+out_repbuf:
+ spin_unlock(&buffers->rb_lock);
+ pr_warn("RPC: %s: out of reply buffers\n", __func__);
+ req->rl_reply = NULL;
return req;
}
@@ -1167,30 +1119,31 @@ void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
struct rpcrdma_buffer *buffers = req->rl_buffer;
- unsigned long flags;
+ struct rpcrdma_rep *rep = req->rl_reply;
+
+ req->rl_niovs = 0;
+ req->rl_reply = NULL;
- spin_lock_irqsave(&buffers->rb_lock, flags);
- rpcrdma_buffer_put_sendbuf(req, buffers);
- spin_unlock_irqrestore(&buffers->rb_lock, flags);
+ spin_lock(&buffers->rb_lock);
+ list_add_tail(&req->rl_free, &buffers->rb_send_bufs);
+ if (rep)
+ list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
+ spin_unlock(&buffers->rb_lock);
}
/*
* Recover reply buffers from pool.
- * This happens when recovering from error conditions.
- * Post-increment counter/array index.
+ * This happens when recovering from disconnect.
*/
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
struct rpcrdma_buffer *buffers = req->rl_buffer;
- unsigned long flags;
- spin_lock_irqsave(&buffers->rb_lock, flags);
- if (buffers->rb_recv_index < buffers->rb_max_requests) {
- req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
- buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
- }
- spin_unlock_irqrestore(&buffers->rb_lock, flags);
+ spin_lock(&buffers->rb_lock);
+ if (!list_empty(&buffers->rb_recv_bufs))
+ req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers);
+ spin_unlock(&buffers->rb_lock);
}
/*
@@ -1201,11 +1154,10 @@ void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;
- unsigned long flags;
- spin_lock_irqsave(&buffers->rb_lock, flags);
- buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
- spin_unlock_irqrestore(&buffers->rb_lock, flags);
+ spin_lock(&buffers->rb_lock);
+ list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
+ spin_unlock(&buffers->rb_lock);
}
/*
@@ -1362,6 +1314,46 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
return rc;
}
+/**
+ * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests
+ * @r_xprt: transport associated with these backchannel resources
+ * @min_reqs: minimum number of incoming requests expected
+ *
+ * Returns zero if all requested buffers were posted, or a negative errno.
+ */
+int
+rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
+{
+ struct rpcrdma_buffer *buffers = &r_xprt->rx_buf;
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+ struct rpcrdma_rep *rep;
+ int rc;
+
+ while (count--) {
+ spin_lock(&buffers->rb_lock);
+ if (list_empty(&buffers->rb_recv_bufs))
+ goto out_reqbuf;
+ rep = rpcrdma_buffer_get_rep_locked(buffers);
+ spin_unlock(&buffers->rb_lock);
+
+ rc = rpcrdma_ep_post_recv(ia, ep, rep);
+ if (rc)
+ goto out_rc;
+ }
+
+ return 0;
+
+out_reqbuf:
+ spin_unlock(&buffers->rb_lock);
+ pr_warn("%s: no extra receive buffers\n", __func__);
+ return -ENOMEM;
+
+out_rc:
+ rpcrdma_recv_buffer_put(rep);
+ return rc;
+}
+
/* How many chunk list items fit within our inline buffers?
*/
unsigned int
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index c09414e6f91b..38fe11b09875 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -55,6 +55,11 @@
#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
+#define RPCRDMA_BIND_TO (60U * HZ)
+#define RPCRDMA_INIT_REEST_TO (5U * HZ)
+#define RPCRDMA_MAX_REEST_TO (30U * HZ)
+#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ)
+
/*
* Interface Adapter -- one per transport instance
*/
@@ -68,7 +73,6 @@ struct rpcrdma_ia {
struct completion ri_done;
int ri_async_rc;
unsigned int ri_max_frmr_depth;
- struct ib_device_attr ri_devattr;
struct ib_qp_attr ri_qp_attr;
struct ib_qp_init_attr ri_qp_init_attr;
};
@@ -77,9 +81,6 @@ struct rpcrdma_ia {
* RDMA Endpoint -- one per transport instance
*/
-#define RPCRDMA_WC_BUDGET (128)
-#define RPCRDMA_POLLSIZE (16)
-
struct rpcrdma_ep {
atomic_t rep_cqcount;
int rep_cqinit;
@@ -89,16 +90,8 @@ struct rpcrdma_ep {
struct rdma_conn_param rep_remote_cma;
struct sockaddr_storage rep_remote_addr;
struct delayed_work rep_connect_worker;
- struct ib_wc rep_send_wcs[RPCRDMA_POLLSIZE];
- struct ib_wc rep_recv_wcs[RPCRDMA_POLLSIZE];
};
-/*
- * Force a signaled SEND Work Request every so often,
- * in case the provider needs to do some housekeeping.
- */
-#define RPCRDMA_MAX_UNSIGNALED_SENDS (32)
-
#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
@@ -106,6 +99,16 @@ struct rpcrdma_ep {
*/
#define RPCRDMA_IGNORE_COMPLETION (0ULL)
+/* Pre-allocate extra Work Requests for handling backward receives
+ * and sends. This is a fixed value because the Work Queues are
+ * allocated when the forward channel is set up.
+ */
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+#define RPCRDMA_BACKWARD_WRS (8)
+#else
+#define RPCRDMA_BACKWARD_WRS (0)
+#endif
+
/* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV
*
* The below structure appears at the front of a large region of kmalloc'd
@@ -143,6 +146,8 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
return (struct rpcrdma_msg *)rb->rg_base;
}
+#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN)
+
/*
* struct rpcrdma_rep -- this structure encapsulates state required to recv
* and complete a reply, asychronously. It needs several pieces of
@@ -169,10 +174,13 @@ struct rpcrdma_rep {
unsigned int rr_len;
struct ib_device *rr_device;
struct rpcrdma_xprt *rr_rxprt;
+ struct work_struct rr_work;
struct list_head rr_list;
struct rpcrdma_regbuf *rr_rdmabuf;
};
+#define RPCRDMA_BAD_LEN (~0U)
+
/*
* struct rpcrdma_mw - external memory region metadata
*
@@ -193,11 +201,18 @@ enum rpcrdma_frmr_state {
};
struct rpcrdma_frmr {
- struct ib_fast_reg_page_list *fr_pgl;
+ struct scatterlist *sg;
+ int sg_nents;
struct ib_mr *fr_mr;
enum rpcrdma_frmr_state fr_state;
struct work_struct fr_work;
struct rpcrdma_xprt *fr_xprt;
+ bool fr_waiter;
+ struct completion fr_linv_done;;
+ union {
+ struct ib_reg_wr fr_regwr;
+ struct ib_send_wr fr_invwr;
+ };
};
struct rpcrdma_fmr {
@@ -255,6 +270,7 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
#define RPCRDMA_MAX_IOVS (2)
struct rpcrdma_req {
+ struct list_head rl_free;
unsigned int rl_niovs;
unsigned int rl_nchunks;
unsigned int rl_connect_cookie;
@@ -264,6 +280,9 @@ struct rpcrdma_req {
struct rpcrdma_regbuf *rl_rdmabuf;
struct rpcrdma_regbuf *rl_sendbuf;
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
+
+ struct list_head rl_all;
+ bool rl_backchannel;
};
static inline struct rpcrdma_req *
@@ -288,12 +307,16 @@ struct rpcrdma_buffer {
struct list_head rb_all;
char *rb_pool;
- spinlock_t rb_lock; /* protect buf arrays */
+ spinlock_t rb_lock; /* protect buf lists */
+ struct list_head rb_send_bufs;
+ struct list_head rb_recv_bufs;
u32 rb_max_requests;
- int rb_send_index;
- int rb_recv_index;
- struct rpcrdma_req **rb_send_bufs;
- struct rpcrdma_rep **rb_recv_bufs;
+
+ u32 rb_bc_srv_max_requests;
+ spinlock_t rb_reqslock; /* protect rb_allreqs */
+ struct list_head rb_allreqs;
+
+ u32 rb_bc_max_requests;
};
#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
@@ -339,6 +362,7 @@ struct rpcrdma_stats {
unsigned long failed_marshal_count;
unsigned long bad_reply_count;
unsigned long nomsg_call_count;
+ unsigned long bcall_count;
};
/*
@@ -348,6 +372,8 @@ struct rpcrdma_xprt;
struct rpcrdma_memreg_ops {
int (*ro_map)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *, int, bool);
+ void (*ro_unmap_sync)(struct rpcrdma_xprt *,
+ struct rpcrdma_req *);
int (*ro_unmap)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *);
int (*ro_open)(struct rpcrdma_ia *,
@@ -414,6 +440,9 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
/*
* Buffer calls - xprtrdma/verbs.c
*/
+struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
+struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
+void rpcrdma_destroy_req(struct rpcrdma_ia *, struct rpcrdma_req *);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
@@ -430,10 +459,14 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *,
struct rpcrdma_regbuf *);
unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
+int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int);
int frwr_alloc_recovery_wq(void);
void frwr_destroy_recovery_wq(void);
+int rpcrdma_alloc_wq(void);
+void rpcrdma_destroy_wq(void);
+
/*
* Wrappers for chunk registration, shared by read/write chunk code.
*/
@@ -491,14 +524,25 @@ int rpcrdma_marshal_req(struct rpc_rqst *);
/* RPC/RDMA module init - xprtrdma/transport.c
*/
+extern unsigned int xprt_rdma_max_inline_read;
+void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
+void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
+void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
int xprt_rdma_init(void);
void xprt_rdma_cleanup(void);
-/* Temporary NFS request map cache. Created in svc_rdma.c */
-extern struct kmem_cache *svc_rdma_map_cachep;
-/* WR context cache. Created in svc_rdma.c */
-extern struct kmem_cache *svc_rdma_ctxt_cachep;
-/* Workqueue created in svc_rdma.c */
-extern struct workqueue_struct *svc_rdma_wq;
+/* Backchannel calls - xprtrdma/backchannel.c
+ */
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
+int xprt_rdma_bc_up(struct svc_serv *, struct net *);
+int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
+void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
+int rpcrdma_bc_marshal_reply(struct rpc_rqst *);
+void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
+void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+
+extern struct xprt_class xprt_rdma_bc;
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 1a85e0ed0b48..fde2138b81e7 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -360,8 +360,10 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
int flags = XS_SENDMSG_FLAGS;
remainder -= len;
- if (remainder != 0 || more)
+ if (more)
flags |= MSG_MORE;
+ if (remainder != 0)
+ flags |= MSG_SENDPAGE_NOTLAST | MSG_MORE;
err = do_sendpage(sock, *ppage, base, len, flags);
if (remainder == 0 || err != len)
break;
@@ -396,7 +398,6 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
if (unlikely(!sock))
return -ENOTSOCK;
- clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
if (base != 0) {
addr = NULL;
addrlen = 0;
@@ -440,7 +441,6 @@ static void xs_nospace_callback(struct rpc_task *task)
struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt);
transport->inet->sk_write_pending--;
- clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
}
/**
@@ -465,20 +465,11 @@ static int xs_nospace(struct rpc_task *task)
/* Don't race with disconnect */
if (xprt_connected(xprt)) {
- if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
- /*
- * Notify TCP that we're limited by the application
- * window size
- */
- set_bit(SOCK_NOSPACE, &transport->sock->flags);
- sk->sk_write_pending++;
- /* ...and wait for more buffer space */
- xprt_wait_for_buffer_space(task, xs_nospace_callback);
- }
- } else {
- clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+ /* wait for more buffer space */
+ sk->sk_write_pending++;
+ xprt_wait_for_buffer_space(task, xs_nospace_callback);
+ } else
ret = -ENOTCONN;
- }
spin_unlock_bh(&xprt->transport_lock);
@@ -614,9 +605,6 @@ process_status:
case -EAGAIN:
status = xs_nospace(task);
break;
- default:
- dprintk("RPC: sendmsg returned unrecognized error %d\n",
- -status);
case -ENETUNREACH:
case -ENOBUFS:
case -EPIPE:
@@ -624,7 +612,10 @@ process_status:
case -EPERM:
/* When the server has died, an ICMP port unreachable message
* prompts ECONNREFUSED. */
- clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+ break;
+ default:
+ dprintk("RPC: sendmsg returned unrecognized error %d\n",
+ -status);
}
return status;
@@ -704,16 +695,16 @@ static int xs_tcp_send_request(struct rpc_task *task)
case -EAGAIN:
status = xs_nospace(task);
break;
- default:
- dprintk("RPC: sendmsg returned unrecognized error %d\n",
- -status);
case -ECONNRESET:
case -ECONNREFUSED:
case -ENOTCONN:
case -EADDRINUSE:
case -ENOBUFS:
case -EPIPE:
- clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+ break;
+ default:
+ dprintk("RPC: sendmsg returned unrecognized error %d\n",
+ -status);
}
return status;
@@ -823,6 +814,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
kernel_sock_shutdown(sock, SHUT_RDWR);
+ mutex_lock(&transport->recv_mutex);
write_lock_bh(&sk->sk_callback_lock);
transport->inet = NULL;
transport->sock = NULL;
@@ -833,6 +825,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
xprt_clear_connected(xprt);
write_unlock_bh(&sk->sk_callback_lock);
xs_sock_reset_connection_flags(xprt);
+ mutex_unlock(&transport->recv_mutex);
trace_rpc_socket_close(xprt, sock);
sock_release(sock);
@@ -886,6 +879,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
cancel_delayed_work_sync(&transport->connect_worker);
xs_close(xprt);
+ cancel_work_sync(&transport->recv_worker);
xs_xprt_free(xprt);
module_put(THIS_MODULE);
}
@@ -906,44 +900,36 @@ static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
}
/**
- * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets
- * @sk: socket with data to read
+ * xs_local_data_read_skb
+ * @xprt: transport
+ * @sk: socket
+ * @skb: skbuff
*
* Currently this assumes we can read the whole reply in a single gulp.
*/
-static void xs_local_data_ready(struct sock *sk)
+static void xs_local_data_read_skb(struct rpc_xprt *xprt,
+ struct sock *sk,
+ struct sk_buff *skb)
{
struct rpc_task *task;
- struct rpc_xprt *xprt;
struct rpc_rqst *rovr;
- struct sk_buff *skb;
- int err, repsize, copied;
+ int repsize, copied;
u32 _xid;
__be32 *xp;
- read_lock_bh(&sk->sk_callback_lock);
- dprintk("RPC: %s...\n", __func__);
- xprt = xprt_from_sock(sk);
- if (xprt == NULL)
- goto out;
-
- skb = skb_recv_datagram(sk, 0, 1, &err);
- if (skb == NULL)
- goto out;
-
repsize = skb->len - sizeof(rpc_fraghdr);
if (repsize < 4) {
dprintk("RPC: impossible RPC reply size %d\n", repsize);
- goto dropit;
+ return;
}
/* Copy the XID from the skb... */
xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid);
if (xp == NULL)
- goto dropit;
+ return;
/* Look up and lock the request corresponding to the given XID */
- spin_lock(&xprt->transport_lock);
+ spin_lock_bh(&xprt->transport_lock);
rovr = xprt_lookup_rqst(xprt, *xp);
if (!rovr)
goto out_unlock;
@@ -961,50 +947,68 @@ static void xs_local_data_ready(struct sock *sk)
xprt_complete_rqst(task, copied);
out_unlock:
- spin_unlock(&xprt->transport_lock);
- dropit:
- skb_free_datagram(sk, skb);
- out:
- read_unlock_bh(&sk->sk_callback_lock);
+ spin_unlock_bh(&xprt->transport_lock);
+}
+
+static void xs_local_data_receive(struct sock_xprt *transport)
+{
+ struct sk_buff *skb;
+ struct sock *sk;
+ int err;
+
+ mutex_lock(&transport->recv_mutex);
+ sk = transport->inet;
+ if (sk == NULL)
+ goto out;
+ for (;;) {
+ skb = skb_recv_datagram(sk, 0, 1, &err);
+ if (skb == NULL)
+ break;
+ xs_local_data_read_skb(&transport->xprt, sk, skb);
+ skb_free_datagram(sk, skb);
+ }
+out:
+ mutex_unlock(&transport->recv_mutex);
+}
+
+static void xs_local_data_receive_workfn(struct work_struct *work)
+{
+ struct sock_xprt *transport =
+ container_of(work, struct sock_xprt, recv_worker);
+ xs_local_data_receive(transport);
}
/**
- * xs_udp_data_ready - "data ready" callback for UDP sockets
- * @sk: socket with data to read
+ * xs_udp_data_read_skb - receive callback for UDP sockets
+ * @xprt: transport
+ * @sk: socket
+ * @skb: skbuff
*
*/
-static void xs_udp_data_ready(struct sock *sk)
+static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
+ struct sock *sk,
+ struct sk_buff *skb)
{
struct rpc_task *task;
- struct rpc_xprt *xprt;
struct rpc_rqst *rovr;
- struct sk_buff *skb;
- int err, repsize, copied;
+ int repsize, copied;
u32 _xid;
__be32 *xp;
- read_lock_bh(&sk->sk_callback_lock);
- dprintk("RPC: xs_udp_data_ready...\n");
- if (!(xprt = xprt_from_sock(sk)))
- goto out;
-
- if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL)
- goto out;
-
repsize = skb->len - sizeof(struct udphdr);
if (repsize < 4) {
dprintk("RPC: impossible RPC reply size %d!\n", repsize);
- goto dropit;
+ return;
}
/* Copy the XID from the skb... */
xp = skb_header_pointer(skb, sizeof(struct udphdr),
sizeof(_xid), &_xid);
if (xp == NULL)
- goto dropit;
+ return;
/* Look up and lock the request corresponding to the given XID */
- spin_lock(&xprt->transport_lock);
+ spin_lock_bh(&xprt->transport_lock);
rovr = xprt_lookup_rqst(xprt, *xp);
if (!rovr)
goto out_unlock;
@@ -1025,10 +1029,54 @@ static void xs_udp_data_ready(struct sock *sk)
xprt_complete_rqst(task, copied);
out_unlock:
- spin_unlock(&xprt->transport_lock);
- dropit:
- skb_free_datagram(sk, skb);
- out:
+ spin_unlock_bh(&xprt->transport_lock);
+}
+
+static void xs_udp_data_receive(struct sock_xprt *transport)
+{
+ struct sk_buff *skb;
+ struct sock *sk;
+ int err;
+
+ mutex_lock(&transport->recv_mutex);
+ sk = transport->inet;
+ if (sk == NULL)
+ goto out;
+ for (;;) {
+ skb = skb_recv_datagram(sk, 0, 1, &err);
+ if (skb == NULL)
+ break;
+ xs_udp_data_read_skb(&transport->xprt, sk, skb);
+ skb_free_datagram(sk, skb);
+ }
+out:
+ mutex_unlock(&transport->recv_mutex);
+}
+
+static void xs_udp_data_receive_workfn(struct work_struct *work)
+{
+ struct sock_xprt *transport =
+ container_of(work, struct sock_xprt, recv_worker);
+ xs_udp_data_receive(transport);
+}
+
+/**
+ * xs_data_ready - "data ready" callback for UDP sockets
+ * @sk: socket with data to read
+ *
+ */
+static void xs_data_ready(struct sock *sk)
+{
+ struct rpc_xprt *xprt;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ dprintk("RPC: xs_data_ready...\n");
+ xprt = xprt_from_sock(sk);
+ if (xprt != NULL) {
+ struct sock_xprt *transport = container_of(xprt,
+ struct sock_xprt, xprt);
+ queue_work(rpciod_workqueue, &transport->recv_worker);
+ }
read_unlock_bh(&sk->sk_callback_lock);
}
@@ -1243,12 +1291,12 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid));
/* Find and lock the request corresponding to this xid */
- spin_lock(&xprt->transport_lock);
+ spin_lock_bh(&xprt->transport_lock);
req = xprt_lookup_rqst(xprt, transport->tcp_xid);
if (!req) {
dprintk("RPC: XID %08x request not found!\n",
ntohl(transport->tcp_xid));
- spin_unlock(&xprt->transport_lock);
+ spin_unlock_bh(&xprt->transport_lock);
return -1;
}
@@ -1257,7 +1305,7 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
xprt_complete_rqst(req->rq_task, transport->tcp_copied);
- spin_unlock(&xprt->transport_lock);
+ spin_unlock_bh(&xprt->transport_lock);
return 0;
}
@@ -1277,10 +1325,10 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
struct rpc_rqst *req;
/* Look up and lock the request corresponding to the given XID */
- spin_lock(&xprt->transport_lock);
+ spin_lock_bh(&xprt->transport_lock);
req = xprt_lookup_bc_request(xprt, transport->tcp_xid);
if (req == NULL) {
- spin_unlock(&xprt->transport_lock);
+ spin_unlock_bh(&xprt->transport_lock);
printk(KERN_WARNING "Callback slot table overflowed\n");
xprt_force_disconnect(xprt);
return -1;
@@ -1291,7 +1339,7 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
xprt_complete_bc_request(req, transport->tcp_copied);
- spin_unlock(&xprt->transport_lock);
+ spin_unlock_bh(&xprt->transport_lock);
return 0;
}
@@ -1306,6 +1354,17 @@ static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
xs_tcp_read_reply(xprt, desc) :
xs_tcp_read_callback(xprt, desc);
}
+
+static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net)
+{
+ int ret;
+
+ ret = svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0,
+ SVC_SOCK_ANONYMOUS);
+ if (ret < 0)
+ return ret;
+ return 0;
+}
#else
static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
struct xdr_skb_reader *desc)
@@ -1391,6 +1450,44 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
return len - desc.count;
}
+static void xs_tcp_data_receive(struct sock_xprt *transport)
+{
+ struct rpc_xprt *xprt = &transport->xprt;
+ struct sock *sk;
+ read_descriptor_t rd_desc = {
+ .count = 2*1024*1024,
+ .arg.data = xprt,
+ };
+ unsigned long total = 0;
+ int read = 0;
+
+ mutex_lock(&transport->recv_mutex);
+ sk = transport->inet;
+ if (sk == NULL)
+ goto out;
+
+ /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
+ for (;;) {
+ lock_sock(sk);
+ read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
+ release_sock(sk);
+ if (read <= 0)
+ break;
+ total += read;
+ rd_desc.count = 65536;
+ }
+out:
+ mutex_unlock(&transport->recv_mutex);
+ trace_xs_tcp_data_ready(xprt, read, total);
+}
+
+static void xs_tcp_data_receive_workfn(struct work_struct *work)
+{
+ struct sock_xprt *transport =
+ container_of(work, struct sock_xprt, recv_worker);
+ xs_tcp_data_receive(transport);
+}
+
/**
* xs_tcp_data_ready - "data ready" callback for TCP sockets
* @sk: socket with data to read
@@ -1398,34 +1495,24 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
*/
static void xs_tcp_data_ready(struct sock *sk)
{
+ struct sock_xprt *transport;
struct rpc_xprt *xprt;
- read_descriptor_t rd_desc;
- int read;
- unsigned long total = 0;
dprintk("RPC: xs_tcp_data_ready...\n");
read_lock_bh(&sk->sk_callback_lock);
- if (!(xprt = xprt_from_sock(sk))) {
- read = 0;
+ if (!(xprt = xprt_from_sock(sk)))
goto out;
- }
+ transport = container_of(xprt, struct sock_xprt, xprt);
+
/* Any data means we had a useful conversation, so
* the we don't need to delay the next reconnect
*/
if (xprt->reestablish_timeout)
xprt->reestablish_timeout = 0;
+ queue_work(rpciod_workqueue, &transport->recv_worker);
- /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
- rd_desc.arg.data = xprt;
- do {
- rd_desc.count = 65536;
- read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
- if (read > 0)
- total += read;
- } while (read > 0);
out:
- trace_xs_tcp_data_ready(xprt, read, total);
read_unlock_bh(&sk->sk_callback_lock);
}
@@ -1511,19 +1598,23 @@ static void xs_tcp_state_change(struct sock *sk)
static void xs_write_space(struct sock *sk)
{
- struct socket *sock;
+ struct socket_wq *wq;
struct rpc_xprt *xprt;
- if (unlikely(!(sock = sk->sk_socket)))
+ if (!sk->sk_socket)
return;
- clear_bit(SOCK_NOSPACE, &sock->flags);
+ clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
if (unlikely(!(xprt = xprt_from_sock(sk))))
return;
- if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
- return;
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0)
+ goto out;
xprt_write_space(xprt);
+out:
+ rcu_read_unlock();
}
/**
@@ -1809,18 +1900,6 @@ static inline void xs_reclassify_socket(int family, struct socket *sock)
}
}
#else
-static inline void xs_reclassify_socketu(struct socket *sock)
-{
-}
-
-static inline void xs_reclassify_socket4(struct socket *sock)
-{
-}
-
-static inline void xs_reclassify_socket6(struct socket *sock)
-{
-}
-
static inline void xs_reclassify_socket(int family, struct socket *sock)
{
}
@@ -1873,7 +1952,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
xs_save_old_callbacks(transport, sk);
sk->sk_user_data = xprt;
- sk->sk_data_ready = xs_local_data_ready;
+ sk->sk_data_ready = xs_data_ready;
sk->sk_write_space = xs_udp_write_space;
sk->sk_error_report = xs_error_report;
sk->sk_allocation = GFP_NOIO;
@@ -1910,7 +1989,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
"transport socket (%d).\n", -status);
goto out;
}
- xs_reclassify_socketu(sock);
+ xs_reclassify_socket(AF_LOCAL, sock);
dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n",
xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
@@ -2059,7 +2138,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
xs_save_old_callbacks(transport, sk);
sk->sk_user_data = xprt;
- sk->sk_data_ready = xs_udp_data_ready;
+ sk->sk_data_ready = xs_data_ready;
sk->sk_write_space = xs_udp_write_space;
sk->sk_allocation = GFP_NOIO;
@@ -2472,7 +2551,7 @@ static int bc_send_request(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct svc_xprt *xprt;
- u32 len;
+ int len;
dprintk("sending request with xid: %08x\n", ntohl(req->rq_xid));
/*
@@ -2580,6 +2659,12 @@ static struct rpc_xprt_ops xs_tcp_ops = {
.enable_swap = xs_enable_swap,
.disable_swap = xs_disable_swap,
.inject_disconnect = xs_inject_disconnect,
+#ifdef CONFIG_SUNRPC_BACKCHANNEL
+ .bc_setup = xprt_setup_bc,
+ .bc_up = xs_tcp_bc_up,
+ .bc_free_rqst = xprt_free_bc_rqst,
+ .bc_destroy = xprt_destroy_bc,
+#endif
};
/*
@@ -2650,6 +2735,7 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
}
new = container_of(xprt, struct sock_xprt, xprt);
+ mutex_init(&new->recv_mutex);
memcpy(&xprt->addr, args->dstaddr, args->addrlen);
xprt->addrlen = args->addrlen;
if (args->srcaddr)
@@ -2703,6 +2789,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
xprt->ops = &xs_local_ops;
xprt->timeout = &xs_local_default_timeout;
+ INIT_WORK(&transport->recv_worker, xs_local_data_receive_workfn);
INIT_DELAYED_WORK(&transport->connect_worker,
xs_dummy_setup_socket);
@@ -2774,21 +2861,20 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
xprt->timeout = &xs_udp_default_timeout;
+ INIT_WORK(&transport->recv_worker, xs_udp_data_receive_workfn);
+ INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_setup_socket);
+
switch (addr->sa_family) {
case AF_INET:
if (((struct sockaddr_in *)addr)->sin_port != htons(0))
xprt_set_bound(xprt);
- INIT_DELAYED_WORK(&transport->connect_worker,
- xs_udp_setup_socket);
xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
break;
case AF_INET6:
if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
xprt_set_bound(xprt);
- INIT_DELAYED_WORK(&transport->connect_worker,
- xs_udp_setup_socket);
xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
break;
default:
@@ -2853,21 +2939,20 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
xprt->ops = &xs_tcp_ops;
xprt->timeout = &xs_tcp_default_timeout;
+ INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
+ INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
+
switch (addr->sa_family) {
case AF_INET:
if (((struct sockaddr_in *)addr)->sin_port != htons(0))
xprt_set_bound(xprt);
- INIT_DELAYED_WORK(&transport->connect_worker,
- xs_tcp_setup_socket);
xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
break;
case AF_INET6:
if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
xprt_set_bound(xprt);
- INIT_DELAYED_WORK(&transport->connect_worker,
- xs_tcp_setup_socket);
xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
break;
default:
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index f34e535e93bd..8b5833c1ff2e 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -20,6 +20,7 @@
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/if_vlan.h>
+#include <linux/rtnetlink.h>
#include <net/ip_fib.h>
#include <net/switchdev.h>
@@ -345,6 +346,8 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj)
return sizeof(struct switchdev_obj_ipv4_fib);
case SWITCHDEV_OBJ_ID_PORT_FDB:
return sizeof(struct switchdev_obj_port_fdb);
+ case SWITCHDEV_OBJ_ID_PORT_MDB:
+ return sizeof(struct switchdev_obj_port_mdb);
default:
BUG();
}
@@ -565,7 +568,6 @@ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
-static DEFINE_MUTEX(switchdev_mutex);
static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
/**
@@ -580,9 +582,9 @@ int register_switchdev_notifier(struct notifier_block *nb)
{
int err;
- mutex_lock(&switchdev_mutex);
+ rtnl_lock();
err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
- mutex_unlock(&switchdev_mutex);
+ rtnl_unlock();
return err;
}
EXPORT_SYMBOL_GPL(register_switchdev_notifier);
@@ -598,9 +600,9 @@ int unregister_switchdev_notifier(struct notifier_block *nb)
{
int err;
- mutex_lock(&switchdev_mutex);
+ rtnl_lock();
err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
- mutex_unlock(&switchdev_mutex);
+ rtnl_unlock();
return err;
}
EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
@@ -614,16 +616,17 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
* Call all network notifier blocks. This should be called by driver
* when it needs to propagate hardware event.
* Return values are same as for atomic_notifier_call_chain().
+ * rtnl_lock must be held.
*/
int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
struct switchdev_notifier_info *info)
{
int err;
+ ASSERT_RTNL();
+
info->dev = dev;
- mutex_lock(&switchdev_mutex);
err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
- mutex_unlock(&switchdev_mutex);
return err;
}
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
@@ -723,6 +726,7 @@ static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
u32 filter_mask)
{
struct switchdev_vlan_dump dump = {
+ .vlan.obj.orig_dev = dev,
.vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
.skb = skb,
.filter_mask = filter_mask,
@@ -757,6 +761,7 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
int nlflags)
{
struct switchdev_attr attr = {
+ .orig_dev = dev,
.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
};
u16 mode = BRIDGE_MODE_UNDEF;
@@ -778,6 +783,7 @@ static int switchdev_port_br_setflag(struct net_device *dev,
unsigned long brport_flag)
{
struct switchdev_attr attr = {
+ .orig_dev = dev,
.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
};
u8 flag = nla_get_u8(nlattr);
@@ -853,6 +859,7 @@ static int switchdev_port_br_afspec(struct net_device *dev,
struct nlattr *attr;
struct bridge_vlan_info *vinfo;
struct switchdev_obj_port_vlan vlan = {
+ .obj.orig_dev = dev,
.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
};
int rem;
@@ -975,6 +982,7 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
u16 vid, u16 nlm_flags)
{
struct switchdev_obj_port_fdb fdb = {
+ .obj.orig_dev = dev,
.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
.vid = vid,
};
@@ -1000,6 +1008,7 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
u16 vid)
{
struct switchdev_obj_port_fdb fdb = {
+ .obj.orig_dev = dev,
.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
.vid = vid,
};
@@ -1077,14 +1086,18 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *filter_dev, int idx)
{
struct switchdev_fdb_dump dump = {
+ .fdb.obj.orig_dev = dev,
.fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
.dev = dev,
.skb = skb,
.cb = cb,
.idx = idx,
};
+ int err;
- switchdev_port_obj_dump(dev, &dump.fdb.obj, switchdev_port_fdb_dump_cb);
+ err = switchdev_port_obj_dump(dev, &dump.fdb.obj,
+ switchdev_port_fdb_dump_cb);
+ cb->args[1] = err;
return dump.idx;
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
@@ -1135,6 +1148,7 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
if (!dev)
return NULL;
+ attr.orig_dev = dev;
if (switchdev_port_attr_get(dev, &attr))
return NULL;
@@ -1194,6 +1208,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
if (!dev)
return 0;
+ ipv4_fib.obj.orig_dev = dev;
err = switchdev_port_obj_add(dev, &ipv4_fib.obj);
if (!err)
fi->fib_flags |= RTNH_F_OFFLOAD;
@@ -1238,6 +1253,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
if (!dev)
return 0;
+ ipv4_fib.obj.orig_dev = dev;
err = switchdev_port_obj_del(dev, &ipv4_fib.obj);
if (!err)
fi->fib_flags &= ~RTNH_F_OFFLOAD;
@@ -1270,10 +1286,12 @@ static bool switchdev_port_same_parent_id(struct net_device *a,
struct net_device *b)
{
struct switchdev_attr a_attr = {
+ .orig_dev = a,
.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
.flags = SWITCHDEV_F_NO_RECURSE,
};
struct switchdev_attr b_attr = {
+ .orig_dev = b,
.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
.flags = SWITCHDEV_F_NO_RECURSE,
};
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 9dc239dfe192..e401108360a2 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -332,131 +332,15 @@ void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l)
tipc_sk_rcv(net, inputq);
}
-static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb,
- struct tipc_stats *stats)
-{
- int i;
- struct nlattr *nest;
-
- struct nla_map {
- __u32 key;
- __u32 val;
- };
-
- struct nla_map map[] = {
- {TIPC_NLA_STATS_RX_INFO, stats->recv_info},
- {TIPC_NLA_STATS_RX_FRAGMENTS, stats->recv_fragments},
- {TIPC_NLA_STATS_RX_FRAGMENTED, stats->recv_fragmented},
- {TIPC_NLA_STATS_RX_BUNDLES, stats->recv_bundles},
- {TIPC_NLA_STATS_RX_BUNDLED, stats->recv_bundled},
- {TIPC_NLA_STATS_TX_INFO, stats->sent_info},
- {TIPC_NLA_STATS_TX_FRAGMENTS, stats->sent_fragments},
- {TIPC_NLA_STATS_TX_FRAGMENTED, stats->sent_fragmented},
- {TIPC_NLA_STATS_TX_BUNDLES, stats->sent_bundles},
- {TIPC_NLA_STATS_TX_BUNDLED, stats->sent_bundled},
- {TIPC_NLA_STATS_RX_NACKS, stats->recv_nacks},
- {TIPC_NLA_STATS_RX_DEFERRED, stats->deferred_recv},
- {TIPC_NLA_STATS_TX_NACKS, stats->sent_nacks},
- {TIPC_NLA_STATS_TX_ACKS, stats->sent_acks},
- {TIPC_NLA_STATS_RETRANSMITTED, stats->retransmitted},
- {TIPC_NLA_STATS_DUPLICATES, stats->duplicates},
- {TIPC_NLA_STATS_LINK_CONGS, stats->link_congs},
- {TIPC_NLA_STATS_MAX_QUEUE, stats->max_queue_sz},
- {TIPC_NLA_STATS_AVG_QUEUE, stats->queue_sz_counts ?
- (stats->accu_queue_sz / stats->queue_sz_counts) : 0}
- };
-
- nest = nla_nest_start(skb, TIPC_NLA_LINK_STATS);
- if (!nest)
- return -EMSGSIZE;
-
- for (i = 0; i < ARRAY_SIZE(map); i++)
- if (nla_put_u32(skb, map[i].key, map[i].val))
- goto msg_full;
-
- nla_nest_end(skb, nest);
-
- return 0;
-msg_full:
- nla_nest_cancel(skb, nest);
-
- return -EMSGSIZE;
-}
-
-int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
-{
- int err;
- void *hdr;
- struct nlattr *attrs;
- struct nlattr *prop;
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_link *bcl = tn->bcl;
-
- if (!bcl)
- return 0;
-
- tipc_bcast_lock(net);
-
- hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
- NLM_F_MULTI, TIPC_NL_LINK_GET);
- if (!hdr)
- return -EMSGSIZE;
-
- attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK);
- if (!attrs)
- goto msg_full;
-
- /* The broadcast link is always up */
- if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP))
- goto attr_msg_full;
-
- if (nla_put_flag(msg->skb, TIPC_NLA_LINK_BROADCAST))
- goto attr_msg_full;
- if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name))
- goto attr_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->rcv_nxt))
- goto attr_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->snd_nxt))
- goto attr_msg_full;
-
- prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP);
- if (!prop)
- goto attr_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window))
- goto prop_msg_full;
- nla_nest_end(msg->skb, prop);
-
- err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats);
- if (err)
- goto attr_msg_full;
-
- tipc_bcast_unlock(net);
- nla_nest_end(msg->skb, attrs);
- genlmsg_end(msg->skb, hdr);
-
- return 0;
-
-prop_msg_full:
- nla_nest_cancel(msg->skb, prop);
-attr_msg_full:
- nla_nest_cancel(msg->skb, attrs);
-msg_full:
- tipc_bcast_unlock(net);
- genlmsg_cancel(msg->skb, hdr);
-
- return -EMSGSIZE;
-}
-
int tipc_bclink_reset_stats(struct net *net)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_link *bcl = tn->bcl;
+ struct tipc_link *l = tipc_bc_sndlink(net);
- if (!bcl)
+ if (!l)
return -ENOPROTOOPT;
tipc_bcast_lock(net);
- memset(&bcl->stats, 0, sizeof(bcl->stats));
+ tipc_link_reset_stats(l);
tipc_bcast_unlock(net);
return 0;
}
@@ -530,9 +414,7 @@ enomem:
void tipc_bcast_reinit(struct net *net)
{
- struct tipc_bc_base *b = tipc_bc_base(net);
-
- msg_set_prevnode(b->link->pmsg, tipc_own_addr(net));
+ tipc_link_reinit(tipc_bc_sndlink(net), tipc_own_addr(net));
}
void tipc_bcast_stop(struct net *net)
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 2855b9356a15..1944c6c00bb9 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -43,6 +43,7 @@ struct tipc_node;
struct tipc_msg;
struct tipc_nl_msg;
struct tipc_node_map;
+extern const char tipc_bclink_name[];
int tipc_bcast_init(struct net *net);
void tipc_bcast_reinit(struct net *net);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 648f2a67f314..802ffad3200d 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -71,7 +71,7 @@ static const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = {
[TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED }
};
-static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr);
+static void bearer_disable(struct net *net, struct tipc_bearer *b);
/**
* tipc_media_find - locates specified media object by name
@@ -107,13 +107,13 @@ static struct tipc_media *media_find_id(u8 type)
void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a)
{
char addr_str[MAX_ADDR_STR];
- struct tipc_media *m_ptr;
+ struct tipc_media *m;
int ret;
- m_ptr = media_find_id(a->media_id);
+ m = media_find_id(a->media_id);
- if (m_ptr && !m_ptr->addr2str(a, addr_str, sizeof(addr_str)))
- ret = scnprintf(buf, len, "%s(%s)", m_ptr->name, addr_str);
+ if (m && !m->addr2str(a, addr_str, sizeof(addr_str)))
+ ret = scnprintf(buf, len, "%s(%s)", m->name, addr_str);
else {
u32 i;
@@ -175,13 +175,13 @@ static int bearer_name_validate(const char *name,
struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_bearer *b_ptr;
+ struct tipc_bearer *b;
u32 i;
for (i = 0; i < MAX_BEARERS; i++) {
- b_ptr = rtnl_dereference(tn->bearer_list[i]);
- if (b_ptr && (!strcmp(b_ptr->name, name)))
- return b_ptr;
+ b = rtnl_dereference(tn->bearer_list[i]);
+ if (b && (!strcmp(b->name, name)))
+ return b;
}
return NULL;
}
@@ -189,24 +189,24 @@ struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name)
void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_bearer *b_ptr;
+ struct tipc_bearer *b;
rcu_read_lock();
- b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
- if (b_ptr)
- tipc_disc_add_dest(b_ptr->link_req);
+ b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+ if (b)
+ tipc_disc_add_dest(b->link_req);
rcu_read_unlock();
}
void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_bearer *b_ptr;
+ struct tipc_bearer *b;
rcu_read_lock();
- b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
- if (b_ptr)
- tipc_disc_remove_dest(b_ptr->link_req);
+ b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+ if (b)
+ tipc_disc_remove_dest(b->link_req);
rcu_read_unlock();
}
@@ -218,8 +218,8 @@ static int tipc_enable_bearer(struct net *net, const char *name,
struct nlattr *attr[])
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_bearer *b_ptr;
- struct tipc_media *m_ptr;
+ struct tipc_bearer *b;
+ struct tipc_media *m;
struct tipc_bearer_names b_names;
char addr_string[16];
u32 bearer_id;
@@ -255,31 +255,31 @@ static int tipc_enable_bearer(struct net *net, const char *name,
return -EINVAL;
}
- m_ptr = tipc_media_find(b_names.media_name);
- if (!m_ptr) {
+ m = tipc_media_find(b_names.media_name);
+ if (!m) {
pr_warn("Bearer <%s> rejected, media <%s> not registered\n",
name, b_names.media_name);
return -EINVAL;
}
if (priority == TIPC_MEDIA_LINK_PRI)
- priority = m_ptr->priority;
+ priority = m->priority;
restart:
bearer_id = MAX_BEARERS;
with_this_prio = 1;
for (i = MAX_BEARERS; i-- != 0; ) {
- b_ptr = rtnl_dereference(tn->bearer_list[i]);
- if (!b_ptr) {
+ b = rtnl_dereference(tn->bearer_list[i]);
+ if (!b) {
bearer_id = i;
continue;
}
- if (!strcmp(name, b_ptr->name)) {
+ if (!strcmp(name, b->name)) {
pr_warn("Bearer <%s> rejected, already enabled\n",
name);
return -EINVAL;
}
- if ((b_ptr->priority == priority) &&
+ if ((b->priority == priority) &&
(++with_this_prio > 2)) {
if (priority-- == 0) {
pr_warn("Bearer <%s> rejected, duplicate priority\n",
@@ -297,35 +297,35 @@ restart:
return -EINVAL;
}
- b_ptr = kzalloc(sizeof(*b_ptr), GFP_ATOMIC);
- if (!b_ptr)
+ b = kzalloc(sizeof(*b), GFP_ATOMIC);
+ if (!b)
return -ENOMEM;
- strcpy(b_ptr->name, name);
- b_ptr->media = m_ptr;
- res = m_ptr->enable_media(net, b_ptr, attr);
+ strcpy(b->name, name);
+ b->media = m;
+ res = m->enable_media(net, b, attr);
if (res) {
pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
name, -res);
return -EINVAL;
}
- b_ptr->identity = bearer_id;
- b_ptr->tolerance = m_ptr->tolerance;
- b_ptr->window = m_ptr->window;
- b_ptr->domain = disc_domain;
- b_ptr->net_plane = bearer_id + 'A';
- b_ptr->priority = priority;
+ b->identity = bearer_id;
+ b->tolerance = m->tolerance;
+ b->window = m->window;
+ b->domain = disc_domain;
+ b->net_plane = bearer_id + 'A';
+ b->priority = priority;
- res = tipc_disc_create(net, b_ptr, &b_ptr->bcast_addr);
+ res = tipc_disc_create(net, b, &b->bcast_addr);
if (res) {
- bearer_disable(net, b_ptr);
+ bearer_disable(net, b);
pr_warn("Bearer <%s> rejected, discovery object creation failed\n",
name);
return -EINVAL;
}
- rcu_assign_pointer(tn->bearer_list[bearer_id], b_ptr);
+ rcu_assign_pointer(tn->bearer_list[bearer_id], b);
pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
name,
@@ -336,11 +336,11 @@ restart:
/**
* tipc_reset_bearer - Reset all links established over this bearer
*/
-static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr)
+static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b)
{
- pr_info("Resetting bearer <%s>\n", b_ptr->name);
- tipc_node_delete_links(net, b_ptr->identity);
- tipc_disc_reset(net, b_ptr);
+ pr_info("Resetting bearer <%s>\n", b->name);
+ tipc_node_delete_links(net, b->identity);
+ tipc_disc_reset(net, b);
return 0;
}
@@ -349,26 +349,26 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr)
*
* Note: This routine assumes caller holds RTNL lock.
*/
-static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr)
+static void bearer_disable(struct net *net, struct tipc_bearer *b)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
u32 i;
- pr_info("Disabling bearer <%s>\n", b_ptr->name);
- b_ptr->media->disable_media(b_ptr);
+ pr_info("Disabling bearer <%s>\n", b->name);
+ b->media->disable_media(b);
- tipc_node_delete_links(net, b_ptr->identity);
- RCU_INIT_POINTER(b_ptr->media_ptr, NULL);
- if (b_ptr->link_req)
- tipc_disc_delete(b_ptr->link_req);
+ tipc_node_delete_links(net, b->identity);
+ RCU_INIT_POINTER(b->media_ptr, NULL);
+ if (b->link_req)
+ tipc_disc_delete(b->link_req);
for (i = 0; i < MAX_BEARERS; i++) {
- if (b_ptr == rtnl_dereference(tn->bearer_list[i])) {
+ if (b == rtnl_dereference(tn->bearer_list[i])) {
RCU_INIT_POINTER(tn->bearer_list[i], NULL);
break;
}
}
- kfree_rcu(b_ptr, rcu);
+ kfree_rcu(b, rcu);
}
int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
@@ -411,7 +411,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b)
/**
* tipc_l2_send_msg - send a TIPC packet out over an L2 interface
* @buf: the packet to be sent
- * @b_ptr: the bearer through which the packet is to be sent
+ * @b: the bearer through which the packet is to be sent
* @dest: peer destination address
*/
int tipc_l2_send_msg(struct net *net, struct sk_buff *skb,
@@ -532,14 +532,14 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
- struct tipc_bearer *b_ptr;
+ struct tipc_bearer *b;
rcu_read_lock();
- b_ptr = rcu_dereference_rtnl(dev->tipc_ptr);
- if (likely(b_ptr)) {
+ b = rcu_dereference_rtnl(dev->tipc_ptr);
+ if (likely(b)) {
if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
buf->next = NULL;
- tipc_rcv(dev_net(dev), buf, b_ptr);
+ tipc_rcv(dev_net(dev), buf, b);
rcu_read_unlock();
return NET_RX_SUCCESS;
}
@@ -564,13 +564,13 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
- struct tipc_bearer *b_ptr;
+ struct tipc_bearer *b;
- b_ptr = rtnl_dereference(dev->tipc_ptr);
- if (!b_ptr)
+ b = rtnl_dereference(dev->tipc_ptr);
+ if (!b)
return NOTIFY_DONE;
- b_ptr->mtu = dev->mtu;
+ b->mtu = dev->mtu;
switch (evt) {
case NETDEV_CHANGE:
@@ -578,16 +578,16 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
break;
case NETDEV_GOING_DOWN:
case NETDEV_CHANGEMTU:
- tipc_reset_bearer(net, b_ptr);
+ tipc_reset_bearer(net, b);
break;
case NETDEV_CHANGEADDR:
- b_ptr->media->raw2addr(b_ptr, &b_ptr->addr,
+ b->media->raw2addr(b, &b->addr,
(char *)dev->dev_addr);
- tipc_reset_bearer(net, b_ptr);
+ tipc_reset_bearer(net, b);
break;
case NETDEV_UNREGISTER:
case NETDEV_CHANGENAME:
- bearer_disable(dev_net(dev), b_ptr);
+ bearer_disable(dev_net(dev), b);
break;
}
return NOTIFY_OK;
@@ -623,13 +623,13 @@ void tipc_bearer_cleanup(void)
void tipc_bearer_stop(struct net *net)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_bearer *b_ptr;
+ struct tipc_bearer *b;
u32 i;
for (i = 0; i < MAX_BEARERS; i++) {
- b_ptr = rtnl_dereference(tn->bearer_list[i]);
- if (b_ptr) {
- bearer_disable(net, b_ptr);
+ b = rtnl_dereference(tn->bearer_list[i]);
+ if (b) {
+ bearer_disable(net, b);
tn->bearer_list[i] = NULL;
}
}
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 552185bc4773..e31820516774 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -103,11 +103,11 @@ struct tipc_bearer;
*/
struct tipc_media {
int (*send_msg)(struct net *net, struct sk_buff *buf,
- struct tipc_bearer *b_ptr,
+ struct tipc_bearer *b,
struct tipc_media_addr *dest);
- int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr,
+ int (*enable_media)(struct net *net, struct tipc_bearer *b,
struct nlattr *attr[]);
- void (*disable_media)(struct tipc_bearer *b_ptr);
+ void (*disable_media)(struct tipc_bearer *b);
int (*addr2str)(struct tipc_media_addr *addr,
char *strbuf,
int bufsz);
@@ -176,7 +176,7 @@ struct tipc_bearer_names {
* TIPC routines available to supported media types
*/
-void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr);
+void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b);
/*
* Routines made available to TIPC by supported media types
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 18e95a8020cd..5504d63503df 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -118,6 +118,11 @@ static inline int tipc_netid(struct net *net)
return tipc_net(net)->net_id;
}
+static inline struct list_head *tipc_nodes(struct net *net)
+{
+ return &tipc_net(net)->node_list;
+}
+
static inline u16 mod(u16 x)
{
return x & 0xffffu;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index afe8c47c4085..f1e738e80535 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -75,14 +75,14 @@ struct tipc_link_req {
* tipc_disc_init_msg - initialize a link setup message
* @net: the applicable net namespace
* @type: message type (request or response)
- * @b_ptr: ptr to bearer issuing message
+ * @b: ptr to bearer issuing message
*/
static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type,
- struct tipc_bearer *b_ptr)
+ struct tipc_bearer *b)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_msg *msg;
- u32 dest_domain = b_ptr->domain;
+ u32 dest_domain = b->domain;
msg = buf_msg(buf);
tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type,
@@ -92,16 +92,16 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type,
msg_set_node_capabilities(msg, TIPC_NODE_CAPABILITIES);
msg_set_dest_domain(msg, dest_domain);
msg_set_bc_netid(msg, tn->net_id);
- b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr);
+ b->media->addr2msg(msg_media_addr(msg), &b->addr);
}
/**
* disc_dupl_alert - issue node address duplication alert
- * @b_ptr: pointer to bearer detecting duplication
+ * @b: pointer to bearer detecting duplication
* @node_addr: duplicated node address
* @media_addr: media address advertised by duplicated node
*/
-static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr,
+static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr,
struct tipc_media_addr *media_addr)
{
char node_addr_str[16];
@@ -111,7 +111,7 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr,
tipc_media_addr_printf(media_addr_str, sizeof(media_addr_str),
media_addr);
pr_warn("Duplicate %s using %s seen on <%s>\n", node_addr_str,
- media_addr_str, b_ptr->name);
+ media_addr_str, b->name);
}
/**
@@ -261,13 +261,13 @@ exit:
/**
* tipc_disc_create - create object to send periodic link setup requests
* @net: the applicable net namespace
- * @b_ptr: ptr to bearer issuing requests
+ * @b: ptr to bearer issuing requests
* @dest: destination address for request messages
* @dest_domain: network domain to which links can be established
*
* Returns 0 if successful, otherwise -errno.
*/
-int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr,
+int tipc_disc_create(struct net *net, struct tipc_bearer *b,
struct tipc_media_addr *dest)
{
struct tipc_link_req *req;
@@ -282,17 +282,17 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr,
return -ENOMEM;
}
- tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr);
+ tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b);
memcpy(&req->dest, dest, sizeof(*dest));
req->net = net;
- req->bearer_id = b_ptr->identity;
- req->domain = b_ptr->domain;
+ req->bearer_id = b->identity;
+ req->domain = b->domain;
req->num_nodes = 0;
req->timer_intv = TIPC_LINK_REQ_INIT;
spin_lock_init(&req->lock);
setup_timer(&req->timer, disc_timeout, (unsigned long)req);
mod_timer(&req->timer, jiffies + req->timer_intv);
- b_ptr->link_req = req;
+ b->link_req = req;
skb = skb_clone(req->buf, GFP_ATOMIC);
if (skb)
tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest);
@@ -313,19 +313,19 @@ void tipc_disc_delete(struct tipc_link_req *req)
/**
* tipc_disc_reset - reset object to send periodic link setup requests
* @net: the applicable net namespace
- * @b_ptr: ptr to bearer issuing requests
+ * @b: ptr to bearer issuing requests
* @dest_domain: network domain to which links can be established
*/
-void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr)
+void tipc_disc_reset(struct net *net, struct tipc_bearer *b)
{
- struct tipc_link_req *req = b_ptr->link_req;
+ struct tipc_link_req *req = b->link_req;
struct sk_buff *skb;
spin_lock_bh(&req->lock);
- tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr);
+ tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b);
req->net = net;
- req->bearer_id = b_ptr->identity;
- req->domain = b_ptr->domain;
+ req->bearer_id = b->identity;
+ req->domain = b->domain;
req->num_nodes = 0;
req->timer_intv = TIPC_LINK_REQ_INIT;
mod_timer(&req->timer, jiffies + req->timer_intv);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 9efbdbde2b08..347cdc99ed09 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -45,28 +45,156 @@
#include <linux/pkt_sched.h>
+struct tipc_stats {
+ u32 sent_info; /* used in counting # sent packets */
+ u32 recv_info; /* used in counting # recv'd packets */
+ u32 sent_states;
+ u32 recv_states;
+ u32 sent_probes;
+ u32 recv_probes;
+ u32 sent_nacks;
+ u32 recv_nacks;
+ u32 sent_acks;
+ u32 sent_bundled;
+ u32 sent_bundles;
+ u32 recv_bundled;
+ u32 recv_bundles;
+ u32 retransmitted;
+ u32 sent_fragmented;
+ u32 sent_fragments;
+ u32 recv_fragmented;
+ u32 recv_fragments;
+ u32 link_congs; /* # port sends blocked by congestion */
+ u32 deferred_recv;
+ u32 duplicates;
+ u32 max_queue_sz; /* send queue size high water mark */
+ u32 accu_queue_sz; /* used for send queue size profiling */
+ u32 queue_sz_counts; /* used for send queue size profiling */
+ u32 msg_length_counts; /* used for message length profiling */
+ u32 msg_lengths_total; /* used for message length profiling */
+ u32 msg_length_profile[7]; /* used for msg. length profiling */
+};
+
+/**
+ * struct tipc_link - TIPC link data structure
+ * @addr: network address of link's peer node
+ * @name: link name character string
+ * @media_addr: media address to use when sending messages over link
+ * @timer: link timer
+ * @net: pointer to namespace struct
+ * @refcnt: reference counter for permanent references (owner node & timer)
+ * @peer_session: link session # being used by peer end of link
+ * @peer_bearer_id: bearer id used by link's peer endpoint
+ * @bearer_id: local bearer id used by link
+ * @tolerance: minimum link continuity loss needed to reset link [in ms]
+ * @keepalive_intv: link keepalive timer interval
+ * @abort_limit: # of unacknowledged continuity probes needed to reset link
+ * @state: current state of link FSM
+ * @peer_caps: bitmap describing capabilities of peer node
+ * @silent_intv_cnt: # of timer intervals without any reception from peer
+ * @proto_msg: template for control messages generated by link
+ * @pmsg: convenience pointer to "proto_msg" field
+ * @priority: current link priority
+ * @net_plane: current link network plane ('A' through 'H')
+ * @backlog_limit: backlog queue congestion thresholds (indexed by importance)
+ * @exp_msg_count: # of tunnelled messages expected during link changeover
+ * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset
+ * @mtu: current maximum packet size for this link
+ * @advertised_mtu: advertised own mtu when link is being established
+ * @transmitq: queue for sent, non-acked messages
+ * @backlogq: queue for messages waiting to be sent
+ * @snt_nxt: next sequence number to use for outbound messages
+ * @last_retransmitted: sequence number of most recently retransmitted message
+ * @stale_count: # of identical retransmit requests made by peer
+ * @ackers: # of peers that needs to ack each packet before it can be released
+ * @acked: # last packet acked by a certain peer. Used for broadcast.
+ * @rcv_nxt: next sequence number to expect for inbound messages
+ * @deferred_queue: deferred queue saved OOS b'cast message received from node
+ * @unacked_window: # of inbound messages rx'd without ack'ing back to peer
+ * @inputq: buffer queue for messages to be delivered upwards
+ * @namedq: buffer queue for name table messages to be delivered upwards
+ * @next_out: ptr to first unsent outbound message in queue
+ * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate
+ * @long_msg_seq_no: next identifier to use for outbound fragmented messages
+ * @reasm_buf: head of partially reassembled inbound message fragments
+ * @bc_rcvr: marks that this is a broadcast receiver link
+ * @stats: collects statistics regarding link activity
+ */
+struct tipc_link {
+ u32 addr;
+ char name[TIPC_MAX_LINK_NAME];
+ struct tipc_media_addr *media_addr;
+ struct net *net;
+
+ /* Management and link supervision data */
+ u32 peer_session;
+ u32 peer_bearer_id;
+ u32 bearer_id;
+ u32 tolerance;
+ unsigned long keepalive_intv;
+ u32 abort_limit;
+ u32 state;
+ u16 peer_caps;
+ bool active;
+ u32 silent_intv_cnt;
+ struct {
+ unchar hdr[INT_H_SIZE];
+ unchar body[TIPC_MAX_IF_NAME];
+ } proto_msg;
+ struct tipc_msg *pmsg;
+ u32 priority;
+ char net_plane;
+
+ /* Failover/synch */
+ u16 drop_point;
+ struct sk_buff *failover_reasm_skb;
+
+ /* Max packet negotiation */
+ u16 mtu;
+ u16 advertised_mtu;
+
+ /* Sending */
+ struct sk_buff_head transmq;
+ struct sk_buff_head backlogq;
+ struct {
+ u16 len;
+ u16 limit;
+ } backlog[5];
+ u16 snd_nxt;
+ u16 last_retransm;
+ u16 window;
+ u32 stale_count;
+
+ /* Reception */
+ u16 rcv_nxt;
+ u32 rcv_unacked;
+ struct sk_buff_head deferdq;
+ struct sk_buff_head *inputq;
+ struct sk_buff_head *namedq;
+
+ /* Congestion handling */
+ struct sk_buff_head wakeupq;
+
+ /* Fragmentation/reassembly */
+ struct sk_buff *reasm_buf;
+
+ /* Broadcast */
+ u16 ackers;
+ u16 acked;
+ struct tipc_link *bc_rcvlink;
+ struct tipc_link *bc_sndlink;
+ int nack_state;
+ bool bc_peer_is_up;
+
+ /* Statistics */
+ struct tipc_stats stats;
+};
+
/*
* Error message prefixes
*/
static const char *link_co_err = "Link tunneling error, ";
static const char *link_rst_msg = "Resetting link ";
-static const char tipc_bclink_name[] = "broadcast-link";
-
-static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
- [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC },
- [TIPC_NLA_LINK_NAME] = {
- .type = NLA_STRING,
- .len = TIPC_MAX_LINK_NAME
- },
- [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 },
- [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG },
- [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG },
- [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG },
- [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED },
- [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED },
- [TIPC_NLA_LINK_RX] = { .type = NLA_U32 },
- [TIPC_NLA_LINK_TX] = { .type = NLA_U32 }
-};
/* Properties valid for media, bearar and link */
static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
@@ -117,8 +245,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
u16 rcvgap, int tolerance, int priority,
struct sk_buff_head *xmitq);
-static void link_reset_statistics(struct tipc_link *l_ptr);
-static void link_print(struct tipc_link *l_ptr, const char *str);
+static void link_print(struct tipc_link *l, const char *str);
static void tipc_link_build_nack_msg(struct tipc_link *l,
struct sk_buff_head *xmitq);
static void tipc_link_build_bc_init_msg(struct tipc_link *l,
@@ -183,6 +310,36 @@ void tipc_link_set_active(struct tipc_link *l, bool active)
l->active = active;
}
+u32 tipc_link_id(struct tipc_link *l)
+{
+ return l->peer_bearer_id << 16 | l->bearer_id;
+}
+
+int tipc_link_window(struct tipc_link *l)
+{
+ return l->window;
+}
+
+int tipc_link_prio(struct tipc_link *l)
+{
+ return l->priority;
+}
+
+unsigned long tipc_link_tolerance(struct tipc_link *l)
+{
+ return l->tolerance;
+}
+
+struct sk_buff_head *tipc_link_inputq(struct tipc_link *l)
+{
+ return l->inputq;
+}
+
+char tipc_link_plane(struct tipc_link *l)
+{
+ return l->net_plane;
+}
+
void tipc_link_add_bc_peer(struct tipc_link *snd_l,
struct tipc_link *uc_l,
struct sk_buff_head *xmitq)
@@ -191,6 +348,7 @@ void tipc_link_add_bc_peer(struct tipc_link *snd_l,
snd_l->ackers++;
rcv_l->acked = snd_l->snd_nxt - 1;
+ snd_l->state = LINK_ESTABLISHED;
tipc_link_build_bc_init_msg(uc_l, xmitq);
}
@@ -206,6 +364,7 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
rcv_l->state = LINK_RESET;
if (!snd_l->ackers) {
tipc_link_reset(snd_l);
+ snd_l->state = LINK_RESET;
__skb_queue_purge(xmitq);
}
}
@@ -225,11 +384,31 @@ int tipc_link_mtu(struct tipc_link *l)
return l->mtu;
}
+u16 tipc_link_rcv_nxt(struct tipc_link *l)
+{
+ return l->rcv_nxt;
+}
+
+u16 tipc_link_acked(struct tipc_link *l)
+{
+ return l->acked;
+}
+
+char *tipc_link_name(struct tipc_link *l)
+{
+ return l->name;
+}
+
static u32 link_own_addr(struct tipc_link *l)
{
return msg_prevnode(l->pmsg);
}
+void tipc_link_reinit(struct tipc_link *l, u32 addr)
+{
+ msg_set_prevnode(l->pmsg, addr);
+}
+
/**
* tipc_link_create - create a new link
* @n: pointer to associated node
@@ -692,7 +871,7 @@ void tipc_link_reset(struct tipc_link *l)
l->stats.recv_info = 0;
l->stale_count = 0;
l->bc_peer_is_up = false;
- link_reset_statistics(l);
+ tipc_link_reset_stats(l);
}
/**
@@ -1085,8 +1264,9 @@ drop:
/*
* Send protocol message to the other endpoint.
*/
-void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg,
- u32 gap, u32 tolerance, u32 priority)
+static void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ,
+ int probe_msg, u32 gap, u32 tolerance,
+ u32 priority)
{
struct sk_buff *skb = NULL;
struct sk_buff_head xmitq;
@@ -1260,6 +1440,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
/* fall thru' */
case ACTIVATE_MSG:
+ skb_linearize(skb);
+ hdr = buf_msg(skb);
/* Complete own link name with peer's interface name */
if_name = strrchr(l->name, ':') + 1;
@@ -1525,53 +1707,17 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 win)
l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk;
}
-/* tipc_link_find_owner - locate owner node of link by link's name
- * @net: the applicable net namespace
- * @name: pointer to link name string
- * @bearer_id: pointer to index in 'node->links' array where the link was found.
- *
- * Returns pointer to node owning the link, or 0 if no matching link is found.
- */
-static struct tipc_node *tipc_link_find_owner(struct net *net,
- const char *link_name,
- unsigned int *bearer_id)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_link *l_ptr;
- struct tipc_node *n_ptr;
- struct tipc_node *found_node = NULL;
- int i;
-
- *bearer_id = 0;
- rcu_read_lock();
- list_for_each_entry_rcu(n_ptr, &tn->node_list, list) {
- tipc_node_lock(n_ptr);
- for (i = 0; i < MAX_BEARERS; i++) {
- l_ptr = n_ptr->links[i].link;
- if (l_ptr && !strcmp(l_ptr->name, link_name)) {
- *bearer_id = i;
- found_node = n_ptr;
- break;
- }
- }
- tipc_node_unlock(n_ptr);
- if (found_node)
- break;
- }
- rcu_read_unlock();
-
- return found_node;
-}
-
/**
- * link_reset_statistics - reset link statistics
- * @l_ptr: pointer to link
+ * link_reset_stats - reset link statistics
+ * @l: pointer to link
*/
-static void link_reset_statistics(struct tipc_link *l_ptr)
+void tipc_link_reset_stats(struct tipc_link *l)
{
- memset(&l_ptr->stats, 0, sizeof(l_ptr->stats));
- l_ptr->stats.sent_info = l_ptr->snd_nxt;
- l_ptr->stats.recv_info = l_ptr->rcv_nxt;
+ memset(&l->stats, 0, sizeof(l->stats));
+ if (!link_is_bc_sndlink(l)) {
+ l->stats.sent_info = l->snd_nxt;
+ l->stats.recv_info = l->rcv_nxt;
+ }
}
static void link_print(struct tipc_link *l, const char *str)
@@ -1624,84 +1770,6 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[])
return 0;
}
-int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info)
-{
- int err;
- int res = 0;
- int bearer_id;
- char *name;
- struct tipc_link *link;
- struct tipc_node *node;
- struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
- struct net *net = sock_net(skb->sk);
-
- if (!info->attrs[TIPC_NLA_LINK])
- return -EINVAL;
-
- err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX,
- info->attrs[TIPC_NLA_LINK],
- tipc_nl_link_policy);
- if (err)
- return err;
-
- if (!attrs[TIPC_NLA_LINK_NAME])
- return -EINVAL;
-
- name = nla_data(attrs[TIPC_NLA_LINK_NAME]);
-
- if (strcmp(name, tipc_bclink_name) == 0)
- return tipc_nl_bc_link_set(net, attrs);
-
- node = tipc_link_find_owner(net, name, &bearer_id);
- if (!node)
- return -EINVAL;
-
- tipc_node_lock(node);
-
- link = node->links[bearer_id].link;
- if (!link) {
- res = -EINVAL;
- goto out;
- }
-
- if (attrs[TIPC_NLA_LINK_PROP]) {
- struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
-
- err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP],
- props);
- if (err) {
- res = err;
- goto out;
- }
-
- if (props[TIPC_NLA_PROP_TOL]) {
- u32 tol;
-
- tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
- link->tolerance = tol;
- tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0);
- }
- if (props[TIPC_NLA_PROP_PRIO]) {
- u32 prio;
-
- prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
- link->priority = prio;
- tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, prio);
- }
- if (props[TIPC_NLA_PROP_WIN]) {
- u32 win;
-
- win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
- tipc_link_set_queue_limits(link, win);
- }
- }
-
-out:
- tipc_node_unlock(node);
-
- return res;
-}
-
static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s)
{
int i;
@@ -1768,8 +1836,8 @@ msg_full:
}
/* Caller should hold appropriate locks to protect the link */
-static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
- struct tipc_link *link, int nlflags)
+int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
+ struct tipc_link *link, int nlflags)
{
int err;
void *hdr;
@@ -1838,198 +1906,136 @@ msg_full:
return -EMSGSIZE;
}
-/* Caller should hold node lock */
-static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg,
- struct tipc_node *node, u32 *prev_link)
+static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb,
+ struct tipc_stats *stats)
{
- u32 i;
- int err;
-
- for (i = *prev_link; i < MAX_BEARERS; i++) {
- *prev_link = i;
-
- if (!node->links[i].link)
- continue;
+ int i;
+ struct nlattr *nest;
- err = __tipc_nl_add_link(net, msg,
- node->links[i].link, NLM_F_MULTI);
- if (err)
- return err;
- }
- *prev_link = 0;
+ struct nla_map {
+ __u32 key;
+ __u32 val;
+ };
- return 0;
-}
+ struct nla_map map[] = {
+ {TIPC_NLA_STATS_RX_INFO, stats->recv_info},
+ {TIPC_NLA_STATS_RX_FRAGMENTS, stats->recv_fragments},
+ {TIPC_NLA_STATS_RX_FRAGMENTED, stats->recv_fragmented},
+ {TIPC_NLA_STATS_RX_BUNDLES, stats->recv_bundles},
+ {TIPC_NLA_STATS_RX_BUNDLED, stats->recv_bundled},
+ {TIPC_NLA_STATS_TX_INFO, stats->sent_info},
+ {TIPC_NLA_STATS_TX_FRAGMENTS, stats->sent_fragments},
+ {TIPC_NLA_STATS_TX_FRAGMENTED, stats->sent_fragmented},
+ {TIPC_NLA_STATS_TX_BUNDLES, stats->sent_bundles},
+ {TIPC_NLA_STATS_TX_BUNDLED, stats->sent_bundled},
+ {TIPC_NLA_STATS_RX_NACKS, stats->recv_nacks},
+ {TIPC_NLA_STATS_RX_DEFERRED, stats->deferred_recv},
+ {TIPC_NLA_STATS_TX_NACKS, stats->sent_nacks},
+ {TIPC_NLA_STATS_TX_ACKS, stats->sent_acks},
+ {TIPC_NLA_STATS_RETRANSMITTED, stats->retransmitted},
+ {TIPC_NLA_STATS_DUPLICATES, stats->duplicates},
+ {TIPC_NLA_STATS_LINK_CONGS, stats->link_congs},
+ {TIPC_NLA_STATS_MAX_QUEUE, stats->max_queue_sz},
+ {TIPC_NLA_STATS_AVG_QUEUE, stats->queue_sz_counts ?
+ (stats->accu_queue_sz / stats->queue_sz_counts) : 0}
+ };
-int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct net *net = sock_net(skb->sk);
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_node *node;
- struct tipc_nl_msg msg;
- u32 prev_node = cb->args[0];
- u32 prev_link = cb->args[1];
- int done = cb->args[2];
- int err;
+ nest = nla_nest_start(skb, TIPC_NLA_LINK_STATS);
+ if (!nest)
+ return -EMSGSIZE;
- if (done)
- return 0;
+ for (i = 0; i < ARRAY_SIZE(map); i++)
+ if (nla_put_u32(skb, map[i].key, map[i].val))
+ goto msg_full;
- msg.skb = skb;
- msg.portid = NETLINK_CB(cb->skb).portid;
- msg.seq = cb->nlh->nlmsg_seq;
-
- rcu_read_lock();
- if (prev_node) {
- node = tipc_node_find(net, prev_node);
- if (!node) {
- /* We never set seq or call nl_dump_check_consistent()
- * this means that setting prev_seq here will cause the
- * consistence check to fail in the netlink callback
- * handler. Resulting in the last NLMSG_DONE message
- * having the NLM_F_DUMP_INTR flag set.
- */
- cb->prev_seq = 1;
- goto out;
- }
- tipc_node_put(node);
-
- list_for_each_entry_continue_rcu(node, &tn->node_list,
- list) {
- tipc_node_lock(node);
- err = __tipc_nl_add_node_links(net, &msg, node,
- &prev_link);
- tipc_node_unlock(node);
- if (err)
- goto out;
-
- prev_node = node->addr;
- }
- } else {
- err = tipc_nl_add_bc_link(net, &msg);
- if (err)
- goto out;
-
- list_for_each_entry_rcu(node, &tn->node_list, list) {
- tipc_node_lock(node);
- err = __tipc_nl_add_node_links(net, &msg, node,
- &prev_link);
- tipc_node_unlock(node);
- if (err)
- goto out;
-
- prev_node = node->addr;
- }
- }
- done = 1;
-out:
- rcu_read_unlock();
+ nla_nest_end(skb, nest);
- cb->args[0] = prev_node;
- cb->args[1] = prev_link;
- cb->args[2] = done;
+ return 0;
+msg_full:
+ nla_nest_cancel(skb, nest);
- return skb->len;
+ return -EMSGSIZE;
}
-int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info)
+int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
{
- struct net *net = genl_info_net(info);
- struct tipc_nl_msg msg;
- char *name;
int err;
+ void *hdr;
+ struct nlattr *attrs;
+ struct nlattr *prop;
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_link *bcl = tn->bcl;
- msg.portid = info->snd_portid;
- msg.seq = info->snd_seq;
-
- if (!info->attrs[TIPC_NLA_LINK_NAME])
- return -EINVAL;
- name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]);
+ if (!bcl)
+ return 0;
- msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!msg.skb)
- return -ENOMEM;
+ tipc_bcast_lock(net);
- if (strcmp(name, tipc_bclink_name) == 0) {
- err = tipc_nl_add_bc_link(net, &msg);
- if (err) {
- nlmsg_free(msg.skb);
- return err;
- }
- } else {
- int bearer_id;
- struct tipc_node *node;
- struct tipc_link *link;
+ hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
+ NLM_F_MULTI, TIPC_NL_LINK_GET);
+ if (!hdr) {
+ tipc_bcast_unlock(net);
+ return -EMSGSIZE;
+ }
- node = tipc_link_find_owner(net, name, &bearer_id);
- if (!node)
- return -EINVAL;
+ attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK);
+ if (!attrs)
+ goto msg_full;
- tipc_node_lock(node);
- link = node->links[bearer_id].link;
- if (!link) {
- tipc_node_unlock(node);
- nlmsg_free(msg.skb);
- return -EINVAL;
- }
+ /* The broadcast link is always up */
+ if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP))
+ goto attr_msg_full;
- err = __tipc_nl_add_link(net, &msg, link, 0);
- tipc_node_unlock(node);
- if (err) {
- nlmsg_free(msg.skb);
- return err;
- }
- }
+ if (nla_put_flag(msg->skb, TIPC_NLA_LINK_BROADCAST))
+ goto attr_msg_full;
+ if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name))
+ goto attr_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->rcv_nxt))
+ goto attr_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->snd_nxt))
+ goto attr_msg_full;
- return genlmsg_reply(msg.skb, info);
-}
+ prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP);
+ if (!prop)
+ goto attr_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window))
+ goto prop_msg_full;
+ nla_nest_end(msg->skb, prop);
-int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info)
-{
- int err;
- char *link_name;
- unsigned int bearer_id;
- struct tipc_link *link;
- struct tipc_node *node;
- struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
- struct net *net = sock_net(skb->sk);
-
- if (!info->attrs[TIPC_NLA_LINK])
- return -EINVAL;
-
- err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX,
- info->attrs[TIPC_NLA_LINK],
- tipc_nl_link_policy);
+ err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats);
if (err)
- return err;
-
- if (!attrs[TIPC_NLA_LINK_NAME])
- return -EINVAL;
-
- link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]);
+ goto attr_msg_full;
- if (strcmp(link_name, tipc_bclink_name) == 0) {
- err = tipc_bclink_reset_stats(net);
- if (err)
- return err;
- return 0;
- }
+ tipc_bcast_unlock(net);
+ nla_nest_end(msg->skb, attrs);
+ genlmsg_end(msg->skb, hdr);
- node = tipc_link_find_owner(net, link_name, &bearer_id);
- if (!node)
- return -EINVAL;
+ return 0;
- tipc_node_lock(node);
+prop_msg_full:
+ nla_nest_cancel(msg->skb, prop);
+attr_msg_full:
+ nla_nest_cancel(msg->skb, attrs);
+msg_full:
+ tipc_bcast_unlock(net);
+ genlmsg_cancel(msg->skb, hdr);
- link = node->links[bearer_id].link;
- if (!link) {
- tipc_node_unlock(node);
- return -EINVAL;
- }
+ return -EMSGSIZE;
+}
- link_reset_statistics(link);
+void tipc_link_set_tolerance(struct tipc_link *l, u32 tol)
+{
+ l->tolerance = tol;
+ tipc_link_proto_xmit(l, STATE_MSG, 0, 0, tol, 0);
+}
- tipc_node_unlock(node);
+void tipc_link_set_prio(struct tipc_link *l, u32 prio)
+{
+ l->priority = prio;
+ tipc_link_proto_xmit(l, STATE_MSG, 0, 0, 0, prio);
+}
- return 0;
+void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit)
+{
+ l->abort_limit = limit;
}
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 66d859b66c84..b2ae0f4276af 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -45,10 +45,6 @@
*/
#define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */
-/* Out-of-range value for link sequence numbers
- */
-#define INVALID_LINK_SEQ 0x10000
-
/* Link FSM events:
*/
enum {
@@ -75,151 +71,6 @@ enum {
*/
#define MAX_PKT_DEFAULT 1500
-struct tipc_stats {
- u32 sent_info; /* used in counting # sent packets */
- u32 recv_info; /* used in counting # recv'd packets */
- u32 sent_states;
- u32 recv_states;
- u32 sent_probes;
- u32 recv_probes;
- u32 sent_nacks;
- u32 recv_nacks;
- u32 sent_acks;
- u32 sent_bundled;
- u32 sent_bundles;
- u32 recv_bundled;
- u32 recv_bundles;
- u32 retransmitted;
- u32 sent_fragmented;
- u32 sent_fragments;
- u32 recv_fragmented;
- u32 recv_fragments;
- u32 link_congs; /* # port sends blocked by congestion */
- u32 deferred_recv;
- u32 duplicates;
- u32 max_queue_sz; /* send queue size high water mark */
- u32 accu_queue_sz; /* used for send queue size profiling */
- u32 queue_sz_counts; /* used for send queue size profiling */
- u32 msg_length_counts; /* used for message length profiling */
- u32 msg_lengths_total; /* used for message length profiling */
- u32 msg_length_profile[7]; /* used for msg. length profiling */
-};
-
-/**
- * struct tipc_link - TIPC link data structure
- * @addr: network address of link's peer node
- * @name: link name character string
- * @media_addr: media address to use when sending messages over link
- * @timer: link timer
- * @net: pointer to namespace struct
- * @refcnt: reference counter for permanent references (owner node & timer)
- * @peer_session: link session # being used by peer end of link
- * @peer_bearer_id: bearer id used by link's peer endpoint
- * @bearer_id: local bearer id used by link
- * @tolerance: minimum link continuity loss needed to reset link [in ms]
- * @keepalive_intv: link keepalive timer interval
- * @abort_limit: # of unacknowledged continuity probes needed to reset link
- * @state: current state of link FSM
- * @peer_caps: bitmap describing capabilities of peer node
- * @silent_intv_cnt: # of timer intervals without any reception from peer
- * @proto_msg: template for control messages generated by link
- * @pmsg: convenience pointer to "proto_msg" field
- * @priority: current link priority
- * @net_plane: current link network plane ('A' through 'H')
- * @backlog_limit: backlog queue congestion thresholds (indexed by importance)
- * @exp_msg_count: # of tunnelled messages expected during link changeover
- * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset
- * @mtu: current maximum packet size for this link
- * @advertised_mtu: advertised own mtu when link is being established
- * @transmitq: queue for sent, non-acked messages
- * @backlogq: queue for messages waiting to be sent
- * @snt_nxt: next sequence number to use for outbound messages
- * @last_retransmitted: sequence number of most recently retransmitted message
- * @stale_count: # of identical retransmit requests made by peer
- * @ackers: # of peers that needs to ack each packet before it can be released
- * @acked: # last packet acked by a certain peer. Used for broadcast.
- * @rcv_nxt: next sequence number to expect for inbound messages
- * @deferred_queue: deferred queue saved OOS b'cast message received from node
- * @unacked_window: # of inbound messages rx'd without ack'ing back to peer
- * @inputq: buffer queue for messages to be delivered upwards
- * @namedq: buffer queue for name table messages to be delivered upwards
- * @next_out: ptr to first unsent outbound message in queue
- * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate
- * @long_msg_seq_no: next identifier to use for outbound fragmented messages
- * @reasm_buf: head of partially reassembled inbound message fragments
- * @bc_rcvr: marks that this is a broadcast receiver link
- * @stats: collects statistics regarding link activity
- */
-struct tipc_link {
- u32 addr;
- char name[TIPC_MAX_LINK_NAME];
- struct tipc_media_addr *media_addr;
- struct net *net;
-
- /* Management and link supervision data */
- u32 peer_session;
- u32 peer_bearer_id;
- u32 bearer_id;
- u32 tolerance;
- unsigned long keepalive_intv;
- u32 abort_limit;
- u32 state;
- u16 peer_caps;
- bool active;
- u32 silent_intv_cnt;
- struct {
- unchar hdr[INT_H_SIZE];
- unchar body[TIPC_MAX_IF_NAME];
- } proto_msg;
- struct tipc_msg *pmsg;
- u32 priority;
- char net_plane;
-
- /* Failover/synch */
- u16 drop_point;
- struct sk_buff *failover_reasm_skb;
-
- /* Max packet negotiation */
- u16 mtu;
- u16 advertised_mtu;
-
- /* Sending */
- struct sk_buff_head transmq;
- struct sk_buff_head backlogq;
- struct {
- u16 len;
- u16 limit;
- } backlog[5];
- u16 snd_nxt;
- u16 last_retransm;
- u16 window;
- u32 stale_count;
-
- /* Reception */
- u16 rcv_nxt;
- u32 rcv_unacked;
- struct sk_buff_head deferdq;
- struct sk_buff_head *inputq;
- struct sk_buff_head *namedq;
-
- /* Congestion handling */
- struct sk_buff_head wakeupq;
-
- /* Fragmentation/reassembly */
- struct sk_buff *reasm_buf;
-
- /* Broadcast */
- u16 ackers;
- u16 acked;
- struct tipc_link *bc_rcvlink;
- struct tipc_link *bc_sndlink;
- int nack_state;
- bool bc_peer_is_up;
-
- /* Statistics */
- struct tipc_stats stats;
-};
-
bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
int tolerance, char net_plane, u32 mtu, int priority,
int window, u32 session, u32 ownnode, u32 peer,
@@ -235,11 +86,11 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
struct sk_buff_head *namedq,
struct tipc_link *bc_sndlink,
struct tipc_link **link);
+void tipc_link_reinit(struct tipc_link *l, u32 addr);
void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
int mtyp, struct sk_buff_head *xmitq);
void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq);
int tipc_link_fsm_evt(struct tipc_link *l, int evt);
-void tipc_link_reset_fragments(struct tipc_link *l_ptr);
bool tipc_link_is_up(struct tipc_link *l);
bool tipc_link_peer_is_down(struct tipc_link *l);
bool tipc_link_is_reset(struct tipc_link *l);
@@ -248,15 +99,25 @@ bool tipc_link_is_synching(struct tipc_link *l);
bool tipc_link_is_failingover(struct tipc_link *l);
bool tipc_link_is_blocked(struct tipc_link *l);
void tipc_link_set_active(struct tipc_link *l, bool active);
-void tipc_link_reset(struct tipc_link *l_ptr);
-int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list,
+void tipc_link_reset(struct tipc_link *l);
+void tipc_link_reset_stats(struct tipc_link *l);
+int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list,
struct sk_buff_head *xmitq);
+struct sk_buff_head *tipc_link_inputq(struct tipc_link *l);
+u16 tipc_link_rcv_nxt(struct tipc_link *l);
+u16 tipc_link_acked(struct tipc_link *l);
+u32 tipc_link_id(struct tipc_link *l);
+char *tipc_link_name(struct tipc_link *l);
+char tipc_link_plane(struct tipc_link *l);
+int tipc_link_prio(struct tipc_link *l);
+int tipc_link_window(struct tipc_link *l);
+unsigned long tipc_link_tolerance(struct tipc_link *l);
+void tipc_link_set_tolerance(struct tipc_link *l, u32 tol);
+void tipc_link_set_prio(struct tipc_link *l, u32 prio);
+void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit);
void tipc_link_set_queue_limits(struct tipc_link *l, u32 window);
-
-int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb);
-int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info);
-int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info);
-int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
+ struct tipc_link *link, int nlflags);
int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]);
int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq);
int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index c07612bab95c..ebe9d0ff6e9e 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -84,31 +84,6 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
return buf;
}
-void named_cluster_distribute(struct net *net, struct sk_buff *skb)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct sk_buff *oskb;
- struct tipc_node *node;
- u32 dnode;
-
- rcu_read_lock();
- list_for_each_entry_rcu(node, &tn->node_list, list) {
- dnode = node->addr;
- if (in_own_node(net, dnode))
- continue;
- if (!tipc_node_is_up(node))
- continue;
- oskb = pskb_copy(skb, GFP_ATOMIC);
- if (!oskb)
- break;
- msg_set_destnode(buf_msg(oskb), dnode);
- tipc_node_xmit_skb(net, oskb, dnode, 0);
- }
- rcu_read_unlock();
-
- kfree_skb(skb);
-}
-
/**
* tipc_named_publish - tell other nodes about a new publication by this node
*/
@@ -226,42 +201,6 @@ void tipc_named_node_up(struct net *net, u32 dnode)
tipc_node_xmit(net, &head, dnode, 0);
}
-static void tipc_publ_subscribe(struct net *net, struct publication *publ,
- u32 addr)
-{
- struct tipc_node *node;
-
- if (in_own_node(net, addr))
- return;
-
- node = tipc_node_find(net, addr);
- if (!node) {
- pr_warn("Node subscription rejected, unknown node 0x%x\n",
- addr);
- return;
- }
-
- tipc_node_lock(node);
- list_add_tail(&publ->nodesub_list, &node->publ_list);
- tipc_node_unlock(node);
- tipc_node_put(node);
-}
-
-static void tipc_publ_unsubscribe(struct net *net, struct publication *publ,
- u32 addr)
-{
- struct tipc_node *node;
-
- node = tipc_node_find(net, addr);
- if (!node)
- return;
-
- tipc_node_lock(node);
- list_del_init(&publ->nodesub_list);
- tipc_node_unlock(node);
- tipc_node_put(node);
-}
-
/**
* tipc_publ_purge - remove publication associated with a failed node
*
@@ -277,7 +216,7 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr)
p = tipc_nametbl_remove_publ(net, publ->type, publ->lower,
publ->node, publ->ref, publ->key);
if (p)
- tipc_publ_unsubscribe(net, p, addr);
+ tipc_node_unsubscribe(net, &p->nodesub_list, addr);
spin_unlock_bh(&tn->nametbl_lock);
if (p != publ) {
@@ -317,7 +256,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
TIPC_CLUSTER_SCOPE, node,
ntohl(i->ref), ntohl(i->key));
if (publ) {
- tipc_publ_subscribe(net, publ, node);
+ tipc_node_subscribe(net, &publ->nodesub_list, node);
return true;
}
} else if (dtype == WITHDRAWAL) {
@@ -326,7 +265,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
node, ntohl(i->ref),
ntohl(i->key));
if (publ) {
- tipc_publ_unsubscribe(net, publ, node);
+ tipc_node_unsubscribe(net, &publ->nodesub_list, node);
kfree_rcu(publ, rcu);
return true;
}
@@ -397,6 +336,7 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq)
spin_lock_bh(&tn->nametbl_lock);
for (skb = skb_dequeue(inputq); skb; skb = skb_dequeue(inputq)) {
+ skb_linearize(skb);
msg = buf_msg(skb);
mtype = msg_type(msg);
item = (struct distr_item *)msg_data(msg);
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index dd2d9fd80da2..1264ba0af937 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h
@@ -69,7 +69,6 @@ struct distr_item {
struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ);
struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ);
-void named_cluster_distribute(struct net *net, struct sk_buff *buf);
void tipc_named_node_up(struct net *net, u32 dnode);
void tipc_named_rcv(struct net *net, struct sk_buff_head *msg_queue);
void tipc_named_reinit(struct net *net);
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 0f47f08bf38f..91fce70291a8 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -42,6 +42,7 @@
#include "subscr.h"
#include "bcast.h"
#include "addr.h"
+#include "node.h"
#include <net/genetlink.h>
#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */
@@ -677,7 +678,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
spin_unlock_bh(&tn->nametbl_lock);
if (buf)
- named_cluster_distribute(net, buf);
+ tipc_node_broadcast(net, buf);
return publ;
}
@@ -709,7 +710,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
spin_unlock_bh(&tn->nametbl_lock);
if (skb) {
- named_cluster_distribute(net, skb);
+ tipc_node_broadcast(net, skb);
return 1;
}
return 0;
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 7f6475efc984..8975b0135b76 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -101,18 +101,18 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
},
{
.cmd = TIPC_NL_LINK_GET,
- .doit = tipc_nl_link_get,
- .dumpit = tipc_nl_link_dump,
+ .doit = tipc_nl_node_get_link,
+ .dumpit = tipc_nl_node_dump_link,
.policy = tipc_nl_policy,
},
{
.cmd = TIPC_NL_LINK_SET,
- .doit = tipc_nl_link_set,
+ .doit = tipc_nl_node_set_link,
.policy = tipc_nl_policy,
},
{
.cmd = TIPC_NL_LINK_RESET_STATS,
- .doit = tipc_nl_link_reset_stats,
+ .doit = tipc_nl_node_reset_link_stats,
.policy = tipc_nl_policy,
},
{
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 1eadc95e1132..2c016fdefe97 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -1023,25 +1023,25 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg)
msg->req_type = TIPC_TLV_LINK_NAME;
msg->rep_size = ULTRA_STRING_MAX_LEN;
msg->rep_type = TIPC_TLV_ULTRA_STRING;
- dump.dumpit = tipc_nl_link_dump;
+ dump.dumpit = tipc_nl_node_dump_link;
dump.format = tipc_nl_compat_link_stat_dump;
return tipc_nl_compat_dumpit(&dump, msg);
case TIPC_CMD_GET_LINKS:
msg->req_type = TIPC_TLV_NET_ADDR;
msg->rep_size = ULTRA_STRING_MAX_LEN;
- dump.dumpit = tipc_nl_link_dump;
+ dump.dumpit = tipc_nl_node_dump_link;
dump.format = tipc_nl_compat_link_dump;
return tipc_nl_compat_dumpit(&dump, msg);
case TIPC_CMD_SET_LINK_TOL:
case TIPC_CMD_SET_LINK_PRI:
case TIPC_CMD_SET_LINK_WINDOW:
msg->req_type = TIPC_TLV_LINK_CONFIG;
- doit.doit = tipc_nl_link_set;
+ doit.doit = tipc_nl_node_set_link;
doit.transcode = tipc_nl_compat_link_set;
return tipc_nl_compat_doit(&doit, msg);
case TIPC_CMD_RESET_LINK_STATS:
msg->req_type = TIPC_TLV_LINK_NAME;
- doit.doit = tipc_nl_link_reset_stats;
+ doit.doit = tipc_nl_node_reset_link_stats;
doit.transcode = tipc_nl_compat_link_reset_stats;
return tipc_nl_compat_doit(&doit, msg);
case TIPC_CMD_SHOW_NAME_TABLE:
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 20cddec0a43c..9d7a16fc5ca4 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -42,6 +42,84 @@
#include "bcast.h"
#include "discover.h"
+#define INVALID_NODE_SIG 0x10000
+
+/* Flags used to take different actions according to flag type
+ * TIPC_NOTIFY_NODE_DOWN: notify node is down
+ * TIPC_NOTIFY_NODE_UP: notify node is up
+ * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type
+ */
+enum {
+ TIPC_NOTIFY_NODE_DOWN = (1 << 3),
+ TIPC_NOTIFY_NODE_UP = (1 << 4),
+ TIPC_NOTIFY_LINK_UP = (1 << 6),
+ TIPC_NOTIFY_LINK_DOWN = (1 << 7)
+};
+
+struct tipc_link_entry {
+ struct tipc_link *link;
+ spinlock_t lock; /* per link */
+ u32 mtu;
+ struct sk_buff_head inputq;
+ struct tipc_media_addr maddr;
+};
+
+struct tipc_bclink_entry {
+ struct tipc_link *link;
+ struct sk_buff_head inputq1;
+ struct sk_buff_head arrvq;
+ struct sk_buff_head inputq2;
+ struct sk_buff_head namedq;
+};
+
+/**
+ * struct tipc_node - TIPC node structure
+ * @addr: network address of node
+ * @ref: reference counter to node object
+ * @lock: rwlock governing access to structure
+ * @net: the applicable net namespace
+ * @hash: links to adjacent nodes in unsorted hash chain
+ * @inputq: pointer to input queue containing messages for msg event
+ * @namedq: pointer to name table input queue with name table messages
+ * @active_links: bearer ids of active links, used as index into links[] array
+ * @links: array containing references to all links to node
+ * @action_flags: bit mask of different types of node actions
+ * @state: connectivity state vs peer node
+ * @sync_point: sequence number where synch/failover is finished
+ * @list: links to adjacent nodes in sorted list of cluster's nodes
+ * @working_links: number of working links to node (both active and standby)
+ * @link_cnt: number of links to node
+ * @capabilities: bitmap, indicating peer node's functional capabilities
+ * @signature: node instance identifier
+ * @link_id: local and remote bearer ids of changing link, if any
+ * @publ_list: list of publications
+ * @rcu: rcu struct for tipc_node
+ */
+struct tipc_node {
+ u32 addr;
+ struct kref kref;
+ rwlock_t lock;
+ struct net *net;
+ struct hlist_node hash;
+ int active_links[2];
+ struct tipc_link_entry links[MAX_BEARERS];
+ struct tipc_bclink_entry bc_entry;
+ int action_flags;
+ struct list_head list;
+ int state;
+ u16 sync_point;
+ int link_cnt;
+ u16 working_links;
+ u16 capabilities;
+ u32 signature;
+ u32 link_id;
+ struct list_head publ_list;
+ struct list_head conn_sks;
+ unsigned long keepalive_intv;
+ struct timer_list timer;
+ struct rcu_head rcu;
+};
+
/* Node FSM states and events:
*/
enum {
@@ -75,6 +153,9 @@ static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq);
static void tipc_node_delete(struct tipc_node *node);
static void tipc_node_timeout(unsigned long data);
static void tipc_node_fsm_evt(struct tipc_node *n, int evt);
+static struct tipc_node *tipc_node_find(struct net *net, u32 addr);
+static void tipc_node_put(struct tipc_node *node);
+static bool tipc_node_is_up(struct tipc_node *n);
struct tipc_sock_conn {
u32 port;
@@ -83,12 +164,54 @@ struct tipc_sock_conn {
struct list_head list;
};
+static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
+ [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC },
+ [TIPC_NLA_LINK_NAME] = {
+ .type = NLA_STRING,
+ .len = TIPC_MAX_LINK_NAME
+ },
+ [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 },
+ [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG },
+ [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG },
+ [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG },
+ [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED },
+ [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED },
+ [TIPC_NLA_LINK_RX] = { .type = NLA_U32 },
+ [TIPC_NLA_LINK_TX] = { .type = NLA_U32 }
+};
+
static const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = {
[TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC },
[TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 },
[TIPC_NLA_NODE_UP] = { .type = NLA_FLAG }
};
+static struct tipc_link *node_active_link(struct tipc_node *n, int sel)
+{
+ int bearer_id = n->active_links[sel & 1];
+
+ if (unlikely(bearer_id == INVALID_BEARER_ID))
+ return NULL;
+
+ return n->links[bearer_id].link;
+}
+
+int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
+{
+ struct tipc_node *n;
+ int bearer_id;
+ unsigned int mtu = MAX_MSG_SIZE;
+
+ n = tipc_node_find(net, addr);
+ if (unlikely(!n))
+ return mtu;
+
+ bearer_id = n->active_links[sel & 1];
+ if (likely(bearer_id != INVALID_BEARER_ID))
+ mtu = n->links[bearer_id].mtu;
+ tipc_node_put(n);
+ return mtu;
+}
/*
* A trivial power-of-two bitmask technique is used for speed, since this
* operation is done for every incoming TIPC packet. The number of hash table
@@ -107,7 +230,7 @@ static void tipc_node_kref_release(struct kref *kref)
tipc_node_delete(node);
}
-void tipc_node_put(struct tipc_node *node)
+static void tipc_node_put(struct tipc_node *node)
{
kref_put(&node->kref, tipc_node_kref_release);
}
@@ -120,7 +243,7 @@ static void tipc_node_get(struct tipc_node *node)
/*
* tipc_node_find - locate specified node object, if it exists
*/
-struct tipc_node *tipc_node_find(struct net *net, u32 addr)
+static struct tipc_node *tipc_node_find(struct net *net, u32 addr)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_node *node;
@@ -141,66 +264,122 @@ struct tipc_node *tipc_node_find(struct net *net, u32 addr)
return NULL;
}
+static void tipc_node_read_lock(struct tipc_node *n)
+{
+ read_lock_bh(&n->lock);
+}
+
+static void tipc_node_read_unlock(struct tipc_node *n)
+{
+ read_unlock_bh(&n->lock);
+}
+
+static void tipc_node_write_lock(struct tipc_node *n)
+{
+ write_lock_bh(&n->lock);
+}
+
+static void tipc_node_write_unlock(struct tipc_node *n)
+{
+ struct net *net = n->net;
+ u32 addr = 0;
+ u32 flags = n->action_flags;
+ u32 link_id = 0;
+ struct list_head *publ_list;
+
+ if (likely(!flags)) {
+ write_unlock_bh(&n->lock);
+ return;
+ }
+
+ addr = n->addr;
+ link_id = n->link_id;
+ publ_list = &n->publ_list;
+
+ n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
+ TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP);
+
+ write_unlock_bh(&n->lock);
+
+ if (flags & TIPC_NOTIFY_NODE_DOWN)
+ tipc_publ_notify(net, publ_list, addr);
+
+ if (flags & TIPC_NOTIFY_NODE_UP)
+ tipc_named_node_up(net, addr);
+
+ if (flags & TIPC_NOTIFY_LINK_UP)
+ tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr,
+ TIPC_NODE_SCOPE, link_id, addr);
+
+ if (flags & TIPC_NOTIFY_LINK_DOWN)
+ tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
+ link_id, addr);
+}
+
struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_node *n_ptr, *temp_node;
+ struct tipc_node *n, *temp_node;
+ int i;
spin_lock_bh(&tn->node_list_lock);
- n_ptr = tipc_node_find(net, addr);
- if (n_ptr)
+ n = tipc_node_find(net, addr);
+ if (n)
goto exit;
- n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC);
- if (!n_ptr) {
+ n = kzalloc(sizeof(*n), GFP_ATOMIC);
+ if (!n) {
pr_warn("Node creation failed, no memory\n");
goto exit;
}
- n_ptr->addr = addr;
- n_ptr->net = net;
- n_ptr->capabilities = capabilities;
- kref_init(&n_ptr->kref);
- spin_lock_init(&n_ptr->lock);
- INIT_HLIST_NODE(&n_ptr->hash);
- INIT_LIST_HEAD(&n_ptr->list);
- INIT_LIST_HEAD(&n_ptr->publ_list);
- INIT_LIST_HEAD(&n_ptr->conn_sks);
- skb_queue_head_init(&n_ptr->bc_entry.namedq);
- skb_queue_head_init(&n_ptr->bc_entry.inputq1);
- __skb_queue_head_init(&n_ptr->bc_entry.arrvq);
- skb_queue_head_init(&n_ptr->bc_entry.inputq2);
- hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]);
- list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
- if (n_ptr->addr < temp_node->addr)
- break;
- }
- list_add_tail_rcu(&n_ptr->list, &temp_node->list);
- n_ptr->state = SELF_DOWN_PEER_LEAVING;
- n_ptr->signature = INVALID_NODE_SIG;
- n_ptr->active_links[0] = INVALID_BEARER_ID;
- n_ptr->active_links[1] = INVALID_BEARER_ID;
- if (!tipc_link_bc_create(net, tipc_own_addr(net), n_ptr->addr,
- U16_MAX, tipc_bc_sndlink(net)->window,
- n_ptr->capabilities,
- &n_ptr->bc_entry.inputq1,
- &n_ptr->bc_entry.namedq,
+ n->addr = addr;
+ n->net = net;
+ n->capabilities = capabilities;
+ kref_init(&n->kref);
+ rwlock_init(&n->lock);
+ INIT_HLIST_NODE(&n->hash);
+ INIT_LIST_HEAD(&n->list);
+ INIT_LIST_HEAD(&n->publ_list);
+ INIT_LIST_HEAD(&n->conn_sks);
+ skb_queue_head_init(&n->bc_entry.namedq);
+ skb_queue_head_init(&n->bc_entry.inputq1);
+ __skb_queue_head_init(&n->bc_entry.arrvq);
+ skb_queue_head_init(&n->bc_entry.inputq2);
+ for (i = 0; i < MAX_BEARERS; i++)
+ spin_lock_init(&n->links[i].lock);
+ n->state = SELF_DOWN_PEER_LEAVING;
+ n->signature = INVALID_NODE_SIG;
+ n->active_links[0] = INVALID_BEARER_ID;
+ n->active_links[1] = INVALID_BEARER_ID;
+ if (!tipc_link_bc_create(net, tipc_own_addr(net), n->addr,
+ U16_MAX,
+ tipc_link_window(tipc_bc_sndlink(net)),
+ n->capabilities,
+ &n->bc_entry.inputq1,
+ &n->bc_entry.namedq,
tipc_bc_sndlink(net),
- &n_ptr->bc_entry.link)) {
+ &n->bc_entry.link)) {
pr_warn("Broadcast rcv link creation failed, no memory\n");
- kfree(n_ptr);
- n_ptr = NULL;
+ kfree(n);
+ n = NULL;
goto exit;
}
- tipc_node_get(n_ptr);
- setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr);
- n_ptr->keepalive_intv = U32_MAX;
+ tipc_node_get(n);
+ setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n);
+ n->keepalive_intv = U32_MAX;
+ hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
+ list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+ if (n->addr < temp_node->addr)
+ break;
+ }
+ list_add_tail_rcu(&n->list, &temp_node->list);
exit:
spin_unlock_bh(&tn->node_list_lock);
- return n_ptr;
+ return n;
}
static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l)
{
- unsigned long tol = l->tolerance;
+ unsigned long tol = tipc_link_tolerance(l);
unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4;
unsigned long keepalive_intv = msecs_to_jiffies(intv);
@@ -209,7 +388,7 @@ static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l)
n->keepalive_intv = keepalive_intv;
/* Ensure link's abort limit corresponds to current interval */
- l->abort_limit = l->tolerance / jiffies_to_msecs(n->keepalive_intv);
+ tipc_link_set_abort_limit(l, tol / jiffies_to_msecs(n->keepalive_intv));
}
static void tipc_node_delete(struct tipc_node *node)
@@ -234,6 +413,42 @@ void tipc_node_stop(struct net *net)
spin_unlock_bh(&tn->node_list_lock);
}
+void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr)
+{
+ struct tipc_node *n;
+
+ if (in_own_node(net, addr))
+ return;
+
+ n = tipc_node_find(net, addr);
+ if (!n) {
+ pr_warn("Node subscribe rejected, unknown node 0x%x\n", addr);
+ return;
+ }
+ tipc_node_write_lock(n);
+ list_add_tail(subscr, &n->publ_list);
+ tipc_node_write_unlock(n);
+ tipc_node_put(n);
+}
+
+void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr)
+{
+ struct tipc_node *n;
+
+ if (in_own_node(net, addr))
+ return;
+
+ n = tipc_node_find(net, addr);
+ if (!n) {
+ pr_warn("Node unsubscribe rejected, unknown node 0x%x\n", addr);
+ return;
+ }
+ tipc_node_write_lock(n);
+ list_del_init(subscr);
+ tipc_node_write_unlock(n);
+ tipc_node_put(n);
+}
+
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port)
{
struct tipc_node *node;
@@ -257,9 +472,9 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port)
conn->port = port;
conn->peer_port = peer_port;
- tipc_node_lock(node);
+ tipc_node_write_lock(node);
list_add_tail(&conn->list, &node->conn_sks);
- tipc_node_unlock(node);
+ tipc_node_write_unlock(node);
exit:
tipc_node_put(node);
return err;
@@ -277,14 +492,14 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
if (!node)
return;
- tipc_node_lock(node);
+ tipc_node_write_lock(node);
list_for_each_entry_safe(conn, safe, &node->conn_sks, list) {
if (port != conn->port)
continue;
list_del(&conn->list);
kfree(conn);
}
- tipc_node_unlock(node);
+ tipc_node_write_unlock(node);
tipc_node_put(node);
}
@@ -301,14 +516,16 @@ static void tipc_node_timeout(unsigned long data)
__skb_queue_head_init(&xmitq);
for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
- tipc_node_lock(n);
+ tipc_node_read_lock(n);
le = &n->links[bearer_id];
+ spin_lock_bh(&le->lock);
if (le->link) {
/* Link tolerance may change asynchronously: */
tipc_node_calculate_timer(n, le->link);
rc = tipc_link_timeout(le->link, &xmitq);
}
- tipc_node_unlock(n);
+ spin_unlock_bh(&le->lock);
+ tipc_node_read_unlock(n);
tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr);
if (rc & TIPC_LINK_DOWN_EVT)
tipc_node_link_down(n, bearer_id, false);
@@ -340,16 +557,16 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
n->working_links++;
n->action_flags |= TIPC_NOTIFY_LINK_UP;
- n->link_id = nl->peer_bearer_id << 16 | bearer_id;
+ n->link_id = tipc_link_id(nl);
/* Leave room for tunnel header when returning 'mtu' to users: */
- n->links[bearer_id].mtu = nl->mtu - INT_H_SIZE;
+ n->links[bearer_id].mtu = tipc_link_mtu(nl) - INT_H_SIZE;
tipc_bearer_add_dest(n->net, bearer_id, n->addr);
tipc_bcast_inc_bearer_dst_cnt(n->net, bearer_id);
pr_debug("Established link <%s> on network plane %c\n",
- nl->name, nl->net_plane);
+ tipc_link_name(nl), tipc_link_plane(nl));
/* First link? => give it both slots */
if (!ol) {
@@ -362,17 +579,17 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
}
/* Second link => redistribute slots */
- if (nl->priority > ol->priority) {
- pr_debug("Old link <%s> becomes standby\n", ol->name);
+ if (tipc_link_prio(nl) > tipc_link_prio(ol)) {
+ pr_debug("Old link <%s> becomes standby\n", tipc_link_name(ol));
*slot0 = bearer_id;
*slot1 = bearer_id;
tipc_link_set_active(nl, true);
tipc_link_set_active(ol, false);
- } else if (nl->priority == ol->priority) {
+ } else if (tipc_link_prio(nl) == tipc_link_prio(ol)) {
tipc_link_set_active(nl, true);
*slot1 = bearer_id;
} else {
- pr_debug("New link <%s> is standby\n", nl->name);
+ pr_debug("New link <%s> is standby\n", tipc_link_name(nl));
}
/* Prepare synchronization with first link */
@@ -387,9 +604,9 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
static void tipc_node_link_up(struct tipc_node *n, int bearer_id,
struct sk_buff_head *xmitq)
{
- tipc_node_lock(n);
+ tipc_node_write_lock(n);
__tipc_node_link_up(n, bearer_id, xmitq);
- tipc_node_unlock(n);
+ tipc_node_write_unlock(n);
}
/**
@@ -402,7 +619,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
struct tipc_link_entry *le = &n->links[*bearer_id];
int *slot0 = &n->active_links[0];
int *slot1 = &n->active_links[1];
- int i, highest = 0;
+ int i, highest = 0, prio;
struct tipc_link *l, *_l, *tnl;
l = n->links[*bearer_id].link;
@@ -411,12 +628,12 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
n->working_links--;
n->action_flags |= TIPC_NOTIFY_LINK_DOWN;
- n->link_id = l->peer_bearer_id << 16 | *bearer_id;
+ n->link_id = tipc_link_id(l);
tipc_bearer_remove_dest(n->net, *bearer_id, n->addr);
pr_debug("Lost link <%s> on network plane %c\n",
- l->name, l->net_plane);
+ tipc_link_name(l), tipc_link_plane(l));
/* Select new active link if any available */
*slot0 = INVALID_BEARER_ID;
@@ -427,10 +644,11 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
continue;
if (_l == l)
continue;
- if (_l->priority < highest)
+ prio = tipc_link_prio(_l);
+ if (prio < highest)
continue;
- if (_l->priority > highest) {
- highest = _l->priority;
+ if (prio > highest) {
+ highest = prio;
*slot0 = i;
*slot1 = i;
continue;
@@ -453,17 +671,17 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id);
/* There is still a working link => initiate failover */
- tnl = node_active_link(n, 0);
+ *bearer_id = n->active_links[0];
+ tnl = n->links[*bearer_id].link;
tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT);
tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT);
- n->sync_point = tnl->rcv_nxt + (U16_MAX / 2 - 1);
+ n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1);
tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq);
tipc_link_reset(l);
tipc_link_fsm_evt(l, LINK_RESET_EVT);
tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT);
- *maddr = &n->links[tnl->bearer_id].maddr;
- *bearer_id = tnl->bearer_id;
+ *maddr = &n->links[*bearer_id].maddr;
}
static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
@@ -478,7 +696,7 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
__skb_queue_head_init(&xmitq);
- tipc_node_lock(n);
+ tipc_node_write_lock(n);
if (!tipc_link_is_establishing(l)) {
__tipc_node_link_down(n, &bearer_id, &xmitq, &maddr);
if (delete) {
@@ -490,12 +708,12 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
/* Defuse pending tipc_node_link_up() */
tipc_link_fsm_evt(l, LINK_RESET_EVT);
}
- tipc_node_unlock(n);
+ tipc_node_write_unlock(n);
tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
tipc_sk_rcv(n->net, &le->inputq);
}
-bool tipc_node_is_up(struct tipc_node *n)
+static bool tipc_node_is_up(struct tipc_node *n)
{
return n->active_links[0] != INVALID_BEARER_ID;
}
@@ -523,7 +741,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
if (!n)
return;
- tipc_node_lock(n);
+ tipc_node_write_lock(n);
le = &n->links[b->identity];
@@ -626,7 +844,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
}
memcpy(&le->maddr, maddr, sizeof(*maddr));
exit:
- tipc_node_unlock(n);
+ tipc_node_write_unlock(n);
if (reset && !tipc_link_is_reset(l))
tipc_node_link_down(n, b->identity, false);
tipc_node_put(n);
@@ -834,24 +1052,6 @@ illegal_evt:
pr_err("Illegal node fsm evt %x in state %x\n", evt, state);
}
-bool tipc_node_filter_pkt(struct tipc_node *n, struct tipc_msg *hdr)
-{
- int state = n->state;
-
- if (likely(state == SELF_UP_PEER_UP))
- return true;
-
- if (state == SELF_LEAVING_PEER_DOWN)
- return false;
-
- if (state == SELF_DOWN_PEER_LEAVING) {
- if (msg_peer_node_is_up(hdr))
- return false;
- }
-
- return true;
-}
-
static void node_lost_contact(struct tipc_node *n,
struct sk_buff_head *inputq)
{
@@ -913,56 +1113,18 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr,
if (bearer_id >= MAX_BEARERS)
goto exit;
- tipc_node_lock(node);
+ tipc_node_read_lock(node);
link = node->links[bearer_id].link;
if (link) {
- strncpy(linkname, link->name, len);
+ strncpy(linkname, tipc_link_name(link), len);
err = 0;
}
exit:
- tipc_node_unlock(node);
+ tipc_node_read_unlock(node);
tipc_node_put(node);
return err;
}
-void tipc_node_unlock(struct tipc_node *node)
-{
- struct net *net = node->net;
- u32 addr = 0;
- u32 flags = node->action_flags;
- u32 link_id = 0;
- struct list_head *publ_list;
-
- if (likely(!flags)) {
- spin_unlock_bh(&node->lock);
- return;
- }
-
- addr = node->addr;
- link_id = node->link_id;
- publ_list = &node->publ_list;
-
- node->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
- TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP);
-
- spin_unlock_bh(&node->lock);
-
- if (flags & TIPC_NOTIFY_NODE_DOWN)
- tipc_publ_notify(net, publ_list, addr);
-
- if (flags & TIPC_NOTIFY_NODE_UP)
- tipc_named_node_up(net, addr);
-
- if (flags & TIPC_NOTIFY_LINK_UP)
- tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr,
- TIPC_NODE_SCOPE, link_id, addr);
-
- if (flags & TIPC_NOTIFY_LINK_DOWN)
- tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
- link_id, addr);
-
-}
-
/* Caller should hold node lock for the passed node */
static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node)
{
@@ -997,20 +1159,6 @@ msg_full:
return -EMSGSIZE;
}
-static struct tipc_link *tipc_node_select_link(struct tipc_node *n, int sel,
- int *bearer_id,
- struct tipc_media_addr **maddr)
-{
- int id = n->active_links[sel & 1];
-
- if (unlikely(id < 0))
- return NULL;
-
- *bearer_id = id;
- *maddr = &n->links[id].maddr;
- return n->links[id].link;
-}
-
/**
* tipc_node_xmit() is the general link level function for message sending
* @net: the applicable net namespace
@@ -1023,29 +1171,32 @@ static struct tipc_link *tipc_node_select_link(struct tipc_node *n, int sel,
int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
u32 dnode, int selector)
{
- struct tipc_link *l = NULL;
+ struct tipc_link_entry *le = NULL;
struct tipc_node *n;
struct sk_buff_head xmitq;
- struct tipc_media_addr *maddr;
- int bearer_id;
+ int bearer_id = -1;
int rc = -EHOSTUNREACH;
__skb_queue_head_init(&xmitq);
n = tipc_node_find(net, dnode);
if (likely(n)) {
- tipc_node_lock(n);
- l = tipc_node_select_link(n, selector, &bearer_id, &maddr);
- if (likely(l))
- rc = tipc_link_xmit(l, list, &xmitq);
- tipc_node_unlock(n);
- if (unlikely(rc == -ENOBUFS))
+ tipc_node_read_lock(n);
+ bearer_id = n->active_links[selector & 1];
+ if (bearer_id >= 0) {
+ le = &n->links[bearer_id];
+ spin_lock_bh(&le->lock);
+ rc = tipc_link_xmit(le->link, list, &xmitq);
+ spin_unlock_bh(&le->lock);
+ }
+ tipc_node_read_unlock(n);
+ if (likely(!rc))
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
+ else if (rc == -ENOBUFS)
tipc_node_link_down(n, bearer_id, false);
tipc_node_put(n);
+ return rc;
}
- if (likely(!rc)) {
- tipc_bearer_xmit(net, bearer_id, &xmitq, maddr);
- return 0;
- }
+
if (likely(in_own_node(net, dnode))) {
tipc_sk_rcv(net, list);
return 0;
@@ -1075,6 +1226,30 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
return 0;
}
+void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
+{
+ struct sk_buff *txskb;
+ struct tipc_node *n;
+ u32 dst;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(n, tipc_nodes(net), list) {
+ dst = n->addr;
+ if (in_own_node(net, dst))
+ continue;
+ if (!tipc_node_is_up(n))
+ continue;
+ txskb = pskb_copy(skb, GFP_ATOMIC);
+ if (!txskb)
+ break;
+ msg_set_destnode(buf_msg(txskb), dst);
+ tipc_node_xmit_skb(net, txskb, dst, 0);
+ }
+ rcu_read_unlock();
+
+ kfree_skb(skb);
+}
+
/**
* tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node
* @net: the applicable net namespace
@@ -1116,9 +1291,9 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id
/* Broadcast ACKs are sent on a unicast link */
if (rc & TIPC_LINK_SND_BC_ACK) {
- tipc_node_lock(n);
+ tipc_node_read_lock(n);
tipc_link_build_ack_msg(le->link, &xmitq);
- tipc_node_unlock(n);
+ tipc_node_read_unlock(n);
}
if (!skb_queue_empty(&xmitq))
@@ -1151,30 +1326,30 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
u16 oseqno = msg_seqno(hdr);
u16 iseqno = msg_seqno(msg_get_wrapped(hdr));
u16 exp_pkts = msg_msgcnt(hdr);
- u16 rcv_nxt, syncpt, dlv_nxt;
+ u16 rcv_nxt, syncpt, dlv_nxt, inputq_len;
int state = n->state;
struct tipc_link *l, *tnl, *pl = NULL;
struct tipc_media_addr *maddr;
- int i, pb_id;
+ int pb_id;
l = n->links[bearer_id].link;
if (!l)
return false;
- rcv_nxt = l->rcv_nxt;
+ rcv_nxt = tipc_link_rcv_nxt(l);
if (likely((state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL)))
return true;
/* Find parallel link, if any */
- for (i = 0; i < MAX_BEARERS; i++) {
- if ((i != bearer_id) && n->links[i].link) {
- pl = n->links[i].link;
+ for (pb_id = 0; pb_id < MAX_BEARERS; pb_id++) {
+ if ((pb_id != bearer_id) && n->links[pb_id].link) {
+ pl = n->links[pb_id].link;
break;
}
}
- /* Update node accesibility if applicable */
+ /* Check and update node accesibility if applicable */
if (state == SELF_UP_PEER_COMING) {
if (!tipc_link_is_up(l))
return true;
@@ -1187,8 +1362,12 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
if (msg_peer_node_is_up(hdr))
return false;
tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT);
+ return true;
}
+ if (state == SELF_LEAVING_PEER_DOWN)
+ return false;
+
/* Ignore duplicate packets */
if ((usr != LINK_PROTOCOL) && less(oseqno, rcv_nxt))
return true;
@@ -1197,9 +1376,9 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
if ((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) {
syncpt = oseqno + exp_pkts - 1;
if (pl && tipc_link_is_up(pl)) {
- pb_id = pl->bearer_id;
__tipc_node_link_down(n, &pb_id, xmitq, &maddr);
- tipc_skb_queue_splice_tail_init(pl->inputq, l->inputq);
+ tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl),
+ tipc_link_inputq(l));
}
/* If pkts arrive out of order, use lowest calculated syncpt */
if (less(syncpt, n->sync_point))
@@ -1232,19 +1411,18 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
tipc_link_fsm_evt(l, LINK_SYNCH_BEGIN_EVT);
tipc_node_fsm_evt(n, NODE_SYNCH_BEGIN_EVT);
}
- if (less(syncpt, n->sync_point))
- n->sync_point = syncpt;
}
/* Open tunnel link when parallel link reaches synch point */
- if ((n->state == NODE_SYNCHING) && tipc_link_is_synching(l)) {
+ if (n->state == NODE_SYNCHING) {
if (tipc_link_is_synching(l)) {
tnl = l;
} else {
tnl = pl;
pl = l;
}
- dlv_nxt = pl->rcv_nxt - mod(skb_queue_len(pl->inputq));
+ inputq_len = skb_queue_len(tipc_link_inputq(pl));
+ dlv_nxt = tipc_link_rcv_nxt(pl) - inputq_len;
if (more(dlv_nxt, n->sync_point)) {
tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT);
tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT);
@@ -1304,22 +1482,32 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
/* Ensure broadcast reception is in synch with peer's send state */
if (unlikely(usr == LINK_PROTOCOL))
tipc_bcast_sync_rcv(net, n->bc_entry.link, hdr);
- else if (unlikely(n->bc_entry.link->acked != bc_ack))
+ else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack))
tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack);
- tipc_node_lock(n);
-
- /* Is reception permitted at the moment ? */
- if (!tipc_node_filter_pkt(n, hdr))
- goto unlock;
-
- /* Check and if necessary update node state */
- if (likely(tipc_node_check_state(n, skb, bearer_id, &xmitq))) {
- rc = tipc_link_rcv(le->link, skb, &xmitq);
- skb = NULL;
+ /* Receive packet directly if conditions permit */
+ tipc_node_read_lock(n);
+ if (likely((n->state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL))) {
+ spin_lock_bh(&le->lock);
+ if (le->link) {
+ rc = tipc_link_rcv(le->link, skb, &xmitq);
+ skb = NULL;
+ }
+ spin_unlock_bh(&le->lock);
+ }
+ tipc_node_read_unlock(n);
+
+ /* Check/update node state before receiving */
+ if (unlikely(skb)) {
+ tipc_node_write_lock(n);
+ if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) {
+ if (le->link) {
+ rc = tipc_link_rcv(le->link, skb, &xmitq);
+ skb = NULL;
+ }
+ }
+ tipc_node_write_unlock(n);
}
-unlock:
- tipc_node_unlock(n);
if (unlikely(rc & TIPC_LINK_UP_EVT))
tipc_node_link_up(n, bearer_id, &xmitq);
@@ -1384,15 +1572,15 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb)
continue;
}
- tipc_node_lock(node);
+ tipc_node_read_lock(node);
err = __tipc_nl_add_node(&msg, node);
if (err) {
last_addr = node->addr;
- tipc_node_unlock(node);
+ tipc_node_read_unlock(node);
goto out;
}
- tipc_node_unlock(node);
+ tipc_node_read_unlock(node);
}
done = 1;
out:
@@ -1402,3 +1590,314 @@ out:
return skb->len;
}
+
+/* tipc_node_find_by_name - locate owner node of link by link's name
+ * @net: the applicable net namespace
+ * @name: pointer to link name string
+ * @bearer_id: pointer to index in 'node->links' array where the link was found.
+ *
+ * Returns pointer to node owning the link, or 0 if no matching link is found.
+ */
+static struct tipc_node *tipc_node_find_by_name(struct net *net,
+ const char *link_name,
+ unsigned int *bearer_id)
+{
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_link *l;
+ struct tipc_node *n;
+ struct tipc_node *found_node = NULL;
+ int i;
+
+ *bearer_id = 0;
+ rcu_read_lock();
+ list_for_each_entry_rcu(n, &tn->node_list, list) {
+ tipc_node_read_lock(n);
+ for (i = 0; i < MAX_BEARERS; i++) {
+ l = n->links[i].link;
+ if (l && !strcmp(tipc_link_name(l), link_name)) {
+ *bearer_id = i;
+ found_node = n;
+ break;
+ }
+ }
+ tipc_node_read_unlock(n);
+ if (found_node)
+ break;
+ }
+ rcu_read_unlock();
+
+ return found_node;
+}
+
+int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+ int res = 0;
+ int bearer_id;
+ char *name;
+ struct tipc_link *link;
+ struct tipc_node *node;
+ struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+
+ if (!info->attrs[TIPC_NLA_LINK])
+ return -EINVAL;
+
+ err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX,
+ info->attrs[TIPC_NLA_LINK],
+ tipc_nl_link_policy);
+ if (err)
+ return err;
+
+ if (!attrs[TIPC_NLA_LINK_NAME])
+ return -EINVAL;
+
+ name = nla_data(attrs[TIPC_NLA_LINK_NAME]);
+
+ if (strcmp(name, tipc_bclink_name) == 0)
+ return tipc_nl_bc_link_set(net, attrs);
+
+ node = tipc_node_find_by_name(net, name, &bearer_id);
+ if (!node)
+ return -EINVAL;
+
+ tipc_node_read_lock(node);
+
+ link = node->links[bearer_id].link;
+ if (!link) {
+ res = -EINVAL;
+ goto out;
+ }
+
+ if (attrs[TIPC_NLA_LINK_PROP]) {
+ struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
+
+ err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP],
+ props);
+ if (err) {
+ res = err;
+ goto out;
+ }
+
+ if (props[TIPC_NLA_PROP_TOL]) {
+ u32 tol;
+
+ tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
+ tipc_link_set_tolerance(link, tol);
+ }
+ if (props[TIPC_NLA_PROP_PRIO]) {
+ u32 prio;
+
+ prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
+ tipc_link_set_prio(link, prio);
+ }
+ if (props[TIPC_NLA_PROP_WIN]) {
+ u32 win;
+
+ win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+ tipc_link_set_queue_limits(link, win);
+ }
+ }
+
+out:
+ tipc_node_read_unlock(node);
+
+ return res;
+}
+
+int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct tipc_nl_msg msg;
+ char *name;
+ int err;
+
+ msg.portid = info->snd_portid;
+ msg.seq = info->snd_seq;
+
+ if (!info->attrs[TIPC_NLA_LINK_NAME])
+ return -EINVAL;
+ name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]);
+
+ msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!msg.skb)
+ return -ENOMEM;
+
+ if (strcmp(name, tipc_bclink_name) == 0) {
+ err = tipc_nl_add_bc_link(net, &msg);
+ if (err) {
+ nlmsg_free(msg.skb);
+ return err;
+ }
+ } else {
+ int bearer_id;
+ struct tipc_node *node;
+ struct tipc_link *link;
+
+ node = tipc_node_find_by_name(net, name, &bearer_id);
+ if (!node)
+ return -EINVAL;
+
+ tipc_node_read_lock(node);
+ link = node->links[bearer_id].link;
+ if (!link) {
+ tipc_node_read_unlock(node);
+ nlmsg_free(msg.skb);
+ return -EINVAL;
+ }
+
+ err = __tipc_nl_add_link(net, &msg, link, 0);
+ tipc_node_read_unlock(node);
+ if (err) {
+ nlmsg_free(msg.skb);
+ return err;
+ }
+ }
+
+ return genlmsg_reply(msg.skb, info);
+}
+
+int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+ char *link_name;
+ unsigned int bearer_id;
+ struct tipc_link *link;
+ struct tipc_node *node;
+ struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+ struct tipc_link_entry *le;
+
+ if (!info->attrs[TIPC_NLA_LINK])
+ return -EINVAL;
+
+ err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX,
+ info->attrs[TIPC_NLA_LINK],
+ tipc_nl_link_policy);
+ if (err)
+ return err;
+
+ if (!attrs[TIPC_NLA_LINK_NAME])
+ return -EINVAL;
+
+ link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]);
+
+ if (strcmp(link_name, tipc_bclink_name) == 0) {
+ err = tipc_bclink_reset_stats(net);
+ if (err)
+ return err;
+ return 0;
+ }
+
+ node = tipc_node_find_by_name(net, link_name, &bearer_id);
+ if (!node)
+ return -EINVAL;
+
+ le = &node->links[bearer_id];
+ tipc_node_read_lock(node);
+ spin_lock_bh(&le->lock);
+ link = node->links[bearer_id].link;
+ if (!link) {
+ spin_unlock_bh(&le->lock);
+ tipc_node_read_unlock(node);
+ return -EINVAL;
+ }
+ tipc_link_reset_stats(link);
+ spin_unlock_bh(&le->lock);
+ tipc_node_read_unlock(node);
+ return 0;
+}
+
+/* Caller should hold node lock */
+static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg,
+ struct tipc_node *node, u32 *prev_link)
+{
+ u32 i;
+ int err;
+
+ for (i = *prev_link; i < MAX_BEARERS; i++) {
+ *prev_link = i;
+
+ if (!node->links[i].link)
+ continue;
+
+ err = __tipc_nl_add_link(net, msg,
+ node->links[i].link, NLM_F_MULTI);
+ if (err)
+ return err;
+ }
+ *prev_link = 0;
+
+ return 0;
+}
+
+int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_node *node;
+ struct tipc_nl_msg msg;
+ u32 prev_node = cb->args[0];
+ u32 prev_link = cb->args[1];
+ int done = cb->args[2];
+ int err;
+
+ if (done)
+ return 0;
+
+ msg.skb = skb;
+ msg.portid = NETLINK_CB(cb->skb).portid;
+ msg.seq = cb->nlh->nlmsg_seq;
+
+ rcu_read_lock();
+ if (prev_node) {
+ node = tipc_node_find(net, prev_node);
+ if (!node) {
+ /* We never set seq or call nl_dump_check_consistent()
+ * this means that setting prev_seq here will cause the
+ * consistence check to fail in the netlink callback
+ * handler. Resulting in the last NLMSG_DONE message
+ * having the NLM_F_DUMP_INTR flag set.
+ */
+ cb->prev_seq = 1;
+ goto out;
+ }
+ tipc_node_put(node);
+
+ list_for_each_entry_continue_rcu(node, &tn->node_list,
+ list) {
+ tipc_node_read_lock(node);
+ err = __tipc_nl_add_node_links(net, &msg, node,
+ &prev_link);
+ tipc_node_read_unlock(node);
+ if (err)
+ goto out;
+
+ prev_node = node->addr;
+ }
+ } else {
+ err = tipc_nl_add_bc_link(net, &msg);
+ if (err)
+ goto out;
+
+ list_for_each_entry_rcu(node, &tn->node_list, list) {
+ tipc_node_read_lock(node);
+ err = __tipc_nl_add_node_links(net, &msg, node,
+ &prev_link);
+ tipc_node_read_unlock(node);
+ if (err)
+ goto out;
+
+ prev_node = node->addr;
+ }
+ }
+ done = 1;
+out:
+ rcu_read_unlock();
+
+ cb->args[0] = prev_node;
+ cb->args[1] = prev_link;
+ cb->args[2] = done;
+
+ return skb->len;
+}
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 6734562d3c6e..f39d9d06e8bb 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -42,23 +42,6 @@
#include "bearer.h"
#include "msg.h"
-/* Out-of-range value for node signature */
-#define INVALID_NODE_SIG 0x10000
-
-#define INVALID_BEARER_ID -1
-
-/* Flags used to take different actions according to flag type
- * TIPC_NOTIFY_NODE_DOWN: notify node is down
- * TIPC_NOTIFY_NODE_UP: notify node is up
- * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type
- */
-enum {
- TIPC_NOTIFY_NODE_DOWN = (1 << 3),
- TIPC_NOTIFY_NODE_UP = (1 << 4),
- TIPC_NOTIFY_LINK_UP = (1 << 6),
- TIPC_NOTIFY_LINK_DOWN = (1 << 7)
-};
-
/* Optional capabilities supported by this code version
*/
enum {
@@ -66,72 +49,8 @@ enum {
};
#define TIPC_NODE_CAPABILITIES TIPC_BCAST_SYNCH
+#define INVALID_BEARER_ID -1
-struct tipc_link_entry {
- struct tipc_link *link;
- u32 mtu;
- struct sk_buff_head inputq;
- struct tipc_media_addr maddr;
-};
-
-struct tipc_bclink_entry {
- struct tipc_link *link;
- struct sk_buff_head inputq1;
- struct sk_buff_head arrvq;
- struct sk_buff_head inputq2;
- struct sk_buff_head namedq;
-};
-
-/**
- * struct tipc_node - TIPC node structure
- * @addr: network address of node
- * @ref: reference counter to node object
- * @lock: spinlock governing access to structure
- * @net: the applicable net namespace
- * @hash: links to adjacent nodes in unsorted hash chain
- * @inputq: pointer to input queue containing messages for msg event
- * @namedq: pointer to name table input queue with name table messages
- * @active_links: bearer ids of active links, used as index into links[] array
- * @links: array containing references to all links to node
- * @action_flags: bit mask of different types of node actions
- * @state: connectivity state vs peer node
- * @sync_point: sequence number where synch/failover is finished
- * @list: links to adjacent nodes in sorted list of cluster's nodes
- * @working_links: number of working links to node (both active and standby)
- * @link_cnt: number of links to node
- * @capabilities: bitmap, indicating peer node's functional capabilities
- * @signature: node instance identifier
- * @link_id: local and remote bearer ids of changing link, if any
- * @publ_list: list of publications
- * @rcu: rcu struct for tipc_node
- */
-struct tipc_node {
- u32 addr;
- struct kref kref;
- spinlock_t lock;
- struct net *net;
- struct hlist_node hash;
- int active_links[2];
- struct tipc_link_entry links[MAX_BEARERS];
- struct tipc_bclink_entry bc_entry;
- int action_flags;
- struct list_head list;
- int state;
- u16 sync_point;
- int link_cnt;
- u16 working_links;
- u16 capabilities;
- u32 signature;
- u32 link_id;
- struct list_head publ_list;
- struct list_head conn_sks;
- unsigned long keepalive_intv;
- struct timer_list timer;
- struct rcu_head rcu;
-};
-
-struct tipc_node *tipc_node_find(struct net *net, u32 addr);
-void tipc_node_put(struct tipc_node *node);
void tipc_node_stop(struct net *net);
void tipc_node_check_dest(struct net *net, u32 onode,
struct tipc_bearer *bearer,
@@ -139,50 +58,22 @@ void tipc_node_check_dest(struct net *net, u32 onode,
struct tipc_media_addr *maddr,
bool *respond, bool *dupl_addr);
void tipc_node_delete_links(struct net *net, int bearer_id);
-void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
-void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
-bool tipc_node_is_up(struct tipc_node *n);
int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
char *linkname, size_t len);
-void tipc_node_unlock(struct tipc_node *node);
int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
int selector);
int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
u32 selector);
+void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr);
+void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr);
+void tipc_node_broadcast(struct net *net, struct sk_buff *skb);
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
+int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel);
int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb);
-
-static inline void tipc_node_lock(struct tipc_node *node)
-{
- spin_lock_bh(&node->lock);
-}
-
-static inline struct tipc_link *node_active_link(struct tipc_node *n, int sel)
-{
- int bearer_id = n->active_links[sel & 1];
-
- if (unlikely(bearer_id == INVALID_BEARER_ID))
- return NULL;
-
- return n->links[bearer_id].link;
-}
-
-static inline unsigned int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
-{
- struct tipc_node *n;
- int bearer_id;
- unsigned int mtu = MAX_MSG_SIZE;
-
- n = tipc_node_find(net, addr);
- if (unlikely(!n))
- return mtu;
-
- bearer_id = n->active_links[sel & 1];
- if (likely(bearer_id != INVALID_BEARER_ID))
- mtu = n->links[bearer_id].mtu;
- tipc_node_put(n);
- return mtu;
-}
+int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb);
+int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info);
#endif
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 552dbaba9cf3..4d420bb27396 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -105,6 +105,7 @@ struct tipc_sock {
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
static void tipc_data_ready(struct sock *sk);
static void tipc_write_space(struct sock *sk);
+static void tipc_sock_destruct(struct sock *sk);
static int tipc_release(struct socket *sock);
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p);
@@ -381,6 +382,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
sk->sk_rcvbuf = sysctl_tipc_rmem[1];
sk->sk_data_ready = tipc_data_ready;
sk->sk_write_space = tipc_write_space;
+ sk->sk_destruct = tipc_sock_destruct;
tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
tsk->sent_unacked = 0;
atomic_set(&tsk->dupl_rcvcnt, 0);
@@ -470,9 +472,6 @@ static int tipc_release(struct socket *sock)
tipc_node_remove_conn(net, dnode, tsk->portid);
}
- /* Discard any remaining (connection-based) messages in receive queue */
- __skb_queue_purge(&sk->sk_receive_queue);
-
/* Reject any messages that accumulated in backlog queue */
sock->state = SS_DISCONNECTING;
release_sock(sk);
@@ -674,7 +673,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
struct tipc_sock *tsk = tipc_sk(sk);
struct net *net = sock_net(sk);
struct tipc_msg *mhdr = &tsk->phdr;
- struct sk_buff_head *pktchain = &sk->sk_write_queue;
+ struct sk_buff_head pktchain;
struct iov_iter save = msg->msg_iter;
uint mtu;
int rc;
@@ -688,14 +687,16 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
msg_set_nameupper(mhdr, seq->upper);
msg_set_hdr_sz(mhdr, MCAST_H_SIZE);
+ skb_queue_head_init(&pktchain);
+
new_mtu:
mtu = tipc_bcast_get_mtu(net);
- rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain);
+ rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &pktchain);
if (unlikely(rc < 0))
return rc;
do {
- rc = tipc_bcast_xmit(net, pktchain);
+ rc = tipc_bcast_xmit(net, &pktchain);
if (likely(!rc))
return dsz;
@@ -705,7 +706,7 @@ new_mtu:
if (!rc)
continue;
}
- __skb_queue_purge(pktchain);
+ __skb_queue_purge(&pktchain);
if (rc == -EMSGSIZE) {
msg->msg_iter = save;
goto new_mtu;
@@ -864,7 +865,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
struct net *net = sock_net(sk);
struct tipc_msg *mhdr = &tsk->phdr;
u32 dnode, dport;
- struct sk_buff_head *pktchain = &sk->sk_write_queue;
+ struct sk_buff_head pktchain;
struct sk_buff *skb;
struct tipc_name_seq *seq;
struct iov_iter save;
@@ -925,17 +926,18 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
msg_set_hdr_sz(mhdr, BASIC_H_SIZE);
}
+ skb_queue_head_init(&pktchain);
save = m->msg_iter;
new_mtu:
mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
- rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain);
+ rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &pktchain);
if (rc < 0)
return rc;
do {
- skb = skb_peek(pktchain);
+ skb = skb_peek(&pktchain);
TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
- rc = tipc_node_xmit(net, pktchain, dnode, tsk->portid);
+ rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid);
if (likely(!rc)) {
if (sock->state != SS_READY)
sock->state = SS_CONNECTING;
@@ -947,7 +949,7 @@ new_mtu:
if (!rc)
continue;
}
- __skb_queue_purge(pktchain);
+ __skb_queue_purge(&pktchain);
if (rc == -EMSGSIZE) {
m->msg_iter = save;
goto new_mtu;
@@ -1017,7 +1019,7 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
struct net *net = sock_net(sk);
struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_msg *mhdr = &tsk->phdr;
- struct sk_buff_head *pktchain = &sk->sk_write_queue;
+ struct sk_buff_head pktchain;
DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
u32 portid = tsk->portid;
int rc = -EINVAL;
@@ -1045,17 +1047,19 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
dnode = tsk_peer_node(tsk);
+ skb_queue_head_init(&pktchain);
next:
save = m->msg_iter;
mtu = tsk->max_pkt;
send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE);
- rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain);
+ rc = tipc_msg_build(mhdr, m, sent, send, mtu, &pktchain);
if (unlikely(rc < 0))
return rc;
+
do {
if (likely(!tsk_conn_cong(tsk))) {
- rc = tipc_node_xmit(net, pktchain, dnode, portid);
+ rc = tipc_node_xmit(net, &pktchain, dnode, portid);
if (likely(!rc)) {
tsk->sent_unacked++;
sent += send;
@@ -1064,7 +1068,7 @@ next:
goto next;
}
if (rc == -EMSGSIZE) {
- __skb_queue_purge(pktchain);
+ __skb_queue_purge(&pktchain);
tsk->max_pkt = tipc_node_get_mtu(net, dnode,
portid);
m->msg_iter = save;
@@ -1078,7 +1082,7 @@ next:
rc = tipc_wait_for_sndpkt(sock, &timeo);
} while (!rc);
- __skb_queue_purge(pktchain);
+ __skb_queue_purge(&pktchain);
return sent ? sent : rc;
}
@@ -1492,7 +1496,7 @@ static void tipc_write_space(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
- if (wq_has_sleeper(wq))
+ if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
POLLWRNORM | POLLWRBAND);
rcu_read_unlock();
@@ -1509,12 +1513,17 @@ static void tipc_data_ready(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
- if (wq_has_sleeper(wq))
+ if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
POLLRDNORM | POLLRDBAND);
rcu_read_unlock();
}
+static void tipc_sock_destruct(struct sock *sk)
+{
+ __skb_queue_purge(&sk->sk_receive_queue);
+}
+
/**
* filter_connect - Handle all incoming messages for a connection-based socket
* @tsk: TIPC socket
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 350cca33ee0a..f9ff73a8d815 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -289,15 +289,15 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid,
struct sockaddr_tipc *addr, void *usr_data,
void *buf, size_t len)
{
- struct tipc_subscriber *subscriber = usr_data;
+ struct tipc_subscriber *subscrb = usr_data;
struct tipc_subscription *sub = NULL;
struct tipc_net *tn = net_generic(net, tipc_net_id);
- tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscriber, &sub);
+ if (tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscrb, &sub))
+ return tipc_conn_terminate(tn->topsrv, subscrb->conid);
+
if (sub)
tipc_nametbl_subscribe(sub);
- else
- tipc_conn_terminate(tn->topsrv, subscriber->conid);
}
/* Handle one request to establish a new subscriber */
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index ad2719ad4c1b..d63a911e7fe2 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -48,7 +48,6 @@
#include <linux/tipc_netlink.h>
#include "core.h"
#include "bearer.h"
-#include "msg.h"
/* IANA assigned UDP port */
#define UDP_PORT_DEFAULT 6118
@@ -158,8 +157,11 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
struct rtable *rt;
- if (skb_headroom(skb) < UDP_MIN_HEADROOM)
- pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
+ if (skb_headroom(skb) < UDP_MIN_HEADROOM) {
+ err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
+ if (err)
+ goto tx_error;
+ }
skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
ub = rcu_dereference_rtnl(b->media_ptr);
@@ -180,15 +182,9 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
goto tx_error;
}
ttl = ip4_dst_hoplimit(&rt->dst);
- err = udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb,
- src->ipv4.s_addr,
- dst->ipv4.s_addr, 0, ttl, 0,
- src->udp_port, dst->udp_port,
- false, true);
- if (err < 0) {
- ip_rt_put(rt);
- goto tx_error;
- }
+ udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr,
+ dst->ipv4.s_addr, 0, ttl, 0, src->udp_port,
+ dst->udp_port, false, true);
#if IS_ENABLED(CONFIG_IPV6)
} else {
struct dst_entry *ndst;
@@ -221,10 +217,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
{
struct udp_bearer *ub;
struct tipc_bearer *b;
- int usr = msg_user(buf_msg(skb));
-
- if ((usr == LINK_PROTOCOL) || (usr == NAME_DISTRIBUTOR))
- skb_linearize(skb);
ub = rcu_dereference_sk_user_data(sk);
if (!ub) {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index aaa0b58d6aba..f75f847e688d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -326,6 +326,118 @@ found:
return s;
}
+/* Support code for asymmetrically connected dgram sockets
+ *
+ * If a datagram socket is connected to a socket not itself connected
+ * to the first socket (eg, /dev/log), clients may only enqueue more
+ * messages if the present receive queue of the server socket is not
+ * "too large". This means there's a second writeability condition
+ * poll and sendmsg need to test. The dgram recv code will do a wake
+ * up on the peer_wait wait queue of a socket upon reception of a
+ * datagram which needs to be propagated to sleeping would-be writers
+ * since these might not have sent anything so far. This can't be
+ * accomplished via poll_wait because the lifetime of the server
+ * socket might be less than that of its clients if these break their
+ * association with it or if the server socket is closed while clients
+ * are still connected to it and there's no way to inform "a polling
+ * implementation" that it should let go of a certain wait queue
+ *
+ * In order to propagate a wake up, a wait_queue_t of the client
+ * socket is enqueued on the peer_wait queue of the server socket
+ * whose wake function does a wake_up on the ordinary client socket
+ * wait queue. This connection is established whenever a write (or
+ * poll for write) hit the flow control condition and broken when the
+ * association to the server socket is dissolved or after a wake up
+ * was relayed.
+ */
+
+static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
+ void *key)
+{
+ struct unix_sock *u;
+ wait_queue_head_t *u_sleep;
+
+ u = container_of(q, struct unix_sock, peer_wake);
+
+ __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
+ q);
+ u->peer_wake.private = NULL;
+
+ /* relaying can only happen while the wq still exists */
+ u_sleep = sk_sleep(&u->sk);
+ if (u_sleep)
+ wake_up_interruptible_poll(u_sleep, key);
+
+ return 0;
+}
+
+static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
+{
+ struct unix_sock *u, *u_other;
+ int rc;
+
+ u = unix_sk(sk);
+ u_other = unix_sk(other);
+ rc = 0;
+ spin_lock(&u_other->peer_wait.lock);
+
+ if (!u->peer_wake.private) {
+ u->peer_wake.private = other;
+ __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
+
+ rc = 1;
+ }
+
+ spin_unlock(&u_other->peer_wait.lock);
+ return rc;
+}
+
+static void unix_dgram_peer_wake_disconnect(struct sock *sk,
+ struct sock *other)
+{
+ struct unix_sock *u, *u_other;
+
+ u = unix_sk(sk);
+ u_other = unix_sk(other);
+ spin_lock(&u_other->peer_wait.lock);
+
+ if (u->peer_wake.private == other) {
+ __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
+ u->peer_wake.private = NULL;
+ }
+
+ spin_unlock(&u_other->peer_wait.lock);
+}
+
+static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
+ struct sock *other)
+{
+ unix_dgram_peer_wake_disconnect(sk, other);
+ wake_up_interruptible_poll(sk_sleep(sk),
+ POLLOUT |
+ POLLWRNORM |
+ POLLWRBAND);
+}
+
+/* preconditions:
+ * - unix_peer(sk) == other
+ * - association is stable
+ */
+static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
+{
+ int connected;
+
+ connected = unix_dgram_peer_wake_connect(sk, other);
+
+ if (unix_recvq_full(other))
+ return 1;
+
+ if (connected)
+ unix_dgram_peer_wake_disconnect(sk, other);
+
+ return 0;
+}
+
static int unix_writable(const struct sock *sk)
{
return sk->sk_state != TCP_LISTEN &&
@@ -339,7 +451,7 @@ static void unix_write_space(struct sock *sk)
rcu_read_lock();
if (unix_writable(sk)) {
wq = rcu_dereference(sk->sk_wq);
- if (wq_has_sleeper(wq))
+ if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait,
POLLOUT | POLLWRNORM | POLLWRBAND);
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
@@ -431,6 +543,8 @@ static void unix_release_sock(struct sock *sk, int embrion)
skpair->sk_state_change(skpair);
sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
}
+
+ unix_dgram_peer_wake_disconnect(sk, skpair);
sock_put(skpair); /* It may now die */
unix_peer(sk) = NULL;
}
@@ -441,6 +555,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
if (state == TCP_LISTEN)
unix_release_sock(skb->sk, 1);
/* passed fds are erased in the kfree_skb hook */
+ UNIXCB(skb).consumed = skb->len;
kfree_skb(skb);
}
@@ -665,6 +780,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
INIT_LIST_HEAD(&u->link);
mutex_init(&u->readlock); /* single task reading lock */
init_waitqueue_head(&u->peer_wait);
+ init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
if (sk == NULL)
@@ -837,32 +953,20 @@ fail:
return NULL;
}
-static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode,
+ struct path *res)
{
- struct dentry *dentry;
- struct path path;
- int err = 0;
- /*
- * Get the parent directory, calculate the hash for last
- * component.
- */
- dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
- err = PTR_ERR(dentry);
- if (IS_ERR(dentry))
- return err;
+ int err;
- /*
- * All right, let's create it.
- */
- err = security_path_mknod(&path, dentry, mode, 0);
+ err = security_path_mknod(path, dentry, mode, 0);
if (!err) {
- err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
+ err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
if (!err) {
- res->mnt = mntget(path.mnt);
+ res->mnt = mntget(path->mnt);
res->dentry = dget(dentry);
}
}
- done_path_create(&path, dentry);
+
return err;
}
@@ -873,10 +977,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct unix_sock *u = unix_sk(sk);
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
char *sun_path = sunaddr->sun_path;
- int err;
+ int err, name_err;
unsigned int hash;
struct unix_address *addr;
struct hlist_head *list;
+ struct path path;
+ struct dentry *dentry;
err = -EINVAL;
if (sunaddr->sun_family != AF_UNIX)
@@ -892,14 +998,34 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
addr_len = err;
+ name_err = 0;
+ dentry = NULL;
+ if (sun_path[0]) {
+ /* Get the parent directory, calculate the hash for last
+ * component.
+ */
+ dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+
+ if (IS_ERR(dentry)) {
+ /* delay report until after 'already bound' check */
+ name_err = PTR_ERR(dentry);
+ dentry = NULL;
+ }
+ }
+
err = mutex_lock_interruptible(&u->readlock);
if (err)
- goto out;
+ goto out_path;
err = -EINVAL;
if (u->addr)
goto out_up;
+ if (name_err) {
+ err = name_err == -EEXIST ? -EADDRINUSE : name_err;
+ goto out_up;
+ }
+
err = -ENOMEM;
addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
if (!addr)
@@ -910,11 +1036,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
addr->hash = hash ^ sk->sk_type;
atomic_set(&addr->refcnt, 1);
- if (sun_path[0]) {
- struct path path;
+ if (dentry) {
+ struct path u_path;
umode_t mode = S_IFSOCK |
(SOCK_INODE(sock)->i_mode & ~current_umask());
- err = unix_mknod(sun_path, mode, &path);
+ err = unix_mknod(dentry, &path, mode, &u_path);
if (err) {
if (err == -EEXIST)
err = -EADDRINUSE;
@@ -922,9 +1048,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out_up;
}
addr->hash = UNIX_HASH_SIZE;
- hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
+ hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
spin_lock(&unix_table_lock);
- u->path = path;
+ u->path = u_path;
list = &unix_socket_table[hash];
} else {
spin_lock(&unix_table_lock);
@@ -947,6 +1073,10 @@ out_unlock:
spin_unlock(&unix_table_lock);
out_up:
mutex_unlock(&u->readlock);
+out_path:
+ if (dentry)
+ done_path_create(&path, dentry);
+
out:
return err;
}
@@ -1032,6 +1162,8 @@ restart:
if (unix_peer(sk)) {
struct sock *old_peer = unix_peer(sk);
unix_peer(sk) = other;
+ unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
+
unix_state_double_unlock(sk, other);
if (other != old_peer)
@@ -1364,7 +1496,7 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
UNIXCB(skb).fp = NULL;
for (i = scm->fp->count-1; i >= 0; i--)
- unix_notinflight(scm->fp->fp[i]);
+ unix_notinflight(scm->fp->user, scm->fp->fp[i]);
}
static void unix_destruct_scm(struct sk_buff *skb)
@@ -1381,6 +1513,21 @@ static void unix_destruct_scm(struct sk_buff *skb)
sock_wfree(skb);
}
+/*
+ * The "user->unix_inflight" variable is protected by the garbage
+ * collection lock, and we just read it locklessly here. If you go
+ * over the limit, there might be a tiny race in actually noticing
+ * it across threads. Tough.
+ */
+static inline bool too_many_unix_fds(struct task_struct *p)
+{
+ struct user_struct *user = current_user();
+
+ if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+ return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+ return false;
+}
+
#define MAX_RECURSION_LEVEL 4
static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
@@ -1389,6 +1536,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
unsigned char max_level = 0;
int unix_sock_count = 0;
+ if (too_many_unix_fds(current))
+ return -ETOOMANYREFS;
+
for (i = scm->fp->count - 1; i >= 0; i--) {
struct sock *sk = unix_get_socket(scm->fp->fp[i]);
@@ -1410,10 +1560,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
if (!UNIXCB(skb).fp)
return -ENOMEM;
- if (unix_sock_count) {
- for (i = scm->fp->count - 1; i >= 0; i--)
- unix_inflight(scm->fp->fp[i]);
- }
+ for (i = scm->fp->count - 1; i >= 0; i--)
+ unix_inflight(scm->fp->user, scm->fp->fp[i]);
return max_level;
}
@@ -1433,6 +1581,14 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
return err;
}
+static bool unix_passcred_enabled(const struct socket *sock,
+ const struct sock *other)
+{
+ return test_bit(SOCK_PASSCRED, &sock->flags) ||
+ !other->sk_socket ||
+ test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
+}
+
/*
* Some apps rely on write() giving SCM_CREDENTIALS
* We include credentials if source or destination socket
@@ -1443,14 +1599,41 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
{
if (UNIXCB(skb).pid)
return;
- if (test_bit(SOCK_PASSCRED, &sock->flags) ||
- !other->sk_socket ||
- test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
+ if (unix_passcred_enabled(sock, other)) {
UNIXCB(skb).pid = get_pid(task_tgid(current));
current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
}
}
+static int maybe_init_creds(struct scm_cookie *scm,
+ struct socket *socket,
+ const struct sock *other)
+{
+ int err;
+ struct msghdr msg = { .msg_controllen = 0 };
+
+ err = scm_send(socket, &msg, scm, false);
+ if (err)
+ return err;
+
+ if (unix_passcred_enabled(socket, other)) {
+ scm->pid = get_pid(task_tgid(current));
+ current_uid_gid(&scm->creds.uid, &scm->creds.gid);
+ }
+ return err;
+}
+
+static bool unix_skb_scm_eq(struct sk_buff *skb,
+ struct scm_cookie *scm)
+{
+ const struct unix_skb_parms *u = &UNIXCB(skb);
+
+ return u->pid == scm->pid &&
+ uid_eq(u->uid, scm->creds.uid) &&
+ gid_eq(u->gid, scm->creds.gid) &&
+ unix_secdata_eq(scm, skb);
+}
+
/*
* Send AF_UNIX data.
*/
@@ -1471,6 +1654,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
struct scm_cookie scm;
int max_level;
int data_len = 0;
+ int sk_locked;
wait_for_unix_gc();
err = scm_send(sock, msg, &scm, false);
@@ -1549,12 +1733,14 @@ restart:
goto out_free;
}
+ sk_locked = 0;
unix_state_lock(other);
+restart_locked:
err = -EPERM;
if (!unix_may_send(sk, other))
goto out_unlock;
- if (sock_flag(other, SOCK_DEAD)) {
+ if (unlikely(sock_flag(other, SOCK_DEAD))) {
/*
* Check with 1003.1g - what should
* datagram error
@@ -1562,10 +1748,14 @@ restart:
unix_state_unlock(other);
sock_put(other);
+ if (!sk_locked)
+ unix_state_lock(sk);
+
err = 0;
- unix_state_lock(sk);
if (unix_peer(sk) == other) {
unix_peer(sk) = NULL;
+ unix_dgram_peer_wake_disconnect_wakeup(sk, other);
+
unix_state_unlock(sk);
unix_dgram_disconnected(sk, other);
@@ -1591,21 +1781,43 @@ restart:
goto out_unlock;
}
- if (unix_peer(other) != sk && unix_recvq_full(other)) {
- if (!timeo) {
- err = -EAGAIN;
- goto out_unlock;
+ /* other == sk && unix_peer(other) != sk if
+ * - unix_peer(sk) == NULL, destination address bound to sk
+ * - unix_peer(sk) == sk by time of get but disconnected before lock
+ */
+ if (other != sk &&
+ unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+ if (timeo) {
+ timeo = unix_wait_for_peer(other, timeo);
+
+ err = sock_intr_errno(timeo);
+ if (signal_pending(current))
+ goto out_free;
+
+ goto restart;
}
- timeo = unix_wait_for_peer(other, timeo);
+ if (!sk_locked) {
+ unix_state_unlock(other);
+ unix_state_double_lock(sk, other);
+ }
- err = sock_intr_errno(timeo);
- if (signal_pending(current))
- goto out_free;
+ if (unix_peer(sk) != other ||
+ unix_dgram_peer_wake_me(sk, other)) {
+ err = -EAGAIN;
+ sk_locked = 1;
+ goto out_unlock;
+ }
- goto restart;
+ if (!sk_locked) {
+ sk_locked = 1;
+ goto restart_locked;
+ }
}
+ if (unlikely(sk_locked))
+ unix_state_unlock(sk);
+
if (sock_flag(other, SOCK_RCVTSTAMP))
__net_timestamp(skb);
maybe_add_creds(skb, sock, other);
@@ -1619,6 +1831,8 @@ restart:
return len;
out_unlock:
+ if (sk_locked)
+ unix_state_unlock(sk);
unix_state_unlock(other);
out_free:
kfree_skb(skb);
@@ -1740,8 +1954,10 @@ out_err:
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
int offset, size_t size, int flags)
{
- int err = 0;
- bool send_sigpipe = true;
+ int err;
+ bool send_sigpipe = false;
+ bool init_scm = true;
+ struct scm_cookie scm;
struct sock *other, *sk = socket->sk;
struct sk_buff *skb, *newskb = NULL, *tail = NULL;
@@ -1759,7 +1975,7 @@ alloc_skb:
newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
&err, 0);
if (!newskb)
- return err;
+ goto err;
}
/* we must acquire readlock as we modify already present
@@ -1768,12 +1984,12 @@ alloc_skb:
err = mutex_lock_interruptible(&unix_sk(other)->readlock);
if (err) {
err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
- send_sigpipe = false;
goto err;
}
if (sk->sk_shutdown & SEND_SHUTDOWN) {
err = -EPIPE;
+ send_sigpipe = true;
goto err_unlock;
}
@@ -1782,23 +1998,34 @@ alloc_skb:
if (sock_flag(other, SOCK_DEAD) ||
other->sk_shutdown & RCV_SHUTDOWN) {
err = -EPIPE;
+ send_sigpipe = true;
goto err_state_unlock;
}
+ if (init_scm) {
+ err = maybe_init_creds(&scm, socket, other);
+ if (err)
+ goto err_state_unlock;
+ init_scm = false;
+ }
+
skb = skb_peek_tail(&other->sk_receive_queue);
if (tail && tail == skb) {
skb = newskb;
- } else if (!skb) {
- if (newskb)
+ } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
+ if (newskb) {
skb = newskb;
- else
+ } else {
+ tail = skb;
goto alloc_skb;
+ }
} else if (newskb) {
/* this is fast path, we don't necessarily need to
* call to kfree_skb even though with newskb == NULL
* this - does no harm
*/
consume_skb(newskb);
+ newskb = NULL;
}
if (skb_append_pagefrags(skb, page, offset, size)) {
@@ -1811,14 +2038,20 @@ alloc_skb:
skb->truesize += size;
atomic_add(size, &sk->sk_wmem_alloc);
- if (newskb)
+ if (newskb) {
+ err = unix_scm_to_skb(&scm, skb, false);
+ if (err)
+ goto err_state_unlock;
+ spin_lock(&other->sk_receive_queue.lock);
__skb_queue_tail(&other->sk_receive_queue, newskb);
+ spin_unlock(&other->sk_receive_queue.lock);
+ }
unix_state_unlock(other);
mutex_unlock(&unix_sk(other)->readlock);
other->sk_data_ready(other);
-
+ scm_destroy(&scm);
return size;
err_state_unlock:
@@ -1829,6 +2062,8 @@ err:
kfree_skb(newskb);
if (send_sigpipe && !(flags & MSG_NOSIGNAL))
send_sig(SIGPIPE, current, 0);
+ if (!init_scm)
+ scm_destroy(&scm);
return err;
}
@@ -1878,8 +2113,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
struct scm_cookie scm;
struct sock *sk = sock->sk;
struct unix_sock *u = unix_sk(sk);
- int noblock = flags & MSG_DONTWAIT;
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
+ long timeo;
int err;
int peeked, skip;
@@ -1887,30 +2122,38 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
if (flags&MSG_OOB)
goto out;
- err = mutex_lock_interruptible(&u->readlock);
- if (unlikely(err)) {
- /* recvmsg() in non blocking mode is supposed to return -EAGAIN
- * sk_rcvtimeo is not honored by mutex_lock_interruptible()
- */
- err = noblock ? -EAGAIN : -ERESTARTSYS;
- goto out;
- }
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+ do {
+ mutex_lock(&u->readlock);
+
+ skip = sk_peek_offset(sk, flags);
+ skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
+ &last);
+ if (skb)
+ break;
- skip = sk_peek_offset(sk, flags);
+ mutex_unlock(&u->readlock);
- skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
- if (!skb) {
+ if (err != -EAGAIN)
+ break;
+ } while (timeo &&
+ !__skb_wait_for_more_packets(sk, &err, &timeo, last));
+
+ if (!skb) { /* implies readlock unlocked */
unix_state_lock(sk);
/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
(sk->sk_shutdown & RCV_SHUTDOWN))
err = 0;
unix_state_unlock(sk);
- goto out_unlock;
+ goto out;
}
- wake_up_interruptible_sync_poll(&u->peer_wait,
- POLLOUT | POLLWRNORM | POLLWRBAND);
+ if (wq_has_sleeper(&u->peer_wait))
+ wake_up_interruptible_sync_poll(&u->peer_wait,
+ POLLOUT | POLLWRNORM |
+ POLLWRBAND);
if (msg->msg_name)
unix_copy_addr(msg, skb->sk);
@@ -1962,7 +2205,6 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
out_free:
skb_free_datagram(sk, skb);
-out_unlock:
mutex_unlock(&u->readlock);
out:
return err;
@@ -1991,7 +2233,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
!timeo)
break;
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
unix_state_unlock(sk);
timeo = freezable_schedule_timeout(timeo);
unix_state_lock(sk);
@@ -1999,7 +2241,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
if (sock_flag(sk, SOCK_DEAD))
break;
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
}
finish_wait(sk_sleep(sk), &wait);
@@ -2040,13 +2282,15 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
size_t size = state->size;
unsigned int last_len;
- err = -EINVAL;
- if (sk->sk_state != TCP_ESTABLISHED)
+ if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
+ err = -EINVAL;
goto out;
+ }
- err = -EOPNOTSUPP;
- if (flags & MSG_OOB)
+ if (unlikely(flags & MSG_OOB)) {
+ err = -EOPNOTSUPP;
goto out;
+ }
target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
timeo = sock_rcvtimeo(sk, noblock);
@@ -2056,14 +2300,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
/* Lock the socket to prevent queue disordering
* while sleeps in memcpy_tomsg
*/
- err = mutex_lock_interruptible(&u->readlock);
- if (unlikely(err)) {
- /* recvmsg() in non blocking mode is supposed to return -EAGAIN
- * sk_rcvtimeo is not honored by mutex_lock_interruptible()
- */
- err = noblock ? -EAGAIN : -ERESTARTSYS;
- goto out;
- }
+ mutex_lock(&u->readlock);
if (flags & MSG_PEEK)
skip = sk_peek_offset(sk, flags);
@@ -2072,8 +2309,10 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
do {
int chunk;
+ bool drop_skb;
struct sk_buff *skb, *last;
+redo:
unix_state_lock(sk);
if (sock_flag(sk, SOCK_DEAD)) {
err = -ECONNRESET;
@@ -2098,21 +2337,24 @@ again:
goto unlock;
unix_state_unlock(sk);
- err = -EAGAIN;
- if (!timeo)
+ if (!timeo) {
+ err = -EAGAIN;
break;
+ }
+
mutex_unlock(&u->readlock);
timeo = unix_stream_data_wait(sk, timeo, last,
last_len);
- if (signal_pending(current) ||
- mutex_lock_interruptible(&u->readlock)) {
+ if (signal_pending(current)) {
err = sock_intr_errno(timeo);
+ scm_destroy(&scm);
goto out;
}
- continue;
+ mutex_lock(&u->readlock);
+ goto redo;
unlock:
unix_state_unlock(sk);
break;
@@ -2131,10 +2373,7 @@ unlock:
if (check_creds) {
/* Never glue messages from different writers */
- if ((UNIXCB(skb).pid != scm.pid) ||
- !uid_eq(UNIXCB(skb).uid, scm.creds.uid) ||
- !gid_eq(UNIXCB(skb).gid, scm.creds.gid) ||
- !unix_secdata_eq(&scm, skb))
+ if (!unix_skb_scm_eq(skb, &scm))
break;
} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
/* Copy credentials */
@@ -2152,7 +2391,11 @@ unlock:
}
chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
+ skb_get(skb);
chunk = state->recv_actor(skb, skip, chunk, state);
+ drop_skb = !unix_skb_len(skb);
+ /* skb is only safe to use if !drop_skb */
+ consume_skb(skb);
if (chunk < 0) {
if (copied == 0)
copied = -EFAULT;
@@ -2161,6 +2404,18 @@ unlock:
copied += chunk;
size -= chunk;
+ if (drop_skb) {
+ /* the skb was touched by a concurrent reader;
+ * we should not expect anything from this skb
+ * anymore and assume it invalid - we can be
+ * sure it was dropped from the socket queue
+ *
+ * let's report a short read
+ */
+ err = 0;
+ break;
+ }
+
/* Mark read part of skb as used */
if (!(flags & MSG_PEEK)) {
UNIXCB(skb).consumed += chunk;
@@ -2454,20 +2709,22 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
return mask;
writable = unix_writable(sk);
- other = unix_peer_get(sk);
- if (other) {
- if (unix_peer(other) != sk) {
- sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
- if (unix_recvq_full(other))
- writable = 0;
- }
- sock_put(other);
+ if (writable) {
+ unix_state_lock(sk);
+
+ other = unix_peer(sk);
+ if (other && unix_peer(other) != sk &&
+ unix_recvq_full(other) &&
+ unix_dgram_peer_wake_me(sk, other))
+ writable = 0;
+
+ unix_state_unlock(sk);
}
if (writable)
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
return mask;
}
diff --git a/net/unix/diag.c b/net/unix/diag.c
index c512f64d5287..4d9679701a6d 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -220,7 +220,7 @@ done:
return skb->len;
}
-static struct sock *unix_lookup_by_ino(int ino)
+static struct sock *unix_lookup_by_ino(unsigned int ino)
{
int i;
struct sock *sk;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index a73a226f2d33..6a0d48525fcf 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -116,15 +116,15 @@ struct sock *unix_get_socket(struct file *filp)
* descriptor if it is for an AF_UNIX socket.
*/
-void unix_inflight(struct file *fp)
+void unix_inflight(struct user_struct *user, struct file *fp)
{
struct sock *s = unix_get_socket(fp);
+ spin_lock(&unix_gc_lock);
+
if (s) {
struct unix_sock *u = unix_sk(s);
- spin_lock(&unix_gc_lock);
-
if (atomic_long_inc_return(&u->inflight) == 1) {
BUG_ON(!list_empty(&u->link));
list_add_tail(&u->link, &gc_inflight_list);
@@ -132,25 +132,28 @@ void unix_inflight(struct file *fp)
BUG_ON(list_empty(&u->link));
}
unix_tot_inflight++;
- spin_unlock(&unix_gc_lock);
}
+ user->unix_inflight++;
+ spin_unlock(&unix_gc_lock);
}
-void unix_notinflight(struct file *fp)
+void unix_notinflight(struct user_struct *user, struct file *fp)
{
struct sock *s = unix_get_socket(fp);
+ spin_lock(&unix_gc_lock);
+
if (s) {
struct unix_sock *u = unix_sk(s);
- spin_lock(&unix_gc_lock);
BUG_ON(list_empty(&u->link));
if (atomic_long_dec_and_test(&u->inflight))
list_del_init(&u->link);
unix_tot_inflight--;
- spin_unlock(&unix_gc_lock);
}
+ user->unix_inflight--;
+ spin_unlock(&unix_gc_lock);
}
static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 7fd1220fbfa0..bbe65dcb9738 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1557,8 +1557,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
if (err < 0)
goto out;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-
while (total_written < len) {
ssize_t written;
@@ -1578,7 +1576,9 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
goto out_wait;
release_sock(sk);
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
timeout = schedule_timeout(timeout);
+ finish_wait(sk_sleep(sk), &wait);
lock_sock(sk);
if (signal_pending(current)) {
err = sock_intr_errno(timeout);
@@ -1588,8 +1588,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
goto out_wait;
}
- prepare_to_wait(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
}
/* These checks occur both as part of and after the loop
@@ -1635,7 +1633,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
out_wait:
if (total_written > 0)
err = total_written;
- finish_wait(sk_sleep(sk), &wait);
out:
release_sock(sk);
return err;
@@ -1716,7 +1713,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (err < 0)
goto out;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
while (1) {
s64 ready = vsock_stream_has_data(vsk);
@@ -1727,7 +1723,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
*/
err = -ENOMEM;
- goto out_wait;
+ goto out;
} else if (ready > 0) {
ssize_t read;
@@ -1750,7 +1746,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
vsk, target, read,
!(flags & MSG_PEEK), &recv_data);
if (err < 0)
- goto out_wait;
+ goto out;
if (read >= target || flags & MSG_PEEK)
break;
@@ -1773,7 +1769,9 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
break;
release_sock(sk);
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
timeout = schedule_timeout(timeout);
+ finish_wait(sk_sleep(sk), &wait);
lock_sock(sk);
if (signal_pending(current)) {
@@ -1783,9 +1781,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
err = -EAGAIN;
break;
}
-
- prepare_to_wait(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
}
}
@@ -1816,8 +1811,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
err = copied;
}
-out_wait:
- finish_wait(sk_sleep(sk), &wait);
out:
release_sock(sk);
return err;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 400d87294de3..0a369bb440e7 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1234,7 +1234,7 @@ vmci_transport_recv_connecting_server(struct sock *listener,
/* Callers of accept() will be be waiting on the listening socket, not
* the pending socket.
*/
- listener->sk_state_change(listener);
+ listener->sk_data_ready(listener);
return 0;
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
index 2ad46f39649f..1820e74a5752 100644
--- a/net/vmw_vsock/vmci_transport.h
+++ b/net/vmw_vsock/vmci_transport.h
@@ -121,7 +121,7 @@ struct vmci_transport {
u64 queue_pair_max_size;
u32 detach_sub_id;
union vmci_transport_notify notify;
- struct vmci_transport_notify_ops *notify_ops;
+ const struct vmci_transport_notify_ops *notify_ops;
struct list_head elem;
struct sock *sk;
spinlock_t lock; /* protects sk. */
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
index 9b7f207f2bee..fd8cf0214d51 100644
--- a/net/vmw_vsock/vmci_transport_notify.c
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -661,7 +661,7 @@ static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
}
/* Socket control packet based operations. */
-struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
+const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
vmci_transport_notify_pkt_socket_init,
vmci_transport_notify_pkt_socket_destruct,
vmci_transport_notify_pkt_poll_in,
diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h
index 7df793249b6c..3c464d394a8f 100644
--- a/net/vmw_vsock/vmci_transport_notify.h
+++ b/net/vmw_vsock/vmci_transport_notify.h
@@ -77,7 +77,8 @@ struct vmci_transport_notify_ops {
void (*process_negotiate) (struct sock *sk);
};
-extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops;
-extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops;
+extern const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops;
+extern const
+struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops;
#endif /* __VMCI_TRANSPORT_NOTIFY_H__ */
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
index dc9c7929a2f9..21e591dafb03 100644
--- a/net/vmw_vsock/vmci_transport_notify_qstate.c
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -419,7 +419,7 @@ vmci_transport_notify_pkt_send_pre_enqueue(
}
/* Socket always on control packet based operations. */
-struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = {
+const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = {
vmci_transport_notify_pkt_socket_init,
vmci_transport_notify_pkt_socket_destruct,
vmci_transport_notify_pkt_poll_in,
diff --git a/net/wireless/core.c b/net/wireless/core.c
index b0915515640e..8f0bac7e03c4 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1147,6 +1147,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
return NOTIFY_DONE;
}
+ wireless_nlevent_flush();
+
return NOTIFY_OK;
}
diff --git a/net/wireless/core.h b/net/wireless/core.h
index a618b4b86fa4..022ccad06cbe 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -416,13 +416,6 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev);
void cfg80211_process_wdev_events(struct wireless_dev *wdev);
-int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev,
- enum nl80211_iftype iftype,
- struct ieee80211_channel *chan,
- enum cfg80211_chan_mode chanmode,
- u8 radar_detect);
-
/**
* cfg80211_chandef_dfs_usable - checks if chandef is DFS usable
* @wiphy: the wiphy to validate against
diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c
index dc0e59e53dbf..6beab0cfcb99 100644
--- a/net/wireless/lib80211_crypt_ccmp.c
+++ b/net/wireless/lib80211_crypt_ccmp.c
@@ -311,8 +311,8 @@ static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
}
keyidx >>= 6;
if (key->key_idx != keyidx) {
- printk(KERN_DEBUG "CCMP: RX tkey->key_idx=%d frame "
- "keyidx=%d priv=%p\n", key->key_idx, keyidx, priv);
+ net_dbg_ratelimited("CCMP: RX tkey->key_idx=%d frame keyidx=%d\n",
+ key->key_idx, keyidx);
return -6;
}
if (!key->key_set) {
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
index 8c90ba79e56e..71447cf86306 100644
--- a/net/wireless/lib80211_crypt_tkip.c
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -29,7 +29,8 @@
#include <linux/ieee80211.h>
#include <net/iw_handler.h>
-#include <linux/crypto.h>
+#include <crypto/hash.h>
+#include <crypto/skcipher.h>
#include <linux/crc32.h>
#include <net/lib80211.h>
@@ -63,10 +64,10 @@ struct lib80211_tkip_data {
int key_idx;
- struct crypto_blkcipher *rx_tfm_arc4;
- struct crypto_hash *rx_tfm_michael;
- struct crypto_blkcipher *tx_tfm_arc4;
- struct crypto_hash *tx_tfm_michael;
+ struct crypto_skcipher *rx_tfm_arc4;
+ struct crypto_ahash *rx_tfm_michael;
+ struct crypto_skcipher *tx_tfm_arc4;
+ struct crypto_ahash *tx_tfm_michael;
/* scratch buffers for virt_to_page() (crypto API) */
u8 rx_hdr[16], tx_hdr[16];
@@ -98,29 +99,29 @@ static void *lib80211_tkip_init(int key_idx)
priv->key_idx = key_idx;
- priv->tx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0,
- CRYPTO_ALG_ASYNC);
+ priv->tx_tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0,
+ CRYPTO_ALG_ASYNC);
if (IS_ERR(priv->tx_tfm_arc4)) {
priv->tx_tfm_arc4 = NULL;
goto fail;
}
- priv->tx_tfm_michael = crypto_alloc_hash("michael_mic", 0,
- CRYPTO_ALG_ASYNC);
+ priv->tx_tfm_michael = crypto_alloc_ahash("michael_mic", 0,
+ CRYPTO_ALG_ASYNC);
if (IS_ERR(priv->tx_tfm_michael)) {
priv->tx_tfm_michael = NULL;
goto fail;
}
- priv->rx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0,
- CRYPTO_ALG_ASYNC);
+ priv->rx_tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0,
+ CRYPTO_ALG_ASYNC);
if (IS_ERR(priv->rx_tfm_arc4)) {
priv->rx_tfm_arc4 = NULL;
goto fail;
}
- priv->rx_tfm_michael = crypto_alloc_hash("michael_mic", 0,
- CRYPTO_ALG_ASYNC);
+ priv->rx_tfm_michael = crypto_alloc_ahash("michael_mic", 0,
+ CRYPTO_ALG_ASYNC);
if (IS_ERR(priv->rx_tfm_michael)) {
priv->rx_tfm_michael = NULL;
goto fail;
@@ -130,14 +131,10 @@ static void *lib80211_tkip_init(int key_idx)
fail:
if (priv) {
- if (priv->tx_tfm_michael)
- crypto_free_hash(priv->tx_tfm_michael);
- if (priv->tx_tfm_arc4)
- crypto_free_blkcipher(priv->tx_tfm_arc4);
- if (priv->rx_tfm_michael)
- crypto_free_hash(priv->rx_tfm_michael);
- if (priv->rx_tfm_arc4)
- crypto_free_blkcipher(priv->rx_tfm_arc4);
+ crypto_free_ahash(priv->tx_tfm_michael);
+ crypto_free_skcipher(priv->tx_tfm_arc4);
+ crypto_free_ahash(priv->rx_tfm_michael);
+ crypto_free_skcipher(priv->rx_tfm_arc4);
kfree(priv);
}
@@ -148,14 +145,10 @@ static void lib80211_tkip_deinit(void *priv)
{
struct lib80211_tkip_data *_priv = priv;
if (_priv) {
- if (_priv->tx_tfm_michael)
- crypto_free_hash(_priv->tx_tfm_michael);
- if (_priv->tx_tfm_arc4)
- crypto_free_blkcipher(_priv->tx_tfm_arc4);
- if (_priv->rx_tfm_michael)
- crypto_free_hash(_priv->rx_tfm_michael);
- if (_priv->rx_tfm_arc4)
- crypto_free_blkcipher(_priv->rx_tfm_arc4);
+ crypto_free_ahash(_priv->tx_tfm_michael);
+ crypto_free_skcipher(_priv->tx_tfm_arc4);
+ crypto_free_ahash(_priv->rx_tfm_michael);
+ crypto_free_skcipher(_priv->rx_tfm_arc4);
}
kfree(priv);
}
@@ -353,11 +346,12 @@ static int lib80211_tkip_hdr(struct sk_buff *skb, int hdr_len,
static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
{
struct lib80211_tkip_data *tkey = priv;
- struct blkcipher_desc desc = { .tfm = tkey->tx_tfm_arc4 };
+ SKCIPHER_REQUEST_ON_STACK(req, tkey->tx_tfm_arc4);
int len;
u8 rc4key[16], *pos, *icv;
u32 crc;
struct scatterlist sg;
+ int err;
if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) {
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
@@ -382,9 +376,14 @@ static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
icv[2] = crc >> 16;
icv[3] = crc >> 24;
- crypto_blkcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16);
+ crypto_skcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16);
sg_init_one(&sg, pos, len + 4);
- return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4);
+ skcipher_request_set_tfm(req, tkey->tx_tfm_arc4);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &sg, &sg, len + 4, NULL);
+ err = crypto_skcipher_encrypt(req);
+ skcipher_request_zero(req);
+ return err;
}
/*
@@ -403,7 +402,7 @@ static inline int tkip_replay_check(u32 iv32_n, u16 iv16_n,
static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
{
struct lib80211_tkip_data *tkey = priv;
- struct blkcipher_desc desc = { .tfm = tkey->rx_tfm_arc4 };
+ SKCIPHER_REQUEST_ON_STACK(req, tkey->rx_tfm_arc4);
u8 rc4key[16];
u8 keyidx, *pos;
u32 iv32;
@@ -413,6 +412,7 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
u32 crc;
struct scatterlist sg;
int plen;
+ int err;
hdr = (struct ieee80211_hdr *)skb->data;
@@ -434,8 +434,8 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
}
keyidx >>= 6;
if (tkey->key_idx != keyidx) {
- printk(KERN_DEBUG "TKIP: RX tkey->key_idx=%d frame "
- "keyidx=%d priv=%p\n", tkey->key_idx, keyidx, priv);
+ net_dbg_ratelimited("TKIP: RX tkey->key_idx=%d frame keyidx=%d\n",
+ tkey->key_idx, keyidx);
return -6;
}
if (!tkey->key_set) {
@@ -465,9 +465,14 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
plen = skb->len - hdr_len - 12;
- crypto_blkcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16);
+ crypto_skcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16);
sg_init_one(&sg, pos, plen + 4);
- if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) {
+ skcipher_request_set_tfm(req, tkey->rx_tfm_arc4);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &sg, &sg, plen + 4, NULL);
+ err = crypto_skcipher_decrypt(req);
+ skcipher_request_zero(req);
+ if (err) {
net_dbg_ratelimited("TKIP: failed to decrypt received packet from %pM\n",
hdr->addr2);
return -7;
@@ -505,11 +510,12 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
return keyidx;
}
-static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr,
+static int michael_mic(struct crypto_ahash *tfm_michael, u8 * key, u8 * hdr,
u8 * data, size_t data_len, u8 * mic)
{
- struct hash_desc desc;
+ AHASH_REQUEST_ON_STACK(req, tfm_michael);
struct scatterlist sg[2];
+ int err;
if (tfm_michael == NULL) {
pr_warn("%s(): tfm_michael == NULL\n", __func__);
@@ -519,12 +525,15 @@ static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr,
sg_set_buf(&sg[0], hdr, 16);
sg_set_buf(&sg[1], data, data_len);
- if (crypto_hash_setkey(tfm_michael, key, 8))
+ if (crypto_ahash_setkey(tfm_michael, key, 8))
return -1;
- desc.tfm = tfm_michael;
- desc.flags = 0;
- return crypto_hash_digest(&desc, sg, data_len + 16, mic);
+ ahash_request_set_tfm(req, tfm_michael);
+ ahash_request_set_callback(req, 0, NULL, NULL);
+ ahash_request_set_crypt(req, sg, mic, data_len + 16);
+ err = crypto_ahash_digest(req);
+ ahash_request_zero(req);
+ return err;
}
static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr)
@@ -645,10 +654,10 @@ static int lib80211_tkip_set_key(void *key, int len, u8 * seq, void *priv)
{
struct lib80211_tkip_data *tkey = priv;
int keyidx;
- struct crypto_hash *tfm = tkey->tx_tfm_michael;
- struct crypto_blkcipher *tfm2 = tkey->tx_tfm_arc4;
- struct crypto_hash *tfm3 = tkey->rx_tfm_michael;
- struct crypto_blkcipher *tfm4 = tkey->rx_tfm_arc4;
+ struct crypto_ahash *tfm = tkey->tx_tfm_michael;
+ struct crypto_skcipher *tfm2 = tkey->tx_tfm_arc4;
+ struct crypto_ahash *tfm3 = tkey->rx_tfm_michael;
+ struct crypto_skcipher *tfm4 = tkey->rx_tfm_arc4;
keyidx = tkey->key_idx;
memset(tkey, 0, sizeof(*tkey));
diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c
index 1c292e4ea7b6..d05f58b0fd04 100644
--- a/net/wireless/lib80211_crypt_wep.c
+++ b/net/wireless/lib80211_crypt_wep.c
@@ -22,7 +22,7 @@
#include <net/lib80211.h>
-#include <linux/crypto.h>
+#include <crypto/skcipher.h>
#include <linux/crc32.h>
MODULE_AUTHOR("Jouni Malinen");
@@ -35,8 +35,8 @@ struct lib80211_wep_data {
u8 key[WEP_KEY_LEN + 1];
u8 key_len;
u8 key_idx;
- struct crypto_blkcipher *tx_tfm;
- struct crypto_blkcipher *rx_tfm;
+ struct crypto_skcipher *tx_tfm;
+ struct crypto_skcipher *rx_tfm;
};
static void *lib80211_wep_init(int keyidx)
@@ -48,13 +48,13 @@ static void *lib80211_wep_init(int keyidx)
goto fail;
priv->key_idx = keyidx;
- priv->tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+ priv->tx_tfm = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(priv->tx_tfm)) {
priv->tx_tfm = NULL;
goto fail;
}
- priv->rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+ priv->rx_tfm = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(priv->rx_tfm)) {
priv->rx_tfm = NULL;
goto fail;
@@ -66,10 +66,8 @@ static void *lib80211_wep_init(int keyidx)
fail:
if (priv) {
- if (priv->tx_tfm)
- crypto_free_blkcipher(priv->tx_tfm);
- if (priv->rx_tfm)
- crypto_free_blkcipher(priv->rx_tfm);
+ crypto_free_skcipher(priv->tx_tfm);
+ crypto_free_skcipher(priv->rx_tfm);
kfree(priv);
}
return NULL;
@@ -79,10 +77,8 @@ static void lib80211_wep_deinit(void *priv)
{
struct lib80211_wep_data *_priv = priv;
if (_priv) {
- if (_priv->tx_tfm)
- crypto_free_blkcipher(_priv->tx_tfm);
- if (_priv->rx_tfm)
- crypto_free_blkcipher(_priv->rx_tfm);
+ crypto_free_skcipher(_priv->tx_tfm);
+ crypto_free_skcipher(_priv->rx_tfm);
}
kfree(priv);
}
@@ -133,11 +129,12 @@ static int lib80211_wep_build_iv(struct sk_buff *skb, int hdr_len,
static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
{
struct lib80211_wep_data *wep = priv;
- struct blkcipher_desc desc = { .tfm = wep->tx_tfm };
+ SKCIPHER_REQUEST_ON_STACK(req, wep->tx_tfm);
u32 crc, klen, len;
u8 *pos, *icv;
struct scatterlist sg;
u8 key[WEP_KEY_LEN + 3];
+ int err;
/* other checks are in lib80211_wep_build_iv */
if (skb_tailroom(skb) < 4)
@@ -165,9 +162,14 @@ static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
icv[2] = crc >> 16;
icv[3] = crc >> 24;
- crypto_blkcipher_setkey(wep->tx_tfm, key, klen);
+ crypto_skcipher_setkey(wep->tx_tfm, key, klen);
sg_init_one(&sg, pos, len + 4);
- return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4);
+ skcipher_request_set_tfm(req, wep->tx_tfm);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &sg, &sg, len + 4, NULL);
+ err = crypto_skcipher_encrypt(req);
+ skcipher_request_zero(req);
+ return err;
}
/* Perform WEP decryption on given buffer. Buffer includes whole WEP part of
@@ -180,11 +182,12 @@ static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
{
struct lib80211_wep_data *wep = priv;
- struct blkcipher_desc desc = { .tfm = wep->rx_tfm };
+ SKCIPHER_REQUEST_ON_STACK(req, wep->rx_tfm);
u32 crc, klen, plen;
u8 key[WEP_KEY_LEN + 3];
u8 keyidx, *pos, icv[4];
struct scatterlist sg;
+ int err;
if (skb->len < hdr_len + 8)
return -1;
@@ -205,9 +208,14 @@ static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
/* Apply RC4 to data and compute CRC32 over decrypted data */
plen = skb->len - hdr_len - 8;
- crypto_blkcipher_setkey(wep->rx_tfm, key, klen);
+ crypto_skcipher_setkey(wep->rx_tfm, key, klen);
sg_init_one(&sg, pos, plen + 4);
- if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4))
+ skcipher_request_set_tfm(req, wep->rx_tfm);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &sg, &sg, plen + 4, NULL);
+ err = crypto_skcipher_decrypt(req);
+ skcipher_request_zero(req);
+ if (err)
return -7;
crc = ~crc32_le(~0, pos, plen);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c71e274c810a..711cb7ad6ae0 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4256,8 +4256,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
* station. Include these parameters here and will check them in
* cfg80211_check_station_change().
*/
- if (info->attrs[NL80211_ATTR_PEER_AID])
- params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]);
+ if (info->attrs[NL80211_ATTR_STA_AID])
+ params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]);
if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])
params.listen_interval =
@@ -4359,6 +4359,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
struct net_device *dev = info->user_ptr[1];
struct station_parameters params;
u8 *mac_addr = NULL;
+ u32 auth_assoc = BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+ BIT(NL80211_STA_FLAG_ASSOCIATED);
memset(&params, 0, sizeof(params));
@@ -4470,10 +4472,23 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
/* allow authenticated/associated only if driver handles it */
if (!(rdev->wiphy.features &
NL80211_FEATURE_FULL_AP_CLIENT_STATE) &&
- params.sta_flags_mask &
- (BIT(NL80211_STA_FLAG_AUTHENTICATED) |
- BIT(NL80211_STA_FLAG_ASSOCIATED)))
- return -EINVAL;
+ params.sta_flags_mask & auth_assoc)
+ return -EINVAL;
+
+ /* Older userspace, or userspace wanting to be compatible with
+ * !NL80211_FEATURE_FULL_AP_CLIENT_STATE, will not set the auth
+ * and assoc flags in the mask, but assumes the station will be
+ * added as associated anyway since this was the required driver
+ * behaviour before NL80211_FEATURE_FULL_AP_CLIENT_STATE was
+ * introduced.
+ * In order to not bother drivers with this quirk in the API
+ * set the flags in both the mask and set for new stations in
+ * this case.
+ */
+ if (!(params.sta_flags_mask & auth_assoc)) {
+ params.sta_flags_mask |= auth_assoc;
+ params.sta_flags_set |= auth_assoc;
+ }
/* must be last in here for error handling */
params.vlan = get_vlan(info, rdev);
@@ -5997,6 +6012,24 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
return err;
}
+static int nl80211_abort_scan(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct wireless_dev *wdev = info->user_ptr[1];
+
+ if (!rdev->ops->abort_scan)
+ return -EOPNOTSUPP;
+
+ if (rdev->scan_msg)
+ return 0;
+
+ if (!rdev->scan_req)
+ return -ENOENT;
+
+ rdev_abort_scan(rdev, wdev);
+ return 0;
+}
+
static int
nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans,
struct cfg80211_sched_scan_request *request,
@@ -6507,8 +6540,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
if (WARN_ON(!cac_time_ms))
cac_time_ms = IEEE80211_DFS_MIN_CAC_TIME_MS;
- err = rdev->ops->start_radar_detection(&rdev->wiphy, dev, &chandef,
- cac_time_ms);
+ err = rdev_start_radar_detection(rdev, dev, &chandef, cac_time_ms);
if (!err) {
wdev->chandef = chandef;
wdev->cac_started = true;
@@ -7515,7 +7547,7 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
if ((ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT) &&
no_ht) {
- kfree(connkeys);
+ kzfree(connkeys);
return -EINVAL;
}
}
@@ -7571,7 +7603,7 @@ static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info)
if (!nl80211_parse_mcast_rate(rdev, mcast_rate, nla_rate))
return -EINVAL;
- err = rdev->ops->set_mcast_rate(&rdev->wiphy, dev, mcast_rate);
+ err = rdev_set_mcast_rate(rdev, dev, mcast_rate);
return err;
}
@@ -7941,8 +7973,10 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
if (nla_get_flag(info->attrs[NL80211_ATTR_USE_RRM])) {
if (!(rdev->wiphy.features &
NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) ||
- !(rdev->wiphy.features & NL80211_FEATURE_QUIET))
+ !(rdev->wiphy.features & NL80211_FEATURE_QUIET)) {
+ kzfree(connkeys);
return -EINVAL;
+ }
connect.flags |= ASSOC_REQ_USE_RRM;
}
@@ -9503,6 +9537,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
if (new_triggers.tcp && new_triggers.tcp->sock)
sock_release(new_triggers.tcp->sock);
kfree(new_triggers.tcp);
+ kfree(new_triggers.nd_config);
return err;
}
#endif
@@ -9716,7 +9751,7 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info)
if (!info->attrs[NL80211_ATTR_COALESCE_RULE]) {
cfg80211_rdev_free_coalesce(rdev);
- rdev->ops->set_coalesce(&rdev->wiphy, NULL);
+ rdev_set_coalesce(rdev, NULL);
return 0;
}
@@ -9744,7 +9779,7 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info)
i++;
}
- err = rdev->ops->set_coalesce(&rdev->wiphy, &new_coalesce);
+ err = rdev_set_coalesce(rdev, &new_coalesce);
if (err)
goto error;
@@ -10946,6 +10981,14 @@ static const struct genl_ops nl80211_ops[] = {
NL80211_FLAG_NEED_RTNL,
},
{
+ .cmd = NL80211_CMD_ABORT_SCAN,
+ .doit = nl80211_abort_scan,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
+ {
.cmd = NL80211_CMD_GET_SCAN,
.policy = nl80211_policy,
.dumpit = nl80211_dump_scan,
diff --git a/net/wireless/ocb.c b/net/wireless/ocb.c
index c00d4a792319..e64dbf16330c 100644
--- a/net/wireless/ocb.c
+++ b/net/wireless/ocb.c
@@ -29,6 +29,9 @@ int __cfg80211_join_ocb(struct cfg80211_registered_device *rdev,
if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_OCB)
return -EOPNOTSUPP;
+ if (!rdev->ops->join_ocb)
+ return -EOPNOTSUPP;
+
if (WARN_ON(!setup->chandef.chan))
return -EINVAL;
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index c23516d0f807..8ae0c04f9fc7 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -427,6 +427,14 @@ static inline int rdev_scan(struct cfg80211_registered_device *rdev,
return ret;
}
+static inline void rdev_abort_scan(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev)
+{
+ trace_rdev_abort_scan(&rdev->wiphy, wdev);
+ rdev->ops->abort_scan(&rdev->wiphy, wdev);
+ trace_rdev_return_void(&rdev->wiphy);
+}
+
static inline int rdev_auth(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct cfg80211_auth_request *req)
@@ -1020,4 +1028,47 @@ rdev_tdls_cancel_channel_switch(struct cfg80211_registered_device *rdev,
trace_rdev_return_void(&rdev->wiphy);
}
+static inline int
+rdev_start_radar_detection(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct cfg80211_chan_def *chandef,
+ u32 cac_time_ms)
+{
+ int ret = -ENOTSUPP;
+
+ trace_rdev_start_radar_detection(&rdev->wiphy, dev, chandef,
+ cac_time_ms);
+ if (rdev->ops->start_radar_detection)
+ ret = rdev->ops->start_radar_detection(&rdev->wiphy, dev,
+ chandef, cac_time_ms);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
+static inline int
+rdev_set_mcast_rate(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ int mcast_rate[IEEE80211_NUM_BANDS])
+{
+ int ret = -ENOTSUPP;
+
+ trace_rdev_set_mcast_rate(&rdev->wiphy, dev, mcast_rate);
+ if (rdev->ops->set_mcast_rate)
+ ret = rdev->ops->set_mcast_rate(&rdev->wiphy, dev, mcast_rate);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
+static inline int
+rdev_set_coalesce(struct cfg80211_registered_device *rdev,
+ struct cfg80211_coalesce *coalesce)
+{
+ int ret = -ENOTSUPP;
+
+ trace_rdev_set_coalesce(&rdev->wiphy, coalesce);
+ if (rdev->ops->set_coalesce)
+ ret = rdev->ops->set_coalesce(&rdev->wiphy, coalesce);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
#endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 2e8d6f39ed56..547ceecc0523 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -231,20 +231,22 @@ static const struct ieee80211_regdomain world_regdom = {
/* IEEE 802.11b/g, channels 1..11 */
REG_RULE(2412-10, 2462+10, 40, 6, 20, 0),
/* IEEE 802.11b/g, channels 12..13. */
- REG_RULE(2467-10, 2472+10, 40, 6, 20,
- NL80211_RRF_NO_IR),
+ REG_RULE(2467-10, 2472+10, 20, 6, 20,
+ NL80211_RRF_NO_IR | NL80211_RRF_AUTO_BW),
/* IEEE 802.11 channel 14 - Only JP enables
* this and for 802.11b only */
REG_RULE(2484-10, 2484+10, 20, 6, 20,
NL80211_RRF_NO_IR |
NL80211_RRF_NO_OFDM),
/* IEEE 802.11a, channel 36..48 */
- REG_RULE(5180-10, 5240+10, 160, 6, 20,
- NL80211_RRF_NO_IR),
+ REG_RULE(5180-10, 5240+10, 80, 6, 20,
+ NL80211_RRF_NO_IR |
+ NL80211_RRF_AUTO_BW),
/* IEEE 802.11a, channel 52..64 - DFS required */
- REG_RULE(5260-10, 5320+10, 160, 6, 20,
+ REG_RULE(5260-10, 5320+10, 80, 6, 20,
NL80211_RRF_NO_IR |
+ NL80211_RRF_AUTO_BW |
NL80211_RRF_DFS),
/* IEEE 802.11a, channel 100..144 - DFS required */
@@ -1052,7 +1054,7 @@ static u32 map_regdom_flags(u32 rd_flags)
}
static const struct ieee80211_reg_rule *
-freq_reg_info_regd(struct wiphy *wiphy, u32 center_freq,
+freq_reg_info_regd(u32 center_freq,
const struct ieee80211_regdomain *regd, u32 bw)
{
int i;
@@ -1097,7 +1099,7 @@ __freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 min_bw)
u32 bw;
for (bw = MHZ_TO_KHZ(20); bw >= min_bw; bw = bw / 2) {
- reg_rule = freq_reg_info_regd(wiphy, center_freq, regd, bw);
+ reg_rule = freq_reg_info_regd(center_freq, regd, bw);
if (!IS_ERR(reg_rule))
return reg_rule;
}
@@ -1166,6 +1168,41 @@ static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd,
#endif
}
+static uint32_t reg_rule_to_chan_bw_flags(const struct ieee80211_regdomain *regd,
+ const struct ieee80211_reg_rule *reg_rule,
+ const struct ieee80211_channel *chan)
+{
+ const struct ieee80211_freq_range *freq_range = NULL;
+ u32 max_bandwidth_khz, bw_flags = 0;
+
+ freq_range = &reg_rule->freq_range;
+
+ max_bandwidth_khz = freq_range->max_bandwidth_khz;
+ /* Check if auto calculation requested */
+ if (reg_rule->flags & NL80211_RRF_AUTO_BW)
+ max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule);
+
+ /* If we get a reg_rule we can assume that at least 5Mhz fit */
+ if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq),
+ MHZ_TO_KHZ(10)))
+ bw_flags |= IEEE80211_CHAN_NO_10MHZ;
+ if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq),
+ MHZ_TO_KHZ(20)))
+ bw_flags |= IEEE80211_CHAN_NO_20MHZ;
+
+ if (max_bandwidth_khz < MHZ_TO_KHZ(10))
+ bw_flags |= IEEE80211_CHAN_NO_10MHZ;
+ if (max_bandwidth_khz < MHZ_TO_KHZ(20))
+ bw_flags |= IEEE80211_CHAN_NO_20MHZ;
+ if (max_bandwidth_khz < MHZ_TO_KHZ(40))
+ bw_flags |= IEEE80211_CHAN_NO_HT40;
+ if (max_bandwidth_khz < MHZ_TO_KHZ(80))
+ bw_flags |= IEEE80211_CHAN_NO_80MHZ;
+ if (max_bandwidth_khz < MHZ_TO_KHZ(160))
+ bw_flags |= IEEE80211_CHAN_NO_160MHZ;
+ return bw_flags;
+}
+
/*
* Note that right now we assume the desired channel bandwidth
* is always 20 MHz for each individual channel (HT40 uses 20 MHz
@@ -1178,11 +1215,9 @@ static void handle_channel(struct wiphy *wiphy,
u32 flags, bw_flags = 0;
const struct ieee80211_reg_rule *reg_rule = NULL;
const struct ieee80211_power_rule *power_rule = NULL;
- const struct ieee80211_freq_range *freq_range = NULL;
struct wiphy *request_wiphy = NULL;
struct regulatory_request *lr = get_last_request();
const struct ieee80211_regdomain *regd;
- u32 max_bandwidth_khz;
request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx);
@@ -1223,31 +1258,7 @@ static void handle_channel(struct wiphy *wiphy,
chan_reg_rule_print_dbg(regd, chan, reg_rule);
power_rule = &reg_rule->power_rule;
- freq_range = &reg_rule->freq_range;
-
- max_bandwidth_khz = freq_range->max_bandwidth_khz;
- /* Check if auto calculation requested */
- if (reg_rule->flags & NL80211_RRF_AUTO_BW)
- max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule);
-
- /* If we get a reg_rule we can assume that at least 5Mhz fit */
- if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq),
- MHZ_TO_KHZ(10)))
- bw_flags |= IEEE80211_CHAN_NO_10MHZ;
- if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq),
- MHZ_TO_KHZ(20)))
- bw_flags |= IEEE80211_CHAN_NO_20MHZ;
-
- if (max_bandwidth_khz < MHZ_TO_KHZ(10))
- bw_flags |= IEEE80211_CHAN_NO_10MHZ;
- if (max_bandwidth_khz < MHZ_TO_KHZ(20))
- bw_flags |= IEEE80211_CHAN_NO_20MHZ;
- if (max_bandwidth_khz < MHZ_TO_KHZ(40))
- bw_flags |= IEEE80211_CHAN_NO_HT40;
- if (max_bandwidth_khz < MHZ_TO_KHZ(80))
- bw_flags |= IEEE80211_CHAN_NO_80MHZ;
- if (max_bandwidth_khz < MHZ_TO_KHZ(160))
- bw_flags |= IEEE80211_CHAN_NO_160MHZ;
+ bw_flags = reg_rule_to_chan_bw_flags(regd, reg_rule, chan);
if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER &&
request_wiphy && request_wiphy == wiphy &&
@@ -1760,13 +1771,10 @@ static void handle_channel_custom(struct wiphy *wiphy,
u32 bw_flags = 0;
const struct ieee80211_reg_rule *reg_rule = NULL;
const struct ieee80211_power_rule *power_rule = NULL;
- const struct ieee80211_freq_range *freq_range = NULL;
- u32 max_bandwidth_khz;
u32 bw;
for (bw = MHZ_TO_KHZ(20); bw >= MHZ_TO_KHZ(5); bw = bw / 2) {
- reg_rule = freq_reg_info_regd(wiphy,
- MHZ_TO_KHZ(chan->center_freq),
+ reg_rule = freq_reg_info_regd(MHZ_TO_KHZ(chan->center_freq),
regd, bw);
if (!IS_ERR(reg_rule))
break;
@@ -1787,31 +1795,7 @@ static void handle_channel_custom(struct wiphy *wiphy,
chan_reg_rule_print_dbg(regd, chan, reg_rule);
power_rule = &reg_rule->power_rule;
- freq_range = &reg_rule->freq_range;
-
- max_bandwidth_khz = freq_range->max_bandwidth_khz;
- /* Check if auto calculation requested */
- if (reg_rule->flags & NL80211_RRF_AUTO_BW)
- max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule);
-
- /* If we get a reg_rule we can assume that at least 5Mhz fit */
- if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq),
- MHZ_TO_KHZ(10)))
- bw_flags |= IEEE80211_CHAN_NO_10MHZ;
- if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq),
- MHZ_TO_KHZ(20)))
- bw_flags |= IEEE80211_CHAN_NO_20MHZ;
-
- if (max_bandwidth_khz < MHZ_TO_KHZ(10))
- bw_flags |= IEEE80211_CHAN_NO_10MHZ;
- if (max_bandwidth_khz < MHZ_TO_KHZ(20))
- bw_flags |= IEEE80211_CHAN_NO_20MHZ;
- if (max_bandwidth_khz < MHZ_TO_KHZ(40))
- bw_flags |= IEEE80211_CHAN_NO_HT40;
- if (max_bandwidth_khz < MHZ_TO_KHZ(80))
- bw_flags |= IEEE80211_CHAN_NO_80MHZ;
- if (max_bandwidth_khz < MHZ_TO_KHZ(160))
- bw_flags |= IEEE80211_CHAN_NO_160MHZ;
+ bw_flags = reg_rule_to_chan_bw_flags(regd, reg_rule, chan);
chan->dfs_state_entered = jiffies;
chan->dfs_state = NL80211_DFS_USABLE;
@@ -2763,7 +2747,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
const struct ieee80211_power_rule *power_rule = NULL;
char bw[32], cac_time[32];
- pr_info(" (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)\n");
+ pr_debug(" (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)\n");
for (i = 0; i < rd->n_reg_rules; i++) {
reg_rule = &rd->reg_rules[i];
@@ -2790,7 +2774,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
* in certain regions
*/
if (power_rule->max_antenna_gain)
- pr_info(" (%d KHz - %d KHz @ %s), (%d mBi, %d mBm), (%s)\n",
+ pr_debug(" (%d KHz - %d KHz @ %s), (%d mBi, %d mBm), (%s)\n",
freq_range->start_freq_khz,
freq_range->end_freq_khz,
bw,
@@ -2798,7 +2782,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
power_rule->max_eirp,
cac_time);
else
- pr_info(" (%d KHz - %d KHz @ %s), (N/A, %d mBm), (%s)\n",
+ pr_debug(" (%d KHz - %d KHz @ %s), (N/A, %d mBm), (%s)\n",
freq_range->start_freq_khz,
freq_range->end_freq_khz,
bw,
@@ -2831,35 +2815,35 @@ static void print_regdomain(const struct ieee80211_regdomain *rd)
struct cfg80211_registered_device *rdev;
rdev = cfg80211_rdev_by_wiphy_idx(lr->wiphy_idx);
if (rdev) {
- pr_info("Current regulatory domain updated by AP to: %c%c\n",
+ pr_debug("Current regulatory domain updated by AP to: %c%c\n",
rdev->country_ie_alpha2[0],
rdev->country_ie_alpha2[1]);
} else
- pr_info("Current regulatory domain intersected:\n");
+ pr_debug("Current regulatory domain intersected:\n");
} else
- pr_info("Current regulatory domain intersected:\n");
+ pr_debug("Current regulatory domain intersected:\n");
} else if (is_world_regdom(rd->alpha2)) {
- pr_info("World regulatory domain updated:\n");
+ pr_debug("World regulatory domain updated:\n");
} else {
if (is_unknown_alpha2(rd->alpha2))
- pr_info("Regulatory domain changed to driver built-in settings (unknown country)\n");
+ pr_debug("Regulatory domain changed to driver built-in settings (unknown country)\n");
else {
if (reg_request_cell_base(lr))
- pr_info("Regulatory domain changed to country: %c%c by Cell Station\n",
+ pr_debug("Regulatory domain changed to country: %c%c by Cell Station\n",
rd->alpha2[0], rd->alpha2[1]);
else
- pr_info("Regulatory domain changed to country: %c%c\n",
+ pr_debug("Regulatory domain changed to country: %c%c\n",
rd->alpha2[0], rd->alpha2[1]);
}
}
- pr_info(" DFS Master region: %s", reg_dfs_region_str(rd->dfs_region));
+ pr_debug(" DFS Master region: %s", reg_dfs_region_str(rd->dfs_region));
print_rd_rules(rd);
}
static void print_regdomain_info(const struct ieee80211_regdomain *rd)
{
- pr_info("Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]);
+ pr_debug("Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]);
print_rd_rules(rd);
}
@@ -2880,7 +2864,8 @@ static int reg_set_rd_user(const struct ieee80211_regdomain *rd,
return -EALREADY;
if (!is_valid_rd(rd)) {
- pr_err("Invalid regulatory domain detected:\n");
+ pr_err("Invalid regulatory domain detected: %c%c\n",
+ rd->alpha2[0], rd->alpha2[1]);
print_regdomain_info(rd);
return -EINVAL;
}
@@ -2916,7 +2901,8 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd,
return -EALREADY;
if (!is_valid_rd(rd)) {
- pr_err("Invalid regulatory domain detected:\n");
+ pr_err("Invalid regulatory domain detected: %c%c\n",
+ rd->alpha2[0], rd->alpha2[1]);
print_regdomain_info(rd);
return -EINVAL;
}
@@ -2974,7 +2960,8 @@ static int reg_set_rd_country_ie(const struct ieee80211_regdomain *rd,
*/
if (!is_valid_rd(rd)) {
- pr_err("Invalid regulatory domain detected:\n");
+ pr_err("Invalid regulatory domain detected: %c%c\n",
+ rd->alpha2[0], rd->alpha2[1]);
print_regdomain_info(rd);
return -EINVAL;
}
@@ -3029,6 +3016,7 @@ int set_regdom(const struct ieee80211_regdomain *rd,
break;
default:
WARN(1, "invalid initiator %d\n", lr->initiator);
+ kfree(rd);
return -EINVAL;
}
@@ -3221,8 +3209,10 @@ int __init regulatory_init(void)
/* We always try to get an update for the static regdomain */
err = regulatory_hint_core(cfg80211_world_regdom->alpha2);
if (err) {
- if (err == -ENOMEM)
+ if (err == -ENOMEM) {
+ platform_device_unregister(reg_pdev);
return err;
+ }
/*
* N.B. kobject_uevent_env() can fail mainly for when we're out
* memory which is handled and propagated appropriately above
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 8020b5b094d4..d49ed7666d4c 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -917,6 +917,12 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap);
+ /* stop critical protocol if supported */
+ if (rdev->ops->crit_proto_stop && rdev->crit_proto_nlportid) {
+ rdev->crit_proto_nlportid = 0;
+ rdev_crit_proto_stop(rdev, wdev);
+ }
+
/*
* Delete all the keys ... pairwise keys can't really
* exist any more anyway, but default keys might.
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 0c392d36781b..09b242b09bed 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -623,12 +623,24 @@ DECLARE_EVENT_CLASS(station_add_change,
__field(u32, sta_flags_set)
__field(u32, sta_modify_mask)
__field(int, listen_interval)
+ __field(u16, capability)
__field(u16, aid)
__field(u8, plink_action)
__field(u8, plink_state)
__field(u8, uapsd_queues)
+ __field(u8, max_sp)
+ __field(u8, opmode_notif)
+ __field(bool, opmode_notif_used)
__array(u8, ht_capa, (int)sizeof(struct ieee80211_ht_cap))
+ __array(u8, vht_capa, (int)sizeof(struct ieee80211_vht_cap))
__array(char, vlan, IFNAMSIZ)
+ __dynamic_array(u8, supported_rates,
+ params->supported_rates_len)
+ __dynamic_array(u8, ext_capab, params->ext_capab_len)
+ __dynamic_array(u8, supported_channels,
+ params->supported_channels_len)
+ __dynamic_array(u8, supported_oper_classes,
+ params->supported_oper_classes_len)
),
TP_fast_assign(
WIPHY_ASSIGN;
@@ -646,9 +658,35 @@ DECLARE_EVENT_CLASS(station_add_change,
if (params->ht_capa)
memcpy(__entry->ht_capa, params->ht_capa,
sizeof(struct ieee80211_ht_cap));
+ memset(__entry->vht_capa, 0, sizeof(struct ieee80211_vht_cap));
+ if (params->vht_capa)
+ memcpy(__entry->vht_capa, params->vht_capa,
+ sizeof(struct ieee80211_vht_cap));
memset(__entry->vlan, 0, sizeof(__entry->vlan));
if (params->vlan)
memcpy(__entry->vlan, params->vlan->name, IFNAMSIZ);
+ if (params->supported_rates && params->supported_rates_len)
+ memcpy(__get_dynamic_array(supported_rates),
+ params->supported_rates,
+ params->supported_rates_len);
+ if (params->ext_capab && params->ext_capab_len)
+ memcpy(__get_dynamic_array(ext_capab),
+ params->ext_capab,
+ params->ext_capab_len);
+ if (params->supported_channels &&
+ params->supported_channels_len)
+ memcpy(__get_dynamic_array(supported_channels),
+ params->supported_channels,
+ params->supported_channels_len);
+ if (params->supported_oper_classes &&
+ params->supported_oper_classes_len)
+ memcpy(__get_dynamic_array(supported_oper_classes),
+ params->supported_oper_classes,
+ params->supported_oper_classes_len);
+ __entry->max_sp = params->max_sp;
+ __entry->capability = params->capability;
+ __entry->opmode_notif = params->opmode_notif;
+ __entry->opmode_notif_used = params->opmode_notif_used;
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: " MAC_PR_FMT
", station flags mask: %u, station flags set: %u, "
@@ -2818,6 +2856,71 @@ TRACE_EVENT(cfg80211_stop_iface,
WIPHY_PR_ARG, WDEV_PR_ARG)
);
+TRACE_EVENT(rdev_start_radar_detection,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_chan_def *chandef,
+ u32 cac_time_ms),
+ TP_ARGS(wiphy, netdev, chandef, cac_time_ms),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ CHAN_DEF_ENTRY
+ __field(u32, cac_time_ms)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ CHAN_DEF_ASSIGN(chandef);
+ __entry->cac_time_ms = cac_time_ms;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT
+ ", cac_time_ms=%u",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG,
+ __entry->cac_time_ms)
+);
+
+TRACE_EVENT(rdev_set_mcast_rate,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ int mcast_rate[IEEE80211_NUM_BANDS]),
+ TP_ARGS(wiphy, netdev, mcast_rate),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __array(int, mcast_rate, IEEE80211_NUM_BANDS)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ memcpy(__entry->mcast_rate, mcast_rate,
+ sizeof(int) * IEEE80211_NUM_BANDS);
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", "
+ "mcast_rates [2.4GHz=0x%x, 5.2GHz=0x%x, 60GHz=0x%x]",
+ WIPHY_PR_ARG, NETDEV_PR_ARG,
+ __entry->mcast_rate[IEEE80211_BAND_2GHZ],
+ __entry->mcast_rate[IEEE80211_BAND_5GHZ],
+ __entry->mcast_rate[IEEE80211_BAND_60GHZ])
+);
+
+TRACE_EVENT(rdev_set_coalesce,
+ TP_PROTO(struct wiphy *wiphy, struct cfg80211_coalesce *coalesce),
+ TP_ARGS(wiphy, coalesce),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ __field(int, n_rules)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ __entry->n_rules = coalesce ? coalesce->n_rules : 0;
+ ),
+ TP_printk(WIPHY_PR_FMT ", n_rules=%d",
+ WIPHY_PR_ARG, __entry->n_rules)
+);
+
+DEFINE_EVENT(wiphy_wdev_evt, rdev_abort_scan,
+ TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
+ TP_ARGS(wiphy, wdev)
+);
#endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/wireless/util.c b/net/wireless/util.c
index baf7218cec15..92770427b211 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1325,13 +1325,6 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
}
EXPORT_SYMBOL(ieee80211_ie_split_ric);
-size_t ieee80211_ie_split(const u8 *ies, size_t ielen,
- const u8 *ids, int n_ids, size_t offset)
-{
- return ieee80211_ie_split_ric(ies, ielen, ids, n_ids, NULL, 0, offset);
-}
-EXPORT_SYMBOL(ieee80211_ie_split);
-
bool ieee80211_operating_class_to_band(u8 operating_class,
enum ieee80211_band *band)
{
@@ -1620,120 +1613,6 @@ int cfg80211_check_combinations(struct wiphy *wiphy,
}
EXPORT_SYMBOL(cfg80211_check_combinations);
-int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev,
- enum nl80211_iftype iftype,
- struct ieee80211_channel *chan,
- enum cfg80211_chan_mode chanmode,
- u8 radar_detect)
-{
- struct wireless_dev *wdev_iter;
- int num[NUM_NL80211_IFTYPES];
- struct ieee80211_channel
- *used_channels[CFG80211_MAX_NUM_DIFFERENT_CHANNELS];
- struct ieee80211_channel *ch;
- enum cfg80211_chan_mode chmode;
- int num_different_channels = 0;
- int total = 1;
- int i;
-
- ASSERT_RTNL();
-
- if (WARN_ON(hweight32(radar_detect) > 1))
- return -EINVAL;
-
- if (WARN_ON(iftype >= NUM_NL80211_IFTYPES))
- return -EINVAL;
-
- /* Always allow software iftypes */
- if (rdev->wiphy.software_iftypes & BIT(iftype)) {
- if (radar_detect)
- return -EINVAL;
- return 0;
- }
-
- memset(num, 0, sizeof(num));
- memset(used_channels, 0, sizeof(used_channels));
-
- num[iftype] = 1;
-
- /* TODO: We'll probably not need this anymore, since this
- * should only be called with CHAN_MODE_UNDEFINED. There are
- * still a couple of pending calls where other chanmodes are
- * used, but we should get rid of them.
- */
- switch (chanmode) {
- case CHAN_MODE_UNDEFINED:
- break;
- case CHAN_MODE_SHARED:
- WARN_ON(!chan);
- used_channels[0] = chan;
- num_different_channels++;
- break;
- case CHAN_MODE_EXCLUSIVE:
- num_different_channels++;
- break;
- }
-
- list_for_each_entry(wdev_iter, &rdev->wdev_list, list) {
- if (wdev_iter == wdev)
- continue;
- if (wdev_iter->iftype == NL80211_IFTYPE_P2P_DEVICE) {
- if (!wdev_iter->p2p_started)
- continue;
- } else if (wdev_iter->netdev) {
- if (!netif_running(wdev_iter->netdev))
- continue;
- } else {
- WARN_ON(1);
- }
-
- if (rdev->wiphy.software_iftypes & BIT(wdev_iter->iftype))
- continue;
-
- /*
- * We may be holding the "wdev" mutex, but now need to lock
- * wdev_iter. This is OK because once we get here wdev_iter
- * is not wdev (tested above), but we need to use the nested
- * locking for lockdep.
- */
- mutex_lock_nested(&wdev_iter->mtx, 1);
- __acquire(wdev_iter->mtx);
- cfg80211_get_chan_state(wdev_iter, &ch, &chmode, &radar_detect);
- wdev_unlock(wdev_iter);
-
- switch (chmode) {
- case CHAN_MODE_UNDEFINED:
- break;
- case CHAN_MODE_SHARED:
- for (i = 0; i < CFG80211_MAX_NUM_DIFFERENT_CHANNELS; i++)
- if (!used_channels[i] || used_channels[i] == ch)
- break;
-
- if (i == CFG80211_MAX_NUM_DIFFERENT_CHANNELS)
- return -EBUSY;
-
- if (used_channels[i] == NULL) {
- used_channels[i] = ch;
- num_different_channels++;
- }
- break;
- case CHAN_MODE_EXCLUSIVE:
- num_different_channels++;
- break;
- }
-
- num[wdev_iter->iftype]++;
- total++;
- }
-
- if (total == 1 && !radar_detect)
- return 0;
-
- return cfg80211_check_combinations(&rdev->wiphy, num_different_channels,
- radar_detect, num);
-}
-
int ieee80211_get_ratemask(struct ieee80211_supported_band *sband,
const u8 *rates, unsigned int n_rates,
u32 *mask)
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index c8717c1d082e..b50ee5d622e1 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -342,6 +342,40 @@ static const int compat_event_type_size[] = {
/* IW event code */
+void wireless_nlevent_flush(void)
+{
+ struct sk_buff *skb;
+ struct net *net;
+
+ ASSERT_RTNL();
+
+ for_each_net(net) {
+ while ((skb = skb_dequeue(&net->wext_nlevents)))
+ rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
+ GFP_KERNEL);
+ }
+}
+EXPORT_SYMBOL_GPL(wireless_nlevent_flush);
+
+static int wext_netdev_notifier_call(struct notifier_block *nb,
+ unsigned long state, void *ptr)
+{
+ /*
+ * When a netdev changes state in any way, flush all pending messages
+ * to avoid them going out in a strange order, e.g. RTM_NEWLINK after
+ * RTM_DELLINK, or with IFF_UP after without IFF_UP during dev_close()
+ * or similar - all of which could otherwise happen due to delays from
+ * schedule_work().
+ */
+ wireless_nlevent_flush();
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block wext_netdev_notifier = {
+ .notifier_call = wext_netdev_notifier_call,
+};
+
static int __net_init wext_pernet_init(struct net *net)
{
skb_queue_head_init(&net->wext_nlevents);
@@ -360,7 +394,12 @@ static struct pernet_operations wext_pernet_ops = {
static int __init wireless_nlevent_init(void)
{
- return register_pernet_subsys(&wext_pernet_ops);
+ int err = register_pernet_subsys(&wext_pernet_ops);
+
+ if (err)
+ return err;
+
+ return register_netdevice_notifier(&wext_netdev_notifier);
}
subsys_initcall(wireless_nlevent_init);
@@ -368,17 +407,8 @@ subsys_initcall(wireless_nlevent_init);
/* Process events generated by the wireless layer or the driver. */
static void wireless_nlevent_process(struct work_struct *work)
{
- struct sk_buff *skb;
- struct net *net;
-
rtnl_lock();
-
- for_each_net(net) {
- while ((skb = skb_dequeue(&net->wext_nlevents)))
- rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
- GFP_KERNEL);
- }
-
+ wireless_nlevent_flush();
rtnl_unlock();
}
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index f07224d8b88f..250e567ba3d6 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -9,6 +9,8 @@
* any later version.
*/
+#include <crypto/hash.h>
+#include <crypto/skcipher.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/pfkeyv2.h>
@@ -782,14 +784,13 @@ void xfrm_probe_algs(void)
BUG_ON(in_softirq());
for (i = 0; i < aalg_entries(); i++) {
- status = crypto_has_hash(aalg_list[i].name, 0,
- CRYPTO_ALG_ASYNC);
+ status = crypto_has_ahash(aalg_list[i].name, 0, 0);
if (aalg_list[i].available != status)
aalg_list[i].available = status;
}
for (i = 0; i < ealg_entries(); i++) {
- status = crypto_has_ablkcipher(ealg_list[i].name, 0, 0);
+ status = crypto_has_skcipher(ealg_list[i].name, 0, 0);
if (ealg_list[i].available != status)
ealg_list[i].available = status;
}
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index cc3676eb6239..ff4a91fcab9f 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -167,6 +167,8 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb
{
struct sk_buff *segs;
+ BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET);
+ BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_SGO_CB_OFFSET);
segs = skb_gso_segment(skb, 0);
kfree_skb(skb);
if (IS_ERR(segs))
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 09bfcbac63bb..b5e665b3cfb0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -303,6 +303,14 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
}
EXPORT_SYMBOL(xfrm_policy_alloc);
+static void xfrm_policy_destroy_rcu(struct rcu_head *head)
+{
+ struct xfrm_policy *policy = container_of(head, struct xfrm_policy, rcu);
+
+ security_xfrm_policy_free(policy->security);
+ kfree(policy);
+}
+
/* Destroy xfrm_policy: descendant resources must be released to this moment. */
void xfrm_policy_destroy(struct xfrm_policy *policy)
@@ -312,8 +320,7 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
BUG();
- security_xfrm_policy_free(policy->security);
- kfree(policy);
+ call_rcu(&policy->rcu, xfrm_policy_destroy_rcu);
}
EXPORT_SYMBOL(xfrm_policy_destroy);
@@ -1214,8 +1221,10 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
struct xfrm_policy *pol;
struct net *net = sock_net(sk);
+ rcu_read_lock();
read_lock_bh(&net->xfrm.xfrm_policy_lock);
- if ((pol = sk->sk_policy[dir]) != NULL) {
+ pol = rcu_dereference(sk->sk_policy[dir]);
+ if (pol != NULL) {
bool match = xfrm_selector_match(&pol->selector, fl,
sk->sk_family);
int err = 0;
@@ -1239,6 +1248,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
}
out:
read_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ rcu_read_unlock();
return pol;
}
@@ -1307,13 +1317,14 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
#endif
write_lock_bh(&net->xfrm.xfrm_policy_lock);
- old_pol = sk->sk_policy[dir];
- sk->sk_policy[dir] = pol;
+ old_pol = rcu_dereference_protected(sk->sk_policy[dir],
+ lockdep_is_held(&net->xfrm.xfrm_policy_lock));
if (pol) {
pol->curlft.add_time = get_seconds();
pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
xfrm_sk_policy_link(pol, dir);
}
+ rcu_assign_pointer(sk->sk_policy[dir], pol);
if (old_pol) {
if (pol)
xfrm_policy_requeue(old_pol, pol);
@@ -1361,17 +1372,26 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
return newp;
}
-int __xfrm_sk_clone_policy(struct sock *sk)
+int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
{
- struct xfrm_policy *p0 = sk->sk_policy[0],
- *p1 = sk->sk_policy[1];
+ const struct xfrm_policy *p;
+ struct xfrm_policy *np;
+ int i, ret = 0;
- sk->sk_policy[0] = sk->sk_policy[1] = NULL;
- if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
- return -ENOMEM;
- if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
- return -ENOMEM;
- return 0;
+ rcu_read_lock();
+ for (i = 0; i < 2; i++) {
+ p = rcu_dereference(osk->sk_policy[i]);
+ if (p) {
+ np = clone_policy(p, i);
+ if (unlikely(!np)) {
+ ret = -ENOMEM;
+ break;
+ }
+ rcu_assign_pointer(sk->sk_policy[i], np);
+ }
+ }
+ rcu_read_unlock();
+ return ret;
}
static int
@@ -2198,6 +2218,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
xdst = NULL;
route = NULL;
+ sk = sk_const_to_full_sk(sk);
if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
num_pols = 1;
pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
@@ -2477,6 +2498,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
}
pol = NULL;
+ sk = sk_to_full_sk(sk);
if (sk && sk->sk_policy[dir]) {
pol = xfrm_sk_policy_lookup(sk, dir, &fl);
if (IS_ERR(pol)) {
@@ -2804,7 +2826,6 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
- struct net *net;
int err = 0;
if (unlikely(afinfo == NULL))
return -EINVAL;
@@ -2835,26 +2856,6 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
}
spin_unlock(&xfrm_policy_afinfo_lock);
- rtnl_lock();
- for_each_net(net) {
- struct dst_ops *xfrm_dst_ops;
-
- switch (afinfo->family) {
- case AF_INET:
- xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
- break;
-#if IS_ENABLED(CONFIG_IPV6)
- case AF_INET6:
- xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
- break;
-#endif
- default:
- BUG();
- }
- *xfrm_dst_ops = *afinfo->dst_ops;
- }
- rtnl_unlock();
-
return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);
@@ -2890,22 +2891,6 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
-static void __net_init xfrm_dst_ops_init(struct net *net)
-{
- struct xfrm_policy_afinfo *afinfo;
-
- rcu_read_lock();
- afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]);
- if (afinfo)
- net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
-#if IS_ENABLED(CONFIG_IPV6)
- afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]);
- if (afinfo)
- net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
-#endif
- rcu_read_unlock();
-}
-
static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
@@ -3054,7 +3039,6 @@ static int __net_init xfrm_net_init(struct net *net)
rv = xfrm_policy_init(net);
if (rv < 0)
goto out_policy;
- xfrm_dst_ops_init(net);
rv = xfrm_sysctl_init(net);
if (rv < 0)
goto out_sysctl;