diff options
Diffstat (limited to 'include/net')
34 files changed, 466 insertions, 677 deletions
diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 269ec27385e9..2f67ae854ff0 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -238,53 +238,6 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, void ipv6_mc_dad_complete(struct inet6_dev *idev); -/* A stub used by vxlan module. This is ugly, ideally these - * symbols should be built into the core kernel. - */ -struct ipv6_stub { - int (*ipv6_sock_mc_join)(struct sock *sk, int ifindex, - const struct in6_addr *addr); - int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex, - const struct in6_addr *addr); - int (*ipv6_dst_lookup)(struct net *net, struct sock *sk, - struct dst_entry **dst, struct flowi6 *fl6); - int (*ipv6_route_input)(struct sk_buff *skb); - - struct fib6_table *(*fib6_get_table)(struct net *net, u32 id); - struct fib6_info *(*fib6_lookup)(struct net *net, int oif, - struct flowi6 *fl6, int flags); - struct fib6_info *(*fib6_table_lookup)(struct net *net, - struct fib6_table *table, - int oif, struct flowi6 *fl6, - int flags); - struct fib6_info *(*fib6_multipath_select)(const struct net *net, - struct fib6_info *f6i, - struct flowi6 *fl6, int oif, - const struct sk_buff *skb, - int strict); - u32 (*ip6_mtu_from_fib6)(struct fib6_info *f6i, struct in6_addr *daddr, - struct in6_addr *saddr); - - void (*udpv6_encap_enable)(void); - void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr, - const struct in6_addr *solicited_addr, - bool router, bool solicited, bool override, bool inc_opt); - struct neigh_table *nd_tbl; -}; -extern const struct ipv6_stub *ipv6_stub __read_mostly; - -/* A stub used by bpf helpers. Similarly ugly as ipv6_stub */ -struct ipv6_bpf_stub { - int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len, - bool force_bind_address_no_port, bool with_lock); - struct sock *(*udp6_lib_lookup)(struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, __be16 dport, - int dif, int sdif, struct udp_table *tbl, - struct sk_buff *skb); -}; -extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; - /* * identify MLD packets for MLD filter exceptions */ @@ -425,6 +378,14 @@ static inline void in6_dev_hold(struct inet6_dev *idev) refcount_inc(&idev->refcnt); } +/* called with rcu_read_lock held */ +static inline bool ip6_ignore_linkdown(const struct net_device *dev) +{ + const struct inet6_dev *idev = __in6_dev_get(dev); + + return !!idev->cnf.ignore_routes_with_linkdown; +} + void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); static inline void in6_ifa_put(struct inet6_ifaddr *ifp) diff --git a/include/net/devlink.h b/include/net/devlink.h index 63de99e09f04..70c7d1ac8344 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -16,6 +16,7 @@ #include <linux/gfp.h> #include <linux/list.h> #include <linux/netdevice.h> +#include <linux/spinlock.h> #include <net/net_namespace.h> #include <uapi/linux/devlink.h> @@ -40,11 +41,13 @@ struct devlink { }; struct devlink_port_attrs { - bool set; + u8 set:1, + split:1, + switch_port:1; enum devlink_port_flavour flavour; u32 port_number; /* same value as "split group" */ - bool split; u32 split_subport_number; + struct netdev_phys_item_id switch_id; }; struct devlink_port { @@ -53,6 +56,9 @@ struct devlink_port { struct devlink *devlink; unsigned index; bool registered; + spinlock_t type_lock; /* Protects type and type_dev + * pointer consistency. + */ enum devlink_port_type type; enum devlink_port_type desired_type; void *type_dev; @@ -543,19 +549,25 @@ static inline struct devlink *priv_to_devlink(void *priv) return container_of(priv, struct devlink, priv); } +static inline struct devlink_port * +netdev_to_devlink_port(struct net_device *dev) +{ + if (dev->netdev_ops->ndo_get_devlink_port) + return dev->netdev_ops->ndo_get_devlink_port(dev); + return NULL; +} + static inline struct devlink *netdev_to_devlink(struct net_device *dev) { -#if IS_ENABLED(CONFIG_NET_DEVLINK) - if (dev->netdev_ops->ndo_get_devlink) - return dev->netdev_ops->ndo_get_devlink(dev); -#endif + struct devlink_port *devlink_port = netdev_to_devlink_port(dev); + + if (devlink_port) + return devlink_port->devlink; return NULL; } struct ib_device; -#if IS_ENABLED(CONFIG_NET_DEVLINK) - struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size); int devlink_register(struct devlink *devlink, struct device *dev); void devlink_unregister(struct devlink *devlink); @@ -572,9 +584,9 @@ void devlink_port_type_clear(struct devlink_port *devlink_port); void devlink_port_attrs_set(struct devlink_port *devlink_port, enum devlink_port_flavour flavour, u32 port_number, bool split, - u32 split_subport_number); -int devlink_port_get_phys_port_name(struct devlink_port *devlink_port, - char *name, size_t len); + u32 split_subport_number, + const unsigned char *switch_id, + unsigned char switch_id_len); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, @@ -724,510 +736,43 @@ void devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, enum devlink_health_reporter_state state); +#if IS_ENABLED(CONFIG_NET_DEVLINK) + void devlink_compat_running_version(struct net_device *dev, char *buf, size_t len); int devlink_compat_flash_update(struct net_device *dev, const char *file_name); +int devlink_compat_phys_port_name_get(struct net_device *dev, + char *name, size_t len); +int devlink_compat_switch_id_get(struct net_device *dev, + struct netdev_phys_item_id *ppid); #else -static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, - size_t priv_size) -{ - return kzalloc(sizeof(struct devlink) + priv_size, GFP_KERNEL); -} - -static inline int devlink_register(struct devlink *devlink, struct device *dev) -{ - return 0; -} - -static inline void devlink_unregister(struct devlink *devlink) -{ -} - -static inline void devlink_params_publish(struct devlink *devlink) -{ -} - -static inline void devlink_params_unpublish(struct devlink *devlink) -{ -} - -static inline void devlink_free(struct devlink *devlink) -{ - kfree(devlink); -} - -static inline int devlink_port_register(struct devlink *devlink, - struct devlink_port *devlink_port, - unsigned int port_index) -{ - return 0; -} - -static inline void devlink_port_unregister(struct devlink_port *devlink_port) -{ -} - -static inline void devlink_port_type_eth_set(struct devlink_port *devlink_port, - struct net_device *netdev) -{ -} - -static inline void devlink_port_type_ib_set(struct devlink_port *devlink_port, - struct ib_device *ibdev) -{ -} - -static inline void devlink_port_type_clear(struct devlink_port *devlink_port) -{ -} - -static inline void devlink_port_attrs_set(struct devlink_port *devlink_port, - enum devlink_port_flavour flavour, - u32 port_number, bool split, - u32 split_subport_number) -{ -} - -static inline int -devlink_port_get_phys_port_name(struct devlink_port *devlink_port, - char *name, size_t len) -{ - return -EOPNOTSUPP; -} - -static inline int devlink_sb_register(struct devlink *devlink, - unsigned int sb_index, u32 size, - u16 ingress_pools_count, - u16 egress_pools_count, - u16 ingress_tc_count, - u16 egress_tc_count) -{ - return 0; -} - -static inline void devlink_sb_unregister(struct devlink *devlink, - unsigned int sb_index) -{ -} - -static inline int -devlink_dpipe_table_register(struct devlink *devlink, - const char *table_name, - struct devlink_dpipe_table_ops *table_ops, - void *priv, bool counter_control_extern) -{ - return 0; -} - -static inline void devlink_dpipe_table_unregister(struct devlink *devlink, - const char *table_name) -{ -} - -static inline int devlink_dpipe_headers_register(struct devlink *devlink, - struct devlink_dpipe_headers * - dpipe_headers) -{ - return 0; -} - -static inline void devlink_dpipe_headers_unregister(struct devlink *devlink) -{ -} - -static inline bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, - const char *table_name) -{ - return false; -} - -static inline int -devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx) -{ - return 0; -} - -static inline int -devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx, - struct devlink_dpipe_entry *entry) -{ - return 0; -} - -static inline int -devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx) -{ - return 0; -} - -static inline void -devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry) -{ -} - -static inline int -devlink_dpipe_action_put(struct sk_buff *skb, - struct devlink_dpipe_action *action) -{ - return 0; -} - -static inline int -devlink_dpipe_match_put(struct sk_buff *skb, - struct devlink_dpipe_match *match) -{ - return 0; -} - -static inline int -devlink_resource_register(struct devlink *devlink, - const char *resource_name, - u64 resource_size, - u64 resource_id, - u64 parent_resource_id, - const struct devlink_resource_size_params *size_params) -{ - return 0; -} - static inline void -devlink_resources_unregister(struct devlink *devlink, - struct devlink_resource *resource) -{ -} - -static inline int -devlink_resource_size_get(struct devlink *devlink, u64 resource_id, - u64 *p_resource_size) -{ - return -EOPNOTSUPP; -} - -static inline int -devlink_dpipe_table_resource_set(struct devlink *devlink, - const char *table_name, u64 resource_id, - u64 resource_units) -{ - return -EOPNOTSUPP; -} - -static inline void -devlink_resource_occ_get_register(struct devlink *devlink, - u64 resource_id, - devlink_resource_occ_get_t *occ_get, - void *occ_get_priv) -{ -} - -static inline void -devlink_resource_occ_get_unregister(struct devlink *devlink, - u64 resource_id) -{ -} - -static inline int -devlink_params_register(struct devlink *devlink, - const struct devlink_param *params, - size_t params_count) -{ - return 0; -} - -static inline void -devlink_params_unregister(struct devlink *devlink, - const struct devlink_param *params, - size_t params_count) -{ - -} - -static inline int -devlink_port_params_register(struct devlink_port *devlink_port, - const struct devlink_param *params, - size_t params_count) -{ - return 0; -} - -static inline void -devlink_port_params_unregister(struct devlink_port *devlink_port, - const struct devlink_param *params, - size_t params_count) -{ -} - -static inline int -devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, - union devlink_param_value *init_val) +devlink_compat_running_version(struct net_device *dev, char *buf, size_t len) { - return -EOPNOTSUPP; } static inline int -devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, - union devlink_param_value init_val) +devlink_compat_flash_update(struct net_device *dev, const char *file_name) { return -EOPNOTSUPP; } static inline int -devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port, - u32 param_id, - union devlink_param_value *init_val) +devlink_compat_phys_port_name_get(struct net_device *dev, + char *name, size_t len) { return -EOPNOTSUPP; } static inline int -devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port, - u32 param_id, - union devlink_param_value init_val) +devlink_compat_switch_id_get(struct net_device *dev, + struct netdev_phys_item_id *ppid) { return -EOPNOTSUPP; } -static inline void -devlink_param_value_changed(struct devlink *devlink, u32 param_id) -{ -} - -static inline void -devlink_port_param_value_changed(struct devlink_port *devlink_port, - u32 param_id) -{ -} - -static inline void -devlink_param_value_str_fill(union devlink_param_value *dst_val, - const char *src) -{ -} - -static inline struct devlink_region * -devlink_region_create(struct devlink *devlink, - const char *region_name, - u32 region_max_snapshots, - u64 region_size) -{ - return NULL; -} - -static inline void -devlink_region_destroy(struct devlink_region *region) -{ -} - -static inline u32 -devlink_region_shapshot_id_get(struct devlink *devlink) -{ - return 0; -} - -static inline int -devlink_region_snapshot_create(struct devlink_region *region, u64 data_len, - u8 *data, u32 snapshot_id, - devlink_snapshot_data_dest_t *data_destructor) -{ - return 0; -} - -static inline int -devlink_info_driver_name_put(struct devlink_info_req *req, const char *name) -{ - return 0; -} - -static inline int -devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn) -{ - return 0; -} - -static inline int -devlink_info_version_fixed_put(struct devlink_info_req *req, - const char *version_name, - const char *version_value) -{ - return 0; -} - -static inline int -devlink_info_version_stored_put(struct devlink_info_req *req, - const char *version_name, - const char *version_value) -{ - return 0; -} - -static inline int -devlink_info_version_running_put(struct devlink_info_req *req, - const char *version_name, - const char *version_value) -{ - return 0; -} - -static inline int -devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg) -{ - return 0; -} - -static inline int -devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg) -{ - return 0; -} - -static inline int -devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name) -{ - return 0; -} - -static inline int -devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg) -{ - return 0; -} - -static inline int -devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, - const char *name) -{ - return 0; -} - -static inline int -devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg) -{ - return 0; -} - -static inline int -devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value) -{ - return 0; -} - -static inline int -devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value) -{ - return 0; -} - -static inline int -devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value) -{ - return 0; -} - -static inline int -devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value) -{ - return 0; -} - -static inline int -devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value) -{ - return 0; -} - -static inline int -devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, - u16 value_len) -{ - return 0; -} - -static inline int -devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, - bool value) -{ - return 0; -} - -static inline int -devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, - u8 value) -{ - return 0; -} - -static inline int -devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, - u32 value) -{ - return 0; -} - -static inline int -devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, - u64 value) -{ - return 0; -} - -static inline int -devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, - const char *value) -{ - return 0; -} - -static inline int -devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, - const void *value, u16 value_len) -{ - return 0; -} - -static inline struct devlink_health_reporter * -devlink_health_reporter_create(struct devlink *devlink, - const struct devlink_health_reporter_ops *ops, - u64 graceful_period, bool auto_recover, - void *priv) -{ - return NULL; -} - -static inline void -devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) -{ -} - -static inline void * -devlink_health_reporter_priv(struct devlink_health_reporter *reporter) -{ - return NULL; -} - -static inline int -devlink_health_report(struct devlink_health_reporter *reporter, - const char *msg, void *priv_ctx) -{ - return 0; -} - -static inline void -devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, - enum devlink_health_reporter_state state) -{ -} - -static inline void -devlink_compat_running_version(struct net_device *dev, char *buf, size_t len) -{ -} - -static inline int -devlink_compat_flash_update(struct net_device *dev, const char *file_name) -{ - return -EOPNOTSUPP; -} #endif #endif /* _NET_DEVLINK_H_ */ diff --git a/include/net/dsa.h b/include/net/dsa.h index ae480bba11f5..0cfc2f828b87 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -140,6 +140,7 @@ struct dsa_port { unsigned int index; const char *name; const struct dsa_port *cpu_dp; + const char *mac; struct device_node *dn; unsigned int ageing_time; u8 stp_state; diff --git a/include/net/dst.h b/include/net/dst.h index 6cf0870414c7..12b31c602cb0 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -19,17 +19,6 @@ #include <net/neighbour.h> #include <asm/processor.h> -#define DST_GC_MIN (HZ/10) -#define DST_GC_INC (HZ/2) -#define DST_GC_MAX (120*HZ) - -/* Each dst_entry has reference count and sits in some parent list(s). - * When it is removed from parent list, it is "freed" (dst_free). - * After this it enters dead state (dst->obsolete > 0) and if its refcnt - * is zero, it can be destroyed immediately, otherwise it is added - * to gc list and garbage collector periodically checks the refcnt. - */ - struct sk_buff; struct dst_entry { diff --git a/include/net/genetlink.h b/include/net/genetlink.h index aa2e5888f18d..6850c7b1a3a6 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -26,6 +26,7 @@ struct genl_info; * @name: name of family * @version: protocol version * @maxattr: maximum number of attributes supported + * @policy: netlink policy * @netnsok: set to true if the family can handle network * namespaces and should be presented in all of them * @parallel_ops: operations can be called in parallel and aren't @@ -56,6 +57,7 @@ struct genl_family { unsigned int maxattr; bool netnsok; bool parallel_ops; + const struct nla_policy *policy; int (*pre_doit)(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info); @@ -124,14 +126,12 @@ static inline int genl_err_attr(struct genl_info *info, int err, * @cmd: command identifier * @internal_flags: flags used by the family * @flags: flags - * @policy: attribute validation policy * @doit: standard command callback * @start: start callback for dumps * @dumpit: callback for dumpers * @done: completion callback for dumps */ struct genl_ops { - const struct nla_policy *policy; int (*doit)(struct sk_buff *skb, struct genl_info *info); int (*start)(struct netlink_callback *cb); diff --git a/include/net/geneve.h b/include/net/geneve.h index fc6a7e0a874a..bced0b1d9fe4 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -4,6 +4,8 @@ #include <net/udp_tunnel.h> +#define GENEVE_UDP_PORT 6081 + /* Geneve Header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |Ver| Opt Len |O|C| Rsvd. | Protocol Type | diff --git a/include/net/ip.h b/include/net/ip.h index 583526aad1d0..2d3cce7c3e8a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -38,6 +38,10 @@ #define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ #define IPV4_MIN_MTU 68 /* RFC 791 */ +extern unsigned int sysctl_fib_sync_mem; +extern unsigned int sysctl_fib_sync_mem_min; +extern unsigned int sysctl_fib_sync_mem_max; + struct sock; struct inet_skb_parm { diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 84097010237c..2e9235adfa0d 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -19,6 +19,7 @@ #include <linux/notifier.h> #include <net/dst.h> #include <net/flow.h> +#include <net/ip_fib.h> #include <net/netlink.h> #include <net/inetpeer.h> #include <net/fib_notifier.h> @@ -50,7 +51,8 @@ struct fib6_config { u32 fc_protocol; u16 fc_type; /* only 8 bits are used */ u16 fc_delete_all_nh : 1, - __unused : 15; + fc_ignore_dev_down:1, + __unused : 14; struct in6_addr fc_dst; struct in6_addr fc_src; @@ -124,13 +126,11 @@ struct rt6_exception { #define FIB6_MAX_DEPTH 5 struct fib6_nh { - struct in6_addr nh_gw; - struct net_device *nh_dev; - struct lwtunnel_state *nh_lwtstate; + struct fib_nh_common nh_common; - unsigned int nh_flags; - atomic_t nh_upper_bound; - int nh_weight; +#ifdef CONFIG_IPV6_ROUTER_PREF + unsigned long last_probe; +#endif }; struct fib6_info { @@ -159,10 +159,6 @@ struct fib6_info { struct rt6_info * __percpu *rt6i_pcpu; struct rt6_exception_bucket __rcu *rt6i_exception_bucket; -#ifdef CONFIG_IPV6_ROUTER_PREF - unsigned long last_probe; -#endif - u32 fib6_metric; u8 fib6_protocol; u8 fib6_type; @@ -440,13 +436,18 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr) static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i) { - return f6i->fib6_nh.nh_dev; + return f6i->fib6_nh.fib_nh_dev; } +int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, + struct fib6_config *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack); +void fib6_nh_release(struct fib6_nh *fib6_nh); + static inline struct lwtunnel_state *fib6_info_nh_lwt(const struct fib6_info *f6i) { - return f6i->fib6_nh.nh_lwtstate; + return f6i->fib6_nh.fib_nh_lws; } void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 7ab119936e69..5909fc421305 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -68,8 +68,8 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i) { - return (f6i->fib6_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == - RTF_GATEWAY; + return !(f6i->fib6_flags & (RTF_ADDRCONF|RTF_DYNAMIC)) && + f6i->fib6_nh.fib_nh_gw_family; } void ip6_route_input(struct sk_buff *skb); @@ -274,9 +274,11 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b) { - return a->fib6_nh.nh_dev == b->fib6_nh.nh_dev && - ipv6_addr_equal(&a->fib6_nh.nh_gw, &b->fib6_nh.nh_gw) && - !lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate); + struct fib6_nh *nha = &a->fib6_nh, *nhb = &b->fib6_nh; + + return nha->fib_nh_dev == nhb->fib_nh_dev && + ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) && + !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws); } static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9c8214d2116d..d8195c77e247 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -32,10 +32,14 @@ struct fib_config { u8 fc_protocol; u8 fc_scope; u8 fc_type; - /* 3 bytes unused */ + u8 fc_gw_family; + /* 2 bytes unused */ u32 fc_table; __be32 fc_dst; - __be32 fc_gw; + union { + __be32 fc_gw4; + struct in6_addr fc_gw6; + }; int fc_oif; u32 fc_flags; u32 fc_priority; @@ -76,27 +80,47 @@ struct fnhe_hash_bucket { #define FNHE_HASH_SIZE (1 << FNHE_HASH_SHIFT) #define FNHE_RECLAIM_DEPTH 5 +struct fib_nh_common { + struct net_device *nhc_dev; + int nhc_oif; + unsigned int nhc_flags; + struct lwtunnel_state *nhc_lwtstate; + unsigned char nhc_scope; + u8 nhc_family; + u8 nhc_gw_family; + + union { + __be32 ipv4; + struct in6_addr ipv6; + } nhc_gw; + + int nhc_weight; + atomic_t nhc_upper_bound; +}; + struct fib_nh { - struct net_device *nh_dev; + struct fib_nh_common nh_common; struct hlist_node nh_hash; struct fib_info *nh_parent; - unsigned int nh_flags; - unsigned char nh_scope; -#ifdef CONFIG_IP_ROUTE_MULTIPATH - int nh_weight; - atomic_t nh_upper_bound; -#endif #ifdef CONFIG_IP_ROUTE_CLASSID __u32 nh_tclassid; #endif - int nh_oif; - __be32 nh_gw; __be32 nh_saddr; int nh_saddr_genid; struct rtable __rcu * __percpu *nh_pcpu_rth_output; struct rtable __rcu *nh_rth_input; struct fnhe_hash_bucket __rcu *nh_exceptions; - struct lwtunnel_state *nh_lwtstate; +#define fib_nh_family nh_common.nhc_family +#define fib_nh_dev nh_common.nhc_dev +#define fib_nh_oif nh_common.nhc_oif +#define fib_nh_flags nh_common.nhc_flags +#define fib_nh_lws nh_common.nhc_lwtstate +#define fib_nh_scope nh_common.nhc_scope +#define fib_nh_gw_family nh_common.nhc_gw_family +#define fib_nh_gw4 nh_common.nhc_gw.ipv4 +#define fib_nh_gw6 nh_common.nhc_gw.ipv6 +#define fib_nh_weight nh_common.nhc_weight +#define fib_nh_upper_bound nh_common.nhc_upper_bound }; /* @@ -123,9 +147,10 @@ struct fib_info { #define fib_rtt fib_metrics->metrics[RTAX_RTT-1] #define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] int fib_nhs; + bool fib_nh_is_v6; struct rcu_head rcu; struct fib_nh fib_nh[0]; -#define fib_dev fib_nh[0].nh_dev +#define fib_dev fib_nh[0].fib_nh_dev }; @@ -135,15 +160,16 @@ struct fib_rule; struct fib_table; struct fib_result { - __be32 prefix; - unsigned char prefixlen; - unsigned char nh_sel; - unsigned char type; - unsigned char scope; - u32 tclassid; - struct fib_info *fi; - struct fib_table *table; - struct hlist_head *fa_head; + __be32 prefix; + unsigned char prefixlen; + unsigned char nh_sel; + unsigned char type; + unsigned char scope; + u32 tclassid; + struct fib_nh_common *nhc; + struct fib_info *fi; + struct fib_table *table; + struct hlist_head *fa_head; }; struct fib_result_nl { @@ -161,11 +187,10 @@ struct fib_result_nl { int err; }; -#ifdef CONFIG_IP_ROUTE_MULTIPATH -#define FIB_RES_NH(res) ((res).fi->fib_nh[(res).nh_sel]) -#else /* CONFIG_IP_ROUTE_MULTIPATH */ -#define FIB_RES_NH(res) ((res).fi->fib_nh[0]) -#endif /* CONFIG_IP_ROUTE_MULTIPATH */ +static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) +{ + return &fi->fib_nh[nhsel].nh_common; +} #ifdef CONFIG_IP_MULTIPLE_TABLES #define FIB_TABLE_HASHSZ 256 @@ -174,18 +199,11 @@ struct fib_result_nl { #endif __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); +__be32 fib_result_prefsrc(struct net *net, struct fib_result *res); -#define FIB_RES_SADDR(net, res) \ - ((FIB_RES_NH(res).nh_saddr_genid == \ - atomic_read(&(net)->ipv4.dev_addr_genid)) ? \ - FIB_RES_NH(res).nh_saddr : \ - fib_info_update_nh_saddr((net), &FIB_RES_NH(res))) -#define FIB_RES_GW(res) (FIB_RES_NH(res).nh_gw) -#define FIB_RES_DEV(res) (FIB_RES_NH(res).nh_dev) -#define FIB_RES_OIF(res) (FIB_RES_NH(res).nh_oif) - -#define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? : \ - FIB_RES_SADDR(net, res)) +#define FIB_RES_NHC(res) ((res).nhc) +#define FIB_RES_DEV(res) (FIB_RES_NHC(res)->nhc_dev) +#define FIB_RES_OIF(res) (FIB_RES_NHC(res)->nhc_oif) struct fib_entry_notifier_info { struct fib_notifier_info info; /* must be first */ @@ -383,6 +401,8 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net, /* Exported by fib_frontend.c */ extern const struct nla_policy rtm_ipv4_policy[]; void ip_fib_init(void); +int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla, + struct netlink_ext_ack *extack); __be32 fib_compute_spec_dst(struct sk_buff *skb); bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev); int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, @@ -416,6 +436,15 @@ void fib_select_multipath(struct fib_result *res, int hash); void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, const struct sk_buff *skb); +int fib_nh_init(struct net *net, struct fib_nh *fib_nh, + struct fib_config *cfg, int nh_weight, + struct netlink_ext_ack *extack); +void fib_nh_release(struct net *net, struct fib_nh *fib_nh); +int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *fc_encap, + u16 fc_encap_type, void *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack); +void fib_nh_common_release(struct fib_nh_common *nhc); + /* Exported by fib_trie.c */ void fib_trie_init(void); struct fib_table *fib_trie_table(u32 id, struct fib_table *alias); @@ -423,10 +452,12 @@ struct fib_table *fib_trie_table(u32 id, struct fib_table *alias); static inline void fib_combine_itag(u32 *itag, const struct fib_result *res) { #ifdef CONFIG_IP_ROUTE_CLASSID + struct fib_nh_common *nhc = res->nhc; + struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common); #ifdef CONFIG_IP_MULTIPLE_TABLES u32 rtag; #endif - *itag = FIB_RES_NH(*res).nh_tclassid<<16; + *itag = nh->nh_tclassid << 16; #ifdef CONFIG_IP_MULTIPLE_TABLES rtag = res->tclassid; if (*itag == 0) @@ -467,4 +498,9 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr); int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct fib_dump_filter *filter, struct netlink_callback *cb); + +int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh, + unsigned int *flags, bool skip_oif); +int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh, + int nh_weight); #endif /* _NET_FIB_H */ diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 047f9a5ccaad..2ac40135b576 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -600,6 +600,9 @@ struct ip_vs_dest_user_kern { /* Address family of addr */ u16 af; + + u16 tun_type; /* tunnel type */ + __be16 tun_port; /* tunnel port */ }; @@ -660,6 +663,8 @@ struct ip_vs_dest { atomic_t conn_flags; /* flags to copy to conn */ atomic_t weight; /* server weight */ atomic_t last_weight; /* server latest weight */ + __u16 tun_type; /* tunnel type */ + __be16 tun_port; /* tunnel port */ refcount_t refcnt; /* reference counter */ struct ip_vs_stats stats; /* statistics */ diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h new file mode 100644 index 000000000000..453b55bf6723 --- /dev/null +++ b/include/net/ipv6_stubs.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _IPV6_STUBS_H +#define _IPV6_STUBS_H + +#include <linux/in6.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <net/dst.h> +#include <net/flow.h> +#include <net/neighbour.h> +#include <net/sock.h> + +/* structs from net/ip6_fib.h */ +struct fib6_info; +struct fib6_nh; +struct fib6_config; + +/* This is ugly, ideally these symbols should be built + * into the core kernel. + */ +struct ipv6_stub { + int (*ipv6_sock_mc_join)(struct sock *sk, int ifindex, + const struct in6_addr *addr); + int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex, + const struct in6_addr *addr); + int (*ipv6_dst_lookup)(struct net *net, struct sock *sk, + struct dst_entry **dst, struct flowi6 *fl6); + int (*ipv6_route_input)(struct sk_buff *skb); + + struct fib6_table *(*fib6_get_table)(struct net *net, u32 id); + struct fib6_info *(*fib6_lookup)(struct net *net, int oif, + struct flowi6 *fl6, int flags); + struct fib6_info *(*fib6_table_lookup)(struct net *net, + struct fib6_table *table, + int oif, struct flowi6 *fl6, + int flags); + struct fib6_info *(*fib6_multipath_select)(const struct net *net, + struct fib6_info *f6i, + struct flowi6 *fl6, int oif, + const struct sk_buff *skb, + int strict); + u32 (*ip6_mtu_from_fib6)(struct fib6_info *f6i, struct in6_addr *daddr, + struct in6_addr *saddr); + + int (*fib6_nh_init)(struct net *net, struct fib6_nh *fib6_nh, + struct fib6_config *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack); + void (*fib6_nh_release)(struct fib6_nh *fib6_nh); + void (*udpv6_encap_enable)(void); + void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr, + const struct in6_addr *solicited_addr, + bool router, bool solicited, bool override, bool inc_opt); + struct neigh_table *nd_tbl; +}; +extern const struct ipv6_stub *ipv6_stub __read_mostly; + +/* A stub used by bpf helpers. Similarly ugly as ipv6_stub */ +struct ipv6_bpf_stub { + int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len, + bool force_bind_address_no_port, bool with_lock); + struct sock *(*udp6_lib_lookup)(struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, __be16 dport, + int dif, int sdif, struct udp_table *tbl, + struct sk_buff *skb); +}; +extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; + +#endif diff --git a/include/net/ndisc.h b/include/net/ndisc.h index ddfbb591e2c5..366150053043 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -2,6 +2,8 @@ #ifndef _NDISC_H #define _NDISC_H +#include <net/ipv6_stubs.h> + /* * ICMP codes for neighbour discovery messages */ @@ -379,6 +381,14 @@ static inline struct neighbour *__ipv6_neigh_lookup_noref(struct net_device *dev return ___neigh_lookup_noref(&nd_tbl, neigh_key_eq128, ndisc_hashfn, pkey, dev); } +static inline +struct neighbour *__ipv6_neigh_lookup_noref_stub(struct net_device *dev, + const void *pkey) +{ + return ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128, + ndisc_hashfn, pkey, dev); +} + static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, const void *pkey) { struct neighbour *n; @@ -409,6 +419,36 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev, rcu_read_unlock_bh(); } +static inline void __ipv6_confirm_neigh_stub(struct net_device *dev, + const void *pkey) +{ + struct neighbour *n; + + rcu_read_lock_bh(); + n = __ipv6_neigh_lookup_noref_stub(dev, pkey); + if (n) { + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (n->confirmed != now) + n->confirmed = now; + } + rcu_read_unlock_bh(); +} + +/* uses ipv6_stub and is meant for use outside of IPv6 core */ +static inline struct neighbour *ip_neigh_gw6(struct net_device *dev, + const void *addr) +{ + struct neighbour *neigh; + + neigh = __ipv6_neigh_lookup_noref_stub(dev, addr); + if (unlikely(!neigh)) + neigh = __neigh_create(ipv6_stub->nd_tbl, addr, dev, false); + + return neigh; +} + int ndisc_init(void); int ndisc_late_init(void); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 7c1ab9edba03..3e5438bd0101 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -498,11 +498,12 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb return dev_queue_xmit(skb); } -static inline int neigh_output(struct neighbour *n, struct sk_buff *skb) +static inline int neigh_output(struct neighbour *n, struct sk_buff *skb, + bool skip_cache) { const struct hh_cache *hh = &n->hh; - if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) + if ((n->nud_state & NUD_CONNECTED) && hh->hh_len && !skip_cache) return neigh_hh_output(hh, skb); else return n->output(n, skb); diff --git a/include/net/netfilter/ipv4/nf_nat_masquerade.h b/include/net/netfilter/ipv4/nf_nat_masquerade.h deleted file mode 100644 index 13d55206bb9f..000000000000 --- a/include/net/netfilter/ipv4/nf_nat_masquerade.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NF_NAT_MASQUERADE_IPV4_H_ -#define _NF_NAT_MASQUERADE_IPV4_H_ - -#include <net/netfilter/nf_nat.h> - -unsigned int -nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, - const struct nf_nat_range2 *range, - const struct net_device *out); - -int nf_nat_masquerade_ipv4_register_notifier(void); -void nf_nat_masquerade_ipv4_unregister_notifier(void); - -#endif /*_NF_NAT_MASQUERADE_IPV4_H_ */ diff --git a/include/net/netfilter/ipv6/nf_nat_masquerade.h b/include/net/netfilter/ipv6/nf_nat_masquerade.h deleted file mode 100644 index 2917bf95c437..000000000000 --- a/include/net/netfilter/ipv6/nf_nat_masquerade.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NF_NAT_MASQUERADE_IPV6_H_ -#define _NF_NAT_MASQUERADE_IPV6_H_ - -unsigned int -nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, - const struct net_device *out); -int nf_nat_masquerade_ipv6_register_notifier(void); -void nf_nat_masquerade_ipv6_unregister_notifier(void); - -#endif /* _NF_NAT_MASQUERADE_IPV6_H_ */ diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 006e430d1cdf..93ce6b0daaba 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -48,7 +48,7 @@ struct nf_conntrack_expect { /* Expectation class */ unsigned int class; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) union nf_inet_addr saved_addr; /* This is the original per-proto part, used to map the * expected connection the way the recipient expects. */ diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index 3394d75e1c80..00a8fbb2d735 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -88,6 +88,9 @@ static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct) int nf_conntrack_timeout_init(void); void nf_conntrack_timeout_fini(void); void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout); +int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, u8 l3num, u8 l4num, + const char *timeout_name); +void nf_ct_destroy_timeout(struct nf_conn *ct); #else static inline int nf_conntrack_timeout_init(void) { @@ -98,6 +101,18 @@ static inline void nf_conntrack_timeout_fini(void) { return; } + +static inline int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, + u8 l3num, u8 l4num, + const char *timeout_name) +{ + return -EOPNOTSUPP; +} + +static inline void nf_ct_destroy_timeout(struct nf_conn *ct) +{ + return; +} #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ #ifdef CONFIG_NF_CONNTRACK_TIMEOUT diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index cf332c4e0b32..423cda2c6542 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -69,9 +69,9 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum, #endif } -int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops, +int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, const struct nf_hook_ops *nat_ops, unsigned int ops_count); -void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops, +void nf_nat_unregister_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, unsigned int ops_count); unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, @@ -98,6 +98,9 @@ void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops); int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops); void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops); +int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops); +void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops); + unsigned int nf_nat_inet_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); diff --git a/include/net/netfilter/nf_nat_masquerade.h b/include/net/netfilter/nf_nat_masquerade.h new file mode 100644 index 000000000000..54a14d643c34 --- /dev/null +++ b/include/net/netfilter/nf_nat_masquerade.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_NAT_MASQUERADE_H_ +#define _NF_NAT_MASQUERADE_H_ + +#include <net/netfilter/nf_nat.h> + +unsigned int +nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, + const struct nf_nat_range2 *range, + const struct net_device *out); + +int nf_nat_masquerade_inet_register_notifiers(void); +void nf_nat_masquerade_inet_unregister_notifiers(void); + +unsigned int +nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, + const struct net_device *out); + +#endif /*_NF_NAT_MASQUERADE_H_ */ diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index a50a69f5334c..7239105d9d2e 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -119,4 +119,7 @@ nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, return queue; } +int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, + const struct nf_hook_entries *entries, unsigned int index, + unsigned int verdict); #endif /* _NF_QUEUE_H */ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3e9ab643eedf..2d5a0a1a87b8 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -475,8 +475,6 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, enum nft_trans_phase phase); int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); -void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding, bool commit); void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set); /** @@ -1411,4 +1409,6 @@ struct nft_trans_flowtable { int __init nft_chain_filter_init(void); void nft_chain_filter_fini(void); +void __init nft_chain_route_init(void); +void nft_chain_route_fini(void); #endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 104a6669e344..7698460a3dd1 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -9,6 +9,7 @@ #include <linux/uidgid.h> #include <net/inet_frag.h> #include <linux/rcupdate.h> +#include <linux/siphash.h> struct tcpm_hash_bucket; struct ctl_table_header; @@ -217,5 +218,6 @@ struct netns_ipv4 { unsigned int ipmr_seq; /* protected by rtnl_mutex */ atomic_t rt_genid; + siphash_key_t ip_id_key; }; #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index b028a1dc150d..64e29b58bb5e 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -33,6 +33,8 @@ struct netns_sysctl_ipv6 { int auto_flowlabels; int icmpv6_time; int icmpv6_echo_ignore_all; + int icmpv6_echo_ignore_multicast; + int icmpv6_echo_ignore_anycast; int anycast_src_echo_reply; int ip_nonlocal_bind; int fwmark_reflect; diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 21a5243fecd1..9dfd7960d90a 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -106,10 +106,8 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, return req; } -static inline void reqsk_free(struct request_sock *req) +static inline void __reqsk_free(struct request_sock *req) { - WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); - req->rsk_ops->destructor(req); if (req->rsk_listener) sock_put(req->rsk_listener); @@ -117,6 +115,12 @@ static inline void reqsk_free(struct request_sock *req) kmem_cache_free(req->rsk_ops->slab, req); } +static inline void reqsk_free(struct request_sock *req) +{ + WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); + __reqsk_free(req); +} + static inline void reqsk_put(struct request_sock *req) { if (refcount_dec_and_test(&req->rsk_refcnt)) diff --git a/include/net/route.h b/include/net/route.h index 9883dc82f723..96f6c9ae33c2 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -29,6 +29,8 @@ #include <net/flow.h> #include <net/inet_sock.h> #include <net/ip_fib.h> +#include <net/arp.h> +#include <net/ndisc.h> #include <linux/in_route.h> #include <linux/rtnetlink.h> #include <linux/rcupdate.h> @@ -55,12 +57,15 @@ struct rtable { unsigned int rt_flags; __u16 rt_type; __u8 rt_is_input; - __u8 rt_uses_gateway; + u8 rt_gw_family; int rt_iif; /* Info on neighbour */ - __be32 rt_gateway; + union { + __be32 rt_gw4; + struct in6_addr rt_gw6; + }; /* Miscellaneous cached information */ u32 rt_mtu_locked:1, @@ -82,8 +87,8 @@ static inline bool rt_is_output_route(const struct rtable *rt) static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr) { - if (rt->rt_gateway) - return rt->rt_gateway; + if (rt->rt_gw_family == AF_INET) + return rt->rt_gw4; return daddr; } @@ -347,4 +352,34 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) return hoplimit; } +static inline struct neighbour *ip_neigh_gw4(struct net_device *dev, + __be32 daddr) +{ + struct neighbour *neigh; + + neigh = __ipv4_neigh_lookup_noref(dev, daddr); + if (unlikely(!neigh)) + neigh = __neigh_create(&arp_tbl, &daddr, dev, false); + + return neigh; +} + +static inline struct neighbour *ip_neigh_for_gw(struct rtable *rt, + struct sk_buff *skb, + bool *is_v6gw) +{ + struct net_device *dev = rt->dst.dev; + struct neighbour *neigh; + + if (likely(rt->rt_gw_family == AF_INET)) { + neigh = ip_neigh_gw4(dev, rt->rt_gw4); + } else if (rt->rt_gw_family == AF_INET6) { + neigh = ip_neigh_gw6(dev, &rt->rt_gw6); + *is_v6gw = true; + } else { + neigh = ip_neigh_gw4(dev, ip_hdr(skb)->daddr); + } + return neigh; +} + #endif /* _ROUTE_H */ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a2b38b3deeca..e8f85cd2afce 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -52,10 +52,7 @@ struct qdisc_size_table { struct qdisc_skb_head { struct sk_buff *head; struct sk_buff *tail; - union { - u32 qlen; - atomic_t atomic_qlen; - }; + __u32 qlen; spinlock_t lock; }; @@ -113,6 +110,9 @@ struct Qdisc { spinlock_t busylock ____cacheline_aligned_in_smp; spinlock_t seqlock; + + /* for NOLOCK qdisc, true if there are no enqueued skbs */ + bool empty; struct rcu_head rcu; }; @@ -143,11 +143,24 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc) return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; } +static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) +{ + return q->flags & TCQ_F_CPUSTATS; +} + +static inline bool qdisc_is_empty(const struct Qdisc *qdisc) +{ + if (qdisc_is_percpu_stats(qdisc)) + return qdisc->empty; + return !qdisc->q.qlen; +} + static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) { if (!spin_trylock(&qdisc->seqlock)) return false; + qdisc->empty = false; } else if (qdisc_is_running(qdisc)) { return false; } @@ -470,19 +483,27 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) BUILD_BUG_ON(sizeof(qcb->data) < sz); } +static inline int qdisc_qlen_cpu(const struct Qdisc *q) +{ + return this_cpu_ptr(q->cpu_qstats)->qlen; +} + static inline int qdisc_qlen(const struct Qdisc *q) { return q->q.qlen; } -static inline u32 qdisc_qlen_sum(const struct Qdisc *q) +static inline int qdisc_qlen_sum(const struct Qdisc *q) { - u32 qlen = q->qstats.qlen; + __u32 qlen = q->qstats.qlen; + int i; - if (q->flags & TCQ_F_NOLOCK) - qlen += atomic_read(&q->q.atomic_qlen); - else + if (qdisc_is_percpu_stats(q)) { + for_each_possible_cpu(i) + qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen; + } else { qlen += q->q.qlen; + } return qlen; } @@ -736,7 +757,7 @@ static inline bool qdisc_all_tx_empty(const struct net_device *dev) struct netdev_queue *txq = netdev_get_tx_queue(dev, i); const struct Qdisc *q = rcu_dereference(txq->qdisc); - if (q->q.qlen) { + if (!qdisc_is_empty(q)) { rcu_read_unlock(); return false; } @@ -806,11 +827,6 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, return sch->enqueue(skb, sch, to_free); } -static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) -{ - return q->flags & TCQ_F_CPUSTATS; -} - static inline void _bstats_update(struct gnet_stats_basic_packed *bstats, __u64 bytes, __u32 packets) { @@ -878,14 +894,14 @@ static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch, this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb)); } -static inline void qdisc_qstats_atomic_qlen_inc(struct Qdisc *sch) +static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch) { - atomic_inc(&sch->q.atomic_qlen); + this_cpu_inc(sch->cpu_qstats->qlen); } -static inline void qdisc_qstats_atomic_qlen_dec(struct Qdisc *sch) +static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch) { - atomic_dec(&sch->q.atomic_qlen); + this_cpu_dec(sch->cpu_qstats->qlen); } static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch) @@ -1095,6 +1111,32 @@ static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch) return skb; } +static inline void qdisc_update_stats_at_dequeue(struct Qdisc *sch, + struct sk_buff *skb) +{ + if (qdisc_is_percpu_stats(sch)) { + qdisc_qstats_cpu_backlog_dec(sch, skb); + qdisc_bstats_cpu_update(sch, skb); + qdisc_qstats_cpu_qlen_dec(sch); + } else { + qdisc_qstats_backlog_dec(sch, skb); + qdisc_bstats_update(sch, skb); + sch->q.qlen--; + } +} + +static inline void qdisc_update_stats_at_enqueue(struct Qdisc *sch, + unsigned int pkt_len) +{ + if (qdisc_is_percpu_stats(sch)) { + qdisc_qstats_cpu_qlen_inc(sch); + this_cpu_add(sch->cpu_qstats->backlog, pkt_len); + } else { + sch->qstats.backlog += pkt_len; + sch->q.qlen++; + } +} + /* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) { @@ -1102,8 +1144,13 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) if (skb) { skb = __skb_dequeue(&sch->gso_skb); - qdisc_qstats_backlog_dec(sch, skb); - sch->q.qlen--; + if (qdisc_is_percpu_stats(sch)) { + qdisc_qstats_cpu_backlog_dec(sch, skb); + qdisc_qstats_cpu_qlen_dec(sch); + } else { + qdisc_qstats_backlog_dec(sch, skb); + sch->q.qlen--; + } } else { skb = sch->dequeue(sch); } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 1d13ec3f2707..eefdfa5abf6e 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -421,7 +421,7 @@ static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) /* * This mimics the behavior of skb_set_owner_r */ - sk->sk_forward_alloc -= event->rmem_len; + sk_mem_charge(sk, event->rmem_len); } /* Tests if the list has one and only one entry. */ diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h index bb0ecba3db2b..f4ac7117ff29 100644 --- a/include/net/sctp/ulpqueue.h +++ b/include/net/sctp/ulpqueue.h @@ -59,7 +59,7 @@ void sctp_ulpq_free(struct sctp_ulpq *); int sctp_ulpq_tail_data(struct sctp_ulpq *, struct sctp_chunk *, gfp_t); /* Add a new event for propagation to the ULP. */ -int sctp_ulpq_tail_event(struct sctp_ulpq *, struct sctp_ulpevent *ev); +int sctp_ulpq_tail_event(struct sctp_ulpq *, struct sk_buff_head *skb_list); /* Renege previously received chunks. */ void sctp_ulpq_renege(struct sctp_ulpq *, struct sctp_chunk *, gfp_t); diff --git a/include/net/sock.h b/include/net/sock.h index 341f8bafa0cf..bdd77bbce7d8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -368,6 +368,7 @@ struct sock { atomic_t sk_drops; int sk_rcvlowat; struct sk_buff_head sk_error_queue; + struct sk_buff *sk_rx_skb_cache; struct sk_buff_head sk_receive_queue; /* * The backlog queue is special, it is always used with @@ -414,6 +415,7 @@ struct sock { struct sk_buff *sk_send_head; struct rb_root tcp_rtx_queue; }; + struct sk_buff *sk_tx_skb_cache; struct sk_buff_head sk_write_queue; __s32 sk_peek_off; int sk_write_pending; @@ -966,7 +968,7 @@ static inline void sock_rps_record_flow_hash(__u32 hash) static inline void sock_rps_record_flow(const struct sock *sk) { #ifdef CONFIG_RPS - if (static_key_false(&rfs_needed)) { + if (static_branch_unlikely(&rfs_needed)) { /* Reading sk->sk_rxhash might incur an expensive cache line * miss. * @@ -1466,6 +1468,11 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) sock_set_flag(sk, SOCK_QUEUE_SHRUNK); sk->sk_wmem_queued -= skb->truesize; sk_mem_uncharge(sk, skb->truesize); + if (!sk->sk_tx_skb_cache) { + skb_zcopy_clear(skb, true); + sk->sk_tx_skb_cache = skb; + return; + } __kfree_skb(skb); } @@ -2427,6 +2434,15 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); + if ( +#ifdef CONFIG_RPS + !static_branch_unlikely(&rps_needed) && +#endif + !sk->sk_rx_skb_cache) { + sk->sk_rx_skb_cache = skb; + skb_orphan(skb); + return; + } __kfree_skb(skb); } diff --git a/include/net/tls.h b/include/net/tls.h index 5934246b2c6f..d9d0ac66f040 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -60,6 +60,17 @@ #define TLS_AAD_SPACE_SIZE 13 #define TLS_DEVICE_NAME_MAX 32 +#define MAX_IV_SIZE 16 + +/* For AES-CCM, the full 16-bytes of IV is made of '4' fields of given sizes. + * + * IV[16] = b0[1] || implicit nonce[4] || explicit nonce[8] || length[3] + * + * The field 'length' is encoded in field 'b0' as '(length width - 1)'. + * Hence b0 contains (3 - 1) = 2. + */ +#define TLS_AES_CCM_IV_B0_BYTE 2 + /* * This structure defines the routines for Inline TLS driver. * The following routines are optional and filled with a @@ -123,8 +134,7 @@ struct tls_rec { struct scatterlist sg_content_type; char aad_space[TLS_AAD_SPACE_SIZE]; - u8 iv_data[TLS_CIPHER_AES_GCM_128_IV_SIZE + - TLS_CIPHER_AES_GCM_128_SALT_SIZE]; + u8 iv_data[MAX_IV_SIZE]; struct aead_request aead_req; u8 aead_req_ctx[]; }; @@ -219,6 +229,7 @@ struct tls_prot_info { u16 tag_size; u16 overhead_size; u16 iv_size; + u16 salt_size; u16 rec_seq_size; u16 aad_size; u16 tail_size; diff --git a/include/net/udp.h b/include/net/udp.h index fd6d948755c8..d8ce937bc395 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -269,13 +269,13 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, - int noblock, int *peeked, int *off, int *err); + int noblock, int *off, int *err); static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *err) { - int peeked, off = 0; + int off = 0; - return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err); + return __skb_recv_udp(sk, flags, noblock, &off, err); } int udp_v4_early_demux(struct sk_buff *skb); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index b8137953fea3..4b1f95e08307 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -7,7 +7,7 @@ #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> -#include <net/addrconf.h> +#include <net/ipv6_stubs.h> #endif struct udp_port_cfg { diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 00254a58824b..83b5999a2587 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -8,6 +8,8 @@ #include <net/rtnetlink.h> #include <net/switchdev.h> +#define IANA_VXLAN_UDP_PORT 4789 + /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |R|R|R|R|I|R|R|R| Reserved | |