From bd085ef678b2cc8c38c105673dfe8ff8f5ec0c57 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sun, 22 Dec 2019 10:51:09 +0800 Subject: net: add bool confirm_neigh parameter for dst_ops.update_pmtu The MTU update code is supposed to be invoked in response to real networking events that update the PMTU. In IPv6 PMTU update function __ip6_rt_update_pmtu() we called dst_confirm_neigh() to update neighbor confirmed time. But for tunnel code, it will call pmtu before xmit, like: - tnl_update_pmtu() - skb_dst_update_pmtu() - ip6_rt_update_pmtu() - __ip6_rt_update_pmtu() - dst_confirm_neigh() If the tunnel remote dst mac address changed and we still do the neigh confirm, we will not be able to update neigh cache and ping6 remote will failed. So for this ip_tunnel_xmit() case, _EVEN_ if the MTU is changed, we should not be invoking dst_confirm_neigh() as we have no evidence of successful two-way communication at this point. On the other hand it is also important to keep the neigh reachability fresh for TCP flows, so we cannot remove this dst_confirm_neigh() call. To fix the issue, we have to add a new bool parameter for dst_ops.update_pmtu to choose whether we should do neigh update or not. I will add the parameter in this patch and set all the callers to true to comply with the previous way, and fix the tunnel code one by one on later patches. v5: No change. v4: No change. v3: Do not remove dst_confirm_neigh, but add a new bool parameter in dst_ops.update_pmtu to control whether we should do neighbor confirm. Also split the big patch to small ones for each area. v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. Suggested-by: David Miller Reviewed-by: Guillaume Nault Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- include/net/dst.h | 2 +- include/net/dst_ops.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 8224dad2ae94..593630e0e076 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -516,7 +516,7 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu) struct dst_entry *dst = skb_dst(skb); if (dst && dst->ops->update_pmtu) - dst->ops->update_pmtu(dst, NULL, skb, mtu); + dst->ops->update_pmtu(dst, NULL, skb, mtu, true); } static inline void skb_tunnel_check_pmtu(struct sk_buff *skb, diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 5ec645f27ee3..443863c7b8da 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -27,7 +27,8 @@ struct dst_ops { struct dst_entry * (*negative_advice)(struct dst_entry *); void (*link_failure)(struct sk_buff *); void (*update_pmtu)(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu); + struct sk_buff *skb, u32 mtu, + bool confirm_neigh); void (*redirect)(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); int (*local_out)(struct net *net, struct sock *sk, struct sk_buff *skb); -- cgit v1.2.3-59-g8ed1b From 07dc35c6e3cc3c001915d05f5bf21f80a39a0970 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sun, 22 Dec 2019 10:51:12 +0800 Subject: net/dst: add new function skb_dst_update_pmtu_no_confirm Add a new function skb_dst_update_pmtu_no_confirm() for callers who need update pmtu but should not do neighbor confirm. v5: No change. v4: No change. v3: Do not remove dst_confirm_neigh, but add a new bool parameter in dst_ops.update_pmtu to control whether we should do neighbor confirm. Also split the big patch to small ones for each area. v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. Reviewed-by: Guillaume Nault Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- include/net/dst.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 593630e0e076..dc7cc1f1051c 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -519,6 +519,15 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu) dst->ops->update_pmtu(dst, NULL, skb, mtu, true); } +/* update dst pmtu but not do neighbor confirm */ +static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu) +{ + struct dst_entry *dst = skb_dst(skb); + + if (dst && dst->ops->update_pmtu) + dst->ops->update_pmtu(dst, NULL, skb, mtu, false); +} + static inline void skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, int headroom) -- cgit v1.2.3-59-g8ed1b From f081042d128a0c7acbd67611def62e1b52e2d294 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sun, 22 Dec 2019 10:51:16 +0800 Subject: net/dst: do not confirm neighbor for vxlan and geneve pmtu update When do IPv6 tunnel PMTU update and calls __ip6_rt_update_pmtu() in the end, we should not call dst_confirm_neigh() as there is no two-way communication. So disable the neigh confirm for vxlan and geneve pmtu update. v5: No change. v4: No change. v3: Do not remove dst_confirm_neigh, but add a new bool parameter in dst_ops.update_pmtu to control whether we should do neighbor confirm. Also split the big patch to small ones for each area. v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. Fixes: a93bf0ff4490 ("vxlan: update skb dst pmtu on tx path") Fixes: 52a589d51f10 ("geneve: update skb dst pmtu on tx path") Reviewed-by: Guillaume Nault Tested-by: Guillaume Nault Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- include/net/dst.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index dc7cc1f1051c..3448cf865ede 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -535,7 +535,7 @@ static inline void skb_tunnel_check_pmtu(struct sk_buff *skb, u32 encap_mtu = dst_mtu(encap_dst); if (skb->len > encap_mtu - headroom) - skb_dst_update_pmtu(skb, encap_mtu - headroom); + skb_dst_update_pmtu_no_confirm(skb, encap_mtu - headroom); } #endif /* _NET_DST_H */ -- cgit v1.2.3-59-g8ed1b From 7df2281a174bd0fdbb2211a26914e5440740fcde Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 23 Dec 2019 11:03:21 +0100 Subject: of: mdio: Add missing inline to of_mdiobus_child_is_phy() dummy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_OF_MDIO=n: drivers/net/phy/mdio_bus.c:23: include/linux/of_mdio.h:58:13: warning: ‘of_mdiobus_child_is_phy’ defined but not used [-Wunused-function] static bool of_mdiobus_child_is_phy(struct device_node *child) ^~~~~~~~~~~~~~~~~~~~~~~ Fix this by adding the missing "inline" keyword. Fixes: 0aa4d016c043d16a ("of: mdio: export of_mdiobus_child_is_phy") Signed-off-by: Geert Uytterhoeven Reviewed-by: Andrew Lunn Acked-by: Borislav Petkov Signed-off-by: David S. Miller --- include/linux/of_mdio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 79bc82e30c02..491a2b7e77c1 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -55,7 +55,7 @@ static inline int of_mdio_parse_addr(struct device *dev, } #else /* CONFIG_OF_MDIO */ -static bool of_mdiobus_child_is_phy(struct device_node *child) +static inline bool of_mdiobus_child_is_phy(struct device_node *child) { return false; } -- cgit v1.2.3-59-g8ed1b From a33121e5487b424339636b25c35d3a180eaa5f5e Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Fri, 27 Dec 2019 03:26:27 +0100 Subject: ptp: fix the race between the release of ptp_clock and cdev In a case when a ptp chardev (like /dev/ptp0) is open but an underlying device is removed, closing this file leads to a race. This reproduces easily in a kvm virtual machine: ts# cat openptp0.c int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); } ts# uname -r 5.5.0-rc3-46cf053e ts# cat /proc/cmdline ... slub_debug=FZP ts# modprobe ptp_kvm ts# ./openptp0 & [1] 670 opened /dev/ptp0, sleeping 10s... ts# rmmod ptp_kvm ts# ls /dev/ptp* ls: cannot access '/dev/ptp*': No such file or directory ts# ...woken up [ 48.010809] general protection fault: 0000 [#1] SMP [ 48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25 [ 48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ... [ 48.016270] RIP: 0010:module_put.part.0+0x7/0x80 [ 48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202 [ 48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0 [ 48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b [ 48.019470] ... ^^^ a slub poison [ 48.023854] Call Trace: [ 48.024050] __fput+0x21f/0x240 [ 48.024288] task_work_run+0x79/0x90 [ 48.024555] do_exit+0x2af/0xab0 [ 48.024799] ? vfs_write+0x16a/0x190 [ 48.025082] do_group_exit+0x35/0x90 [ 48.025387] __x64_sys_exit_group+0xf/0x10 [ 48.025737] do_syscall_64+0x3d/0x130 [ 48.026056] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 48.026479] RIP: 0033:0x7f53b12082f6 [ 48.026792] ... [ 48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm] [ 48.045001] Fixing recursive fault but reboot is needed! This happens in: static void __fput(struct file *file) { ... if (file->f_op->release) file->f_op->release(inode, file); <<< cdev is kfree'd here if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && !(mode & FMODE_PATH))) { cdev_put(inode->i_cdev); <<< cdev fields are accessed here Namely: __fput() posix_clock_release() kref_put(&clk->kref, delete_clock) <<< the last reference delete_clock() delete_ptp_clock() kfree(ptp) <<< cdev is embedded in ptp cdev_put module_put(p->owner) <<< *p is kfree'd, bang! Here cdev is embedded in posix_clock which is embedded in ptp_clock. The race happens because ptp_clock's lifetime is controlled by two refcounts: kref and cdev.kobj in posix_clock. This is wrong. Make ptp_clock's sysfs device a parent of cdev with cdev_device_add() created especially for such cases. This way the parent device with its ptp_clock is not released until all references to the cdev are released. This adds a requirement that an initialized but not exposed struct device should be provided to posix_clock_register() by a caller instead of a simple dev_t. This approach was adopted from the commit 72139dfa2464 ("watchdog: Fix the race between the release of watchdog_core_data and cdev"). See details of the implementation in the commit 233ed09d7fda ("chardev: add helper function to register char devs with a struct device"). Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u Analyzed-by: Stephen Johnston Analyzed-by: Vern Lovejoy Signed-off-by: Vladis Dronov Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/ptp/ptp_clock.c | 31 ++++++++++++++----------------- drivers/ptp/ptp_private.h | 2 +- include/linux/posix-clock.h | 19 +++++++++++-------- kernel/time/posix-clock.c | 31 +++++++++++++------------------ 4 files changed, 39 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index e60eab7f8a61..61fafe0374ce 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -166,9 +166,9 @@ static struct posix_clock_operations ptp_clock_ops = { .read = ptp_read, }; -static void delete_ptp_clock(struct posix_clock *pc) +static void ptp_clock_release(struct device *dev) { - struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev); mutex_destroy(&ptp->tsevq_mux); mutex_destroy(&ptp->pincfg_mux); @@ -213,7 +213,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, } ptp->clock.ops = ptp_clock_ops; - ptp->clock.release = delete_ptp_clock; ptp->info = info; ptp->devid = MKDEV(major, index); ptp->index = index; @@ -236,15 +235,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, if (err) goto no_pin_groups; - /* Create a new device in our class. */ - ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid, - ptp, ptp->pin_attr_groups, - "ptp%d", ptp->index); - if (IS_ERR(ptp->dev)) { - err = PTR_ERR(ptp->dev); - goto no_device; - } - /* Register a new PPS source. */ if (info->pps) { struct pps_source_info pps; @@ -260,8 +250,18 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, } } - /* Create a posix clock. */ - err = posix_clock_register(&ptp->clock, ptp->devid); + /* Initialize a new device of our class in our clock structure. */ + device_initialize(&ptp->dev); + ptp->dev.devt = ptp->devid; + ptp->dev.class = ptp_class; + ptp->dev.parent = parent; + ptp->dev.groups = ptp->pin_attr_groups; + ptp->dev.release = ptp_clock_release; + dev_set_drvdata(&ptp->dev, ptp); + dev_set_name(&ptp->dev, "ptp%d", ptp->index); + + /* Create a posix clock and link it to the device. */ + err = posix_clock_register(&ptp->clock, &ptp->dev); if (err) { pr_err("failed to create posix clock\n"); goto no_clock; @@ -273,8 +273,6 @@ no_clock: if (ptp->pps_source) pps_unregister_source(ptp->pps_source); no_pps: - device_destroy(ptp_class, ptp->devid); -no_device: ptp_cleanup_pin_groups(ptp); no_pin_groups: if (ptp->kworker) @@ -304,7 +302,6 @@ int ptp_clock_unregister(struct ptp_clock *ptp) if (ptp->pps_source) pps_unregister_source(ptp->pps_source); - device_destroy(ptp_class, ptp->devid); ptp_cleanup_pin_groups(ptp); posix_clock_unregister(&ptp->clock); diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 9171d42468fd..6b97155148f1 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -28,7 +28,7 @@ struct timestamp_event_queue { struct ptp_clock { struct posix_clock clock; - struct device *dev; + struct device dev; struct ptp_clock_info *info; dev_t devid; int index; /* index into clocks.map */ diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index fe6cfdcfbc26..468328b1e1dd 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -69,29 +69,32 @@ struct posix_clock_operations { * * @ops: Functional interface to the clock * @cdev: Character device instance for this clock - * @kref: Reference count. + * @dev: Pointer to the clock's device. * @rwsem: Protects the 'zombie' field from concurrent access. * @zombie: If 'zombie' is true, then the hardware has disappeared. - * @release: A function to free the structure when the reference count reaches - * zero. May be NULL if structure is statically allocated. * * Drivers should embed their struct posix_clock within a private * structure, obtaining a reference to it during callbacks using * container_of(). + * + * Drivers should supply an initialized but not exposed struct device + * to posix_clock_register(). It is used to manage lifetime of the + * driver's private structure. It's 'release' field should be set to + * a release function for this private structure. */ struct posix_clock { struct posix_clock_operations ops; struct cdev cdev; - struct kref kref; + struct device *dev; struct rw_semaphore rwsem; bool zombie; - void (*release)(struct posix_clock *clk); }; /** * posix_clock_register() - register a new clock - * @clk: Pointer to the clock. Caller must provide 'ops' and 'release' - * @devid: Allocated device id + * @clk: Pointer to the clock. Caller must provide 'ops' field + * @dev: Pointer to the initialized device. Caller must provide + * 'release' field * * A clock driver calls this function to register itself with the * clock device subsystem. If 'clk' points to dynamically allocated @@ -100,7 +103,7 @@ struct posix_clock { * * Returns zero on success, non-zero otherwise. */ -int posix_clock_register(struct posix_clock *clk, dev_t devid); +int posix_clock_register(struct posix_clock *clk, struct device *dev); /** * posix_clock_unregister() - unregister a clock diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index ec960bb939fd..200fb2d3be99 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -14,8 +14,6 @@ #include "posix-timers.h" -static void delete_clock(struct kref *kref); - /* * Returns NULL if the posix_clock instance attached to 'fp' is old and stale. */ @@ -125,7 +123,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) err = 0; if (!err) { - kref_get(&clk->kref); + get_device(clk->dev); fp->private_data = clk; } out: @@ -141,7 +139,7 @@ static int posix_clock_release(struct inode *inode, struct file *fp) if (clk->ops.release) err = clk->ops.release(clk); - kref_put(&clk->kref, delete_clock); + put_device(clk->dev); fp->private_data = NULL; @@ -161,38 +159,35 @@ static const struct file_operations posix_clock_file_operations = { #endif }; -int posix_clock_register(struct posix_clock *clk, dev_t devid) +int posix_clock_register(struct posix_clock *clk, struct device *dev) { int err; - kref_init(&clk->kref); init_rwsem(&clk->rwsem); cdev_init(&clk->cdev, &posix_clock_file_operations); + err = cdev_device_add(&clk->cdev, dev); + if (err) { + pr_err("%s unable to add device %d:%d\n", + dev_name(dev), MAJOR(dev->devt), MINOR(dev->devt)); + return err; + } clk->cdev.owner = clk->ops.owner; - err = cdev_add(&clk->cdev, devid, 1); + clk->dev = dev; - return err; + return 0; } EXPORT_SYMBOL_GPL(posix_clock_register); -static void delete_clock(struct kref *kref) -{ - struct posix_clock *clk = container_of(kref, struct posix_clock, kref); - - if (clk->release) - clk->release(clk); -} - void posix_clock_unregister(struct posix_clock *clk) { - cdev_del(&clk->cdev); + cdev_device_del(&clk->cdev, clk->dev); down_write(&clk->rwsem); clk->zombie = true; up_write(&clk->rwsem); - kref_put(&clk->kref, delete_clock); + put_device(clk->dev); } EXPORT_SYMBOL_GPL(posix_clock_unregister); -- cgit v1.2.3-59-g8ed1b From a5b72a083da197b493c7ed1e5730d62d3199f7d6 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Sat, 28 Dec 2019 16:36:58 +0100 Subject: net/sched: add delete_empty() to filters and use it in cls_flower Revert "net/sched: cls_u32: fix refcount leak in the error path of u32_change()", and fix the u32 refcount leak in a more generic way that preserves the semantic of rule dumping. On tc filters that don't support lockless insertion/removal, there is no need to guard against concurrent insertion when a removal is in progress. Therefore, for most of them we can avoid a full walk() when deleting, and just decrease the refcount, like it was done on older Linux kernels. This fixes situations where walk() was wrongly detecting a non-empty filter, like it happened with cls_u32 in the error path of change(), thus leading to failures in the following tdc selftests: 6aa7: (filter, u32) Add/Replace u32 with source match and invalid indev 6658: (filter, u32) Add/Replace u32 with custom hash table and invalid handle 74c2: (filter, u32) Add/Replace u32 filter with invalid hash table id On cls_flower, and on (future) lockless filters, this check is necessary: move all the check_empty() logic in a callback so that each filter can have its own implementation. For cls_flower, it's sufficient to check if no IDRs have been allocated. This reverts commit 275c44aa194b7159d1191817b20e076f55f0e620. Changes since v1: - document the need for delete_empty() when TCF_PROTO_OPS_DOIT_UNLOCKED is used, thanks to Vlad Buslov - implement delete_empty() without doing fl_walk(), thanks to Vlad Buslov - squash revert and new fix in a single patch, to be nice with bisect tests that run tdc on u32 filter, thanks to Dave Miller Fixes: 275c44aa194b ("net/sched: cls_u32: fix refcount leak in the error path of u32_change()") Fixes: 6676d5e416ee ("net: sched: set dedicated tcf_walker flag when tp is empty") Suggested-by: Jamal Hadi Salim Suggested-by: Vlad Buslov Signed-off-by: Davide Caratti Reviewed-by: Vlad Buslov Tested-by: Jamal Hadi Salim Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/sch_generic.h | 5 +++++ net/sched/cls_api.c | 31 +++++-------------------------- net/sched/cls_flower.c | 12 ++++++++++++ net/sched/cls_u32.c | 25 ------------------------- 4 files changed, 22 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 144f264ea394..fceddf89592a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -308,6 +308,7 @@ struct tcf_proto_ops { int (*delete)(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, struct netlink_ext_ack *); + bool (*delete_empty)(struct tcf_proto *tp); void (*walk)(struct tcf_proto *tp, struct tcf_walker *arg, bool rtnl_held); int (*reoffload)(struct tcf_proto *tp, bool add, @@ -336,6 +337,10 @@ struct tcf_proto_ops { int flags; }; +/* Classifiers setting TCF_PROTO_OPS_DOIT_UNLOCKED in tcf_proto_ops->flags + * are expected to implement tcf_proto_ops->delete_empty(), otherwise race + * conditions can occur when filters are inserted/deleted simultaneously. + */ enum tcf_proto_ops_flags { TCF_PROTO_OPS_DOIT_UNLOCKED = 1, }; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 6a0eacafdb19..76e0d122616a 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -308,33 +308,12 @@ static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held, tcf_proto_destroy(tp, rtnl_held, true, extack); } -static int walker_check_empty(struct tcf_proto *tp, void *fh, - struct tcf_walker *arg) +static bool tcf_proto_check_delete(struct tcf_proto *tp) { - if (fh) { - arg->nonempty = true; - return -1; - } - return 0; -} - -static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held) -{ - struct tcf_walker walker = { .fn = walker_check_empty, }; - - if (tp->ops->walk) { - tp->ops->walk(tp, &walker, rtnl_held); - return !walker.nonempty; - } - return true; -} + if (tp->ops->delete_empty) + return tp->ops->delete_empty(tp); -static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held) -{ - spin_lock(&tp->lock); - if (tcf_proto_is_empty(tp, rtnl_held)) - tp->deleting = true; - spin_unlock(&tp->lock); + tp->deleting = true; return tp->deleting; } @@ -1751,7 +1730,7 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain, * concurrently. * Mark tp for deletion if it is empty. */ - if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) { + if (!tp_iter || !tcf_proto_check_delete(tp)) { mutex_unlock(&chain->filter_chain_lock); return; } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 0d125de54285..b0f42e62dd76 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2773,6 +2773,17 @@ static void fl_bind_class(void *fh, u32 classid, unsigned long cl) f->res.class = cl; } +static bool fl_delete_empty(struct tcf_proto *tp) +{ + struct cls_fl_head *head = fl_head_dereference(tp); + + spin_lock(&tp->lock); + tp->deleting = idr_is_empty(&head->handle_idr); + spin_unlock(&tp->lock); + + return tp->deleting; +} + static struct tcf_proto_ops cls_fl_ops __read_mostly = { .kind = "flower", .classify = fl_classify, @@ -2782,6 +2793,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .put = fl_put, .change = fl_change, .delete = fl_delete, + .delete_empty = fl_delete_empty, .walk = fl_walk, .reoffload = fl_reoffload, .hw_add = fl_hw_add, diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 66c6bcec16cb..a0e6fac613de 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -1108,33 +1108,10 @@ erridr: return err; } -static bool u32_hnode_empty(struct tc_u_hnode *ht, bool *non_root_ht) -{ - int i; - - if (!ht) - return true; - if (!ht->is_root) { - *non_root_ht = true; - return false; - } - if (*non_root_ht) - return false; - if (ht->refcnt < 2) - return true; - - for (i = 0; i <= ht->divisor; i++) { - if (rtnl_dereference(ht->ht[i])) - return false; - } - return true; -} - static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg, bool rtnl_held) { struct tc_u_common *tp_c = tp->data; - bool non_root_ht = false; struct tc_u_hnode *ht; struct tc_u_knode *n; unsigned int h; @@ -1147,8 +1124,6 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg, ht = rtnl_dereference(ht->next)) { if (ht->prio != tp->prio) continue; - if (u32_hnode_empty(ht, &non_root_ht)) - return; if (arg->count >= arg->skip) { if (arg->fn(tp, ht, arg) < 0) { arg->stop = 1; -- cgit v1.2.3-59-g8ed1b