From 19a1aad8886fd5b704b02870020cb6694f686991 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 28 Sep 2019 12:21:56 +0800 Subject: nfsd: remove set but not used variable 'len' Fixes gcc '-Wunused-but-set-variable' warning: fs/nfsd/nfs4xdr.c: In function nfsd4_encode_splice_read: fs/nfsd/nfs4xdr.c:3464:7: warning: variable len set but not used [-Wunused-but-set-variable] It is not used since commit 83a63072c815 ("nfsd: fix nfs read eof detection") Reported-by: Hulk Robot Signed-off-by: YueHaibing Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 533d0fc3c96b..1883370693f2 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3461,7 +3461,6 @@ static __be32 nfsd4_encode_splice_read( struct xdr_stream *xdr = &resp->xdr; struct xdr_buf *buf = xdr->buf; u32 eof; - long len; int space_left; __be32 nfserr; __be32 *p = xdr->p - 2; @@ -3470,7 +3469,6 @@ static __be32 nfsd4_encode_splice_read( if (xdr->end - xdr->p < 1) return nfserr_resource; - len = maxcount; nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp, file, read->rd_offset, &maxcount, &eof); read->rd_length = maxcount; -- cgit v1.2.3-59-g8ed1b From c4b77edb3f7f58a3241d318f946ed68708776f8b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 3 Oct 2019 12:33:08 -0400 Subject: nfsd: "\%s" should be "%s" Randy says: > sparse complains about these, as does gcc when used with --pedantic. > sparse says: > > ../fs/nfsd/nfs4state.c:2385:23: warning: unknown escape sequence: '\%' > ../fs/nfsd/nfs4state.c:2385:23: warning: unknown escape sequence: '\%' > ../fs/nfsd/nfs4state.c:2388:23: warning: unknown escape sequence: '\%' > ../fs/nfsd/nfs4state.c:2388:23: warning: unknown escape sequence: '\%' I'm not sure how this crept in. Fix it. Reported-by: Randy Dunlap Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c65aeaa812d4..befcafc43c74 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2382,10 +2382,10 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) access = bmap_to_share_mode(ols->st_access_bmap); deny = bmap_to_share_mode(ols->st_deny_bmap); - seq_printf(s, "access: \%s\%s, ", + seq_printf(s, "access: %s%s, ", access & NFS4_SHARE_ACCESS_READ ? "r" : "-", access & NFS4_SHARE_ACCESS_WRITE ? "w" : "-"); - seq_printf(s, "deny: \%s\%s, ", + seq_printf(s, "deny: %s%s, ", deny & NFS4_SHARE_ACCESS_READ ? "r" : "-", deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-"); -- cgit v1.2.3-59-g8ed1b From 832b2cb955437dcfe9b8f08e5f37303c9097fc87 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 Oct 2019 09:58:20 -0400 Subject: svcrdma: Improve DMA mapping trace points Capture the total size of Sends, the size of DMA map and the matching DMA unmap to ensure operation is correct. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/trace/events/rpcrdma.h | 30 +++++++++++++++++++++++------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 8 ++++++-- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index a13830616107..9dd76806a5c9 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1498,31 +1498,47 @@ DEFINE_ERROR_EVENT(chunk); ** Server-side RDMA API events **/ -TRACE_EVENT(svcrdma_dma_map_page, +DECLARE_EVENT_CLASS(svcrdma_dma_map_class, TP_PROTO( const struct svcxprt_rdma *rdma, - const void *page + u64 dma_addr, + u32 length ), - TP_ARGS(rdma, page), + TP_ARGS(rdma, dma_addr, length), TP_STRUCT__entry( - __field(const void *, page); + __field(u64, dma_addr) + __field(u32, length) __string(device, rdma->sc_cm_id->device->name) __string(addr, rdma->sc_xprt.xpt_remotebuf) ), TP_fast_assign( - __entry->page = page; + __entry->dma_addr = dma_addr; + __entry->length = length; __assign_str(device, rdma->sc_cm_id->device->name); __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); ), - TP_printk("addr=%s device=%s page=%p", - __get_str(addr), __get_str(device), __entry->page + TP_printk("addr=%s device=%s dma_addr=%llu length=%u", + __get_str(addr), __get_str(device), + __entry->dma_addr, __entry->length ) ); +#define DEFINE_SVC_DMA_EVENT(name) \ + DEFINE_EVENT(svcrdma_dma_map_class, svcrdma_##name, \ + TP_PROTO( \ + const struct svcxprt_rdma *rdma,\ + u64 dma_addr, \ + u32 length \ + ), \ + TP_ARGS(rdma, dma_addr, length)) + +DEFINE_SVC_DMA_EVENT(dma_map_page); +DEFINE_SVC_DMA_EVENT(dma_unmap_page); + TRACE_EVENT(svcrdma_dma_map_rwctx, TP_PROTO( const struct svcxprt_rdma *rdma, diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 6fdba72f89f4..f3f108090aa4 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -233,11 +233,15 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, /* The 
first SGE contains the transport header, which * remains mapped until @ctxt is destroyed. */ - for (i = 1; i < ctxt->sc_send_wr.num_sge; i++) + for (i = 1; i < ctxt->sc_send_wr.num_sge; i++) { ib_dma_unmap_page(device, ctxt->sc_sges[i].addr, ctxt->sc_sges[i].length, DMA_TO_DEVICE); + trace_svcrdma_dma_unmap_page(rdma, + ctxt->sc_sges[i].addr, + ctxt->sc_sges[i].length); + } for (i = 0; i < ctxt->sc_page_count; ++i) put_page(ctxt->sc_pages[i]); @@ -490,6 +494,7 @@ static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma, dma_addr_t dma_addr; dma_addr = ib_dma_map_page(dev, page, offset, len, DMA_TO_DEVICE); + trace_svcrdma_dma_map_page(rdma, dma_addr, len); if (ib_dma_mapping_error(dev, dma_addr)) goto out_maperr; @@ -499,7 +504,6 @@ static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma, return 0; out_maperr: - trace_svcrdma_dma_map_page(rdma, page); return -EIO; } -- cgit v1.2.3-59-g8ed1b From 6e73e92b155c868ff7fce9d108839668caf1d9be Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Wed, 9 Oct 2019 15:11:37 -0400 Subject: nfsd4: fix up replay_matches_cache() When running an nfs stress test, I see quite a few cached replies that don't match up with the actual request. The first comment in replay_matches_cache() makes sense, but the code doesn't seem to match... fix it. This isn't exactly a bugfix, as the server isn't required to catch every case of a false retry. So, we may as well do this, but if this is fixing a problem then that suggests there's a client bug. Fixes: 53da6a53e1d4 ("nfsd4: catch some false session retries") Signed-off-by: Scott Mayhew Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index befcafc43c74..369e574c5092 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3548,12 +3548,17 @@ static bool replay_matches_cache(struct svc_rqst *rqstp, (bool)seq->cachethis) return false; /* - * If there's an error than the reply can have fewer ops than - * the call. But if we cached a reply with *more* ops than the - * call you're sending us now, then this new call is clearly not - * really a replay of the old one: + * If there's an error then the reply can have fewer ops than + * the call. */ - if (slot->sl_opcnt < argp->opcnt) + if (slot->sl_opcnt < argp->opcnt && !slot->sl_status) + return false; + /* + * But if we cached a reply with *more* ops than the call you're + * sending us now, then this new call is clearly not really a + * replay of the old one: + */ + if (slot->sl_opcnt > argp->opcnt) return false; /* This is the only check explicitly called by spec: */ if (!same_creds(&rqstp->rq_cred, &slot->sl_cred)) -- cgit v1.2.3-59-g8ed1b From 12b4157b7d3b666b1296b5cd4f1b675f102e2126 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 11 Oct 2019 19:02:58 +0300 Subject: nfsd: remove private bin2hex implementation Calling sprintf in a loop is not very efficient, and in any case, we already have an implementation of bin-to-hex conversion in lib/ which we might as well use. Note that original code used to nul-terminate the destination while bin2hex doesn't. That's why replace kmalloc() with kzalloc(). Signed-off-by: Andy Shevchenko Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4recover.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index cdc75ad4438b..29dff4c6e752 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -1850,19 +1850,14 @@ nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *env0, char *env1) static char * bin_to_hex_dup(const unsigned char *src, int srclen) { - int i; - char *buf, *hex; + char *buf; /* +1 for terminating NULL */ - buf = kmalloc((srclen * 2) + 1, GFP_KERNEL); + buf = kzalloc((srclen * 2) + 1, GFP_KERNEL); if (!buf) return buf; - hex = buf; - for (i = 0; i < srclen; i++) { - sprintf(hex, "%2.2x", *src++); - hex += 2; - } + bin2hex(buf, src, srclen); return buf; } -- cgit v1.2.3-59-g8ed1b From 5fcaf6982d1167f1cd9b264704f6d1ef4c505d54 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Tue, 1 Oct 2019 11:03:59 +0300 Subject: sunrpc: fix crash when cache_head become valid before update I was investigating a crash in our Virtuozzo7 kernel which happened in svcauth_unix_set_client. I found out that we access the m_client field in the ip_map structure, which was received from sunrpc_cache_lookup (we have a slightly older kernel; the code is now in sunrpc_cache_add_entry), and this field looks uninitialized (m_client == 0x74 doesn't look like a pointer), but in the cache_head flags we see 0x1, which is CACHE_VALID. It looks like the problem arose from our previous fix to sunrpc (1): commit 4ecd55ea0742 ("sunrpc: fix cache_head leak due to queued request") And we've also found a patch already fixing our patch (2): commit d58431eacb22 ("sunrpc: don't mark uninitialised items as VALID.") Though the crash is eliminated, I think the core of the problem is not completely fixed: Neil in patch (2) makes the cache_head CACHE_NEGATIVE before cache_fresh_locked which was added in (1) to fix the crash. 
This way cache_is_valid won't say the cache is valid anymore, and in svcauth_unix_set_client the function cache_check will return an error instead of 0, so we don't count the entry as initialized. But it looks like we need to remove cache_fresh_locked completely in sunrpc_cache_lookup: In (1) we only wanted to make cache_fresh_unlocked->cache_dequeue run so that cache_requests with no readers also release the corresponding cache_head, to fix their leak. Vasily and I were not sure whether cache_fresh_locked and cache_fresh_unlocked should be used as a pair or not, so we guessed and used them as a pair. Now we see that we don't want the CACHE_VALID bit set here by cache_fresh_locked, as "valid" means "initialized" and there is no initialization in sunrpc_cache_add_entry. Neither expiry_time nor last_refresh is used in the cache_fresh_unlocked code path, and neither is required for the initial fix. So, to conclude, cache_fresh_locked was called by mistake, and we can just safely remove it instead of propping it up with CACHE_NEGATIVE. It looks ideologically better to me. I hope I'm not missing something here. 
Here is our crash backtrace: [13108726.326291] BUG: unable to handle kernel NULL pointer dereference at 0000000000000074 [13108726.326365] IP: [] svcauth_unix_set_client+0x2ab/0x520 [sunrpc] [13108726.326448] PGD 0 [13108726.326468] Oops: 0002 [#1] SMP [13108726.326497] Modules linked in: nbd isofs xfs loop kpatch_cumulative_81_0_r1(O) xt_physdev nfnetlink_queue bluetooth rfkill ip6table_nat nf_nat_ipv6 ip_vs_wrr ip_vs_wlc ip_vs_sh nf_conntrack_netlink ip_vs_sed ip_vs_pe_sip nf_conntrack_sip ip_vs_nq ip_vs_lc ip_vs_lblcr ip_vs_lblc ip_vs_ftp ip_vs_dh nf_nat_ftp nf_conntrack_ftp iptable_raw xt_recent nf_log_ipv6 xt_hl ip6t_rt nf_log_ipv4 nf_log_common xt_LOG xt_limit xt_TCPMSS xt_tcpmss vxlan ip6_udp_tunnel udp_tunnel xt_statistic xt_NFLOG nfnetlink_log dummy xt_mark xt_REDIRECT nf_nat_redirect raw_diag udp_diag tcp_diag inet_diag netlink_diag af_packet_diag unix_diag rpcsec_gss_krb5 xt_addrtype ip6t_rpfilter ipt_REJECT nf_reject_ipv4 ip6t_REJECT nf_reject_ipv6 ebtable_nat ebtable_broute nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_mangle ip6table_raw nfsv4 [13108726.327173] dns_resolver cls_u32 binfmt_misc arptable_filter arp_tables ip6table_filter ip6_tables devlink fuse_kio_pcs ipt_MASQUERADE nf_nat_masquerade_ipv4 xt_nat iptable_nat nf_nat_ipv4 xt_comment nf_conntrack_ipv4 nf_defrag_ipv4 xt_wdog_tmo xt_multiport bonding xt_set xt_conntrack iptable_filter iptable_mangle kpatch(O) ebtable_filter ebt_among ebtables ip_set_hash_ip ip_set nfnetlink vfat fat skx_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass fuse pcspkr ses enclosure joydev sg mei_me hpwdt hpilo lpc_ich mei ipmi_si shpchp ipmi_devintf ipmi_msghandler xt_ipvs acpi_power_meter ip_vs_rr nfsv3 nfsd auth_rpcgss nfs_acl nfs lockd grace fscache nf_nat cls_fw sch_htb sch_cbq sch_sfq ip_vs em_u32 nf_conntrack tun br_netfilter veth overlay ip6_vzprivnet ip6_vznetstat ip_vznetstat [13108726.327817] ip_vzprivnet vziolimit vzevent vzlist vzstat vznetstat vznetdev vzmon vzdev bridge 
pio_kaio pio_nfs pio_direct pfmt_raw pfmt_ploop1 ploop ip_tables ext4 mbcache jbd2 sd_mod crc_t10dif crct10dif_generic mgag200 i2c_algo_bit drm_kms_helper scsi_transport_iscsi 8021q syscopyarea sysfillrect garp sysimgblt fb_sys_fops mrp stp ttm llc bnx2x crct10dif_pclmul crct10dif_common crc32_pclmul crc32c_intel drm dm_multipath ghash_clmulni_intel uas aesni_intel lrw gf128mul glue_helper ablk_helper cryptd tg3 smartpqi scsi_transport_sas mdio libcrc32c i2c_core usb_storage ptp pps_core wmi sunrpc dm_mirror dm_region_hash dm_log dm_mod [last unloaded: kpatch_cumulative_82_0_r1] [13108726.328403] CPU: 35 PID: 63742 Comm: nfsd ve: 51332 Kdump: loaded Tainted: G W O ------------ 3.10.0-862.20.2.vz7.73.29 #1 73.29 [13108726.328491] Hardware name: HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 10/02/2018 [13108726.328554] task: ffffa0a6a41b1160 ti: ffffa0c2a74bc000 task.ti: ffffa0c2a74bc000 [13108726.328610] RIP: 0010:[] [] svcauth_unix_set_client+0x2ab/0x520 [sunrpc] [13108726.328706] RSP: 0018:ffffa0c2a74bfd80 EFLAGS: 00010246 [13108726.328750] RAX: 0000000000000001 RBX: ffffa0a6183ae000 RCX: 0000000000000000 [13108726.328811] RDX: 0000000000000074 RSI: 0000000000000286 RDI: ffffa0c2a74bfcf0 [13108726.328864] RBP: ffffa0c2a74bfe00 R08: ffffa0bab8c22960 R09: 0000000000000001 [13108726.328916] R10: 0000000000000001 R11: 0000000000000001 R12: ffffa0a32aa7f000 [13108726.328969] R13: ffffa0a6183afac0 R14: ffffa0c233d88d00 R15: ffffa0c2a74bfdb4 [13108726.329022] FS: 0000000000000000(0000) GS:ffffa0e17f9c0000(0000) knlGS:0000000000000000 [13108726.329081] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [13108726.332311] CR2: 0000000000000074 CR3: 00000026a1b28000 CR4: 00000000007607e0 [13108726.334606] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [13108726.336754] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [13108726.338908] PKRU: 00000000 [13108726.341047] Call Trace: [13108726.343074] [] ? 
groups_alloc+0x34/0x110 [13108726.344837] [] svc_set_client+0x24/0x30 [sunrpc] [13108726.346631] [] svc_process_common+0x241/0x710 [sunrpc] [13108726.348332] [] svc_process+0x103/0x190 [sunrpc] [13108726.350016] [] nfsd+0xdf/0x150 [nfsd] [13108726.351735] [] ? nfsd_destroy+0x80/0x80 [nfsd] [13108726.353459] [] kthread+0xd1/0xe0 [13108726.355195] [] ? create_kthread+0x60/0x60 [13108726.356896] [] ret_from_fork_nospec_begin+0x7/0x21 [13108726.358577] [] ? create_kthread+0x60/0x60 [13108726.360240] Code: 4c 8b 45 98 0f 8e 2e 01 00 00 83 f8 fe 0f 84 76 fe ff ff 85 c0 0f 85 2b 01 00 00 49 8b 50 40 b8 01 00 00 00 48 89 93 d0 1a 00 00 0f c1 02 83 c0 01 83 f8 01 0f 8e 53 02 00 00 49 8b 44 24 38 [13108726.363769] RIP [] svcauth_unix_set_client+0x2ab/0x520 [sunrpc] [13108726.365530] RSP [13108726.367179] CR2: 0000000000000074 Fixes: d58431eacb22 ("sunrpc: don't mark uninitialised items as VALID.") Signed-off-by: Pavel Tikhomirov Acked-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index a349094f6fb7..f740cb51802a 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -53,9 +53,6 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail) h->last_refresh = now; } -static inline int cache_is_valid(struct cache_head *h); -static void cache_fresh_locked(struct cache_head *head, time_t expiry, - struct cache_detail *detail); static void cache_fresh_unlocked(struct cache_head *head, struct cache_detail *detail); @@ -105,9 +102,6 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, if (cache_is_expired(detail, tmp)) { hlist_del_init_rcu(&tmp->cache_list); detail->entries --; - if (cache_is_valid(tmp) == -EAGAIN) - set_bit(CACHE_NEGATIVE, &tmp->flags); - cache_fresh_locked(tmp, 0, detail); freeme = tmp; break; } -- cgit v1.2.3-59-g8ed1b From ff27e9f748303e8567bfceb6d7ff264cbcaca2ef Mon Sep 17 00:00:00 2001 From: Chuck 
Lever Date: Thu, 24 Oct 2019 09:34:10 -0400 Subject: SUNRPC: Trace gssproxy upcall results Record results of a GSS proxy ACCEPT_SEC_CONTEXT upcall and the svc_authenticate() function to make field debugging of NFS server Kerberos issues easier. Signed-off-by: Chuck Lever Reviewed-by: Bill Baker Signed-off-by: J. Bruce Fields --- include/trace/events/rpcgss.h | 45 ++++++++++++++++++++++++++++ include/trace/events/sunrpc.h | 55 +++++++++++++++++++++++++++++++++++ net/sunrpc/auth_gss/gss_mech_switch.c | 4 ++- net/sunrpc/auth_gss/svcauth_gss.c | 8 +++-- net/sunrpc/svc.c | 2 ++ net/sunrpc/svcauth.c | 2 ++ 6 files changed, 112 insertions(+), 4 deletions(-) diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h index d1f7fe1b6fe4..9827f535f032 100644 --- a/include/trace/events/rpcgss.h +++ b/include/trace/events/rpcgss.h @@ -126,6 +126,34 @@ DEFINE_GSSAPI_EVENT(verify_mic); DEFINE_GSSAPI_EVENT(wrap); DEFINE_GSSAPI_EVENT(unwrap); +TRACE_EVENT(rpcgss_accept_upcall, + TP_PROTO( + __be32 xid, + u32 major_status, + u32 minor_status + ), + + TP_ARGS(xid, major_status, minor_status), + + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, minor_status) + __field(unsigned long, major_status) + ), + + TP_fast_assign( + __entry->xid = be32_to_cpu(xid); + __entry->minor_status = minor_status; + __entry->major_status = major_status; + ), + + TP_printk("xid=0x%08x major_status=%s (0x%08lx) minor_status=%u", + __entry->xid, __entry->major_status == 0 ? 
"GSS_S_COMPLETE" : + show_gss_status(__entry->major_status), + __entry->major_status, __entry->minor_status + ) +); + /** ** GSS auth unwrap failures @@ -355,6 +383,23 @@ TRACE_EVENT(rpcgss_createauth, show_pseudoflavor(__entry->flavor), __entry->error) ); +TRACE_EVENT(rpcgss_oid_to_mech, + TP_PROTO( + const char *oid + ), + + TP_ARGS(oid), + + TP_STRUCT__entry( + __string(oid, oid) + ), + + TP_fast_assign( + __assign_str(oid, oid); + ), + + TP_printk("mech for oid %s was not found", __get_str(oid)) +); #endif /* _TRACE_RPCGSS_H */ diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ffa3c51dbb1a..c358a0af683b 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -14,6 +14,26 @@ #include #include +TRACE_DEFINE_ENUM(RPC_AUTH_OK); +TRACE_DEFINE_ENUM(RPC_AUTH_BADCRED); +TRACE_DEFINE_ENUM(RPC_AUTH_REJECTEDCRED); +TRACE_DEFINE_ENUM(RPC_AUTH_BADVERF); +TRACE_DEFINE_ENUM(RPC_AUTH_REJECTEDVERF); +TRACE_DEFINE_ENUM(RPC_AUTH_TOOWEAK); +TRACE_DEFINE_ENUM(RPCSEC_GSS_CREDPROBLEM); +TRACE_DEFINE_ENUM(RPCSEC_GSS_CTXPROBLEM); + +#define rpc_show_auth_stat(status) \ + __print_symbolic(status, \ + { RPC_AUTH_OK, "AUTH_OK" }, \ + { RPC_AUTH_BADCRED, "BADCRED" }, \ + { RPC_AUTH_REJECTEDCRED, "REJECTEDCRED" }, \ + { RPC_AUTH_BADVERF, "BADVERF" }, \ + { RPC_AUTH_REJECTEDVERF, "REJECTEDVERF" }, \ + { RPC_AUTH_TOOWEAK, "TOOWEAK" }, \ + { RPCSEC_GSS_CREDPROBLEM, "GSS_CREDPROBLEM" }, \ + { RPCSEC_GSS_CTXPROBLEM, "GSS_CTXPROBLEM" }) \ + DECLARE_EVENT_CLASS(rpc_task_status, TP_PROTO(const struct rpc_task *task), @@ -866,6 +886,41 @@ TRACE_EVENT(svc_recv, show_rqstp_flags(__entry->flags)) ); +#define svc_show_status(status) \ + __print_symbolic(status, \ + { SVC_GARBAGE, "SVC_GARBAGE" }, \ + { SVC_SYSERR, "SVC_SYSERR" }, \ + { SVC_VALID, "SVC_VALID" }, \ + { SVC_NEGATIVE, "SVC_NEGATIVE" }, \ + { SVC_OK, "SVC_OK" }, \ + { SVC_DROP, "SVC_DROP" }, \ + { SVC_CLOSE, "SVC_CLOSE" }, \ + { SVC_DENIED, "SVC_DENIED" }, \ + { SVC_PENDING, 
"SVC_PENDING" }, \ + { SVC_COMPLETE, "SVC_COMPLETE" }) + +TRACE_EVENT(svc_authenticate, + TP_PROTO(const struct svc_rqst *rqst, int auth_res, __be32 auth_stat), + + TP_ARGS(rqst, auth_res, auth_stat), + + TP_STRUCT__entry( + __field(u32, xid) + __field(unsigned long, svc_status) + __field(unsigned long, auth_stat) + ), + + TP_fast_assign( + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->svc_status = auth_res; + __entry->auth_stat = be32_to_cpu(auth_stat); + ), + + TP_printk("xid=0x%08x auth_res=%s auth_stat=%s", + __entry->xid, svc_show_status(__entry->svc_status), + rpc_show_auth_stat(__entry->auth_stat)) +); + TRACE_EVENT(svc_process, TP_PROTO(const struct svc_rqst *rqst, const char *name), diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 82060099a429..30b7de6f3d76 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -20,6 +20,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH @@ -158,7 +159,6 @@ struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj) if (sprint_oid(obj->data, obj->len, buf, sizeof(buf)) < 0) return NULL; - dprintk("RPC: %s(%s)\n", __func__, buf); request_module("rpc-auth-gss-%s", buf); rcu_read_lock(); @@ -172,6 +172,8 @@ struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj) } } rcu_read_unlock(); + if (!gm) + trace_rpcgss_oid_to_mech(buf); return gm; } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 8be2f209982b..f1309905aed3 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -49,6 +49,9 @@ #include #include #include + +#include + #include "gss_rpc_upcall.h" @@ -1270,9 +1273,8 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp, if (status) goto out; - dprintk("RPC: svcauth_gss: gss major status = %d " - "minor status = %d\n", - ud.major_status, ud.minor_status); + 
trace_rpcgss_accept_upcall(rqstp->rq_xid, ud.major_status, + ud.minor_status); switch (ud.major_status) { case GSS_S_CONTINUE_NEEDED: diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index d11b70552c33..187dd4e73d64 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1337,6 +1337,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) auth_stat = rpc_autherr_badcred; auth_res = progp->pg_authenticate(rqstp); } + if (auth_res != SVC_OK) + trace_svc_authenticate(rqstp, auth_res, auth_stat); switch (auth_res) { case SVC_OK: break; diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 550b214cb001..552617e3467b 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -19,6 +19,8 @@ #include #include +#include + #define RPCDBG_FACILITY RPCDBG_AUTH -- cgit v1.2.3-59-g8ed1b From 5866efa8cbfbadf3905072798e96652faf02dbe8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 24 Oct 2019 09:34:16 -0400 Subject: SUNRPC: Fix svcauth_gss_proxy_init() gss_read_proxy_verf() assumes things about the XDR buffer containing the RPC Call that are not true for buffers generated by svc_rdma_recv(). RDMA's buffers look more like what the upper layer generates for sending: head is a kmalloc'd buffer; it does not point to a page whose contents are contiguous with the first page in the buffers' page array. The result is that ACCEPT_SEC_CONTEXT via RPC/RDMA has stopped working on Linux NFS servers that use gssproxy. This does not affect clients that use only TCP to send their ACCEPT_SEC_CONTEXT operation (that's all Linux clients). Other clients, like Solaris NFS clients, send ACCEPT_SEC_CONTEXT on the same transport as they send all other NFS operations. Such clients can send ACCEPT_SEC_CONTEXT via RPC/RDMA. I thought I had found every direct reference in the server RPC code to the rqstp->rq_pages field. Bug found at the 2019 Westford NFS bake-a-thon. Fixes: 3316f0631139 ("svcrdma: Persistently allocate and DMA- ... 
") Signed-off-by: Chuck Lever Tested-by: Bill Baker Reviewed-by: Simo Sorce Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 84 +++++++++++++++++++++++++++++---------- 1 file changed, 63 insertions(+), 21 deletions(-) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index f1309905aed3..c62d1f10978b 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1078,24 +1078,32 @@ gss_read_verf(struct rpc_gss_wire_cred *gc, return 0; } -/* Ok this is really heavily depending on a set of semantics in - * how rqstp is set up by svc_recv and pages laid down by the - * server when reading a request. We are basically guaranteed that - * the token lays all down linearly across a set of pages, starting - * at iov_base in rq_arg.head[0] which happens to be the first of a - * set of pages stored in rq_pages[]. - * rq_arg.head[0].iov_base will provide us the page_base to pass - * to the upcall. - */ -static inline int -gss_read_proxy_verf(struct svc_rqst *rqstp, - struct rpc_gss_wire_cred *gc, __be32 *authp, - struct xdr_netobj *in_handle, - struct gssp_in_token *in_token) +static void gss_free_in_token_pages(struct gssp_in_token *in_token) { - struct kvec *argv = &rqstp->rq_arg.head[0]; u32 inlen; - int res; + int i; + + i = 0; + inlen = in_token->page_len; + while (inlen) { + if (in_token->pages[i]) + put_page(in_token->pages[i]); + inlen -= inlen > PAGE_SIZE ? 
PAGE_SIZE : inlen; + } + + kfree(in_token->pages); + in_token->pages = NULL; +} + +static int gss_read_proxy_verf(struct svc_rqst *rqstp, + struct rpc_gss_wire_cred *gc, __be32 *authp, + struct xdr_netobj *in_handle, + struct gssp_in_token *in_token) +{ + struct kvec *argv = &rqstp->rq_arg.head[0]; + unsigned int page_base, length; + int pages, i, res; + size_t inlen; res = gss_read_common_verf(gc, argv, authp, in_handle); if (res) @@ -1105,10 +1113,36 @@ gss_read_proxy_verf(struct svc_rqst *rqstp, if (inlen > (argv->iov_len + rqstp->rq_arg.page_len)) return SVC_DENIED; - in_token->pages = rqstp->rq_pages; - in_token->page_base = (ulong)argv->iov_base & ~PAGE_MASK; + pages = DIV_ROUND_UP(inlen, PAGE_SIZE); + in_token->pages = kcalloc(pages, sizeof(struct page *), GFP_KERNEL); + if (!in_token->pages) + return SVC_DENIED; + in_token->page_base = 0; in_token->page_len = inlen; + for (i = 0; i < pages; i++) { + in_token->pages[i] = alloc_page(GFP_KERNEL); + if (!in_token->pages[i]) { + gss_free_in_token_pages(in_token); + return SVC_DENIED; + } + } + length = min_t(unsigned int, inlen, argv->iov_len); + memcpy(page_address(in_token->pages[0]), argv->iov_base, length); + inlen -= length; + + i = 1; + page_base = rqstp->rq_arg.page_base; + while (inlen) { + length = min_t(unsigned int, inlen, PAGE_SIZE); + memcpy(page_address(in_token->pages[i]), + page_address(rqstp->rq_arg.pages[i]) + page_base, + length); + + inlen -= length; + page_base = 0; + i++; + } return 0; } @@ -1282,8 +1316,11 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp, break; case GSS_S_COMPLETE: status = gss_proxy_save_rsc(sn->rsc_cache, &ud, &handle); - if (status) + if (status) { + pr_info("%s: gss_proxy_save_rsc failed (%d)\n", + __func__, status); goto out; + } cli_handle.data = (u8 *)&handle; cli_handle.len = sizeof(handle); break; @@ -1294,15 +1331,20 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp, /* Got an answer to the upcall; use it: */ if 
(gss_write_init_verf(sn->rsc_cache, rqstp, - &cli_handle, &ud.major_status)) + &cli_handle, &ud.major_status)) { + pr_info("%s: gss_write_init_verf failed\n", __func__); goto out; + } if (gss_write_resv(resv, PAGE_SIZE, &cli_handle, &ud.out_token, - ud.major_status, ud.minor_status)) + ud.major_status, ud.minor_status)) { + pr_info("%s: gss_write_resv failed\n", __func__); goto out; + } ret = SVC_COMPLETE; out: + gss_free_in_token_pages(&ud.in_token); gssp_free_upcall_data(&ud); return ret; } -- cgit v1.2.3-59-g8ed1b From 12357f1b2c8e0d06f34a045498d4a1e7877153ee Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Nov 2019 17:11:57 -0500 Subject: nfsd: minor 4.1 callback cleanup Move all the cb_holds_slot management into helper functions. No change in behavior. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 524111420b48..1542e1d6dd1a 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -975,9 +975,12 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) * If the slot is available, then mark it busy. Otherwise, set the * thread for sleeping on the callback RPC wait queue. 
*/ -static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task) +static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task) { - if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { + struct nfs4_client *clp = cb->cb_clp; + + if (!cb->cb_holds_slot && + test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); /* Race breaker */ if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { @@ -986,9 +989,21 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task) } rpc_wake_up_queued_task(&clp->cl_cb_waitq, task); } + cb->cb_holds_slot = true; return true; } +static void nfsd41_cb_release_slot(struct nfsd4_callback *cb) +{ + struct nfs4_client *clp = cb->cb_clp; + + if (cb->cb_holds_slot) { + cb->cb_holds_slot = false; + clear_bit(0, &clp->cl_cb_slot_busy); + rpc_wake_up_next(&clp->cl_cb_waitq); + } +} + /* * TODO: cb_sequence should support referring call lists, cachethis, multiple * slots, and mark callback channel down on communication errors. 
@@ -1005,11 +1020,8 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) */ cb->cb_seq_status = 1; cb->cb_status = 0; - if (minorversion) { - if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task)) - return; - cb->cb_holds_slot = true; - } + if (minorversion && !nfsd41_cb_get_slot(cb, task)) + return; rpc_call_start(task); } @@ -1076,9 +1088,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback cb->cb_seq_status); } - cb->cb_holds_slot = false; - clear_bit(0, &clp->cl_cb_slot_busy); - rpc_wake_up_next(&clp->cl_cb_waitq); + nfsd41_cb_release_slot(cb); dprintk("%s: freed slot, new seqid=%d\n", __func__, clp->cl_cb_session->se_cb_seq_nr); -- cgit v1.2.3-59-g8ed1b From 2bbfed98a4d82ac4e7abfcd4eba40bddfc670b1d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 23 Oct 2019 17:43:18 -0400 Subject: nfsd: Fix races between nfsd4_cb_release() and nfsd4_shutdown_callback() When we're destroying the client lease, and we call nfsd4_shutdown_callback(), we must ensure that we do not return before all outstanding callbacks have terminated and have released their payloads. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4callback.c | 67 +++++++++++++++++++++++++++++++++++++++++++------- fs/nfsd/state.h | 1 + 2 files changed, 59 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 1542e1d6dd1a..67d24a536082 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -826,6 +826,31 @@ static int max_cb_time(struct net *net) return max(nn->nfsd4_lease/10, (time_t)1) * HZ; } +static struct workqueue_struct *callback_wq; + +static bool nfsd4_queue_cb(struct nfsd4_callback *cb) +{ + return queue_work(callback_wq, &cb->cb_work); +} + +static void nfsd41_cb_inflight_begin(struct nfs4_client *clp) +{ + atomic_inc(&clp->cl_cb_inflight); +} + +static void nfsd41_cb_inflight_end(struct nfs4_client *clp) +{ + + if (atomic_dec_and_test(&clp->cl_cb_inflight)) + wake_up_var(&clp->cl_cb_inflight); +} + +static void nfsd41_cb_inflight_wait_complete(struct nfs4_client *clp) +{ + wait_var_event(&clp->cl_cb_inflight, + !atomic_read(&clp->cl_cb_inflight)); +} + static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses) { if (clp->cl_minorversion == 0) { @@ -937,14 +962,21 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) clp->cl_cb_state = NFSD4_CB_UP; } +static void nfsd4_cb_probe_release(void *calldata) +{ + struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); + + nfsd41_cb_inflight_end(clp); + +} + static const struct rpc_call_ops nfsd4_cb_probe_ops = { /* XXX: release method to ensure we set the cb channel down if * necessary on early failure? */ .rpc_call_done = nfsd4_cb_probe_done, + .rpc_release = nfsd4_cb_probe_release, }; -static struct workqueue_struct *callback_wq; - /* * Poke the callback thread to process any updates to the callback * parameters, and send a null probe. 
@@ -1004,6 +1036,16 @@ static void nfsd41_cb_release_slot(struct nfsd4_callback *cb) } } +static void nfsd41_destroy_cb(struct nfsd4_callback *cb) +{ + struct nfs4_client *clp = cb->cb_clp; + + nfsd41_cb_release_slot(cb); + if (cb->cb_ops && cb->cb_ops->release) + cb->cb_ops->release(cb); + nfsd41_cb_inflight_end(clp); +} + /* * TODO: cb_sequence should support referring call lists, cachethis, multiple * slots, and mark callback channel down on communication errors. @@ -1101,8 +1143,10 @@ retry_nowait: ret = false; goto out; need_restart: - task->tk_status = 0; - cb->cb_need_restart = true; + if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) { + task->tk_status = 0; + cb->cb_need_restart = true; + } return false; } @@ -1144,9 +1188,9 @@ static void nfsd4_cb_release(void *calldata) struct nfsd4_callback *cb = calldata; if (cb->cb_need_restart) - nfsd4_run_cb(cb); + nfsd4_queue_cb(cb); else - cb->cb_ops->release(cb); + nfsd41_destroy_cb(cb); } @@ -1180,6 +1224,7 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp) */ nfsd4_run_cb(&clp->cl_cb_null); flush_workqueue(callback_wq); + nfsd41_cb_inflight_wait_complete(clp); } /* requires cl_lock: */ @@ -1265,8 +1310,7 @@ nfsd4_run_cb_work(struct work_struct *work) clnt = clp->cl_cb_client; if (!clnt) { /* Callback channel broken, or client killed; give up: */ - if (cb->cb_ops && cb->cb_ops->release) - cb->cb_ops->release(cb); + nfsd41_destroy_cb(cb); return; } @@ -1275,6 +1319,7 @@ nfsd4_run_cb_work(struct work_struct *work) */ if (!cb->cb_ops && clp->cl_minorversion) { clp->cl_cb_state = NFSD4_CB_UP; + nfsd41_destroy_cb(cb); return; } @@ -1300,5 +1345,9 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, void nfsd4_run_cb(struct nfsd4_callback *cb) { - queue_work(callback_wq, &cb->cb_work); + struct nfs4_client *clp = cb->cb_clp; + + nfsd41_cb_inflight_begin(clp); + if (!nfsd4_queue_cb(cb)) + nfsd41_cb_inflight_end(clp); } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 
46f56afb6cb8..d61b83b9654c 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -367,6 +367,7 @@ struct nfs4_client { struct net *net; struct list_head async_copies; /* list of async copies */ spinlock_t async_lock; /* lock for async copies */ + atomic_t cl_cb_inflight; /* Outstanding callbacks */ }; /* struct nfs4_client_reset -- cgit v1.2.3-59-g8ed1b From 20428a8047eac2fe3b493b454232dfd18d7f3d34 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 22 Oct 2019 12:29:37 -0400 Subject: nfsd: mark cb path down on unknown errors An unexpected error is probably a sign that something is wrong with the callback path. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 67d24a536082..c94768b096a3 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1126,6 +1126,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback } break; default: + nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status); dprintk("%s: unprocessed error %d\n", __func__, cb->cb_seq_status); } -- cgit v1.2.3-59-g8ed1b From cc1ce2f13ea1c13d7f1f322146b01446d9f7ad8b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 29 Oct 2019 16:02:18 -0400 Subject: nfsd: document callback_wq serialization of callback code The callback code relies on the fact that much of it is only ever called from the ordered workqueue callback_wq, and this is worth documenting. Reported-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4callback.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index c94768b096a3..24534db87e86 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1243,6 +1243,12 @@ static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp) return NULL; } +/* + * Note there isn't a lot of locking in this code; instead we depend on + * the fact that it is run from the callback_wq, which won't run two + * work items at once. So, for example, callback_wq handles all access + * of cl_cb_client and all calls to rpc_create or rpc_shutdown_client. + */ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) { struct nfs4_cb_conn conn; -- cgit v1.2.3-59-g8ed1b From 2a67803e1305b6b829b361e0b2f243aafcddab0b Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Fri, 1 Nov 2019 19:40:54 +0800 Subject: nfsd: Drop LIST_HEAD where the variable it declares is never used. The declarations were introduced with the file, but the declared variables were not used. Fixes: 65294c1f2c5e ("nfsd: add a new struct file caching facility to nfsd") Signed-off-by: Mao Wenan Signed-off-by: J. Bruce Fields --- fs/nfsd/filecache.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index ef55e9b1cd4e..32a9bf22ac08 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -685,8 +685,6 @@ nfsd_file_cache_purge(struct net *net) void nfsd_file_cache_shutdown(void) { - LIST_HEAD(dispose); - set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); lease_unregister_notifier(&nfsd_file_lease_notifier); -- cgit v1.2.3-59-g8ed1b From 581ae686f269194de975fd3385b881fe622a24ab Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Nov 2019 03:13:33 +0000 Subject: race in exportfs_decode_fh() On Sat, Nov 02, 2019 at 06:08:42PM +0000, Al Viro wrote: > It is converging to a reasonably small and understandable surface, actually, > most of that being in core pathname resolution. 
Two big piles of nightmares > left to review - overlayfs and (somewhat surprisingly) setxattr call chains, > the latter due to IMA/EVM/LSM insanity... Oh, lovely - in exportfs_decode_fh() we have this: err = exportfs_get_name(mnt, target_dir, nbuf, result); if (!err) { inode_lock(target_dir->d_inode); nresult = lookup_one_len(nbuf, target_dir, strlen(nbuf)); inode_unlock(target_dir->d_inode); if (!IS_ERR(nresult)) { if (nresult->d_inode) { dput(result); result = nresult; } else dput(nresult); } } We have derived the parent from fhandle, we have a disconnected dentry for child, we go look for the name. We even find it. Now, we want to look it up. And some bastard goes and unlinks it, just as we are trying to lock the parent. We do a lookup, and get a negative dentry. Then we unlock the parent... and some other bastard does e.g. mkdir with the same name. OK, nresult->d_inode is not NULL (anymore). It has fuck-all to do with the original fhandle (different inumber, etc.) but we happily accept it. Even better, we have no barriers between our check and nresult becoming positive. IOW, having observed non-NULL ->d_inode doesn't give us enough - e.g. we might still see the old ->d_flags value, from back when ->d_inode used to be NULL. On something like alpha we also have no promises that we'll observe anything about the fields of nresult->d_inode, but ->d_flags alone is enough for fun. The callers can't e.g. expect d_is_reg() et.al. to match the reality. This is obviously bogus. And the fix is obvious: check that nresult->d_inode is equal to result->d_inode before unlocking the parent. Note that we'd *already* had the original result and all of its aliases rejected by the 'acceptable' predicate, so if nresult doesn't supply us a better alias, we are SOL. Does anyone see objections to the following patch? Christoph, that seems to be your code; am I missing something subtle here? AFAICS, that goes back to 2007 or so... Signed-off-by: Al Viro Signed-off-by: J. 
Bruce Fields --- fs/exportfs/expfs.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 09bc68708d28..2dd55b172d57 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -519,26 +519,33 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, * inode is actually connected to the parent. */ err = exportfs_get_name(mnt, target_dir, nbuf, result); - if (!err) { - inode_lock(target_dir->d_inode); - nresult = lookup_one_len(nbuf, target_dir, - strlen(nbuf)); - inode_unlock(target_dir->d_inode); - if (!IS_ERR(nresult)) { - if (nresult->d_inode) { - dput(result); - result = nresult; - } else - dput(nresult); - } + if (err) { + dput(target_dir); + goto err_result; } + inode_lock(target_dir->d_inode); + nresult = lookup_one_len(nbuf, target_dir, strlen(nbuf)); + if (!IS_ERR(nresult)) { + if (unlikely(nresult->d_inode != result->d_inode)) { + dput(nresult); + nresult = ERR_PTR(-ESTALE); + } + } + inode_unlock(target_dir->d_inode); /* * At this point we are done with the parent, but it's pinned * by the child dentry anyway. */ dput(target_dir); + if (IS_ERR(nresult)) { + err = PTR_ERR(nresult); + goto err_result; + } + dput(result); + result = nresult; + /* * And finally make sure the dentry is actually acceptable * to NFSD. -- cgit v1.2.3-59-g8ed1b From d05a0201969045f4c488f7cf1d024089949a68b6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Nov 2019 16:34:22 +0100 Subject: sunrpc: remove __KERNEL__ ifdefs Remove the __KERNEL__ ifdefs from the non-UAPI sunrpc headers, as those can't be included from user space programs. Signed-off-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/auth.h | 3 --- include/linux/sunrpc/auth_gss.h | 2 -- include/linux/sunrpc/clnt.h | 3 --- include/linux/sunrpc/gss_api.h | 2 -- include/linux/sunrpc/gss_err.h | 3 --- include/linux/sunrpc/msg_prot.h | 3 --- include/linux/sunrpc/rpc_pipe_fs.h | 3 --- include/linux/sunrpc/svcauth.h | 4 ---- include/linux/sunrpc/svcauth_gss.h | 2 -- include/linux/sunrpc/xdr.h | 3 --- include/linux/sunrpc/xprt.h | 4 ---- include/linux/sunrpc/xprtsock.h | 4 ---- 12 files changed, 36 deletions(-) diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 5f9076fdb090..e9ec742796e7 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -10,8 +10,6 @@ #ifndef _LINUX_SUNRPC_AUTH_H #define _LINUX_SUNRPC_AUTH_H -#ifdef __KERNEL__ - #include #include #include @@ -194,5 +192,4 @@ struct rpc_cred *get_rpccred(struct rpc_cred *cred) return NULL; } -#endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_AUTH_H */ diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 30427b729070..43e481aa347a 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -13,7 +13,6 @@ #ifndef _LINUX_SUNRPC_AUTH_GSS_H #define _LINUX_SUNRPC_AUTH_GSS_H -#ifdef __KERNEL__ #include #include #include @@ -90,6 +89,5 @@ struct gss_cred { unsigned long gc_upcall_timestamp; }; -#endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_AUTH_GSS_H */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index abc63bd1be2b..64bffcb7142b 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -109,8 +109,6 @@ struct rpc_procinfo { const char * p_name; /* name of procedure */ }; -#ifdef __KERNEL__ - struct rpc_create_args { struct net *net; int protocol; @@ -237,5 +235,4 @@ static inline int rpc_reply_expected(struct rpc_task *task) (task->tk_msg.rpc_proc->p_decode != NULL); } -#endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git 
a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 5ac5db4d295f..bd691e08be3b 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -13,7 +13,6 @@ #ifndef _LINUX_SUNRPC_GSS_API_H #define _LINUX_SUNRPC_GSS_API_H -#ifdef __KERNEL__ #include #include #include @@ -160,6 +159,5 @@ struct gss_api_mech * gss_mech_get(struct gss_api_mech *); * corresponding call to gss_mech_put. */ void gss_mech_put(struct gss_api_mech *); -#endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_GSS_API_H */ diff --git a/include/linux/sunrpc/gss_err.h b/include/linux/sunrpc/gss_err.h index a6807867bd21..b73c329c83f2 100644 --- a/include/linux/sunrpc/gss_err.h +++ b/include/linux/sunrpc/gss_err.h @@ -34,8 +34,6 @@ #ifndef _LINUX_SUNRPC_GSS_ERR_H #define _LINUX_SUNRPC_GSS_ERR_H -#ifdef __KERNEL__ - typedef unsigned int OM_uint32; /* @@ -163,5 +161,4 @@ typedef unsigned int OM_uint32; /* XXXX This is a necessary evil until the spec is fixed */ #define GSS_S_CRED_UNAVAIL GSS_S_FAILURE -#endif /* __KERNEL__ */ #endif /* __LINUX_SUNRPC_GSS_ERR_H */ diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 4722b28ec36a..bea40d9f03a1 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -8,8 +8,6 @@ #ifndef _LINUX_SUNRPC_MSGPROT_H_ #define _LINUX_SUNRPC_MSGPROT_H_ -#ifdef __KERNEL__ /* user programs should get these from the rpc header files */ - #define RPC_VERSION 2 /* size of an XDR encoding unit in bytes, i.e. 32bit */ @@ -217,5 +215,4 @@ typedef __be32 rpc_fraghdr; /* Assume INET6_ADDRSTRLEN will always be larger than INET_ADDRSTRLEN... 
*/ #define RPCBIND_MAXUADDRLEN RPCBIND_MAXUADDR6LEN -#endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index e90b9bd99ded..cd188a527d16 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -2,8 +2,6 @@ #ifndef _LINUX_SUNRPC_RPC_PIPE_FS_H #define _LINUX_SUNRPC_RPC_PIPE_FS_H -#ifdef __KERNEL__ - #include struct rpc_pipe_dir_head { @@ -133,4 +131,3 @@ extern void unregister_rpc_pipefs(void); extern bool gssd_running(struct net *net); #endif -#endif diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 3e53a6e2ada7..b0003866a249 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -10,8 +10,6 @@ #ifndef _LINUX_SUNRPC_SVCAUTH_H_ #define _LINUX_SUNRPC_SVCAUTH_H_ -#ifdef __KERNEL__ - #include #include #include @@ -185,6 +183,4 @@ static inline unsigned long hash_mem(char const *buf, int length, int bits) return full_name_hash(NULL, buf, length) >> (32 - bits); } -#endif /* __KERNEL__ */ - #endif /* _LINUX_SUNRPC_SVCAUTH_H_ */ diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h index a4528b26c8aa..ca39a388dc22 100644 --- a/include/linux/sunrpc/svcauth_gss.h +++ b/include/linux/sunrpc/svcauth_gss.h @@ -9,7 +9,6 @@ #ifndef _LINUX_SUNRPC_SVCAUTH_GSS_H #define _LINUX_SUNRPC_SVCAUTH_GSS_H -#ifdef __KERNEL__ #include #include #include @@ -24,5 +23,4 @@ void gss_svc_shutdown_net(struct net *net); int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name); u32 svcauth_gss_flavor(struct auth_domain *dom); -#endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */ diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index f33e5013bdfb..b41f34977995 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -11,8 +11,6 @@ #ifndef _SUNRPC_XDR_H_ #define _SUNRPC_XDR_H_ -#ifdef __KERNEL__ - #include #include 
#include @@ -552,6 +550,5 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, *array = be32_to_cpup(p); return retval; } -#endif /* __KERNEL__ */ #endif /* _SUNRPC_XDR_H_ */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d783e15ba898..874205227778 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -19,8 +19,6 @@ #include #include -#ifdef __KERNEL__ - #define RPC_MIN_SLOT_TABLE (2U) #define RPC_DEF_SLOT_TABLE (16U) #define RPC_MAX_SLOT_TABLE_LIMIT (65536U) @@ -505,6 +503,4 @@ static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) } #endif -#endif /* __KERNEL__*/ - #endif /* _LINUX_SUNRPC_XPRT_H */ diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 7638dbe7bc50..30acd67d1627 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -8,8 +8,6 @@ #ifndef _LINUX_SUNRPC_XPRTSOCK_H #define _LINUX_SUNRPC_XPRTSOCK_H -#ifdef __KERNEL__ - int init_socket_xprt(void); void cleanup_socket_xprt(void); @@ -90,6 +88,4 @@ struct sock_xprt { #define XPRT_SOCK_WAKE_PENDING (6) #define XPRT_SOCK_WAKE_DISCONNECT (7) -#endif /* __KERNEL__ */ - #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ -- cgit v1.2.3-59-g8ed1b From fb7dd0a1ba8690527c2394c6c55f909aa87d8f44 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Nov 2019 16:34:23 +0100 Subject: lockd: remove __KERNEL__ ifdefs Remove the __KERNEL__ ifdefs from the non-UAPI lockd headers, as those can't be included from user space programs. Signed-off-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- include/linux/lockd/debug.h | 4 ---- include/linux/lockd/lockd.h | 4 ---- 2 files changed, 8 deletions(-) diff --git a/include/linux/lockd/debug.h b/include/linux/lockd/debug.h index e536c579827f..eede2ab5246f 100644 --- a/include/linux/lockd/debug.h +++ b/include/linux/lockd/debug.h @@ -10,8 +10,6 @@ #ifndef LINUX_LOCKD_DEBUG_H #define LINUX_LOCKD_DEBUG_H -#ifdef __KERNEL__ - #include /* @@ -25,8 +23,6 @@ # define ifdebug(flag) if (0) #endif -#endif /* __KERNEL__ */ - /* * Debug flags */ diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index d294dde9e546..666f5f310a04 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -10,8 +10,6 @@ #ifndef LINUX_LOCKD_LOCKD_H #define LINUX_LOCKD_LOCKD_H -#ifdef __KERNEL__ - #include #include #include @@ -373,6 +371,4 @@ static inline int nlm_compare_locks(const struct file_lock *fl1, extern const struct lock_manager_operations nlmsvc_lock_operations; -#endif /* __KERNEL__ */ - #endif /* LINUX_LOCKD_LOCKD_H */ -- cgit v1.2.3-59-g8ed1b From 18b9a895e652979b70f9c20565394a69354dfebc Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 12 Nov 2019 14:01:43 -0500 Subject: nfsd: Fix cld_net->cn_tfm initialization Don't assign an error pointer to cld_net->cn_tfm, otherwise an oops will occur in nfsd4_remove_cld_pipe(). Also, move the initialization of cld_net->cn_tfm so that it occurs after the check to see if nfsdcld is running. This is necessary because nfsd4_client_tracking_init() looks for -ETIMEDOUT to determine whether to use the "old" nfsdcld tracking ops. Fixes: 6ee95d1c8991 ("nfsd: add support for upcall version 2") Reported-by: Jamie Heilman Signed-off-by: Scott Mayhew Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4recover.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 29dff4c6e752..2481e7662128 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -1578,6 +1578,7 @@ nfsd4_cld_tracking_init(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); bool running; int retries = 10; + struct crypto_shash *tfm; status = nfs4_cld_state_init(net); if (status) @@ -1586,11 +1587,6 @@ nfsd4_cld_tracking_init(struct net *net) status = __nfsd4_init_cld_pipe(net); if (status) goto err_shutdown; - nn->cld_net->cn_tfm = crypto_alloc_shash("sha256", 0, 0); - if (IS_ERR(nn->cld_net->cn_tfm)) { - status = PTR_ERR(nn->cld_net->cn_tfm); - goto err_remove; - } /* * rpc pipe upcalls take 30 seconds to time out, so we don't want to @@ -1607,6 +1603,12 @@ nfsd4_cld_tracking_init(struct net *net) status = -ETIMEDOUT; goto err_remove; } + tfm = crypto_alloc_shash("sha256", 0, 0); + if (IS_ERR(tfm)) { + status = PTR_ERR(tfm); + goto err_remove; + } + nn->cld_net->cn_tfm = tfm; status = nfsd4_cld_get_version(nn); if (status == -EOPNOTSUPP) -- cgit v1.2.3-59-g8ed1b From a2e2f2dc77a18d2b0f450fb7fcb4871c9f697822 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 12 Nov 2019 14:01:55 -0500 Subject: nfsd: v4 support requires CRYPTO_SHA256 The new nfsdcld client tracking operations use sha256 to compute hashes of the kerberos principals, so make sure CRYPTO_SHA256 is enabled. Fixes: 6ee95d1c8991 ("nfsd: add support for upcall version 2") Reported-by: Jamie Heilman Signed-off-by: Scott Mayhew Signed-off-by: J. 
Bruce Fields --- fs/nfsd/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 10cefb0c07c7..c4b1a89b8845 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -73,7 +73,7 @@ config NFSD_V4 select NFSD_V3 select FS_POSIX_ACL select SUNRPC_GSS - select CRYPTO + select CRYPTO_SHA256 select GRACE_PERIOD help This option enables support in your system's NFS server for -- cgit v1.2.3-59-g8ed1b From 7c149057d044c52ed1e1d4ee50cf412c8d0f7295 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 19 Nov 2019 16:05:33 -0500 Subject: nfsd: restore NFSv3 ACL support An error in e333f3bbefe3 left the nfsd_acl_program->pg_vers array empty, which effectively turned off the server's support for NFSv3 ACLs. Fixes: e333f3bbefe3 "nfsd: Allow containers to set supported nfs versions" Cc: stable@vger.kernel.org Cc: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index fdf7ed4bd5dd..e8bee8ff30c5 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -95,12 +95,11 @@ static const struct svc_version *nfsd_acl_version[] = { #define NFSD_ACL_MINVERS 2 #define NFSD_ACL_NRVERS ARRAY_SIZE(nfsd_acl_version) -static const struct svc_version *nfsd_acl_versions[NFSD_ACL_NRVERS]; static struct svc_program nfsd_acl_program = { .pg_prog = NFS_ACL_PROGRAM, .pg_nvers = NFSD_ACL_NRVERS, - .pg_vers = nfsd_acl_versions, + .pg_vers = nfsd_acl_version, .pg_name = "nfsacl", .pg_class = "nfsd", .pg_stats = &nfsd_acl_svcstats, -- cgit v1.2.3-59-g8ed1b From 8729aaba74626c4ebce3abf1b9e96bb62d2958ca Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Nov 2019 16:25:46 -0500 Subject: SUNRPC: Fix backchannel latency metrics I noticed that for callback requests, the reported backlog latency is always zero, and the rtt value is crazy big. The problem was that rqst->rq_xtime is never set for backchannel requests. 
Fixes: 78215759e20d ("SUNRPC: Make RTT measurement more ... ") Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 1 + net/sunrpc/xprtsock.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index d1fcc41d5eb5..908e78bb87c6 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -195,6 +195,7 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer); #endif + rqst->rq_xtime = ktime_get(); rc = svc_rdma_bc_sendto(rdma, rqst, ctxt); if (rc) { svc_rdma_send_ctxt_put(rdma, ctxt); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9ac88722fa83..46fb7cf1ad04 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2660,6 +2660,8 @@ static int bc_sendto(struct rpc_rqst *req) .iov_len = sizeof(marker), }; + req->rq_xtime = ktime_get(); + len = kernel_sendmsg(transport->sock, &msg, &iov, 1, iov.iov_len); if (len != iov.iov_len) return -EAGAIN; @@ -2685,7 +2687,6 @@ static int bc_send_request(struct rpc_rqst *req) struct svc_xprt *xprt; int len; - dprintk("sending request with xid: %08x\n", ntohl(req->rq_xid)); /* * Get the server socket associated with this callback xprt */ -- cgit v1.2.3-59-g8ed1b From a25e3726b32c746c0098125d4c7463bb84df72bb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 27 Nov 2019 17:05:51 -0500 Subject: nfsd: Ensure CLONE persists data and metadata changes to the target file The NFSv4.2 CLONE operation has implicit persistence requirements on the target file, since there is no protocol requirement that the client issue a separate operation to persist data. For that reason, we should call vfs_fsync_range() on the destination file after a successful call to vfs_clone_file_range(). 
Fixes: ffa0160a1039 ("nfsd: implement the NFSv4.2 CLONE operation") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 3 ++- fs/nfsd/vfs.c | 8 +++++++- fs/nfsd/vfs.h | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 4e3e77b76411..38c0aeda500e 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1077,7 +1077,8 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; status = nfsd4_clone_file_range(src->nf_file, clone->cl_src_pos, - dst->nf_file, clone->cl_dst_pos, clone->cl_count); + dst->nf_file, clone->cl_dst_pos, clone->cl_count, + EX_ISSYNC(cstate->current_fh.fh_export)); nfsd_file_put(dst); nfsd_file_put(src); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index bd0a385df3fc..cf423fea0c6f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -525,7 +525,7 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, #endif __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, - u64 dst_pos, u64 count) + u64 dst_pos, u64 count, bool sync) { loff_t cloned; @@ -534,6 +534,12 @@ __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, return nfserrno(cloned); if (count && cloned != count) return nfserrno(-EINVAL); + if (sync) { + loff_t dst_end = count ? 
dst_pos + count - 1 : LLONG_MAX; + int status = vfs_fsync_range(dst, dst_pos, dst_end, 0); + if (status < 0) + return nfserrno(status); + } return 0; } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index a13fd9d7e1f5..cc110a10bfe8 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -56,7 +56,7 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *, struct file *, loff_t, loff_t, int); __be32 nfsd4_clone_file_range(struct file *, u64, struct file *, - u64, u64); + u64, u64, bool); #endif /* CONFIG_NFSD_V4 */ __be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, -- cgit v1.2.3-59-g8ed1b From 466e16f0920f3ffdfa49713212fa334fb3dc08f1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 28 Nov 2019 13:56:43 +1100 Subject: nfsd: check for EBUSY from vfs_rmdir/vfs_unlink. vfs_rmdir and vfs_unlink can return -EBUSY if the target is a mountpoint. This currently gets passed to nfserrno() by nfsd_unlink(), and that results in a WARNing, which is not user-friendly. Possibly the best NFSv4 error is NFS4ERR_FILE_OPEN, because there is a sense in which the object is currently in use by some other task. The Linux NFSv4 client will map this back to EBUSY, which is an added benefit. For NFSv3, the best we can do is probably NFS3ERR_ACCES, which isn't true, but is not less true than the other options. Signed-off-by: NeilBrown Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfsd.h | 3 ++- fs/nfsd/vfs.c | 12 +++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index af2947551e9c..57b93d95fa5c 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -280,7 +280,8 @@ void nfsd_lockd_shutdown(void); #define nfserr_union_notsupp cpu_to_be32(NFS4ERR_UNION_NOTSUPP) #define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED) #define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS) -#define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL) +#define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL) +#define nfserr_file_open cpu_to_be32(NFS4ERR_FILE_OPEN) /* error codes for internal use */ /* if a request fails due to kmalloc failure, it gets dropped. diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index cf423fea0c6f..c0dc491537a6 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1815,7 +1815,17 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, out_drop_write: fh_drop_write(fhp); out_nfserr: - err = nfserrno(host_err); + if (host_err == -EBUSY) { + /* name is mounted-on. There is no perfect + * error status. + */ + if (nfsd_v4client(rqstp)) + err = nfserr_file_open; + else + err = nfserr_acces; + } else { + err = nfserrno(host_err); + } out: return err; } -- cgit v1.2.3-59-g8ed1b From 18f428d4e2f7eff162d80b2b21689496c4e82afd Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 4 Dec 2019 15:13:54 -0500 Subject: NFSD fixing possible null pointer dereferencing in copy offload A static checker revealed a possible error path leading to a possible NULL pointer dereference. Reported-by: Dan Carpenter Fixes: e0639dc5805a: ("NFSD introduce async copy feature") Signed-off-by: Olga Kornievskaia Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 38c0aeda500e..4798667af647 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1298,7 +1298,8 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, out: return status; out_err: - cleanup_async_copy(async_copy); + if (async_copy) + cleanup_async_copy(async_copy); goto out; } -- cgit v1.2.3-59-g8ed1b From 38a2204f5298620e8a1c3b1dc7b831425106dbc0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 4 Dec 2019 07:13:22 +0100 Subject: nfsd: depend on CRYPTO_MD5 for legacy client tracking The legacy client tracking infrastructure of nfsd makes use of MD5 to derive a client's recovery directory name. As the nfsd module doesn't declare any dependency on CRYPTO_MD5, though, it may fail to allocate the hash if the kernel was compiled without it. As a result, generation of client recovery directories will fail with the following error: NFSD: unable to generate recoverydir name The explicit dependency on CRYPTO_MD5 was removed as redundant back in 6aaa67b5f3b9 (NFSD: Remove redundant "select" clauses in fs/Kconfig 2008-02-11) as it was already implicitly selected via RPCSEC_GSS_KRB5. This broke when RPCSEC_GSS_KRB5 was made optional for NFSv4 in commit df486a25900f (NFS: Fix the selection of security flavours in Kconfig) at a later point. Fix the issue by adding back an explicit dependency on CRYPTO_MD5. Fixes: df486a25900f (NFS: Fix the selection of security flavours in Kconfig) Signed-off-by: Patrick Steinhardt Signed-off-by: J. 
Bruce Fields --- fs/nfsd/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index c4b1a89b8845..f2f81561ebb6 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -73,6 +73,7 @@ config NFSD_V4 select NFSD_V3 select FS_POSIX_ACL select SUNRPC_GSS + select CRYPTO_MD5 select CRYPTO_SHA256 select GRACE_PERIOD help -- cgit v1.2.3-59-g8ed1b