-rw-r--r--  MAINTAINERS  9
-rw-r--r--  drivers/staging/Kconfig  2
-rw-r--r--  drivers/staging/Makefile  1
-rw-r--r--  drivers/staging/lustre/Kconfig  3
-rw-r--r--  drivers/staging/lustre/Makefile  2
-rw-r--r--  drivers/staging/lustre/README.txt  83
-rw-r--r--  drivers/staging/lustre/TODO  302
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/libcfs.h  76
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h  434
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h  208
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h  207
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h  194
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h  869
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/libcfs_private.h  200
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/libcfs_string.h  102
-rw-r--r--  drivers/staging/lustre/include/linux/lnet/api.h  212
-rw-r--r--  drivers/staging/lustre/include/linux/lnet/lib-lnet.h  652
-rw-r--r--  drivers/staging/lustre/include/linux/lnet/lib-types.h  666
-rw-r--r--  drivers/staging/lustre/include/linux/lnet/socklnd.h  87
-rw-r--r--  drivers/staging/lustre/include/uapi/linux/lnet/libcfs_debug.h  149
-rw-r--r--  drivers/staging/lustre/include/uapi/linux/lnet/libcfs_ioctl.h  141
-rw-r--r--  drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h  150
-rw-r--r--  drivers/staging/lustre/include/uapi/linux/lnet/lnet-types.h  669
-rw-r--r--  drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h  123
-rw-r--r--  drivers/staging/lustre/include/uapi/linux/lnet/lnetst.h  556
-rw-r--r--  drivers/staging/lustre/include/uapi/linux/lnet/nidstr.h  119
-rw-r--r--  drivers/staging/lustre/include/uapi/linux/lnet/socklnd.h  44
-rw-r--r--  drivers/staging/lustre/include/uapi/linux/lustre/lustre_cfg.h  261
-rw-r--r--  drivers/staging/lustre/include/uapi/linux/lustre/lustre_fid.h  293
-rw-r--r--  drivers/staging/lustre/include/uapi/linux/lustre/lustre_fiemap.h  72
-rw-r--r--  drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h  2690
-rw-r--r--  drivers/staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h  229
-rw-r--r--  drivers/staging/lustre/include/uapi/linux/lustre/lustre_kernelcomm.h  94
-rw-r--r--  drivers/staging/lustre/include/uapi/linux/lustre/lustre_ostid.h  236
-rw-r--r--  drivers/staging/lustre/include/uapi/linux/lustre/lustre_param.h  94
-rw-r--r--  drivers/staging/lustre/include/uapi/linux/lustre/lustre_user.h  1327
-rw-r--r--  drivers/staging/lustre/include/uapi/linux/lustre/lustre_ver.h  27
-rw-r--r--  drivers/staging/lustre/lnet/Kconfig  46
-rw-r--r--  drivers/staging/lustre/lnet/Makefile  1
-rw-r--r--  drivers/staging/lustre/lnet/klnds/Makefile  1
-rw-r--r--  drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile  5
-rw-r--r--  drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c  2958
-rw-r--r--  drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h  1048
-rw-r--r--  drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c  3763
-rw-r--r--  drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c  296
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/Makefile  6
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c  2921
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h  704
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c  2586
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c  534
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c  184
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c  810
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/Makefile  16
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/debug.c  461
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/fail.c  146
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/hash.c  2065
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c  1086
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/libcfs_lock.c  155
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/libcfs_mem.c  171
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/libcfs_string.c  562
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux-crypto-adler.c  139
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux-crypto.c  447
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux-crypto.h  30
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux-debug.c  142
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux-tracefile.c  258
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/module.c  758
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/tracefile.c  1198
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/tracefile.h  274
-rw-r--r--  drivers/staging/lustre/lnet/lnet/Makefile  10
-rw-r--r--  drivers/staging/lustre/lnet/lnet/acceptor.c  501
-rw-r--r--  drivers/staging/lustre/lnet/lnet/api-ni.c  2307
-rw-r--r--  drivers/staging/lustre/lnet/lnet/config.c  1235
-rw-r--r--  drivers/staging/lustre/lnet/lnet/lib-eq.c  426
-rw-r--r--  drivers/staging/lustre/lnet/lnet/lib-md.c  463
-rw-r--r--  drivers/staging/lustre/lnet/lnet/lib-me.c  274
-rw-r--r--  drivers/staging/lustre/lnet/lnet/lib-move.c  2386
-rw-r--r--  drivers/staging/lustre/lnet/lnet/lib-msg.c  625
-rw-r--r--  drivers/staging/lustre/lnet/lnet/lib-ptl.c  987
-rw-r--r--  drivers/staging/lustre/lnet/lnet/lib-socket.c  585
-rw-r--r--  drivers/staging/lustre/lnet/lnet/lo.c  105
-rw-r--r--  drivers/staging/lustre/lnet/lnet/module.c  239
-rw-r--r--  drivers/staging/lustre/lnet/lnet/net_fault.c  1023
-rw-r--r--  drivers/staging/lustre/lnet/lnet/nidstrings.c  1261
-rw-r--r--  drivers/staging/lustre/lnet/lnet/peer.c  456
-rw-r--r--  drivers/staging/lustre/lnet/lnet/router.c  1799
-rw-r--r--  drivers/staging/lustre/lnet/lnet/router_proc.c  907
-rw-r--r--  drivers/staging/lustre/lnet/selftest/Makefile  7
-rw-r--r--  drivers/staging/lustre/lnet/selftest/brw_test.c  526
-rw-r--r--  drivers/staging/lustre/lnet/selftest/conctl.c  801
-rw-r--r--  drivers/staging/lustre/lnet/selftest/conrpc.c  1396
-rw-r--r--  drivers/staging/lustre/lnet/selftest/conrpc.h  142
-rw-r--r--  drivers/staging/lustre/lnet/selftest/console.c  2104
-rw-r--r--  drivers/staging/lustre/lnet/selftest/console.h  244
-rw-r--r--  drivers/staging/lustre/lnet/selftest/framework.c  1786
-rw-r--r--  drivers/staging/lustre/lnet/selftest/module.c  169
-rw-r--r--  drivers/staging/lustre/lnet/selftest/ping_test.c  228
-rw-r--r--  drivers/staging/lustre/lnet/selftest/rpc.c  1682
-rw-r--r--  drivers/staging/lustre/lnet/selftest/rpc.h  295
-rw-r--r--  drivers/staging/lustre/lnet/selftest/selftest.h  622
-rw-r--r--  drivers/staging/lustre/lnet/selftest/timer.c  244
-rw-r--r--  drivers/staging/lustre/lnet/selftest/timer.h  50
-rw-r--r--  drivers/staging/lustre/lustre/Kconfig  45
-rw-r--r--  drivers/staging/lustre/lustre/Makefile  2
-rw-r--r--  drivers/staging/lustre/lustre/fid/Makefile  5
-rw-r--r--  drivers/staging/lustre/lustre/fid/fid_internal.h  46
-rw-r--r--  drivers/staging/lustre/lustre/fid/fid_lib.c  87
-rw-r--r--  drivers/staging/lustre/lustre/fid/fid_request.c  410
-rw-r--r--  drivers/staging/lustre/lustre/fid/lproc_fid.c  225
-rw-r--r--  drivers/staging/lustre/lustre/fld/Makefile  5
-rw-r--r--  drivers/staging/lustre/lustre/fld/fld_cache.c  516
-rw-r--r--  drivers/staging/lustre/lustre/fld/fld_internal.h  170
-rw-r--r--  drivers/staging/lustre/lustre/fld/fld_request.c  446
-rw-r--r--  drivers/staging/lustre/lustre/fld/lproc_fld.c  154
-rw-r--r--  drivers/staging/lustre/lustre/include/cl_object.h  2463
-rw-r--r--  drivers/staging/lustre/lustre/include/interval_tree.h  119
-rw-r--r--  drivers/staging/lustre/lustre/include/llog_swab.h  67
-rw-r--r--  drivers/staging/lustre/lustre/include/lprocfs_status.h  646
-rw-r--r--  drivers/staging/lustre/lustre/include/lu_object.h  1305
-rw-r--r--  drivers/staging/lustre/lustre/include/lu_ref.h  178
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_acl.h  51
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_compat.h  82
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_debug.h  52
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_disk.h  152
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_dlm.h  1346
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_dlm_flags.h  402
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_errno.h  198
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_export.h  250
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_fid.h  676
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_fld.h  137
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_ha.h  61
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_handles.h  91
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_import.h  369
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_intent.h  71
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_kernelcomm.h  56
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_lib.h  126
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_linkea.h  93
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_lmv.h  174
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_log.h  382
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_mdc.h  229
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_mds.h  62
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_net.h  2360
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_nrs.h  718
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h  71
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_obdo.h  55
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_patchless_compat.h  68
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_req_layout.h  307
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_sec.h  1072
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_swab.h  109
-rw-r--r--  drivers/staging/lustre/lustre/include/obd.h  1114
-rw-r--r--  drivers/staging/lustre/lustre/include/obd_cksum.h  153
-rw-r--r--  drivers/staging/lustre/lustre/include/obd_class.h  1603
-rw-r--r--  drivers/staging/lustre/lustre/include/obd_support.h  517
-rw-r--r--  drivers/staging/lustre/lustre/include/seq_range.h  200
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/interval_tree.c  599
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/l_lock.c  73
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_extent.c  258
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_flock.c  486
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c  69
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_internal.h  342
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_lib.c  842
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_lock.c  2135
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c  1163
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_plain.c  68
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_pool.c  1013
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_request.c  2033
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_resource.c  1318
-rw-r--r--  drivers/staging/lustre/lustre/llite/Makefile  13
-rw-r--r--  drivers/staging/lustre/lustre/llite/acl.c  108
-rw-r--r--  drivers/staging/lustre/lustre/llite/dcache.c  300
-rw-r--r--  drivers/staging/lustre/lustre/llite/dir.c  1708
-rw-r--r--  drivers/staging/lustre/lustre/llite/file.c  3580
-rw-r--r--  drivers/staging/lustre/lustre/llite/glimpse.c  205
-rw-r--r--  drivers/staging/lustre/lustre/llite/lcommon_cl.c  292
-rw-r--r--  drivers/staging/lustre/lustre/llite/lcommon_misc.c  186
-rw-r--r--  drivers/staging/lustre/lustre/llite/llite_internal.h  1344
-rw-r--r--  drivers/staging/lustre/lustre/llite/llite_lib.c  2668
-rw-r--r--  drivers/staging/lustre/lustre/llite/llite_mmap.c  480
-rw-r--r--  drivers/staging/lustre/lustre/llite/llite_nfs.c  375
-rw-r--r--  drivers/staging/lustre/lustre/llite/lproc_llite.c  1659
-rw-r--r--  drivers/staging/lustre/lustre/llite/namei.c  1207
-rw-r--r--  drivers/staging/lustre/lustre/llite/range_lock.c  241
-rw-r--r--  drivers/staging/lustre/lustre/llite/range_lock.h  83
-rw-r--r--  drivers/staging/lustre/lustre/llite/rw.c  1214
-rw-r--r--  drivers/staging/lustre/lustre/llite/rw26.c  641
-rw-r--r--  drivers/staging/lustre/lustre/llite/statahead.c  1577
-rw-r--r--  drivers/staging/lustre/lustre/llite/super25.c  189
-rw-r--r--  drivers/staging/lustre/lustre/llite/symlink.c  159
-rw-r--r--  drivers/staging/lustre/lustre/llite/vvp_dev.c  640
-rw-r--r--  drivers/staging/lustre/lustre/llite/vvp_internal.h  321
-rw-r--r--  drivers/staging/lustre/lustre/llite/vvp_io.c  1374
-rw-r--r--  drivers/staging/lustre/lustre/llite/vvp_lock.c  87
-rw-r--r--  drivers/staging/lustre/lustre/llite/vvp_object.c  303
-rw-r--r--  drivers/staging/lustre/lustre/llite/vvp_page.c  523
-rw-r--r--  drivers/staging/lustre/lustre/llite/xattr.c  665
-rw-r--r--  drivers/staging/lustre/lustre/llite/xattr_cache.c  504
-rw-r--r--  drivers/staging/lustre/lustre/llite/xattr_security.c  96
-rw-r--r--  drivers/staging/lustre/lustre/lmv/Makefile  5
-rw-r--r--  drivers/staging/lustre/lustre/lmv/lmv_fld.c  82
-rw-r--r--  drivers/staging/lustre/lustre/lmv/lmv_intent.c  521
-rw-r--r--  drivers/staging/lustre/lustre/lmv/lmv_internal.h  164
-rw-r--r--  drivers/staging/lustre/lustre/lmv/lmv_obd.c  3131
-rw-r--r--  drivers/staging/lustre/lustre/lmv/lproc_lmv.c  173
-rw-r--r--  drivers/staging/lustre/lustre/lov/Makefile  9
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_cl_internal.h  639
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_dev.c  384
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_ea.c  331
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_internal.h  286
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_io.c  1023
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_lock.c  348
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_merge.c  105
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_obd.c  1444
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_object.c  1625
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_offset.c  269
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_pack.c  400
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_page.c  136
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_pool.c  546
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_request.c  354
-rw-r--r--  drivers/staging/lustre/lustre/lov/lovsub_dev.c  147
-rw-r--r--  drivers/staging/lustre/lustre/lov/lovsub_lock.c  81
-rw-r--r--  drivers/staging/lustre/lustre/lov/lovsub_object.c  180
-rw-r--r--  drivers/staging/lustre/lustre/lov/lovsub_page.c  68
-rw-r--r--  drivers/staging/lustre/lustre/lov/lproc_lov.c  299
-rw-r--r--  drivers/staging/lustre/lustre/mdc/Makefile  5
-rw-r--r--  drivers/staging/lustre/lustre/mdc/lproc_mdc.c  231
-rw-r--r--  drivers/staging/lustre/lustre/mdc/mdc_internal.h  144
-rw-r--r--  drivers/staging/lustre/lustre/mdc/mdc_lib.c  498
-rw-r--r--  drivers/staging/lustre/lustre/mdc/mdc_locks.c  1239
-rw-r--r--  drivers/staging/lustre/lustre/mdc/mdc_reint.c  421
-rw-r--r--  drivers/staging/lustre/lustre/mdc/mdc_request.c  2770
-rw-r--r--  drivers/staging/lustre/lustre/mgc/Makefile  5
-rw-r--r--  drivers/staging/lustre/lustre/mgc/lproc_mgc.c  69
-rw-r--r--  drivers/staging/lustre/lustre/mgc/mgc_internal.h  57
-rw-r--r--  drivers/staging/lustre/lustre/mgc/mgc_request.c  1851
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/Makefile  12
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/cl_internal.h  95
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/cl_io.c  1151
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/cl_lock.c  275
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/cl_object.c  1059
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/cl_page.c  1045
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/class_obd.c  544
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/debug.c  96
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/genops.c  1480
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/kernelcomm.c  240
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/linkea.c  249
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/linux/linux-module.c  514
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c  162
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/llog.c  524
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/llog_cat.c  236
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/llog_internal.h  79
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/llog_obd.c  225
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/llog_swab.c  412
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/lprocfs_counters.c  134
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/lprocfs_status.c  1698
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/lu_object.c  2056
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/lu_ref.c  45
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/lustre_handles.c  241
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/lustre_peer.c  214
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/obd_config.c  1538
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/obd_mount.c  1245
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/obdo.c  181
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/statfs_pack.c  58
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/uuid.c  45
-rw-r--r--  drivers/staging/lustre/lustre/obdecho/Makefile  5
-rw-r--r--  drivers/staging/lustre/lustre/obdecho/echo_client.c  1729
-rw-r--r--  drivers/staging/lustre/lustre/obdecho/echo_internal.h  42
-rw-r--r--  drivers/staging/lustre/lustre/osc/Makefile  6
-rw-r--r--  drivers/staging/lustre/lustre/osc/lproc_osc.c  838
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_cache.c  3306
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_cl_internal.h  681
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_dev.c  246
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_internal.h  237
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_io.c  918
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_lock.c  1230
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_object.c  473
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_page.c  1094
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_quota.c  236
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_request.c  2907
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/Makefile  23
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/client.c  3271
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/connection.c  192
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/errno.c  383
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/events.c  585
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/import.c  1677
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/layout.c  2232
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/llog_client.c  338
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/llog_net.c  67
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c  1316
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/niobuf.c  771
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/nrs.c  1613
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c  270
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/pack_generic.c  2311
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/pers.c  72
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/pinger.c  474
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h  371
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c  186
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c  914
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/recover.c  374
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/sec.c  2379
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c  572
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/sec_config.c  850
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/sec_gc.c  190
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c  170
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/sec_null.c  459
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/sec_plain.c  1023
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/service.c  2807
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/wiretest.c  4210
-rw-r--r--  drivers/staging/lustre/sysfs-fs-lustre  654
-rw-r--r--  scripts/selinux/mdp/mdp.c  1
308 files changed, 0 insertions(+), 195272 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 4b65225d443a..db158767de20 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13329,15 +13329,6 @@ S: Odd Fixes
F: Documentation/devicetree/bindings/staging/iio/
F: drivers/staging/iio/
-STAGING - LUSTRE PARALLEL FILESYSTEM
-M: Oleg Drokin <oleg.drokin@intel.com>
-M: Andreas Dilger <andreas.dilger@intel.com>
-M: James Simmons <jsimmons@infradead.org>
-L: lustre-devel@lists.lustre.org (moderated for non-subscribers)
-W: http://wiki.lustre.org/
-S: Maintained
-F: drivers/staging/lustre
-
STAGING - NVIDIA COMPLIANT EMBEDDED CONTROLLER INTERFACE (nvec)
M: Marc Dietrich <marvin24@gmx.de>
L: ac100@lists.launchpad.net (moderated for non-subscribers)
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index d5926f0d3f6c..1c357ef669ae 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -84,8 +84,6 @@ source "drivers/staging/netlogic/Kconfig"
source "drivers/staging/mt29f_spinand/Kconfig"
-source "drivers/staging/lustre/Kconfig"
-
source "drivers/staging/dgnc/Kconfig"
source "drivers/staging/gs_fpgaboot/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 919753c3d3f6..2edb9860931e 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -32,7 +32,6 @@ obj-$(CONFIG_STAGING_BOARD) += board/
obj-$(CONFIG_LTE_GDM724X) += gdm724x/
obj-$(CONFIG_FIREWIRE_SERIAL) += fwserial/
obj-$(CONFIG_GOLDFISH) += goldfish/
-obj-$(CONFIG_LNET) += lustre/
obj-$(CONFIG_DGNC) += dgnc/
obj-$(CONFIG_MTD_SPINAND_MT29F) += mt29f_spinand/
obj-$(CONFIG_GS_FPGABOOT) += gs_fpgaboot/
diff --git a/drivers/staging/lustre/Kconfig b/drivers/staging/lustre/Kconfig
deleted file mode 100644
index b7d81096eee9..000000000000
--- a/drivers/staging/lustre/Kconfig
+++ /dev/null
@@ -1,3 +0,0 @@
-source "drivers/staging/lustre/lnet/Kconfig"
-
-source "drivers/staging/lustre/lustre/Kconfig"
diff --git a/drivers/staging/lustre/Makefile b/drivers/staging/lustre/Makefile
deleted file mode 100644
index 95ffe337a80a..000000000000
--- a/drivers/staging/lustre/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-obj-$(CONFIG_LNET) += lnet/
-obj-$(CONFIG_LUSTRE_FS) += lustre/
diff --git a/drivers/staging/lustre/README.txt b/drivers/staging/lustre/README.txt
deleted file mode 100644
index 783959240490..000000000000
--- a/drivers/staging/lustre/README.txt
+++ /dev/null
@@ -1,83 +0,0 @@
-Lustre Parallel Filesystem Client
-=================================
-
-The Lustre file system is an open-source, parallel file system
-that supports many requirements of leadership class HPC simulation
-environments.
-Born from a research project at Carnegie Mellon University,
-the Lustre file system is a widely-used option in HPC.
-The Lustre file system provides a POSIX-compliant file system interface and
-can scale to thousands of clients, petabytes of storage, and hundreds of
-gigabytes per second of I/O bandwidth.
-
-Unlike shared disk storage cluster filesystems (e.g. OCFS2, GFS, GPFS),
-Lustre has independent Metadata and Data servers that clients can access
-in parallel to maximize performance.
-
-In order to use the Lustre client you will need to download the "lustre-client"
-package, which contains the userspace tools, from http://lustre.org/download/
-
-You will need to install and configure your Lustre servers separately.
-
-Mount Syntax
-============
-After you have installed the lustre-client tools, including the mount.lustre
-binary, you can mount your Lustre filesystem with:
-
-mount -t lustre mgs:/fsname mnt
-
-where mgs is the host name or IP address of your Lustre MGS (management
-service) and fsname is the name of the filesystem you would like to mount.
-
-
-Mount Options
-=============
-
- noflock
- Disable posix file locking (Applications trying to use
- the functionality will get ENOSYS)
-
- localflock
- Enable local flock support, using only client-local flock
- (faster, for applications that require flock but do not run
- on multiple nodes).
-
- flock
- Enable cluster-global posix file locking coherent across all
- client nodes.
-
- user_xattr, nouser_xattr
- Support "user." extended attributes (or not)
-
- user_fid2path, nouser_fid2path
- Enable FID to path translation by regular users (or not)
-
- checksum, nochecksum
- Verify data consistency on the wire and in memory as it passes
- between the layers (or not).
-
- lruresize, nolruresize
- Allow lock LRU to be controlled by memory pressure on the server
- (or only 100 (default, controlled by lru_size proc parameter) locks
- per CPU per server on this client).
-
- lazystatfs, nolazystatfs
- Do not block in statfs() if some of the servers are down.
-
- 32bitapi
- Shrink inode numbers to fit into 32 bits. This is necessary
- if you plan to re-export the Lustre filesystem from this client
- via NFSv4.
-
- verbose, noverbose
- Enable mount/umount console messages (or not)
-
-More Information
-================
-You can get more information at the Lustre website: http://wiki.lustre.org/
-
-Source for the userspace tools and out-of-tree client and server code
-is available at: http://git.hpdd.intel.com/fs/lustre-release.git
-
-Latest binary packages:
-http://lustre.org/download/
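The mount syntax described in the README above maps directly onto the mount(2)
system call, with the option keywords passed as the data string. A minimal
userspace sketch follows; the MGS host, filesystem name, mount point, and
option string are placeholder assumptions, and in practice mount(8) dispatches
to the mount.lustre helper (which resolves the MGS name to an LNet NID) rather
than calling mount(2) directly.

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/mount.h>

int main(void)
{
	/* "mgs" and "fsname" are placeholders for a real MGS host and
	 * filesystem name; the data string carries the mount options
	 * documented in the README.
	 */
	if (mount("mgs:/fsname", "/mnt/lustre", "lustre", 0,
		  "flock,user_xattr,lazystatfs")) {
		fprintf(stderr, "mount failed: %s\n", strerror(errno));
		return 1;
	}
	return 0;
}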
diff --git a/drivers/staging/lustre/TODO b/drivers/staging/lustre/TODO
deleted file mode 100644
index 5332cdb19bfa..000000000000
--- a/drivers/staging/lustre/TODO
+++ /dev/null
@@ -1,302 +0,0 @@
-Currently all the work directed toward the lustre upstream client is tracked
-at the following link:
-
-https://jira.hpdd.intel.com/browse/LU-9679
-
-Under this ticket you will see the following work items that need to be
-addressed:
-
-******************************************************************************
-* libcfs cleanup
-*
-* https://jira.hpdd.intel.com/browse/LU-9859
-*
-* Track all the cleanups and simplification of the libcfs module. Remove
-* functions the kernel provides. Possibly integrate some of the functionality
-* into the kernel proper.
-*
-******************************************************************************
-
-https://jira.hpdd.intel.com/browse/LU-100086
-
-LNET_MINOR conflicts with USERIO_MINOR
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8130
-
-Fix and simplify libcfs hash handling
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8703
-
-The current way we handle SMP is wrong. Platforms like ARM and KNL can have
-core and NUMA layouts that include NUMA nodes with no cores, and we need to
-handle such cases. This work also greatly simplifies the Lustre SMP code.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9019
-
-Replace the libcfs time API with standard kernel APIs. Also migrate away from
-jiffies. We found that jiffies can vary between nodes, which can lead to
-corner cases that break the file system due to inconsistent behavior across
-nodes. So move to time64_t and ktime_t as much as possible.
-
-******************************************************************************
-* Proper IB support for ko2iblnd
-******************************************************************************
-https://jira.hpdd.intel.com/browse/LU-9179
-
-Poor performance for the ko2iblnd driver. This is related to many of the
-patches below that are missing from the linux client.
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9886
-
-Crash in upstream kiblnd_handle_early_rxs()
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10394 / LU-10526 / LU-10089
-
-Default to using MEM_REG
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10459
-
-throttle tx based on queue depth
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9943
-
-correct WR fast reg accounting
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10291
-
-remove concurrent_sends tunable
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10213
-
-calculate qp max_send_wrs properly
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9810
-
-use less CQ entries for each connection
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10129 / LU-9180
-
-rework map_on_demand behavior
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10129
-
-query device capabilities
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10015
-
-fix race at kiblnd_connect_peer
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9983
-
-allow for discontiguous fragments
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9500
-
-Don't Page Align remote_addr with FastReg
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9448
-
-handle empty CPTs
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9507
-
-Don't Assert On Reconnect with MultiQP
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9472
-
-Fix FastReg map/unmap for MLX5
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9425
-
-Turn on 2 sges by default
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8943
-
-Enable Multiple OPA Endpoints between Nodes
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-5718
-
-multiple sges for work request
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9094
-
-kill timedout txs from ibp_tx_queue
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9094
-
-reconnect peer for REJ_INVALID_SERVICE_ID
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8752
-
-Stop MLX5 triggering a dump_cqe
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8874
-
-Move ko2iblnd to latest RDMA changes
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8875 / LU-8874
-
-Change to new RDMA done callback mechanism
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9164 / LU-8874
-
-Incorporate the RDMA map/unmap APIs into ko2iblnd
-
-******************************************************************************
-* sysfs/debugfs fixes
-*
-* https://jira.hpdd.intel.com/browse/LU-8066
-*
-* The original migration to sysfs was done in haste without properly working
-* utilities to test the changes. This covers the work to restore the proper
-* behavior. Huge project to make this right.
-*
-******************************************************************************
-
-https://jira.hpdd.intel.com/browse/LU-9431
-
-The function class_process_proc_param was used for our mass updates of proc
-tunables. It didn't work with sysfs and it was just ugly, so it was removed.
-In the process, the ability to mass-update thousands of clients was lost.
-This work restores that ability in a sane way.
-
-------------------------------------------------------------------------------
-https://jira.hpdd.intel.com/browse/LU-9091
-
-One of the major requests from users is the ability to pass parameters into a
-sysfs file in various different units. For example, we can set
-max_pages_per_rpc, but this varies across platforms due to different page
-sizes, so you want to be able to set it like max_pages_per_rpc=16MiB. The
-original code to handle this was written before the string helpers were
-created, so it doesn't follow that format, but it would be easy to move to.
-Currently the string helpers do the reverse of what we need: they change
-bytes to a string, while we need to change a string to bytes.
-
-******************************************************************************
-* Proper user land to kernel space interface for Lustre
-*
-* https://jira.hpdd.intel.com/browse/LU-9680
-*
-******************************************************************************
-
-https://jira.hpdd.intel.com/browse/LU-8915
-
-Don't use the Linux list structure as a userland argument for lnet selftest.
-This code is of pretty poor quality and really needs to be reworked.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8834
-
-The Lustre ioctl LL_IOC_FUTIMES_3 is very generic. We need to either work
-with other file systems that have similar functionality to create a common
-syscall interface, or rework our server code to do it for us automagically.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-6202
-
-Clean up ioctl handling. We have many obsolete ioctls. Also, the way we do
-ioctls can be changed over to netlink. This has the benefit of working
-better with HPC systems that do I/O forwarding; such systems don't handle
-ioctls very well.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9667
-
-More cleanups by making our utilities use sysfs instead of ioctls for LNet.
-Also it has been requested to move the remaining ioctls to the netlink API.
-
-******************************************************************************
-* Misc
-******************************************************************************
-
-------------------------------------------------------------------------------
-https://jira.hpdd.intel.com/browse/LU-9855
-
-Clean up obdclass preprocessor code. One of the major eyesores is the various
-pointer indirections and macros used by obdclass, which make the code very
-difficult to understand. Al Viro requested that this be cleaned up before we
-leave staging.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9633
-
-Migrate to sphinx kernel-doc style comments. Add documents in Documentation.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-6142
-
-Possible remaining coding style fixes. Remove dead code. Enforce kernel
-coding style. Other minor miscellaneous cleanups...
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8837
-
-Separate client/server functionality. Functions used only by the server can
-be removed from the client. Most of this has been done, but we need an
-inspection of the code to make sure.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-8964
-
-Lustre client readahead/writeback control needs to make better use of what
-the kernel provides. This is currently being explored. We could end up
-replacing the CLIO readahead abstraction with the kernel's own version.
-
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9862
-
-The patch that landed for LU-7890 leads to static checker errors
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-9868
-
-dcache/namei fixes for lustre
-------------------------------------------------------------------------------
-
-https://jira.hpdd.intel.com/browse/LU-10467
-
-Use the standard Linux wait_event macros (work by Neil Brown)
-
-------------------------------------------------------------------------------
-
-Please send any patches to Greg Kroah-Hartman <greg@kroah.com>, Andreas Dilger
-<andreas.dilger@intel.com>, James Simmons <jsimmons@infradead.org> and
-Oleg Drokin <oleg.drokin@intel.com>.
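The LU-9091 item in the TODO above wants the string-to-bytes direction that the
kernel's string helpers do not provide: memparse() handles single-letter
suffixes such as 16M, but not spellings like 16MiB. A hypothetical helper,
str_to_bytes(), sketches what that parsing could look like; the name and the
exact accepted grammar are illustrative assumptions, not code from the tree,
and overflow checking is elided.

#include <ctype.h>
#include <stdint.h>
#include <string.h>

/* Parse "16", "16M", or "16MiB" into a byte count; return -1 on a
 * malformed string.  Hypothetical sketch only.
 */
static int64_t str_to_bytes(const char *str)
{
	static const char suffixes[] = "KMGTPE";
	const char *p = str;
	const char *s;
	int64_t val = 0;
	int shift = 0;

	if (!isdigit((unsigned char)*p))
		return -1;
	while (isdigit((unsigned char)*p))
		val = val * 10 + (*p++ - '0');

	if (*p) {
		s = strchr(suffixes, toupper((unsigned char)*p));
		if (!s)
			return -1;
		shift = 10 * (s - suffixes + 1);	/* K=10, M=20, ... */
		p++;
		if (*p == 'i')		/* accept "MiB" */
			p++;
		if (*p == 'B')		/* accept "MB" and "MiB" */
			p++;
	}
	return *p ? -1 : val << shift;
}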
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs.h b/drivers/staging/lustre/include/linux/libcfs/libcfs.h
deleted file mode 100644
index edc7ed0dcb94..000000000000
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs.h
+++ /dev/null
@@ -1,76 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __LIBCFS_LIBCFS_H__
-#define __LIBCFS_LIBCFS_H__
-
-#include <linux/notifier.h>
-#include <linux/workqueue.h>
-#include <linux/sysctl.h>
-
-#include <linux/libcfs/libcfs_debug.h>
-#include <linux/libcfs/libcfs_private.h>
-#include <linux/libcfs/libcfs_fail.h>
-
-#define LIBCFS_VERSION "0.7.0"
-
-extern struct blocking_notifier_head libcfs_ioctl_list;
-static inline int notifier_from_ioctl_errno(int err)
-{
- if (err == -EINVAL)
- return NOTIFY_OK;
- return notifier_from_errno(err) | NOTIFY_STOP_MASK;
-}
-
-int libcfs_setup(void);
-
-extern struct workqueue_struct *cfs_rehash_wq;
-
-void lustre_insert_debugfs(struct ctl_table *table);
-int lprocfs_call_handler(void *data, int write, loff_t *ppos,
- void __user *buffer, size_t *lenp,
- int (*handler)(void *data, int write, loff_t pos,
- void __user *buffer, int len));
-
-/*
- * Memory
- */
-#if BITS_PER_LONG == 32
-/* limit to lowmem on 32-bit systems */
-#define NUM_CACHEPAGES \
- min(totalram_pages, 1UL << (30 - PAGE_SHIFT) * 3 / 4)
-#else
-#define NUM_CACHEPAGES totalram_pages
-#endif
-
-#endif /* __LIBCFS_LIBCFS_H__ */
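The libcfs_ioctl_list chain and notifier_from_ioctl_errno() declared in the
header above are how subsystems hook into libcfs ioctl dispatch: a handler
returns -EINVAL to mean "not my command, ask the next subscriber", while any
other result, including success, carries NOTIFY_STOP_MASK and ends the walk.
A sketch of a subscriber, assuming the header above is in scope; the handler
name and body are invented for illustration.

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/errno.h>

static int my_ioctl_handler(struct notifier_block *nb,
			    unsigned long cmd, void *vdata)
{
	int err = -EINVAL;	/* default: not our command */

	/* ... decode cmd/vdata here and set err on a match ... */

	/* -EINVAL maps to NOTIFY_OK (keep walking the chain); anything
	 * else, including 0, stops it via NOTIFY_STOP_MASK.
	 */
	return notifier_from_ioctl_errno(err);
}

static struct notifier_block my_ioctl_nb = {
	.notifier_call	= my_ioctl_handler,
};

static int __init my_init(void)
{
	return blocking_notifier_chain_register(&libcfs_ioctl_list,
						&my_ioctl_nb);
}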
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
deleted file mode 100644
index 61641c41c492..000000000000
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
+++ /dev/null
@@ -1,434 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_cpu.h
- *
- * CPU partition
- * . CPU partition is virtual processing unit
- *
- * . CPU partition can present 1-N cores, or 1-N NUMA nodes,
- * in other words, CPU partition is a processors pool.
- *
- * CPU Partition Table (CPT)
- * . a set of CPU partitions
- *
- * . There are two modes for CPT: CFS_CPU_MODE_NUMA and CFS_CPU_MODE_SMP
- *
- * . User can specify the total number of CPU partitions while creating a
- * CPT; CPU partition IDs always start from 0.
- *
- * Example: if there are 8 cores on the system, while creating a CPT
- * with cpu_npartitions=4:
- * core[0, 1] = partition[0], core[2, 3] = partition[1]
- * core[4, 5] = partition[2], core[6, 7] = partition[3]
- *
- * cpu_npartitions=1:
- * core[0, 1, ... 7] = partition[0]
- *
- * . User can also specify CPU partitions by string pattern
- *
- * Examples: cpu_partitions="0[0,1], 1[2,3]"
- * cpu_partitions="N 0[0-3], 1[4-8]"
- *
- * The first character "N" means the following numbers are NUMA node IDs
- *
- * . NUMA allocators, CPU affinity threads are built over CPU partitions,
- * instead of HW CPUs or HW nodes.
- *
- * . By default, Lustre modules should refer to the global cfs_cpt_tab,
- * instead of accessing HW CPUs directly, so concurrency of Lustre can be
- * configured by cpu_npartitions of the global cfs_cpt_tab
- *
- * . If cpu_npartitions=1 (all CPUs in one pool), Lustre should work the
- * same way as 2.2 or earlier versions
- *
- * Author: liang@whamcloud.com
- */
-
-#ifndef __LIBCFS_CPU_H__
-#define __LIBCFS_CPU_H__
-
-#include <linux/cpu.h>
-#include <linux/cpuset.h>
-#include <linux/topology.h>
-
-/* any CPU partition */
-#define CFS_CPT_ANY (-1)
-
-#ifdef CONFIG_SMP
-/** virtual processing unit */
-struct cfs_cpu_partition {
- /* CPUs mask for this partition */
- cpumask_var_t cpt_cpumask;
- /* nodes mask for this partition */
- nodemask_t *cpt_nodemask;
- /* spread rotor for NUMA allocator */
- unsigned int cpt_spread_rotor;
-};
-
-
-/** descriptor for CPU partitions */
-struct cfs_cpt_table {
- /* version, reserved for hotplug */
- unsigned int ctb_version;
- /* spread rotor for NUMA allocator */
- unsigned int ctb_spread_rotor;
- /* # of CPU partitions */
- unsigned int ctb_nparts;
- /* partitions tables */
- struct cfs_cpu_partition *ctb_parts;
- /* shadow HW CPU to CPU partition ID */
- int *ctb_cpu2cpt;
- /* all cpus in this partition table */
- cpumask_var_t ctb_cpumask;
- /* all nodes in this partition table */
- nodemask_t *ctb_nodemask;
-};
-
-extern struct cfs_cpt_table *cfs_cpt_tab;
-
-/**
- * return cpumask of CPU partition \a cpt
- */
-cpumask_var_t *cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt);
-/**
- * print string information of cpt-table
- */
-int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len);
-/**
- * return total number of CPU partitions in \a cptab
- */
-int
-cfs_cpt_number(struct cfs_cpt_table *cptab);
-/**
- * return number of HW cores or hyper-threadings in a CPU partition \a cpt
- */
-int cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt);
-/**
- * is there any online CPU in CPU partition \a cpt
- */
-int cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt);
-/**
- * return nodemask of CPU partition \a cpt
- */
-nodemask_t *cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt);
-/**
- * shadow current HW processor ID to CPU-partition ID of \a cptab
- */
-int cfs_cpt_current(struct cfs_cpt_table *cptab, int remap);
-/**
- * shadow HW processor ID \a CPU to CPU-partition ID by \a cptab
- */
-int cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu);
-/**
- * bind current thread on a CPU-partition \a cpt of \a cptab
- */
-int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt);
-/**
- * add \a cpu to CPU partition \a cpt of \a cptab, return 1 for success,
- * otherwise 0 is returned
- */
-int cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu);
-/**
- * remove \a cpu from CPU partition \a cpt of \a cptab
- */
-void cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu);
-/**
- * add all cpus in \a mask to CPU partition \a cpt
- * return 1 if successfully set all CPUs, otherwise return 0
- */
-int cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab,
- int cpt, cpumask_t *mask);
-/**
- * remove all cpus in \a mask from CPU partition \a cpt
- */
-void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab,
- int cpt, cpumask_t *mask);
-/**
- * add all cpus in NUMA node \a node to CPU partition \a cpt
- * return 1 if successfully set all CPUs, otherwise return 0
- */
-int cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node);
-/**
- * remove all cpus in NUMA node \a node from CPU partition \a cpt
- */
-void cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node);
-
-/**
- * add all cpus in node mask \a mask to CPU partition \a cpt
- * return 1 if successfully set all CPUs, otherwise return 0
- */
-int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab,
- int cpt, nodemask_t *mask);
-/**
- * remove all cpus in node mask \a mask from CPU partition \a cpt
- */
-void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab,
- int cpt, nodemask_t *mask);
-/**
- * unset all cpus for CPU partition \a cpt
- */
-void cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt);
-/**
- * convert partition id \a cpt to numa node id, if there are more than one
- * nodes in this partition, it might return a different node id each time.
- */
-int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt);
-
-/**
- * return number of HTs in the same core of \a cpu
- */
-int cfs_cpu_ht_nsiblings(int cpu);
-
-int cfs_cpu_init(void);
-void cfs_cpu_fini(void);
-
-#else /* !CONFIG_SMP */
-struct cfs_cpt_table;
-#define cfs_cpt_tab ((struct cfs_cpt_table *)NULL)
-
-static inline cpumask_var_t *
-cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
-{
- return NULL;
-}
-
-static inline int
-cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
-{
- return 0;
-}
-static inline int
-cfs_cpt_number(struct cfs_cpt_table *cptab)
-{
- return 1;
-}
-
-static inline int
-cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
-{
- return 1;
-}
-
-static inline int
-cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
-{
- return 1;
-}
-
-static inline nodemask_t *
-cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
-{
- return NULL;
-}
-
-static inline int
-cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
- return 1;
-}
-
-static inline void
-cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
-}
-
-static inline int
-cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
- return 1;
-}
-
-static inline void
-cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
-}
-
-static inline int
-cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
- return 1;
-}
-
-static inline void
-cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
-}
-
-static inline int
-cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
- return 1;
-}
-
-static inline void
-cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
-}
-
-static inline void
-cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
-{
-}
-
-static inline int
-cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
-{
- return 0;
-}
-
-static inline int
-cfs_cpu_ht_nsiblings(int cpu)
-{
- return 1;
-}
-
-static inline int
-cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
-{
- return 0;
-}
-
-static inline int
-cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
-{
- return 0;
-}
-
-static inline int
-cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
-{
- return 0;
-}
-
-static inline int
-cfs_cpu_init(void)
-{
- return 0;
-}
-
-static inline void cfs_cpu_fini(void)
-{
-}
-
-#endif /* CONFIG_SMP */
-
-/**
- * destroy a CPU partition table
- */
-void cfs_cpt_table_free(struct cfs_cpt_table *cptab);
-/**
- * create a cfs_cpt_table with \a ncpt number of partitions
- */
-struct cfs_cpt_table *cfs_cpt_table_alloc(unsigned int ncpt);
-
-/*
- * allocate per-cpu-partition data, returned value is an array of pointers,
- * variable can be indexed by CPU ID.
- * cptab != NULL: size of array is number of CPU partitions
- * cptab == NULL: size of array is number of HW cores
- */
-void *cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size);
-/*
- * destroy per-cpu-partition variable
- */
-void cfs_percpt_free(void *vars);
-int cfs_percpt_number(void *vars);
-
-#define cfs_percpt_for_each(var, i, vars) \
- for (i = 0; i < cfs_percpt_number(vars) && \
- ((var) = (vars)[i]) != NULL; i++)
-
-/*
- * percpu partition lock
- *
- * There are some use-cases like this in Lustre:
- * . each CPU partition has its own private data which is frequently changed,
- * and mostly by the local CPU partition.
- * . all CPU partitions share some global data; this data is rarely changed.
- *
- * LNet is a typical example.
- * The CPU partition lock is designed for this kind of use-case:
- * . each CPU partition has its own private lock
- * . change on private data just needs to take the private lock
- * . read on shared data just needs to take _any_ of private locks
- * . change on shared data needs to take _all_ private locks,
- * which is slow and should be really rare.
- */
-enum {
- CFS_PERCPT_LOCK_EX = -1, /* negative */
-};
-
-struct cfs_percpt_lock {
- /* cpu-partition-table for this lock */
- struct cfs_cpt_table *pcl_cptab;
- /* exclusively locked */
- unsigned int pcl_locked;
- /* private lock table */
- spinlock_t **pcl_locks;
-};
-
-/* return number of private locks */
-#define cfs_percpt_lock_num(pcl) cfs_cpt_number(pcl->pcl_cptab)
-
-/*
- * create a cpu-partition lock based on CPU partition table \a cptab,
- * each private lock has extra \a psize bytes padding data
- */
-struct cfs_percpt_lock *cfs_percpt_lock_create(struct cfs_cpt_table *cptab,
- struct lock_class_key *keys);
-/* destroy a cpu-partition lock */
-void cfs_percpt_lock_free(struct cfs_percpt_lock *pcl);
-
-/* lock private lock \a index of \a pcl */
-void cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index);
-
-/* unlock private lock \a index of \a pcl */
-void cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index);
-
-#define CFS_PERCPT_LOCK_KEYS 256
-
-/* NB: don't allocate keys dynamically, lockdep needs them to be in ".data" */
-#define cfs_percpt_lock_alloc(cptab) \
-({ \
- static struct lock_class_key ___keys[CFS_PERCPT_LOCK_KEYS]; \
- struct cfs_percpt_lock *___lk; \
- \
- if (cfs_cpt_number(cptab) > CFS_PERCPT_LOCK_KEYS) \
- ___lk = cfs_percpt_lock_create(cptab, NULL); \
- else \
- ___lk = cfs_percpt_lock_create(cptab, ___keys); \
- ___lk; \
-})
-
-/**
- * iterate over all CPU partitions in \a cptab
- */
-#define cfs_cpt_for_each(i, cptab) \
- for (i = 0; i < cfs_cpt_number(cptab); i++)
-
-#endif /* __LIBCFS_CPU_H__ */
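Putting the pieces of this header together, the intended usage pattern is
per-partition data guarded by a per-partition lock: updates touch only the
local partition's slot, and whole-table reads take the exclusive
CFS_PERCPT_LOCK_EX index. A sketch follows; the structure and function names
(my_pcpu_stat and friends) are invented for illustration, and the include path
assumes the header removed above.

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/libcfs/libcfs_cpu.h>

struct my_pcpu_stat {
	u64 mps_count;
};

static struct my_pcpu_stat **stats;	/* one slot per CPU partition */
static struct cfs_percpt_lock *stats_lock;

static int stats_init(void)
{
	stats = cfs_percpt_alloc(cfs_cpt_tab, sizeof(**stats));
	if (!stats)
		return -ENOMEM;
	stats_lock = cfs_percpt_lock_alloc(cfs_cpt_tab);
	if (!stats_lock) {
		cfs_percpt_free(stats);
		return -ENOMEM;
	}
	return 0;
}

/* fast path: only the current partition's slot and private lock */
static void stats_inc(void)
{
	int cpt = cfs_cpt_current(cfs_cpt_tab, 1);

	cfs_percpt_lock(stats_lock, cpt);
	stats[cpt]->mps_count++;
	cfs_percpt_unlock(stats_lock, cpt);
}

/* slow path: take every private lock for a consistent total */
static u64 stats_total(void)
{
	struct my_pcpu_stat *s;
	u64 total = 0;
	int i;

	cfs_percpt_lock(stats_lock, CFS_PERCPT_LOCK_EX);
	cfs_percpt_for_each(s, i, stats)
		total += s->mps_count;
	cfs_percpt_unlock(stats_lock, CFS_PERCPT_LOCK_EX);
	return total;
}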
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h
deleted file mode 100644
index 176fae7319e3..000000000000
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h
+++ /dev/null
@@ -1,208 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- */
-
-#ifndef _LIBCFS_CRYPTO_H
-#define _LIBCFS_CRYPTO_H
-
-#include <linux/string.h>
-struct page;
-
-struct cfs_crypto_hash_type {
- char *cht_name; /*< hash algorithm name, equal to
- * format name for crypto api
- */
- unsigned int cht_key; /*< init key by default (valid for
- * 4 bytes context like crc32, adler
- */
- unsigned int cht_size; /**< hash digest size */
-};
-
-enum cfs_crypto_hash_alg {
- CFS_HASH_ALG_NULL = 0,
- CFS_HASH_ALG_ADLER32,
- CFS_HASH_ALG_CRC32,
- CFS_HASH_ALG_MD5,
- CFS_HASH_ALG_SHA1,
- CFS_HASH_ALG_SHA256,
- CFS_HASH_ALG_SHA384,
- CFS_HASH_ALG_SHA512,
- CFS_HASH_ALG_CRC32C,
- CFS_HASH_ALG_MAX,
- CFS_HASH_ALG_UNKNOWN = 0xff
-};
-
-static struct cfs_crypto_hash_type hash_types[] = {
- [CFS_HASH_ALG_NULL] = {
- .cht_name = "null",
- .cht_key = 0,
- .cht_size = 0
- },
- [CFS_HASH_ALG_ADLER32] = {
- .cht_name = "adler32",
- .cht_key = 1,
- .cht_size = 4
- },
- [CFS_HASH_ALG_CRC32] = {
- .cht_name = "crc32",
- .cht_key = ~0,
- .cht_size = 4
- },
- [CFS_HASH_ALG_CRC32C] = {
- .cht_name = "crc32c",
- .cht_key = ~0,
- .cht_size = 4
- },
- [CFS_HASH_ALG_MD5] = {
- .cht_name = "md5",
- .cht_key = 0,
- .cht_size = 16
- },
- [CFS_HASH_ALG_SHA1] = {
- .cht_name = "sha1",
- .cht_key = 0,
- .cht_size = 20
- },
- [CFS_HASH_ALG_SHA256] = {
- .cht_name = "sha256",
- .cht_key = 0,
- .cht_size = 32
- },
- [CFS_HASH_ALG_SHA384] = {
- .cht_name = "sha384",
- .cht_key = 0,
- .cht_size = 48
- },
- [CFS_HASH_ALG_SHA512] = {
- .cht_name = "sha512",
- .cht_key = 0,
- .cht_size = 64
- },
- [CFS_HASH_ALG_MAX] = {
- .cht_name = NULL,
- .cht_key = 0,
- .cht_size = 64
- },
-};
-
-/* Maximum size of hash_types[].cht_size */
-#define CFS_CRYPTO_HASH_DIGESTSIZE_MAX 64
-
-/**
- * Return hash algorithm information for the specified algorithm identifier
- *
- * Hash information includes algorithm name, initial seed, hash size.
- *
- * \retval cfs_crypto_hash_type for valid ID (CFS_HASH_ALG_*)
- * \retval NULL for unknown algorithm identifier
- */
-static inline const struct cfs_crypto_hash_type *
-cfs_crypto_hash_type(enum cfs_crypto_hash_alg hash_alg)
-{
- struct cfs_crypto_hash_type *ht;
-
- if (hash_alg < CFS_HASH_ALG_MAX) {
- ht = &hash_types[hash_alg];
- if (ht->cht_name)
- return ht;
- }
- return NULL;
-}
-
-/**
- * Return hash name for hash algorithm identifier
- *
- * \param[in] hash_alg hash algorithm id (CFS_HASH_ALG_*)
- *
- * \retval string name of known hash algorithm
- * \retval "unknown" if hash algorithm is unknown
- */
-static inline const char *
-cfs_crypto_hash_name(enum cfs_crypto_hash_alg hash_alg)
-{
- const struct cfs_crypto_hash_type *ht;
-
- ht = cfs_crypto_hash_type(hash_alg);
- if (ht)
- return ht->cht_name;
- return "unknown";
-}
-
-/**
- * Return digest size for hash algorithm type
- *
- * \param[in] hash_alg hash algorithm id (CFS_HASH_ALG_*)
- *
- * \retval hash algorithm digest size in bytes
- * \retval 0 if hash algorithm type is unknown
- */
-static inline int cfs_crypto_hash_digestsize(enum cfs_crypto_hash_alg hash_alg)
-{
- const struct cfs_crypto_hash_type *ht;
-
- ht = cfs_crypto_hash_type(hash_alg);
- if (ht)
- return ht->cht_size;
- return 0;
-}
-
-/**
- * Find hash algorithm ID for the specified algorithm name
- *
- * \retval hash algorithm ID for valid ID (CFS_HASH_ALG_*)
- * \retval CFS_HASH_ALG_UNKNOWN for unknown algorithm name
- */
-static inline unsigned char cfs_crypto_hash_alg(const char *algname)
-{
- enum cfs_crypto_hash_alg hash_alg;
-
- for (hash_alg = 0; hash_alg < CFS_HASH_ALG_MAX; hash_alg++)
- if (!strcmp(hash_types[hash_alg].cht_name, algname))
- return hash_alg;
-
- return CFS_HASH_ALG_UNKNOWN;
-}
-
-int cfs_crypto_hash_digest(enum cfs_crypto_hash_alg hash_alg,
- const void *buf, unsigned int buf_len,
- unsigned char *key, unsigned int key_len,
- unsigned char *hash, unsigned int *hash_len);
-
-struct ahash_request *
-cfs_crypto_hash_init(enum cfs_crypto_hash_alg hash_alg,
- unsigned char *key, unsigned int key_len);
-int cfs_crypto_hash_update_page(struct ahash_request *desc,
- struct page *page, unsigned int offset,
- unsigned int len);
-int cfs_crypto_hash_update(struct ahash_request *desc, const void *buf,
- unsigned int buf_len);
-int cfs_crypto_hash_final(struct ahash_request *desc,
- unsigned char *hash, unsigned int *hash_len);
-int cfs_crypto_register(void);
-void cfs_crypto_unregister(void);
-int cfs_crypto_hash_speed(enum cfs_crypto_hash_alg hash_alg);
-#endif
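For orientation, the digest API declared above can be driven either as a
one-shot call or incrementally. A sketch follows, assuming adler32, arbitrary
buffer contents, and the usual ERR_PTR convention for cfs_crypto_hash_init();
error-path teardown of the request is elided for brevity.

#include <linux/err.h>

static int hash_demo(const void *buf, unsigned int len)
{
	unsigned char digest[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
	unsigned int dlen = sizeof(digest);
	struct ahash_request *req;
	int rc;

	/* one-shot helper: init, update, and final in a single call */
	rc = cfs_crypto_hash_digest(CFS_HASH_ALG_ADLER32, buf, len,
				    NULL, 0, digest, &dlen);
	if (rc)
		return rc;

	/* equivalent incremental form */
	req = cfs_crypto_hash_init(CFS_HASH_ALG_ADLER32, NULL, 0);
	if (IS_ERR(req))
		return PTR_ERR(req);
	rc = cfs_crypto_hash_update(req, buf, len);
	if (!rc)
		rc = cfs_crypto_hash_final(req, digest, &dlen);
	return rc;
}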
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h
deleted file mode 100644
index 17534a76362a..000000000000
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h
+++ /dev/null
@@ -1,207 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_debug.h
- *
- * Debug messages and assertions
- *
- */
-
-#ifndef __LIBCFS_DEBUG_H__
-#define __LIBCFS_DEBUG_H__
-
-#include <linux/limits.h>
-#include <uapi/linux/lnet/libcfs_debug.h>
-
-/*
- * Debugging
- */
-extern unsigned int libcfs_subsystem_debug;
-extern unsigned int libcfs_stack;
-extern unsigned int libcfs_debug;
-extern unsigned int libcfs_printk;
-extern unsigned int libcfs_console_ratelimit;
-extern unsigned int libcfs_console_max_delay;
-extern unsigned int libcfs_console_min_delay;
-extern unsigned int libcfs_console_backoff;
-extern unsigned int libcfs_debug_binary;
-extern char libcfs_debug_file_path_arr[PATH_MAX];
-
-int libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys);
-int libcfs_debug_str2mask(int *mask, const char *str, int is_subsys);
-
-/* Has there been an LBUG? */
-extern unsigned int libcfs_catastrophe;
-extern unsigned int libcfs_panic_on_lbug;
-
-/* Enable debug-checks on stack size - except on x86_64 */
-#if !defined(__x86_64__)
-# ifdef __ia64__
-# define CDEBUG_STACK() (THREAD_SIZE - \
- ((unsigned long)__builtin_dwarf_cfa() & \
- (THREAD_SIZE - 1)))
-# else
-# define CDEBUG_STACK() (THREAD_SIZE - \
- ((unsigned long)__builtin_frame_address(0) & \
- (THREAD_SIZE - 1)))
-# endif /* __ia64__ */
-
-#define __CHECK_STACK(msgdata, mask, cdls) \
-do { \
- if (unlikely(CDEBUG_STACK() > libcfs_stack)) { \
- LIBCFS_DEBUG_MSG_DATA_INIT(msgdata, D_WARNING, NULL); \
- libcfs_stack = CDEBUG_STACK(); \
- libcfs_debug_msg(msgdata, \
- "maximum lustre stack %lu\n", \
- CDEBUG_STACK()); \
- (msgdata)->msg_mask = mask; \
- (msgdata)->msg_cdls = cdls; \
- dump_stack(); \
- /*panic("LBUG");*/ \
- } \
-} while (0)
-#define CFS_CHECK_STACK(msgdata, mask, cdls) __CHECK_STACK(msgdata, mask, cdls)
-#else /* __x86_64__ */
-#define CFS_CHECK_STACK(msgdata, mask, cdls) do {} while (0)
-#define CDEBUG_STACK() (0L)
-#endif /* __x86_64__ */
-
-#ifndef DEBUG_SUBSYSTEM
-# define DEBUG_SUBSYSTEM S_UNDEFINED
-#endif
-
-#define CDEBUG_DEFAULT_MAX_DELAY (600 * HZ) /* jiffies */
-#define CDEBUG_DEFAULT_MIN_DELAY ((HZ + 1) / 2) /* jiffies */
-#define CDEBUG_DEFAULT_BACKOFF 2
-struct cfs_debug_limit_state {
- unsigned long cdls_next;
- unsigned int cdls_delay;
- int cdls_count;
-};
-
-struct libcfs_debug_msg_data {
- const char *msg_file;
- const char *msg_fn;
- int msg_subsys;
- int msg_line;
- int msg_mask;
- struct cfs_debug_limit_state *msg_cdls;
-};
-
-#define LIBCFS_DEBUG_MSG_DATA_INIT(data, mask, cdls) \
-do { \
- (data)->msg_subsys = DEBUG_SUBSYSTEM; \
- (data)->msg_file = __FILE__; \
- (data)->msg_fn = __func__; \
- (data)->msg_line = __LINE__; \
- (data)->msg_cdls = (cdls); \
- (data)->msg_mask = (mask); \
-} while (0)
-
-#define LIBCFS_DEBUG_MSG_DATA_DECL(dataname, mask, cdls) \
- static struct libcfs_debug_msg_data dataname = { \
- .msg_subsys = DEBUG_SUBSYSTEM, \
- .msg_file = __FILE__, \
- .msg_fn = __func__, \
- .msg_line = __LINE__, \
- .msg_cdls = (cdls) }; \
- dataname.msg_mask = (mask)
-
-/**
- * Filters out logging messages based on mask and subsystem.
- */
-static inline int cfs_cdebug_show(unsigned int mask, unsigned int subsystem)
-{
- return mask & D_CANTMASK ||
- ((libcfs_debug & mask) && (libcfs_subsystem_debug & subsystem));
-}
-
-#define __CDEBUG(cdls, mask, format, ...) \
-do { \
- static struct libcfs_debug_msg_data msgdata; \
- \
- CFS_CHECK_STACK(&msgdata, mask, cdls); \
- \
- if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
- LIBCFS_DEBUG_MSG_DATA_INIT(&msgdata, mask, cdls); \
- libcfs_debug_msg(&msgdata, format, ## __VA_ARGS__); \
- } \
-} while (0)
-
-#define CDEBUG(mask, format, ...) __CDEBUG(NULL, mask, format, ## __VA_ARGS__)
-
-#define CDEBUG_LIMIT(mask, format, ...) \
-do { \
- static struct cfs_debug_limit_state cdls; \
- \
- __CDEBUG(&cdls, mask, format, ## __VA_ARGS__); \
-} while (0)
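A minimal usage sketch for the two macros above; the subsystem, mask, and
message are illustrative (S_LNET and D_NET are assumed to be among the
S_*/D_* identifiers from the uapi debug header):

#define DEBUG_SUBSYSTEM S_LNET	/* must be defined before using CDEBUG */

static void example_log(int rc)
{
	CDEBUG(D_NET, "operation returned %d\n", rc);
	/* rate-limited variant for messages that can repeat rapidly */
	CDEBUG_LIMIT(D_WARNING, "operation still failing: rc = %d\n", rc);
}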
-
-/*
- * Lustre Error Checksum: calculates a checksum of a
- * hex number by XORing its nybbles.
- */
-#define LERRCHKSUM(hexnum) (((hexnum) & 0xf) ^ ((hexnum) >> 4 & 0xf) ^ \
- ((hexnum) >> 8 & 0xf))
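Worked example: for errnum 0x131 the three nybbles are 0x1, 0x3 and 0x1, so
LERRCHKSUM(0x131) = 0x1 ^ 0x3 ^ 0x1 = 0x3, and LCONSOLE_ERROR_MSG(0x131, ...)
below prefixes its message with "131-3: ".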
-
-#define CWARN(format, ...) CDEBUG_LIMIT(D_WARNING, format, ## __VA_ARGS__)
-#define CERROR(format, ...) CDEBUG_LIMIT(D_ERROR, format, ## __VA_ARGS__)
-#define CNETERR(format, ...) CDEBUG_LIMIT(D_NETERROR, format, ## __VA_ARGS__)
-#define CEMERG(format, ...) CDEBUG_LIMIT(D_EMERG, format, ## __VA_ARGS__)
-
-#define LCONSOLE(mask, format, ...) CDEBUG(D_CONSOLE | (mask), format, ## __VA_ARGS__)
-#define LCONSOLE_INFO(format, ...) CDEBUG_LIMIT(D_CONSOLE, format, ## __VA_ARGS__)
-#define LCONSOLE_WARN(format, ...) CDEBUG_LIMIT(D_CONSOLE | D_WARNING, format, ## __VA_ARGS__)
-#define LCONSOLE_ERROR_MSG(errnum, format, ...) CDEBUG_LIMIT(D_CONSOLE | D_ERROR, \
- "%x-%x: " format, errnum, LERRCHKSUM(errnum), ## __VA_ARGS__)
-#define LCONSOLE_ERROR(format, ...) LCONSOLE_ERROR_MSG(0x00, format, ## __VA_ARGS__)
-
-#define LCONSOLE_EMERG(format, ...) CDEBUG(D_CONSOLE | D_EMERG, format, ## __VA_ARGS__)
-
-int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
- const char *format1, ...)
- __printf(2, 3);
-
-int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
- const char *format1,
- va_list args, const char *format2, ...)
- __printf(4, 5);
-
-/* other external symbols that tracefile provides: */
-int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
- const char __user *usr_buffer, int usr_buffer_nob);
-int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
- const char *knl_buffer, char *append);
-
-#define LIBCFS_DEBUG_FILE_PATH_DEFAULT "/tmp/lustre-log"
-
-#endif /* __LIBCFS_DEBUG_H__ */
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h
deleted file mode 100644
index 8074e390b4d1..000000000000
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h
+++ /dev/null
@@ -1,194 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Oracle Corporation, Inc.
- */
-
-#ifndef _LIBCFS_FAIL_H
-#define _LIBCFS_FAIL_H
-
-#include <linux/sched.h>
-#include <linux/wait.h>
-
-extern unsigned long cfs_fail_loc;
-extern unsigned int cfs_fail_val;
-extern int cfs_fail_err;
-
-extern wait_queue_head_t cfs_race_waitq;
-extern int cfs_race_state;
-
-int __cfs_fail_check_set(u32 id, u32 value, int set);
-int __cfs_fail_timeout_set(u32 id, u32 value, int ms, int set);
-
-enum {
- CFS_FAIL_LOC_NOSET = 0,
- CFS_FAIL_LOC_ORSET = 1,
- CFS_FAIL_LOC_RESET = 2,
- CFS_FAIL_LOC_VALUE = 3
-};
-
-/* Failure injection control */
-#define CFS_FAIL_MASK_SYS 0x0000FF00
-#define CFS_FAIL_MASK_LOC (0x000000FF | CFS_FAIL_MASK_SYS)
-
-#define CFS_FAILED_BIT 30
-/* CFS_FAILED is 0x40000000 */
-#define CFS_FAILED BIT(CFS_FAILED_BIT)
-
-#define CFS_FAIL_ONCE_BIT 31
-/* CFS_FAIL_ONCE is 0x80000000 */
-#define CFS_FAIL_ONCE BIT(CFS_FAIL_ONCE_BIT)
-
-/* The following flags are not meant to be combined */
-#define CFS_FAIL_SKIP 0x20000000 /* skip N times then fail */
-#define CFS_FAIL_SOME 0x10000000 /* only fail N times */
-#define CFS_FAIL_RAND 0x08000000 /* fail 1/N of the times */
-#define CFS_FAIL_USR1 0x04000000 /* user flag */
-
-#define CFS_FAULT 0x02000000 /* match any CFS_FAULT_CHECK */
-
-static inline bool CFS_FAIL_PRECHECK(u32 id)
-{
- return cfs_fail_loc &&
- ((cfs_fail_loc & CFS_FAIL_MASK_LOC) == (id & CFS_FAIL_MASK_LOC) ||
- (cfs_fail_loc & id & CFS_FAULT));
-}
-
-static inline int cfs_fail_check_set(u32 id, u32 value,
- int set, int quiet)
-{
- int ret = 0;
-
- if (unlikely(CFS_FAIL_PRECHECK(id))) {
- ret = __cfs_fail_check_set(id, value, set);
- if (ret) {
- if (quiet) {
- CDEBUG(D_INFO, "*** cfs_fail_loc=%x, val=%u***\n",
- id, value);
- } else {
- LCONSOLE_INFO("*** cfs_fail_loc=%x, val=%u***\n",
- id, value);
- }
- }
- }
-
- return ret;
-}
-
-/* If id hits cfs_fail_loc, return 1; otherwise return 0 */
-#define CFS_FAIL_CHECK(id) \
- cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET, 0)
-#define CFS_FAIL_CHECK_QUIET(id) \
- cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET, 1)
-
-/*
- * If id hits cfs_fail_loc and cfs_fail_val equals -1 or value, return 1;
- * otherwise return 0
- */
-#define CFS_FAIL_CHECK_VALUE(id, value) \
- cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 0)
-#define CFS_FAIL_CHECK_VALUE_QUIET(id, value) \
- cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 1)
-
-/*
- * If id hits cfs_fail_loc, cfs_fail_loc |= value and return 1;
- * otherwise return 0
- */
-#define CFS_FAIL_CHECK_ORSET(id, value) \
- cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 0)
-#define CFS_FAIL_CHECK_ORSET_QUIET(id, value) \
- cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 1)
-
-/*
- * If id hits cfs_fail_loc, cfs_fail_loc = value and return 1;
- * otherwise return 0
- */
-#define CFS_FAIL_CHECK_RESET(id, value) \
- cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 0)
-#define CFS_FAIL_CHECK_RESET_QUIET(id, value) \
- cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 1)
-
-static inline int cfs_fail_timeout_set(u32 id, u32 value, int ms, int set)
-{
- if (unlikely(CFS_FAIL_PRECHECK(id)))
- return __cfs_fail_timeout_set(id, value, ms, set);
- return 0;
-}
-
-/* If id hits cfs_fail_loc, sleep for the given seconds or milliseconds */
-#define CFS_FAIL_TIMEOUT(id, secs) \
- cfs_fail_timeout_set(id, 0, (secs) * 1000, CFS_FAIL_LOC_NOSET)
-
-#define CFS_FAIL_TIMEOUT_MS(id, ms) \
- cfs_fail_timeout_set(id, 0, ms, CFS_FAIL_LOC_NOSET)
-
-/*
- * If id hits cfs_fail_loc, cfs_fail_loc |= value and sleep for the
- * given seconds or milliseconds
- */
-#define CFS_FAIL_TIMEOUT_ORSET(id, value, secs) \
- cfs_fail_timeout_set(id, value, (secs) * 1000, CFS_FAIL_LOC_ORSET)
-
-#define CFS_FAIL_TIMEOUT_RESET(id, value, secs) \
- cfs_fail_timeout_set(id, value, (secs) * 1000, CFS_FAIL_LOC_RESET)
-
-#define CFS_FAIL_TIMEOUT_MS_ORSET(id, value, ms) \
- cfs_fail_timeout_set(id, value, ms, CFS_FAIL_LOC_ORSET)
-
-#define CFS_FAULT_CHECK(id) \
- CFS_FAIL_CHECK(CFS_FAULT | (id))
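A usage sketch for the check macros above; the fail locations 0x1234/0x1235
and the -EIO return are illustrative, not real fail-loc identifiers:

static int example_fail_path(void)
{
	/* fail if location 0x1234 is armed in cfs_fail_loc */
	if (CFS_FAIL_CHECK(0x1234))
		return -EIO;

	/* if 0x1235 is armed, sleep for 5 seconds before continuing */
	CFS_FAIL_TIMEOUT(0x1235, 5);
	return 0;
}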
-
-/*
- * The idea here is to synchronise two threads to force a race. The
- * first thread that calls this with a matching fail_loc is put to
- * sleep. The next thread that calls with the same fail_loc wakes up
- * the first and continues.
- */
-static inline void cfs_race(u32 id)
-{
- if (CFS_FAIL_PRECHECK(id)) {
- if (unlikely(__cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET))) {
- int rc;
-
- cfs_race_state = 0;
- CERROR("cfs_race id %x sleeping\n", id);
- rc = wait_event_interruptible(cfs_race_waitq,
- !!cfs_race_state);
- CERROR("cfs_fail_race id %x awake, rc=%d\n", id, rc);
- } else {
- CERROR("cfs_fail_race id %x waking\n", id);
- cfs_race_state = 1;
- wake_up(&cfs_race_waitq);
- }
- }
-}
-
-#define CFS_RACE(id) cfs_race(id)
-
-#endif /* _LIBCFS_FAIL_H */
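A sketch of how cfs_race() is driven; the 0x1236 location is illustrative.
Both racing threads execute the same call: whichever arrives first (with the
location armed) sleeps until the second thread reaches the same point and
wakes it, forcing the two code paths to collide:

static void example_race_point(void)
{
	/* first caller blocks on cfs_race_waitq; second caller wakes it */
	CFS_RACE(0x1236);
}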
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h
deleted file mode 100644
index be315958a4b3..000000000000
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h
+++ /dev/null
@@ -1,869 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_hash.h
- *
- * Hashing routines
- *
- */
-
-#ifndef __LIBCFS_HASH_H__
-#define __LIBCFS_HASH_H__
-
-#include <linux/hash.h>
-#include <linux/spinlock.h>
-#include <linux/workqueue.h>
-#include <linux/libcfs/libcfs.h>
-
-/*
- * Knuth recommends primes in approximately golden ratio to the maximum
- * integer representable by a machine word for multiplicative hashing.
- * Chuck Lever verified the effectiveness of this technique:
- * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
- *
- * These primes are chosen to be bit-sparse, that is, operations on
- * them can use shifts and additions instead of multiplications for
- * machines where multiplications are slow.
- */
-/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
-#define CFS_GOLDEN_RATIO_PRIME_32 0x9e370001UL
-/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
-#define CFS_GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001ULL
-
-/** disable debug */
-#define CFS_HASH_DEBUG_NONE 0
-/*
- * record hash depth and output to console when it's too deep;
- * the computing overhead is low but it consumes more memory
- */
-#define CFS_HASH_DEBUG_1 1
-/** expensive, checks key validity */
-#define CFS_HASH_DEBUG_2 2
-
-#define CFS_HASH_DEBUG_LEVEL CFS_HASH_DEBUG_NONE
-
-struct cfs_hash_ops;
-struct cfs_hash_lock_ops;
-struct cfs_hash_hlist_ops;
-
-union cfs_hash_lock {
- rwlock_t rw; /**< rwlock */
- spinlock_t spin; /**< spinlock */
-};
-
-/**
- * cfs_hash_bucket is a container of:
- * - lock, counter ...
- * - array of hash-heads starting from hsb_head[0]; a hash-head can be one of
- * . struct cfs_hash_head
- * . struct cfs_hash_head_dep
- * . struct cfs_hash_dhead
- * . struct cfs_hash_dhead_dep
- * whichever matches the user's requirements
- * - some extra bytes (the caller can request them when creating the hash)
- */
-struct cfs_hash_bucket {
- union cfs_hash_lock hsb_lock; /**< bucket lock */
- u32 hsb_count; /**< current entries */
- u32 hsb_version; /**< change version */
- unsigned int hsb_index; /**< index of bucket */
- int hsb_depmax; /**< max depth on bucket */
- long hsb_head[0]; /**< hash-head array */
-};
-
-/**
- * cfs_hash bucket descriptor; it normally lives on the caller's stack
- */
-struct cfs_hash_bd {
- /* address of bucket */
- struct cfs_hash_bucket *bd_bucket;
- /* offset in bucket */
- unsigned int bd_offset;
-};
-
-#define CFS_HASH_NAME_LEN 16 /**< default name length */
-#define CFS_HASH_BIGNAME_LEN 64 /**< bigname for param tree */
-
-#define CFS_HASH_BKT_BITS 3 /**< default bits of bucket */
-#define CFS_HASH_BITS_MAX 30 /**< max bits of bucket */
-#define CFS_HASH_BITS_MIN CFS_HASH_BKT_BITS
-
-/**
- * common hash attributes.
- */
-enum cfs_hash_tag {
- /**
- * don't need any lock, the caller will protect operations with its
- * own lock. With this flag:
- * . CFS_HASH_NO_BKTLOCK, CFS_HASH_RW_BKTLOCK, CFS_HASH_SPIN_BKTLOCK
- * will be ignored.
- * . Some functions are disabled with this flag, e.g.:
- * cfs_hash_for_each_empty, cfs_hash_rehash
- */
- CFS_HASH_NO_LOCK = BIT(0),
- /** no bucket lock, use one spinlock to protect the whole hash */
- CFS_HASH_NO_BKTLOCK = BIT(1),
- /** rwlock to protect bucket */
- CFS_HASH_RW_BKTLOCK = BIT(2),
- /** spinlock to protect bucket */
- CFS_HASH_SPIN_BKTLOCK = BIT(3),
- /** always add new item to tail */
- CFS_HASH_ADD_TAIL = BIT(4),
- /** hash-table doesn't have refcount on item */
- CFS_HASH_NO_ITEMREF = BIT(5),
- /** big name for param-tree */
- CFS_HASH_BIGNAME = BIT(6),
- /** track global count */
- CFS_HASH_COUNTER = BIT(7),
- /** rehash item by new key */
- CFS_HASH_REHASH_KEY = BIT(8),
- /** Enable dynamic hash resizing */
- CFS_HASH_REHASH = BIT(9),
- /** can shrink hash-size */
- CFS_HASH_SHRINK = BIT(10),
- /** assert hash is empty on exit */
- CFS_HASH_ASSERT_EMPTY = BIT(11),
- /** record hlist depth */
- CFS_HASH_DEPTH = BIT(12),
- /**
- * rehash is always scheduled in a different thread, so current
- * change on hash table is non-blocking
- */
- CFS_HASH_NBLK_CHANGE = BIT(13),
- /**
- * NB: hs_flags is declared as u16, so it must be widened
- * before a 16th flag can be added
- */
-};
-
-/** most used attributes */
-#define CFS_HASH_DEFAULT (CFS_HASH_RW_BKTLOCK | \
- CFS_HASH_COUNTER | CFS_HASH_REHASH)
-
-/**
- * cfs_hash is a hash-table implementation for general purpose, it can support:
- * . two refcount modes
- * hash-table with & without refcount
- * . four lock modes
- * nolock, one-spinlock, rw-bucket-lock, spin-bucket-lock
- * . general operations
- * lookup, add(add_tail or add_head), delete
- * . rehash
- * grow or shrink
- * . iteration
- * locked iteration and unlocked iteration
- * . bigname
- * support long name hash
- * . debug
- * trace max searching depth
- *
- * Rehash:
- * When the htable grows or shrinks, a separate task (cfs_hash_rehash_worker)
- * is spawned to handle the rehash in the background. Other processes can
- * concurrently perform additions, deletions, and lookups without blocking
- * on rehash completion, because the rehash releases the global wrlock for
- * each bucket.
- *
- * Rehash and iteration can't run at the same time because it's too tricky
- * to keep both of them safe and correct. As both are relatively rare
- * operations:
- * . if iteration is in progress when we try to launch a rehash, the rehash
- * simply gives up; the iterator will launch the rehash when it finishes.
- * . if a rehash is in progress when we try to iterate the hash table, we
- * just wait (it shouldn't take long); nobody should expect iteration of
- * the whole hash-table to be non-blocking anyway.
- *
- * During rehashing, a (key,object) pair may be in one of two buckets,
- * depending on whether the worker task has yet to transfer the object
- * to its new location in the table. Lookups and deletions need to search both
- * locations; additions must take care to only insert into the new bucket.
- */
-
-struct cfs_hash {
- /**
- * serialize with rehash, or serialize all operations if
- * the hash-table has CFS_HASH_NO_BKTLOCK
- */
- union cfs_hash_lock hs_lock;
- /** hash operations */
- struct cfs_hash_ops *hs_ops;
- /** hash lock operations */
- struct cfs_hash_lock_ops *hs_lops;
- /** hash list operations */
- struct cfs_hash_hlist_ops *hs_hops;
- /** hash buckets-table */
- struct cfs_hash_bucket **hs_buckets;
- /** total number of items on this hash-table */
- atomic_t hs_count;
- /** hash flags, see cfs_hash_tag for detail */
- u16 hs_flags;
- /** # of extra-bytes for bucket, for user saving extended attributes */
- u16 hs_extra_bytes;
- /** wants to iterate */
- u8 hs_iterating;
- /** hash-table is dying */
- u8 hs_exiting;
- /** current hash bits */
- u8 hs_cur_bits;
- /** min hash bits */
- u8 hs_min_bits;
- /** max hash bits */
- u8 hs_max_bits;
- /** bits for rehash */
- u8 hs_rehash_bits;
- /** bits for each bucket */
- u8 hs_bkt_bits;
- /** resize min threshold */
- u16 hs_min_theta;
- /** resize max threshold */
- u16 hs_max_theta;
- /** resize count */
- u32 hs_rehash_count;
- /** # of iterators (caller of cfs_hash_for_each_*) */
- u32 hs_iterators;
- /** rehash workitem */
- struct work_struct hs_rehash_work;
- /** refcount on this hash table */
- atomic_t hs_refcount;
- /** rehash buckets-table */
- struct cfs_hash_bucket **hs_rehash_buckets;
-#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
- /** serialize debug members */
- spinlock_t hs_dep_lock;
- /** max depth */
- unsigned int hs_dep_max;
- /** id of the deepest bucket */
- unsigned int hs_dep_bkt;
- /** offset in the deepest bucket */
- unsigned int hs_dep_off;
- /** bits when we found the max depth */
- unsigned int hs_dep_bits;
- /** workitem to output max depth */
- struct work_struct hs_dep_work;
-#endif
- /** name of htable */
- char hs_name[0];
-};
-
-struct cfs_hash_lock_ops {
- /** lock the hash table */
- void (*hs_lock)(union cfs_hash_lock *lock, int exclusive);
- /** unlock the hash table */
- void (*hs_unlock)(union cfs_hash_lock *lock, int exclusive);
- /** lock the hash bucket */
- void (*hs_bkt_lock)(union cfs_hash_lock *lock, int exclusive);
- /** unlock the hash bucket */
- void (*hs_bkt_unlock)(union cfs_hash_lock *lock, int exclusive);
-};
-
-struct cfs_hash_hlist_ops {
- /** return hlist_head of hash-head of @bd */
- struct hlist_head *(*hop_hhead)(struct cfs_hash *hs,
- struct cfs_hash_bd *bd);
- /** return hash-head size */
- int (*hop_hhead_size)(struct cfs_hash *hs);
- /** add @hnode to hash-head of @bd */
- int (*hop_hnode_add)(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode);
- /** remove @hnode from hash-head of @bd */
- int (*hop_hnode_del)(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode);
-};
-
-struct cfs_hash_ops {
- /** return hashed value from @key */
- unsigned int (*hs_hash)(struct cfs_hash *hs, const void *key,
- unsigned int mask);
- /** return key address of @hnode */
- void * (*hs_key)(struct hlist_node *hnode);
- /** copy key from @hnode to @key */
- void (*hs_keycpy)(struct hlist_node *hnode, void *key);
- /**
- * compare @key with key of @hnode
- * returns 1 on a match
- */
- int (*hs_keycmp)(const void *key, struct hlist_node *hnode);
- /** return object address of @hnode, i.e: container_of(...hnode) */
- void * (*hs_object)(struct hlist_node *hnode);
- /** get refcount of item, always called with holding bucket-lock */
- void (*hs_get)(struct cfs_hash *hs, struct hlist_node *hnode);
- /** release refcount of item */
- void (*hs_put)(struct cfs_hash *hs, struct hlist_node *hnode);
- /** release refcount of item, always called with holding bucket-lock */
- void (*hs_put_locked)(struct cfs_hash *hs,
- struct hlist_node *hnode);
- /** it's called before removing of @hnode */
- void (*hs_exit)(struct cfs_hash *hs, struct hlist_node *hnode);
-};
-
-/** total number of buckets in @hs */
-#define CFS_HASH_NBKT(hs) \
- BIT((hs)->hs_cur_bits - (hs)->hs_bkt_bits)
-
-/** total number of buckets in @hs while rehashing */
-#define CFS_HASH_RH_NBKT(hs) \
- BIT((hs)->hs_rehash_bits - (hs)->hs_bkt_bits)
-
-/** number of hlists in each bucket */
-#define CFS_HASH_BKT_NHLIST(hs) BIT((hs)->hs_bkt_bits)
-
-/** total number of hlists in @hs */
-#define CFS_HASH_NHLIST(hs) BIT((hs)->hs_cur_bits)
-
-/** total number of hlists in @hs while rehashing */
-#define CFS_HASH_RH_NHLIST(hs) BIT((hs)->hs_rehash_bits)
-
-static inline int
-cfs_hash_with_no_lock(struct cfs_hash *hs)
-{
- /* caller will serialize all operations for this hash-table */
- return hs->hs_flags & CFS_HASH_NO_LOCK;
-}
-
-static inline int
-cfs_hash_with_no_bktlock(struct cfs_hash *hs)
-{
- /* no bucket lock, one single lock to protect the hash-table */
- return hs->hs_flags & CFS_HASH_NO_BKTLOCK;
-}
-
-static inline int
-cfs_hash_with_rw_bktlock(struct cfs_hash *hs)
-{
- /* rwlock to protect hash bucket */
- return hs->hs_flags & CFS_HASH_RW_BKTLOCK;
-}
-
-static inline int
-cfs_hash_with_spin_bktlock(struct cfs_hash *hs)
-{
- /* spinlock to protect hash bucket */
- return hs->hs_flags & CFS_HASH_SPIN_BKTLOCK;
-}
-
-static inline int
-cfs_hash_with_add_tail(struct cfs_hash *hs)
-{
- return hs->hs_flags & CFS_HASH_ADD_TAIL;
-}
-
-static inline int
-cfs_hash_with_no_itemref(struct cfs_hash *hs)
-{
- /*
- * the hash-table doesn't keep a refcount on items; an item
- * can't be removed from the hash until its refcount drops
- * to zero
- */
- return hs->hs_flags & CFS_HASH_NO_ITEMREF;
-}
-
-static inline int
-cfs_hash_with_bigname(struct cfs_hash *hs)
-{
- return hs->hs_flags & CFS_HASH_BIGNAME;
-}
-
-static inline int
-cfs_hash_with_counter(struct cfs_hash *hs)
-{
- return hs->hs_flags & CFS_HASH_COUNTER;
-}
-
-static inline int
-cfs_hash_with_rehash(struct cfs_hash *hs)
-{
- return hs->hs_flags & CFS_HASH_REHASH;
-}
-
-static inline int
-cfs_hash_with_rehash_key(struct cfs_hash *hs)
-{
- return hs->hs_flags & CFS_HASH_REHASH_KEY;
-}
-
-static inline int
-cfs_hash_with_shrink(struct cfs_hash *hs)
-{
- return hs->hs_flags & CFS_HASH_SHRINK;
-}
-
-static inline int
-cfs_hash_with_assert_empty(struct cfs_hash *hs)
-{
- return hs->hs_flags & CFS_HASH_ASSERT_EMPTY;
-}
-
-static inline int
-cfs_hash_with_depth(struct cfs_hash *hs)
-{
- return hs->hs_flags & CFS_HASH_DEPTH;
-}
-
-static inline int
-cfs_hash_with_nblk_change(struct cfs_hash *hs)
-{
- return hs->hs_flags & CFS_HASH_NBLK_CHANGE;
-}
-
-static inline int
-cfs_hash_is_exiting(struct cfs_hash *hs)
-{
- /* cfs_hash_destroy is called */
- return hs->hs_exiting;
-}
-
-static inline int
-cfs_hash_is_rehashing(struct cfs_hash *hs)
-{
- /* rehash is launched */
- return !!hs->hs_rehash_bits;
-}
-
-static inline int
-cfs_hash_is_iterating(struct cfs_hash *hs)
-{
- /* someone is calling cfs_hash_for_each_* */
- return hs->hs_iterating || hs->hs_iterators;
-}
-
-static inline int
-cfs_hash_bkt_size(struct cfs_hash *hs)
-{
- return offsetof(struct cfs_hash_bucket, hsb_head[0]) +
- hs->hs_hops->hop_hhead_size(hs) * CFS_HASH_BKT_NHLIST(hs) +
- hs->hs_extra_bytes;
-}
-
-static inline unsigned
-cfs_hash_id(struct cfs_hash *hs, const void *key, unsigned int mask)
-{
- return hs->hs_ops->hs_hash(hs, key, mask);
-}
-
-static inline void *
-cfs_hash_key(struct cfs_hash *hs, struct hlist_node *hnode)
-{
- return hs->hs_ops->hs_key(hnode);
-}
-
-static inline void
-cfs_hash_keycpy(struct cfs_hash *hs, struct hlist_node *hnode, void *key)
-{
- if (hs->hs_ops->hs_keycpy)
- hs->hs_ops->hs_keycpy(hnode, key);
-}
-
-/**
- * Returns 1 on a match.
- */
-static inline int
-cfs_hash_keycmp(struct cfs_hash *hs, const void *key, struct hlist_node *hnode)
-{
- return hs->hs_ops->hs_keycmp(key, hnode);
-}
-
-static inline void *
-cfs_hash_object(struct cfs_hash *hs, struct hlist_node *hnode)
-{
- return hs->hs_ops->hs_object(hnode);
-}
-
-static inline void
-cfs_hash_get(struct cfs_hash *hs, struct hlist_node *hnode)
-{
- hs->hs_ops->hs_get(hs, hnode);
-}
-
-static inline void
-cfs_hash_put_locked(struct cfs_hash *hs, struct hlist_node *hnode)
-{
- hs->hs_ops->hs_put_locked(hs, hnode);
-}
-
-static inline void
-cfs_hash_put(struct cfs_hash *hs, struct hlist_node *hnode)
-{
- hs->hs_ops->hs_put(hs, hnode);
-}
-
-static inline void
-cfs_hash_exit(struct cfs_hash *hs, struct hlist_node *hnode)
-{
- if (hs->hs_ops->hs_exit)
- hs->hs_ops->hs_exit(hs, hnode);
-}
-
-static inline void cfs_hash_lock(struct cfs_hash *hs, int excl)
-{
- hs->hs_lops->hs_lock(&hs->hs_lock, excl);
-}
-
-static inline void cfs_hash_unlock(struct cfs_hash *hs, int excl)
-{
- hs->hs_lops->hs_unlock(&hs->hs_lock, excl);
-}
-
-static inline int cfs_hash_dec_and_lock(struct cfs_hash *hs,
- atomic_t *condition)
-{
- LASSERT(cfs_hash_with_no_bktlock(hs));
- return atomic_dec_and_lock(condition, &hs->hs_lock.spin);
-}
-
-static inline void cfs_hash_bd_lock(struct cfs_hash *hs,
- struct cfs_hash_bd *bd, int excl)
-{
- hs->hs_lops->hs_bkt_lock(&bd->bd_bucket->hsb_lock, excl);
-}
-
-static inline void cfs_hash_bd_unlock(struct cfs_hash *hs,
- struct cfs_hash_bd *bd, int excl)
-{
- hs->hs_lops->hs_bkt_unlock(&bd->bd_bucket->hsb_lock, excl);
-}
-
-/**
- * operations on cfs_hash bucket (bd: bucket descriptor),
- * they are normally for hash-table without rehash
- */
-void cfs_hash_bd_get(struct cfs_hash *hs, const void *key,
- struct cfs_hash_bd *bd);
-
-static inline void
-cfs_hash_bd_get_and_lock(struct cfs_hash *hs, const void *key,
- struct cfs_hash_bd *bd, int excl)
-{
- cfs_hash_bd_get(hs, key, bd);
- cfs_hash_bd_lock(hs, bd, excl);
-}
-
-static inline unsigned
-cfs_hash_bd_index_get(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
- return bd->bd_offset | (bd->bd_bucket->hsb_index << hs->hs_bkt_bits);
-}
-
-static inline void
-cfs_hash_bd_index_set(struct cfs_hash *hs, unsigned int index,
- struct cfs_hash_bd *bd)
-{
- bd->bd_bucket = hs->hs_buckets[index >> hs->hs_bkt_bits];
- bd->bd_offset = index & (CFS_HASH_BKT_NHLIST(hs) - 1U);
-}
-
-static inline void *
-cfs_hash_bd_extra_get(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
- return (void *)bd->bd_bucket +
- cfs_hash_bkt_size(hs) - hs->hs_extra_bytes;
-}
-
-static inline u32
-cfs_hash_bd_version_get(struct cfs_hash_bd *bd)
-{
- /* must be called with cfs_hash_bd_lock held */
- return bd->bd_bucket->hsb_version;
-}
-
-static inline u32
-cfs_hash_bd_count_get(struct cfs_hash_bd *bd)
-{
- /* must be called with cfs_hash_bd_lock held */
- return bd->bd_bucket->hsb_count;
-}
-
-static inline int
-cfs_hash_bd_depmax_get(struct cfs_hash_bd *bd)
-{
- return bd->bd_bucket->hsb_depmax;
-}
-
-static inline int
-cfs_hash_bd_compare(struct cfs_hash_bd *bd1, struct cfs_hash_bd *bd2)
-{
- if (bd1->bd_bucket->hsb_index != bd2->bd_bucket->hsb_index)
- return bd1->bd_bucket->hsb_index - bd2->bd_bucket->hsb_index;
-
- if (bd1->bd_offset != bd2->bd_offset)
- return bd1->bd_offset - bd2->bd_offset;
-
- return 0;
-}
-
-void cfs_hash_bd_add_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode);
-void cfs_hash_bd_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode);
-void cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old,
- struct cfs_hash_bd *bd_new,
- struct hlist_node *hnode);
-
-static inline int
-cfs_hash_bd_dec_and_lock(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- atomic_t *condition)
-{
- LASSERT(cfs_hash_with_spin_bktlock(hs));
- return atomic_dec_and_lock(condition, &bd->bd_bucket->hsb_lock.spin);
-}
-
-static inline struct hlist_head *
-cfs_hash_bd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
- return hs->hs_hops->hop_hhead(hs, bd);
-}
-
-struct hlist_node *
-cfs_hash_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- const void *key);
-struct hlist_node *
-cfs_hash_bd_peek_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- const void *key);
-
-/**
- * operations on cfs_hash bucket (bd: bucket descriptor),
- * they are safe for hash-table with rehash
- */
-void cfs_hash_dual_bd_get(struct cfs_hash *hs, const void *key,
- struct cfs_hash_bd *bds);
-void cfs_hash_dual_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- int excl);
-void cfs_hash_dual_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- int excl);
-
-static inline void
-cfs_hash_dual_bd_get_and_lock(struct cfs_hash *hs, const void *key,
- struct cfs_hash_bd *bds, int excl)
-{
- cfs_hash_dual_bd_get(hs, key, bds);
- cfs_hash_dual_bd_lock(hs, bds, excl);
-}
-
-struct hlist_node *
-cfs_hash_dual_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- const void *key);
-struct hlist_node *
-cfs_hash_dual_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- const void *key, struct hlist_node *hnode,
- int insist_add);
-struct hlist_node *
-cfs_hash_dual_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- const void *key, struct hlist_node *hnode);
-
-/* Hash init/cleanup functions */
-struct cfs_hash *
-cfs_hash_create(char *name, unsigned int cur_bits, unsigned int max_bits,
- unsigned int bkt_bits, unsigned int extra_bytes,
- unsigned int min_theta, unsigned int max_theta,
- struct cfs_hash_ops *ops, unsigned int flags);
-
-struct cfs_hash *cfs_hash_getref(struct cfs_hash *hs);
-void cfs_hash_putref(struct cfs_hash *hs);
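A creation sketch for the API above. example_ops is assumed to be filled in
with the hs_hash/hs_key/hs_keycmp/hs_object/hs_get/hs_put callbacks; the name
and sizing are illustrative:

static struct cfs_hash_ops example_ops;	/* callbacks assumed filled in */

static struct cfs_hash *example_hash_create(void)
{
	/* 2^5 hash lists initially (in buckets of 2^CFS_HASH_BKT_BITS
	 * lists each), growing to 2^10 under rehash; no extra
	 * per-bucket bytes, default theta bounds */
	return cfs_hash_create("example", 5, 10, CFS_HASH_BKT_BITS, 0,
			       CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
			       &example_ops, CFS_HASH_DEFAULT);
}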
-
-/* Hash addition functions */
-void cfs_hash_add(struct cfs_hash *hs, const void *key,
- struct hlist_node *hnode);
-int cfs_hash_add_unique(struct cfs_hash *hs, const void *key,
- struct hlist_node *hnode);
-void *cfs_hash_findadd_unique(struct cfs_hash *hs, const void *key,
- struct hlist_node *hnode);
-
-/* Hash deletion functions */
-void *cfs_hash_del(struct cfs_hash *hs, const void *key,
- struct hlist_node *hnode);
-void *cfs_hash_del_key(struct cfs_hash *hs, const void *key);
-
-/* Hash lookup/for_each functions */
-#define CFS_HASH_LOOP_HOG 1024
-
-typedef int (*cfs_hash_for_each_cb_t)(struct cfs_hash *hs,
- struct cfs_hash_bd *bd,
- struct hlist_node *node,
- void *data);
-void *
-cfs_hash_lookup(struct cfs_hash *hs, const void *key);
-void
-cfs_hash_for_each(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb, void *data);
-void
-cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb,
- void *data);
-int
-cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb,
- void *data, int start);
-int
-cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb,
- void *data);
-void
-cfs_hash_for_each_key(struct cfs_hash *hs, const void *key,
- cfs_hash_for_each_cb_t cb, void *data);
-typedef int (*cfs_hash_cond_opt_cb_t)(void *obj, void *data);
-void
-cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t cb, void *data);
-
-void
-cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned int hindex,
- cfs_hash_for_each_cb_t cb, void *data);
-int cfs_hash_is_empty(struct cfs_hash *hs);
-u64 cfs_hash_size_get(struct cfs_hash *hs);
-
-/*
- * Rehash - Theta is calculated to be the average chained
- * hash depth assuming a perfectly uniform hash function.
- */
-void cfs_hash_rehash_cancel_locked(struct cfs_hash *hs);
-void cfs_hash_rehash_cancel(struct cfs_hash *hs);
-void cfs_hash_rehash(struct cfs_hash *hs, int do_rehash);
-void cfs_hash_rehash_key(struct cfs_hash *hs, const void *old_key,
- void *new_key, struct hlist_node *hnode);
-
-#if CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1
-/* Validate hnode references the correct key */
-static inline void
-cfs_hash_key_validate(struct cfs_hash *hs, const void *key,
- struct hlist_node *hnode)
-{
- LASSERT(cfs_hash_keycmp(hs, key, hnode));
-}
-
-/* Validate hnode is in the correct bucket */
-static inline void
-cfs_hash_bucket_validate(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode)
-{
- struct cfs_hash_bd bds[2];
-
- cfs_hash_dual_bd_get(hs, cfs_hash_key(hs, hnode), bds);
- LASSERT(bds[0].bd_bucket == bd->bd_bucket ||
- bds[1].bd_bucket == bd->bd_bucket);
-}
-
-#else /* CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1 */
-
-static inline void
-cfs_hash_key_validate(struct cfs_hash *hs, const void *key,
- struct hlist_node *hnode) {}
-
-static inline void
-cfs_hash_bucket_validate(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode) {}
-
-#endif /* CFS_HASH_DEBUG_LEVEL */
-
-#define CFS_HASH_THETA_BITS 10
-#define CFS_HASH_MIN_THETA BIT(CFS_HASH_THETA_BITS - 1)
-#define CFS_HASH_MAX_THETA BIT(CFS_HASH_THETA_BITS + 1)
-
-/* Return integer component of theta */
-static inline int __cfs_hash_theta_int(int theta)
-{
- return (theta >> CFS_HASH_THETA_BITS);
-}
-
-/* Return a fractional value between 0 and 999 */
-static inline int __cfs_hash_theta_frac(int theta)
-{
- return ((theta * 1000) >> CFS_HASH_THETA_BITS) -
- (__cfs_hash_theta_int(theta) * 1000);
-}
-
-static inline int __cfs_hash_theta(struct cfs_hash *hs)
-{
- return (atomic_read(&hs->hs_count) <<
- CFS_HASH_THETA_BITS) >> hs->hs_cur_bits;
-}
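Worked example: with 2000 items and hs_cur_bits = 10 (1024 hash lists),
__cfs_hash_theta() returns (2000 << 10) >> 10 = 2000, which
__cfs_hash_theta_int() and __cfs_hash_theta_frac() decode as 1 and 953,
i.e. theta = 1.953 -- the average chain depth 2000/1024.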
-
-static inline void
-__cfs_hash_set_theta(struct cfs_hash *hs, int min, int max)
-{
- LASSERT(min < max);
- hs->hs_min_theta = (u16)min;
- hs->hs_max_theta = (u16)max;
-}
-
-/* Generic debug formatting routines mainly for proc handler */
-struct seq_file;
-void cfs_hash_debug_header(struct seq_file *m);
-void cfs_hash_debug_str(struct cfs_hash *hs, struct seq_file *m);
-
-/*
- * Generic djb2 hash algorithm for character arrays.
- */
-static inline unsigned
-cfs_hash_djb2_hash(const void *key, size_t size, unsigned int mask)
-{
- unsigned int i, hash = 5381;
-
- LASSERT(key);
-
- for (i = 0; i < size; i++)
- hash = hash * 33 + ((char *)key)[i];
-
- return (hash & mask);
-}
-
-/*
- * Generic u32 hash algorithm.
- */
-static inline unsigned
-cfs_hash_u32_hash(const u32 key, unsigned int mask)
-{
- return ((key * CFS_GOLDEN_RATIO_PRIME_32) & mask);
-}
-
-/*
- * Generic u64 hash algorithm.
- */
-static inline unsigned
-cfs_hash_u64_hash(const u64 key, unsigned int mask)
-{
- return ((unsigned int)(key * CFS_GOLDEN_RATIO_PRIME_64) & mask);
-}
-
-/** iterate over all buckets in @bds (array of struct cfs_hash_bd) */
-#define cfs_hash_for_each_bd(bds, n, i) \
- for (i = 0; i < n && (bds)[i].bd_bucket != NULL; i++)
-
-/** iterate over all buckets of @hs */
-#define cfs_hash_for_each_bucket(hs, bd, pos) \
- for (pos = 0; \
- pos < CFS_HASH_NBKT(hs) && \
- ((bd)->bd_bucket = (hs)->hs_buckets[pos]) != NULL; pos++)
-
-/** iterate over all hlist of bucket @bd */
-#define cfs_hash_bd_for_each_hlist(hs, bd, hlist) \
- for ((bd)->bd_offset = 0; \
- (bd)->bd_offset < CFS_HASH_BKT_NHLIST(hs) && \
- (hlist = cfs_hash_bd_hhead(hs, bd)) != NULL; \
- (bd)->bd_offset++)
-
-#endif /* __LIBCFS_HASH_H__ */
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
deleted file mode 100644
index 491d5971d199..000000000000
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
+++ /dev/null
@@ -1,200 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_private.h
- *
- * Various defines for libcfs.
- *
- */
-
-#ifndef __LIBCFS_PRIVATE_H__
-#define __LIBCFS_PRIVATE_H__
-
-#ifndef DEBUG_SUBSYSTEM
-# define DEBUG_SUBSYSTEM S_UNDEFINED
-#endif
-
-#define LASSERTF(cond, fmt, ...) \
-do { \
- if (unlikely(!(cond))) { \
- LIBCFS_DEBUG_MSG_DATA_DECL(__msg_data, D_EMERG, NULL); \
- libcfs_debug_msg(&__msg_data, \
- "ASSERTION( %s ) failed: " fmt, #cond, \
- ## __VA_ARGS__); \
- lbug_with_loc(&__msg_data); \
- } \
-} while (0)
-
-#define LASSERT(cond) LASSERTF(cond, "\n")
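An illustrative use of the assertion macros above; the md/rc names are
hypothetical:

static void example_check(void *md, int rc)
{
	LASSERT(md);				/* plain assertion */
	LASSERTF(rc >= 0, "rc = %d\n", rc);	/* with formatted detail */
}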
-
-#ifdef CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK
-/**
- * This is for more expensive checks that one doesn't want enabled all
- * the time. LINVRNT() has to be explicitly enabled by the
- * CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK option.
- */
-# define LINVRNT(exp) LASSERT(exp)
-#else
-# define LINVRNT(exp) ((void)sizeof !!(exp))
-#endif
-
-void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msg);
-
-#define LBUG() \
-do { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL); \
- lbug_with_loc(&msgdata); \
-} while (0)
-
-/*
- * Use #define rather than inline, as lnet_cpt_table() might
- * not be defined yet
- */
-#define kmalloc_cpt(size, flags, cpt) \
- kmalloc_node(size, flags, cfs_cpt_spread_node(lnet_cpt_table(), cpt))
-
-#define kzalloc_cpt(size, flags, cpt) \
- kmalloc_node(size, flags | __GFP_ZERO, \
- cfs_cpt_spread_node(lnet_cpt_table(), cpt))
-
-#define kvmalloc_cpt(size, flags, cpt) \
- kvmalloc_node(size, flags, \
- cfs_cpt_spread_node(lnet_cpt_table(), cpt))
-
-#define kvzalloc_cpt(size, flags, cpt) \
- kvmalloc_node(size, flags | __GFP_ZERO, \
- cfs_cpt_spread_node(lnet_cpt_table(), cpt))
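A usage sketch for the CPT-aware allocators above; the GFP_NOFS flag choice
is illustrative:

static void *example_alloc(int cpt, size_t size)
{
	/* zeroed allocation on the node backing CPU partition cpt */
	return kzalloc_cpt(size, GFP_NOFS, cpt);
}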
-
-/******************************************************************************/
-
-void libcfs_debug_dumplog(void);
-int libcfs_debug_init(unsigned long bufsize);
-int libcfs_debug_cleanup(void);
-int libcfs_debug_clear_buffer(void);
-int libcfs_debug_mark_buffer(const char *text);
-
-/*
- * Allocate a variable-length array; the returned value is an array of
- * pointers. The caller specifies the array length via count.
- */
-void *cfs_array_alloc(int count, unsigned int size);
-void cfs_array_free(void *vars);
-
-#define LASSERT_ATOMIC_ENABLED (1)
-
-#if LASSERT_ATOMIC_ENABLED
-
-/** assert value of @a is equal to @v */
-#define LASSERT_ATOMIC_EQ(a, v) \
- LASSERTF(atomic_read(a) == v, "value: %d\n", atomic_read((a)))
-
-/** assert value of @a is unequal to @v */
-#define LASSERT_ATOMIC_NE(a, v) \
- LASSERTF(atomic_read(a) != v, "value: %d\n", atomic_read((a)))
-
-/** assert value of @a is less than @v */
-#define LASSERT_ATOMIC_LT(a, v) \
- LASSERTF(atomic_read(a) < v, "value: %d\n", atomic_read((a)))
-
-/** assert value of @a is less than or equal to @v */
-#define LASSERT_ATOMIC_LE(a, v) \
- LASSERTF(atomic_read(a) <= v, "value: %d\n", atomic_read((a)))
-
-/** assert value of @a is greater than @v */
-#define LASSERT_ATOMIC_GT(a, v) \
- LASSERTF(atomic_read(a) > v, "value: %d\n", atomic_read((a)))
-
-/** assert value of @a is greater than or equal to @v */
-#define LASSERT_ATOMIC_GE(a, v) \
- LASSERTF(atomic_read(a) >= v, "value: %d\n", atomic_read((a)))
-
-/** assert value of @a is greater than @v1 and less than @v2 */
-#define LASSERT_ATOMIC_GT_LT(a, v1, v2) \
-do { \
- int __v = atomic_read(a); \
- LASSERTF(__v > v1 && __v < v2, "value: %d\n", __v); \
-} while (0)
-
-/** assert value of @a is greater than @v1 and less than or equal to @v2 */
-#define LASSERT_ATOMIC_GT_LE(a, v1, v2) \
-do { \
- int __v = atomic_read(a); \
- LASSERTF(__v > v1 && __v <= v2, "value: %d\n", __v); \
-} while (0)
-
-/** assert value of @a is greater than or equal to @v1 and less than @v2 */
-#define LASSERT_ATOMIC_GE_LT(a, v1, v2) \
-do { \
- int __v = atomic_read(a); \
- LASSERTF(__v >= v1 && __v < v2, "value: %d\n", __v); \
-} while (0)
-
-/** assert value of @a is greater than or equal to @v1 and less than or equal to @v2 */
-#define LASSERT_ATOMIC_GE_LE(a, v1, v2) \
-do { \
- int __v = atomic_read(a); \
- LASSERTF(__v >= v1 && __v <= v2, "value: %d\n", __v); \
-} while (0)
-
-#else /* !LASSERT_ATOMIC_ENABLED */
-
-#define LASSERT_ATOMIC_EQ(a, v) do {} while (0)
-#define LASSERT_ATOMIC_NE(a, v) do {} while (0)
-#define LASSERT_ATOMIC_LT(a, v) do {} while (0)
-#define LASSERT_ATOMIC_LE(a, v) do {} while (0)
-#define LASSERT_ATOMIC_GT(a, v) do {} while (0)
-#define LASSERT_ATOMIC_GE(a, v) do {} while (0)
-#define LASSERT_ATOMIC_GT_LT(a, v1, v2) do {} while (0)
-#define LASSERT_ATOMIC_GT_LE(a, v1, v2) do {} while (0)
-#define LASSERT_ATOMIC_GE_LT(a, v1, v2) do {} while (0)
-#define LASSERT_ATOMIC_GE_LE(a, v1, v2) do {} while (0)
-
-#endif /* LASSERT_ATOMIC_ENABLED */
-
-#define LASSERT_ATOMIC_ZERO(a) LASSERT_ATOMIC_EQ(a, 0)
-#define LASSERT_ATOMIC_POS(a) LASSERT_ATOMIC_GT(a, 0)
-
-/* implication */
-#define ergo(a, b) (!(a) || (b))
-/* logical equivalence */
-#define equi(a, b) (!!(a) == !!(b))
-
-#ifndef HAVE_CFS_SIZE_ROUND
-static inline size_t cfs_size_round(int val)
-{
- return round_up(val, 8);
-}
-
-#define HAVE_CFS_SIZE_ROUND
-#endif
-
-#endif
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h
deleted file mode 100644
index cd7c3ccb2dc0..000000000000
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h
+++ /dev/null
@@ -1,102 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_string.h
- *
- * Generic string manipulation functions.
- *
- * Author: Nathan Rutman <nathan.rutman@sun.com>
- */
-
-#ifndef __LIBCFS_STRING_H__
-#define __LIBCFS_STRING_H__
-
-#include <linux/mm.h>
-
-/* libcfs_string.c */
-/* Convert a text string to a bitmask */
-int cfs_str2mask(const char *str, const char *(*bit2str)(int bit),
- int *oldmask, int minmask, int allmask);
-/* trim leading and trailing space characters */
-char *cfs_firststr(char *str, size_t size);
-
-/**
- * Structure to represent strings that are not NUL-terminated.
- */
-struct cfs_lstr {
- char *ls_str;
- int ls_len;
-};
-
-/*
- * Structure to represent the \<range_expr\> token of the syntax.
- */
-struct cfs_range_expr {
- /*
- * Link to cfs_expr_list::el_exprs.
- */
- struct list_head re_link;
- u32 re_lo;
- u32 re_hi;
- u32 re_stride;
-};
-
-struct cfs_expr_list {
- struct list_head el_link;
- struct list_head el_exprs;
-};
-
-int cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res);
-int cfs_str2num_check(char *str, int nob, unsigned int *num,
- unsigned int min, unsigned int max);
-int cfs_expr_list_match(u32 value, struct cfs_expr_list *expr_list);
-int cfs_expr_list_print(char *buffer, int count,
- struct cfs_expr_list *expr_list);
-int cfs_expr_list_values(struct cfs_expr_list *expr_list,
- int max, u32 **values);
-static inline void
-cfs_expr_list_values_free(u32 *values, int num)
-{
- /*
- * This array is allocated by kvmalloc(), so it must not be freed
- * by OBD_FREE() from modules other than libcfs & LNet; otherwise
- * we would see a spurious memory leak
- */
- kvfree(values);
-}
-
-void cfs_expr_list_free(struct cfs_expr_list *expr_list);
-int cfs_expr_list_parse(char *str, int len, unsigned int min, unsigned int max,
- struct cfs_expr_list **elpp);
-void cfs_expr_list_free_list(struct list_head *list);
-
-#endif
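A parsing sketch for the expression-list API above; the "0-7/2" expression
(lo-hi/stride form) and the 0..255 bounds are illustrative, and error
handling is simplified:

static int example_parse(void)
{
	struct cfs_expr_list *el;
	char str[] = "0-7/2";	/* lo 0, hi 7, stride 2 -> {0, 2, 4, 6} */
	int rc;

	rc = cfs_expr_list_parse(str, sizeof(str) - 1, 0, 255, &el);
	if (rc)
		return rc;

	rc = cfs_expr_list_match(4, el);	/* 1: 4 is in the set */
	cfs_expr_list_free(el);
	return rc;
}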
diff --git a/drivers/staging/lustre/include/linux/lnet/api.h b/drivers/staging/lustre/include/linux/lnet/api.h
deleted file mode 100644
index dae2e4f0056c..000000000000
--- a/drivers/staging/lustre/include/linux/lnet/api.h
+++ /dev/null
@@ -1,212 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011 - 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- */
-
-#ifndef __LNET_API_H__
-#define __LNET_API_H__
-
-/** \defgroup lnet LNet
- *
- * The Lustre Networking subsystem.
- *
- * LNet is an asynchronous message-passing API that provides an unreliable,
- * connectionless service with no ordering guarantees. It supports OFA IB,
- * TCP/IP, and Cray Interconnects, and routes between heterogeneous networks.
- *
- * @{
- */
-
-#include <uapi/linux/lnet/lnet-types.h>
-
-/** \defgroup lnet_init_fini Initialization and cleanup
- * The LNet must be properly initialized before any LNet calls can be made.
- * @{
- */
-int LNetNIInit(lnet_pid_t requested_pid);
-int LNetNIFini(void);
-/** @} lnet_init_fini */
-
-/** \defgroup lnet_addr LNet addressing and basic types
- *
- * Addressing scheme and basic data types of LNet.
- *
- * The LNet API is memory-oriented, so LNet must be able to address not only
- * end-points but also memory regions within a process's address space.
- * An ::lnet_nid_t addresses an end-point. An ::lnet_pid_t identifies a process
- * in a node. A portal represents an opening in the address space of a
- * process. Match bits are the criteria used to identify a region of memory
- * inside a portal, and the offset specifies an offset within that region.
- *
- * LNet creates a table of portals for each process during initialization.
- * This table has MAX_PORTALS entries and its size can't be dynamically
- * changed. A portal stays empty until the owning process starts to add
- * memory regions to it. A portal is sometimes called an index because
- * it's an entry in the portals table of a process.
- *
- * \see LNetMEAttach
- * @{
- */
-int LNetGetId(unsigned int index, struct lnet_process_id *id);
-int LNetDist(lnet_nid_t nid, lnet_nid_t *srcnid, __u32 *order);
-
-/** @} lnet_addr */
-
-/** \defgroup lnet_me Match entries
- *
- * A match entry (abbreviated as ME) describes a set of criteria to accept
- * incoming requests.
- *
- * A portal is essentially a match list plus a set of attributes. A match
- * list is a chain of MEs. Each ME includes a pointer to a memory descriptor
- * and a set of match criteria. The match criteria can be used to reject
- * incoming requests based on process ID or the match bits provided in the
- * request. MEs can be dynamically inserted into a match list by LNetMEAttach()
- * and LNetMEInsert(), and removed from its list by LNetMEUnlink().
- * @{
- */
-int LNetMEAttach(unsigned int portal,
- struct lnet_process_id match_id_in,
- __u64 match_bits_in,
- __u64 ignore_bits_in,
- enum lnet_unlink unlink_in,
- enum lnet_ins_pos pos_in,
- struct lnet_handle_me *handle_out);
-
-int LNetMEInsert(struct lnet_handle_me current_in,
- struct lnet_process_id match_id_in,
- __u64 match_bits_in,
- __u64 ignore_bits_in,
- enum lnet_unlink unlink_in,
- enum lnet_ins_pos position_in,
- struct lnet_handle_me *handle_out);
-
-int LNetMEUnlink(struct lnet_handle_me current_in);
-/** @} lnet_me */
-
-/** \defgroup lnet_md Memory descriptors
- *
- * A memory descriptor contains information about a region of a user's
- * memory (either in kernel or user space) and optionally points to an
- * event queue where information about the operations performed on the
- * memory descriptor are recorded. Memory descriptor is abbreviated as
- * MD and can be used interchangeably with the memory region it describes.
- *
- * The LNet API provides two operations to create MDs: LNetMDAttach()
- * and LNetMDBind(); one operation to unlink and release the resources
- * associated with an MD: LNetMDUnlink().
- * @{
- */
-int LNetMDAttach(struct lnet_handle_me current_in,
- struct lnet_md md_in,
- enum lnet_unlink unlink_in,
- struct lnet_handle_md *md_handle_out);
-
-int LNetMDBind(struct lnet_md md_in,
- enum lnet_unlink unlink_in,
- struct lnet_handle_md *md_handle_out);
-
-int LNetMDUnlink(struct lnet_handle_md md_in);
-/** @} lnet_md */
-
-/** \defgroup lnet_eq Events and event queues
- *
- * Event queues (abbreviated as EQ) are used to log operations performed on
- * local MDs. In particular, they signal the completion of a data transmission
- * into or out of an MD. They can also be used to hold acknowledgments for
- * completed PUT operations and indicate when an MD has been unlinked. Multiple
- * MDs can share a single EQ. An EQ may have an optional event handler
- * associated with it. If an event handler exists, it will be run for each
- * event that is deposited into the EQ.
- *
- * In addition to the lnet_handle_eq, the LNet API defines two types
- * associated with events: The ::lnet_event_kind defines the kinds of events
- * that can be stored in an EQ. The lnet_event defines a structure that
- * holds the information associated with an event.
- *
- * There are five functions for dealing with EQs: LNetEQAlloc() is used to
- * create an EQ and allocate the resources needed, while LNetEQFree()
- * releases these resources and frees the EQ. LNetEQGet() retrieves the next
- * event from an EQ, and LNetEQWait() can be used to block a process until
- * an EQ has at least one event. LNetEQPoll() can be used to test or wait
- * on multiple EQs.
- * @{
- */
-int LNetEQAlloc(unsigned int count_in,
- lnet_eq_handler_t handler,
- struct lnet_handle_eq *handle_out);
-
-int LNetEQFree(struct lnet_handle_eq eventq_in);
-
-int LNetEQPoll(struct lnet_handle_eq *eventqs_in,
- int neq_in,
- int timeout_ms,
- int interruptible,
- struct lnet_event *event_out,
- int *which_eq_out);
-/** @} lnet_eq */
-
-/** \defgroup lnet_data Data movement operations
- *
- * The LNet API provides two data movement operations: LNetPut()
- * and LNetGet().
- * @{
- */
-int LNetPut(lnet_nid_t self,
- struct lnet_handle_md md_in,
- enum lnet_ack_req ack_req_in,
- struct lnet_process_id target_in,
- unsigned int portal_in,
- __u64 match_bits_in,
- unsigned int offset_in,
- __u64 hdr_data_in);
-
-int LNetGet(lnet_nid_t self,
- struct lnet_handle_md md_in,
- struct lnet_process_id target_in,
- unsigned int portal_in,
- __u64 match_bits_in,
- unsigned int offset_in);
-/** @} lnet_data */
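A PUT sketch built on the declarations above. The portal number, match bits,
and the LNET_NID_ANY source (letting LNet choose the local NID) are
illustrative assumptions; a real caller obtains the MD handle from
LNetMDAttach() or LNetMDBind() first:

static int example_put(struct lnet_handle_md mdh,
		       struct lnet_process_id target)
{
	return LNetPut(LNET_NID_ANY, mdh, LNET_ACK_REQ, target,
		       12 /* portal */, 0xbe11 /* match bits */,
		       0 /* offset */, 0 /* hdr data */);
}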
-
-/** \defgroup lnet_misc Miscellaneous operations.
- * Miscellaneous operations.
- * @{
- */
-int LNetSetLazyPortal(int portal);
-int LNetClearLazyPortal(int portal);
-int LNetCtl(unsigned int cmd, void *arg);
-void LNetDebugPeer(struct lnet_process_id id);
-
-/** @} lnet_misc */
-
-/** @} lnet */
-#endif
diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
deleted file mode 100644
index 973c17a1c4a1..000000000000
--- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
+++ /dev/null
@@ -1,652 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- *
- * lnet/include/lnet/lib-lnet.h
- */
-
-#ifndef __LNET_LIB_LNET_H__
-#define __LNET_LIB_LNET_H__
-
-#include <linux/libcfs/libcfs.h>
-#include <linux/libcfs/libcfs_cpu.h>
-#include <linux/libcfs/libcfs_string.h>
-#include <net/sock.h>
-
-#include <linux/lnet/api.h>
-#include <linux/lnet/lib-types.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-#include <uapi/linux/lnet/lnet-types.h>
-#include <uapi/linux/lnet/lnetctl.h>
-#include <uapi/linux/lnet/nidstr.h>
-
-extern struct lnet the_lnet; /* THE network */
-
-#if (BITS_PER_LONG == 32)
-/* 2 CPTs; allowing more CPTs could put us under memory pressure */
-#define LNET_CPT_MAX_BITS 1
-
-#else /* 64-bit system */
-/*
- * 256 CPTs for thousands of CPUs; allowing more CPTs would risk
- * exhausting the lh_cookie space.
- */
-#define LNET_CPT_MAX_BITS 8
-#endif /* BITS_PER_LONG == 32 */
-
-/* max allowed CPT number */
-#define LNET_CPT_MAX (1 << LNET_CPT_MAX_BITS)
-
-#define LNET_CPT_NUMBER (the_lnet.ln_cpt_number)
-#define LNET_CPT_BITS (the_lnet.ln_cpt_bits)
-#define LNET_CPT_MASK ((1ULL << LNET_CPT_BITS) - 1)
-
-/** exclusive lock */
-#define LNET_LOCK_EX CFS_PERCPT_LOCK_EX
-
-/* need both kernel and user-land acceptor */
-#define LNET_ACCEPTOR_MIN_RESERVED_PORT 512
-#define LNET_ACCEPTOR_MAX_RESERVED_PORT 1023
-
-static inline int lnet_is_route_alive(struct lnet_route *route)
-{
- /* gateway is down */
- if (!route->lr_gateway->lp_alive)
- return 0;
- /* no NI status, assume it's alive */
- if ((route->lr_gateway->lp_ping_feats &
- LNET_PING_FEAT_NI_STATUS) == 0)
- return 1;
- /* has NI status, check # down NIs */
- return route->lr_downis == 0;
-}
-
-static inline int lnet_is_wire_handle_none(struct lnet_handle_wire *wh)
-{
- return (wh->wh_interface_cookie == LNET_WIRE_HANDLE_COOKIE_NONE &&
- wh->wh_object_cookie == LNET_WIRE_HANDLE_COOKIE_NONE);
-}
-
-static inline int lnet_md_exhausted(struct lnet_libmd *md)
-{
- return (!md->md_threshold ||
- ((md->md_options & LNET_MD_MAX_SIZE) &&
- md->md_offset + md->md_max_size > md->md_length));
-}
-
-static inline int lnet_md_unlinkable(struct lnet_libmd *md)
-{
- /*
- * Should unlink md when its refcount is 0 and either:
- * - md has been flagged for deletion (by auto unlink or
- * LNetM[DE]Unlink, in the latter case md may not be exhausted).
- * - auto unlink is on and md is exhausted.
- */
- if (md->md_refcount)
- return 0;
-
- if (md->md_flags & LNET_MD_FLAG_ZOMBIE)
- return 1;
-
- return ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) &&
- lnet_md_exhausted(md));
-}
-
-#define lnet_cpt_table() (the_lnet.ln_cpt_table)
-#define lnet_cpt_current() cfs_cpt_current(the_lnet.ln_cpt_table, 1)
-
-static inline int
-lnet_cpt_of_cookie(__u64 cookie)
-{
- unsigned int cpt = (cookie >> LNET_COOKIE_TYPE_BITS) & LNET_CPT_MASK;
-
- /*
-	 * LNET_CPT_NUMBER doesn't have to be a power of 2, which means an
-	 * invalid cookie can yield an out-of-range cpt
- */
- return cpt < LNET_CPT_NUMBER ? cpt : cpt % LNET_CPT_NUMBER;
-}
-
-static inline void
-lnet_res_lock(int cpt)
-{
- cfs_percpt_lock(the_lnet.ln_res_lock, cpt);
-}
-
-static inline void
-lnet_res_unlock(int cpt)
-{
- cfs_percpt_unlock(the_lnet.ln_res_lock, cpt);
-}
-
-static inline int
-lnet_res_lock_current(void)
-{
- int cpt = lnet_cpt_current();
-
- lnet_res_lock(cpt);
- return cpt;
-}
-
-static inline void
-lnet_net_lock(int cpt)
-{
- cfs_percpt_lock(the_lnet.ln_net_lock, cpt);
-}
-
-static inline void
-lnet_net_unlock(int cpt)
-{
- cfs_percpt_unlock(the_lnet.ln_net_lock, cpt);
-}
-
-static inline int
-lnet_net_lock_current(void)
-{
- int cpt = lnet_cpt_current();
-
- lnet_net_lock(cpt);
- return cpt;
-}
-
-#define LNET_LOCK() lnet_net_lock(LNET_LOCK_EX)
-#define LNET_UNLOCK() lnet_net_unlock(LNET_LOCK_EX)
-
-#define lnet_ptl_lock(ptl) spin_lock(&(ptl)->ptl_lock)
-#define lnet_ptl_unlock(ptl) spin_unlock(&(ptl)->ptl_lock)
-#define lnet_eq_wait_lock() spin_lock(&the_lnet.ln_eq_wait_lock)
-#define lnet_eq_wait_unlock() spin_unlock(&the_lnet.ln_eq_wait_lock)
-#define lnet_ni_lock(ni) spin_lock(&(ni)->ni_lock)
-#define lnet_ni_unlock(ni) spin_unlock(&(ni)->ni_lock)
-
-#define MAX_PORTALS 64
-
-static inline struct lnet_libmd *
-lnet_md_alloc(struct lnet_md *umd)
-{
- struct lnet_libmd *md;
- unsigned int size;
- unsigned int niov;
-
- if (umd->options & LNET_MD_KIOV) {
- niov = umd->length;
- size = offsetof(struct lnet_libmd, md_iov.kiov[niov]);
- } else {
- niov = umd->options & LNET_MD_IOVEC ? umd->length : 1;
- size = offsetof(struct lnet_libmd, md_iov.iov[niov]);
- }
-
- md = kzalloc(size, GFP_NOFS);
-
- if (md) {
- /* Set here in case of early free */
- md->md_options = umd->options;
- md->md_niov = niov;
- INIT_LIST_HEAD(&md->md_list);
- }
-
- return md;
-}
-
-struct lnet_libhandle *lnet_res_lh_lookup(struct lnet_res_container *rec,
- __u64 cookie);
-void lnet_res_lh_initialize(struct lnet_res_container *rec,
- struct lnet_libhandle *lh);
-static inline void
-lnet_res_lh_invalidate(struct lnet_libhandle *lh)
-{
- /* NB: cookie is still useful, don't reset it */
- list_del(&lh->lh_hash_chain);
-}
-
-static inline void
-lnet_eq2handle(struct lnet_handle_eq *handle, struct lnet_eq *eq)
-{
- if (!eq) {
- LNetInvalidateEQHandle(handle);
- return;
- }
-
- handle->cookie = eq->eq_lh.lh_cookie;
-}
-
-static inline struct lnet_eq *
-lnet_handle2eq(struct lnet_handle_eq *handle)
-{
- struct lnet_libhandle *lh;
-
- lh = lnet_res_lh_lookup(&the_lnet.ln_eq_container, handle->cookie);
- if (!lh)
- return NULL;
-
- return lh_entry(lh, struct lnet_eq, eq_lh);
-}
-
-static inline void
-lnet_md2handle(struct lnet_handle_md *handle, struct lnet_libmd *md)
-{
- handle->cookie = md->md_lh.lh_cookie;
-}
-
-static inline struct lnet_libmd *
-lnet_handle2md(struct lnet_handle_md *handle)
-{
- /* ALWAYS called with resource lock held */
- struct lnet_libhandle *lh;
- int cpt;
-
- cpt = lnet_cpt_of_cookie(handle->cookie);
- lh = lnet_res_lh_lookup(the_lnet.ln_md_containers[cpt],
- handle->cookie);
- if (!lh)
- return NULL;
-
- return lh_entry(lh, struct lnet_libmd, md_lh);
-}
-
-static inline struct lnet_libmd *
-lnet_wire_handle2md(struct lnet_handle_wire *wh)
-{
- /* ALWAYS called with resource lock held */
- struct lnet_libhandle *lh;
- int cpt;
-
- if (wh->wh_interface_cookie != the_lnet.ln_interface_cookie)
- return NULL;
-
- cpt = lnet_cpt_of_cookie(wh->wh_object_cookie);
- lh = lnet_res_lh_lookup(the_lnet.ln_md_containers[cpt],
- wh->wh_object_cookie);
- if (!lh)
- return NULL;
-
- return lh_entry(lh, struct lnet_libmd, md_lh);
-}
-
-static inline void
-lnet_me2handle(struct lnet_handle_me *handle, struct lnet_me *me)
-{
- handle->cookie = me->me_lh.lh_cookie;
-}
-
-static inline struct lnet_me *
-lnet_handle2me(struct lnet_handle_me *handle)
-{
- /* ALWAYS called with resource lock held */
- struct lnet_libhandle *lh;
- int cpt;
-
- cpt = lnet_cpt_of_cookie(handle->cookie);
- lh = lnet_res_lh_lookup(the_lnet.ln_me_containers[cpt],
- handle->cookie);
- if (!lh)
- return NULL;
-
- return lh_entry(lh, struct lnet_me, me_lh);
-}
-
-static inline void
-lnet_peer_addref_locked(struct lnet_peer *lp)
-{
- LASSERT(lp->lp_refcount > 0);
- lp->lp_refcount++;
-}
-
-void lnet_destroy_peer_locked(struct lnet_peer *lp);
-
-static inline void
-lnet_peer_decref_locked(struct lnet_peer *lp)
-{
- LASSERT(lp->lp_refcount > 0);
- lp->lp_refcount--;
- if (!lp->lp_refcount)
- lnet_destroy_peer_locked(lp);
-}
-
-static inline int
-lnet_isrouter(struct lnet_peer *lp)
-{
- return lp->lp_rtr_refcount ? 1 : 0;
-}
-
-static inline void
-lnet_ni_addref_locked(struct lnet_ni *ni, int cpt)
-{
- LASSERT(cpt >= 0 && cpt < LNET_CPT_NUMBER);
- LASSERT(*ni->ni_refs[cpt] >= 0);
-
- (*ni->ni_refs[cpt])++;
-}
-
-static inline void
-lnet_ni_addref(struct lnet_ni *ni)
-{
- lnet_net_lock(0);
- lnet_ni_addref_locked(ni, 0);
- lnet_net_unlock(0);
-}
-
-static inline void
-lnet_ni_decref_locked(struct lnet_ni *ni, int cpt)
-{
- LASSERT(cpt >= 0 && cpt < LNET_CPT_NUMBER);
- LASSERT(*ni->ni_refs[cpt] > 0);
-
- (*ni->ni_refs[cpt])--;
-}
-
-static inline void
-lnet_ni_decref(struct lnet_ni *ni)
-{
- lnet_net_lock(0);
- lnet_ni_decref_locked(ni, 0);
- lnet_net_unlock(0);
-}
-
-void lnet_ni_free(struct lnet_ni *ni);
-struct lnet_ni *
-lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist);
-
-static inline int
-lnet_nid2peerhash(lnet_nid_t nid)
-{
- return hash_long(nid, LNET_PEER_HASH_BITS);
-}
-
-static inline struct list_head *
-lnet_net2rnethash(__u32 net)
-{
- return &the_lnet.ln_remote_nets_hash[(LNET_NETNUM(net) +
- LNET_NETTYP(net)) &
- ((1U << the_lnet.ln_remote_nets_hbits) - 1)];
-}
-
-extern struct lnet_lnd the_lolnd;
-extern int avoid_asym_router_failure;
-
-int lnet_cpt_of_nid_locked(lnet_nid_t nid);
-int lnet_cpt_of_nid(lnet_nid_t nid);
-struct lnet_ni *lnet_nid2ni_locked(lnet_nid_t nid, int cpt);
-struct lnet_ni *lnet_net2ni_locked(__u32 net, int cpt);
-struct lnet_ni *lnet_net2ni(__u32 net);
-
-extern int portal_rotor;
-
-int lnet_lib_init(void);
-void lnet_lib_exit(void);
-
-int lnet_notify(struct lnet_ni *ni, lnet_nid_t peer, int alive,
- unsigned long when);
-void lnet_notify_locked(struct lnet_peer *lp, int notifylnd, int alive,
- unsigned long when);
-int lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway_nid,
- unsigned int priority);
-int lnet_check_routes(void);
-int lnet_del_route(__u32 net, lnet_nid_t gw_nid);
-void lnet_destroy_routes(void);
-int lnet_get_route(int idx, __u32 *net, __u32 *hops,
- lnet_nid_t *gateway, __u32 *alive, __u32 *priority);
-int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg);
-
-void lnet_router_debugfs_init(void);
-void lnet_router_debugfs_fini(void);
-int lnet_rtrpools_alloc(int im_a_router);
-void lnet_destroy_rtrbuf(struct lnet_rtrbuf *rb, int npages);
-int lnet_rtrpools_adjust(int tiny, int small, int large);
-int lnet_rtrpools_enable(void);
-void lnet_rtrpools_disable(void);
-void lnet_rtrpools_free(int keep_pools);
-struct lnet_remotenet *lnet_find_net_locked(__u32 net);
-int lnet_dyn_add_ni(lnet_pid_t requested_pid,
- struct lnet_ioctl_config_data *conf);
-int lnet_dyn_del_ni(__u32 net);
-int lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason);
-
-int lnet_islocalnid(lnet_nid_t nid);
-int lnet_islocalnet(__u32 net);
-
-void lnet_msg_attach_md(struct lnet_msg *msg, struct lnet_libmd *md,
- unsigned int offset, unsigned int mlen);
-void lnet_msg_detach_md(struct lnet_msg *msg, int status);
-void lnet_build_unlink_event(struct lnet_libmd *md, struct lnet_event *ev);
-void lnet_build_msg_event(struct lnet_msg *msg, enum lnet_event_kind ev_type);
-void lnet_msg_commit(struct lnet_msg *msg, int cpt);
-void lnet_msg_decommit(struct lnet_msg *msg, int cpt, int status);
-
-void lnet_eq_enqueue_event(struct lnet_eq *eq, struct lnet_event *ev);
-void lnet_prep_send(struct lnet_msg *msg, int type,
- struct lnet_process_id target, unsigned int offset,
- unsigned int len);
-int lnet_send(lnet_nid_t nid, struct lnet_msg *msg, lnet_nid_t rtr_nid);
-void lnet_return_tx_credits_locked(struct lnet_msg *msg);
-void lnet_return_rx_credits_locked(struct lnet_msg *msg);
-void lnet_schedule_blocked_locked(struct lnet_rtrbufpool *rbp);
-void lnet_drop_routed_msgs_locked(struct list_head *list, int cpt);
-
-/* portals functions */
-/* portals attributes */
-static inline int
-lnet_ptl_is_lazy(struct lnet_portal *ptl)
-{
- return !!(ptl->ptl_options & LNET_PTL_LAZY);
-}
-
-static inline int
-lnet_ptl_is_unique(struct lnet_portal *ptl)
-{
- return !!(ptl->ptl_options & LNET_PTL_MATCH_UNIQUE);
-}
-
-static inline int
-lnet_ptl_is_wildcard(struct lnet_portal *ptl)
-{
- return !!(ptl->ptl_options & LNET_PTL_MATCH_WILDCARD);
-}
-
-static inline void
-lnet_ptl_setopt(struct lnet_portal *ptl, int opt)
-{
- ptl->ptl_options |= opt;
-}
-
-static inline void
-lnet_ptl_unsetopt(struct lnet_portal *ptl, int opt)
-{
- ptl->ptl_options &= ~opt;
-}
-
-/* match-table functions */
-struct list_head *lnet_mt_match_head(struct lnet_match_table *mtable,
- struct lnet_process_id id, __u64 mbits);
-struct lnet_match_table *lnet_mt_of_attach(unsigned int index,
- struct lnet_process_id id,
- __u64 mbits, __u64 ignore_bits,
- enum lnet_ins_pos pos);
-int lnet_mt_match_md(struct lnet_match_table *mtable,
- struct lnet_match_info *info, struct lnet_msg *msg);
-
-/* portals match/attach functions */
-void lnet_ptl_attach_md(struct lnet_me *me, struct lnet_libmd *md,
- struct list_head *matches, struct list_head *drops);
-void lnet_ptl_detach_md(struct lnet_me *me, struct lnet_libmd *md);
-int lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg);
-
-/* initialize and finalize portals */
-int lnet_portals_create(void);
-void lnet_portals_destroy(void);
-
-/* message functions */
-int lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr,
- lnet_nid_t fromnid, void *private, int rdma_req);
-int lnet_parse_local(struct lnet_ni *ni, struct lnet_msg *msg);
-int lnet_parse_forward_locked(struct lnet_ni *ni, struct lnet_msg *msg);
-
-void lnet_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
- int delayed, unsigned int offset, unsigned int mlen,
- unsigned int rlen);
-void lnet_ni_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
- int delayed, unsigned int offset,
- unsigned int mlen, unsigned int rlen);
-
-struct lnet_msg *lnet_create_reply_msg(struct lnet_ni *ni,
- struct lnet_msg *get_msg);
-void lnet_set_reply_msg_len(struct lnet_ni *ni, struct lnet_msg *msg,
- unsigned int len);
-
-void lnet_finalize(struct lnet_ni *ni, struct lnet_msg *msg, int rc);
-
-void lnet_drop_message(struct lnet_ni *ni, int cpt, void *private,
- unsigned int nob);
-void lnet_drop_delayed_msg_list(struct list_head *head, char *reason);
-void lnet_recv_delayed_msg_list(struct list_head *head);
-
-int lnet_msg_container_setup(struct lnet_msg_container *container, int cpt);
-void lnet_msg_container_cleanup(struct lnet_msg_container *container);
-void lnet_msg_containers_destroy(void);
-int lnet_msg_containers_create(void);
-
-char *lnet_msgtyp2str(int type);
-void lnet_print_hdr(struct lnet_hdr *hdr);
-int lnet_fail_nid(lnet_nid_t nid, unsigned int threshold);
-
-/** \addtogroup lnet_fault_simulation @{ */
-
-int lnet_fault_ctl(int cmd, struct libcfs_ioctl_data *data);
-int lnet_fault_init(void);
-void lnet_fault_fini(void);
-
-bool lnet_drop_rule_match(struct lnet_hdr *hdr);
-
-int lnet_delay_rule_add(struct lnet_fault_attr *attr);
-int lnet_delay_rule_del(lnet_nid_t src, lnet_nid_t dst, bool shutdown);
-int lnet_delay_rule_list(int pos, struct lnet_fault_attr *attr,
- struct lnet_fault_stat *stat);
-void lnet_delay_rule_reset(void);
-void lnet_delay_rule_check(void);
-bool lnet_delay_rule_match_locked(struct lnet_hdr *hdr, struct lnet_msg *msg);
-
-/** @} lnet_fault_simulation */
-
-void lnet_counters_get(struct lnet_counters *counters);
-void lnet_counters_reset(void);
-
-unsigned int lnet_iov_nob(unsigned int niov, struct kvec *iov);
-int lnet_extract_iov(int dst_niov, struct kvec *dst,
- int src_niov, const struct kvec *src,
- unsigned int offset, unsigned int len);
-
-unsigned int lnet_kiov_nob(unsigned int niov, struct bio_vec *iov);
-int lnet_extract_kiov(int dst_niov, struct bio_vec *dst,
- int src_niov, const struct bio_vec *src,
- unsigned int offset, unsigned int len);
-
-void lnet_copy_iov2iter(struct iov_iter *to,
- unsigned int nsiov, const struct kvec *siov,
- unsigned int soffset, unsigned int nob);
-void lnet_copy_kiov2iter(struct iov_iter *to,
- unsigned int nkiov, const struct bio_vec *kiov,
- unsigned int kiovoffset, unsigned int nob);
-
-void lnet_me_unlink(struct lnet_me *me);
-
-void lnet_md_unlink(struct lnet_libmd *md);
-void lnet_md_deconstruct(struct lnet_libmd *lmd, struct lnet_md *umd);
-
-void lnet_register_lnd(struct lnet_lnd *lnd);
-void lnet_unregister_lnd(struct lnet_lnd *lnd);
-
-int lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
- __u32 local_ip, __u32 peer_ip, int peer_port);
-void lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
- __u32 peer_ip, int port);
-int lnet_count_acceptor_nis(void);
-int lnet_acceptor_timeout(void);
-int lnet_acceptor_port(void);
-
-int lnet_acceptor_start(void);
-void lnet_acceptor_stop(void);
-
-int lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask);
-int lnet_ipif_enumerate(char ***names);
-void lnet_ipif_free_enumeration(char **names, int n);
-int lnet_sock_setbuf(struct socket *socket, int txbufsize, int rxbufsize);
-int lnet_sock_getbuf(struct socket *socket, int *txbufsize, int *rxbufsize);
-int lnet_sock_getaddr(struct socket *socket, bool remote, __u32 *ip, int *port);
-int lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout);
-int lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout);
-
-int lnet_sock_listen(struct socket **sockp, __u32 ip, int port, int backlog);
-int lnet_sock_accept(struct socket **newsockp, struct socket *sock);
-int lnet_sock_connect(struct socket **sockp, int *fatal,
- __u32 local_ip, int local_port,
- __u32 peer_ip, int peer_port);
-void libcfs_sock_release(struct socket *sock);
-
-int lnet_peers_start_down(void);
-int lnet_peer_buffer_credits(struct lnet_ni *ni);
-
-int lnet_router_checker_start(void);
-void lnet_router_checker_stop(void);
-void lnet_router_ni_update_locked(struct lnet_peer *gw, __u32 net);
-void lnet_swap_pinginfo(struct lnet_ping_info *info);
-
-int lnet_parse_ip2nets(char **networksp, char *ip2nets);
-int lnet_parse_routes(char *route_str, int *im_a_router);
-int lnet_parse_networks(struct list_head *nilist, char *networks);
-int lnet_net_unique(__u32 net, struct list_head *nilist);
-
-int lnet_nid2peer_locked(struct lnet_peer **lpp, lnet_nid_t nid, int cpt);
-struct lnet_peer *lnet_find_peer_locked(struct lnet_peer_table *ptable,
- lnet_nid_t nid);
-void lnet_peer_tables_cleanup(struct lnet_ni *ni);
-void lnet_peer_tables_destroy(void);
-int lnet_peer_tables_create(void);
-void lnet_debug_peer(lnet_nid_t nid);
-int lnet_get_peer_info(__u32 peer_index, __u64 *nid,
-		       char aliveness[LNET_MAX_STR_LEN],
-		       __u32 *cpt_iter, __u32 *refcount,
-		       __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits,
-		       __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credits,
-		       __u32 *peer_tx_qnob);
-
-static inline void
-lnet_peer_set_alive(struct lnet_peer *lp)
-{
- lp->lp_last_query = jiffies;
- lp->lp_last_alive = jiffies;
- if (!lp->lp_alive)
- lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
-}
-
-#endif
diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h
deleted file mode 100644
index cfe8ee424e94..000000000000
--- a/drivers/staging/lustre/include/linux/lnet/lib-types.h
+++ /dev/null
@@ -1,666 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- *
- * lnet/include/lnet/lib-types.h
- */
-
-#ifndef __LNET_LIB_TYPES_H__
-#define __LNET_LIB_TYPES_H__
-
-#include <linux/kthread.h>
-#include <linux/uio.h>
-#include <linux/types.h>
-#include <linux/completion.h>
-
-#include <uapi/linux/lnet/lnet-types.h>
-#include <uapi/linux/lnet/lnetctl.h>
-
-/* Max payload size */
-#define LNET_MAX_PAYLOAD CONFIG_LNET_MAX_PAYLOAD
-#if (LNET_MAX_PAYLOAD < LNET_MTU)
-# error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb"
-#elif (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV))
-# error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb"
-#endif
-
-/* forward refs */
-struct lnet_libmd;
-
-struct lnet_msg {
- struct list_head msg_activelist;
- struct list_head msg_list; /* Q for credits/MD */
-
- struct lnet_process_id msg_target;
-	/* where it's from; only used when building the event */
- lnet_nid_t msg_from;
- __u32 msg_type;
-
- /* committed for sending */
- unsigned int msg_tx_committed:1;
- /* CPT # this message committed for sending */
- unsigned int msg_tx_cpt:15;
- /* committed for receiving */
- unsigned int msg_rx_committed:1;
- /* CPT # this message committed for receiving */
- unsigned int msg_rx_cpt:15;
- /* queued for tx credit */
- unsigned int msg_tx_delayed:1;
- /* queued for RX buffer */
- unsigned int msg_rx_delayed:1;
- /* ready for pending on RX delay list */
- unsigned int msg_rx_ready_delay:1;
-
- unsigned int msg_vmflush:1; /* VM trying to free memory */
- unsigned int msg_target_is_router:1; /* sending to a router */
- unsigned int msg_routing:1; /* being forwarded */
- unsigned int msg_ack:1; /* ack on finalize (PUT) */
- unsigned int msg_sending:1; /* outgoing message */
- unsigned int msg_receiving:1; /* being received */
- unsigned int msg_txcredit:1; /* taken an NI send credit */
- unsigned int msg_peertxcredit:1; /* taken a peer send credit */
- unsigned int msg_rtrcredit:1; /* taken a global router credit */
- unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */
- unsigned int msg_onactivelist:1; /* on the activelist */
- unsigned int msg_rdma_get:1;
-
- struct lnet_peer *msg_txpeer; /* peer I'm sending to */
- struct lnet_peer *msg_rxpeer; /* peer I received from */
-
- void *msg_private;
- struct lnet_libmd *msg_md;
-
- unsigned int msg_len;
- unsigned int msg_wanted;
- unsigned int msg_offset;
- unsigned int msg_niov;
- struct kvec *msg_iov;
- struct bio_vec *msg_kiov;
-
- struct lnet_event msg_ev;
- struct lnet_hdr msg_hdr;
-};
-
-struct lnet_libhandle {
- struct list_head lh_hash_chain;
- __u64 lh_cookie;
-};
-
-#define lh_entry(ptr, type, member) \
- ((type *)((char *)(ptr) - (char *)(&((type *)0)->member)))
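-
-/*
- * lh_entry() is the usual container_of() pattern; e.g. given a
- * struct lnet_libhandle embedded as eq_lh, lh_entry(lh, struct lnet_eq,
- * eq_lh) recovers the enclosing struct lnet_eq (see lnet_handle2eq()
- * in lib-lnet.h).
- */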
-
-struct lnet_eq {
- struct list_head eq_list;
- struct lnet_libhandle eq_lh;
- unsigned long eq_enq_seq;
- unsigned long eq_deq_seq;
- unsigned int eq_size;
- lnet_eq_handler_t eq_callback;
- struct lnet_event *eq_events;
- int **eq_refs; /* percpt refcount for EQ */
-};
-
-struct lnet_me {
- struct list_head me_list;
- struct lnet_libhandle me_lh;
- struct lnet_process_id me_match_id;
- unsigned int me_portal;
- unsigned int me_pos; /* hash offset in mt_hash */
- __u64 me_match_bits;
- __u64 me_ignore_bits;
- enum lnet_unlink me_unlink;
- struct lnet_libmd *me_md;
-};
-
-struct lnet_libmd {
- struct list_head md_list;
- struct lnet_libhandle md_lh;
- struct lnet_me *md_me;
- char *md_start;
- unsigned int md_offset;
- unsigned int md_length;
- unsigned int md_max_size;
- int md_threshold;
- int md_refcount;
- unsigned int md_options;
- unsigned int md_flags;
- void *md_user_ptr;
- struct lnet_eq *md_eq;
- unsigned int md_niov; /* # frags */
- union {
- struct kvec iov[LNET_MAX_IOV];
- struct bio_vec kiov[LNET_MAX_IOV];
- } md_iov;
-};
-
-#define LNET_MD_FLAG_ZOMBIE BIT(0)
-#define LNET_MD_FLAG_AUTO_UNLINK BIT(1)
-#define LNET_MD_FLAG_ABORTED BIT(2)
-
-struct lnet_test_peer {
- /* info about peers we are trying to fail */
- struct list_head tp_list; /* ln_test_peers */
- lnet_nid_t tp_nid; /* matching nid */
- unsigned int tp_threshold; /* # failures to simulate */
-};
-
-#define LNET_COOKIE_TYPE_MD 1
-#define LNET_COOKIE_TYPE_ME 2
-#define LNET_COOKIE_TYPE_EQ 3
-#define LNET_COOKIE_TYPE_BITS 2
-#define LNET_COOKIE_MASK ((1ULL << LNET_COOKIE_TYPE_BITS) - 1ULL)
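-
-/*
- * Implied cookie layout (low bits first), as decoded by
- * lnet_cpt_of_cookie() in lib-lnet.h:
- *
- *   [ type : LNET_COOKIE_TYPE_BITS ][ cpt : LNET_CPT_BITS ][ sequence ]
- */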
-
-struct lnet_ni; /* forward ref */
-
-struct lnet_lnd {
- /* fields managed by portals */
- struct list_head lnd_list; /* stash in the LND table */
- int lnd_refcount; /* # active instances */
-
- /* fields initialised by the LND */
- __u32 lnd_type;
-
- int (*lnd_startup)(struct lnet_ni *ni);
- void (*lnd_shutdown)(struct lnet_ni *ni);
- int (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg);
-
- /*
- * In data movement APIs below, payload buffers are described as a set
- * of 'niov' fragments which are...
- * EITHER
- * in virtual memory (struct iovec *iov != NULL)
- * OR
-	 *    in pages (kernel only: struct bio_vec *kiov != NULL).
-	 * The LND may NOT overwrite these fragment descriptors.
-	 * An 'offset' may specify a byte offset within the set of
-	 * fragments to start from.
- */
-
- /*
- * Start sending a preformatted message. 'private' is NULL for PUT and
- * GET messages; otherwise this is a response to an incoming message
- * and 'private' is the 'private' passed to lnet_parse(). Return
- * non-zero for immediate failure, otherwise complete later with
- * lnet_finalize()
- */
- int (*lnd_send)(struct lnet_ni *ni, void *private,
- struct lnet_msg *msg);
-
- /*
- * Start receiving 'mlen' bytes of payload data, skipping the following
- * 'rlen' - 'mlen' bytes. 'private' is the 'private' passed to
- * lnet_parse(). Return non-zero for immediate failure, otherwise
- * complete later with lnet_finalize(). This also gives back a receive
- * credit if the LND does flow control.
- */
- int (*lnd_recv)(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
- int delayed, struct iov_iter *to, unsigned int rlen);
-
- /*
- * lnet_parse() has had to delay processing of this message
- * (e.g. waiting for a forwarding buffer or send credits). Give the
- * LND a chance to free urgently needed resources. If called, return 0
- * for success and do NOT give back a receive credit; that has to wait
- * until lnd_recv() gets called. On failure return < 0 and
- * release resources; lnd_recv() will not be called.
- */
- int (*lnd_eager_recv)(struct lnet_ni *ni, void *private,
- struct lnet_msg *msg, void **new_privatep);
-
- /* notification of peer health */
- void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive);
-
- /* query of peer aliveness */
- void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer,
- unsigned long *when);
-
- /* accept a new connection */
- int (*lnd_accept)(struct lnet_ni *ni, struct socket *sock);
-};
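-
-/*
- * Minimal sketch of how an LND fills this in and registers itself
- * (illustrative; the example_* names are hypothetical, and a real LND
- * would use its own lnd_type constant rather than SOCKLND):
- */
-#if 0 /* example sketch */
-static struct lnet_lnd example_lnd = {
-	.lnd_type	= SOCKLND,
-	.lnd_startup	= example_startup,
-	.lnd_shutdown	= example_shutdown,
-	.lnd_send	= example_send,
-	.lnd_recv	= example_recv,
-};
-
-/* typically called from the LND's module_init() */
-static int __init example_lnd_init(void)
-{
-	lnet_register_lnd(&example_lnd);
-	return 0;
-}
-#endif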
-
-struct lnet_tx_queue {
- int tq_credits; /* # tx credits free */
- int tq_credits_min; /* lowest it's been */
- int tq_credits_max; /* total # tx credits */
- struct list_head tq_delayed; /* delayed TXs */
-};
-
-struct lnet_ni {
- spinlock_t ni_lock;
- struct list_head ni_list; /* chain on ln_nis */
- struct list_head ni_cptlist; /* chain on ln_nis_cpt */
- int ni_maxtxcredits; /* # tx credits */
- /* # per-peer send credits */
- int ni_peertxcredits;
- /* # per-peer router buffer credits */
- int ni_peerrtrcredits;
- /* seconds to consider peer dead */
- int ni_peertimeout;
- int ni_ncpts; /* number of CPTs */
- __u32 *ni_cpts; /* bond NI on some CPTs */
- lnet_nid_t ni_nid; /* interface's NID */
- void *ni_data; /* instance-specific data */
- struct lnet_lnd *ni_lnd; /* procedural interface */
- struct lnet_tx_queue **ni_tx_queues; /* percpt TX queues */
- int **ni_refs; /* percpt reference count */
- time64_t ni_last_alive;/* when I was last alive */
- struct lnet_ni_status *ni_status; /* my health status */
- /* per NI LND tunables */
- struct lnet_ioctl_config_lnd_tunables *ni_lnd_tunables;
- /* equivalent interfaces to use */
- char *ni_interfaces[LNET_MAX_INTERFACES];
- /* original net namespace */
- struct net *ni_net_ns;
-};
-
-#define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL
-
-/*
- * NB: value of these features equal to LNET_PROTO_PING_VERSION_x
- * of old LNet, so there shouldn't be any compatibility issue
- */
-#define LNET_PING_FEAT_INVAL (0) /* no feature */
-#define LNET_PING_FEAT_BASE BIT(0) /* just a ping */
-#define LNET_PING_FEAT_NI_STATUS BIT(1) /* return NI status */
-#define LNET_PING_FEAT_RTE_DISABLED	BIT(2)	/* routing disabled */
-
-#define LNET_PING_FEAT_MASK (LNET_PING_FEAT_BASE | \
- LNET_PING_FEAT_NI_STATUS)
-
-/* router checker data, per router */
-#define LNET_MAX_RTR_NIS 16
-#define LNET_PINGINFO_SIZE offsetof(struct lnet_ping_info, pi_ni[LNET_MAX_RTR_NIS])
-struct lnet_rc_data {
- /* chain on the_lnet.ln_zombie_rcd or ln_deathrow_rcd */
- struct list_head rcd_list;
- struct lnet_handle_md rcd_mdh; /* ping buffer MD */
- struct lnet_peer *rcd_gateway; /* reference to gateway */
- struct lnet_ping_info *rcd_pinginfo; /* ping buffer */
-};
-
-struct lnet_peer {
- struct list_head lp_hashlist; /* chain on peer hash */
- struct list_head lp_txq; /* messages blocking for
- * tx credits
- */
- struct list_head lp_rtrq; /* messages blocking for
- * router credits
- */
- struct list_head lp_rtr_list; /* chain on router list */
- int lp_txcredits; /* # tx credits available */
- int lp_mintxcredits; /* low water mark */
- int lp_rtrcredits; /* # router credits */
- int lp_minrtrcredits; /* low water mark */
- unsigned int lp_alive:1; /* alive/dead? */
- unsigned int lp_notify:1; /* notification outstanding? */
- unsigned int lp_notifylnd:1;/* outstanding notification
- * for LND?
- */
- unsigned int lp_notifying:1; /* some thread is handling
- * notification
- */
- unsigned int lp_ping_notsent;/* SEND event outstanding
- * from ping
- */
- int lp_alive_count; /* # times router went
- * dead<->alive
- */
-	long			lp_txqnob;	/* # bytes queued for sending */
- unsigned long lp_timestamp; /* time of last aliveness
- * news
- */
- unsigned long lp_ping_timestamp;/* time of last ping
- * attempt
- */
- unsigned long lp_ping_deadline; /* != 0 if ping reply
- * expected
- */
- unsigned long lp_last_alive; /* when I was last alive */
- unsigned long lp_last_query; /* when lp_ni was queried
- * last time
- */
- struct lnet_ni *lp_ni; /* interface peer is on */
- lnet_nid_t lp_nid; /* peer's NID */
- int lp_refcount; /* # refs */
- int lp_cpt; /* CPT this peer attached on */
- /* # refs from lnet_route::lr_gateway */
- int lp_rtr_refcount;
- /* returned RC ping features */
- unsigned int lp_ping_feats;
- struct list_head lp_routes; /* routers on this peer */
- struct lnet_rc_data *lp_rcd; /* router checker state */
-};
-
-/* peer hash size */
-#define LNET_PEER_HASH_BITS 9
-#define LNET_PEER_HASH_SIZE (1 << LNET_PEER_HASH_BITS)
-
-/* peer hash table */
-struct lnet_peer_table {
- int pt_version; /* /proc validity stamp */
- int pt_number; /* # peers extant */
- /* # zombies to go to deathrow (and not there yet) */
- int pt_zombies;
- struct list_head pt_deathrow; /* zombie peers */
- struct list_head *pt_hash; /* NID->peer hash */
-};
-
-/*
- * peer aliveness is enabled only on routers for peers in a network where the
- * lnet_ni::ni_peertimeout has been set to a positive value
- */
-#define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing && \
- (lp)->lp_ni->ni_peertimeout > 0)
-
-struct lnet_route {
- struct list_head lr_list; /* chain on net */
- struct list_head lr_gwlist; /* chain on gateway */
- struct lnet_peer *lr_gateway; /* router node */
- __u32 lr_net; /* remote network number */
- int lr_seq; /* sequence for round-robin */
- unsigned int lr_downis; /* number of down NIs */
- __u32 lr_hops; /* how far I am */
- unsigned int lr_priority; /* route priority */
-};
-
-#define LNET_REMOTE_NETS_HASH_DEFAULT (1U << 7)
-#define LNET_REMOTE_NETS_HASH_MAX (1U << 16)
-#define LNET_REMOTE_NETS_HASH_SIZE (1 << the_lnet.ln_remote_nets_hbits)
-
-struct lnet_remotenet {
- struct list_head lrn_list; /* chain on
- * ln_remote_nets_hash
- */
- struct list_head lrn_routes; /* routes to me */
- __u32 lrn_net; /* my net number */
-};
-
-/** lnet message has credit and can be submitted to lnd for send/receive */
-#define LNET_CREDIT_OK 0
-/** lnet message is waiting for credit */
-#define LNET_CREDIT_WAIT 1
-
-struct lnet_rtrbufpool {
- struct list_head rbp_bufs; /* my free buffer pool */
- struct list_head rbp_msgs; /* messages blocking
- * for a buffer
- */
- int rbp_npages; /* # pages in each buffer */
- /* requested number of buffers */
- int rbp_req_nbuffers;
- /* # buffers actually allocated */
- int rbp_nbuffers;
-	int			rbp_credits;	/* # free buffers /
-						 * blocked messages
- */
- int rbp_mincredits; /* low water mark */
-};
-
-struct lnet_rtrbuf {
- struct list_head rb_list; /* chain on rbp_bufs */
- struct lnet_rtrbufpool *rb_pool; /* owning pool */
- struct bio_vec rb_kiov[0]; /* the buffer space */
-};
-
-#define LNET_PEER_HASHSIZE 503 /* prime! */
-
-#define LNET_TINY_BUF_IDX 0
-#define LNET_SMALL_BUF_IDX 1
-#define LNET_LARGE_BUF_IDX 2
-
-/* # different router buffer pools */
-#define LNET_NRBPOOLS (LNET_LARGE_BUF_IDX + 1)
-
-enum lnet_match_flags {
- /* Didn't match anything */
- LNET_MATCHMD_NONE = BIT(0),
- /* Matched OK */
- LNET_MATCHMD_OK = BIT(1),
- /* Must be discarded */
- LNET_MATCHMD_DROP = BIT(2),
- /* match and buffer is exhausted */
- LNET_MATCHMD_EXHAUSTED = BIT(3),
- /* match or drop */
- LNET_MATCHMD_FINISH = (LNET_MATCHMD_OK | LNET_MATCHMD_DROP),
-};
-
-/* Options for lnet_portal::ptl_options */
-#define LNET_PTL_LAZY BIT(0)
-#define LNET_PTL_MATCH_UNIQUE BIT(1) /* unique match, for RDMA */
-#define LNET_PTL_MATCH_WILDCARD BIT(2) /* wildcard match, request portal */
-
-/* parameter for matching operations (GET, PUT) */
-struct lnet_match_info {
- __u64 mi_mbits;
- struct lnet_process_id mi_id;
- unsigned int mi_opc;
- unsigned int mi_portal;
- unsigned int mi_rlength;
- unsigned int mi_roffset;
-};
-
-/* ME hash of RDMA portal */
-#define LNET_MT_HASH_BITS 8
-#define LNET_MT_HASH_SIZE (1 << LNET_MT_HASH_BITS)
-#define LNET_MT_HASH_MASK (LNET_MT_HASH_SIZE - 1)
-/*
- * we allocate (LNET_MT_HASH_SIZE + 1) entries for lnet_match_table::mt_hash,
- * the last entry is reserved for MEs with ignore-bits
- */
-#define LNET_MT_HASH_IGNORE LNET_MT_HASH_SIZE
-/*
- * __u64 has 2^6 bits, so need 2^(LNET_MT_HASH_BITS - LNET_MT_BITS_U64) which
- * is 4 __u64s as bit-map, and add an extra __u64 (only use one bit) for the
- * ME-list with ignore-bits, which is mtable::mt_hash[LNET_MT_HASH_IGNORE]
- */
-#define LNET_MT_BITS_U64 6 /* 2^6 bits */
-#define LNET_MT_EXHAUSTED_BITS (LNET_MT_HASH_BITS - LNET_MT_BITS_U64)
-#define LNET_MT_EXHAUSTED_BMAP ((1 << LNET_MT_EXHAUSTED_BITS) + 1)
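-
-/*
- * Worked out: LNET_MT_HASH_BITS = 8 gives 256 hash lists; each __u64
- * covers 2^6 = 64 of them, so the exhausted bitmap needs
- * 2^(8 - 6) = 4 words, plus one extra word (of which a single bit is
- * used) for the ignore-bits list, hence (1 << 2) + 1 = 5 words.
- */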
-
-/* portal match table */
-struct lnet_match_table {
- /* reserved for upcoming patches, CPU partition ID */
- unsigned int mt_cpt;
- unsigned int mt_portal; /* portal index */
- /*
-	 * the match table is set as "enabled" if there's a non-exhausted MD
-	 * attached to mt_mhash; it's only valid for a wildcard portal
- */
- unsigned int mt_enabled;
- /* bitmap to flag whether MEs on mt_hash are exhausted or not */
- __u64 mt_exhausted[LNET_MT_EXHAUSTED_BMAP];
- struct list_head *mt_mhash; /* matching hash */
-};
-
-/* these are only useful for wildcard portal */
-/* Turn off message rotor for wildcard portals */
-#define LNET_PTL_ROTOR_OFF 0
-/* round-robin dispatch all PUT messages for wildcard portals */
-#define LNET_PTL_ROTOR_ON 1
-/* round-robin dispatch routed PUT message for wildcard portals */
-#define LNET_PTL_ROTOR_RR_RT 2
-/* dispatch routed PUT message by hashing source NID for wildcard portals */
-#define LNET_PTL_ROTOR_HASH_RT 3
-
-struct lnet_portal {
- spinlock_t ptl_lock;
- unsigned int ptl_index; /* portal ID, reserved */
- /* flags on this portal: lazy, unique... */
- unsigned int ptl_options;
- /* list of messages which are stealing buffer */
- struct list_head ptl_msg_stealing;
- /* messages blocking for MD */
- struct list_head ptl_msg_delayed;
- /* Match table for each CPT */
- struct lnet_match_table **ptl_mtables;
- /* spread rotor of incoming "PUT" */
- unsigned int ptl_rotor;
- /* # active entries for this portal */
- int ptl_mt_nmaps;
- /* array of active entries' cpu-partition-id */
- int ptl_mt_maps[0];
-};
-
-#define LNET_LH_HASH_BITS 12
-#define LNET_LH_HASH_SIZE (1ULL << LNET_LH_HASH_BITS)
-#define LNET_LH_HASH_MASK (LNET_LH_HASH_SIZE - 1)
-
-/* resource container (ME, MD, EQ) */
-struct lnet_res_container {
- unsigned int rec_type; /* container type */
- __u64 rec_lh_cookie; /* cookie generator */
- struct list_head rec_active; /* active resource list */
- struct list_head *rec_lh_hash; /* handle hash */
-};
-
-/* message container */
-struct lnet_msg_container {
- int msc_init; /* initialized or not */
- /* max # threads finalizing */
- int msc_nfinalizers;
- /* msgs waiting to complete finalizing */
- struct list_head msc_finalizing;
- struct list_head msc_active; /* active message list */
- /* threads doing finalization */
- void **msc_finalizers;
-};
-
-/* Router Checker states */
-#define LNET_RC_STATE_SHUTDOWN 0 /* not started */
-#define LNET_RC_STATE_RUNNING 1 /* started up OK */
-#define LNET_RC_STATE_STOPPING 2 /* telling thread to stop */
-
-struct lnet {
- /* CPU partition table of LNet */
- struct cfs_cpt_table *ln_cpt_table;
- /* number of CPTs in ln_cpt_table */
- unsigned int ln_cpt_number;
- unsigned int ln_cpt_bits;
-
- /* protect LNet resources (ME/MD/EQ) */
- struct cfs_percpt_lock *ln_res_lock;
- /* # portals */
- int ln_nportals;
- /* the vector of portals */
- struct lnet_portal **ln_portals;
- /* percpt ME containers */
- struct lnet_res_container **ln_me_containers;
- /* percpt MD container */
- struct lnet_res_container **ln_md_containers;
-
- /* Event Queue container */
- struct lnet_res_container ln_eq_container;
- wait_queue_head_t ln_eq_waitq;
- spinlock_t ln_eq_wait_lock;
- unsigned int ln_remote_nets_hbits;
-
- /* protect NI, peer table, credits, routers, rtrbuf... */
- struct cfs_percpt_lock *ln_net_lock;
- /* percpt message containers for active/finalizing/freed message */
- struct lnet_msg_container **ln_msg_containers;
- struct lnet_counters **ln_counters;
- struct lnet_peer_table **ln_peer_tables;
- /* failure simulation */
- struct list_head ln_test_peers;
- struct list_head ln_drop_rules;
- struct list_head ln_delay_rules;
-
- struct list_head ln_nis; /* LND instances */
- /* NIs bond on specific CPT(s) */
- struct list_head ln_nis_cpt;
- /* dying LND instances */
- struct list_head ln_nis_zombie;
- struct lnet_ni *ln_loni; /* the loopback NI */
-
- /* remote networks with routes to them */
- struct list_head *ln_remote_nets_hash;
- /* validity stamp */
- __u64 ln_remote_nets_version;
- /* list of all known routers */
- struct list_head ln_routers;
- /* validity stamp */
- __u64 ln_routers_version;
- /* percpt router buffer pools */
- struct lnet_rtrbufpool **ln_rtrpools;
-
- struct lnet_handle_md ln_ping_target_md;
- struct lnet_handle_eq ln_ping_target_eq;
- struct lnet_ping_info *ln_ping_info;
-
- /* router checker startup/shutdown state */
- int ln_rc_state;
- /* router checker's event queue */
- struct lnet_handle_eq ln_rc_eqh;
- /* rcd still pending on net */
- struct list_head ln_rcd_deathrow;
- /* rcd ready for free */
- struct list_head ln_rcd_zombie;
- /* serialise startup/shutdown */
- struct completion ln_rc_signal;
-
- struct mutex ln_api_mutex;
- struct mutex ln_lnd_mutex;
- struct mutex ln_delay_mutex;
- /* Have I called LNetNIInit myself? */
- int ln_niinit_self;
- /* LNetNIInit/LNetNIFini counter */
- int ln_refcount;
- /* shutdown in progress */
- int ln_shutdown;
-
- int ln_routing; /* am I a router? */
- lnet_pid_t ln_pid; /* requested pid */
- /* uniquely identifies this ni in this epoch */
- __u64 ln_interface_cookie;
- /* registered LNDs */
- struct list_head ln_lnds;
-
- /* test protocol compatibility flags */
- int ln_testprotocompat;
-
- /*
- * 0 - load the NIs from the mod params
- * 1 - do not load the NIs from the mod params
- * Reverse logic to ensure that other calls to LNetNIInit
- * need no change
- */
- bool ln_nis_from_mod_params;
-
- /*
- * waitq for router checker. As long as there are no routes in
-	 * the list, the router checker will sleep on this queue. When
-	 * routes are added, the thread will wake up.
- */
- wait_queue_head_t ln_rc_waitq;
-};
-
-#endif
diff --git a/drivers/staging/lustre/include/linux/lnet/socklnd.h b/drivers/staging/lustre/include/linux/lnet/socklnd.h
deleted file mode 100644
index 6bd1bca190a3..000000000000
--- a/drivers/staging/lustre/include/linux/lnet/socklnd.h
+++ /dev/null
@@ -1,87 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012 - 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- *
- * lnet/include/lnet/socklnd.h
- */
-#ifndef __LNET_LNET_SOCKLND_H__
-#define __LNET_LNET_SOCKLND_H__
-
-#include <uapi/linux/lnet/lnet-types.h>
-#include <uapi/linux/lnet/socklnd.h>
-
-struct ksock_hello_msg {
- __u32 kshm_magic; /* magic number of socklnd message */
- __u32 kshm_version; /* version of socklnd message */
- lnet_nid_t kshm_src_nid; /* sender's nid */
- lnet_nid_t kshm_dst_nid; /* destination nid */
- lnet_pid_t kshm_src_pid; /* sender's pid */
- lnet_pid_t kshm_dst_pid; /* destination pid */
- __u64 kshm_src_incarnation; /* sender's incarnation */
- __u64 kshm_dst_incarnation; /* destination's incarnation */
- __u32 kshm_ctype; /* connection type */
- __u32 kshm_nips; /* # IP addrs */
- __u32 kshm_ips[0]; /* IP addrs */
-} WIRE_ATTR;
-
-struct ksock_lnet_msg {
- struct lnet_hdr ksnm_hdr; /* lnet hdr */
-
- /*
- * ksnm_payload is removed because of winnt compiler's limitation:
- * zero-sized array can only be placed at the tail of [nested]
-	 * structure definitions. The LNet payload will be stored just after
-	 * the body of struct ksock_lnet_msg.
- */
-} WIRE_ATTR;
-
-struct ksock_msg {
- __u32 ksm_type; /* type of socklnd message */
- __u32 ksm_csum; /* checksum if != 0 */
- __u64 ksm_zc_cookies[2]; /* Zero-Copy request/ACK cookie */
- union {
- struct ksock_lnet_msg lnetmsg; /* lnet message, it's empty if
- * it's NOOP
- */
- } WIRE_ATTR ksm_u;
-} WIRE_ATTR;
-
-#define KSOCK_MSG_NOOP 0xC0 /* ksm_u empty */
-#define KSOCK_MSG_LNET 0xC1 /* lnet msg */
-
-/*
- * We need to know this number to parse hello msg from ksocklnd in
- * other LND (usocklnd, for example)
- */
-#define KSOCK_PROTO_V2 2
-#define KSOCK_PROTO_V3 3
-
-#endif
diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_debug.h b/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_debug.h
deleted file mode 100644
index c4d9472b374f..000000000000
--- a/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_debug.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2014, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_debug.h
- *
- * Debug messages and assertions
- *
- */
-
-#ifndef __UAPI_LIBCFS_DEBUG_H__
-#define __UAPI_LIBCFS_DEBUG_H__
-
-/**
- * Format for debug message headers
- */
-struct ptldebug_header {
- __u32 ph_len;
- __u32 ph_flags;
- __u32 ph_subsys;
- __u32 ph_mask;
- __u16 ph_cpu_id;
- __u16 ph_type;
- /* time_t overflow in 2106 */
- __u32 ph_sec;
- __u64 ph_usec;
- __u32 ph_stack;
- __u32 ph_pid;
- __u32 ph_extern_pid;
- __u32 ph_line_num;
-} __attribute__((packed));
-
-#define PH_FLAG_FIRST_RECORD 1
-
-/* Debugging subsystems (32 bits, non-overlapping) */
-#define S_UNDEFINED 0x00000001
-#define S_MDC 0x00000002
-#define S_MDS 0x00000004
-#define S_OSC 0x00000008
-#define S_OST 0x00000010
-#define S_CLASS 0x00000020
-#define S_LOG 0x00000040
-#define S_LLITE 0x00000080
-#define S_RPC 0x00000100
-#define S_MGMT 0x00000200
-#define S_LNET 0x00000400
-#define S_LND 0x00000800 /* ALL LNDs */
-#define S_PINGER 0x00001000
-#define S_FILTER 0x00002000
-#define S_LIBCFS 0x00004000
-#define S_ECHO 0x00008000
-#define S_LDLM 0x00010000
-#define S_LOV 0x00020000
-#define S_LQUOTA 0x00040000
-#define S_OSD 0x00080000
-#define S_LFSCK 0x00100000
-#define S_SNAPSHOT 0x00200000
-/* unused */
-#define S_LMV 0x00800000 /* b_new_cmd */
-/* unused */
-#define S_SEC 0x02000000 /* upcall cache */
-#define S_GSS 0x04000000 /* b_new_cmd */
-/* unused */
-#define S_MGC 0x10000000
-#define S_MGS 0x20000000
-#define S_FID 0x40000000 /* b_new_cmd */
-#define S_FLD 0x80000000 /* b_new_cmd */
-
-#define LIBCFS_DEBUG_SUBSYS_NAMES { \
- "undefined", "mdc", "mds", "osc", "ost", "class", "log", \
- "llite", "rpc", "mgmt", "lnet", "lnd", "pinger", "filter", \
- "libcfs", "echo", "ldlm", "lov", "lquota", "osd", "lfsck", \
- "snapshot", "", "lmv", "", "sec", "gss", "", "mgc", "mgs", \
- "fid", "fld", NULL }
-
-/* Debugging masks (32 bits, non-overlapping) */
-#define D_TRACE 0x00000001 /* ENTRY/EXIT markers */
-#define D_INODE 0x00000002
-#define D_SUPER 0x00000004
-#define D_EXT2 0x00000008 /* anything from ext2_debug */
-#define D_MALLOC 0x00000010 /* print malloc, free information */
-#define D_CACHE 0x00000020 /* cache-related items */
-#define D_INFO 0x00000040 /* general information */
-#define D_IOCTL 0x00000080 /* ioctl related information */
-#define D_NETERROR 0x00000100 /* network errors */
-#define D_NET 0x00000200 /* network communications */
-#define D_WARNING 0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */
-#define D_BUFFS 0x00000800
-#define D_OTHER 0x00001000
-#define D_DENTRY 0x00002000
-#define D_NETTRACE 0x00004000
-#define D_PAGE 0x00008000 /* bulk page handling */
-#define D_DLMTRACE 0x00010000
-#define D_ERROR 0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */
-#define D_EMERG 0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
-#define D_HA 0x00080000 /* recovery and failover */
-#define D_RPCTRACE 0x00100000 /* for distributed debugging */
-#define D_VFSTRACE 0x00200000
-#define D_READA 0x00400000 /* read-ahead */
-#define D_MMAP 0x00800000
-#define D_CONFIG 0x01000000
-#define D_CONSOLE 0x02000000
-#define D_QUOTA 0x04000000
-#define D_SEC 0x08000000
-#define D_LFSCK 0x10000000 /* For both OI scrub and LFSCK */
-#define D_HSM 0x20000000
-#define D_SNAPSHOT 0x40000000 /* snapshot */
-#define D_LAYOUT 0x80000000
-
-#define LIBCFS_DEBUG_MASKS_NAMES { \
- "trace", "inode", "super", "ext2", "malloc", "cache", "info", \
- "ioctl", "neterror", "net", "warning", "buffs", "other", \
- "dentry", "nettrace", "page", "dlmtrace", "error", "emerg", \
- "ha", "rpctrace", "vfstrace", "reada", "mmap", "config", \
- "console", "quota", "sec", "lfsck", "hsm", "snapshot", "layout",\
- NULL }
-
-#define D_CANTMASK (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE)
-
-#define LIBCFS_DEBUG_FILE_PATH_DEFAULT "/tmp/lustre-log"
-
-#endif /* __UAPI_LIBCFS_DEBUG_H__ */
diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_ioctl.h b/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_ioctl.h
deleted file mode 100644
index cce6b58e3682..000000000000
--- a/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_ioctl.h
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/libcfs_ioctl.h
- *
- * Low-level ioctl data structures. Kernel ioctl functions declared here,
- * and user space functions are in libcfs/util/ioctl.h.
- *
- */
-
-#ifndef __LIBCFS_IOCTL_H__
-#define __LIBCFS_IOCTL_H__
-
-#include <linux/types.h>
-#include <linux/ioctl.h>
-
-#define LIBCFS_IOCTL_VERSION 0x0001000a
-#define LIBCFS_IOCTL_VERSION2 0x0001000b
-
-struct libcfs_ioctl_hdr {
- __u32 ioc_len;
- __u32 ioc_version;
-};
-
-/** max size to copy from userspace */
-#define LIBCFS_IOC_DATA_MAX (128 * 1024)
-
-struct libcfs_ioctl_data {
- struct libcfs_ioctl_hdr ioc_hdr;
-
- __u64 ioc_nid;
- __u64 ioc_u64[1];
-
- __u32 ioc_flags;
- __u32 ioc_count;
- __u32 ioc_net;
- __u32 ioc_u32[7];
-
- __u32 ioc_inllen1;
- char *ioc_inlbuf1;
- __u32 ioc_inllen2;
- char *ioc_inlbuf2;
-
- __u32 ioc_plen1; /* buffers in userspace */
- void __user *ioc_pbuf1;
- __u32 ioc_plen2; /* buffers in userspace */
- void __user *ioc_pbuf2;
-
- char ioc_bulk[0];
-};
-
-struct libcfs_debug_ioctl_data {
- struct libcfs_ioctl_hdr hdr;
- unsigned int subs;
- unsigned int debug;
-};
-
-/* 'f' ioctls are defined in lustre_ioctl.h and lustre_user.h except for: */
-#define LIBCFS_IOC_DEBUG_MASK _IOWR('f', 250, long)
-#define IOCTL_LIBCFS_TYPE long
-
-#define IOC_LIBCFS_TYPE ('e')
-#define IOC_LIBCFS_MIN_NR 30
-/* libcfs ioctls */
-/* IOC_LIBCFS_PANIC obsolete in 2.8.0, was _IOWR('e', 30, IOCTL_LIBCFS_TYPE) */
-#define IOC_LIBCFS_CLEAR_DEBUG _IOWR('e', 31, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_MARK_DEBUG _IOWR('e', 32, IOCTL_LIBCFS_TYPE)
-/* IOC_LIBCFS_MEMHOG obsolete in 2.8.0, was _IOWR('e', 36, IOCTL_LIBCFS_TYPE) */
-/* lnet ioctls */
-#define IOC_LIBCFS_GET_NI _IOWR('e', 50, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_FAIL_NID _IOWR('e', 51, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_NOTIFY_ROUTER _IOWR('e', 55, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_UNCONFIGURE _IOWR('e', 56, IOCTL_LIBCFS_TYPE)
-/* IOC_LIBCFS_PORTALS_COMPATIBILITY _IOWR('e', 57, IOCTL_LIBCFS_TYPE) */
-#define IOC_LIBCFS_LNET_DIST _IOWR('e', 58, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_CONFIGURE _IOWR('e', 59, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_TESTPROTOCOMPAT _IOWR('e', 60, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_PING _IOWR('e', 61, IOCTL_LIBCFS_TYPE)
-/* IOC_LIBCFS_DEBUG_PEER _IOWR('e', 62, IOCTL_LIBCFS_TYPE) */
-#define IOC_LIBCFS_LNETST _IOWR('e', 63, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_LNET_FAULT _IOWR('e', 64, IOCTL_LIBCFS_TYPE)
-/* lnd ioctls */
-#define IOC_LIBCFS_REGISTER_MYNID _IOWR('e', 70, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_CLOSE_CONNECTION _IOWR('e', 71, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_PUSH_CONNECTION _IOWR('e', 72, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_GET_CONN _IOWR('e', 73, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_DEL_PEER _IOWR('e', 74, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_ADD_PEER _IOWR('e', 75, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_GET_PEER _IOWR('e', 76, IOCTL_LIBCFS_TYPE)
-/* ioctl 77 is free for use */
-#define IOC_LIBCFS_ADD_INTERFACE _IOWR('e', 78, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_DEL_INTERFACE _IOWR('e', 79, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_GET_INTERFACE _IOWR('e', 80, IOCTL_LIBCFS_TYPE)
-
-/*
- * DLC Specific IOCTL numbers.
- * In order to maintain backward compatibility with any possible external
- * tools which might be accessing the IOCTL numbers, a new group of IOCTL
- * numbers has been allocated.
- */
-#define IOCTL_CONFIG_SIZE struct lnet_ioctl_config_data
-#define IOC_LIBCFS_ADD_ROUTE _IOWR(IOC_LIBCFS_TYPE, 81, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_DEL_ROUTE _IOWR(IOC_LIBCFS_TYPE, 82, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_ROUTE _IOWR(IOC_LIBCFS_TYPE, 83, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_ADD_NET _IOWR(IOC_LIBCFS_TYPE, 84, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_DEL_NET _IOWR(IOC_LIBCFS_TYPE, 85, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_NET _IOWR(IOC_LIBCFS_TYPE, 86, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_CONFIG_RTR _IOWR(IOC_LIBCFS_TYPE, 87, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_ADD_BUF _IOWR(IOC_LIBCFS_TYPE, 88, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_BUF _IOWR(IOC_LIBCFS_TYPE, 89, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_PEER_INFO _IOWR(IOC_LIBCFS_TYPE, 90, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_GET_LNET_STATS _IOWR(IOC_LIBCFS_TYPE, 91, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_MAX_NR 91
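-
-/*
- * Hedged userspace sketch of issuing one of the DLC ioctls above.  The
- * "/dev/lnet" file descriptor and surrounding variables are assumptions
- * for illustration; only the header fields and the ioctl number come
- * from this file (struct lnet_ioctl_config_data itself lives in
- * lnet-dlc.h).
- */
-#if 0 /* example sketch */
-	struct lnet_ioctl_config_data data;
-
-	memset(&data, 0, sizeof(data));
-	data.cfg_hdr.ioc_len = sizeof(data);
-	data.cfg_hdr.ioc_version = LIBCFS_IOCTL_VERSION2;
-	data.cfg_count = 0;	/* index of the net to query */
-	rc = ioctl(fd, IOC_LIBCFS_GET_NET, &data);
-#endif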
-
-#endif /* __LIBCFS_IOCTL_H__ */
diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h b/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h
deleted file mode 100644
index c1619f411d81..000000000000
--- a/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * LGPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library.
- *
- * LGPL HEADER END
- *
- */
-/*
- * Copyright (c) 2014, Intel Corporation.
- */
-/*
- * Author: Amir Shehata <amir.shehata@intel.com>
- */
-
-#ifndef LNET_DLC_H
-#define LNET_DLC_H
-
-#include <uapi/linux/lnet/libcfs_ioctl.h>
-#include <uapi/linux/lnet/lnet-types.h>
-
-#define MAX_NUM_SHOW_ENTRIES 32
-#define LNET_MAX_STR_LEN 128
-#define LNET_MAX_SHOW_NUM_CPT 128
-#define LNET_UNDEFINED_HOPS ((__u32)(-1))
-
-struct lnet_ioctl_config_lnd_cmn_tunables {
- __u32 lct_version;
- __u32 lct_peer_timeout;
- __u32 lct_peer_tx_credits;
- __u32 lct_peer_rtr_credits;
- __u32 lct_max_tx_credits;
-};
-
-struct lnet_ioctl_config_o2iblnd_tunables {
- __u32 lnd_version;
- __u32 lnd_peercredits_hiw;
- __u32 lnd_map_on_demand;
- __u32 lnd_concurrent_sends;
- __u32 lnd_fmr_pool_size;
- __u32 lnd_fmr_flush_trigger;
- __u32 lnd_fmr_cache;
- __u16 lnd_conns_per_peer;
- __u16 pad;
-};
-
-struct lnet_ioctl_config_lnd_tunables {
- struct lnet_ioctl_config_lnd_cmn_tunables lt_cmn;
- union {
- struct lnet_ioctl_config_o2iblnd_tunables lt_o2ib;
- } lt_tun_u;
-};
-
-struct lnet_ioctl_net_config {
- char ni_interfaces[LNET_MAX_INTERFACES][LNET_MAX_STR_LEN];
- __u32 ni_status;
- __u32 ni_cpts[LNET_MAX_SHOW_NUM_CPT];
- char cfg_bulk[0];
-};
-
-#define LNET_TINY_BUF_IDX 0
-#define LNET_SMALL_BUF_IDX 1
-#define LNET_LARGE_BUF_IDX 2
-
-/* # different router buffer pools */
-#define LNET_NRBPOOLS (LNET_LARGE_BUF_IDX + 1)
-
-struct lnet_ioctl_pool_cfg {
- struct {
- __u32 pl_npages;
- __u32 pl_nbuffers;
- __u32 pl_credits;
- __u32 pl_mincredits;
- } pl_pools[LNET_NRBPOOLS];
- __u32 pl_routing;
-};
-
-struct lnet_ioctl_config_data {
- struct libcfs_ioctl_hdr cfg_hdr;
-
- __u32 cfg_net;
- __u32 cfg_count;
- __u64 cfg_nid;
- __u32 cfg_ncpts;
-
- union {
- struct {
- __u32 rtr_hop;
- __u32 rtr_priority;
- __u32 rtr_flags;
- } cfg_route;
- struct {
- char net_intf[LNET_MAX_STR_LEN];
- __s32 net_peer_timeout;
- __s32 net_peer_tx_credits;
- __s32 net_peer_rtr_credits;
- __s32 net_max_tx_credits;
- __u32 net_cksum_algo;
- __u32 net_interface_count;
- } cfg_net;
- struct {
- __u32 buf_enable;
- __s32 buf_tiny;
- __s32 buf_small;
- __s32 buf_large;
- } cfg_buffers;
- } cfg_config_u;
-
- char cfg_bulk[0];
-};
-
-struct lnet_ioctl_peer {
- struct libcfs_ioctl_hdr pr_hdr;
- __u32 pr_count;
- __u32 pr_pad;
- __u64 pr_nid;
-
- union {
- struct {
- char cr_aliveness[LNET_MAX_STR_LEN];
- __u32 cr_refcount;
- __u32 cr_ni_peer_tx_credits;
- __u32 cr_peer_tx_credits;
- __u32 cr_peer_rtr_credits;
- __u32 cr_peer_min_rtr_credits;
- __u32 cr_peer_tx_qnob;
- __u32 cr_ncpt;
- } pr_peer_credits;
- } pr_lnd_u;
-};
-
-struct lnet_ioctl_lnet_stats {
- struct libcfs_ioctl_hdr st_hdr;
- struct lnet_counters st_cntrs;
-};
-
-#endif /* LNET_DLC_H */
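
The cfg_config_u union above carries no tag field, so the active arm appears to be implied by the ioctl number used. A sketch of preparing an IOC_LIBCFS_ADD_ROUTE request; the field semantics are inferred from the member names and should be treated as illustrative, not authoritative:

#include <string.h>

static void fill_add_route(struct lnet_ioctl_config_data *data,
			   __u32 net, __u64 gw_nid, __u32 hops)
{
	memset(data, 0, sizeof(*data));
	data->cfg_net = net;		/* remote network reached via the route */
	data->cfg_nid = gw_nid;		/* gateway NID */
	data->cfg_config_u.cfg_route.rtr_hop =
		hops ? hops : LNET_UNDEFINED_HOPS;
}
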
diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/lnet-types.h b/drivers/staging/lustre/include/uapi/linux/lnet/lnet-types.h
deleted file mode 100644
index 1be9b7aa7326..000000000000
--- a/drivers/staging/lustre/include/uapi/linux/lnet/lnet-types.h
+++ /dev/null
@@ -1,669 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012 - 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- */
-
-#ifndef __LNET_TYPES_H__
-#define __LNET_TYPES_H__
-
-#include <linux/types.h>
-#include <linux/bvec.h>
-
-/** \addtogroup lnet
- * @{
- */
-
-#define LNET_VERSION "0.6.0"
-
-/** \addtogroup lnet_addr
- * @{
- */
-
-/** Portal reserved for LNet's own use.
- * \see lustre/include/lustre/lustre_idl.h for Lustre portal assignments.
- */
-#define LNET_RESERVED_PORTAL 0
-
-/**
- * Address of an end-point in an LNet network.
- *
- * A node can have multiple end-points and hence multiple addresses.
- * An LNet network can be a simple network (e.g. tcp0) or a network of
- * LNet networks connected by LNet routers. Therefore an end-point address
- * has two parts: network ID, and address within a network.
- *
- * \see LNET_NIDNET, LNET_NIDADDR, and LNET_MKNID.
- */
-typedef __u64 lnet_nid_t;
-/**
- * ID of a process in a node. Shortened as PID to distinguish from
- * lnet_process_id, the global process ID.
- */
-typedef __u32 lnet_pid_t;
-
-/** wildcard NID that matches any end-point address */
-#define LNET_NID_ANY ((lnet_nid_t)(-1))
-/** wildcard PID that matches any lnet_pid_t */
-#define LNET_PID_ANY ((lnet_pid_t)(-1))
-
-#define LNET_PID_RESERVED 0xf0000000 /* reserved bits in PID */
-#define LNET_PID_USERFLAG 0x80000000 /* set in userspace peers */
-#define LNET_PID_LUSTRE 12345
-
-#define LNET_TIME_FOREVER (-1)
-
-/* how an LNET NID encodes net:address */
-/** extract the address part of an lnet_nid_t */
-
-static inline __u32 LNET_NIDADDR(lnet_nid_t nid)
-{
- return nid & 0xffffffff;
-}
-
-static inline __u32 LNET_NIDNET(lnet_nid_t nid)
-{
- return (nid >> 32) & 0xffffffff;
-}
-
-static inline lnet_nid_t LNET_MKNID(__u32 net, __u32 addr)
-{
- return (((__u64)net) << 32) | addr;
-}
-
-static inline __u32 LNET_NETNUM(__u32 net)
-{
- return net & 0xffff;
-}
-
-static inline __u32 LNET_NETTYP(__u32 net)
-{
- return (net >> 16) & 0xffff;
-}
-
-static inline __u32 LNET_MKNET(__u32 type, __u32 num)
-{
- return (type << 16) | num;
-}
-
-#define WIRE_ATTR __packed
-
-/* Packed version of lnet_process_id to transfer via network */
-struct lnet_process_id_packed {
- /* node id / process id */
- lnet_nid_t nid;
- lnet_pid_t pid;
-} WIRE_ATTR;
-
-/*
- * The wire handle's interface cookie only matches one network interface in
- * one epoch (i.e. new cookie when the interface restarts or the node
- * reboots). The object cookie only matches one object on that interface
- * during that object's lifetime (i.e. no cookie re-use).
- */
-struct lnet_handle_wire {
- __u64 wh_interface_cookie;
- __u64 wh_object_cookie;
-} WIRE_ATTR;
-
-enum lnet_msg_type {
- LNET_MSG_ACK = 0,
- LNET_MSG_PUT,
- LNET_MSG_GET,
- LNET_MSG_REPLY,
- LNET_MSG_HELLO,
-};
-
-/*
- * The variant fields of the portals message header are aligned on an 8
- * byte boundary in the message header. Note that all types used in these
- * wire structs MUST be fixed size and the smaller types are placed at the
- * end.
- */
-struct lnet_ack {
- struct lnet_handle_wire dst_wmd;
- __u64 match_bits;
- __u32 mlength;
-} WIRE_ATTR;
-
-struct lnet_put {
- struct lnet_handle_wire ack_wmd;
- __u64 match_bits;
- __u64 hdr_data;
- __u32 ptl_index;
- __u32 offset;
-} WIRE_ATTR;
-
-struct lnet_get {
- struct lnet_handle_wire return_wmd;
- __u64 match_bits;
- __u32 ptl_index;
- __u32 src_offset;
- __u32 sink_length;
-} WIRE_ATTR;
-
-struct lnet_reply {
- struct lnet_handle_wire dst_wmd;
-} WIRE_ATTR;
-
-struct lnet_hello {
- __u64 incarnation;
- __u32 type;
-} WIRE_ATTR;
-
-struct lnet_hdr {
- lnet_nid_t dest_nid;
- lnet_nid_t src_nid;
- lnet_pid_t dest_pid;
- lnet_pid_t src_pid;
- __u32 type; /* enum lnet_msg_type */
- __u32 payload_length; /* payload data to follow */
- /*<------__u64 aligned------->*/
- union {
- struct lnet_ack ack;
- struct lnet_put put;
- struct lnet_get get;
- struct lnet_reply reply;
- struct lnet_hello hello;
- } msg;
-} WIRE_ATTR;
-
-/*
- * A HELLO message contains a magic number and protocol version
- * code in the header's dest_nid, the peer's NID in the src_nid, and
- * LNET_MSG_HELLO in the type field. All other common fields are zero
- * (including payload_length; i.e. no payload).
- * This is for use by byte-stream LNDs (e.g. TCP/IP) to check the peer is
- * running the same protocol and to find out its NID. These LNDs should
- * exchange HELLO messages when a connection is first established. Individual
- * LNDs can put whatever else they fancy in struct lnet_hdr::msg.
- */
-struct lnet_magicversion {
- __u32 magic; /* LNET_PROTO_TCP_MAGIC */
- __u16 version_major; /* increment on incompatible change */
- __u16 version_minor; /* increment on compatible change */
-} WIRE_ATTR;
-
-/* PROTO MAGIC for LNDs */
-#define LNET_PROTO_IB_MAGIC 0x0be91b91
-#define LNET_PROTO_GNI_MAGIC 0xb00fbabe /* ask Kim */
-#define LNET_PROTO_TCP_MAGIC 0xeebc0ded
-#define LNET_PROTO_ACCEPTOR_MAGIC 0xacce7100
-#define LNET_PROTO_PING_MAGIC 0x70696E67 /* 'ping' */
-
-/* Placeholder for a future "unified" protocol across all LNDs */
-/*
- * Current LNDs that receive a request with this magic will respond with a
- * "stub" reply using their current protocol
- */
-#define LNET_PROTO_MAGIC 0x45726963 /* ! */
-
-#define LNET_PROTO_TCP_VERSION_MAJOR 1
-#define LNET_PROTO_TCP_VERSION_MINOR 0
-
-/* Acceptor connection request */
-struct lnet_acceptor_connreq {
- __u32 acr_magic; /* PTL_ACCEPTOR_PROTO_MAGIC */
- __u32 acr_version; /* protocol version */
- __u64 acr_nid; /* target NID */
-} WIRE_ATTR;
-
-#define LNET_PROTO_ACCEPTOR_VERSION 1
-
-struct lnet_ni_status {
- lnet_nid_t ns_nid;
- __u32 ns_status;
- __u32 ns_unused;
-} WIRE_ATTR;
-
-struct lnet_ping_info {
- __u32 pi_magic;
- __u32 pi_features;
- lnet_pid_t pi_pid;
- __u32 pi_nnis;
- struct lnet_ni_status pi_ni[0];
-} WIRE_ATTR;
-
-struct lnet_counters {
- __u32 msgs_alloc;
- __u32 msgs_max;
- __u32 errors;
- __u32 send_count;
- __u32 recv_count;
- __u32 route_count;
- __u32 drop_count;
- __u64 send_length;
- __u64 recv_length;
- __u64 route_length;
- __u64 drop_length;
-} WIRE_ATTR;
-
-#define LNET_NI_STATUS_UP 0x15aac0de
-#define LNET_NI_STATUS_DOWN 0xdeadface
-#define LNET_NI_STATUS_INVALID 0x00000000
-
-#define LNET_MAX_INTERFACES 16
-
-/**
- * Objects maintained by the LNet are accessed through handles. Handle types
- * have names of the form lnet_handle_xx, where xx is one of the two letter
- * object type codes ('eq' for event queue, 'md' for memory descriptor, and
- * 'me' for match entry). Each type of object is given a unique handle type
- * to enhance type checking.
- */
-#define LNET_WIRE_HANDLE_COOKIE_NONE (-1)
-
-struct lnet_handle_eq {
- u64 cookie;
-};
-
-/**
- * Invalidate eq handle @h.
- */
-static inline void LNetInvalidateEQHandle(struct lnet_handle_eq *h)
-{
- h->cookie = LNET_WIRE_HANDLE_COOKIE_NONE;
-}
-
-/**
- * Check whether eq handle @h is invalid.
- *
- * @return 1 if handle is invalid, 0 if valid.
- */
-static inline int LNetEQHandleIsInvalid(struct lnet_handle_eq h)
-{
- return (LNET_WIRE_HANDLE_COOKIE_NONE == h.cookie);
-}
-
-struct lnet_handle_md {
- u64 cookie;
-};
-
-/**
- * Invalidate md handle @h.
- */
-static inline void LNetInvalidateMDHandle(struct lnet_handle_md *h)
-{
- h->cookie = LNET_WIRE_HANDLE_COOKIE_NONE;
-}
-
-/**
- * Check whether md handle @h is invalid.
- *
- * @return 1 if handle is invalid, 0 if valid.
- */
-static inline int LNetMDHandleIsInvalid(struct lnet_handle_md h)
-{
- return (LNET_WIRE_HANDLE_COOKIE_NONE == h.cookie);
-}
-
-struct lnet_handle_me {
- u64 cookie;
-};
-
-/**
- * Global process ID.
- */
-struct lnet_process_id {
- /** node id */
- lnet_nid_t nid;
- /** process id */
- lnet_pid_t pid;
-};
-/** @} lnet_addr */
-
-/** \addtogroup lnet_me
- * @{
- */
-
-/**
- * Specifies whether the match entry or memory descriptor should be unlinked
- * automatically (LNET_UNLINK) or not (LNET_RETAIN).
- */
-enum lnet_unlink {
- LNET_RETAIN = 0,
- LNET_UNLINK
-};
-
-/**
- * Values of the type lnet_ins_pos are used to control where a new match
- * entry is inserted. The value LNET_INS_BEFORE is used to insert the new
- * entry before the current entry or before the head of the list. The value
- * LNET_INS_AFTER is used to insert the new entry after the current entry
- * or after the last item in the list.
- */
-enum lnet_ins_pos {
- /** insert ME before current position or head of the list */
- LNET_INS_BEFORE,
- /** insert ME after current position or tail of the list */
- LNET_INS_AFTER,
- /** attach ME at tail of local CPU partition ME list */
- LNET_INS_LOCAL
-};
-
-/** @} lnet_me */
-
-/** \addtogroup lnet_md
- * @{
- */
-
-/**
- * Defines the visible parts of a memory descriptor. Values of this type
- * are used to initialize memory descriptors.
- */
-struct lnet_md {
- /**
- * Specify the memory region associated with the memory descriptor.
- * If the options field has:
- * - LNET_MD_KIOV bit set: The start field points to the starting
- * address of an array of struct bio_vec and the length field specifies
- * the number of entries in the array. The length can't be bigger
- * than LNET_MAX_IOV. The struct bio_vec is used to describe page-based
- * fragments that are not necessarily mapped in virtual memory.
- * - LNET_MD_IOVEC bit set: The start field points to the starting
- * address of an array of struct iovec and the length field specifies
- * the number of entries in the array. The length can't be bigger
- * than LNET_MAX_IOV. The struct iovec is used to describe fragments
- * that have virtual addresses.
- * - Otherwise: The memory region is contiguous. The start field
- * specifies the starting address for the memory region and the
- * length field specifies its length.
- *
- * When the memory region is fragmented, all fragments but the first
- * one must start on page boundary, and all but the last must end on
- * page boundary.
- */
- void *start;
- unsigned int length;
- /**
- * Specifies the maximum number of operations that can be performed
- * on the memory descriptor. An operation is any action that could
- * possibly generate an event. In the usual case, the threshold value
- * is decremented for each operation on the MD. When the threshold
- * drops to zero, the MD becomes inactive and does not respond to
- * operations. A threshold value of LNET_MD_THRESH_INF indicates that
- * there is no bound on the number of operations that may be applied
- * to a MD.
- */
- int threshold;
- /**
- * Specifies the largest incoming request that the memory descriptor
- * should respond to. When the unused portion of a MD (length -
- * local offset) falls below this value, the MD becomes inactive and
- * does not respond to further operations. This value is only used
- * if the LNET_MD_MAX_SIZE option is set.
- */
- int max_size;
- /**
- * Specifies the behavior of the memory descriptor. A bitwise OR
- * of the following values can be used:
- * - LNET_MD_OP_PUT: The LNet PUT operation is allowed on this MD.
- * - LNET_MD_OP_GET: The LNet GET operation is allowed on this MD.
- * - LNET_MD_MANAGE_REMOTE: The offset used in accessing the memory
- * region is provided by the incoming request. By default, the
- * offset is maintained locally. When maintained locally, the
- * offset is incremented by the length of the request so that
- * the next operation (PUT or GET) will access the next part of
- * the memory region. Note that only one offset variable exists
- * per memory descriptor. If both PUT and GET operations are
- * performed on a memory descriptor, the offset is updated each time.
- * - LNET_MD_TRUNCATE: The length provided in the incoming request can
- * be reduced to match the memory available in the region (determined
- * by subtracting the offset from the length of the memory region).
- * By default, if the length in the incoming operation is greater
- * than the amount of memory available, the operation is rejected.
- * - LNET_MD_ACK_DISABLE: An acknowledgment should not be sent for
- * incoming PUT operations, even if requested. By default,
- * acknowledgments are sent for PUT operations that request an
- * acknowledgment. Acknowledgments are never sent for GET operations.
- * The data sent in the REPLY serves as an implicit acknowledgment.
- * - LNET_MD_KIOV: The start and length fields specify an array of
- * struct bio_vec.
- * - LNET_MD_IOVEC: The start and length fields specify an array of
- * struct iovec.
- * - LNET_MD_MAX_SIZE: The max_size field is valid.
- *
- * Note:
- * - LNET_MD_KIOV or LNET_MD_IOVEC allows for a scatter/gather
- * capability for memory descriptors. They can't be both set.
- * - When LNET_MD_MAX_SIZE is set, the total length of the memory
- * region (i.e. sum of all fragment lengths) must not be less than
- * \a max_size.
- */
- unsigned int options;
- /**
- * A user-specified value that is associated with the memory
- * descriptor. The value does not need to be a pointer, but must fit
- * in the space used by a pointer. This value is recorded in events
- * associated with operations on this MD.
- */
- void *user_ptr;
- /**
- * A handle for the event queue used to log the operations performed on
- * the memory region. If this argument is a NULL handle (i.e. nullified
- * by LNetInvalidateHandle()), operations performed on this memory
- * descriptor are not logged.
- */
- struct lnet_handle_eq eq_handle;
-};
-
-/*
- * Max Transfer Unit (minimum supported everywhere).
- * CAVEAT EMPTOR, with multinet (i.e. routers forwarding between networks)
- * these limits are system wide and not interface-local.
- */
-#define LNET_MTU_BITS 20
-#define LNET_MTU (1 << LNET_MTU_BITS)
-
-/** limit on the number of fragments in discontiguous MDs */
-#define LNET_MAX_IOV 256
-
-/**
- * Options for the MD structure. See lnet_md::options.
- */
-#define LNET_MD_OP_PUT (1 << 0)
-/** See lnet_md::options. */
-#define LNET_MD_OP_GET (1 << 1)
-/** See lnet_md::options. */
-#define LNET_MD_MANAGE_REMOTE (1 << 2)
-/* unused (1 << 3) */
-/** See lnet_md::options. */
-#define LNET_MD_TRUNCATE (1 << 4)
-/** See lnet_md::options. */
-#define LNET_MD_ACK_DISABLE (1 << 5)
-/** See lnet_md::options. */
-#define LNET_MD_IOVEC (1 << 6)
-/** See lnet_md::options. */
-#define LNET_MD_MAX_SIZE (1 << 7)
-/** See lnet_md::options. */
-#define LNET_MD_KIOV (1 << 8)
-
-/* For compatibility with Cray Portals */
-#define LNET_MD_PHYS 0
-
-/** Infinite threshold on MD operations. See lnet_md::threshold */
-#define LNET_MD_THRESH_INF (-1)
-
-/** @} lnet_md */
-
-/** \addtogroup lnet_eq
- * @{
- */
-
-/**
- * Six types of events can be logged in an event queue.
- */
-enum lnet_event_kind {
- /** An incoming GET operation has completed on the MD. */
- LNET_EVENT_GET = 1,
- /**
- * An incoming PUT operation has completed on the MD. The
- * underlying layers will not alter the memory (on behalf of this
- * operation) once this event has been logged.
- */
- LNET_EVENT_PUT,
- /**
- * A REPLY operation has completed. This event is logged after the
- * data (if any) from the REPLY has been written into the MD.
- */
- LNET_EVENT_REPLY,
- /** An acknowledgment has been received. */
- LNET_EVENT_ACK,
- /**
- * An outgoing send (PUT or GET) operation has completed. This event
- * is logged after the entire buffer has been sent and it is safe for
- * the caller to reuse the buffer.
- *
- * Note:
- * - The LNET_EVENT_SEND doesn't guarantee message delivery. It can
- * happen even when the message has not yet been put out on the wire.
- * - It's unsafe to assume that in an outgoing GET operation
- * the LNET_EVENT_SEND event would happen before the
- * LNET_EVENT_REPLY event. The same holds for LNET_EVENT_SEND and
- * LNET_EVENT_ACK events in an outgoing PUT operation.
- */
- LNET_EVENT_SEND,
- /**
- * A MD has been unlinked. Note that LNetMDUnlink() does not
- * necessarily trigger an LNET_EVENT_UNLINK event.
- * \see LNetMDUnlink
- */
- LNET_EVENT_UNLINK,
-};
-
-#define LNET_SEQ_GT(a, b) (((signed long)((a) - (b))) > 0)
-
-/**
- * Information about an event on a MD.
- */
-struct lnet_event {
- /** The identifier (nid, pid) of the target. */
- struct lnet_process_id target;
- /** The identifier (nid, pid) of the initiator. */
- struct lnet_process_id initiator;
- /**
- * The NID of the immediate sender. If the request has been forwarded
- * by routers, this is the NID of the last hop; otherwise it's the
- * same as the initiator.
- */
- lnet_nid_t sender;
- /** Indicates the type of the event. */
- enum lnet_event_kind type;
- /** The portal table index specified in the request */
- unsigned int pt_index;
- /** A copy of the match bits specified in the request. */
- __u64 match_bits;
- /** The length (in bytes) specified in the request. */
- unsigned int rlength;
- /**
- * The length (in bytes) of the data that was manipulated by the
- * operation. For truncated operations, the manipulated length will be
- * the number of bytes specified by the MD (possibly with an offset,
- * see lnet_md). For all other operations, the manipulated length
- * will be the length of the requested operation, i.e. rlength.
- */
- unsigned int mlength;
- /**
- * The handle to the MD associated with the event. The handle may be
- * invalid if the MD has been unlinked.
- */
- struct lnet_handle_md md_handle;
- /**
- * A snapshot of the state of the MD immediately after the event has
- * been processed. In particular, the threshold field in md will
- * reflect the value of the threshold after the operation occurred.
- */
- struct lnet_md md;
- /**
- * 64 bits of out-of-band user data. Only valid for LNET_EVENT_PUT.
- * \see LNetPut
- */
- __u64 hdr_data;
- /**
- * Indicates the completion status of the operation. It's 0 for
- * successful operations, otherwise it's an error code.
- */
- int status;
- /**
- * Indicates whether the MD has been unlinked. Note that:
- * - An event with unlinked set is the last event on the MD.
- * - This field is also set for an explicit LNET_EVENT_UNLINK event.
- * \see LNetMDUnlink
- */
- int unlinked;
- /**
- * The displacement (in bytes) into the memory region that the
- * operation used. The offset can be determined by the operation for
- * a remote managed MD or by the local MD.
- * \see lnet_md::options
- */
- unsigned int offset;
- /**
- * The sequence number for this event. Sequence numbers are unique
- * to each event.
- */
- volatile unsigned long sequence;
-};
-
-/**
- * Event queue handler function type.
- *
- * The EQ handler runs for each event that is deposited into the EQ. The
- * handler is supplied with a pointer to the event that triggered the
- * handler invocation.
- *
- * The handler must not block, must be reentrant, and must not call any LNet
- * API functions. It should return as quickly as possible.
- */
-typedef void (*lnet_eq_handler_t)(struct lnet_event *event);
-#define LNET_EQ_HANDLER_NONE NULL
-/** @} lnet_eq */
-
-/** \addtogroup lnet_data
- * @{
- */
-
-/**
- * Specify whether an acknowledgment should be sent by target when the PUT
- * operation completes (i.e., when the data has been written to a MD of the
- * target process).
- *
- * \see lnet_md::options for the discussion on LNET_MD_ACK_DISABLE by which
- * acknowledgments can be disabled for a MD.
- */
-enum lnet_ack_req {
- /** Request an acknowledgment */
- LNET_ACK_REQ,
- /** Request that no acknowledgment should be generated. */
- LNET_NOACK_REQ
-};
-/** @} lnet_data */
-
-/** @} lnet */
-#endif
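
The NID helpers above pack a 16-bit LND type and a 16-bit network number into the high 32 bits, with the 32-bit address in the low word. A worked round trip (the numeric SOCKLND value 2 comes from the nidstr.h enum further below):

#include <assert.h>

static void nid_roundtrip_example(void)
{
	__u32 net = LNET_MKNET(2 /* SOCKLND */, 0);	/* the "tcp0" network */
	__u32 addr = 0xc0a80001;			/* 192.168.0.1 */
	lnet_nid_t nid = LNET_MKNID(net, addr);

	assert(LNET_NIDADDR(nid) == addr);
	assert(LNET_NIDNET(nid) == net);
	assert(LNET_NETTYP(net) == 2);
	assert(LNET_NETNUM(net) == 0);
}
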
diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h b/drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h
deleted file mode 100644
index cccb32dd28f2..000000000000
--- a/drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * header for lnet ioctl
- */
-#ifndef _LNETCTL_H_
-#define _LNETCTL_H_
-
-#include <uapi/linux/lnet/lnet-types.h>
-
-/** \addtogroup lnet_fault_simulation
- * @{
- */
-
-enum {
- LNET_CTL_DROP_ADD,
- LNET_CTL_DROP_DEL,
- LNET_CTL_DROP_RESET,
- LNET_CTL_DROP_LIST,
- LNET_CTL_DELAY_ADD,
- LNET_CTL_DELAY_DEL,
- LNET_CTL_DELAY_RESET,
- LNET_CTL_DELAY_LIST,
-};
-
-#define LNET_ACK_BIT (1 << 0)
-#define LNET_PUT_BIT (1 << 1)
-#define LNET_GET_BIT (1 << 2)
-#define LNET_REPLY_BIT (1 << 3)
-
-/** ioctl parameter for LNet fault simulation */
-struct lnet_fault_attr {
- /**
- * source NID of drop rule
- * LNET_NID_ANY is wildcard for all sources
- * 255.255.255.255@net is wildcard for all addresses from @net
- */
- lnet_nid_t fa_src;
- /** destination NID of drop rule, see \a fa_src for details */
- lnet_nid_t fa_dst;
- /**
- * Portal mask to drop, -1 means all portals, for example:
- * fa_ptl_mask = (1 << _LDLM_CB_REQUEST_PORTAL ) |
- * (1 << LDLM_CANCEL_REQUEST_PORTAL)
- *
- * If it is non-zero then only PUT and GET will be filtered, otherwise
- * there is no portal filter, all matched messages will be checked.
- */
- __u64 fa_ptl_mask;
- /**
- * message types to drop, for example:
- * fa_msg_mask = LNET_ACK_BIT | LNET_PUT_BIT
- *
- * If it is non-zero then only specified message types are filtered,
- * otherwise all message types will be checked.
- */
- __u32 fa_msg_mask;
- union {
- /** message drop simulation */
- struct {
- /** drop rate of this rule */
- __u32 da_rate;
- /**
- * time interval of message drops; mutually
- * exclusive with da_rate
- */
- __u32 da_interval;
- } drop;
- /** message latency simulation */
- struct {
- __u32 la_rate;
- /**
- * time interval of message delays; mutually
- * exclusive with la_rate
- */
- __u32 la_interval;
- /** latency to delay */
- __u32 la_latency;
- } delay;
- __u64 space[8];
- } u;
-};
-
-/** fault simulation stats */
-struct lnet_fault_stat {
- /** total # matched messages */
- __u64 fs_count;
- /** # dropped LNET_MSG_PUT by this rule */
- __u64 fs_put;
- /** # dropped LNET_MSG_ACK by this rule */
- __u64 fs_ack;
- /** # dropped LNET_MSG_GET by this rule */
- __u64 fs_get;
- /** # dropped LNET_MSG_REPLY by this rule */
- __u64 fs_reply;
- union {
- struct {
- /** total # dropped messages */
- __u64 ds_dropped;
- } drop;
- struct {
- /** total # delayed messages */
- __u64 ls_delayed;
- } delay;
- __u64 space[8];
- } u;
-};
-
-/** @} lnet_fault_simulation */
-
-#define LNET_DEV_ID 0
-#define LNET_DEV_PATH "/dev/lnet"
-
-#endif
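
A drop rule built from the attribute structure above might look like the sketch below; the 1-in-N reading of da_rate is an assumption based on the "drop rate" comment:

#include <string.h>

static void fill_drop_rule(struct lnet_fault_attr *attr)
{
	memset(attr, 0, sizeof(*attr));
	attr->fa_src = LNET_NID_ANY;		/* match any source */
	attr->fa_dst = LNET_NID_ANY;		/* match any destination */
	attr->fa_msg_mask = LNET_PUT_BIT | LNET_ACK_BIT;
	attr->u.drop.da_rate = 100;		/* assumed: drop ~1 in 100 */
}
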
diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/lnetst.h b/drivers/staging/lustre/include/uapi/linux/lnet/lnetst.h
deleted file mode 100644
index a4f9ff01d458..000000000000
--- a/drivers/staging/lustre/include/uapi/linux/lnet/lnetst.h
+++ /dev/null
@@ -1,556 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011 - 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- *
- * lnet/include/lnet/lnetst.h
- *
- * Author: Liang Zhen <liang.zhen@intel.com>
- */
-
-#ifndef __LNET_ST_H__
-#define __LNET_ST_H__
-
-#include <linux/types.h>
-
-#define LST_FEAT_NONE (0)
-#define LST_FEAT_BULK_LEN (1 << 0) /* enable variable page size */
-
-#define LST_FEATS_EMPTY (LST_FEAT_NONE)
-#define LST_FEATS_MASK (LST_FEAT_NONE | LST_FEAT_BULK_LEN)
-
-#define LST_NAME_SIZE 32 /* max name buffer length */
-
-#define LSTIO_DEBUG 0xC00 /* debug */
-#define LSTIO_SESSION_NEW 0xC01 /* create session */
-#define LSTIO_SESSION_END 0xC02 /* end session */
-#define LSTIO_SESSION_INFO 0xC03 /* query session */
-#define LSTIO_GROUP_ADD 0xC10 /* add group */
-#define LSTIO_GROUP_LIST 0xC11 /* list all groups in session */
-#define LSTIO_GROUP_INFO 0xC12 /* query default information of
- * specified group
- */
-#define LSTIO_GROUP_DEL 0xC13 /* delete group */
-#define LSTIO_NODES_ADD 0xC14 /* add nodes to specified group */
-#define LSTIO_GROUP_UPDATE 0xC15 /* update group */
-#define LSTIO_BATCH_ADD 0xC20 /* add batch */
-#define LSTIO_BATCH_START 0xC21 /* start batch */
-#define LSTIO_BATCH_STOP 0xC22 /* stop batch */
-#define LSTIO_BATCH_DEL 0xC23 /* delete batch */
-#define LSTIO_BATCH_LIST 0xC24 /* show all batches in the session */
-#define LSTIO_BATCH_INFO 0xC25 /* show detail of specified batch */
-#define LSTIO_TEST_ADD 0xC26 /* add test (to batch) */
-#define LSTIO_BATCH_QUERY 0xC27 /* query batch status */
-#define LSTIO_STAT_QUERY 0xC30 /* get stats */
-
-struct lst_sid {
- lnet_nid_t ses_nid; /* nid of console node */
- __u64 ses_stamp; /* time stamp */
-}; /*** session id */
-
-extern struct lst_sid LST_INVALID_SID;
-
-struct lst_bid {
- __u64 bat_id; /* unique id in session */
-}; /*** batch id (group of tests) */
-
-/* Status of test node */
-#define LST_NODE_ACTIVE 0x1 /* node in this session */
-#define LST_NODE_BUSY 0x2 /* node is taken by other session */
-#define LST_NODE_DOWN 0x4 /* node is down */
-#define LST_NODE_UNKNOWN 0x8 /* node not in session */
-
-struct lstcon_node_ent {
- struct lnet_process_id nde_id; /* id of node */
- int nde_state; /* state of node */
-}; /*** node entry, for list_group command */
-
-struct lstcon_ndlist_ent {
- int nle_nnode; /* # of nodes */
- int nle_nactive; /* # of active nodes */
- int nle_nbusy; /* # of busy nodes */
- int nle_ndown; /* # of down nodes */
- int nle_nunknown; /* # of unknown nodes */
-}; /*** node_list entry, for list_batch command */
-
-struct lstcon_test_ent {
- int tse_type; /* test type */
- int tse_loop; /* loop count */
- int tse_concur; /* concurrency of test */
-}; /* test summary entry, for
- * list_batch command
- */
-
-struct lstcon_batch_ent {
- int bae_state; /* batch status */
- int bae_timeout; /* batch timeout */
- int bae_ntest; /* # of tests in the batch */
-}; /* batch summary entry, for
- * list_batch command
- */
-
-struct lstcon_test_batch_ent {
- struct lstcon_ndlist_ent tbe_cli_nle; /* client (group) node_list
- * entry
- */
- struct lstcon_ndlist_ent tbe_srv_nle; /* server (group) node_list
- * entry
- */
- union {
- struct lstcon_test_ent tbe_test; /* test entry */
- struct lstcon_batch_ent tbe_batch;/* batch entry */
- } u;
-}; /* test/batch verbose information entry,
- * for list_batch command
- */
-
-struct lstcon_rpc_ent {
- struct list_head rpe_link; /* link chain */
- struct lnet_process_id rpe_peer; /* peer's id */
- struct timeval rpe_stamp; /* time stamp of RPC */
- int rpe_state; /* peer's state */
- int rpe_rpc_errno; /* RPC errno */
-
- struct lst_sid rpe_sid; /* peer's session id */
- int rpe_fwk_errno; /* framework errno */
- int rpe_priv[4]; /* private data */
- char rpe_payload[0]; /* private reply payload */
-};
-
-struct lstcon_trans_stat {
- int trs_rpc_stat[4]; /* RPCs stat (0: total 1: success
- * 2: failure
- * 3: reserved)
- */
- int trs_rpc_errno; /* RPC errno */
- int trs_fwk_stat[8]; /* framework stat */
- int trs_fwk_errno; /* errno of the first remote error */
- void *trs_fwk_private; /* private framework stat */
-};
-
-static inline int
-lstcon_rpc_stat_total(struct lstcon_trans_stat *stat, int inc)
-{
- return inc ? ++stat->trs_rpc_stat[0] : stat->trs_rpc_stat[0];
-}
-
-static inline int
-lstcon_rpc_stat_success(struct lstcon_trans_stat *stat, int inc)
-{
- return inc ? ++stat->trs_rpc_stat[1] : stat->trs_rpc_stat[1];
-}
-
-static inline int
-lstcon_rpc_stat_failure(struct lstcon_trans_stat *stat, int inc)
-{
- return inc ? ++stat->trs_rpc_stat[2] : stat->trs_rpc_stat[2];
-}
-
-static inline int
-lstcon_sesop_stat_success(struct lstcon_trans_stat *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_sesop_stat_failure(struct lstcon_trans_stat *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
-
-static inline int
-lstcon_sesqry_stat_active(struct lstcon_trans_stat *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_sesqry_stat_busy(struct lstcon_trans_stat *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
-
-static inline int
-lstcon_sesqry_stat_unknown(struct lstcon_trans_stat *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2];
-}
-
-static inline int
-lstcon_tsbop_stat_success(struct lstcon_trans_stat *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_tsbop_stat_failure(struct lstcon_trans_stat *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
-
-static inline int
-lstcon_tsbqry_stat_idle(struct lstcon_trans_stat *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_tsbqry_stat_run(struct lstcon_trans_stat *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
-
-static inline int
-lstcon_tsbqry_stat_failure(struct lstcon_trans_stat *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2];
-}
-
-static inline int
-lstcon_statqry_stat_success(struct lstcon_trans_stat *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
-}
-
-static inline int
-lstcon_statqry_stat_failure(struct lstcon_trans_stat *stat, int inc)
-{
- return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
-}
-
-/* create a session */
-struct lstio_session_new_args {
- int lstio_ses_key; /* IN: local key */
- int lstio_ses_timeout; /* IN: session timeout */
- int lstio_ses_force; /* IN: force create ? */
- /** IN: session features */
- unsigned int lstio_ses_feats;
- struct lst_sid __user *lstio_ses_idp; /* OUT: session id */
- int lstio_ses_nmlen; /* IN: name length */
- char __user *lstio_ses_namep; /* IN: session name */
-};
-
-/* query current session */
-struct lstio_session_info_args {
- struct lst_sid __user *lstio_ses_idp; /* OUT: session id */
- int __user *lstio_ses_keyp; /* OUT: local key */
- /** OUT: session features */
- unsigned int __user *lstio_ses_featp;
- struct lstcon_ndlist_ent __user *lstio_ses_ndinfo;/* OUT: */
- int lstio_ses_nmlen; /* IN: name length */
- char __user *lstio_ses_namep; /* OUT: session name */
-};
-
-/* delete a session */
-struct lstio_session_end_args {
- int lstio_ses_key; /* IN: session key */
-};
-
-#define LST_OPC_SESSION 1
-#define LST_OPC_GROUP 2
-#define LST_OPC_NODES 3
-#define LST_OPC_BATCHCLI 4
-#define LST_OPC_BATCHSRV 5
-
-struct lstio_debug_args {
- int lstio_dbg_key; /* IN: session key */
- int lstio_dbg_type; /* IN: debug
- * session|batch|
- * group|nodes list
- */
- int lstio_dbg_flags; /* IN: reserved debug
- * flags
- */
- int lstio_dbg_timeout; /* IN: timeout of
- * debug
- */
- int lstio_dbg_nmlen; /* IN: len of name */
- char __user *lstio_dbg_namep; /* IN: name of
- * group|batch
- */
- int lstio_dbg_count; /* IN: # of test nodes
- * to debug
- */
- struct lnet_process_id __user *lstio_dbg_idsp; /* IN: id of test
- * nodes
- */
- struct list_head __user *lstio_dbg_resultp; /* OUT: list head of
- * result buffer
- */
-};
-
-struct lstio_group_add_args {
- int lstio_grp_key; /* IN: session key */
- int lstio_grp_nmlen; /* IN: name length */
- char __user *lstio_grp_namep; /* IN: group name */
-};
-
-struct lstio_group_del_args {
- int lstio_grp_key; /* IN: session key */
- int lstio_grp_nmlen; /* IN: name length */
- char __user *lstio_grp_namep; /* IN: group name */
-};
-
-#define LST_GROUP_CLEAN 1 /* remove inactive nodes in the group */
-#define LST_GROUP_REFRESH 2 /* refresh inactive nodes
- * in the group
- */
-#define LST_GROUP_RMND 3 /* delete nodes from the group */
-
-struct lstio_group_update_args {
- int lstio_grp_key; /* IN: session key */
- int lstio_grp_opc; /* IN: OPC */
- int lstio_grp_args; /* IN: arguments */
- int lstio_grp_nmlen; /* IN: name length */
- char __user *lstio_grp_namep; /* IN: group name */
- int lstio_grp_count; /* IN: # of nodes id */
- struct lnet_process_id __user *lstio_grp_idsp; /* IN: array of nodes */
- struct list_head __user *lstio_grp_resultp; /* OUT: list head of
- * result buffer
- */
-};
-
-struct lstio_group_nodes_args {
- int lstio_grp_key; /* IN: session key */
- int lstio_grp_nmlen; /* IN: name length */
- char __user *lstio_grp_namep; /* IN: group name */
- int lstio_grp_count; /* IN: # of nodes */
- /** OUT: session features */
- unsigned int __user *lstio_grp_featp;
- struct lnet_process_id __user *lstio_grp_idsp; /* IN: nodes */
- struct list_head __user *lstio_grp_resultp; /* OUT: list head of
- * result buffer
- */
-};
-
-struct lstio_group_list_args {
- int lstio_grp_key; /* IN: session key */
- int lstio_grp_idx; /* IN: group idx */
- int lstio_grp_nmlen; /* IN: name len */
- char __user *lstio_grp_namep; /* OUT: name */
-};
-
-struct lstio_group_info_args {
- int lstio_grp_key; /* IN: session key */
- int lstio_grp_nmlen; /* IN: name len */
- char __user *lstio_grp_namep; /* IN: name */
- struct lstcon_ndlist_ent __user *lstio_grp_entp;/* OUT: description
- * of group
- */
- int __user *lstio_grp_idxp; /* IN/OUT: node index */
- int __user *lstio_grp_ndentp; /* IN/OUT: # of nodent */
- struct lstcon_node_ent __user *lstio_grp_dentsp;/* OUT: nodent array */
-};
-
-#define LST_DEFAULT_BATCH "batch" /* default batch name */
-
-struct lstio_batch_add_args {
- int lstio_bat_key; /* IN: session key */
- int lstio_bat_nmlen; /* IN: name length */
- char __user *lstio_bat_namep; /* IN: batch name */
-};
-
-struct lstio_batch_del_args {
- int lstio_bat_key; /* IN: session key */
- int lstio_bat_nmlen; /* IN: name length */
- char __user *lstio_bat_namep; /* IN: batch name */
-};
-
-struct lstio_batch_run_args {
- int lstio_bat_key; /* IN: session key */
- int lstio_bat_timeout; /* IN: timeout for
- * the batch
- */
- int lstio_bat_nmlen; /* IN: name length */
- char __user *lstio_bat_namep; /* IN: batch name */
- struct list_head __user *lstio_bat_resultp; /* OUT: list head of
- * result buffer
- */
-};
-
-struct lstio_batch_stop_args {
- int lstio_bat_key; /* IN: session key */
- int lstio_bat_force; /* IN: abort unfinished
- * test RPC
- */
- int lstio_bat_nmlen; /* IN: name length */
- char __user *lstio_bat_namep; /* IN: batch name */
- struct list_head __user *lstio_bat_resultp; /* OUT: list head of
- * result buffer
- */
-};
-
-struct lstio_batch_query_args {
- int lstio_bat_key; /* IN: session key */
- int lstio_bat_testidx; /* IN: test index */
- int lstio_bat_client; /* IN: are we testing
- * the client?
- */
- int lstio_bat_timeout; /* IN: timeout for
- * waiting
- */
- int lstio_bat_nmlen; /* IN: name length */
- char __user *lstio_bat_namep; /* IN: batch name */
- struct list_head __user *lstio_bat_resultp; /* OUT: list head of
- * result buffer
- */
-};
-
-struct lstio_batch_list_args {
- int lstio_bat_key; /* IN: session key */
- int lstio_bat_idx; /* IN: index */
- int lstio_bat_nmlen; /* IN: name length */
- char __user *lstio_bat_namep; /* OUT: batch name */
-};
-
-struct lstio_batch_info_args {
- int lstio_bat_key; /* IN: session key */
- int lstio_bat_nmlen; /* IN: name length */
- char __user *lstio_bat_namep; /* IN: name */
- int lstio_bat_server; /* IN: query server
- * or not
- */
- int lstio_bat_testidx; /* IN: test index */
- struct lstcon_test_batch_ent __user *lstio_bat_entp;/* OUT: batch ent */
-
- int __user *lstio_bat_idxp; /* IN/OUT: index of node */
- int __user *lstio_bat_ndentp; /* IN/OUT: # of nodent */
- struct lstcon_node_ent __user *lstio_bat_dentsp;/* array of nodent */
-};
-
-/* add stat in session */
-struct lstio_stat_args {
- int lstio_sta_key; /* IN: session key */
- int lstio_sta_timeout; /* IN: timeout for
- * stat request
- */
- int lstio_sta_nmlen; /* IN: group name
- * length
- */
- char __user *lstio_sta_namep; /* IN: group name */
- int lstio_sta_count; /* IN: # of pid */
- struct lnet_process_id __user *lstio_sta_idsp; /* IN: pid */
- struct list_head __user *lstio_sta_resultp; /* OUT: list head of
- * result buffer
- */
-};
-
-enum lst_test_type {
- LST_TEST_BULK = 1,
- LST_TEST_PING = 2
-};
-
-/* create a test in a batch */
-#define LST_MAX_CONCUR 1024 /* Max concurrency of test */
-
-struct lstio_test_args {
- int lstio_tes_key; /* IN: session key */
- int lstio_tes_bat_nmlen; /* IN: batch name len */
- char __user *lstio_tes_bat_name; /* IN: batch name */
- int lstio_tes_type; /* IN: test type */
- int lstio_tes_oneside; /* IN: one-sided test */
- int lstio_tes_loop; /* IN: loop count */
- int lstio_tes_concur; /* IN: concurrency */
-
- int lstio_tes_dist; /* IN: node distribution in
- * destination groups
- */
- int lstio_tes_span; /* IN: node span in
- * destination groups
- */
- int lstio_tes_sgrp_nmlen; /* IN: source group
- * name length
- */
- char __user *lstio_tes_sgrp_name; /* IN: group name */
- int lstio_tes_dgrp_nmlen; /* IN: destination group
- * name length
- */
- char __user *lstio_tes_dgrp_name; /* IN: group name */
-
- int lstio_tes_param_len; /* IN: param buffer len */
- void __user *lstio_tes_param; /* IN: parameter for specified
- * test: lst_test_bulk_param,
- * lst_test_ping_param,
- * ... more
- */
- int __user *lstio_tes_retp; /* OUT: private returned
- * value
- */
- struct list_head __user *lstio_tes_resultp;/* OUT: list head of
- * result buffer
- */
-};
-
-enum lst_brw_type {
- LST_BRW_READ = 1,
- LST_BRW_WRITE = 2
-};
-
-enum lst_brw_flags {
- LST_BRW_CHECK_NONE = 1,
- LST_BRW_CHECK_SIMPLE = 2,
- LST_BRW_CHECK_FULL = 3
-};
-
-struct lst_test_bulk_param {
- int blk_opc; /* bulk operation code */
- int blk_size; /* size (bytes) */
- int blk_time; /* time of running the test*/
- int blk_flags; /* reserved flags */
- int blk_cli_off; /* bulk offset on client */
- int blk_srv_off; /* reserved: bulk offset on server */
-};
-
-struct lst_test_ping_param {
- int png_size; /* size of ping message */
- int png_time; /* time */
- int png_loop; /* loop */
- int png_flags; /* reserved flags */
-};
-
-struct srpc_counters {
- __u32 errors;
- __u32 rpcs_sent;
- __u32 rpcs_rcvd;
- __u32 rpcs_dropped;
- __u32 rpcs_expired;
- __u64 bulk_get;
- __u64 bulk_put;
-} WIRE_ATTR;
-
-struct sfw_counters {
- /** milliseconds since current session started */
- __u32 running_ms;
- __u32 active_batches;
- __u32 zombie_sessions;
- __u32 brw_errors;
- __u32 ping_errors;
-} WIRE_ATTR;
-
-#endif
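
The trs_rpc_stat/trs_fwk_stat accessors above double as counters and getters depending on the inc argument. Typical use when accounting a completed RPC (a sketch built only from the inline helpers in this header):

static void count_rpc(struct lstcon_trans_stat *stat, int rc)
{
	lstcon_rpc_stat_total(stat, 1);		/* inc != 0 increments */
	if (rc == 0)
		lstcon_rpc_stat_success(stat, 1);
	else
		lstcon_rpc_stat_failure(stat, 1);
	/* pass inc == 0 to read a counter without modifying it */
}
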
diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/nidstr.h b/drivers/staging/lustre/include/uapi/linux/lnet/nidstr.h
deleted file mode 100644
index 882074ed6021..000000000000
--- a/drivers/staging/lustre/include/uapi/linux/lnet/nidstr.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-#ifndef _LNET_NIDSTRINGS_H
-#define _LNET_NIDSTRINGS_H
-
-#include <uapi/linux/lnet/lnet-types.h>
-
-/**
- * Lustre Network Driver types.
- */
-enum {
- /*
- * Only add to these values (i.e. don't ever change or redefine them):
- * network addresses depend on them...
- */
- QSWLND = 1,
- SOCKLND = 2,
- GMLND = 3,
- PTLLND = 4,
- O2IBLND = 5,
- CIBLND = 6,
- OPENIBLND = 7,
- IIBLND = 8,
- LOLND = 9,
- RALND = 10,
- VIBLND = 11,
- MXLND = 12,
- GNILND = 13,
- GNIIPLND = 14,
-};
-
-struct list_head;
-
-#define LNET_NIDSTR_COUNT 1024 /* # of nidstrings */
-#define LNET_NIDSTR_SIZE 32 /* size of each one (see below for usage) */
-
-/* support decl needed by both kernel and user space */
-char *libcfs_next_nidstring(void);
-int libcfs_isknown_lnd(__u32 lnd);
-char *libcfs_lnd2modname(__u32 lnd);
-char *libcfs_lnd2str_r(__u32 lnd, char *buf, size_t buf_size);
-static inline char *libcfs_lnd2str(__u32 lnd)
-{
- return libcfs_lnd2str_r(lnd, libcfs_next_nidstring(),
- LNET_NIDSTR_SIZE);
-}
-
-int libcfs_str2lnd(const char *str);
-char *libcfs_net2str_r(__u32 net, char *buf, size_t buf_size);
-static inline char *libcfs_net2str(__u32 net)
-{
- return libcfs_net2str_r(net, libcfs_next_nidstring(),
- LNET_NIDSTR_SIZE);
-}
-
-char *libcfs_nid2str_r(lnet_nid_t nid, char *buf, size_t buf_size);
-static inline char *libcfs_nid2str(lnet_nid_t nid)
-{
- return libcfs_nid2str_r(nid, libcfs_next_nidstring(),
- LNET_NIDSTR_SIZE);
-}
-
-__u32 libcfs_str2net(const char *str);
-lnet_nid_t libcfs_str2nid(const char *str);
-int libcfs_str2anynid(lnet_nid_t *nid, const char *str);
-char *libcfs_id2str(struct lnet_process_id id);
-void cfs_free_nidlist(struct list_head *list);
-int cfs_parse_nidlist(char *str, int len, struct list_head *list);
-int cfs_print_nidlist(char *buffer, int count, struct list_head *list);
-int cfs_match_nid(lnet_nid_t nid, struct list_head *list);
-
-int cfs_ip_addr_parse(char *str, int len, struct list_head *list);
-int cfs_ip_addr_match(__u32 addr, struct list_head *list);
-bool cfs_nidrange_is_contiguous(struct list_head *nidlist);
-void cfs_nidrange_find_min_max(struct list_head *nidlist, char *min_nid,
- char *max_nid, size_t nidstr_length);
-
-struct netstrfns {
- __u32 nf_type;
- char *nf_name;
- char *nf_modname;
- void (*nf_addr2str)(__u32 addr, char *str, size_t size);
- int (*nf_str2addr)(const char *str, int nob, __u32 *addr);
- int (*nf_parse_addrlist)(char *str, int len,
- struct list_head *list);
- int (*nf_print_addrlist)(char *buffer, int count,
- struct list_head *list);
- int (*nf_match_addr)(__u32 addr, struct list_head *list);
- bool (*nf_is_contiguous)(struct list_head *nidlist);
- void (*nf_min_max)(struct list_head *nidlist, __u32 *min_nid,
- __u32 *max_nid);
-};
-
-#endif /* _LNET_NIDSTRINGS_H */
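
Each *_r variant above writes into a caller-supplied buffer, while the plain wrapper appears to cycle through a pool of LNET_NIDSTR_COUNT static buffers via libcfs_next_nidstring(). A round-trip sketch, assuming libcfs_str2nid() reports a parse failure as LNET_NID_ANY:

static int nidstr_example(void)
{
	lnet_nid_t nid = libcfs_str2nid("192.168.0.1@tcp");
	char buf[LNET_NIDSTR_SIZE];

	if (nid == LNET_NID_ANY)
		return -1;			/* assumed failure convention */

	libcfs_nid2str_r(nid, buf, sizeof(buf));
	return 0;
}
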
diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/socklnd.h b/drivers/staging/lustre/include/uapi/linux/lnet/socklnd.h
deleted file mode 100644
index 6453e053fa99..000000000000
--- a/drivers/staging/lustre/include/uapi/linux/lnet/socklnd.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * #defines shared between socknal implementation and utilities
- */
-#ifndef __UAPI_LNET_SOCKLND_H__
-#define __UAPI_LNET_SOCKLND_H__
-
-#define SOCKLND_CONN_NONE (-1)
-#define SOCKLND_CONN_ANY 0
-#define SOCKLND_CONN_CONTROL 1
-#define SOCKLND_CONN_BULK_IN 2
-#define SOCKLND_CONN_BULK_OUT 3
-#define SOCKLND_CONN_NTYPES 4
-
-#define SOCKLND_CONN_ACK SOCKLND_CONN_BULK_IN
-
-#endif
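
One switch over the connection types above shows how SOCKLND_CONN_ACK aliasing SOCKLND_CONN_BULK_IN plays out in practice (an illustrative helper, not from the original sources):

static const char *socklnd_conn_name(int type)
{
	switch (type) {
	case SOCKLND_CONN_ANY:		return "any";
	case SOCKLND_CONN_CONTROL:	return "control";
	case SOCKLND_CONN_BULK_IN:	return "bulk-in";	/* == SOCKLND_CONN_ACK */
	case SOCKLND_CONN_BULK_OUT:	return "bulk-out";
	default:			return "none";		/* SOCKLND_CONN_NONE */
	}
}
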
diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_cfg.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_cfg.h
deleted file mode 100644
index 11b51d93f64c..000000000000
--- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_cfg.h
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef _UAPI_LUSTRE_CFG_H_
-#define _UAPI_LUSTRE_CFG_H_
-
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <uapi/linux/lustre/lustre_user.h>
-
-/** \defgroup cfg cfg
- *
- * @{
- */
-
-/*
- * 1cf6
- * lcfG
- */
-#define LUSTRE_CFG_VERSION 0x1cf60001
-#define LUSTRE_CFG_MAX_BUFCOUNT 8
-
-#define LCFG_HDR_SIZE(count) \
- __ALIGN_KERNEL(offsetof(struct lustre_cfg, lcfg_buflens[(count)]), 8)
-
-/** If the LCFG_REQUIRED bit is set in a configuration command,
- * then the client is required to understand this parameter
- * in order to mount the filesystem. If it does not understand
- * a REQUIRED command the client mount will fail.
- */
-#define LCFG_REQUIRED 0x0001000
-
-enum lcfg_command_type {
- LCFG_ATTACH = 0x00cf001, /**< create a new obd instance */
- LCFG_DETACH = 0x00cf002, /**< destroy obd instance */
- LCFG_SETUP = 0x00cf003, /**< call type-specific setup */
- LCFG_CLEANUP = 0x00cf004, /**< call type-specific cleanup
- */
- LCFG_ADD_UUID = 0x00cf005, /**< add a nid to a niduuid */
- LCFG_DEL_UUID = 0x00cf006, /**< remove a nid from
- * a niduuid
- */
- LCFG_MOUNTOPT = 0x00cf007, /**< create a profile
- * (mdc, osc)
- */
- LCFG_DEL_MOUNTOPT = 0x00cf008, /**< destroy a profile */
- LCFG_SET_TIMEOUT = 0x00cf009, /**< set obd_timeout */
- LCFG_SET_UPCALL = 0x00cf00a, /**< deprecated */
- LCFG_ADD_CONN = 0x00cf00b, /**< add a failover niduuid to
- * an obd
- */
- LCFG_DEL_CONN = 0x00cf00c, /**< remove a failover niduuid */
- LCFG_LOV_ADD_OBD = 0x00cf00d, /**< add an osc to a lov */
- LCFG_LOV_DEL_OBD = 0x00cf00e, /**< remove an osc from a lov */
- LCFG_PARAM = 0x00cf00f, /**< set a proc parameter */
- LCFG_MARKER = 0x00cf010, /**< metadata about next
- * cfg rec
- */
- LCFG_LOG_START = 0x00ce011, /**< mgc only, process a
- * cfg log
- */
- LCFG_LOG_END = 0x00ce012, /**< stop processing updates */
- LCFG_LOV_ADD_INA = 0x00ce013, /**< like LOV_ADD_OBD,
- * inactive
- */
- LCFG_ADD_MDC = 0x00cf014, /**< add an mdc to a lmv */
- LCFG_DEL_MDC = 0x00cf015, /**< remove an mdc from a lmv */
- LCFG_SPTLRPC_CONF = 0x00ce016, /**< security */
- LCFG_POOL_NEW = 0x00ce020, /**< create an ost pool name */
- LCFG_POOL_ADD = 0x00ce021, /**< add an ost to a pool */
- LCFG_POOL_REM = 0x00ce022, /**< remove an ost from a pool */
- LCFG_POOL_DEL = 0x00ce023, /**< destroy an ost pool name */
- LCFG_SET_LDLM_TIMEOUT = 0x00ce030, /**< set ldlm_timeout */
- LCFG_PRE_CLEANUP = 0x00cf031, /**< call type-specific
- * pre-cleanup
- */
- LCFG_SET_PARAM = 0x00ce032, /**< use set_param syntax to set
- * a proc parameter
- */
-};
-
-struct lustre_cfg_bufs {
- void *lcfg_buf[LUSTRE_CFG_MAX_BUFCOUNT];
- __u32 lcfg_buflen[LUSTRE_CFG_MAX_BUFCOUNT];
- __u32 lcfg_bufcount;
-};
-
-struct lustre_cfg {
- __u32 lcfg_version;
- __u32 lcfg_command;
-
- __u32 lcfg_num;
- __u32 lcfg_flags;
- __u64 lcfg_nid;
- __u32 lcfg_nal; /* not used any more */
-
- __u32 lcfg_bufcount;
- __u32 lcfg_buflens[0];
-};
-
-enum cfg_record_type {
- PORTALS_CFG_TYPE = 1,
- LUSTRE_CFG_TYPE = 123,
-};
-
-#define LUSTRE_CFG_BUFLEN(lcfg, idx) \
- ((lcfg)->lcfg_bufcount <= (idx) ? 0 : (lcfg)->lcfg_buflens[(idx)])
-
-static inline void lustre_cfg_bufs_set(struct lustre_cfg_bufs *bufs,
- __u32 index, void *buf, __u32 buflen)
-{
- if (index >= LUSTRE_CFG_MAX_BUFCOUNT)
- return;
-
- if (!bufs)
- return;
-
- if (bufs->lcfg_bufcount <= index)
- bufs->lcfg_bufcount = index + 1;
-
- bufs->lcfg_buf[index] = buf;
- bufs->lcfg_buflen[index] = buflen;
-}
-
-static inline void lustre_cfg_bufs_set_string(struct lustre_cfg_bufs *bufs,
- __u32 index, char *str)
-{
- lustre_cfg_bufs_set(bufs, index, str, str ? strlen(str) + 1 : 0);
-}
-
-static inline void lustre_cfg_bufs_reset(struct lustre_cfg_bufs *bufs,
- char *name)
-{
- memset(bufs, 0, sizeof(*bufs));
- if (name)
- lustre_cfg_bufs_set_string(bufs, 0, name);
-}
-
-static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, __u32 index)
-{
- __u32 i;
- size_t offset;
- __u32 bufcount;
-
- if (!lcfg)
- return NULL;
-
- bufcount = lcfg->lcfg_bufcount;
- if (index >= bufcount)
- return NULL;
-
- offset = LCFG_HDR_SIZE(lcfg->lcfg_bufcount);
- for (i = 0; i < index; i++)
- offset += __ALIGN_KERNEL(lcfg->lcfg_buflens[i], 8);
- return (char *)lcfg + offset;
-}
-
-static inline void lustre_cfg_bufs_init(struct lustre_cfg_bufs *bufs,
- struct lustre_cfg *lcfg)
-{
- __u32 i;
-
- bufs->lcfg_bufcount = lcfg->lcfg_bufcount;
- for (i = 0; i < bufs->lcfg_bufcount; i++) {
- bufs->lcfg_buflen[i] = lcfg->lcfg_buflens[i];
- bufs->lcfg_buf[i] = lustre_cfg_buf(lcfg, i);
- }
-}
-
-static inline __u32 lustre_cfg_len(__u32 bufcount, __u32 *buflens)
-{
- __u32 i;
- __u32 len;
-
- len = LCFG_HDR_SIZE(bufcount);
- for (i = 0; i < bufcount; i++)
- len += __ALIGN_KERNEL(buflens[i], 8);
-
- return __ALIGN_KERNEL(len, 8);
-}
-
-static inline void lustre_cfg_init(struct lustre_cfg *lcfg, int cmd,
- struct lustre_cfg_bufs *bufs)
-{
- char *ptr;
- __u32 i;
-
- lcfg->lcfg_version = LUSTRE_CFG_VERSION;
- lcfg->lcfg_command = cmd;
- lcfg->lcfg_bufcount = bufs->lcfg_bufcount;
-
- ptr = (char *)lcfg + LCFG_HDR_SIZE(lcfg->lcfg_bufcount);
- for (i = 0; i < lcfg->lcfg_bufcount; i++) {
- lcfg->lcfg_buflens[i] = bufs->lcfg_buflen[i];
- if (bufs->lcfg_buf[i]) {
- memcpy(ptr, bufs->lcfg_buf[i], bufs->lcfg_buflen[i]);
- ptr += __ALIGN_KERNEL(bufs->lcfg_buflen[i], 8);
- }
- }
-}
-
-static inline int lustre_cfg_sanity_check(void *buf, size_t len)
-{
- struct lustre_cfg *lcfg = (struct lustre_cfg *)buf;
-
- if (!lcfg)
- return -EINVAL;
-
- /* check that the first bits of the struct are valid */
- if (len < LCFG_HDR_SIZE(0))
- return -EINVAL;
-
- if (lcfg->lcfg_version != LUSTRE_CFG_VERSION)
- return -EINVAL;
-
- if (lcfg->lcfg_bufcount >= LUSTRE_CFG_MAX_BUFCOUNT)
- return -EINVAL;
-
- /* check that the buflens are valid */
- if (len < LCFG_HDR_SIZE(lcfg->lcfg_bufcount))
- return -EINVAL;
-
- /* make sure all the pointers point inside the data */
- if (len < lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens))
- return -EINVAL;
-
- return 0;
-}
-
-/** @} cfg */
-
-#endif /* _UAPI_LUSTRE_CFG_H_ */
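
The helpers above imply a two-phase call sequence: collect buffers, size the record, then serialize. A userspace sketch of building an LCFG_PARAM record using only the inline helpers from this header (malloc/memset stand in for whatever allocator the caller uses):

#include <stdlib.h>
#include <string.h>

static struct lustre_cfg *build_param_cfg(char *dev, char *param)
{
	struct lustre_cfg_bufs bufs;
	struct lustre_cfg *lcfg;
	__u32 len;

	lustre_cfg_bufs_reset(&bufs, dev);		/* buf 0: device name */
	lustre_cfg_bufs_set_string(&bufs, 1, param);	/* buf 1: "key=value" */

	len = lustre_cfg_len(bufs.lcfg_bufcount, bufs.lcfg_buflen);
	lcfg = malloc(len);
	if (lcfg) {
		memset(lcfg, 0, len);	/* zero lcfg_num/lcfg_flags/lcfg_nid */
		lustre_cfg_init(lcfg, LCFG_PARAM, &bufs);
	}
	return lcfg;
}
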
diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_fid.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_fid.h
deleted file mode 100644
index 2e7a8d103777..000000000000
--- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_fid.h
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2014, Intel Corporation.
- *
- * Copyright 2016 Cray Inc, all rights reserved.
- * Author: Ben Evans.
- *
- * all fid manipulation functions go here
- *
- * FIDs are globally unique within a Lustre filesystem, and are made up
- * of three parts: sequence, Object ID, and version.
- *
- */
-#ifndef _UAPI_LUSTRE_FID_H_
-#define _UAPI_LUSTRE_FID_H_
-
-#include <uapi/linux/lustre/lustre_idl.h>
-
-/** returns fid object sequence */
-static inline __u64 fid_seq(const struct lu_fid *fid)
-{
- return fid->f_seq;
-}
-
-/** returns fid object id */
-static inline __u32 fid_oid(const struct lu_fid *fid)
-{
- return fid->f_oid;
-}
-
-/** returns fid object version */
-static inline __u32 fid_ver(const struct lu_fid *fid)
-{
- return fid->f_ver;
-}
-
-static inline void fid_zero(struct lu_fid *fid)
-{
- memset(fid, 0, sizeof(*fid));
-}
-
-static inline __u64 fid_ver_oid(const struct lu_fid *fid)
-{
- return (__u64)fid_ver(fid) << 32 | fid_oid(fid);
-}
-
-static inline bool fid_seq_is_mdt0(__u64 seq)
-{
- return seq == FID_SEQ_OST_MDT0;
-}
-
-static inline bool fid_seq_is_mdt(__u64 seq)
-{
- return seq == FID_SEQ_OST_MDT0 || seq >= FID_SEQ_NORMAL;
-}
-
-static inline bool fid_seq_is_echo(__u64 seq)
-{
- return seq == FID_SEQ_ECHO;
-}
-
-static inline bool fid_is_echo(const struct lu_fid *fid)
-{
- return fid_seq_is_echo(fid_seq(fid));
-}
-
-static inline bool fid_seq_is_llog(__u64 seq)
-{
- return seq == FID_SEQ_LLOG;
-}
-
-static inline bool fid_is_llog(const struct lu_fid *fid)
-{
- /* file with OID == 0 is not llog but contains last oid */
- return fid_seq_is_llog(fid_seq(fid)) && fid_oid(fid) > 0;
-}
-
-static inline bool fid_seq_is_rsvd(__u64 seq)
-{
- return seq > FID_SEQ_OST_MDT0 && seq <= FID_SEQ_RSVD;
-}
-
-static inline bool fid_seq_is_special(__u64 seq)
-{
- return seq == FID_SEQ_SPECIAL;
-}
-
-static inline bool fid_seq_is_local_file(__u64 seq)
-{
- return seq == FID_SEQ_LOCAL_FILE ||
- seq == FID_SEQ_LOCAL_NAME;
-}
-
-static inline bool fid_seq_is_root(__u64 seq)
-{
- return seq == FID_SEQ_ROOT;
-}
-
-static inline bool fid_seq_is_dot(__u64 seq)
-{
- return seq == FID_SEQ_DOT_LUSTRE;
-}
-
-static inline bool fid_seq_is_default(__u64 seq)
-{
- return seq == FID_SEQ_LOV_DEFAULT;
-}
-
-static inline bool fid_is_mdt0(const struct lu_fid *fid)
-{
- return fid_seq_is_mdt0(fid_seq(fid));
-}
-
-/**
- * Check if a sequence number or fid belongs to the igif namespace.
- * \param seq the sequence number (or fid_seq(fid)) to be tested.
- * \return true if it is an igif; otherwise false.
- */
-static inline bool fid_seq_is_igif(__u64 seq)
-{
- return seq >= FID_SEQ_IGIF && seq <= FID_SEQ_IGIF_MAX;
-}
-
-static inline bool fid_is_igif(const struct lu_fid *fid)
-{
- return fid_seq_is_igif(fid_seq(fid));
-}
-
-/**
- * Check if a sequence number or fid belongs to the idif namespace.
- * \param seq the sequence number (or fid_seq(fid)) to be tested.
- * \return true if it is an idif; otherwise false.
- */
-static inline bool fid_seq_is_idif(__u64 seq)
-{
- return seq >= FID_SEQ_IDIF && seq <= FID_SEQ_IDIF_MAX;
-}
-
-static inline bool fid_is_idif(const struct lu_fid *fid)
-{
- return fid_seq_is_idif(fid_seq(fid));
-}
-
-static inline bool fid_is_local_file(const struct lu_fid *fid)
-{
- return fid_seq_is_local_file(fid_seq(fid));
-}
-
-static inline bool fid_seq_is_norm(__u64 seq)
-{
- return (seq >= FID_SEQ_NORMAL);
-}
-
-static inline bool fid_is_norm(const struct lu_fid *fid)
-{
- return fid_seq_is_norm(fid_seq(fid));
-}
-
-/* convert an OST objid into an IDIF FID SEQ number */
-static inline __u64 fid_idif_seq(__u64 id, __u32 ost_idx)
-{
- return FID_SEQ_IDIF | (ost_idx << 16) | ((id >> 32) & 0xffff);
-}
-
-/* convert a packed IDIF FID into an OST objid */
-static inline __u64 fid_idif_id(__u64 seq, __u32 oid, __u32 ver)
-{
- return ((__u64)ver << 48) | ((seq & 0xffff) << 32) | oid;
-}
-
-static inline __u32 idif_ost_idx(__u64 seq)
-{
- return (seq >> 16) & 0xffff;
-}
-
-/* extract ost index from IDIF FID */
-static inline __u32 fid_idif_ost_idx(const struct lu_fid *fid)
-{
- return idif_ost_idx(fid_seq(fid));
-}
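-
-/*
- * Round-trip sketch (illustrative values, not part of the original
- * header): an OST objid/index packed via fid_idif_seq() is recovered
- * by fid_idif_id() and fid_idif_ost_idx().
- */
-static inline bool fid_idif_example_roundtrip(void)
-{
- const __u64 objid = 0x12345678ULL; /* low 32 bits of a 48-bit objid */
- const __u32 ost_idx = 7;
- struct lu_fid fid;
-
- fid.f_seq = fid_idif_seq(objid, ost_idx);
- fid.f_oid = objid & 0xffffffff; /* low 32 bits go in the OID */
- fid.f_ver = 0;
-
- return fid_idif_id(fid_seq(&fid), fid_oid(&fid),
- fid_ver(&fid)) == objid &&
- fid_idif_ost_idx(&fid) == ost_idx;
-}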
-
-/**
- * Get inode number from an igif.
- * \param fid an igif to get inode number from.
- * \return inode number for the igif.
- */
-static inline ino_t lu_igif_ino(const struct lu_fid *fid)
-{
- return fid_seq(fid);
-}
-
-/**
- * Get inode generation from an igif.
- * \param fid an igif to get inode generation from.
- * \return inode generation for the igif.
- */
-static inline __u32 lu_igif_gen(const struct lu_fid *fid)
-{
- return fid_oid(fid);
-}
-
-/**
- * Build igif from the inode number/generation.
- */
-static inline void lu_igif_build(struct lu_fid *fid, __u32 ino, __u32 gen)
-{
- fid->f_seq = ino;
- fid->f_oid = gen;
- fid->f_ver = 0;
-}
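-
-/*
- * Sketch (hypothetical values): an inode number/generation packed by
- * lu_igif_build() is recovered intact by lu_igif_ino()/lu_igif_gen().
- */
-static inline bool lu_igif_example_roundtrip(void)
-{
- struct lu_fid fid;
-
- lu_igif_build(&fid, 1234, 56);
- return lu_igif_ino(&fid) == 1234 && lu_igif_gen(&fid) == 56;
-}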
-
-/*
- * Fids are transmitted across the network (in the sender's byte order),
- * and stored on disk in big-endian order.
- */
-static inline void fid_cpu_to_le(struct lu_fid *dst, const struct lu_fid *src)
-{
- dst->f_seq = __cpu_to_le64(fid_seq(src));
- dst->f_oid = __cpu_to_le32(fid_oid(src));
- dst->f_ver = __cpu_to_le32(fid_ver(src));
-}
-
-static inline void fid_le_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
-{
- dst->f_seq = __le64_to_cpu(fid_seq(src));
- dst->f_oid = __le32_to_cpu(fid_oid(src));
- dst->f_ver = __le32_to_cpu(fid_ver(src));
-}
-
-static inline void fid_cpu_to_be(struct lu_fid *dst, const struct lu_fid *src)
-{
- dst->f_seq = __cpu_to_be64(fid_seq(src));
- dst->f_oid = __cpu_to_be32(fid_oid(src));
- dst->f_ver = __cpu_to_be32(fid_ver(src));
-}
-
-static inline void fid_be_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
-{
- dst->f_seq = __be64_to_cpu(fid_seq(src));
- dst->f_oid = __be32_to_cpu(fid_oid(src));
- dst->f_ver = __be32_to_cpu(fid_ver(src));
-}
-
-static inline bool fid_is_sane(const struct lu_fid *fid)
-{
- return fid && ((fid_seq(fid) >= FID_SEQ_START && !fid_ver(fid)) ||
- fid_is_igif(fid) || fid_is_idif(fid) ||
- fid_seq_is_rsvd(fid_seq(fid)));
-}
-
-static inline bool lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1)
-{
- return !memcmp(f0, f1, sizeof(*f0));
-}
-
-static inline int lu_fid_cmp(const struct lu_fid *f0,
- const struct lu_fid *f1)
-{
- if (fid_seq(f0) != fid_seq(f1))
- return fid_seq(f0) > fid_seq(f1) ? 1 : -1;
-
- if (fid_oid(f0) != fid_oid(f1))
- return fid_oid(f0) > fid_oid(f1) ? 1 : -1;
-
- if (fid_ver(f0) != fid_ver(f1))
- return fid_ver(f0) > fid_ver(f1) ? 1 : -1;
-
- return 0;
-}
-#endif
diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_fiemap.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_fiemap.h
deleted file mode 100644
index d375a476f5ea..000000000000
--- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_fiemap.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2014, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * FIEMAP data structures and flags. This header file will be used until
- * fiemap.h is available in the upstream kernel.
- *
- * Author: Kalpak Shah <kalpak.shah@sun.com>
- * Author: Andreas Dilger <adilger@sun.com>
- */
-
-#ifndef _LUSTRE_FIEMAP_H
-#define _LUSTRE_FIEMAP_H
-
-#include <stddef.h>
-#include <linux/fiemap.h>
-
-/* XXX: We use fiemap_extent::fe_reserved[0] */
-#define fe_device fe_reserved[0]
-
-static inline size_t fiemap_count_to_size(size_t extent_count)
-{
- return sizeof(struct fiemap) + extent_count *
- sizeof(struct fiemap_extent);
-}
-
-static inline unsigned int fiemap_size_to_count(size_t array_size)
-{
- return (array_size - sizeof(struct fiemap)) /
- sizeof(struct fiemap_extent);
-}
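-
-/*
- * Sanity sketch (not part of the original header): the two helpers
- * above are inverses, so a buffer sized for n extents reports n
- * extents back.
- */
-static inline bool fiemap_example_roundtrip(unsigned int n)
-{
- return fiemap_size_to_count(fiemap_count_to_size(n)) == n;
-}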
-
-#define FIEMAP_FLAG_DEVICE_ORDER 0x40000000 /* return device ordered mapping */
-
-#ifdef FIEMAP_FLAGS_COMPAT
-#undef FIEMAP_FLAGS_COMPAT
-#endif
-
-/* Lustre specific flags - use a high bit, don't conflict with upstream flag */
-#define FIEMAP_EXTENT_NO_DIRECT 0x40000000 /* Data mapping undefined */
-#define FIEMAP_EXTENT_NET 0x80000000 /* Data stored remotely.
- * Sets NO_DIRECT flag
- */
-
-#endif /* _LUSTRE_FIEMAP_H */
diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h
deleted file mode 100644
index 6c7e3992d646..000000000000
--- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h
+++ /dev/null
@@ -1,2690 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Lustre wire protocol definitions.
- */
-
-/** \defgroup lustreidl lustreidl
- *
- * Lustre wire protocol definitions.
- *
- * ALL structs passing over the wire should be declared here. Structs
- * that are used in interfaces with userspace should go in lustre_user.h.
- *
- * All structs being declared here should be built from simple fixed-size
- * types (__u8, __u16, __u32, __u64) or be built from other types or
- * structs also declared in this file. Similarly, all flags and magic
- * values in those structs should also be declared here. This ensures
- * that the Lustre wire protocol is not influenced by external dependencies.
- *
- * The only other acceptable items in this file are VERY SIMPLE accessor
- * functions to avoid callers grubbing inside the structures. Nothing that
- * depends on external functions or definitions should be in here.
- *
- * Structs must be properly aligned to put 64-bit values on an 8-byte
- * boundary. Any structs being added here must also be added to
- * utils/wirecheck.c and "make newwiretest" run to regenerate the
- * utils/wiretest.c sources. This allows us to verify that wire structs
- * have the proper alignment/size on all architectures.
- *
- * DO NOT CHANGE any of the structs, flags, values declared here and used
- * in released Lustre versions. Some structs may have padding fields that
- * can be used. Some structs might allow addition at the end (verify this
- * in the code to ensure that new/old clients that see this larger struct
- * do not fail, otherwise you need to implement protocol compatibility).
- *
- * @{
- */
-
-#ifndef _LUSTRE_IDL_H_
-#define _LUSTRE_IDL_H_
-
-#include <asm/byteorder.h>
-#include <linux/types.h>
-
-#include <uapi/linux/lnet/lnet-types.h>
-/* Defn's shared with user-space. */
-#include <uapi/linux/lustre/lustre_user.h>
-#include <uapi/linux/lustre/lustre_ver.h>
-
-/*
- * GENERAL STUFF
- */
-/* FOO_REQUEST_PORTAL is for incoming requests on the FOO
- * FOO_REPLY_PORTAL is for incoming replies on the FOO
- * FOO_BULK_PORTAL is for incoming bulk on the FOO
- */
-
-/* Lustre service names follow the format:
- * service name + MDT + seq name
- */
-#define LUSTRE_MDT_MAXNAMELEN 80
-
-#define CONNMGR_REQUEST_PORTAL 1
-#define CONNMGR_REPLY_PORTAL 2
-/*#define OSC_REQUEST_PORTAL 3 */
-#define OSC_REPLY_PORTAL 4
-/*#define OSC_BULK_PORTAL 5 */
-#define OST_IO_PORTAL 6
-#define OST_CREATE_PORTAL 7
-#define OST_BULK_PORTAL 8
-/*#define MDC_REQUEST_PORTAL 9 */
-#define MDC_REPLY_PORTAL 10
-/*#define MDC_BULK_PORTAL 11 */
-#define MDS_REQUEST_PORTAL 12
-/*#define MDS_REPLY_PORTAL 13 */
-#define MDS_BULK_PORTAL 14
-#define LDLM_CB_REQUEST_PORTAL 15
-#define LDLM_CB_REPLY_PORTAL 16
-#define LDLM_CANCEL_REQUEST_PORTAL 17
-#define LDLM_CANCEL_REPLY_PORTAL 18
-/*#define PTLBD_REQUEST_PORTAL 19 */
-/*#define PTLBD_REPLY_PORTAL 20 */
-/*#define PTLBD_BULK_PORTAL 21 */
-#define MDS_SETATTR_PORTAL 22
-#define MDS_READPAGE_PORTAL 23
-#define OUT_PORTAL 24
-
-#define MGC_REPLY_PORTAL 25
-#define MGS_REQUEST_PORTAL 26
-#define MGS_REPLY_PORTAL 27
-#define OST_REQUEST_PORTAL 28
-#define FLD_REQUEST_PORTAL 29
-#define SEQ_METADATA_PORTAL 30
-#define SEQ_DATA_PORTAL 31
-#define SEQ_CONTROLLER_PORTAL 32
-#define MGS_BULK_PORTAL 33
-
-/* Portal 63 is reserved for the Cray Inc DVS - nic@cray.com, roe@cray.com,
- * n8851@cray.com
- */
-
-/* packet types */
-#define PTL_RPC_MSG_REQUEST 4711
-#define PTL_RPC_MSG_ERR 4712
-#define PTL_RPC_MSG_REPLY 4713
-
-/* DON'T use swabbed values of MAGIC as magic! */
-#define LUSTRE_MSG_MAGIC_V2 0x0BD00BD3
-#define LUSTRE_MSG_MAGIC_V2_SWABBED 0xD30BD00B
-
-#define LUSTRE_MSG_MAGIC LUSTRE_MSG_MAGIC_V2
-
-#define PTLRPC_MSG_VERSION 0x00000003
-#define LUSTRE_VERSION_MASK 0xffff0000
-#define LUSTRE_OBD_VERSION 0x00010000
-#define LUSTRE_MDS_VERSION 0x00020000
-#define LUSTRE_OST_VERSION 0x00030000
-#define LUSTRE_DLM_VERSION 0x00040000
-#define LUSTRE_LOG_VERSION 0x00050000
-#define LUSTRE_MGS_VERSION 0x00060000
-
-/**
- * Describes a range of sequence numbers; lsr_start is included in the
- * range but lsr_end is not.
- * The same structure is used in the fld module, where the lsr_index
- * field holds the mdt id of the home mdt.
- */
-struct lu_seq_range {
- __u64 lsr_start;
- __u64 lsr_end;
- __u32 lsr_index;
- __u32 lsr_flags;
-};
-
-struct lu_seq_range_array {
- __u32 lsra_count;
- __u32 lsra_padding;
- struct lu_seq_range lsra_lsr[0];
-};
-
-#define LU_SEQ_RANGE_MDT 0x0
-#define LU_SEQ_RANGE_OST 0x1
-#define LU_SEQ_RANGE_ANY 0x3
-
-#define LU_SEQ_RANGE_MASK 0x3
-
-/** \defgroup lu_fid lu_fid
- * @{
- */
-
-/**
- * Flags for lustre_mdt_attrs::lma_compat and lustre_mdt_attrs::lma_incompat.
- * Deprecated since HSM and SOM attributes are now stored in separate on-disk
- * xattrs.
- */
-enum lma_compat {
- LMAC_HSM = 0x00000001,
-/* LMAC_SOM = 0x00000002, obsolete since 2.8.0 */
- LMAC_NOT_IN_OI = 0x00000004, /* the object does NOT need OI mapping */
- LMAC_FID_ON_OST = 0x00000008, /* For OST-object, its OI mapping is
- * under /O/<seq>/d<x>.
- */
-};
-
-/**
- * Masks for all features that should be supported by a Lustre version to
- * access a specific file.
- * This information is stored in lustre_mdt_attrs::lma_incompat.
- */
-enum lma_incompat {
- LMAI_RELEASED = 0x00000001, /* file is released */
- LMAI_AGENT = 0x00000002, /* agent inode */
- LMAI_REMOTE_PARENT = 0x00000004, /* the parent of the object
- * is on the remote MDT
- */
-};
-
-#define LMA_INCOMPAT_SUPP (LMAI_AGENT | LMAI_REMOTE_PARENT)
-
-/**
- * fid constants
- */
-enum {
- /** LASTID file has zero OID */
- LUSTRE_FID_LASTID_OID = 0UL,
- /** initial fid id value */
- LUSTRE_FID_INIT_OID = 1UL
-};
-
-/* The copytool uses a 32-bit bitmask field to encode archive IDs during
- * registration with the MDT through KUC.
- * archive num = 0 => all
- * archive nums range from 1 to 32
- */
-#define LL_HSM_MAX_ARCHIVE (sizeof(__u32) * 8)
-
-/**
- * Note that SEQ numbers below 12 cannot be valid IGIFs, because ldiskfs
- * reserves those inode numbers, so these reserved SEQ numbers can be
- * used for other purposes without risking collisions with existing
- * inodes.
- *
- * Different FID Format
- * http://wiki.old.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs
- */
-enum fid_seq {
- FID_SEQ_OST_MDT0 = 0,
- FID_SEQ_LLOG = 1, /* unnamed llogs */
- FID_SEQ_ECHO = 2,
- FID_SEQ_OST_MDT1 = 3,
- FID_SEQ_OST_MAX = 9, /* Max MDT count before OST_on_FID */
- FID_SEQ_LLOG_NAME = 10, /* named llogs */
- FID_SEQ_RSVD = 11,
- FID_SEQ_IGIF = 12,
- FID_SEQ_IGIF_MAX = 0x0ffffffffULL,
- FID_SEQ_IDIF = 0x100000000ULL,
- FID_SEQ_IDIF_MAX = 0x1ffffffffULL,
- /* Normal FID sequence starts from this value, i.e. 1<<33 */
- FID_SEQ_START = 0x200000000ULL,
- /* sequence for local pre-defined FIDs listed in local_oid */
- FID_SEQ_LOCAL_FILE = 0x200000001ULL,
- FID_SEQ_DOT_LUSTRE = 0x200000002ULL,
- /* sequence is used for local named objects FIDs generated
- * by local_object_storage library
- */
- FID_SEQ_LOCAL_NAME = 0x200000003ULL,
- /* Because the current FLD only caches the fid sequence (not the
- * oid) on the client side, any FID that needs to be exposed to
- * clients must ensure that all fids under its sequence are
- * located on one MDT.
- */
- FID_SEQ_SPECIAL = 0x200000004ULL,
- FID_SEQ_QUOTA = 0x200000005ULL,
- FID_SEQ_QUOTA_GLB = 0x200000006ULL,
- FID_SEQ_ROOT = 0x200000007ULL, /* Located on MDT0 */
- FID_SEQ_NORMAL = 0x200000400ULL,
- FID_SEQ_LOV_DEFAULT = 0xffffffffffffffffULL
-};
-
-#define OBIF_OID_MAX_BITS 32
-#define OBIF_MAX_OID (1ULL << OBIF_OID_MAX_BITS)
-#define OBIF_OID_MASK ((1ULL << OBIF_OID_MAX_BITS) - 1)
-#define IDIF_OID_MAX_BITS 48
-#define IDIF_MAX_OID (1ULL << IDIF_OID_MAX_BITS)
-#define IDIF_OID_MASK ((1ULL << IDIF_OID_MAX_BITS) - 1)
-
-/** OID for FID_SEQ_SPECIAL */
-enum special_oid {
- /* Big Filesystem Lock to serialize rename operations */
- FID_OID_SPECIAL_BFL = 1UL,
-};
-
-/** OID for FID_SEQ_DOT_LUSTRE */
-enum dot_lustre_oid {
- FID_OID_DOT_LUSTRE = 1UL,
- FID_OID_DOT_LUSTRE_OBF = 2UL,
-};
-
-/** OID for FID_SEQ_ROOT */
-enum root_oid {
- FID_OID_ROOT = 1UL,
- FID_OID_ECHO_ROOT = 2UL,
-};
-
-/** @} lu_fid */
-
-/** \defgroup lu_dir lu_dir
- * @{
- */
-
-/**
- * Enumeration of possible directory entry attributes.
- *
- * Attributes follow directory entry header in the order they appear in this
- * enumeration.
- */
-enum lu_dirent_attrs {
- LUDA_FID = 0x0001,
- LUDA_TYPE = 0x0002,
- LUDA_64BITHASH = 0x0004,
-};
-
-/**
- * Layout of readdir pages, as transmitted on wire.
- */
-struct lu_dirent {
- /** valid if LUDA_FID is set. */
- struct lu_fid lde_fid;
- /** a unique entry identifier: a hash or an offset. */
- __u64 lde_hash;
- /** total record length, including all attributes. */
- __u16 lde_reclen;
- /** name length */
- __u16 lde_namelen;
- /** optional variable size attributes following this entry.
- * taken from enum lu_dirent_attrs.
- */
- __u32 lde_attrs;
- /** name is followed by the attributes indicated in ->lde_attrs, in
- * their natural order. After the last attribute, padding bytes are
- * added to make ->lde_reclen a multiple of 8.
- */
- char lde_name[0];
-};
-
-/*
- * Definitions of optional directory entry attribute formats.
- *
- * Individual attributes do not have their length encoded in a generic way.
- * It is assumed that the consumer of an attribute knows its format. This
- * means that it is impossible to skip over an unknown attribute, except by
- * skipping over all remaining attributes (by using ->lde_reclen), which is
- * not too constraining, because new server versions will append new
- * attributes at the end of an entry.
- */
-
-/**
- * Fid directory attribute: a fid of an object referenced by the entry. This
- * will be almost always requested by the client and supplied by the server.
- *
- * Aligned to 8 bytes.
- */
-/* To keep compatibility with 1.8, let's have the fid in the lu_dirent struct. */
-
-/**
- * File type.
- *
- * Aligned to 2 bytes.
- */
-struct luda_type {
- __u16 lt_type;
-};
-
-#ifndef IFSHIFT
-#define IFSHIFT 12
-#endif
-
-#ifndef IFTODT
-#define IFTODT(type) (((type) & S_IFMT) >> IFSHIFT)
-#endif
-#ifndef DTTOIF
-#define DTTOIF(dirtype) ((dirtype) << IFSHIFT)
-#endif
-
-struct lu_dirpage {
- __le64 ldp_hash_start;
- __le64 ldp_hash_end;
- __le32 ldp_flags;
- __le32 ldp_pad0;
- struct lu_dirent ldp_entries[0];
-};
-
-enum lu_dirpage_flags {
- /**
- * dirpage contains no entry.
- */
- LDF_EMPTY = 1 << 0,
- /**
- * last entry's lde_hash equals ldp_hash_end.
- */
- LDF_COLLIDE = 1 << 1
-};
-
-static inline struct lu_dirent *lu_dirent_start(struct lu_dirpage *dp)
-{
- if (__le32_to_cpu(dp->ldp_flags) & LDF_EMPTY)
- return NULL;
- else
- return dp->ldp_entries;
-}
-
-static inline struct lu_dirent *lu_dirent_next(struct lu_dirent *ent)
-{
- struct lu_dirent *next;
-
- if (__le16_to_cpu(ent->lde_reclen) != 0)
- next = ((void *)ent) + __le16_to_cpu(ent->lde_reclen);
- else
- next = NULL;
-
- return next;
-}
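-
-/*
- * Iteration sketch: the intended use of the two helpers above is to
- * walk every entry of one lu_dirpage. The visit() callback is an
- * illustrative assumption; real callers inline their own per-entry
- * handling (entries are still in wire little-endian order here).
- */
-static inline void lu_dirpage_example_walk(struct lu_dirpage *dp,
- void (*visit)(struct lu_dirent *))
-{
- struct lu_dirent *ent;
-
- for (ent = lu_dirent_start(dp); ent; ent = lu_dirent_next(ent))
- visit(ent);
-}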
-
-static inline size_t lu_dirent_calc_size(size_t namelen, __u16 attr)
-{
- size_t size;
-
- if (attr & LUDA_TYPE) {
- const size_t align = sizeof(struct luda_type) - 1;
-
- size = (sizeof(struct lu_dirent) + namelen + align) & ~align;
- size += sizeof(struct luda_type);
- } else {
- size = sizeof(struct lu_dirent) + namelen;
- }
-
- return (size + 7) & ~7;
-}
-
-#define MDS_DIR_END_OFF 0xfffffffffffffffeULL
-
-/**
- * MDS_READPAGE page size
- *
- * This is the directory page size packed in MDS_READPAGE RPC.
- * It differs from PAGE_SIZE because the client needs to
- * access the struct lu_dirpage header packed at the beginning of
- * the "page"; without a fixed size there would be no way to find
- * where the lu_dirpage header is if client and server PAGE_SIZE differ.
- */
-#define LU_PAGE_SHIFT 12
-#define LU_PAGE_SIZE (1UL << LU_PAGE_SHIFT)
-#define LU_PAGE_MASK (~(LU_PAGE_SIZE - 1))
-
-#define LU_PAGE_COUNT (1 << (PAGE_SHIFT - LU_PAGE_SHIFT))
-
-/** @} lu_dir */
-
-struct lustre_handle {
- __u64 cookie;
-};
-
-#define DEAD_HANDLE_MAGIC 0xdeadbeefcafebabeULL
-
-static inline bool lustre_handle_is_used(const struct lustre_handle *lh)
-{
- return lh->cookie != 0ull;
-}
-
-static inline bool lustre_handle_equal(const struct lustre_handle *lh1,
- const struct lustre_handle *lh2)
-{
- return lh1->cookie == lh2->cookie;
-}
-
-static inline void lustre_handle_copy(struct lustre_handle *tgt,
- const struct lustre_handle *src)
-{
- tgt->cookie = src->cookie;
-}
-
-/* flags for lm_flags */
-#define MSGHDR_AT_SUPPORT 0x1
-#define MSGHDR_CKSUM_INCOMPAT18 0x2
-
-#define lustre_msg lustre_msg_v2
-/* we depend on this structure to be 8-byte aligned */
-/* this type is only endian-adjusted in lustre_unpack_msg() */
-struct lustre_msg_v2 {
- __u32 lm_bufcount;
- __u32 lm_secflvr;
- __u32 lm_magic;
- __u32 lm_repsize;
- __u32 lm_cksum;
- __u32 lm_flags;
- __u32 lm_padding_2;
- __u32 lm_padding_3;
- __u32 lm_buflens[0];
-};
-
-/* without gss, ptlrpc_body is put in the first buffer. */
-#define PTLRPC_NUM_VERSIONS 4
-
-struct ptlrpc_body_v3 {
- struct lustre_handle pb_handle;
- __u32 pb_type;
- __u32 pb_version;
- __u32 pb_opc;
- __u32 pb_status;
- __u64 pb_last_xid; /* highest replied XID without lower unreplied XID */
- __u16 pb_tag; /* virtual slot idx for multiple modifying RPCs */
- __u16 pb_padding0;
- __u32 pb_padding1;
- __u64 pb_last_committed;
- __u64 pb_transno;
- __u32 pb_flags;
- __u32 pb_op_flags;
- __u32 pb_conn_cnt;
- __u32 pb_timeout; /* for req, the deadline, for rep, the service est */
- __u32 pb_service_time; /* for rep, actual service time */
- __u32 pb_limit;
- __u64 pb_slv;
- /* VBR: pre-versions */
- __u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
- __u64 pb_mbits; /**< match bits for bulk request */
- /* padding for future needs */
- __u64 pb_padding64_0;
- __u64 pb_padding64_1;
- __u64 pb_padding64_2;
- char pb_jobid[LUSTRE_JOBID_SIZE];
-};
-
-#define ptlrpc_body ptlrpc_body_v3
-
-struct ptlrpc_body_v2 {
- struct lustre_handle pb_handle;
- __u32 pb_type;
- __u32 pb_version;
- __u32 pb_opc;
- __u32 pb_status;
- __u64 pb_last_xid; /* highest replied XID without lower unreplied XID */
- __u16 pb_tag; /* virtual slot idx for multiple modifying RPCs */
- __u16 pb_padding0;
- __u32 pb_padding1;
- __u64 pb_last_committed;
- __u64 pb_transno;
- __u32 pb_flags;
- __u32 pb_op_flags;
- __u32 pb_conn_cnt;
- __u32 pb_timeout; /* for req, the deadline, for rep, the service est */
- __u32 pb_service_time; /* for rep, actual service time, also used for
- * net_latency of req
- */
- __u32 pb_limit;
- __u64 pb_slv;
- /* VBR: pre-versions */
- __u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
- __u64 pb_mbits; /**< unused in V2 */
- /* padding for future needs */
- __u64 pb_padding64_0;
- __u64 pb_padding64_1;
- __u64 pb_padding64_2;
-};
-
-/* message body offset for lustre_msg_v2 */
-/* ptlrpc body offset in all request/reply messages */
-#define MSG_PTLRPC_BODY_OFF 0
-
-/* normal request/reply message record offset */
-#define REQ_REC_OFF 1
-#define REPLY_REC_OFF 1
-
-/* ldlm request message body offset */
-#define DLM_LOCKREQ_OFF 1 /* lockreq offset */
-#define DLM_REQ_REC_OFF 2 /* normal dlm request record offset */
-
-/* ldlm intent lock message body offset */
-#define DLM_INTENT_IT_OFF 2 /* intent lock it offset */
-#define DLM_INTENT_REC_OFF 3 /* intent lock record offset */
-
-/* ldlm reply message body offset */
-#define DLM_LOCKREPLY_OFF 1 /* lockrep offset */
-#define DLM_REPLY_REC_OFF 2 /* reply record offset */
-
-/** only use in req->rq_{req,rep}_swab_mask */
-#define MSG_PTLRPC_HEADER_OFF 31
-
-/* Flags that are operation-specific go in the top 16 bits. */
-#define MSG_OP_FLAG_MASK 0xffff0000
-#define MSG_OP_FLAG_SHIFT 16
-
-/* Flags that apply to all requests are in the bottom 16 bits */
-#define MSG_GEN_FLAG_MASK 0x0000ffff
-#define MSG_LAST_REPLAY 0x0001
-#define MSG_RESENT 0x0002
-#define MSG_REPLAY 0x0004
-/* #define MSG_AT_SUPPORT 0x0008
- * This was used in early prototypes of adaptive timeouts, and while there
- * shouldn't be any users of that code, there also isn't a need to use this
- * bit. Defer usage until at least 1.10 to avoid potential conflicts.
- */
-#define MSG_DELAY_REPLAY 0x0010
-#define MSG_VERSION_REPLAY 0x0020
-#define MSG_REQ_REPLAY_DONE 0x0040
-#define MSG_LOCK_REPLAY_DONE 0x0080
-
-/*
- * Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT)
- */
-
-#define MSG_CONNECT_RECOVERING 0x00000001
-#define MSG_CONNECT_RECONNECT 0x00000002
-#define MSG_CONNECT_REPLAYABLE 0x00000004
-/*#define MSG_CONNECT_PEER 0x8 */
-#define MSG_CONNECT_LIBCLIENT 0x00000010
-#define MSG_CONNECT_INITIAL 0x00000020
-#define MSG_CONNECT_ASYNC 0x00000040
-#define MSG_CONNECT_NEXT_VER 0x00000080 /* use next version of lustre_msg */
-#define MSG_CONNECT_TRANSNO 0x00000100 /* report transno */
-
-/* Connect flags */
-#define OBD_CONNECT_RDONLY 0x1ULL /*client has read-only access*/
-#define OBD_CONNECT_INDEX 0x2ULL /*connect specific LOV idx */
-#define OBD_CONNECT_MDS 0x4ULL /*connect from MDT to OST */
-#define OBD_CONNECT_GRANT 0x8ULL /*OSC gets grant at connect */
-#define OBD_CONNECT_SRVLOCK 0x10ULL /*server takes locks for cli */
-#define OBD_CONNECT_VERSION 0x20ULL /*Lustre versions in ocd */
-#define OBD_CONNECT_REQPORTAL 0x40ULL /*Separate non-IO req portal */
-#define OBD_CONNECT_ACL 0x80ULL /*access control lists */
-#define OBD_CONNECT_XATTR 0x100ULL /*client use extended attr */
-#define OBD_CONNECT_LARGE_ACL 0x200ULL /* more than 32 ACL entries */
-#define OBD_CONNECT_TRUNCLOCK 0x400ULL /*locks on server for punch */
-#define OBD_CONNECT_TRANSNO 0x800ULL /*replay sends init transno */
-#define OBD_CONNECT_IBITS 0x1000ULL /*support for inodebits locks*/
-#define OBD_CONNECT_JOIN 0x2000ULL /*files can be concatenated.
- *JOIN FILE is no longer
- *supported; keep this flag
- *only to prevent the bit
- *from being reused.
- */
-#define OBD_CONNECT_ATTRFID 0x4000ULL /*Server can GetAttr By Fid*/
-#define OBD_CONNECT_NODEVOH 0x8000ULL /*No open hndl on specl nodes*/
-#define OBD_CONNECT_RMT_CLIENT 0x10000ULL /* Remote client, never used
- * in production. Removed in
- * 2.9. Keep this flag to
- * avoid reuse.
- */
-#define OBD_CONNECT_RMT_CLIENT_FORCE 0x20000ULL /* Remote client by force,
- * never used in production.
- * Removed in 2.9. Keep this
- * flag to avoid reuse
- */
-#define OBD_CONNECT_BRW_SIZE 0x40000ULL /*Max bytes per rpc */
-#define OBD_CONNECT_QUOTA64 0x80000ULL /*Not used since 2.4 */
-#define OBD_CONNECT_MDS_CAPA 0x100000ULL /*MDS capability */
-#define OBD_CONNECT_OSS_CAPA 0x200000ULL /*OSS capability */
-#define OBD_CONNECT_CANCELSET 0x400000ULL /*Early batched cancels. */
-#define OBD_CONNECT_SOM 0x800000ULL /*Size on MDS */
-#define OBD_CONNECT_AT 0x1000000ULL /*client uses AT */
-#define OBD_CONNECT_LRU_RESIZE 0x2000000ULL /*LRU resize feature. */
-#define OBD_CONNECT_MDS_MDS 0x4000000ULL /*MDS-MDS connection */
-#define OBD_CONNECT_REAL 0x8000000ULL /* obsolete since 2.8 */
-#define OBD_CONNECT_CHANGE_QS 0x10000000ULL /*Not used since 2.4 */
-#define OBD_CONNECT_CKSUM 0x20000000ULL /*support several cksum algos*/
-#define OBD_CONNECT_FID 0x40000000ULL /*FID is supported by server */
-#define OBD_CONNECT_VBR 0x80000000ULL /*version based recovery */
-#define OBD_CONNECT_LOV_V3 0x100000000ULL /*client supports LOV v3 EA */
-#define OBD_CONNECT_GRANT_SHRINK 0x200000000ULL /* support grant shrink */
-#define OBD_CONNECT_SKIP_ORPHAN 0x400000000ULL /* don't reuse orphan objids */
-#define OBD_CONNECT_MAX_EASIZE 0x800000000ULL /* preserved for large EA */
-#define OBD_CONNECT_FULL20 0x1000000000ULL /* it is 2.0 client */
-#define OBD_CONNECT_LAYOUTLOCK 0x2000000000ULL /* client uses layout lock */
-#define OBD_CONNECT_64BITHASH 0x4000000000ULL /* client supports 64-bits
- * directory hash
- */
-#define OBD_CONNECT_MAXBYTES 0x8000000000ULL /* max stripe size */
-#define OBD_CONNECT_IMP_RECOV 0x10000000000ULL /* imp recovery support */
-#define OBD_CONNECT_JOBSTATS 0x20000000000ULL /* jobid in ptlrpc_body */
-#define OBD_CONNECT_UMASK 0x40000000000ULL /* create uses client umask */
-#define OBD_CONNECT_EINPROGRESS 0x80000000000ULL /* client handles -EINPROGRESS
- * RPC error properly
- */
-#define OBD_CONNECT_GRANT_PARAM 0x100000000000ULL/* extra grant params used for
- * finer space reservation
- */
-#define OBD_CONNECT_FLOCK_OWNER 0x200000000000ULL /* for the fixed 1.8
- * policy and 2.x server
- */
-#define OBD_CONNECT_LVB_TYPE 0x400000000000ULL /* variable type of LVB */
-#define OBD_CONNECT_NANOSEC_TIME 0x800000000000ULL /* nanosecond timestamps */
-#define OBD_CONNECT_LIGHTWEIGHT 0x1000000000000ULL/* lightweight connection */
-#define OBD_CONNECT_SHORTIO 0x2000000000000ULL/* short io */
-#define OBD_CONNECT_PINGLESS 0x4000000000000ULL/* pings not required */
-#define OBD_CONNECT_FLOCK_DEAD 0x8000000000000ULL/* flock deadlock detection */
-#define OBD_CONNECT_DISP_STRIPE 0x10000000000000ULL/*create stripe disposition*/
-#define OBD_CONNECT_OPEN_BY_FID 0x20000000000000ULL /* open by fid won't pack
- * name in request
- */
-#define OBD_CONNECT_LFSCK 0x40000000000000ULL/* support online LFSCK */
-#define OBD_CONNECT_UNLINK_CLOSE 0x100000000000000ULL/* close file in unlink */
-#define OBD_CONNECT_MULTIMODRPCS 0x200000000000000ULL /* support multiple modify
- * RPCs in parallel
- */
-#define OBD_CONNECT_DIR_STRIPE 0x400000000000000ULL/* striped DNE dir */
-#define OBD_CONNECT_SUBTREE 0x800000000000000ULL /* fileset mount */
-#define OBD_CONNECT_LOCK_AHEAD 0x1000000000000000ULL /* lock ahead */
-/** bulk matchbits is sent within ptlrpc_body */
-#define OBD_CONNECT_BULK_MBITS 0x2000000000000000ULL
-#define OBD_CONNECT_OBDOPACK 0x4000000000000000ULL /* compact OUT obdo */
-#define OBD_CONNECT_FLAGS2 0x8000000000000000ULL /* second flags word */
-
-/* XXX README XXX:
- * Please DO NOT add flag values here before first ensuring that this same
- * flag value is not in use on some other branch. Please clear any such
- * changes with senior engineers before starting to use a new flag. Then,
- * submit a small patch against EVERY branch that ONLY adds the new flag,
- * updates obd_connect_names[] for lprocfs_rd_connect_flags(), adds the
- * flag to check_obd_connect_data(), and updates wiretests accordingly, so it
- * can be approved and landed easily to reserve the flag for future use.
- */
-
-/* The MNE_SWAB flag is overloading the MDS_MDS bit only for the MGS
- * connection. It is a temporary bug fix for Imperative Recovery interop
- * between 2.2 and 2.3 x86/ppc nodes, and can be removed when interop for
- * 2.2 clients/servers is no longer needed. LU-1252/LU-1644.
- */
-#define OBD_CONNECT_MNE_SWAB OBD_CONNECT_MDS_MDS
-
-#define OCD_HAS_FLAG(ocd, flg) \
- (!!((ocd)->ocd_connect_flags & OBD_CONNECT_##flg))
-
-/* Features required for this version of the client to work with server */
-#define CLIENT_CONNECT_MDT_REQD (OBD_CONNECT_IBITS | OBD_CONNECT_FID | \
- OBD_CONNECT_FULL20)
-
-/* This structure is used for both request and reply.
- *
- * If we eventually have separate connect data for different types, which we
- * almost certainly will, then perhaps we stick a union in here.
- */
-struct obd_connect_data {
- __u64 ocd_connect_flags; /* OBD_CONNECT_* per above */
- __u32 ocd_version; /* lustre release version number */
- __u32 ocd_grant; /* initial cache grant amount (bytes) */
- __u32 ocd_index; /* LOV index to connect to */
- __u32 ocd_brw_size; /* Maximum BRW size in bytes */
- __u64 ocd_ibits_known; /* inode bits this client understands */
- __u8 ocd_blocksize; /* log2 of the backend filesystem blocksize */
- __u8 ocd_inodespace; /* log2 of the per-inode space consumption */
- __u16 ocd_grant_extent; /* per-extent grant overhead, in 1K blocks */
- __u32 ocd_unused; /* also fix lustre_swab_connect */
- __u64 ocd_transno; /* first transno from client to be replayed */
- __u32 ocd_group; /* MDS group on OST */
- __u32 ocd_cksum_types; /* supported checksum algorithms */
- __u32 ocd_max_easize; /* How big LOV EA can be on MDS */
- __u32 ocd_instance; /* instance # of this target */
- __u64 ocd_maxbytes; /* Maximum stripe size in bytes */
- /* Fields after ocd_maxbytes are only accessible by the receiver
- * if the corresponding flag in ocd_connect_flags is set. Accessing
- * any field after ocd_maxbytes on the receiver without a valid flag
- * may result in out-of-bound memory access and kernel oops.
- */
- __u16 ocd_maxmodrpcs; /* Maximum modify RPCs in parallel */
- __u16 padding0; /* added 2.1.0. also fix lustre_swab_connect */
- __u32 padding1; /* added 2.1.0. also fix lustre_swab_connect */
- __u64 ocd_connect_flags2;
- __u64 padding3; /* added 2.1.0. also fix lustre_swab_connect */
- __u64 padding4; /* added 2.1.0. also fix lustre_swab_connect */
- __u64 padding5; /* added 2.1.0. also fix lustre_swab_connect */
- __u64 padding6; /* added 2.1.0. also fix lustre_swab_connect */
- __u64 padding7; /* added 2.1.0. also fix lustre_swab_connect */
- __u64 padding8; /* added 2.1.0. also fix lustre_swab_connect */
- __u64 padding9; /* added 2.1.0. also fix lustre_swab_connect */
- __u64 paddingA; /* added 2.1.0. also fix lustre_swab_connect */
- __u64 paddingB; /* added 2.1.0. also fix lustre_swab_connect */
- __u64 paddingC; /* added 2.1.0. also fix lustre_swab_connect */
- __u64 paddingD; /* added 2.1.0. also fix lustre_swab_connect */
- __u64 paddingE; /* added 2.1.0. also fix lustre_swab_connect */
- __u64 paddingF; /* added 2.1.0. also fix lustre_swab_connect */
-};
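-
-/*
- * Usage sketch for OCD_HAS_FLAG() above (hypothetical helper): the
- * macro pastes the OBD_CONNECT_ prefix, so a flag is tested by its
- * short name.
- */
-static inline bool ocd_example_supports_grant_shrink(struct obd_connect_data *ocd)
-{
- return OCD_HAS_FLAG(ocd, GRANT_SHRINK);
-}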
-
-/* XXX README XXX:
- * Please DO NOT use any fields here before first ensuring that this same
- * field is not in use on some other branch. Please clear any such changes
- * with senior engineers before starting to use a new field. Then, submit
- * a small patch against EVERY branch that ONLY adds the new field along with
- * the matching OBD_CONNECT flag, so that can be approved and landed easily to
- * reserve the flag for future use.
- */
-
-/*
- * Supported checksum algorithms. Up to 32 checksum types are supported.
- * (32-bit mask stored in obd_connect_data::ocd_cksum_types)
- * Please update DECLARE_CKSUM_NAME/OBD_CKSUM_ALL in obd.h when adding a new
- * algorithm and also the OBD_FL_CKSUM* flags.
- */
-enum cksum_type {
- OBD_CKSUM_CRC32 = 0x00000001,
- OBD_CKSUM_ADLER = 0x00000002,
- OBD_CKSUM_CRC32C = 0x00000004,
-};
-
-/*
- * OST requests: OBDO & OBD request records
- */
-
-/* opcodes */
-enum ost_cmd {
- OST_REPLY = 0, /* reply ? */
- OST_GETATTR = 1,
- OST_SETATTR = 2,
- OST_READ = 3,
- OST_WRITE = 4,
- OST_CREATE = 5,
- OST_DESTROY = 6,
- OST_GET_INFO = 7,
- OST_CONNECT = 8,
- OST_DISCONNECT = 9,
- OST_PUNCH = 10,
- OST_OPEN = 11,
- OST_CLOSE = 12,
- OST_STATFS = 13,
- OST_SYNC = 16,
- OST_SET_INFO = 17,
- OST_QUOTACHECK = 18, /* not used since 2.4 */
- OST_QUOTACTL = 19,
- OST_QUOTA_ADJUST_QUNIT = 20, /* not used since 2.4 */
- OST_LAST_OPC
-};
-#define OST_FIRST_OPC OST_REPLY
-
-enum obdo_flags {
- OBD_FL_INLINEDATA = 0x00000001,
- OBD_FL_OBDMDEXISTS = 0x00000002,
- OBD_FL_DELORPHAN = 0x00000004, /* if set in o_flags delete orphans */
- OBD_FL_NORPC = 0x00000008, /* set in o_flags do in OSC not OST */
- OBD_FL_IDONLY = 0x00000010, /* set in o_flags only adjust obj id*/
- OBD_FL_RECREATE_OBJS = 0x00000020, /* recreate missing obj */
- OBD_FL_DEBUG_CHECK = 0x00000040, /* echo client/server debug check */
- OBD_FL_NO_USRQUOTA = 0x00000100, /* the object's owner is over quota */
- OBD_FL_NO_GRPQUOTA = 0x00000200, /* the object's group is over quota */
- OBD_FL_CREATE_CROW = 0x00000400, /* object should be create on write */
- OBD_FL_SRVLOCK = 0x00000800, /* delegate DLM locking to server */
- OBD_FL_CKSUM_CRC32 = 0x00001000, /* CRC32 checksum type */
- OBD_FL_CKSUM_ADLER = 0x00002000, /* ADLER checksum type */
- OBD_FL_CKSUM_CRC32C = 0x00004000, /* CRC32C checksum type */
- OBD_FL_CKSUM_RSVD2 = 0x00008000, /* for future cksum types */
- OBD_FL_CKSUM_RSVD3 = 0x00010000, /* for future cksum types */
- OBD_FL_SHRINK_GRANT = 0x00020000, /* object shrink the grant */
- OBD_FL_MMAP = 0x00040000, /* object is mmapped on the client.
- * XXX: obsoleted - reserved for old
- * clients prior than 2.2
- */
- OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */
- OBD_FL_NOSPC_BLK = 0x00100000, /* no more block space on OST */
- OBD_FL_FLUSH = 0x00200000, /* flush pages on the OST */
- OBD_FL_SHORT_IO = 0x00400000, /* short io request */
-
- /* Note that while these checksum values are currently separate bits,
- * in 2.x we can actually allow all values from 1-31 if we wanted.
- */
- OBD_FL_CKSUM_ALL = OBD_FL_CKSUM_CRC32 | OBD_FL_CKSUM_ADLER |
- OBD_FL_CKSUM_CRC32C,
-
- /* mask for local-only flag, which won't be sent over network */
- OBD_FL_LOCAL_MASK = 0xF0000000,
-};
-
-/*
- * All LOV EA magics should have the same postfix. If some new Lustre
- * version introduces a new LOV EA magic, then after a downgrade to an old
- * Lustre, even though the old system does not recognize the new magic,
- * it can still distinguish the corrupted cases by checking
- * the magic's postfix.
- */
-#define LOV_MAGIC_MAGIC 0x0BD0
-#define LOV_MAGIC_MASK 0xFFFF
-
-#define LOV_MAGIC_V1 (0x0BD10000 | LOV_MAGIC_MAGIC)
-#define LOV_MAGIC_JOIN_V1 (0x0BD20000 | LOV_MAGIC_MAGIC)
-#define LOV_MAGIC_V3 (0x0BD30000 | LOV_MAGIC_MAGIC)
-#define LOV_MAGIC_MIGRATE (0x0BD40000 | LOV_MAGIC_MAGIC)
-/* reserved for specifying OSTs */
-#define LOV_MAGIC_SPECIFIC (0x0BD50000 | LOV_MAGIC_MAGIC)
-#define LOV_MAGIC LOV_MAGIC_V1
-
-/*
- * magic for fully defined striping
- * The idea is that we should have different magics for striping "hints"
- * (struct lov_user_md_v[13]) and defined ready-to-use striping (struct
- * lov_mds_md_v[13]). At the moment the magics are used in the wire
- * protocol, so we can't just change them without lengthy preparation, but
- * we still need a mechanism to allow LOD to differentiate hint versus
- * ready striping. So, for now we use a trick: the MDT knows what to expect
- * from a request depending on the case (replay uses ready striping, a
- * non-replay request uses hints), so the MDT replaces the magic with the
- * appropriate one and LOD can easily understand what's inside -bzzz
- */
-#define LOV_MAGIC_V1_DEF 0x0CD10BD0
-#define LOV_MAGIC_V3_DEF 0x0CD30BD0
-
-#define lov_pattern(pattern) (pattern & ~LOV_PATTERN_F_MASK)
-#define lov_pattern_flags(pattern) (pattern & LOV_PATTERN_F_MASK)
-
-#define lov_ost_data lov_ost_data_v1
-struct lov_ost_data_v1 { /* per-stripe data structure (little-endian)*/
- struct ost_id l_ost_oi; /* OST object ID */
- __u32 l_ost_gen; /* generation of this l_ost_idx */
- __u32 l_ost_idx; /* OST index in LOV (lov_tgt_desc->tgts) */
-};
-
-#define lov_mds_md lov_mds_md_v1
-struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */
- __u32 lmm_magic; /* magic number = LOV_MAGIC_V1 */
- __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
- struct ost_id lmm_oi; /* LOV object ID */
- __u32 lmm_stripe_size; /* size of stripe in bytes */
- /* lmm_stripe_count used to be __u32 */
- __u16 lmm_stripe_count; /* num stripes in use for this object */
- __u16 lmm_layout_gen; /* layout generation number */
- struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
-};
-
-#define MAX_MD_SIZE \
- (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data))
-#define MIN_MD_SIZE \
- (sizeof(struct lov_mds_md) + 1 * sizeof(struct lov_ost_data))
-
-#define XATTR_NAME_ACL_ACCESS "system.posix_acl_access"
-#define XATTR_NAME_ACL_DEFAULT "system.posix_acl_default"
-#define XATTR_USER_PREFIX "user."
-#define XATTR_TRUSTED_PREFIX "trusted."
-#define XATTR_SECURITY_PREFIX "security."
-#define XATTR_LUSTRE_PREFIX "lustre."
-
-#define XATTR_NAME_LOV "trusted.lov"
-#define XATTR_NAME_LMA "trusted.lma"
-#define XATTR_NAME_LMV "trusted.lmv"
-#define XATTR_NAME_DEFAULT_LMV "trusted.dmv"
-#define XATTR_NAME_LINK "trusted.link"
-#define XATTR_NAME_FID "trusted.fid"
-#define XATTR_NAME_VERSION "trusted.version"
-#define XATTR_NAME_SOM "trusted.som"
-#define XATTR_NAME_HSM "trusted.hsm"
-#define XATTR_NAME_LFSCK_NAMESPACE "trusted.lfsck_namespace"
-
-struct lov_mds_md_v3 { /* LOV EA mds/wire data (little-endian) */
- __u32 lmm_magic; /* magic number = LOV_MAGIC_V3 */
- __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
- struct ost_id lmm_oi; /* LOV object ID */
- __u32 lmm_stripe_size; /* size of stripe in bytes */
- /* lmm_stripe_count used to be __u32 */
- __u16 lmm_stripe_count; /* num stripes in use for this object */
- __u16 lmm_layout_gen; /* layout generation number */
- char lmm_pool_name[LOV_MAXPOOLNAME + 1]; /* must be 32bit aligned */
- struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
-};
-
-static inline __u32 lov_mds_md_size(__u16 stripes, __u32 lmm_magic)
-{
- if (lmm_magic == LOV_MAGIC_V3)
- return sizeof(struct lov_mds_md_v3) +
- stripes * sizeof(struct lov_ost_data_v1);
- else
- return sizeof(struct lov_mds_md_v1) +
- stripes * sizeof(struct lov_ost_data_v1);
-}
-
-static inline __u32
-lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic)
-{
- switch (lmm_magic) {
- case LOV_MAGIC_V1: {
- struct lov_mds_md_v1 lmm;
-
- if (buf_size < sizeof(lmm))
- return 0;
-
- return (buf_size - sizeof(lmm)) / sizeof(lmm.lmm_objects[0]);
- }
- case LOV_MAGIC_V3: {
- struct lov_mds_md_v3 lmm;
-
- if (buf_size < sizeof(lmm))
- return 0;
-
- return (buf_size - sizeof(lmm)) / sizeof(lmm.lmm_objects[0]);
- }
- default:
- return 0;
- }
-}
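-
-/*
- * Consistency sketch (illustrative only): for the V1/V3 magics handled
- * above, a buffer sized by lov_mds_md_size() for N stripes reports at
- * least N stripes from lov_mds_md_max_stripe_count().
- */
-static inline bool lov_md_example_sizing(__u16 stripes, __u32 lmm_magic)
-{
- return lov_mds_md_max_stripe_count(lov_mds_md_size(stripes, lmm_magic),
- lmm_magic) >= stripes;
-}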
-
-#define OBD_MD_FLID (0x00000001ULL) /* object ID */
-#define OBD_MD_FLATIME (0x00000002ULL) /* access time */
-#define OBD_MD_FLMTIME (0x00000004ULL) /* data modification time */
-#define OBD_MD_FLCTIME (0x00000008ULL) /* change time */
-#define OBD_MD_FLSIZE (0x00000010ULL) /* size */
-#define OBD_MD_FLBLOCKS (0x00000020ULL) /* allocated blocks count */
-#define OBD_MD_FLBLKSZ (0x00000040ULL) /* block size */
-#define OBD_MD_FLMODE (0x00000080ULL) /* access bits (mode & ~S_IFMT) */
-#define OBD_MD_FLTYPE (0x00000100ULL) /* object type (mode & S_IFMT) */
-#define OBD_MD_FLUID (0x00000200ULL) /* user ID */
-#define OBD_MD_FLGID (0x00000400ULL) /* group ID */
-#define OBD_MD_FLFLAGS (0x00000800ULL) /* flags word */
-#define OBD_MD_FLNLINK (0x00002000ULL) /* link count */
-#define OBD_MD_FLGENER (0x00004000ULL) /* generation number */
-/*#define OBD_MD_FLINLINE (0x00008000ULL) inline data. used until 1.6.5 */
-#define OBD_MD_FLRDEV (0x00010000ULL) /* device number */
-#define OBD_MD_FLEASIZE (0x00020000ULL) /* extended attribute data */
-#define OBD_MD_LINKNAME (0x00040000ULL) /* symbolic link target */
-#define OBD_MD_FLHANDLE (0x00080000ULL) /* file/lock handle */
-#define OBD_MD_FLCKSUM (0x00100000ULL) /* bulk data checksum */
-#define OBD_MD_FLQOS (0x00200000ULL) /* quality of service stats */
-/*#define OBD_MD_FLOSCOPQ (0x00400000ULL) osc opaque data, never used */
-/* OBD_MD_FLCOOKIE (0x00800000ULL) obsolete in 2.8 */
-#define OBD_MD_FLGROUP (0x01000000ULL) /* group */
-#define OBD_MD_FLFID (0x02000000ULL) /* ->ost write inline fid */
-#define OBD_MD_FLEPOCH (0x04000000ULL) /* ->ost write with ioepoch */
- /* ->mds if epoch opens or closes
- */
-#define OBD_MD_FLGRANT (0x08000000ULL) /* ost preallocation space grant */
-#define OBD_MD_FLDIREA (0x10000000ULL) /* dir's extended attribute data */
-#define OBD_MD_FLUSRQUOTA (0x20000000ULL) /* over quota flags sent from ost */
-#define OBD_MD_FLGRPQUOTA (0x40000000ULL) /* over quota flags sent from ost */
-#define OBD_MD_FLMODEASIZE (0x80000000ULL) /* EA size will be changed */
-
-#define OBD_MD_MDS (0x0000000100000000ULL) /* where an inode lives on */
-#define OBD_MD_REINT (0x0000000200000000ULL) /* reintegrate oa */
-#define OBD_MD_MEA (0x0000000400000000ULL) /* CMD split EA */
-#define OBD_MD_TSTATE (0x0000000800000000ULL) /* transient state field */
-
-#define OBD_MD_FLXATTR (0x0000001000000000ULL) /* xattr */
-#define OBD_MD_FLXATTRLS (0x0000002000000000ULL) /* xattr list */
-#define OBD_MD_FLXATTRRM (0x0000004000000000ULL) /* xattr remove */
-#define OBD_MD_FLACL (0x0000008000000000ULL) /* ACL */
-/* OBD_MD_FLRMTPERM (0x0000010000000000ULL) remote perm, obsolete */
-#define OBD_MD_FLMDSCAPA (0x0000020000000000ULL) /* MDS capability */
-#define OBD_MD_FLOSSCAPA (0x0000040000000000ULL) /* OSS capability */
-#define OBD_MD_FLCKSPLIT (0x0000080000000000ULL) /* Check split on server */
-#define OBD_MD_FLCROSSREF (0x0000100000000000ULL) /* Cross-ref case */
-#define OBD_MD_FLGETATTRLOCK (0x0000200000000000ULL) /* Get IOEpoch attributes
- * under lock; for xattr
- * requests means the
- * client holds the lock
- */
-#define OBD_MD_FLOBJCOUNT (0x0000400000000000ULL) /* for multiple destroy */
-
-/* OBD_MD_FLRMTLSETFACL (0x0001000000000000ULL) lfs lsetfacl, obsolete */
-/* OBD_MD_FLRMTLGETFACL (0x0002000000000000ULL) lfs lgetfacl, obsolete */
-/* OBD_MD_FLRMTRSETFACL (0x0004000000000000ULL) lfs rsetfacl, obsolete */
-/* OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) lfs rgetfacl, obsolete */
-
-#define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */
-#define OBD_MD_CLOSE_INTENT_EXECED (0x0020000000000000ULL) /* close intent
- * executed
- */
-
-#define OBD_MD_DEFAULT_MEA (0x0040000000000000ULL) /* default MEA */
-
-#define OBD_MD_FLGETATTR (OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME | \
- OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLKSZ | \
- OBD_MD_FLMODE | OBD_MD_FLTYPE | OBD_MD_FLUID | \
- OBD_MD_FLGID | OBD_MD_FLFLAGS | OBD_MD_FLNLINK | \
- OBD_MD_FLGENER | OBD_MD_FLRDEV | OBD_MD_FLGROUP)
-
-#define OBD_MD_FLXATTRALL (OBD_MD_FLXATTR | OBD_MD_FLXATTRLS)
-
-/* don't forget obdo_fid which is way down at the bottom so it can
- * come after the definition of llog_cookie
- */
-
-enum hss_valid {
- HSS_SETMASK = 0x01,
- HSS_CLEARMASK = 0x02,
- HSS_ARCHIVE_ID = 0x04,
-};
-
-struct hsm_state_set {
- __u32 hss_valid;
- __u32 hss_archive_id;
- __u64 hss_setmask;
- __u64 hss_clearmask;
-};
-
-/* ost_body.data values for OST_BRW */
-
-#define OBD_BRW_READ 0x01
-#define OBD_BRW_WRITE 0x02
-#define OBD_BRW_RWMASK (OBD_BRW_READ | OBD_BRW_WRITE)
-#define OBD_BRW_SYNC 0x08 /* this page is a part of synchronous
- * transfer and is not accounted in
- * the grant.
- */
-#define OBD_BRW_CHECK 0x10
-#define OBD_BRW_FROM_GRANT 0x20 /* the osc manages this under llite */
-#define OBD_BRW_GRANTED 0x40 /* the ost manages this */
-#define OBD_BRW_NOCACHE 0x80 /* this page is a part of non-cached IO */
-#define OBD_BRW_NOQUOTA 0x100
-#define OBD_BRW_SRVLOCK 0x200 /* Client holds no lock over this page */
-#define OBD_BRW_ASYNC 0x400 /* Server may delay commit to disk */
-#define OBD_BRW_MEMALLOC 0x800 /* Client runs in the "kswapd" context */
-#define OBD_BRW_OVER_USRQUOTA 0x1000 /* Running out of user quota */
-#define OBD_BRW_OVER_GRPQUOTA 0x2000 /* Running out of group quota */
-#define OBD_BRW_SOFT_SYNC 0x4000 /* This flag notifies the server
- * that the client is running low on
- * space for unstable pages; asking
- * it to sync quickly
- */
-
-#define OBD_OBJECT_EOF LUSTRE_EOF
-
-#define OST_MIN_PRECREATE 32
-#define OST_MAX_PRECREATE 20000
-
-struct obd_ioobj {
- struct ost_id ioo_oid; /* object ID, if multi-obj BRW */
- __u32 ioo_max_brw; /* low 16 bits were o_mode before 2.4,
- * now (PTLRPC_BULK_OPS_COUNT - 1) in
- * high 16 bits in 2.4 and later
- */
- __u32 ioo_bufcnt; /* number of niobufs for this object */
-};
-
-/*
- * NOTE: IOOBJ_MAX_BRW_BITS defines the _offset_ of the max_brw field in
- * ioo_max_brw, NOT the maximum number of bits in PTLRPC_BULK_OPS_BITS.
- * That said, ioo_max_brw is a 32-bit field so the limit is also 16 bits.
- */
-#define IOOBJ_MAX_BRW_BITS 16
-#define ioobj_max_brw_get(ioo) (((ioo)->ioo_max_brw >> IOOBJ_MAX_BRW_BITS) + 1)
-#define ioobj_max_brw_set(ioo, num) \
-do { (ioo)->ioo_max_brw = ((num) - 1) << IOOBJ_MAX_BRW_BITS; } while (0)
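-
-/*
- * Round-trip sketch for the accessors above: storing a max_brw count
- * of 8 (an illustrative power-of-two RPC count) reads back as 8, since
- * the value is kept biased by one in the high 16 bits.
- */
-static inline bool ioobj_example_roundtrip(struct obd_ioobj *ioo)
-{
- ioobj_max_brw_set(ioo, 8);
- return ioobj_max_brw_get(ioo) == 8;
-}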
-
-/* multiple of 8 bytes => can array */
-struct niobuf_remote {
- __u64 rnb_offset;
- __u32 rnb_len;
- __u32 rnb_flags;
-};
-
-/* lock value block communicated between the filter and llite */
-
-/* OST_LVB_ERR_INIT is needed because the return code in rc is
- * negative, i.e. because ((MASK + rc) & MASK) != MASK.
- */
-#define OST_LVB_ERR_INIT 0xffbadbad80000000ULL
-#define OST_LVB_ERR_MASK 0xffbadbad00000000ULL
-#define OST_LVB_IS_ERR(blocks) \
- ((blocks & OST_LVB_ERR_MASK) == OST_LVB_ERR_MASK)
-#define OST_LVB_SET_ERR(blocks, rc) \
- do { blocks = OST_LVB_ERR_INIT + rc; } while (0)
-#define OST_LVB_GET_ERR(blocks) (int)(blocks - OST_LVB_ERR_INIT)
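-
-/*
- * Encoding sketch (illustrative rc): a negative return code stored
- * with OST_LVB_SET_ERR() is detected by OST_LVB_IS_ERR() and recovered
- * by OST_LVB_GET_ERR().
- */
-static inline bool ost_lvb_example_roundtrip(void)
-{
- __u64 blocks;
-
- OST_LVB_SET_ERR(blocks, -5); /* e.g. -EIO */
- return OST_LVB_IS_ERR(blocks) && OST_LVB_GET_ERR(blocks) == -5;
-}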
-
-struct ost_lvb_v1 {
- __u64 lvb_size;
- __s64 lvb_mtime;
- __s64 lvb_atime;
- __s64 lvb_ctime;
- __u64 lvb_blocks;
-};
-
-struct ost_lvb {
- __u64 lvb_size;
- __s64 lvb_mtime;
- __s64 lvb_atime;
- __s64 lvb_ctime;
- __u64 lvb_blocks;
- __u32 lvb_mtime_ns;
- __u32 lvb_atime_ns;
- __u32 lvb_ctime_ns;
- __u32 lvb_padding;
-};
-
-/*
- * lquota data structures
- */
-
-/* The lquota_id structure is a union of all the possible identifier types that
- * can be used with quota, this includes:
- * - 64-bit user ID
- * - 64-bit group ID
- * - a FID which can be used for per-directory quota in the future
- */
-union lquota_id {
- struct lu_fid qid_fid; /* FID for per-directory quota */
- __u64 qid_uid; /* user identifier */
- __u64 qid_gid; /* group identifier */
-};
-
-/* quotactl management */
-struct obd_quotactl {
- __u32 qc_cmd;
- __u32 qc_type; /* see Q_* flag below */
- __u32 qc_id;
- __u32 qc_stat;
- struct obd_dqinfo qc_dqinfo;
- struct obd_dqblk qc_dqblk;
-};
-
-#define Q_COPY(out, in, member) (out)->member = (in)->member
-
-#define QCTL_COPY(out, in) \
-do { \
- Q_COPY(out, in, qc_cmd); \
- Q_COPY(out, in, qc_type); \
- Q_COPY(out, in, qc_id); \
- Q_COPY(out, in, qc_stat); \
- Q_COPY(out, in, qc_dqinfo); \
- Q_COPY(out, in, qc_dqblk); \
-} while (0)
-
-/* Data structures associated with the quota locks */
-
-/* Glimpse descriptor used for the index & per-ID quota locks */
-struct ldlm_gl_lquota_desc {
- union lquota_id gl_id; /* quota ID subject to the glimpse */
- __u64 gl_flags; /* see LQUOTA_FL* below */
- __u64 gl_ver; /* new index version */
- __u64 gl_hardlimit; /* new hardlimit or qunit value */
- __u64 gl_softlimit; /* new softlimit */
- __u64 gl_time;
- __u64 gl_pad2;
-};
-
-/* quota glimpse flags */
-#define LQUOTA_FL_EDQUOT 0x1 /* user/group out of quota space on QMT */
-
-/* LVB used with quota (global and per-ID) locks */
-struct lquota_lvb {
- __u64 lvb_flags; /* see LQUOTA_FL* above */
- __u64 lvb_id_may_rel; /* space that might be released later */
- __u64 lvb_id_rel; /* space released by the slave for this ID */
- __u64 lvb_id_qunit; /* current qunit value */
- __u64 lvb_pad1;
-};
-
-/* op codes */
-enum quota_cmd {
- QUOTA_DQACQ = 601,
- QUOTA_DQREL = 602,
- QUOTA_LAST_OPC
-};
-#define QUOTA_FIRST_OPC QUOTA_DQACQ
-
-/*
- * MDS REQ RECORDS
- */
-
-/* opcodes */
-enum mds_cmd {
- MDS_GETATTR = 33,
- MDS_GETATTR_NAME = 34,
- MDS_CLOSE = 35,
- MDS_REINT = 36,
- MDS_READPAGE = 37,
- MDS_CONNECT = 38,
- MDS_DISCONNECT = 39,
- MDS_GETSTATUS = 40,
- MDS_STATFS = 41,
- MDS_PIN = 42, /* obsolete, never used in a release */
- MDS_UNPIN = 43, /* obsolete, never used in a release */
- MDS_SYNC = 44,
- MDS_DONE_WRITING = 45, /* obsolete since 2.8.0 */
- MDS_SET_INFO = 46,
- MDS_QUOTACHECK = 47, /* not used since 2.4 */
- MDS_QUOTACTL = 48,
- MDS_GETXATTR = 49,
- MDS_SETXATTR = 50, /* obsolete, now it's MDS_REINT op */
- MDS_WRITEPAGE = 51,
- MDS_IS_SUBDIR = 52, /* obsolete, never used in a release */
- MDS_GET_INFO = 53,
- MDS_HSM_STATE_GET = 54,
- MDS_HSM_STATE_SET = 55,
- MDS_HSM_ACTION = 56,
- MDS_HSM_PROGRESS = 57,
- MDS_HSM_REQUEST = 58,
- MDS_HSM_CT_REGISTER = 59,
- MDS_HSM_CT_UNREGISTER = 60,
- MDS_SWAP_LAYOUTS = 61,
- MDS_LAST_OPC
-};
-
-#define MDS_FIRST_OPC MDS_GETATTR
-
-/*
- * Do not exceed 63
- */
-
-enum mdt_reint_cmd {
- REINT_SETATTR = 1,
- REINT_CREATE = 2,
- REINT_LINK = 3,
- REINT_UNLINK = 4,
- REINT_RENAME = 5,
- REINT_OPEN = 6,
- REINT_SETXATTR = 7,
- REINT_RMENTRY = 8,
- REINT_MIGRATE = 9,
- REINT_MAX
-};
-
-/* the disposition of the intent outlines what was executed */
-#define DISP_IT_EXECD 0x00000001
-#define DISP_LOOKUP_EXECD 0x00000002
-#define DISP_LOOKUP_NEG 0x00000004
-#define DISP_LOOKUP_POS 0x00000008
-#define DISP_OPEN_CREATE 0x00000010
-#define DISP_OPEN_OPEN 0x00000020
-#define DISP_ENQ_COMPLETE 0x00400000 /* obsolete and unused */
-#define DISP_ENQ_OPEN_REF 0x00800000
-#define DISP_ENQ_CREATE_REF 0x01000000
-#define DISP_OPEN_LOCK 0x02000000
-#define DISP_OPEN_LEASE 0x04000000
-#define DISP_OPEN_STRIPE 0x08000000
-#define DISP_OPEN_DENY 0x10000000
-
-/* INODE LOCK PARTS */
-#define MDS_INODELOCK_LOOKUP 0x000001 /* For namespace, dentry etc, and also
- * was used to protect permission (mode,
- * owner, group etc) before 2.4.
- */
-#define MDS_INODELOCK_UPDATE 0x000002 /* size, links, timestamps */
-#define MDS_INODELOCK_OPEN 0x000004 /* For opened files */
-#define MDS_INODELOCK_LAYOUT 0x000008 /* for layout */
-
-/* The PERM bit was added in 2.4 to protect permissions (mode, owner, group,
- * acl, etc.) separately from the LOOKUP lock, because for remote directories
- * (in DNE) these locks will be granted by different MDTs (different ldlm
- * namespaces).
- *
- * For a local directory, the MDT will always grant UPDATE_LOCK|PERM_LOCK
- * together. For a remote directory, the master MDT, where the remote
- * directory is, will grant UPDATE_LOCK|PERM_LOCK, and the remote MDT, where
- * the name entry is, will grant LOOKUP_LOCK.
- */
-#define MDS_INODELOCK_PERM 0x000010
-#define MDS_INODELOCK_XATTR 0x000020 /* extended attributes */
-
-#define MDS_INODELOCK_MAXSHIFT 5
-/* This FULL lock is useful to take on unlink sort of operations */
-#define MDS_INODELOCK_FULL ((1 << (MDS_INODELOCK_MAXSHIFT + 1)) - 1)
-
-/* NOTE: until Lustre 1.8.7/2.1.1 the fid_ver() was packed into name[2],
- * but was moved into name[1] along with the OID to avoid consuming the
- * name[2,3] fields that need to be used for the quota id (also a FID).
- */
-enum {
- LUSTRE_RES_ID_SEQ_OFF = 0,
- LUSTRE_RES_ID_VER_OID_OFF = 1,
- LUSTRE_RES_ID_WAS_VER_OFF = 2, /* see note above */
- LUSTRE_RES_ID_QUOTA_SEQ_OFF = 2,
- LUSTRE_RES_ID_QUOTA_VER_OID_OFF = 3,
- LUSTRE_RES_ID_HSH_OFF = 3
-};
-
-#define MDS_STATUS_CONN 1
-#define MDS_STATUS_LOV 2
-
-/* these should be identical to their EXT4_*_FL counterparts, they are
- * redefined here only to avoid dragging in fs/ext4/ext4.h
- */
-#define LUSTRE_SYNC_FL 0x00000008 /* Synchronous updates */
-#define LUSTRE_IMMUTABLE_FL 0x00000010 /* Immutable file */
-#define LUSTRE_APPEND_FL 0x00000020 /* writes to file may only append */
-#define LUSTRE_NODUMP_FL 0x00000040 /* do not dump file */
-#define LUSTRE_NOATIME_FL 0x00000080 /* do not update atime */
-#define LUSTRE_INDEX_FL 0x00001000 /* hash-indexed directory */
-#define LUSTRE_DIRSYNC_FL 0x00010000 /* dirsync behaviour (dir only) */
-#define LUSTRE_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
-#define LUSTRE_DIRECTIO_FL 0x00100000 /* Use direct i/o */
-#define LUSTRE_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
-
-/* Convert wire LUSTRE_*_FL to corresponding client local VFS S_* values
- * for the client inode i_flags. The LUSTRE_*_FL are the Lustre wire
- * protocol equivalents of LDISKFS_*_FL values stored on disk, while
- * the S_* flags are kernel-internal values that change between kernel
- * versions. These flags are set/cleared via FSFILT_IOC_{GET,SET}_FLAGS.
- * See b=16526 for a full history.
- */
-static inline int ll_ext_to_inode_flags(int flags)
-{
- return (((flags & LUSTRE_SYNC_FL) ? S_SYNC : 0) |
- ((flags & LUSTRE_NOATIME_FL) ? S_NOATIME : 0) |
- ((flags & LUSTRE_APPEND_FL) ? S_APPEND : 0) |
- ((flags & LUSTRE_DIRSYNC_FL) ? S_DIRSYNC : 0) |
- ((flags & LUSTRE_IMMUTABLE_FL) ? S_IMMUTABLE : 0));
-}
-
-static inline int ll_inode_to_ext_flags(int iflags)
-{
- return (((iflags & S_SYNC) ? LUSTRE_SYNC_FL : 0) |
- ((iflags & S_NOATIME) ? LUSTRE_NOATIME_FL : 0) |
- ((iflags & S_APPEND) ? LUSTRE_APPEND_FL : 0) |
- ((iflags & S_DIRSYNC) ? LUSTRE_DIRSYNC_FL : 0) |
- ((iflags & S_IMMUTABLE) ? LUSTRE_IMMUTABLE_FL : 0));
-}
-
-/* 64 possible states */
-enum md_transient_state {
- MS_RESTORE = (1 << 0), /* restore is running */
-};
-
-struct mdt_body {
- struct lu_fid mbo_fid1;
- struct lu_fid mbo_fid2;
- struct lustre_handle mbo_handle;
- __u64 mbo_valid;
- __u64 mbo_size; /* Offset, in the case of MDS_READPAGE */
- __s64 mbo_mtime;
- __s64 mbo_atime;
- __s64 mbo_ctime;
- __u64 mbo_blocks; /* XID, in the case of MDS_READPAGE */
- __u64 mbo_ioepoch;
- __u64 mbo_t_state; /* transient file state defined in
- * enum md_transient_state
- * was "ino" until 2.4.0
- */
- __u32 mbo_fsuid;
- __u32 mbo_fsgid;
- __u32 mbo_capability;
- __u32 mbo_mode;
- __u32 mbo_uid;
- __u32 mbo_gid;
- __u32 mbo_flags; /* LUSTRE_*_FL file attributes */
- __u32 mbo_rdev;
- __u32 mbo_nlink; /* #bytes to read in the case of MDS_READPAGE */
- __u32 mbo_unused2; /* was "generation" until 2.4.0 */
- __u32 mbo_suppgid;
- __u32 mbo_eadatasize;
- __u32 mbo_aclsize;
- __u32 mbo_max_mdsize;
- __u32 mbo_unused3; /* was max_cookiesize until 2.8 */
- __u32 mbo_uid_h; /* high 32-bits of uid, for FUID */
- __u32 mbo_gid_h; /* high 32-bits of gid, for FUID */
- __u32 mbo_padding_5; /* also fix lustre_swab_mdt_body */
- __u64 mbo_padding_6;
- __u64 mbo_padding_7;
- __u64 mbo_padding_8;
- __u64 mbo_padding_9;
- __u64 mbo_padding_10;
-}; /* 216 bytes */
-
-struct mdt_ioepoch {
- struct lustre_handle mio_handle;
- __u64 mio_unused1; /* was ioepoch */
- __u32 mio_unused2; /* was flags */
- __u32 mio_padding;
-};
-
-/* permissions for md_perm.mp_perm */
-enum {
- CFS_SETUID_PERM = 0x01,
- CFS_SETGID_PERM = 0x02,
- CFS_SETGRP_PERM = 0x04,
-};
-
-struct mdt_rec_setattr {
- __u32 sa_opcode;
- __u32 sa_cap;
- __u32 sa_fsuid;
- __u32 sa_fsuid_h;
- __u32 sa_fsgid;
- __u32 sa_fsgid_h;
- __u32 sa_suppgid;
- __u32 sa_suppgid_h;
- __u32 sa_padding_1;
- __u32 sa_padding_1_h;
- struct lu_fid sa_fid;
- __u64 sa_valid;
- __u32 sa_uid;
- __u32 sa_gid;
- __u64 sa_size;
- __u64 sa_blocks;
- __s64 sa_mtime;
- __s64 sa_atime;
- __s64 sa_ctime;
- __u32 sa_attr_flags;
- __u32 sa_mode;
- __u32 sa_bias; /* some operation flags */
- __u32 sa_padding_3;
- __u32 sa_padding_4;
- __u32 sa_padding_5;
-};
-
-/*
- * Attribute flags used in mdt_rec_setattr::sa_valid.
- * The kernel's #defines for ATTR_* should not be used over the network
- * since the client and MDS may run different kernels (see bug 13828)
- * Therefore, we should only use MDS_ATTR_* attributes for sa_valid.
- */
-#define MDS_ATTR_MODE 0x1ULL /* = 1 */
-#define MDS_ATTR_UID 0x2ULL /* = 2 */
-#define MDS_ATTR_GID 0x4ULL /* = 4 */
-#define MDS_ATTR_SIZE 0x8ULL /* = 8 */
-#define MDS_ATTR_ATIME 0x10ULL /* = 16 */
-#define MDS_ATTR_MTIME 0x20ULL /* = 32 */
-#define MDS_ATTR_CTIME 0x40ULL /* = 64 */
-#define MDS_ATTR_ATIME_SET 0x80ULL /* = 128 */
-#define MDS_ATTR_MTIME_SET 0x100ULL /* = 256 */
-#define MDS_ATTR_FORCE 0x200ULL /* = 512, not a change itself, but force the change */
-#define MDS_ATTR_ATTR_FLAG 0x400ULL /* = 1024 */
-#define MDS_ATTR_KILL_SUID 0x800ULL /* = 2048 */
-#define MDS_ATTR_KILL_SGID 0x1000ULL /* = 4096 */
-#define MDS_ATTR_CTIME_SET 0x2000ULL /* = 8192 */
-#define MDS_ATTR_FROM_OPEN 0x4000ULL /* = 16384, called from open path,
- * ie O_TRUNC
- */
-#define MDS_ATTR_BLOCKS 0x8000ULL /* = 32768 */
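-
-/*
- * Illustrative sketch (not the in-tree packing helper): how a client
- * could translate kernel ATTR_* bits into the wire MDS_ATTR_* bits, as
- * the comment above requires. The exact in-tree helper may differ.
- */
-static inline __u64 attr_bits_to_mds_attr_bits(unsigned int ia_valid)
-{
-	__u64 sa_valid = 0;
-
-	if (ia_valid & ATTR_MODE)
-		sa_valid |= MDS_ATTR_MODE;
-	if (ia_valid & ATTR_UID)
-		sa_valid |= MDS_ATTR_UID;
-	if (ia_valid & ATTR_GID)
-		sa_valid |= MDS_ATTR_GID;
-	if (ia_valid & ATTR_SIZE)
-		sa_valid |= MDS_ATTR_SIZE;
-	/* ... the remaining bits follow the same pattern ... */
-	return sa_valid;
-}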
-
-#define MDS_FMODE_CLOSED 00000000
-#define MDS_FMODE_EXEC 00000004
-/* MDS_FMODE_EPOCH 01000000 obsolete since 2.8.0 */
-/* MDS_FMODE_TRUNC 02000000 obsolete since 2.8.0 */
-/* MDS_FMODE_SOM 04000000 obsolete since 2.8.0 */
-
-#define MDS_OPEN_CREATED 00000010
-#define MDS_OPEN_CROSS 00000020
-
-#define MDS_OPEN_CREAT 00000100
-#define MDS_OPEN_EXCL 00000200
-#define MDS_OPEN_TRUNC 00001000
-#define MDS_OPEN_APPEND 00002000
-#define MDS_OPEN_SYNC 00010000
-#define MDS_OPEN_DIRECTORY 00200000
-
-#define MDS_OPEN_BY_FID 040000000 /* open_by_fid for known object */
-#define MDS_OPEN_DELAY_CREATE 0100000000 /* delay initial object create */
-#define MDS_OPEN_OWNEROVERRIDE 0200000000 /* NFSD rw-reopen ro file for owner */
-#define MDS_OPEN_JOIN_FILE 0400000000 /* open for join file.
- * JOIN FILE is no longer
- * supported; this flag is
- * reserved only to keep the
- * bit from being reused.
- */
-
-#define MDS_OPEN_LOCK 04000000000 /* This open requires open lock */
-#define MDS_OPEN_HAS_EA 010000000000 /* specify object create pattern */
-#define MDS_OPEN_HAS_OBJS 020000000000 /* Objects already exist; just set the EA */
-#define MDS_OPEN_NORESTORE 0100000000000ULL /* Do not restore file at open */
-#define MDS_OPEN_NEWSTRIPE 0200000000000ULL /* New stripe needed (restripe or
- * hsm restore)
- */
-#define MDS_OPEN_VOLATILE 0400000000000ULL /* File is volatile = created
- * unlinked
- */
-#define MDS_OPEN_LEASE 01000000000000ULL /* Open the file and grant lease
- * delegation, succeed if it's not
- * being opened with conflict mode.
- */
-#define MDS_OPEN_RELEASE 02000000000000ULL /* Open the file for HSM release */
-
-#define MDS_OPEN_FL_INTERNAL (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS | \
- MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK | \
- MDS_OPEN_BY_FID | MDS_OPEN_LEASE | \
- MDS_OPEN_RELEASE)
-
-enum mds_op_bias {
- MDS_CHECK_SPLIT = 1 << 0,
- MDS_CROSS_REF = 1 << 1,
- MDS_VTX_BYPASS = 1 << 2,
- MDS_PERM_BYPASS = 1 << 3,
-/* MDS_SOM = 1 << 4, obsolete since 2.8.0 */
- MDS_QUOTA_IGNORE = 1 << 5,
- MDS_CLOSE_CLEANUP = 1 << 6,
- MDS_KEEP_ORPHAN = 1 << 7,
- MDS_RECOV_OPEN = 1 << 8,
- MDS_DATA_MODIFIED = 1 << 9,
- MDS_CREATE_VOLATILE = 1 << 10,
- MDS_OWNEROVERRIDE = 1 << 11,
- MDS_HSM_RELEASE = 1 << 12,
- MDS_RENAME_MIGRATE = 1 << 13,
- MDS_CLOSE_LAYOUT_SWAP = 1 << 14,
-};
-
-/* instance of mdt_reint_rec */
-struct mdt_rec_create {
- __u32 cr_opcode;
- __u32 cr_cap;
- __u32 cr_fsuid;
- __u32 cr_fsuid_h;
- __u32 cr_fsgid;
- __u32 cr_fsgid_h;
- __u32 cr_suppgid1;
- __u32 cr_suppgid1_h;
- __u32 cr_suppgid2;
- __u32 cr_suppgid2_h;
- struct lu_fid cr_fid1;
- struct lu_fid cr_fid2;
- struct lustre_handle cr_old_handle; /* handle in case of open replay */
- __s64 cr_time;
- __u64 cr_rdev;
- __u64 cr_ioepoch;
- __u64 cr_padding_1; /* rr_blocks */
- __u32 cr_mode;
- __u32 cr_bias;
- /* use of helpers set/get_mrc_cr_flags() is needed to access
- * 64 bits cr_flags [cr_flags_l, cr_flags_h], this is done to
- * extend cr_flags size without breaking 1.8 compat
- */
- __u32 cr_flags_l; /* for use with open, low 32 bits */
- __u32 cr_flags_h; /* for use with open, high 32 bits */
- __u32 cr_umask; /* umask for create */
- __u32 cr_padding_4; /* rr_padding_4 */
-};
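-
-/*
- * Hedged sketch of the set/get_mrc_cr_flags() helpers referenced in the
- * comment above: they split/join the 64-bit open flags across the two
- * __u32 fields so that 1.8 wire compatibility is preserved.
- */
-static inline void set_mrc_cr_flags(struct mdt_rec_create *mrc, __u64 flags)
-{
-	mrc->cr_flags_l = (__u32)(flags & 0xFFFFFFFFULL);
-	mrc->cr_flags_h = (__u32)(flags >> 32);
-}
-
-static inline __u64 get_mrc_cr_flags(const struct mdt_rec_create *mrc)
-{
-	return ((__u64)mrc->cr_flags_h << 32) | mrc->cr_flags_l;
-}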
-
-/* instance of mdt_reint_rec */
-struct mdt_rec_link {
- __u32 lk_opcode;
- __u32 lk_cap;
- __u32 lk_fsuid;
- __u32 lk_fsuid_h;
- __u32 lk_fsgid;
- __u32 lk_fsgid_h;
- __u32 lk_suppgid1;
- __u32 lk_suppgid1_h;
- __u32 lk_suppgid2;
- __u32 lk_suppgid2_h;
- struct lu_fid lk_fid1;
- struct lu_fid lk_fid2;
- __s64 lk_time;
- __u64 lk_padding_1; /* rr_atime */
- __u64 lk_padding_2; /* rr_ctime */
- __u64 lk_padding_3; /* rr_size */
- __u64 lk_padding_4; /* rr_blocks */
- __u32 lk_bias;
- __u32 lk_padding_5; /* rr_mode */
- __u32 lk_padding_6; /* rr_flags */
- __u32 lk_padding_7; /* rr_padding_2 */
- __u32 lk_padding_8; /* rr_padding_3 */
- __u32 lk_padding_9; /* rr_padding_4 */
-};
-
-/* instance of mdt_reint_rec */
-struct mdt_rec_unlink {
- __u32 ul_opcode;
- __u32 ul_cap;
- __u32 ul_fsuid;
- __u32 ul_fsuid_h;
- __u32 ul_fsgid;
- __u32 ul_fsgid_h;
- __u32 ul_suppgid1;
- __u32 ul_suppgid1_h;
- __u32 ul_suppgid2;
- __u32 ul_suppgid2_h;
- struct lu_fid ul_fid1;
- struct lu_fid ul_fid2;
- __s64 ul_time;
- __u64 ul_padding_2; /* rr_atime */
- __u64 ul_padding_3; /* rr_ctime */
- __u64 ul_padding_4; /* rr_size */
- __u64 ul_padding_5; /* rr_blocks */
- __u32 ul_bias;
- __u32 ul_mode;
- __u32 ul_padding_6; /* rr_flags */
- __u32 ul_padding_7; /* rr_padding_2 */
- __u32 ul_padding_8; /* rr_padding_3 */
- __u32 ul_padding_9; /* rr_padding_4 */
-};
-
-/* instance of mdt_reint_rec */
-struct mdt_rec_rename {
- __u32 rn_opcode;
- __u32 rn_cap;
- __u32 rn_fsuid;
- __u32 rn_fsuid_h;
- __u32 rn_fsgid;
- __u32 rn_fsgid_h;
- __u32 rn_suppgid1;
- __u32 rn_suppgid1_h;
- __u32 rn_suppgid2;
- __u32 rn_suppgid2_h;
- struct lu_fid rn_fid1;
- struct lu_fid rn_fid2;
- __s64 rn_time;
- __u64 rn_padding_1; /* rr_atime */
- __u64 rn_padding_2; /* rr_ctime */
- __u64 rn_padding_3; /* rr_size */
- __u64 rn_padding_4; /* rr_blocks */
- __u32 rn_bias; /* some operation flags */
- __u32 rn_mode; /* cross-ref rename has mode */
- __u32 rn_padding_5; /* rr_flags */
- __u32 rn_padding_6; /* rr_padding_2 */
- __u32 rn_padding_7; /* rr_padding_3 */
- __u32 rn_padding_8; /* rr_padding_4 */
-};
-
-/* instance of mdt_reint_rec */
-struct mdt_rec_setxattr {
- __u32 sx_opcode;
- __u32 sx_cap;
- __u32 sx_fsuid;
- __u32 sx_fsuid_h;
- __u32 sx_fsgid;
- __u32 sx_fsgid_h;
- __u32 sx_suppgid1;
- __u32 sx_suppgid1_h;
- __u32 sx_suppgid2;
- __u32 sx_suppgid2_h;
- struct lu_fid sx_fid;
- __u64 sx_padding_1; /* These three are rr_fid2 */
- __u32 sx_padding_2;
- __u32 sx_padding_3;
- __u64 sx_valid;
- __s64 sx_time;
- __u64 sx_padding_5; /* rr_ctime */
- __u64 sx_padding_6; /* rr_size */
- __u64 sx_padding_7; /* rr_blocks */
- __u32 sx_size;
- __u32 sx_flags;
- __u32 sx_padding_8; /* rr_flags */
- __u32 sx_padding_9; /* rr_padding_2 */
- __u32 sx_padding_10; /* rr_padding_3 */
- __u32 sx_padding_11; /* rr_padding_4 */
-};
-
-/*
- * mdt_rec_reint is the template for all mdt_rec_xxx structures.
- * Do NOT change the size of any member, otherwise the values will be
- * broken by lustre_swab_mdt_rec_reint().
- *
- * If you add new members in other mdt_rec_xxx structures and need to use
- * the rr_padding_x fields, then update lustre_swab_mdt_rec_reint() too.
- */
-struct mdt_rec_reint {
- __u32 rr_opcode;
- __u32 rr_cap;
- __u32 rr_fsuid;
- __u32 rr_fsuid_h;
- __u32 rr_fsgid;
- __u32 rr_fsgid_h;
- __u32 rr_suppgid1;
- __u32 rr_suppgid1_h;
- __u32 rr_suppgid2;
- __u32 rr_suppgid2_h;
- struct lu_fid rr_fid1;
- struct lu_fid rr_fid2;
- __s64 rr_mtime;
- __s64 rr_atime;
- __s64 rr_ctime;
- __u64 rr_size;
- __u64 rr_blocks;
- __u32 rr_bias;
- __u32 rr_mode;
- __u32 rr_flags;
- __u32 rr_flags_h;
- __u32 rr_umask;
- __u32 rr_padding_4; /* also fix lustre_swab_mdt_rec_reint */
-};
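-
-/*
- * Illustrative compile-time check (a sketch, not necessarily in-tree):
- * every mdt_rec_* variant must stay the same size as the mdt_rec_reint
- * template, since they are all swabbed via lustre_swab_mdt_rec_reint().
- */
-static inline void mdt_rec_reint_size_check(void)
-{
-	BUILD_BUG_ON(sizeof(struct mdt_rec_create) !=
-		     sizeof(struct mdt_rec_reint));
-	BUILD_BUG_ON(sizeof(struct mdt_rec_link) !=
-		     sizeof(struct mdt_rec_reint));
-	BUILD_BUG_ON(sizeof(struct mdt_rec_unlink) !=
-		     sizeof(struct mdt_rec_reint));
-	BUILD_BUG_ON(sizeof(struct mdt_rec_rename) !=
-		     sizeof(struct mdt_rec_reint));
-}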
-
-/* lmv structures */
-struct lmv_desc {
- __u32 ld_tgt_count; /* how many MDS's */
- __u32 ld_active_tgt_count; /* how many active */
- __u32 ld_default_stripe_count; /* how many objects are used */
- __u32 ld_pattern; /* default hash pattern */
- __u64 ld_default_hash_size;
- __u64 ld_padding_1; /* also fix lustre_swab_lmv_desc */
- __u32 ld_padding_2; /* also fix lustre_swab_lmv_desc */
- __u32 ld_qos_maxage; /* in seconds */
- __u32 ld_padding_3; /* also fix lustre_swab_lmv_desc */
- __u32 ld_padding_4; /* also fix lustre_swab_lmv_desc */
- struct obd_uuid ld_uuid;
-};
-
-/* LMV layout EA, and it will be stored both in master and slave object */
-struct lmv_mds_md_v1 {
- __u32 lmv_magic;
- __u32 lmv_stripe_count;
- __u32 lmv_master_mdt_index; /* On master object, it is master
- * MDT index, on slave object, it
- * is stripe index of the slave obj
- */
- __u32 lmv_hash_type; /* dir stripe policy, i.e. which
- * hash function is to be used.
- * Note: only the lower 16 bits
- * are used for now; the higher
- * 16 bits will be used to mark
- * object status, for example
- * migrating or dead.
- */
- __u32 lmv_layout_version; /* Used for directory restriping */
- __u32 lmv_padding1;
- __u64 lmv_padding2;
- __u64 lmv_padding3;
- char lmv_pool_name[LOV_MAXPOOLNAME + 1];/* pool name */
- struct lu_fid lmv_stripe_fids[0]; /* FIDs for each stripe */
-};
-
-#define LMV_MAGIC_V1 0x0CD20CD0 /* normal stripe lmv magic */
-#define LMV_MAGIC LMV_MAGIC_V1
-
-/* #define LMV_USER_MAGIC 0x0CD30CD0 */
-#define LMV_MAGIC_STRIPE 0x0CD40CD0 /* magic for dir sub_stripe */
-
-/*
- * Right now only the lower 16 bits of lmv_hash_type are used; the
- * higher part is a flag field indicating the status of the object,
- * for example that the object is being migrated. The hash function
- * may be interpreted differently depending on the flags.
- */
-#define LMV_HASH_TYPE_MASK 0x0000ffff
-
-#define LMV_HASH_FLAG_MIGRATION 0x80000000
-#define LMV_HASH_FLAG_DEAD 0x40000000
-
-/**
- * The FNV-1a hash algorithm is as follows:
- * hash = FNV_offset_basis
- * for each octet_of_data to be hashed
- * hash = hash XOR octet_of_data
- * hash = hash × FNV_prime
- * return hash
- * http://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function#FNV-1a_hash
- *
- * http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-reference-source
- * FNV_prime is 2^40 + 2^8 + 0xb3 = 0x100000001b3ULL
- **/
-#define LUSTRE_FNV_1A_64_PRIME 0x100000001b3ULL
-#define LUSTRE_FNV_1A_64_OFFSET_BIAS 0xcbf29ce484222325ULL
-static inline __u64 lustre_hash_fnv_1a_64(const void *buf, size_t size)
-{
- __u64 hash = LUSTRE_FNV_1A_64_OFFSET_BIAS;
- const unsigned char *p = buf;
- size_t i;
-
- for (i = 0; i < size; i++) {
- hash ^= p[i];
- hash *= LUSTRE_FNV_1A_64_PRIME;
- }
-
- return hash;
-}
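-
-/*
- * Hedged usage sketch: FNV-1a-hashed striped directories pick the
- * stripe for a name from this hash. The plain modulo reduction here is
- * an illustration; the in-tree reduction may differ.
- */
-static inline __u32 lmv_name_to_stripe_fnv_1a(const char *name,
-					      size_t namelen,
-					      __u32 stripe_count)
-{
-	return (__u32)(lustre_hash_fnv_1a_64(name, namelen) % stripe_count);
-}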
-
-union lmv_mds_md {
- __u32 lmv_magic;
- struct lmv_mds_md_v1 lmv_md_v1;
- struct lmv_user_md lmv_user_md;
-};
-
-static inline ssize_t lmv_mds_md_size(int stripe_count, unsigned int lmm_magic)
-{
- ssize_t len = -EINVAL;
-
- switch (lmm_magic) {
- case LMV_MAGIC_V1: {
- struct lmv_mds_md_v1 *lmm1;
-
- len = sizeof(*lmm1);
- len += stripe_count * sizeof(lmm1->lmv_stripe_fids[0]);
- break; }
- default:
- break;
- }
- return len;
-}
-
-static inline int lmv_mds_md_stripe_count_get(const union lmv_mds_md *lmm)
-{
- switch (__le32_to_cpu(lmm->lmv_magic)) {
- case LMV_MAGIC_V1:
- return __le32_to_cpu(lmm->lmv_md_v1.lmv_stripe_count);
- case LMV_USER_MAGIC:
- return __le32_to_cpu(lmm->lmv_user_md.lum_stripe_count);
- default:
- return -EINVAL;
- }
-}
-
-enum fld_rpc_opc {
- FLD_QUERY = 900,
- FLD_READ = 901,
- FLD_LAST_OPC,
- FLD_FIRST_OPC = FLD_QUERY
-};
-
-enum seq_rpc_opc {
- SEQ_QUERY = 700,
- SEQ_LAST_OPC,
- SEQ_FIRST_OPC = SEQ_QUERY
-};
-
-enum seq_op {
- SEQ_ALLOC_SUPER = 0,
- SEQ_ALLOC_META = 1
-};
-
-enum fld_op {
- FLD_CREATE = 0,
- FLD_DELETE = 1,
- FLD_LOOKUP = 2,
-};
-
-/*
- * LOV data structures
- */
-
-#define LOV_MAX_UUID_BUFFER_SIZE 8192
-/* The size of the buffer the lov/mdc reserves for the
- * array of UUIDs returned by the MDS. With the current
- * protocol, this will limit the max number of OSTs per LOV
- */
-
-#define LOV_DESC_MAGIC 0xB0CCDE5C
-#define LOV_DESC_QOS_MAXAGE_DEFAULT 5 /* Seconds */
-#define LOV_DESC_STRIPE_SIZE_DEFAULT (1 << LNET_MTU_BITS)
-
-/* LOV settings descriptor (should only contain static info) */
-struct lov_desc {
- __u32 ld_tgt_count; /* how many OBD's */
- __u32 ld_active_tgt_count; /* how many active */
- __u32 ld_default_stripe_count; /* how many objects are used */
- __u32 ld_pattern; /* default PATTERN_RAID0 */
- __u64 ld_default_stripe_size; /* in bytes */
- __u64 ld_default_stripe_offset; /* in bytes */
- __u32 ld_padding_0; /* unused */
- __u32 ld_qos_maxage; /* in seconds */
- __u32 ld_padding_1; /* also fix lustre_swab_lov_desc */
- __u32 ld_padding_2; /* also fix lustre_swab_lov_desc */
- struct obd_uuid ld_uuid;
-};
-
-#define ld_magic ld_active_tgt_count /* for swabbing from llogs */
-
-/*
- * LDLM requests:
- */
-/* opcodes -- MUST be distinct from OST/MDS opcodes */
-enum ldlm_cmd {
- LDLM_ENQUEUE = 101,
- LDLM_CONVERT = 102,
- LDLM_CANCEL = 103,
- LDLM_BL_CALLBACK = 104,
- LDLM_CP_CALLBACK = 105,
- LDLM_GL_CALLBACK = 106,
- LDLM_SET_INFO = 107,
- LDLM_LAST_OPC
-};
-#define LDLM_FIRST_OPC LDLM_ENQUEUE
-
-#define RES_NAME_SIZE 4
-struct ldlm_res_id {
- __u64 name[RES_NAME_SIZE];
-};
-
-#define DLDLMRES "[%#llx:%#llx:%#llx].%llx"
-#define PLDLMRES(res) (res)->lr_name.name[0], (res)->lr_name.name[1], \
- (res)->lr_name.name[2], (res)->lr_name.name[3]
-
-/* lock types */
-enum ldlm_mode {
- LCK_MINMODE = 0,
- LCK_EX = 1,
- LCK_PW = 2,
- LCK_PR = 4,
- LCK_CW = 8,
- LCK_CR = 16,
- LCK_NL = 32,
- LCK_GROUP = 64,
- LCK_COS = 128,
- LCK_MAXMODE
-};
-
-#define LCK_MODE_NUM 8
-
-enum ldlm_type {
- LDLM_PLAIN = 10,
- LDLM_EXTENT = 11,
- LDLM_FLOCK = 12,
- LDLM_IBITS = 13,
- LDLM_MAX_TYPE
-};
-
-#define LDLM_MIN_TYPE LDLM_PLAIN
-
-struct ldlm_extent {
- __u64 start;
- __u64 end;
- __u64 gid;
-};
-
-struct ldlm_inodebits {
- __u64 bits;
-};
-
-struct ldlm_flock_wire {
- __u64 lfw_start;
- __u64 lfw_end;
- __u64 lfw_owner;
- __u32 lfw_padding;
- __u32 lfw_pid;
-};
-
-/* it's important that the fields of the ldlm_extent structure match
- * the first fields of the ldlm_flock structure because there is only
- * one ldlm_swab routine to process the ldlm_policy_data_t union. if
- * this ever changes we will need to swab the union differently based
- * on the resource type.
- */
-
-union ldlm_wire_policy_data {
- struct ldlm_extent l_extent;
- struct ldlm_flock_wire l_flock;
- struct ldlm_inodebits l_inodebits;
-};
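-
-/*
- * Illustrative compile-time expression of the layout constraint stated
- * above (a sketch; the in-tree code may check this differently).
- */
-static inline void ldlm_wire_policy_layout_check(void)
-{
-	BUILD_BUG_ON(offsetof(struct ldlm_extent, start) !=
-		     offsetof(struct ldlm_flock_wire, lfw_start));
-	BUILD_BUG_ON(offsetof(struct ldlm_extent, end) !=
-		     offsetof(struct ldlm_flock_wire, lfw_end));
-}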
-
-union ldlm_gl_desc {
- struct ldlm_gl_lquota_desc lquota_desc;
-};
-
-enum ldlm_intent_flags {
- IT_OPEN = 0x00000001,
- IT_CREAT = 0x00000002,
- IT_OPEN_CREAT = 0x00000003,
- IT_READDIR = 0x00000004,
- IT_GETATTR = 0x00000008,
- IT_LOOKUP = 0x00000010,
- IT_UNLINK = 0x00000020,
- IT_TRUNC = 0x00000040,
- IT_GETXATTR = 0x00000080,
- IT_EXEC = 0x00000100,
- IT_PIN = 0x00000200,
- IT_LAYOUT = 0x00000400,
- IT_QUOTA_DQACQ = 0x00000800,
- IT_QUOTA_CONN = 0x00001000,
- IT_SETXATTR = 0x00002000,
-};
-
-struct ldlm_intent {
- __u64 opc;
-};
-
-struct ldlm_resource_desc {
- enum ldlm_type lr_type;
- __u32 lr_padding; /* also fix lustre_swab_ldlm_resource_desc */
- struct ldlm_res_id lr_name;
-};
-
-struct ldlm_lock_desc {
- struct ldlm_resource_desc l_resource;
- enum ldlm_mode l_req_mode;
- enum ldlm_mode l_granted_mode;
- union ldlm_wire_policy_data l_policy_data;
-};
-
-#define LDLM_LOCKREQ_HANDLES 2
-#define LDLM_ENQUEUE_CANCEL_OFF 1
-
-struct ldlm_request {
- __u32 lock_flags;
- __u32 lock_count;
- struct ldlm_lock_desc lock_desc;
- struct lustre_handle lock_handle[LDLM_LOCKREQ_HANDLES];
-};
-
-struct ldlm_reply {
- __u32 lock_flags;
- __u32 lock_padding; /* also fix lustre_swab_ldlm_reply */
- struct ldlm_lock_desc lock_desc;
- struct lustre_handle lock_handle;
- __u64 lock_policy_res1;
- __u64 lock_policy_res2;
-};
-
-#define ldlm_flags_to_wire(flags) ((__u32)(flags))
-#define ldlm_flags_from_wire(flags) ((__u64)(flags))
-
-/*
- * Opcodes for mountconf (mgs and mgc)
- */
-enum mgs_cmd {
- MGS_CONNECT = 250,
- MGS_DISCONNECT,
- MGS_EXCEPTION, /* node died, etc. */
- MGS_TARGET_REG, /* whenever target starts up */
- MGS_TARGET_DEL,
- MGS_SET_INFO,
- MGS_CONFIG_READ,
- MGS_LAST_OPC
-};
-#define MGS_FIRST_OPC MGS_CONNECT
-
-#define MGS_PARAM_MAXLEN 1024
-#define KEY_SET_INFO "set_info"
-
-struct mgs_send_param {
- char mgs_param[MGS_PARAM_MAXLEN];
-};
-
-/* We pass this info to the MGS so it can write config logs */
-#define MTI_NAME_MAXLEN 64
-#define MTI_PARAM_MAXLEN 4096
-#define MTI_NIDS_MAX 32
-struct mgs_target_info {
- __u32 mti_lustre_ver;
- __u32 mti_stripe_index;
- __u32 mti_config_ver;
- __u32 mti_flags;
- __u32 mti_nid_count;
- __u32 mti_instance; /* Running instance of target */
- char mti_fsname[MTI_NAME_MAXLEN];
- char mti_svname[MTI_NAME_MAXLEN];
- char mti_uuid[sizeof(struct obd_uuid)];
- __u64 mti_nids[MTI_NIDS_MAX]; /* host nids (lnet_nid_t)*/
- char mti_params[MTI_PARAM_MAXLEN];
-};
-
-struct mgs_nidtbl_entry {
- __u64 mne_version; /* table version of this entry */
- __u32 mne_instance; /* target instance # */
- __u32 mne_index; /* target index */
- __u32 mne_length; /* length of this entry, in bytes */
- __u8 mne_type; /* target type LDD_F_SV_TYPE_OST/MDT */
- __u8 mne_nid_type; /* type of NID (must be zero); for IPv6 */
- __u8 mne_nid_size; /* size of each NID, in bytes */
- __u8 mne_nid_count; /* # of NIDs in buffer */
- union {
- lnet_nid_t nids[0]; /* variable size buffer for NIDs. */
- } u;
-};
-
-struct mgs_config_body {
- char mcb_name[MTI_NAME_MAXLEN]; /* logname */
- __u64 mcb_offset; /* next index of config log to request */
- __u16 mcb_type; /* type of log: CONFIG_T_[CONFIG|RECOVER] */
- __u8 mcb_reserved;
- __u8 mcb_bits; /* bits unit size of config log */
- __u32 mcb_units; /* # of units for bulk transfer */
-};
-
-struct mgs_config_res {
- __u64 mcr_offset; /* index of last config log */
- __u64 mcr_size; /* size of the log */
-};
-
-/* Config marker flags (in config log) */
-#define CM_START 0x01
-#define CM_END 0x02
-#define CM_SKIP 0x04
-#define CM_UPGRADE146 0x08
-#define CM_EXCLUDE 0x10
-#define CM_START_SKIP (CM_START | CM_SKIP)
-
-struct cfg_marker {
- __u32 cm_step; /* aka config version */
- __u32 cm_flags;
- __u32 cm_vers; /* lustre release version number */
- __u32 cm_padding; /* 64 bit align */
- __s64 cm_createtime; /* when this record was first created */
- __s64 cm_canceltime; /* when this record is no longer valid */
- char cm_tgtname[MTI_NAME_MAXLEN];
- char cm_comment[MTI_NAME_MAXLEN];
-};
-
-/*
- * Opcodes for multiple servers.
- */
-
-enum obd_cmd {
- OBD_PING = 400,
- OBD_LOG_CANCEL,
- OBD_QC_CALLBACK, /* not used since 2.4 */
- OBD_IDX_READ,
- OBD_LAST_OPC
-};
-#define OBD_FIRST_OPC OBD_PING
-
-/**
- * llog contexts indices.
- *
- * There is a compatibility problem with the indices below: they are
- * not contiguous and must keep their numbers for compatibility.
- * See LU-5218 for details.
- */
-enum llog_ctxt_id {
- LLOG_CONFIG_ORIG_CTXT = 0,
- LLOG_CONFIG_REPL_CTXT = 1,
- LLOG_MDS_OST_ORIG_CTXT = 2,
- LLOG_MDS_OST_REPL_CTXT = 3, /* kept just to avoid re-assignment */
- LLOG_SIZE_ORIG_CTXT = 4,
- LLOG_SIZE_REPL_CTXT = 5,
- LLOG_TEST_ORIG_CTXT = 8,
- LLOG_TEST_REPL_CTXT = 9, /* kept just to avoid re-assignment */
- LLOG_CHANGELOG_ORIG_CTXT = 12, /**< changelog generation on mdd */
- LLOG_CHANGELOG_REPL_CTXT = 13, /**< changelog access on clients */
- /* for multiple changelog consumers */
- LLOG_CHANGELOG_USER_ORIG_CTXT = 14,
- LLOG_AGENT_ORIG_CTXT = 15, /**< agent requests generation on cdt */
- LLOG_MAX_CTXTS
-};
-
-/** Identifier for a single log object */
-struct llog_logid {
- struct ost_id lgl_oi;
- __u32 lgl_ogen;
-} __packed;
-
-/** Records written to the CATALOGS list */
-#define CATLIST "CATALOGS"
-struct llog_catid {
- struct llog_logid lci_logid;
- __u32 lci_padding1;
- __u32 lci_padding2;
- __u32 lci_padding3;
-} __packed;
-
-/* Log data record types - there is no specific reason that these need to
- * be related to the RPC opcodes, but no reason not to (may be handy later?)
- */
-#define LLOG_OP_MAGIC 0x10600000
-#define LLOG_OP_MASK 0xfff00000
-
-enum llog_op_type {
- LLOG_PAD_MAGIC = LLOG_OP_MAGIC | 0x00000,
- OST_SZ_REC = LLOG_OP_MAGIC | 0x00f00,
- /* OST_RAID1_REC = LLOG_OP_MAGIC | 0x01000, never used */
- MDS_UNLINK_REC = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) |
- REINT_UNLINK, /* obsolete after 2.5.0 */
- MDS_UNLINK64_REC = LLOG_OP_MAGIC | 0x90000 | (MDS_REINT << 8) |
- REINT_UNLINK,
- /* MDS_SETATTR_REC = LLOG_OP_MAGIC | 0x12401, obsolete 1.8.0 */
- MDS_SETATTR64_REC = LLOG_OP_MAGIC | 0x90000 | (MDS_REINT << 8) |
- REINT_SETATTR,
- OBD_CFG_REC = LLOG_OP_MAGIC | 0x20000,
- /* PTL_CFG_REC = LLOG_OP_MAGIC | 0x30000, obsolete 1.4.0 */
- LLOG_GEN_REC = LLOG_OP_MAGIC | 0x40000,
- /* LLOG_JOIN_REC = LLOG_OP_MAGIC | 0x50000, obsolete 1.8.0 */
- CHANGELOG_REC = LLOG_OP_MAGIC | 0x60000,
- CHANGELOG_USER_REC = LLOG_OP_MAGIC | 0x70000,
- HSM_AGENT_REC = LLOG_OP_MAGIC | 0x80000,
- LLOG_HDR_MAGIC = LLOG_OP_MAGIC | 0x45539,
- LLOG_LOGID_MAGIC = LLOG_OP_MAGIC | 0x4553b,
-};
-
-#define LLOG_REC_HDR_NEEDS_SWABBING(r) \
- (((r)->lrh_type & __swab32(LLOG_OP_MASK)) == __swab32(LLOG_OP_MAGIC))
-
-/** Log record header - stored in little endian order.
- * Each record must start with this struct, end with a llog_rec_tail,
- * and be a multiple of 256 bits in size.
- */
-struct llog_rec_hdr {
- __u32 lrh_len;
- __u32 lrh_index;
- __u32 lrh_type;
- __u32 lrh_id;
-};
-
-struct llog_rec_tail {
- __u32 lrt_len;
- __u32 lrt_index;
-};
-
-/* Data follows just after the header */
-#define REC_DATA(ptr) \
- ((void *)((char *)ptr + sizeof(struct llog_rec_hdr)))
-
-#define REC_DATA_LEN(rec) \
- (rec->lrh_len - sizeof(struct llog_rec_hdr) - \
- sizeof(struct llog_rec_tail))
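-
-/*
- * Illustrative helper (a sketch, not the in-tree one): total on-disk
- * record length for @datalen payload bytes, rounded up to the multiple
- * of 256 bits (32 bytes) required by the llog_rec_hdr comment above.
- */
-static inline __u32 llog_rec_len(__u32 datalen)
-{
-	return round_up(sizeof(struct llog_rec_hdr) + datalen +
-			sizeof(struct llog_rec_tail), 32);
-}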
-
-struct llog_logid_rec {
- struct llog_rec_hdr lid_hdr;
- struct llog_logid lid_id;
- __u32 lid_padding1;
- __u64 lid_padding2;
- __u64 lid_padding3;
- struct llog_rec_tail lid_tail;
-} __packed;
-
-struct llog_unlink_rec {
- struct llog_rec_hdr lur_hdr;
- __u64 lur_oid;
- __u32 lur_oseq;
- __u32 lur_count;
- struct llog_rec_tail lur_tail;
-} __packed;
-
-struct llog_unlink64_rec {
- struct llog_rec_hdr lur_hdr;
- struct lu_fid lur_fid;
- __u32 lur_count; /* to destroy the lost precreated objects */
- __u32 lur_padding1;
- __u64 lur_padding2;
- __u64 lur_padding3;
- struct llog_rec_tail lur_tail;
-} __packed;
-
-struct llog_setattr64_rec {
- struct llog_rec_hdr lsr_hdr;
- struct ost_id lsr_oi;
- __u32 lsr_uid;
- __u32 lsr_uid_h;
- __u32 lsr_gid;
- __u32 lsr_gid_h;
- __u64 lsr_valid;
- struct llog_rec_tail lsr_tail;
-} __packed;
-
-struct llog_size_change_rec {
- struct llog_rec_hdr lsc_hdr;
- struct ll_fid lsc_fid;
- __u32 lsc_ioepoch;
- __u32 lsc_padding1;
- __u64 lsc_padding2;
- __u64 lsc_padding3;
- struct llog_rec_tail lsc_tail;
-} __packed;
-
-/* changelog llog name, needed by client replicators */
-#define CHANGELOG_CATALOG "changelog_catalog"
-
-struct changelog_setinfo {
- __u64 cs_recno;
- __u32 cs_id;
-} __packed;
-
-/** changelog record */
-struct llog_changelog_rec {
- struct llog_rec_hdr cr_hdr;
- struct changelog_rec cr; /**< Variable length field */
- struct llog_rec_tail cr_do_not_use; /**< for_sizeof_only */
-} __packed;
-
-struct llog_changelog_user_rec {
- struct llog_rec_hdr cur_hdr;
- __u32 cur_id;
- __u32 cur_padding;
- __u64 cur_endrec;
- struct llog_rec_tail cur_tail;
-} __packed;
-
-enum agent_req_status {
- ARS_WAITING,
- ARS_STARTED,
- ARS_FAILED,
- ARS_CANCELED,
- ARS_SUCCEED,
-};
-
-static inline const char *agent_req_status2name(const enum agent_req_status ars)
-{
- switch (ars) {
- case ARS_WAITING:
- return "WAITING";
- case ARS_STARTED:
- return "STARTED";
- case ARS_FAILED:
- return "FAILED";
- case ARS_CANCELED:
- return "CANCELED";
- case ARS_SUCCEED:
- return "SUCCEED";
- default:
- return "UNKNOWN";
- }
-}
-
-struct llog_agent_req_rec {
- struct llog_rec_hdr arr_hdr; /**< record header */
- __u32 arr_status; /**< status of the request */
- /* must match enum
- * agent_req_status
- */
- __u32 arr_archive_id; /**< backend archive number */
- __u64 arr_flags; /**< req flags */
- __u64 arr_compound_id;/**< compound cookie */
- __u64 arr_req_create; /**< req. creation time */
- __u64 arr_req_change; /**< req. status change time */
- struct hsm_action_item arr_hai; /**< req. to the agent */
- struct llog_rec_tail arr_tail; /**< record tail, for_sizeof_only */
-} __packed;
-
-/* Old llog gen for compatibility */
-struct llog_gen {
- __u64 mnt_cnt;
- __u64 conn_cnt;
-} __packed;
-
-struct llog_gen_rec {
- struct llog_rec_hdr lgr_hdr;
- struct llog_gen lgr_gen;
- __u64 padding1;
- __u64 padding2;
- __u64 padding3;
- struct llog_rec_tail lgr_tail;
-};
-
-/* flags for the logs */
-enum llog_flag {
- LLOG_F_ZAP_WHEN_EMPTY = 0x1,
- LLOG_F_IS_CAT = 0x2,
- LLOG_F_IS_PLAIN = 0x4,
- LLOG_F_EXT_JOBID = 0x8,
- LLOG_F_IS_FIXSIZE = 0x10,
-
- /*
- * Note: flags covered by LLOG_F_EXT_MASK will be inherited from
- * catlog to plain log, so do not add LLOG_F_IS_FIXSIZE here,
- * because the catlog record is usually fixed size, but its plain
- * log records can be variable-sized.
- */
- LLOG_F_EXT_MASK = LLOG_F_EXT_JOBID,
-};
-
-/* On-disk header structure of each log object, stored in little endian order */
-#define LLOG_MIN_CHUNK_SIZE 8192
-#define LLOG_HEADER_SIZE (96) /* sizeof (llog_log_hdr) +
- * sizeof(llh_tail) - sizeof(llh_bitmap)
- */
-#define LLOG_BITMAP_BYTES (LLOG_MIN_CHUNK_SIZE - LLOG_HEADER_SIZE)
-#define LLOG_MIN_REC_SIZE (24) /* round(llog_rec_hdr + llog_rec_tail) */
-
-struct llog_log_hdr {
- struct llog_rec_hdr llh_hdr;
- __s64 llh_timestamp;
- __u32 llh_count;
- __u32 llh_bitmap_offset;
- __u32 llh_size;
- __u32 llh_flags;
- __u32 llh_cat_idx;
- /* for a catalog the first plain slot is next to it */
- struct obd_uuid llh_tgtuuid;
- __u32 llh_reserved[LLOG_HEADER_SIZE / sizeof(__u32) - 23];
- /* These fields must always be at the end of the llog_log_hdr.
- * Note: llh_bitmap size is variable because the llog chunk size
- * could be bigger than LLOG_MIN_CHUNK_SIZE, i.e.
- * sizeof(llog_log_hdr) > 8192 bytes; the real size is stored in
- * llh_hdr.lrh_len, which means llh_tail should only be referenced
- * via LLOG_HDR_TAIL(). But since this structure is also used by the
- * client/server llog interface (see llog_client.c), it is kept in
- * its original form to avoid compatibility issues.
- */
- __u32 llh_bitmap[LLOG_BITMAP_BYTES / sizeof(__u32)];
- struct llog_rec_tail llh_tail;
-} __packed;
-
-#undef LLOG_HEADER_SIZE
-#undef LLOG_BITMAP_BYTES
-
-#define LLOG_HDR_BITMAP_SIZE(llh) (__u32)((llh->llh_hdr.lrh_len - \
- llh->llh_bitmap_offset - \
- sizeof(llh->llh_tail)) * 8)
-#define LLOG_HDR_BITMAP(llh) (__u32 *)((char *)(llh) + \
- (llh)->llh_bitmap_offset)
-#define LLOG_HDR_TAIL(llh) ((struct llog_rec_tail *)((char *)llh + \
- llh->llh_hdr.lrh_len - \
- sizeof(llh->llh_tail)))
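-
-/*
- * Hedged sketch: test whether record @index is still live in the header
- * bitmap, using the accessors above (illustrative; the in-tree code may
- * use the little-endian bitop helpers instead).
- */
-static inline int llog_rec_index_is_set(struct llog_log_hdr *llh,
-					__u32 index)
-{
-	__u32 *bitmap = LLOG_HDR_BITMAP(llh);
-
-	return (bitmap[index / 32] & (1U << (index % 32))) != 0;
-}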
-
-/** log cookies are used to reference a specific log file and a record
- * therein
- */
-struct llog_cookie {
- struct llog_logid lgc_lgl;
- __u32 lgc_subsys;
- __u32 lgc_index;
- __u32 lgc_padding;
-} __packed;
-
-/** llog protocol */
-enum llogd_rpc_ops {
- LLOG_ORIGIN_HANDLE_CREATE = 501,
- LLOG_ORIGIN_HANDLE_NEXT_BLOCK = 502,
- LLOG_ORIGIN_HANDLE_READ_HEADER = 503,
- LLOG_ORIGIN_HANDLE_WRITE_REC = 504,
- LLOG_ORIGIN_HANDLE_CLOSE = 505,
- LLOG_ORIGIN_CONNECT = 506,
- LLOG_CATINFO = 507, /* deprecated */
- LLOG_ORIGIN_HANDLE_PREV_BLOCK = 508,
- LLOG_ORIGIN_HANDLE_DESTROY = 509, /* for destroy llog object*/
- LLOG_LAST_OPC,
- LLOG_FIRST_OPC = LLOG_ORIGIN_HANDLE_CREATE
-};
-
-struct llogd_body {
- struct llog_logid lgd_logid;
- __u32 lgd_ctxt_idx;
- __u32 lgd_llh_flags;
- __u32 lgd_index;
- __u32 lgd_saved_index;
- __u32 lgd_len;
- __u64 lgd_cur_offset;
-} __packed;
-
-struct llogd_conn_body {
- struct llog_gen lgdc_gen;
- struct llog_logid lgdc_logid;
- __u32 lgdc_ctxt_idx;
-} __packed;
-
-/* Note: 64-bit types are 64-bit aligned in structure */
-struct obdo {
- __u64 o_valid; /* hot fields in this obdo */
- struct ost_id o_oi;
- __u64 o_parent_seq;
- __u64 o_size; /* o_size-o_blocks == ost_lvb */
- __s64 o_mtime;
- __s64 o_atime;
- __s64 o_ctime;
- __u64 o_blocks; /* brw: cli sent cached bytes */
- __u64 o_grant;
-
- /* 32-bit fields start here: keep an even number of them via padding */
- __u32 o_blksize; /* optimal IO blocksize */
- __u32 o_mode; /* brw: cli sent cache remain */
- __u32 o_uid;
- __u32 o_gid;
- __u32 o_flags;
- __u32 o_nlink; /* brw: checksum */
- __u32 o_parent_oid;
- __u32 o_misc; /* brw: o_dropped */
-
- __u64 o_ioepoch; /* epoch in ost writes */
- __u32 o_stripe_idx; /* holds stripe idx */
- __u32 o_parent_ver;
- struct lustre_handle o_handle; /* brw: lock handle to prolong locks
- */
- struct llog_cookie o_lcookie; /* destroy: unlink cookie from MDS,
- * obsolete in 2.8, reused in OSP
- */
- __u32 o_uid_h;
- __u32 o_gid_h;
-
- __u64 o_data_version; /* getattr: sum of iversion for
- * each stripe.
- * brw: grant space consumed on
- * the client for the write
- */
- __u64 o_padding_4;
- __u64 o_padding_5;
- __u64 o_padding_6;
-};
-
-#define o_dirty o_blocks
-#define o_undirty o_mode
-#define o_dropped o_misc
-#define o_cksum o_nlink
-#define o_grant_used o_data_version
-
-/* request structure for OST's */
-struct ost_body {
- struct obdo oa;
-};
-
-/* Key for FIEMAP to be used in get_info calls */
-struct ll_fiemap_info_key {
- char lfik_name[8];
- struct obdo lfik_oa;
- struct fiemap lfik_fiemap;
-};
-
-/* security opcodes */
-enum sec_cmd {
- SEC_CTX_INIT = 801,
- SEC_CTX_INIT_CONT = 802,
- SEC_CTX_FINI = 803,
- SEC_LAST_OPC,
- SEC_FIRST_OPC = SEC_CTX_INIT
-};
-
-/*
- * capa related definitions
- */
-#define CAPA_HMAC_MAX_LEN 64
-#define CAPA_HMAC_KEY_MAX_LEN 56
-
-/* NB take care when changing the sequence of elements in this struct,
- * because the offset info is used in find_capa()
- */
-struct lustre_capa {
- struct lu_fid lc_fid; /** fid */
- __u64 lc_opc; /** operations allowed */
- __u64 lc_uid; /** file owner */
- __u64 lc_gid; /** file group */
- __u32 lc_flags; /** HMAC algorithm & flags */
- __u32 lc_keyid; /** key# used for the capability */
- __u32 lc_timeout; /** capa timeout value (sec) */
-/* FIXME: y2038 time_t overflow: */
- __u32 lc_expiry; /** expiry time (sec) */
- __u8 lc_hmac[CAPA_HMAC_MAX_LEN]; /** HMAC */
-} __packed;
-
-/** lustre_capa::lc_opc */
-enum {
- CAPA_OPC_BODY_WRITE = 1 << 0, /**< write object data */
- CAPA_OPC_BODY_READ = 1 << 1, /**< read object data */
- CAPA_OPC_INDEX_LOOKUP = 1 << 2, /**< lookup object fid */
- CAPA_OPC_INDEX_INSERT = 1 << 3, /**< insert object fid */
- CAPA_OPC_INDEX_DELETE = 1 << 4, /**< delete object fid */
- CAPA_OPC_OSS_WRITE = 1 << 5, /**< write oss object data */
- CAPA_OPC_OSS_READ = 1 << 6, /**< read oss object data */
- CAPA_OPC_OSS_TRUNC = 1 << 7, /**< truncate oss object */
- CAPA_OPC_OSS_DESTROY = 1 << 8, /**< destroy oss object */
- CAPA_OPC_META_WRITE = 1 << 9, /**< write object meta data */
- CAPA_OPC_META_READ = 1 << 10, /**< read object meta data */
-};
-
-#define CAPA_OPC_OSS_RW (CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE)
-#define CAPA_OPC_MDS_ONLY \
- (CAPA_OPC_BODY_WRITE | CAPA_OPC_BODY_READ | CAPA_OPC_INDEX_LOOKUP | \
- CAPA_OPC_INDEX_INSERT | CAPA_OPC_INDEX_DELETE)
-#define CAPA_OPC_OSS_ONLY \
- (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ | CAPA_OPC_OSS_TRUNC | \
- CAPA_OPC_OSS_DESTROY)
-#define CAPA_OPC_MDS_DEFAULT ~CAPA_OPC_OSS_ONLY
-#define CAPA_OPC_OSS_DEFAULT ~(CAPA_OPC_MDS_ONLY | CAPA_OPC_OSS_ONLY)
-
-struct lustre_capa_key {
- __u64 lk_seq; /**< mds# */
- __u32 lk_keyid; /**< key# */
- __u32 lk_padding;
- __u8 lk_key[CAPA_HMAC_KEY_MAX_LEN]; /**< key */
-} __packed;
-
-/** The link ea holds 1 \a link_ea_entry for each hardlink */
-#define LINK_EA_MAGIC 0x11EAF1DFUL
-struct link_ea_header {
- __u32 leh_magic;
- __u32 leh_reccount;
- __u64 leh_len; /* total size */
- __u32 leh_overflow_time;
- __u32 leh_padding;
-};
-
-/** Hardlink data is name and parent fid.
- * Stored in this crazy struct for maximum packing and endian-neutrality
- */
-struct link_ea_entry {
- /** __u16 stored big-endian, unaligned */
- unsigned char lee_reclen[2];
- unsigned char lee_parent_fid[sizeof(struct lu_fid)];
- char lee_name[0];
-} __packed;
-
-/** fid2path request/reply structure */
-struct getinfo_fid2path {
- struct lu_fid gf_fid;
- __u64 gf_recno;
- __u32 gf_linkno;
- __u32 gf_pathlen;
- char gf_path[0];
-} __packed;
-
-/** path2parent request/reply structures */
-struct getparent {
- struct lu_fid gp_fid; /**< parent FID */
- __u32 gp_linkno; /**< hardlink number */
- __u32 gp_name_size; /**< size of the name field */
- char gp_name[0]; /**< zero-terminated link name */
-} __packed;
-
-enum {
- LAYOUT_INTENT_ACCESS = 0,
- LAYOUT_INTENT_READ = 1,
- LAYOUT_INTENT_WRITE = 2,
- LAYOUT_INTENT_GLIMPSE = 3,
- LAYOUT_INTENT_TRUNC = 4,
- LAYOUT_INTENT_RELEASE = 5,
- LAYOUT_INTENT_RESTORE = 6
-};
-
-/* enqueue layout lock with intent */
-struct layout_intent {
- __u32 li_opc; /* intent operation for enqueue, read, write etc */
- __u32 li_flags;
- __u64 li_start;
- __u64 li_end;
-};
-
-/**
- * On the wire version of hsm_progress structure.
- *
- * Contains the userspace hsm_progress and some internal fields.
- */
-struct hsm_progress_kernel {
- /* Field taken from struct hsm_progress */
- struct lu_fid hpk_fid;
- __u64 hpk_cookie;
- struct hsm_extent hpk_extent;
- __u16 hpk_flags;
- __u16 hpk_errval; /* positive val */
- __u32 hpk_padding1;
- /* Additional fields */
- __u64 hpk_data_version;
- __u64 hpk_padding2;
-} __packed;
-
-/** layout swap request structure
- * fid1 and fid2 are in mdt_body
- */
-struct mdc_swap_layouts {
- __u64 msl_flags;
-} __packed;
-
-struct close_data {
- struct lustre_handle cd_handle;
- struct lu_fid cd_fid;
- __u64 cd_data_version;
- __u64 cd_reserved[8];
-};
-
-#endif
-/** @} lustreidl */
diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h
deleted file mode 100644
index 6e4e109fb874..000000000000
--- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-#ifndef _UAPI_LUSTRE_IOCTL_H_
-#define _UAPI_LUSTRE_IOCTL_H_
-
-#include <linux/ioctl.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-
-#if !defined(__KERNEL__) && !defined(LUSTRE_UTILS)
-# error This file is for Lustre internal use only.
-#endif
-
-enum md_echo_cmd {
- ECHO_MD_CREATE = 1, /* Open/Create file on MDT */
- ECHO_MD_MKDIR = 2, /* Mkdir on MDT */
- ECHO_MD_DESTROY = 3, /* Unlink file on MDT */
- ECHO_MD_RMDIR = 4, /* Rmdir on MDT */
- ECHO_MD_LOOKUP = 5, /* Lookup on MDT */
- ECHO_MD_GETATTR = 6, /* Getattr on MDT */
- ECHO_MD_SETATTR = 7, /* Setattr on MDT */
- ECHO_MD_ALLOC_FID = 8, /* Get FIDs from MDT */
-};
-
-#define OBD_DEV_ID 1
-#define OBD_DEV_NAME "obd"
-#define OBD_DEV_PATH "/dev/" OBD_DEV_NAME
-
-#define OBD_IOCTL_VERSION 0x00010004
-#define OBD_DEV_BY_DEVNAME 0xffffd0de
-
-struct obd_ioctl_data {
- __u32 ioc_len;
- __u32 ioc_version;
-
- union {
- __u64 ioc_cookie;
- __u64 ioc_u64_1;
- };
- union {
- __u32 ioc_conn1;
- __u32 ioc_u32_1;
- };
- union {
- __u32 ioc_conn2;
- __u32 ioc_u32_2;
- };
-
- struct obdo ioc_obdo1;
- struct obdo ioc_obdo2;
-
- __u64 ioc_count;
- __u64 ioc_offset;
- __u32 ioc_dev;
- __u32 ioc_command;
-
- __u64 ioc_nid;
- __u32 ioc_nal;
- __u32 ioc_type;
-
- /* buffers the kernel will treat as user pointers */
- __u32 ioc_plen1;
- char __user *ioc_pbuf1;
- __u32 ioc_plen2;
- char __user *ioc_pbuf2;
-
- /* inline buffers for various arguments */
- __u32 ioc_inllen1;
- char *ioc_inlbuf1;
- __u32 ioc_inllen2;
- char *ioc_inlbuf2;
- __u32 ioc_inllen3;
- char *ioc_inlbuf3;
- __u32 ioc_inllen4;
- char *ioc_inlbuf4;
-
- char ioc_bulk[0];
-};
-
-struct obd_ioctl_hdr {
- __u32 ioc_len;
- __u32 ioc_version;
-};
-
-static inline __u32 obd_ioctl_packlen(struct obd_ioctl_data *data)
-{
- __u32 len = __ALIGN_KERNEL(sizeof(*data), 8);
-
- len += __ALIGN_KERNEL(data->ioc_inllen1, 8);
- len += __ALIGN_KERNEL(data->ioc_inllen2, 8);
- len += __ALIGN_KERNEL(data->ioc_inllen3, 8);
- len += __ALIGN_KERNEL(data->ioc_inllen4, 8);
-
- return len;
-}
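-
-/*
- * Hedged usage sketch: a caller fills the inline buffers and then
- * derives ioc_len from obd_ioctl_packlen() before issuing the ioctl.
- * 'name' is a hypothetical NUL-terminated device name, not a field of
- * this interface.
- */
-static inline void obd_ioctl_data_init(struct obd_ioctl_data *data,
-				       char *name)
-{
-	memset(data, 0, sizeof(*data));
-	data->ioc_version = OBD_IOCTL_VERSION;
-	data->ioc_inllen1 = strlen(name) + 1;
-	data->ioc_inlbuf1 = name;
-	data->ioc_len = obd_ioctl_packlen(data);
-}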
-
-/*
- * OBD_IOC_DATA_TYPE is kept only for compatibility with older Linux
- * Lustre user tools. New ioctls should NOT use this macro as the
- * ioctl "size". Instead the ioctl should take a "size" argument that
- * is the actual data type used by the ioctl, so that the ioctl
- * interface is versioned correctly.
- */
-#define OBD_IOC_DATA_TYPE long
-
-/* IOC_LDLM_TEST _IOWR('f', 40, long) */
-/* IOC_LDLM_DUMP _IOWR('f', 41, long) */
-/* IOC_LDLM_REGRESS_START _IOWR('f', 42, long) */
-/* IOC_LDLM_REGRESS_STOP _IOWR('f', 43, long) */
-
-#define OBD_IOC_CREATE _IOWR('f', 101, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_DESTROY _IOW('f', 104, OBD_IOC_DATA_TYPE)
-/* OBD_IOC_PREALLOCATE _IOWR('f', 105, OBD_IOC_DATA_TYPE) */
-
-#define OBD_IOC_SETATTR _IOW('f', 107, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_GETATTR _IOWR('f', 108, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_READ _IOWR('f', 109, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_WRITE _IOWR('f', 110, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_STATFS _IOWR('f', 113, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SYNC _IOW('f', 114, OBD_IOC_DATA_TYPE)
-/* OBD_IOC_READ2 _IOWR('f', 115, OBD_IOC_DATA_TYPE) */
-/* OBD_IOC_FORMAT _IOWR('f', 116, OBD_IOC_DATA_TYPE) */
-/* OBD_IOC_PARTITION _IOWR('f', 117, OBD_IOC_DATA_TYPE) */
-/* OBD_IOC_COPY _IOWR('f', 120, OBD_IOC_DATA_TYPE) */
-/* OBD_IOC_MIGR _IOWR('f', 121, OBD_IOC_DATA_TYPE) */
-/* OBD_IOC_PUNCH _IOWR('f', 122, OBD_IOC_DATA_TYPE) */
-
-/* OBD_IOC_MODULE_DEBUG _IOWR('f', 124, OBD_IOC_DATA_TYPE) */
-#define OBD_IOC_BRW_READ _IOWR('f', 125, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_BRW_WRITE _IOWR('f', 126, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_NAME2DEV _IOWR('f', 127, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_UUID2DEV _IOWR('f', 130, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_GETNAME _IOWR('f', 131, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_GETMDNAME _IOR('f', 131, char[MAX_OBD_NAME])
-#define OBD_IOC_GETDTNAME OBD_IOC_GETNAME
-#define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_CLIENT_RECOVER _IOW('f', 133, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PING_TARGET _IOW('f', 136, OBD_IOC_DATA_TYPE)
-
-/* OBD_IOC_DEC_FS_USE_COUNT _IO('f', 139) */
-#define OBD_IOC_NO_TRANSNO _IOW('f', 140, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SET_READONLY _IOW('f', 141, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_ABORT_RECOVERY _IOR('f', 142, OBD_IOC_DATA_TYPE)
-/* OBD_IOC_ROOT_SQUASH _IOWR('f', 143, OBD_IOC_DATA_TYPE) */
-#define OBD_GET_VERSION _IOWR('f', 144, OBD_IOC_DATA_TYPE)
-/* OBD_IOC_GSS_SUPPORT _IOWR('f', 145, OBD_IOC_DATA_TYPE) */
-/* OBD_IOC_CLOSE_UUID _IOWR('f', 147, OBD_IOC_DATA_TYPE) */
-#define OBD_IOC_CHANGELOG_SEND _IOW('f', 148, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_GETDEVICE _IOWR('f', 149, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_FID2PATH _IOWR('f', 150, OBD_IOC_DATA_TYPE)
-/* lustre/lustre_user.h 151-153 */
-/* OBD_IOC_LOV_SETSTRIPE 154 LL_IOC_LOV_SETSTRIPE */
-/* OBD_IOC_LOV_GETSTRIPE 155 LL_IOC_LOV_GETSTRIPE */
-/* OBD_IOC_LOV_SETEA 156 LL_IOC_LOV_SETEA */
-/* lustre/lustre_user.h 157-159 */
-/* OBD_IOC_QUOTACHECK _IOW('f', 160, int) */
-/* OBD_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *) */
-#define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl)
-/* lustre/lustre_user.h 163-176 */
-#define OBD_IOC_CHANGELOG_REG _IOW('f', 177, struct obd_ioctl_data)
-#define OBD_IOC_CHANGELOG_DEREG _IOW('f', 178, struct obd_ioctl_data)
-#define OBD_IOC_CHANGELOG_CLEAR _IOW('f', 179, struct obd_ioctl_data)
-/* OBD_IOC_RECORD _IOWR('f', 180, OBD_IOC_DATA_TYPE) */
-/* OBD_IOC_ENDRECORD _IOWR('f', 181, OBD_IOC_DATA_TYPE) */
-/* OBD_IOC_PARSE _IOWR('f', 182, OBD_IOC_DATA_TYPE) */
-/* OBD_IOC_DORECORD _IOWR('f', 183, OBD_IOC_DATA_TYPE) */
-#define OBD_IOC_PROCESS_CFG _IOWR('f', 184, OBD_IOC_DATA_TYPE)
-/* OBD_IOC_DUMP_LOG _IOWR('f', 185, OBD_IOC_DATA_TYPE) */
-/* OBD_IOC_CLEAR_LOG _IOWR('f', 186, OBD_IOC_DATA_TYPE) */
-#define OBD_IOC_PARAM _IOW('f', 187, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_POOL _IOWR('f', 188, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_REPLACE_NIDS _IOWR('f', 189, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_CATLOGLIST _IOWR('f', 190, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_INFO _IOWR('f', 191, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_PRINT _IOWR('f', 192, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_CANCEL _IOWR('f', 193, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_REMOVE _IOWR('f', 194, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_CHECK _IOWR('f', 195, OBD_IOC_DATA_TYPE)
-/* OBD_IOC_LLOG_CATINFO _IOWR('f', 196, OBD_IOC_DATA_TYPE) */
-#define OBD_IOC_NODEMAP _IOWR('f', 197, OBD_IOC_DATA_TYPE)
-
-/* ECHO_IOC_GET_STRIPE _IOWR('f', 200, OBD_IOC_DATA_TYPE) */
-/* ECHO_IOC_SET_STRIPE _IOWR('f', 201, OBD_IOC_DATA_TYPE) */
-/* ECHO_IOC_ENQUEUE _IOWR('f', 202, OBD_IOC_DATA_TYPE) */
-/* ECHO_IOC_CANCEL _IOWR('f', 203, OBD_IOC_DATA_TYPE) */
-
-#define OBD_IOC_GET_OBJ_VERSION _IOR('f', 210, OBD_IOC_DATA_TYPE)
-
-/* lustre/lustre_user.h 212-217 */
-#define OBD_IOC_GET_MNTOPT _IOW('f', 220, mntopt_t)
-#define OBD_IOC_ECHO_MD _IOR('f', 221, struct obd_ioctl_data)
-#define OBD_IOC_ECHO_ALLOC_SEQ _IOWR('f', 222, struct obd_ioctl_data)
-#define OBD_IOC_START_LFSCK _IOWR('f', 230, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_STOP_LFSCK _IOW('f', 231, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_QUERY_LFSCK _IOR('f', 232, struct obd_ioctl_data)
-/* lustre/lustre_user.h 240-249 */
-/* LIBCFS_IOC_DEBUG_MASK 250 */
-
-#define IOC_OSC_SET_ACTIVE _IOWR('h', 21, void *)
-
-#endif /* _UAPI_LUSTRE_IOCTL_H_ */
diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_kernelcomm.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_kernelcomm.h
deleted file mode 100644
index 94dadbe8e069..000000000000
--- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_kernelcomm.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2013, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- *
- * Author: Nathan Rutman <nathan.rutman@sun.com>
- *
- * Kernel <-> userspace communication routines.
- * The definitions below are used in the kernel and userspace.
- */
-
-#ifndef __UAPI_LUSTRE_KERNELCOMM_H__
-#define __UAPI_LUSTRE_KERNELCOMM_H__
-
-#include <linux/types.h>
-
-/* KUC message header.
- * All current and future KUC messages should use this header.
- * To avoid having to include Lustre headers from libcfs, define this here.
- */
-struct kuc_hdr {
- __u16 kuc_magic;
- /* Each new Lustre feature should use a different transport */
- __u8 kuc_transport;
- __u8 kuc_flags;
- /* Message type or opcode, transport-specific */
- __u16 kuc_msgtype;
- /* Including header */
- __u16 kuc_msglen;
-} __aligned(sizeof(__u64));
-
-#define KUC_CHANGELOG_MSG_MAXSIZE (sizeof(struct kuc_hdr) + CR_MAXSIZE)
-
-#define KUC_MAGIC 0x191C /* Lustre9etLinC */
-
-/* kuc_msgtype values are defined in each transport */
-enum kuc_transport_type {
- KUC_TRANSPORT_GENERIC = 1,
- KUC_TRANSPORT_HSM = 2,
- KUC_TRANSPORT_CHANGELOG = 3,
-};
-
-enum kuc_generic_message_type {
- KUC_MSG_SHUTDOWN = 1,
-};
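-
-/*
- * Hedged sketch: initialize a minimal generic KUC shutdown message
- * header using the definitions above (illustrative only).
- */
-static inline void kuc_shutdown_hdr_init(struct kuc_hdr *hdr)
-{
-	hdr->kuc_magic = KUC_MAGIC;
-	hdr->kuc_transport = KUC_TRANSPORT_GENERIC;
-	hdr->kuc_flags = 0;
-	hdr->kuc_msgtype = KUC_MSG_SHUTDOWN;
-	hdr->kuc_msglen = sizeof(*hdr);
-}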
-
-/* KUC Broadcast Groups. This determines which userspace process hears which
- * messages. Multiple transports may be used within a group, or multiple
- * groups may use the same transport. Broadcast
- * groups need not be used if e.g. a UID is specified instead;
- * use group 0 to signify unicast.
- */
-#define KUC_GRP_HSM 0x02
-#define KUC_GRP_MAX KUC_GRP_HSM
-
-#define LK_FLG_STOP 0x01
-#define LK_NOFD -1U
-
-/* kernelcomm control structure, passed from userspace to kernel */
-struct lustre_kernelcomm {
- __u32 lk_wfd;
- __u32 lk_rfd;
- __u32 lk_uid;
- __u32 lk_group;
- __u32 lk_data;
- __u32 lk_flags;
-} __packed;
-
-#endif /* __UAPI_LUSTRE_KERNELCOMM_H__ */
diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ostid.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ostid.h
deleted file mode 100644
index 3343b602219b..000000000000
--- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ostid.h
+++ /dev/null
@@ -1,236 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2014, Intel Corporation.
- *
- * Copyright 2015 Cray Inc, all rights reserved.
- * Author: Ben Evans.
- *
- * Define ost_id associated functions
- */
-
-#ifndef _UAPI_LUSTRE_OSTID_H_
-#define _UAPI_LUSTRE_OSTID_H_
-
-#include <linux/errno.h>
-#include <uapi/linux/lustre/lustre_fid.h>
-
-static inline __u64 lmm_oi_id(const struct ost_id *oi)
-{
- return oi->oi.oi_id;
-}
-
-static inline __u64 lmm_oi_seq(const struct ost_id *oi)
-{
- return oi->oi.oi_seq;
-}
-
-static inline void lmm_oi_set_seq(struct ost_id *oi, __u64 seq)
-{
- oi->oi.oi_seq = seq;
-}
-
-static inline void lmm_oi_set_id(struct ost_id *oi, __u64 oid)
-{
- oi->oi.oi_id = oid;
-}
-
-static inline void lmm_oi_le_to_cpu(struct ost_id *dst_oi,
- const struct ost_id *src_oi)
-{
- dst_oi->oi.oi_id = __le64_to_cpu(src_oi->oi.oi_id);
- dst_oi->oi.oi_seq = __le64_to_cpu(src_oi->oi.oi_seq);
-}
-
-static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi,
- const struct ost_id *src_oi)
-{
- dst_oi->oi.oi_id = __cpu_to_le64(src_oi->oi.oi_id);
- dst_oi->oi.oi_seq = __cpu_to_le64(src_oi->oi.oi_seq);
-}
-
-/* extract OST sequence (group) from a wire ost_id (id/seq) pair */
-static inline __u64 ostid_seq(const struct ost_id *ostid)
-{
- if (fid_seq_is_mdt0(ostid->oi.oi_seq))
- return FID_SEQ_OST_MDT0;
-
- if (fid_seq_is_default(ostid->oi.oi_seq))
- return FID_SEQ_LOV_DEFAULT;
-
- if (fid_is_idif(&ostid->oi_fid))
- return FID_SEQ_OST_MDT0;
-
- return fid_seq(&ostid->oi_fid);
-}
-
-/* extract OST objid from a wire ost_id (id/seq) pair */
-static inline __u64 ostid_id(const struct ost_id *ostid)
-{
- if (fid_seq_is_mdt0(ostid->oi.oi_seq))
- return ostid->oi.oi_id & IDIF_OID_MASK;
-
- if (fid_seq_is_default(ostid->oi.oi_seq))
- return ostid->oi.oi_id;
-
- if (fid_is_idif(&ostid->oi_fid))
- return fid_idif_id(fid_seq(&ostid->oi_fid),
- fid_oid(&ostid->oi_fid), 0);
-
- return fid_oid(&ostid->oi_fid);
-}
-
-static inline void ostid_set_seq(struct ost_id *oi, __u64 seq)
-{
- if (fid_seq_is_mdt0(seq) || fid_seq_is_default(seq)) {
- oi->oi.oi_seq = seq;
- } else {
- oi->oi_fid.f_seq = seq;
- /*
- * Note: if f_oid + f_ver is zero, we need to initialize f_oid
- * to 1; otherwise ostid_seq() will treat this as an old ostid
- * (oi_seq == 0).
- */
- if (!oi->oi_fid.f_oid && !oi->oi_fid.f_ver)
- oi->oi_fid.f_oid = LUSTRE_FID_INIT_OID;
- }
-}
-
-static inline void ostid_set_seq_mdt0(struct ost_id *oi)
-{
- ostid_set_seq(oi, FID_SEQ_OST_MDT0);
-}
-
-static inline void ostid_set_seq_echo(struct ost_id *oi)
-{
- ostid_set_seq(oi, FID_SEQ_ECHO);
-}
-
-static inline void ostid_set_seq_llog(struct ost_id *oi)
-{
- ostid_set_seq(oi, FID_SEQ_LLOG);
-}
-
-static inline void ostid_cpu_to_le(const struct ost_id *src_oi,
- struct ost_id *dst_oi)
-{
- if (fid_seq_is_mdt0(src_oi->oi.oi_seq)) {
- dst_oi->oi.oi_id = __cpu_to_le64(src_oi->oi.oi_id);
- dst_oi->oi.oi_seq = __cpu_to_le64(src_oi->oi.oi_seq);
- } else {
- fid_cpu_to_le(&dst_oi->oi_fid, &src_oi->oi_fid);
- }
-}
-
-static inline void ostid_le_to_cpu(const struct ost_id *src_oi,
- struct ost_id *dst_oi)
-{
- if (fid_seq_is_mdt0(src_oi->oi.oi_seq)) {
- dst_oi->oi.oi_id = __le64_to_cpu(src_oi->oi.oi_id);
- dst_oi->oi.oi_seq = __le64_to_cpu(src_oi->oi.oi_seq);
- } else {
- fid_le_to_cpu(&dst_oi->oi_fid, &src_oi->oi_fid);
- }
-}
-
-/**
- * Sigh, because pre-2.4 uses
- * struct lov_mds_md_v1 {
- * ........
- * __u64 lmm_object_id;
- * __u64 lmm_object_seq;
- * ......
- * }
- * to identify the LOV(MDT) object, and lmm_object_seq will
- * be normal_fid, which makes it hard to fold these conversions into
- * ostid_to_fid(). So we do the lmm_oi/fid conversion separately.
- *
- * We can tell the lmm_oi by this way,
- * 1.8: lmm_object_id = {inode}, lmm_object_gr = 0
- * 2.1: lmm_object_id = {oid < 128k}, lmm_object_seq = FID_SEQ_NORMAL
- * 2.4: lmm_oi.f_seq = FID_SEQ_NORMAL, lmm_oi.f_oid = {oid < 128k},
- * lmm_oi.f_ver = 0
- *
- * But currently lmm_oi/lsm_oi has no "real" users except for
- * printing some information, the user can always get the real FID
- * from the LMA, and this multiple-case check would make swabbing
- * more complicated. So we keep using id/seq for lmm_oi.
- */
-
-static inline void fid_to_lmm_oi(const struct lu_fid *fid,
- struct ost_id *oi)
-{
- oi->oi.oi_id = fid_oid(fid);
- oi->oi.oi_seq = fid_seq(fid);
-}
-
-/**
- * Unpack an OST object id/seq (group) into a FID. This is needed for
- * converting all obdo, lmm, lsm, etc. 64-bit id/seq pairs into proper
- * FIDs. Note that if an id/seq is already in FID/IDIF format it will
- * be passed through unchanged. Only legacy OST objects in "group 0"
- * will be mapped into the IDIF namespace so that they can fit into the
- * struct lu_fid fields without loss.
- */
-static inline int ostid_to_fid(struct lu_fid *fid, const struct ost_id *ostid,
- __u32 ost_idx)
-{
- __u64 seq = ostid_seq(ostid);
-
- if (ost_idx > 0xffff)
- return -EBADF;
-
- if (fid_seq_is_mdt0(seq)) {
- __u64 oid = ostid_id(ostid);
-
- /* This is a "legacy" (old 1.x/2.early) OST object in "group 0"
- * that we map into the IDIF namespace. It allows up to 2^48
- * objects per OST, as this is the object namespace that has
- * been in production for years. This can handle create rates
- * of 1M objects/s/OST for 9 years, or combinations thereof.
- */
- if (oid >= IDIF_MAX_OID)
- return -EBADF;
-
- fid->f_seq = fid_idif_seq(oid, ost_idx);
- /* truncate to 32 bits by assignment */
- fid->f_oid = oid;
- /* in theory, not currently used */
- fid->f_ver = oid >> 48;
- } else if (!fid_seq_is_default(seq)) {
- /* This is either an IDIF object, which identifies objects
- * across all OSTs, or a regular FID. The IDIF namespace
- * maps legacy OST objects into the FID namespace. In both
- * cases, we just pass the FID through, no conversion needed.
- */
- if (ostid->oi_fid.f_ver)
- return -EBADF;
-
- *fid = ostid->oi_fid;
- }
-
- return 0;
-}
-#endif /* _UAPI_LUSTRE_OSTID_H_ */
diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_param.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_param.h
deleted file mode 100644
index 1eab2ceca338..000000000000
--- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_param.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * User-settable parameter keys
- *
- * Author: Nathan Rutman <nathan@clusterfs.com>
- */
-
-#ifndef _UAPI_LUSTRE_PARAM_H_
-#define _UAPI_LUSTRE_PARAM_H_
-
-/** \defgroup param param
- *
- * @{
- */
-
-/****************** User-settable parameter keys *********************/
-/* e.g.
- * tunefs.lustre --param="failover.node=192.168.0.13@tcp0" /dev/sda
- * lctl conf_param testfs-OST0000 failover.node=3@elan,192.168.0.3@tcp0
- * ... testfs-MDT0000.lov.stripesize=4M
- * ... testfs-OST0000.ost.client_cache_seconds=15
- * ... testfs.sys.timeout=<secs>
- * ... testfs.llite.max_read_ahead_mb=16
- */
-
-/* System global or special params not handled in obd's proc
- * See mgs_write_log_sys()
- */
-#define PARAM_TIMEOUT "timeout=" /* global */
-#define PARAM_LDLM_TIMEOUT "ldlm_timeout=" /* global */
-#define PARAM_AT_MIN "at_min=" /* global */
-#define PARAM_AT_MAX "at_max=" /* global */
-#define PARAM_AT_EXTRA "at_extra=" /* global */
-#define PARAM_AT_EARLY_MARGIN "at_early_margin=" /* global */
-#define PARAM_AT_HISTORY "at_history=" /* global */
-#define PARAM_JOBID_VAR "jobid_var=" /* global */
-#define PARAM_MGSNODE "mgsnode=" /* only at mounttime */
-#define PARAM_FAILNODE "failover.node=" /* add failover nid */
-#define PARAM_FAILMODE "failover.mode=" /* initial mount only */
-#define PARAM_ACTIVE "active=" /* activate/deactivate */
-#define PARAM_NETWORK "network=" /* bind on nid */
-#define PARAM_ID_UPCALL "identity_upcall=" /* identity upcall */
-
-/* Prefixes for parameters handled by obd's proc methods (XXX_process_config) */
-#define PARAM_OST "ost."
-#define PARAM_OSD "osd."
-#define PARAM_OSC "osc."
-#define PARAM_MDT "mdt."
-#define PARAM_HSM "mdt.hsm."
-#define PARAM_MDD "mdd."
-#define PARAM_MDC "mdc."
-#define PARAM_LLITE "llite."
-#define PARAM_LOV "lov."
-#define PARAM_LOD "lod."
-#define PARAM_OSP "osp."
-#define PARAM_SYS "sys." /* global */
-#define PARAM_SRPC "srpc."
-#define PARAM_SRPC_FLVR "srpc.flavor."
-#define PARAM_SRPC_UDESC "srpc.udesc.cli2mdt"
-#define PARAM_SEC "security."
-#define PARAM_QUOTA "quota." /* global */
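-
-/* Illustrative sketch, not part of the original header: composing a full
- * parameter key from the prefixes above. The fsname and value are made
- * up, and snprintf() is assumed to be available in the caller's context.
- */
-static inline void param_key_example(char *buf, unsigned int len)
-{
- /* yields "testfs-MDT0000.lov.stripesize=4M" */
- snprintf(buf, len, "%s-MDT0000.%sstripesize=4M", "testfs", PARAM_LOV);
-}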
-
-/** @} param */
-
-#endif /* _UAPI_LUSTRE_PARAM_H_ */
diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_user.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_user.h
deleted file mode 100644
index 69387f36d1f1..000000000000
--- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_user.h
+++ /dev/null
@@ -1,1327 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2010, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/include/lustre/lustre_user.h
- *
- * Lustre public user-space interface definitions.
- */
-
-#ifndef _LUSTRE_USER_H
-#define _LUSTRE_USER_H
-
-/** \defgroup lustreuser lustreuser
- *
- * @{
- */
-
-#ifdef __KERNEL__
-# include <linux/fs.h>
-# include <linux/quota.h>
-# include <linux/sched/signal.h>
-# include <linux/string.h> /* snprintf() */
-# include <linux/version.h>
-#else /* !__KERNEL__ */
-# define NEED_QUOTA_DEFS
-# include <stdio.h> /* snprintf() */
-# include <string.h>
-# include <sys/quota.h>
-# include <sys/stat.h>
-#endif /* __KERNEL__ */
-#include <uapi/linux/lustre/lustre_fiemap.h>
-
-/*
- * We always need to use the 64-bit version because the structure
- * is shared across the entire cluster, where 32-bit and 64-bit
- * machines coexist.
- */
-#if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64)
-typedef struct stat64 lstat_t;
-#define lstat_f lstat64
-#define fstat_f fstat64
-#define fstatat_f fstatat64
-#else
-typedef struct stat lstat_t;
-#define lstat_f lstat
-#define fstat_f fstat
-#define fstatat_f fstatat
-#endif
-
-#define HAVE_LOV_USER_MDS_DATA
-
-#define LUSTRE_EOF 0xffffffffffffffffULL
-
-/* for statfs() */
-#define LL_SUPER_MAGIC 0x0BD00BD0
-
-#ifndef FSFILT_IOC_GETFLAGS
-#define FSFILT_IOC_GETFLAGS _IOR('f', 1, long)
-#define FSFILT_IOC_SETFLAGS _IOW('f', 2, long)
-#define FSFILT_IOC_GETVERSION _IOR('f', 3, long)
-#define FSFILT_IOC_SETVERSION _IOW('f', 4, long)
-#define FSFILT_IOC_GETVERSION_OLD _IOR('v', 1, long)
-#define FSFILT_IOC_SETVERSION_OLD _IOW('v', 2, long)
-#endif
-
-/* FIEMAP flags supported by Lustre */
-#define LUSTRE_FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_DEVICE_ORDER)
-
-enum obd_statfs_state {
- OS_STATE_DEGRADED = 0x00000001, /**< RAID degraded/rebuilding */
- OS_STATE_READONLY = 0x00000002, /**< filesystem is read-only */
- OS_STATE_RDONLY_1 = 0x00000004, /**< obsolete 1.6, was EROFS=30 */
- OS_STATE_RDONLY_2 = 0x00000008, /**< obsolete 1.6, was EROFS=30 */
- OS_STATE_RDONLY_3 = 0x00000010, /**< obsolete 1.6, was EROFS=30 */
-};
-
-struct obd_statfs {
- __u64 os_type;
- __u64 os_blocks;
- __u64 os_bfree;
- __u64 os_bavail;
- __u64 os_files;
- __u64 os_ffree;
- __u8 os_fsid[40];
- __u32 os_bsize;
- __u32 os_namelen;
- __u64 os_maxbytes;
- __u32 os_state; /**< obd_statfs_state OS_STATE_* flag */
- __u32 os_fprecreated; /* objs available now to the caller */
- /* used in QoS code to find preferred OSTs */
- __u32 os_spare2;
- __u32 os_spare3;
- __u32 os_spare4;
- __u32 os_spare5;
- __u32 os_spare6;
- __u32 os_spare7;
- __u32 os_spare8;
- __u32 os_spare9;
-};
-
-/**
- * File IDentifier.
- *
- * FID is a cluster-wide unique identifier of a file or an object (stripe).
- * FIDs are never reused.
- **/
-struct lu_fid {
- /**
- * FID sequence. Sequence is a unit of migration: all files (objects)
- * with FIDs from a given sequence are stored on the same server.
- * Lustre should support 2^64 objects, so even if each sequence
- * has only a single object we can still enumerate 2^64 objects.
- **/
- __u64 f_seq;
- /* FID number within sequence. */
- __u32 f_oid;
- /**
- * FID version, used to distinguish different versions (in the sense
- * of snapshots, etc.) of the same file system object. Not currently
- * used.
- **/
- __u32 f_ver;
-};
-
-static inline bool fid_is_zero(const struct lu_fid *fid)
-{
- return !fid->f_seq && !fid->f_oid;
-}
-
-struct filter_fid {
- struct lu_fid ff_parent; /* ff_parent.f_ver == file stripe number */
-};
-
-/* keep this one for compatibility */
-struct filter_fid_old {
- struct lu_fid ff_parent;
- __u64 ff_objid;
- __u64 ff_seq;
-};
-
-/* Userspace should treat lu_fid as opaque, and only use the following methods
- * to print or parse it. Other functions (e.g. compare, swab) could be moved
- * here from lustre_idl.h if needed.
- */
-struct lu_fid;
-
-/**
- * The following struct holds object attributes that are kept in the inode's EA.
- * Introduced in the 2.0 release (see b15993 for details).
- * Added to all objects since Lustre 2.4, as it contains the self FID.
- */
-struct lustre_mdt_attrs {
- /**
- * Bitfield for supported data in this structure. From enum lma_compat.
- * lma_self_fid and lma_flags are always available.
- */
- __u32 lma_compat;
- /**
- * Per-file incompat feature list. Lustre version should support all
- * flags set in this field. The supported feature mask is available in
- * LMA_INCOMPAT_SUPP.
- */
- __u32 lma_incompat;
- /** FID of this inode */
- struct lu_fid lma_self_fid;
-};
-
-/**
- * Prior to 2.4, the LMA structure also included SOM attributes, which have
- * since been moved to a dedicated xattr.
- * lma_flags was also removed in favour of the lma_compat/incompat fields.
- */
-#define LMA_OLD_SIZE (sizeof(struct lustre_mdt_attrs) + 5 * sizeof(__u64))
-
-/**
- * OST object IDentifier.
- */
-struct ost_id {
- union {
- struct {
- __u64 oi_id;
- __u64 oi_seq;
- } oi;
- struct lu_fid oi_fid;
- };
-};
-
-#define DOSTID "%#llx:%llu"
-#define POSTID(oi) ostid_seq(oi), ostid_id(oi)
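-
-/* Illustrative sketch, not part of the original header: printing an
- * ost_id with DOSTID/POSTID. Assumes the ostid_seq()/ostid_id() accessors
- * (defined in lustre_ostid.h) and a userspace printf() are in scope.
- */
-static inline void ostid_print_example(const struct ost_id *oi)
-{
- printf("object "DOSTID"\n", POSTID(oi));
-}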
-
-/*
- * The ioctl naming rules:
- * LL_* - works on the currently opened filehandle instead of parent dir
- * *_OBD_* - gets data for both OSC or MDC (LOV, LMV indirectly)
- * *_MDC_* - gets/sets data related to MDC
- * *_LOV_* - gets/sets data related to OSC/LOV
- * *FILE* - called on parent dir and passes in a filename
- * *STRIPE* - set/get lov_user_md
- * *INFO - set/get lov_user_mds_data
- */
-/* lustre_ioctl.h 101-150 */
-#define LL_IOC_GETFLAGS _IOR('f', 151, long)
-#define LL_IOC_SETFLAGS _IOW('f', 152, long)
-#define LL_IOC_CLRFLAGS _IOW('f', 153, long)
-#define LL_IOC_LOV_SETSTRIPE _IOW('f', 154, long)
-#define LL_IOC_LOV_GETSTRIPE _IOW('f', 155, long)
-#define LL_IOC_LOV_SETEA _IOW('f', 156, long)
-/* LL_IOC_RECREATE_OBJ 157 obsolete */
-/* LL_IOC_RECREATE_FID 158 obsolete */
-#define LL_IOC_GROUP_LOCK _IOW('f', 158, long)
-#define LL_IOC_GROUP_UNLOCK _IOW('f', 159, long)
-/* #define LL_IOC_QUOTACHECK 160 OBD_IOC_QUOTACHECK */
-/* #define LL_IOC_POLL_QUOTACHECK 161 OBD_IOC_POLL_QUOTACHECK */
-/* #define LL_IOC_QUOTACTL 162 OBD_IOC_QUOTACTL */
-#define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *)
-/* IOC_LOV_GETINFO 165 obsolete */
-#define LL_IOC_FLUSHCTX _IOW('f', 166, long)
-/* LL_IOC_RMTACL 167 obsolete */
-#define LL_IOC_GETOBDCOUNT _IOR('f', 168, long)
-#define LL_IOC_LLOOP_ATTACH _IOWR('f', 169, long)
-#define LL_IOC_LLOOP_DETACH _IOWR('f', 170, long)
-#define LL_IOC_LLOOP_INFO _IOWR('f', 171, struct lu_fid)
-#define LL_IOC_LLOOP_DETACH_BYDEV _IOWR('f', 172, long)
-#define LL_IOC_PATH2FID _IOR('f', 173, long)
-#define LL_IOC_GET_CONNECT_FLAGS _IOWR('f', 174, __u64 *)
-#define LL_IOC_GET_MDTIDX _IOR('f', 175, int)
-
-/* lustre_ioctl.h 177-210 */
-#define LL_IOC_HSM_STATE_GET _IOR('f', 211, struct hsm_user_state)
-#define LL_IOC_HSM_STATE_SET _IOW('f', 212, struct hsm_state_set)
-#define LL_IOC_HSM_CT_START _IOW('f', 213, struct lustre_kernelcomm)
-#define LL_IOC_HSM_COPY_START _IOW('f', 214, struct hsm_copy *)
-#define LL_IOC_HSM_COPY_END _IOW('f', 215, struct hsm_copy *)
-#define LL_IOC_HSM_PROGRESS _IOW('f', 216, struct hsm_user_request)
-#define LL_IOC_HSM_REQUEST _IOW('f', 217, struct hsm_user_request)
-#define LL_IOC_DATA_VERSION _IOR('f', 218, struct ioc_data_version)
-#define LL_IOC_LOV_SWAP_LAYOUTS _IOW('f', 219, \
- struct lustre_swap_layouts)
-#define LL_IOC_HSM_ACTION _IOR('f', 220, \
- struct hsm_current_action)
-/* see <lustre_lib.h> for ioctl numbers 221-232 */
-
-#define LL_IOC_LMV_SETSTRIPE _IOWR('f', 240, struct lmv_user_md)
-#define LL_IOC_LMV_GETSTRIPE _IOWR('f', 241, struct lmv_user_md)
-#define LL_IOC_SET_LEASE _IOWR('f', 243, long)
-#define LL_IOC_GET_LEASE _IO('f', 244)
-#define LL_IOC_HSM_IMPORT _IOWR('f', 245, struct hsm_user_import)
-#define LL_IOC_LMV_SET_DEFAULT_STRIPE _IOWR('f', 246, struct lmv_user_md)
-#define LL_IOC_MIGRATE _IOR('f', 247, int)
-#define LL_IOC_FID2MDTIDX _IOWR('f', 248, struct lu_fid)
-#define LL_IOC_GETPARENT _IOWR('f', 249, struct getparent)
-
-/* Lease types for use as arg and return of LL_IOC_{GET,SET}_LEASE ioctl. */
-enum ll_lease_type {
- LL_LEASE_RDLCK = 0x1,
- LL_LEASE_WRLCK = 0x2,
- LL_LEASE_UNLCK = 0x4,
-};
-
-#define LL_STATFS_LMV 1
-#define LL_STATFS_LOV 2
-#define LL_STATFS_NODELAY 4
-
-#define IOC_MDC_TYPE 'i'
-#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
-#define IOC_MDC_GETFILESTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_user_md *)
-#define IOC_MDC_GETFILEINFO _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data *)
-#define LL_IOC_MDC_GETINFO _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data *)
-
-#define MAX_OBD_NAME 128 /* If this changes, a NEW ioctl must be added */
-
-/* Define O_LOV_DELAY_CREATE to be a mask of flags that are not useful for
- * regular files, are unlikely to be used in practice, and are not harmful
- * if used incorrectly. O_NOCTTY and FASYNC are only meaningful for
- * character devices and are safe for use on new files (see LU-812, LU-4209).
- */
-#define O_LOV_DELAY_CREATE (O_NOCTTY | FASYNC)
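-
-/* Illustrative sketch, not part of the original header: the usual pattern
- * is to open with O_LOV_DELAY_CREATE and set the layout explicitly before
- * the first write. Assumes userspace open()/ioctl(); error handling of
- * the ioctl is elided, demonstration only.
- */
-static inline int lov_delay_create_example(const char *path,
- struct lov_user_md_v1 *lum)
-{
- int fd = open(path, O_CREAT | O_WRONLY | O_LOV_DELAY_CREATE, 0644);
-
- if (fd >= 0)
- ioctl(fd, LL_IOC_LOV_SETSTRIPE, lum);
- return fd;
-}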
-
-#define LL_FILE_IGNORE_LOCK 0x00000001
-#define LL_FILE_GROUP_LOCKED 0x00000002
-#define LL_FILE_READAHEA 0x00000004
-#define LL_FILE_LOCKED_DIRECTIO 0x00000008 /* client-side locks with dio */
-#define LL_FILE_LOCKLESS_IO 0x00000010 /* server-side locks with cio */
-#define LL_FILE_RMTACL 0x00000020
-
-#define LOV_USER_MAGIC_V1 0x0BD10BD0
-#define LOV_USER_MAGIC LOV_USER_MAGIC_V1
-#define LOV_USER_MAGIC_JOIN_V1 0x0BD20BD0
-#define LOV_USER_MAGIC_V3 0x0BD30BD0
-/* 0x0BD40BD0 is occupied by LOV_MAGIC_MIGRATE */
-#define LOV_USER_MAGIC_SPECIFIC 0x0BD50BD0 /* for specific OSTs */
-
-#define LMV_USER_MAGIC 0x0CD30CD0 /* default LMV magic */
-
-#define LOV_PATTERN_RAID0 0x001
-#define LOV_PATTERN_RAID1 0x002
-#define LOV_PATTERN_FIRST 0x100
-#define LOV_PATTERN_CMOBD 0x200
-
-#define LOV_PATTERN_F_MASK 0xffff0000
-#define LOV_PATTERN_F_HOLE 0x40000000 /* there is hole in LOV EA */
-#define LOV_PATTERN_F_RELEASED 0x80000000 /* HSM released file */
-
-#define LOV_MAXPOOLNAME 15
-#define LOV_POOLNAMEF "%.15s"
-
-#define LOV_MIN_STRIPE_BITS 16 /* maximum PAGE_SIZE (ia64), power of 2 */
-#define LOV_MIN_STRIPE_SIZE (1 << LOV_MIN_STRIPE_BITS)
-#define LOV_MAX_STRIPE_COUNT_OLD 160
-/* This calculation is crafted so that an input of 4096 results in 160,
- * which in turn equals the old maximum stripe count.
- * XXX: In fact this is too simplified for now; it would also need an
- * ea_type argument to know exactly how much space each stripe consumes.
- *
- * The limit of 12 pages is somewhat arbitrary, but is a reasonably large
- * allocation that is sufficient for the current generation of systems.
- *
- * (max buffer size - lov+rpc header) / sizeof(struct lov_ost_data_v1)
- */
-#define LOV_MAX_STRIPE_COUNT 2000 /* ((12 * 4096 - 256) / 24) */
-#define LOV_ALL_STRIPES 0xffff /* only valid for directories */
-#define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */
-
-#define XATTR_LUSTRE_PREFIX "lustre."
-#define XATTR_LUSTRE_LOV "lustre.lov"
-
-#define lov_user_ost_data lov_user_ost_data_v1
-struct lov_user_ost_data_v1 { /* per-stripe data structure */
- struct ost_id l_ost_oi; /* OST object ID */
- __u32 l_ost_gen; /* generation of this OST index */
- __u32 l_ost_idx; /* OST index in LOV */
-} __packed;
-
-#define lov_user_md lov_user_md_v1
-struct lov_user_md_v1 { /* LOV EA user data (host-endian) */
- __u32 lmm_magic; /* magic number = LOV_USER_MAGIC_V1 */
- __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
- struct ost_id lmm_oi; /* LOV object ID */
- __u32 lmm_stripe_size; /* size of stripe in bytes */
- __u16 lmm_stripe_count; /* num stripes in use for this object */
- union {
- __u16 lmm_stripe_offset; /* starting stripe offset in
- * lmm_objects, use when writing
- */
- __u16 lmm_layout_gen; /* layout generation number
- * used when reading
- */
- };
- struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
-} __attribute__((packed, __may_alias__));
-
-struct lov_user_md_v3 { /* LOV EA user data (host-endian) */
- __u32 lmm_magic; /* magic number = LOV_USER_MAGIC_V3 */
- __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
- struct ost_id lmm_oi; /* LOV object ID */
- __u32 lmm_stripe_size; /* size of stripe in bytes */
- __u16 lmm_stripe_count; /* num stripes in use for this object */
- union {
- __u16 lmm_stripe_offset; /* starting stripe offset in
- * lmm_objects, use when writing
- */
- __u16 lmm_layout_gen; /* layout generation number
- * used when reading
- */
- };
- char lmm_pool_name[LOV_MAXPOOLNAME + 1]; /* pool name */
- struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
-} __packed;
-
-static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic)
-{
- if (lmm_magic == LOV_USER_MAGIC_V1)
- return sizeof(struct lov_user_md_v1) +
- stripes * sizeof(struct lov_user_ost_data_v1);
- return sizeof(struct lov_user_md_v3) +
- stripes * sizeof(struct lov_user_ost_data_v1);
-}
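-
-/* Illustrative sketch, not part of the original header: sizing the buffer
- * for LL_IOC_LOV_GETSTRIPE with lov_user_md_size(). Assumes a userspace
- * malloc(); demonstration only.
- */
-static inline struct lov_user_md *lov_getstripe_alloc_example(void)
-{
- __u32 sz = lov_user_md_size(LOV_MAX_STRIPE_COUNT, LOV_USER_MAGIC_V3);
- struct lov_user_md *lum = malloc(sz);
-
- if (lum)
- lum->lmm_magic = LOV_USER_MAGIC_V3;
- return lum;
-}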
-
-/* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to
- * use this. It is unsafe to #define those values in this header as it
- * is possible the application has already #included <sys/stat.h>.
- */
-#ifdef HAVE_LOV_USER_MDS_DATA
-#define lov_user_mds_data lov_user_mds_data_v1
-struct lov_user_mds_data_v1 {
- lstat_t lmd_st; /* MDS stat struct */
- struct lov_user_md_v1 lmd_lmm; /* LOV EA V1 user data */
-} __packed;
-
-struct lov_user_mds_data_v3 {
- lstat_t lmd_st; /* MDS stat struct */
- struct lov_user_md_v3 lmd_lmm; /* LOV EA V3 user data */
-} __packed;
-#endif
-
-struct lmv_user_mds_data {
- struct lu_fid lum_fid;
- __u32 lum_padding;
- __u32 lum_mds;
-};
-
-enum lmv_hash_type {
- LMV_HASH_TYPE_UNKNOWN = 0, /* 0 is reserved for testing purpose */
- LMV_HASH_TYPE_ALL_CHARS = 1,
- LMV_HASH_TYPE_FNV_1A_64 = 2,
-};
-
-#define LMV_HASH_NAME_ALL_CHARS "all_char"
-#define LMV_HASH_NAME_FNV_1A_64 "fnv_1a_64"
-
-/*
- * Derived the same way as LOV_MAX_STRIPE_COUNT above:
- * (max buffer size - lmv+rpc header) / sizeof(struct lmv_user_mds_data)
- */
-#define LMV_MAX_STRIPE_COUNT 2000 /* ((12 * 4096 - 256) / 24) */
-#define lmv_user_md lmv_user_md_v1
-struct lmv_user_md_v1 {
- __u32 lum_magic; /* must be the first field */
- __u32 lum_stripe_count; /* dirstripe count */
- __u32 lum_stripe_offset; /* MDT idx for default dirstripe */
- __u32 lum_hash_type; /* Dir stripe policy */
- __u32 lum_type; /* LMV type: default or normal */
- __u32 lum_padding1;
- __u32 lum_padding2;
- __u32 lum_padding3;
- char lum_pool_name[LOV_MAXPOOLNAME + 1];
- struct lmv_user_mds_data lum_objects[0];
-} __packed;
-
-static inline int lmv_user_md_size(int stripes, int lmm_magic)
-{
- return sizeof(struct lmv_user_md) +
- stripes * sizeof(struct lmv_user_mds_data);
-}
-
-struct ll_recreate_obj {
- __u64 lrc_id;
- __u32 lrc_ost_idx;
-};
-
-struct ll_fid {
- __u64 id; /* holds object id */
- __u32 generation; /* holds object generation */
- __u32 f_type; /* holds object type or stripe idx when passing it to
- * OST for saving into EA.
- */
-};
-
-#define UUID_MAX 40
-struct obd_uuid {
- char uuid[UUID_MAX];
-};
-
-static inline bool obd_uuid_equals(const struct obd_uuid *u1,
- const struct obd_uuid *u2)
-{
- return strcmp((char *)u1->uuid, (char *)u2->uuid) == 0;
-}
-
-static inline int obd_uuid_empty(struct obd_uuid *uuid)
-{
- return uuid->uuid[0] == '\0';
-}
-
-static inline void obd_str2uuid(struct obd_uuid *uuid, const char *tmp)
-{
- strncpy((char *)uuid->uuid, tmp, sizeof(*uuid));
- uuid->uuid[sizeof(*uuid) - 1] = '\0';
-}
-
-/* For printf's only, make sure uuid is terminated */
-static inline char *obd_uuid2str(const struct obd_uuid *uuid)
-{
- if (!uuid)
- return NULL;
-
- if (uuid->uuid[sizeof(*uuid) - 1] != '\0') {
- /* Obviously not safe, but for printfs, no real harm done...
- * we're always null-terminated, even in a race.
- */
- static char temp[sizeof(*uuid)];
-
- memcpy(temp, uuid->uuid, sizeof(*uuid) - 1);
- temp[sizeof(*uuid) - 1] = '\0';
- return temp;
- }
- return (char *)(uuid->uuid);
-}
-
-/* Extract fsname from uuid (or target name) of a target
- * e.g. (myfs-OST0007_UUID -> myfs)
- * see also deuuidify.
- */
-static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen)
-{
- char *p;
-
- strncpy(buf, uuid, buflen - 1);
- buf[buflen - 1] = '\0';
- p = strrchr(buf, '-');
- if (p)
- *p = '\0';
-}
-
-/* printf display format
- * usage: printf("file FID is "DFID"\n", PFID(fid));
- */
-#define FID_NOBRACE_LEN 40
-#define FID_LEN (FID_NOBRACE_LEN + 2)
-#define DFID_NOBRACE "%#llx:0x%x:0x%x"
-#define DFID "[" DFID_NOBRACE "]"
-#define PFID(fid) (unsigned long long)(fid)->f_seq, (fid)->f_oid, (fid)->f_ver
-
-/* scanf input parse format for fids in DFID_NOBRACE format
- * Need to strip '[' from DFID format first or use "["SFID"]" at caller.
- * usage: sscanf(fidstr, SFID, RFID(&fid));
- */
-#define SFID "0x%llx:0x%x:0x%x"
-#define RFID(fid) &((fid)->f_seq), &((fid)->f_oid), &((fid)->f_ver)
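-
-/* Illustrative sketch, not part of the original header: round-tripping a
- * FID through its string form with DFID_NOBRACE/PFID and SFID/RFID.
- * Return values are unchecked; demonstration only.
- */
-static inline void fid_string_roundtrip_example(struct lu_fid *fid)
-{
- char buf[FID_NOBRACE_LEN + 1];
-
- snprintf(buf, sizeof(buf), DFID_NOBRACE, PFID(fid));
- sscanf(buf, SFID, RFID(fid));
-}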
-
-/********* Quotas **********/
-
-#define Q_QUOTACHECK 0x800100 /* deprecated as of 2.4 */
-#define Q_INITQUOTA 0x800101 /* deprecated as of 2.4 */
-#define Q_GETOINFO 0x800102 /* get obd quota info */
-#define Q_GETOQUOTA 0x800103 /* get obd quotas */
-#define Q_FINVALIDATE 0x800104 /* deprecated as of 2.4 */
-
-/* these must be explicitly translated into linux Q_* in ll_dir_ioctl */
-#define LUSTRE_Q_QUOTAON 0x800002 /* deprecated as of 2.4 */
-#define LUSTRE_Q_QUOTAOFF 0x800003 /* deprecated as of 2.4 */
-#define LUSTRE_Q_GETINFO 0x800005 /* get information about quota files */
-#define LUSTRE_Q_SETINFO 0x800006 /* set information about quota files */
-#define LUSTRE_Q_GETQUOTA 0x800007 /* get user quota structure */
-#define LUSTRE_Q_SETQUOTA 0x800008 /* set user quota structure */
-/* lustre-specific control commands */
-#define LUSTRE_Q_INVALIDATE 0x80000b /* deprecated as of 2.4 */
-#define LUSTRE_Q_FINVALIDATE 0x80000c /* deprecated as of 2.4 */
-
-#define UGQUOTA 2 /* set both USRQUOTA and GRPQUOTA */
-
-#define IDENTITY_DOWNCALL_MAGIC 0x6d6dd629
-
-/* permission */
-#define N_PERMS_MAX 64
-
-struct perm_downcall_data {
- __u64 pdd_nid;
- __u32 pdd_perm;
- __u32 pdd_padding;
-};
-
-struct identity_downcall_data {
- __u32 idd_magic;
- __u32 idd_err;
- __u32 idd_uid;
- __u32 idd_gid;
- __u32 idd_nperms;
- __u32 idd_ngroups;
- struct perm_downcall_data idd_perms[N_PERMS_MAX];
- __u32 idd_groups[0];
-};
-
-/* lustre volatile file support
- * file name header: ".^L^S^T^R:VOLATILE"
- */
-#define LUSTRE_VOLATILE_HDR ".\x0c\x13\x14\x12:VOLATILE"
-#define LUSTRE_VOLATILE_HDR_LEN 14
-/* hdr + MDT index */
-#define LUSTRE_VOLATILE_IDX LUSTRE_VOLATILE_HDR":%.4X:"
-
-enum lustre_quota_version {
- LUSTRE_QUOTA_V2 = 1
-};
-
-/* XXX: same as if_dqinfo struct in kernel */
-struct obd_dqinfo {
- __u64 dqi_bgrace;
- __u64 dqi_igrace;
- __u32 dqi_flags;
- __u32 dqi_valid;
-};
-
-/* XXX: same as if_dqblk struct in kernel, plus one padding */
-struct obd_dqblk {
- __u64 dqb_bhardlimit;
- __u64 dqb_bsoftlimit;
- __u64 dqb_curspace;
- __u64 dqb_ihardlimit;
- __u64 dqb_isoftlimit;
- __u64 dqb_curinodes;
- __u64 dqb_btime;
- __u64 dqb_itime;
- __u32 dqb_valid;
- __u32 dqb_padding;
-};
-
-enum {
- QC_GENERAL = 0,
- QC_MDTIDX = 1,
- QC_OSTIDX = 2,
- QC_UUID = 3
-};
-
-struct if_quotactl {
- __u32 qc_cmd;
- __u32 qc_type;
- __u32 qc_id;
- __u32 qc_stat;
- __u32 qc_valid;
- __u32 qc_idx;
- struct obd_dqinfo qc_dqinfo;
- struct obd_dqblk qc_dqblk;
- char obd_type[16];
- struct obd_uuid obd_uuid;
-};
-
-/* swap layout flags */
-#define SWAP_LAYOUTS_CHECK_DV1 (1 << 0)
-#define SWAP_LAYOUTS_CHECK_DV2 (1 << 1)
-#define SWAP_LAYOUTS_KEEP_MTIME (1 << 2)
-#define SWAP_LAYOUTS_KEEP_ATIME (1 << 3)
-#define SWAP_LAYOUTS_CLOSE (1 << 4)
-
-/* Swap XATTR_NAME_HSM as well, only on the MDT so far */
-#define SWAP_LAYOUTS_MDS_HSM (1 << 31)
-struct lustre_swap_layouts {
- __u64 sl_flags;
- __u32 sl_fd;
- __u32 sl_gid;
- __u64 sl_dv1;
- __u64 sl_dv2;
-};
-
-/********* Changelogs **********/
-/** Changelog record types */
-enum changelog_rec_type {
- CL_MARK = 0,
- CL_CREATE = 1, /* namespace */
- CL_MKDIR = 2, /* namespace */
- CL_HARDLINK = 3, /* namespace */
- CL_SOFTLINK = 4, /* namespace */
- CL_MKNOD = 5, /* namespace */
- CL_UNLINK = 6, /* namespace */
- CL_RMDIR = 7, /* namespace */
- CL_RENAME = 8, /* namespace */
- CL_EXT = 9, /* namespace extended record (2nd half of rename) */
- CL_OPEN = 10, /* not currently used */
- CL_CLOSE = 11, /* may be written to log only with mtime change */
- CL_LAYOUT = 12, /* file layout/striping modified */
- CL_TRUNC = 13,
- CL_SETATTR = 14,
- CL_XATTR = 15,
- CL_HSM = 16, /* HSM specific events, see flags */
- CL_MTIME = 17, /* Precedence: setattr > mtime > ctime > atime */
- CL_CTIME = 18,
- CL_ATIME = 19,
- CL_LAST
-};
-
-static inline const char *changelog_type2str(int type)
-{
- static const char *changelog_str[] = {
- "MARK", "CREAT", "MKDIR", "HLINK", "SLINK", "MKNOD", "UNLNK",
- "RMDIR", "RENME", "RNMTO", "OPEN", "CLOSE", "LYOUT", "TRUNC",
- "SATTR", "XATTR", "HSM", "MTIME", "CTIME", "ATIME",
- };
-
- if (type >= 0 && type < CL_LAST)
- return changelog_str[type];
- return NULL;
-}
-
-/* per-record flags */
-#define CLF_FLAGSHIFT 12
-#define CLF_FLAGMASK ((1U << CLF_FLAGSHIFT) - 1)
-#define CLF_VERMASK (~CLF_FLAGMASK)
-enum changelog_rec_flags {
- CLF_VERSION = 0x1000,
- CLF_RENAME = 0x2000,
- CLF_JOBID = 0x4000,
- CLF_SUPPORTED = CLF_VERSION | CLF_RENAME | CLF_JOBID
-};
-
-/* Anything under the flagmask may be per-type (if desired) */
-/* Flags for unlink */
-#define CLF_UNLINK_LAST 0x0001 /* Unlink of last hardlink */
-#define CLF_UNLINK_HSM_EXISTS 0x0002 /* File has something in HSM */
- /* HSM cleaning needed */
-/* Flags for rename */
-#define CLF_RENAME_LAST 0x0001 /* rename unlink last hardlink of
- * target
- */
-#define CLF_RENAME_LAST_EXISTS 0x0002 /* rename unlink last hardlink of target
- * has an archive in backend
- */
-
-/* Flags for HSM */
-/* 12b used (from high weight to low weight):
- * 2b for flags
- * 3b for event
- * 7b for error code
- */
-#define CLF_HSM_ERR_L 0 /* HSM return code, 7 bits */
-#define CLF_HSM_ERR_H 6
-#define CLF_HSM_EVENT_L 7 /* HSM event, 3 bits, see enum hsm_event */
-#define CLF_HSM_EVENT_H 9
-#define CLF_HSM_FLAG_L 10 /* HSM flags, 2 bits, 1 used, 1 spare */
-#define CLF_HSM_FLAG_H 11
-#define CLF_HSM_SPARE_L 12 /* 4 spare bits */
-#define CLF_HSM_SPARE_H 15
-#define CLF_HSM_LAST 15
-
-/* Remove bits higher than _h, then extract the value
- * between _h and _l by shifting the lower weight to bit 0.
- */
-#define CLF_GET_BITS(_b, _h, _l) (((_b << (CLF_HSM_LAST - _h)) & 0xFFFF) \
- >> (CLF_HSM_LAST - _h + _l))
-
-#define CLF_HSM_SUCCESS 0x00
-#define CLF_HSM_MAXERROR 0x7E
-#define CLF_HSM_ERROVERFLOW 0x7F
-
-#define CLF_HSM_DIRTY 1 /* file is dirty after HSM request end */
-
-/* 3 bits field => 8 values allowed */
-enum hsm_event {
- HE_ARCHIVE = 0,
- HE_RESTORE = 1,
- HE_CANCEL = 2,
- HE_RELEASE = 3,
- HE_REMOVE = 4,
- HE_STATE = 5,
- HE_SPARE1 = 6,
- HE_SPARE2 = 7,
-};
-
-static inline enum hsm_event hsm_get_cl_event(__u16 flags)
-{
- return CLF_GET_BITS(flags, CLF_HSM_EVENT_H, CLF_HSM_EVENT_L);
-}
-
-static inline void hsm_set_cl_event(int *flags, enum hsm_event he)
-{
- *flags |= (he << CLF_HSM_EVENT_L);
-}
-
-static inline __u16 hsm_get_cl_flags(int flags)
-{
- return CLF_GET_BITS(flags, CLF_HSM_FLAG_H, CLF_HSM_FLAG_L);
-}
-
-static inline void hsm_set_cl_flags(int *flags, int bits)
-{
- *flags |= (bits << CLF_HSM_FLAG_L);
-}
-
-static inline int hsm_get_cl_error(int flags)
-{
- return CLF_GET_BITS(flags, CLF_HSM_ERR_H, CLF_HSM_ERR_L);
-}
-
-static inline void hsm_set_cl_error(int *flags, int error)
-{
- *flags |= (error << CLF_HSM_ERR_L);
-}
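-
-/* Illustrative sketch, not part of the original header: packing an HSM
- * event and error code into changelog flags with the helpers above and
- * reading them back. Values are arbitrary; demonstration only.
- */
-static inline int hsm_flags_roundtrip_example(void)
-{
- int flags = 0;
-
- hsm_set_cl_event(&flags, HE_RESTORE);
- hsm_set_cl_error(&flags, 5);
- return hsm_get_cl_event(flags) == HE_RESTORE &&
- hsm_get_cl_error(flags) == 5;
-}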
-
-enum changelog_send_flag {
- /* Not yet implemented */
- CHANGELOG_FLAG_FOLLOW = 0x01,
- /*
- * Blocking IO makes sense in case of slow user parsing of the records,
- * but it also prevents us from cleaning up if the records are not
- * consumed.
- */
- CHANGELOG_FLAG_BLOCK = 0x02,
- /* Pack jobid into the changelog records if available. */
- CHANGELOG_FLAG_JOBID = 0x04,
-};
-
-#define CR_MAXSIZE cfs_size_round(2 * NAME_MAX + 2 + \
- changelog_rec_offset(CLF_SUPPORTED))
-
-/* 31 usable string bytes plus a null terminator. */
-#define LUSTRE_JOBID_SIZE 32
-
-/*
- * This is the minimal changelog record. It can contain extensions
- * such as rename fields or process jobid. Its exact content is described
- * by the cr_flags.
- *
- * Extensions are packed in the same order as their corresponding flags.
- */
-struct changelog_rec {
- __u16 cr_namelen;
- __u16 cr_flags; /**< \a changelog_rec_flags */
- __u32 cr_type; /**< \a changelog_rec_type */
- __u64 cr_index; /**< changelog record number */
- __u64 cr_prev; /**< last index for this target fid */
- __u64 cr_time;
- union {
- struct lu_fid cr_tfid; /**< target fid */
- __u32 cr_markerflags; /**< CL_MARK flags */
- };
- struct lu_fid cr_pfid; /**< parent fid */
-} __packed;
-
-/* Changelog extension for RENAME. */
-struct changelog_ext_rename {
- struct lu_fid cr_sfid; /**< source fid, or zero */
- struct lu_fid cr_spfid; /**< source parent fid, or zero */
-};
-
-/* Changelog extension to include JOBID. */
-struct changelog_ext_jobid {
- char cr_jobid[LUSTRE_JOBID_SIZE]; /**< zero-terminated string. */
-};
-
-static inline size_t changelog_rec_offset(enum changelog_rec_flags crf)
-{
- size_t size = sizeof(struct changelog_rec);
-
- if (crf & CLF_RENAME)
- size += sizeof(struct changelog_ext_rename);
-
- if (crf & CLF_JOBID)
- size += sizeof(struct changelog_ext_jobid);
-
- return size;
-}
-
-static inline size_t changelog_rec_size(struct changelog_rec *rec)
-{
- return changelog_rec_offset(rec->cr_flags);
-}
-
-static inline size_t changelog_rec_varsize(struct changelog_rec *rec)
-{
- return changelog_rec_size(rec) - sizeof(*rec) + rec->cr_namelen;
-}
-
-static inline
-struct changelog_ext_rename *changelog_rec_rename(struct changelog_rec *rec)
-{
- enum changelog_rec_flags crf = rec->cr_flags & CLF_VERSION;
-
- return (struct changelog_ext_rename *)((char *)rec +
- changelog_rec_offset(crf));
-}
-
-/* The jobid follows the rename extension, if present */
-static inline
-struct changelog_ext_jobid *changelog_rec_jobid(struct changelog_rec *rec)
-{
- enum changelog_rec_flags crf = rec->cr_flags &
- (CLF_VERSION | CLF_RENAME);
-
- return (struct changelog_ext_jobid *)((char *)rec +
- changelog_rec_offset(crf));
-}
-
-/* The name follows the rename and jobid extensions, if present */
-static inline char *changelog_rec_name(struct changelog_rec *rec)
-{
- return (char *)rec + changelog_rec_offset(rec->cr_flags &
- CLF_SUPPORTED);
-}
-
-static inline size_t changelog_rec_snamelen(struct changelog_rec *rec)
-{
- return rec->cr_namelen - strlen(changelog_rec_name(rec)) - 1;
-}
-
-static inline char *changelog_rec_sname(struct changelog_rec *rec)
-{
- char *cr_name = changelog_rec_name(rec);
-
- return cr_name + strlen(cr_name) + 1;
-}
-
-/**
- * Remap a record to the desired format as specified by the crf flags.
- * The record must be big enough to contain the final remapped version.
- * Superfluous extension fields are removed and missing ones are added
- * and zeroed. The flags of the record are updated accordingly.
- *
- * The jobid and rename extensions can be added to a record, typically to
- * match the format an application expects. In this case, the newly added
- * fields will be zeroed.
- * The Jobid field can be removed, to guarantee compatibility with older
- * clients that don't expect this field in the records they process.
- *
- * The following assumptions are being made:
- * - CLF_RENAME will not be removed
- * - CLF_JOBID will not be added without CLF_RENAME being added too
- *
- * @param[in,out] rec The record to remap.
- * @param[in] crf_wanted Flags describing the desired extensions.
- */
-static inline void changelog_remap_rec(struct changelog_rec *rec,
- enum changelog_rec_flags crf_wanted)
-{
- char *jid_mov, *rnm_mov;
-
- crf_wanted &= CLF_SUPPORTED;
-
- if ((rec->cr_flags & CLF_SUPPORTED) == crf_wanted)
- return;
-
- /* First move the variable-length name field */
- memmove((char *)rec + changelog_rec_offset(crf_wanted),
- changelog_rec_name(rec), rec->cr_namelen);
-
- /* Locations of jobid and rename extensions in the remapped record */
- jid_mov = (char *)rec +
- changelog_rec_offset(crf_wanted & ~CLF_JOBID);
- rnm_mov = (char *)rec +
- changelog_rec_offset(crf_wanted & ~(CLF_JOBID | CLF_RENAME));
-
- /* Move the extension fields to the desired positions */
- if ((crf_wanted & CLF_JOBID) && (rec->cr_flags & CLF_JOBID))
- memmove(jid_mov, changelog_rec_jobid(rec),
- sizeof(struct changelog_ext_jobid));
-
- if ((crf_wanted & CLF_RENAME) && (rec->cr_flags & CLF_RENAME))
- memmove(rnm_mov, changelog_rec_rename(rec),
- sizeof(struct changelog_ext_rename));
-
- /* Clear newly added fields */
- if ((crf_wanted & CLF_JOBID) && !(rec->cr_flags & CLF_JOBID))
- memset(jid_mov, 0, sizeof(struct changelog_ext_jobid));
-
- if ((crf_wanted & CLF_RENAME) && !(rec->cr_flags & CLF_RENAME))
- memset(rnm_mov, 0, sizeof(struct changelog_ext_rename));
-
- /* Update the record's flags accordingly */
- rec->cr_flags = (rec->cr_flags & CLF_FLAGMASK) | crf_wanted;
-}
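-
-/* Illustrative sketch, not part of the original header: a consumer that
- * only understands the base record plus rename fields can strip the jobid
- * extension with changelog_remap_rec(), as the comment above describes.
- */
-static inline void changelog_strip_jobid_example(struct changelog_rec *rec)
-{
- changelog_remap_rec(rec, CLF_VERSION | CLF_RENAME);
-}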
-
-struct ioc_changelog {
- __u64 icc_recno;
- __u32 icc_mdtindex;
- __u32 icc_id;
- __u32 icc_flags;
-};
-
-enum changelog_message_type {
- CL_RECORD = 10, /* message is a changelog_rec */
- CL_EOF = 11, /* at end of current changelog */
-};
-
-/********* Misc **********/
-
-struct ioc_data_version {
- __u64 idv_version;
- __u64 idv_flags; /* See LL_DV_xxx */
-};
-
-#define LL_DV_RD_FLUSH (1 << 0) /* Flush dirty pages from clients */
-#define LL_DV_WR_FLUSH (1 << 1) /* Flush all caching pages from clients */
-
-#ifndef offsetof
-# define offsetof(typ, memb) ((unsigned long)((char *)&(((typ *)0)->memb)))
-#endif
-
-#define dot_lustre_name ".lustre"
-
-/********* HSM **********/
-
-/** HSM per-file state
- * See HSM_FLAGS below.
- */
-enum hsm_states {
- HS_NONE = 0x00000000,
- HS_EXISTS = 0x00000001,
- HS_DIRTY = 0x00000002,
- HS_RELEASED = 0x00000004,
- HS_ARCHIVED = 0x00000008,
- HS_NORELEASE = 0x00000010,
- HS_NOARCHIVE = 0x00000020,
- HS_LOST = 0x00000040,
-};
-
-/* HSM user-settable flags. */
-#define HSM_USER_MASK (HS_NORELEASE | HS_NOARCHIVE | HS_DIRTY)
-
-/* Other HSM flags. */
-#define HSM_STATUS_MASK (HS_EXISTS | HS_LOST | HS_RELEASED | HS_ARCHIVED)
-
-/*
- * All possible HSM-related flags that could be applied to a file.
- * This should be kept in sync with hsm_states.
- */
-#define HSM_FLAGS_MASK (HSM_USER_MASK | HSM_STATUS_MASK)
-
-/**
- * HSM request progress state
- */
-enum hsm_progress_states {
- HPS_WAITING = 1,
- HPS_RUNNING = 2,
- HPS_DONE = 3,
-};
-
-#define HPS_NONE 0
-
-static inline char *hsm_progress_state2name(enum hsm_progress_states s)
-{
- switch (s) {
- case HPS_WAITING: return "waiting";
- case HPS_RUNNING: return "running";
- case HPS_DONE: return "done";
- default: return "unknown";
- }
-}
-
-struct hsm_extent {
- __u64 offset;
- __u64 length;
-} __packed;
-
-/**
- * Current HSM states of a Lustre file.
- *
- * This structure is mainly meant to be sent to user space. It describes the
- * current HSM flags and in-progress action.
- */
-struct hsm_user_state {
- /** Current HSM states, from enum hsm_states. */
- __u32 hus_states;
- __u32 hus_archive_id;
- /** The action currently in progress, if any */
- __u32 hus_in_progress_state;
- __u32 hus_in_progress_action;
- struct hsm_extent hus_in_progress_location;
- char hus_extended_info[];
-};
-
-struct hsm_state_set_ioc {
- struct lu_fid hssi_fid;
- __u64 hssi_setmask;
- __u64 hssi_clearmask;
-};
-
-/*
- * This structure describes the current in-progress action for a file.
- * It is returned to user space and sent over the wire.
- */
-struct hsm_current_action {
- /** The action currently in progress, if any */
- /* state is one of hsm_progress_states */
- __u32 hca_state;
- /* action is one of hsm_user_action */
- __u32 hca_action;
- struct hsm_extent hca_location;
-};
-
-/***** HSM user requests ******/
-/* User-generated (lfs/ioctl) request types */
-enum hsm_user_action {
- HUA_NONE = 1, /* no action (noop) */
- HUA_ARCHIVE = 10, /* copy to hsm */
- HUA_RESTORE = 11, /* prestage */
- HUA_RELEASE = 12, /* drop ost objects */
- HUA_REMOVE = 13, /* remove from archive */
- HUA_CANCEL = 14 /* cancel a request */
-};
-
-static inline char *hsm_user_action2name(enum hsm_user_action a)
-{
- switch (a) {
- case HUA_NONE: return "NOOP";
- case HUA_ARCHIVE: return "ARCHIVE";
- case HUA_RESTORE: return "RESTORE";
- case HUA_RELEASE: return "RELEASE";
- case HUA_REMOVE: return "REMOVE";
- case HUA_CANCEL: return "CANCEL";
- default: return "UNKNOWN";
- }
-}
-
-/*
- * List of hr_flags (bit field)
- */
-#define HSM_FORCE_ACTION 0x0001
-/* used by the copytool (CT), cannot be set by the user */
-#define HSM_GHOST_COPY 0x0002
-
-/**
- * Contains the fixed part of struct hsm_user_request.
- */
-struct hsm_request {
- __u32 hr_action; /* enum hsm_user_action */
- __u32 hr_archive_id; /* archive id, used only with HUA_ARCHIVE */
- __u64 hr_flags; /* request flags */
- __u32 hr_itemcount; /* item count in hur_user_item vector */
- __u32 hr_data_len;
-};
-
-struct hsm_user_item {
- struct lu_fid hui_fid;
- struct hsm_extent hui_extent;
-} __packed;
-
-struct hsm_user_request {
- struct hsm_request hur_request;
- struct hsm_user_item hur_user_item[0];
- /* extra data blob at end of struct (after all
- * hur_user_items), only use helpers to access it
- */
-} __packed;
-
-/** Return pointer to data field in a hsm user request */
-static inline void *hur_data(struct hsm_user_request *hur)
-{
- return &hur->hur_user_item[hur->hur_request.hr_itemcount];
-}
-
-/**
- * Compute the current length of the provided hsm_user_request. This returns -1
- * instead of an errno because ssize_t is defined to be only [ -1, SSIZE_MAX ]
- *
- * return -1 on bounds check error.
- */
-static inline ssize_t hur_len(struct hsm_user_request *hur)
-{
- __u64 size;
-
- /* can't overflow a __u64 since hr_itemcount is only __u32 */
- size = offsetof(struct hsm_user_request, hur_user_item[0]) +
- (__u64)hur->hur_request.hr_itemcount *
- sizeof(hur->hur_user_item[0]) + hur->hur_request.hr_data_len;
-
- if (size != (ssize_t)size)
- return -1;
-
- return size;
-}
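-
-/* Illustrative sketch, not part of the original header: building a
- * one-item HSM archive request sized from the structures above. Assumes a
- * userspace calloc(); error handling elided, demonstration only.
- */
-static inline struct hsm_user_request *
-hsm_archive_request_example(const struct lu_fid *fid, __u32 archive_id)
-{
- struct hsm_user_request *hur;
-
- hur = calloc(1, sizeof(*hur) + sizeof(struct hsm_user_item));
- if (!hur)
- return NULL;
- hur->hur_request.hr_action = HUA_ARCHIVE;
- hur->hur_request.hr_archive_id = archive_id;
- hur->hur_request.hr_itemcount = 1;
- hur->hur_user_item[0].hui_fid = *fid;
- return hur;
-}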
-
-/****** HSM RPCs to copytool *****/
-/* Message types the copytool may receive */
-enum hsm_message_type {
- HMT_ACTION_LIST = 100, /* message is a hsm_action_list */
-};
-
-/* Actions the copytool may be instructed to take for a given action_item */
-enum hsm_copytool_action {
- HSMA_NONE = 10, /* no action */
- HSMA_ARCHIVE = 20, /* arbitrary offset */
- HSMA_RESTORE = 21,
- HSMA_REMOVE = 22,
- HSMA_CANCEL = 23
-};
-
-static inline char *hsm_copytool_action2name(enum hsm_copytool_action a)
-{
- switch (a) {
- case HSMA_NONE: return "NOOP";
- case HSMA_ARCHIVE: return "ARCHIVE";
- case HSMA_RESTORE: return "RESTORE";
- case HSMA_REMOVE: return "REMOVE";
- case HSMA_CANCEL: return "CANCEL";
- default: return "UNKNOWN";
- }
-}
-
-/* Copytool item action description */
-struct hsm_action_item {
- __u32 hai_len; /* valid size of this struct */
- __u32 hai_action; /* hsm_copytool_action, but use known size */
- struct lu_fid hai_fid; /* Lustre FID to operate on */
- struct lu_fid hai_dfid; /* fid used for data access */
- struct hsm_extent hai_extent; /* byte range to operate on */
- __u64 hai_cookie; /* action cookie from coordinator */
- __u64 hai_gid; /* grouplock id */
- char hai_data[0]; /* variable length */
-} __packed;
-
-/*
- * helper function which prints in hex the first bytes of the
- * hai opaque data field
- * \param hai [IN] record to print
- * \param buffer [OUT] output buffer
- * \param len [IN] max buffer len
- * \retval buffer
- */
-static inline char *hai_dump_data_field(struct hsm_action_item *hai,
- char *buffer, size_t len)
-{
- int i, data_len;
- char *ptr;
-
- ptr = buffer;
- data_len = hai->hai_len - sizeof(*hai);
- for (i = 0; (i < data_len) && (len > 2); i++) {
- snprintf(ptr, 3, "%02X", (unsigned char)hai->hai_data[i]);
- ptr += 2;
- len -= 2;
- }
-
- *ptr = '\0';
-
- return buffer;
-}
-
-/* Copytool action list */
-#define HAL_VERSION 1
-#define HAL_MAXSIZE LNET_MTU /* bytes, used in userspace only */
-struct hsm_action_list {
- __u32 hal_version;
- __u32 hal_count; /* number of hai's to follow */
- __u64 hal_compound_id; /* returned by coordinator */
- __u64 hal_flags;
- __u32 hal_archive_id; /* which archive backend */
- __u32 padding1;
- char hal_fsname[0]; /* null-terminated */
- /* struct hsm_action_item[hal_count] follows, aligned on 8-byte
- * boundaries. See hai_first
- */
-} __packed;
-
-#ifndef HAVE_CFS_SIZE_ROUND
-static inline int cfs_size_round(int val)
-{
- return (val + 7) & (~0x7);
-}
-
-#define HAVE_CFS_SIZE_ROUND
-#endif
-
-/* Return pointer to first hai in action list */
-static inline struct hsm_action_item *hai_first(struct hsm_action_list *hal)
-{
- return (struct hsm_action_item *)(hal->hal_fsname +
- cfs_size_round(strlen(hal->hal_fsname) + 1));
-}
-
-/* Return pointer to next hai */
-static inline struct hsm_action_item *hai_next(struct hsm_action_item *hai)
-{
- return (struct hsm_action_item *)((char *)hai +
- cfs_size_round(hai->hai_len));
-}
-
-/* Return size of an hsm_action_list */
-static inline int hal_size(struct hsm_action_list *hal)
-{
- int i, sz;
- struct hsm_action_item *hai;
-
- sz = sizeof(*hal) + cfs_size_round(strlen(hal->hal_fsname) + 1);
- hai = hai_first(hal);
- for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai))
- sz += cfs_size_round(hai->hai_len);
-
- return sz;
-}
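-
-/* Illustrative sketch, not part of the original header: how a copytool
- * might walk the items of a received action list with hai_first() and
- * hai_next(). process_one() is a hypothetical callback.
- */
-static inline void hal_walk_example(struct hsm_action_list *hal,
- void (*process_one)(struct hsm_action_item *))
-{
- struct hsm_action_item *hai = hai_first(hal);
- int i;
-
- for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai))
- process_one(hai);
-}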
-
-/* HSM file import
- * describe the attributes to be set on imported file
- */
-struct hsm_user_import {
- __u64 hui_size;
- __u64 hui_atime;
- __u64 hui_mtime;
- __u32 hui_atime_ns;
- __u32 hui_mtime_ns;
- __u32 hui_uid;
- __u32 hui_gid;
- __u32 hui_mode;
- __u32 hui_archive_id;
-};
-
-/* Copytool progress reporting */
-#define HP_FLAG_COMPLETED 0x01
-#define HP_FLAG_RETRY 0x02
-
-struct hsm_progress {
- struct lu_fid hp_fid;
- __u64 hp_cookie;
- struct hsm_extent hp_extent;
- __u16 hp_flags;
- __u16 hp_errval; /* positive val */
- __u32 padding;
-};
-
-struct hsm_copy {
- __u64 hc_data_version;
- __u16 hc_flags;
- __u16 hc_errval; /* positive val */
- __u32 padding;
- struct hsm_action_item hc_hai;
-};
-
-/** @} lustreuser */
-
-#endif /* _LUSTRE_USER_H */
diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ver.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ver.h
deleted file mode 100644
index 19c9135e2273..000000000000
--- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ver.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef _LUSTRE_VER_H_
-#define _LUSTRE_VER_H_
-
-#define LUSTRE_MAJOR 2
-#define LUSTRE_MINOR 6
-#define LUSTRE_PATCH 99
-#define LUSTRE_FIX 0
-#define LUSTRE_VERSION_STRING "2.6.99"
-
-#define OBD_OCD_VERSION(major, minor, patch, fix) \
- (((major) << 24) + ((minor) << 16) + ((patch) << 8) + (fix))
-
-#define OBD_OCD_VERSION_MAJOR(version) ((int)((version) >> 24) & 255)
-#define OBD_OCD_VERSION_MINOR(version) ((int)((version) >> 16) & 255)
-#define OBD_OCD_VERSION_PATCH(version) ((int)((version) >> 8) & 255)
-#define OBD_OCD_VERSION_FIX(version) ((int)((version) >> 0) & 255)
-
-#define LUSTRE_VERSION_CODE \
- OBD_OCD_VERSION(LUSTRE_MAJOR, LUSTRE_MINOR, LUSTRE_PATCH, LUSTRE_FIX)
-
-/*
- * If the Lustre version of a client differs from that of the servers it
- * connects to by more than this amount, the client issues a warning.
- */
-#define LUSTRE_VERSION_OFFSET_WARN OBD_OCD_VERSION(0, 4, 0, 0)
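-
-/* Illustrative sketch, not part of the original header: the kind of
- * comparison the warning threshold above enables. peer_version is a
- * hypothetical input, e.g. taken from a connect reply.
- */
-static inline int lustre_version_needs_warning(unsigned int peer_version)
-{
- unsigned int diff = peer_version > LUSTRE_VERSION_CODE ?
- peer_version - LUSTRE_VERSION_CODE :
- LUSTRE_VERSION_CODE - peer_version;
-
- return diff > LUSTRE_VERSION_OFFSET_WARN;
-}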
-
-#endif
diff --git a/drivers/staging/lustre/lnet/Kconfig b/drivers/staging/lustre/lnet/Kconfig
deleted file mode 100644
index ad049e6f24e4..000000000000
--- a/drivers/staging/lustre/lnet/Kconfig
+++ /dev/null
@@ -1,46 +0,0 @@
-config LNET
- tristate "Lustre networking subsystem (LNet)"
- depends on INET
- help
- The Lustre network layer, also known as LNet, is a networking abstraction
- API that was initially created to allow the Lustre filesystem to use
- very different networks, such as TCP and IB verbs, in a uniform way. In
- the case of Lustre routers only the LNet layer is required. Lately, other
- projects have also been looking into using LNet as their networking API.
-
-config LNET_MAX_PAYLOAD
- int "Lustre lnet max transfer payload (default 1MB)"
- depends on LNET
- default "1048576"
- help
- This option defines the maximum payload size in bytes that LNet
- can put into its transport.
-
- If unsure, use default.
-
-config LNET_SELFTEST
- tristate "Lustre networking self testing"
- depends on LNET
- help
- Choose Y here if you want to do LNet self testing. To compile this
- as a module, choose M here: the module will be called lnet_selftest.
-
- If unsure, say N.
-
- See also http://wiki.lustre.org/
-
-config LNET_XPRT_IB
- tristate "LNET infiniband support"
- depends on LNET && PCI && INFINIBAND && INFINIBAND_ADDR_TRANS
- default LNET && INFINIBAND
- help
- This option allows LNet users to use InfiniBand as an
- RDMA-enabled transport.
-
- To compile this as a kernel module, choose M here and it will be
- called ko2iblnd.
-
- If unsure, say N.
diff --git a/drivers/staging/lustre/lnet/Makefile b/drivers/staging/lustre/lnet/Makefile
deleted file mode 100644
index 0a380fe88ce8..000000000000
--- a/drivers/staging/lustre/lnet/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-obj-$(CONFIG_LNET) += libcfs/ lnet/ klnds/ selftest/
diff --git a/drivers/staging/lustre/lnet/klnds/Makefile b/drivers/staging/lustre/lnet/klnds/Makefile
deleted file mode 100644
index c23e4f67f837..000000000000
--- a/drivers/staging/lustre/lnet/klnds/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-obj-$(CONFIG_LNET) += o2iblnd/ socklnd/
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile b/drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile
deleted file mode 100644
index 4affe1d79948..000000000000
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET_XPRT_IB) += ko2iblnd.o
-ko2iblnd-y := o2iblnd.o o2iblnd_cb.o o2iblnd_modparams.o
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
deleted file mode 100644
index f0b4eb42bc1d..000000000000
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
+++ /dev/null
@@ -1,2958 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/o2iblnd/o2iblnd.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include <asm/div64.h>
-#include <asm/page.h>
-#include "o2iblnd.h"
-
-static struct lnet_lnd the_o2iblnd;
-
-struct kib_data kiblnd_data;
-
-static __u32 kiblnd_cksum(void *ptr, int nob)
-{
- char *c = ptr;
- __u32 sum = 0;
-
- while (nob-- > 0)
- sum = ((sum << 1) | (sum >> 31)) + *c++;
-
- /* ensure I don't return 0 (== no checksum) */
- return !sum ? 1 : sum;
-}
-
-static char *kiblnd_msgtype2str(int type)
-{
- switch (type) {
- case IBLND_MSG_CONNREQ:
- return "CONNREQ";
-
- case IBLND_MSG_CONNACK:
- return "CONNACK";
-
- case IBLND_MSG_NOOP:
- return "NOOP";
-
- case IBLND_MSG_IMMEDIATE:
- return "IMMEDIATE";
-
- case IBLND_MSG_PUT_REQ:
- return "PUT_REQ";
-
- case IBLND_MSG_PUT_NAK:
- return "PUT_NAK";
-
- case IBLND_MSG_PUT_ACK:
- return "PUT_ACK";
-
- case IBLND_MSG_PUT_DONE:
- return "PUT_DONE";
-
- case IBLND_MSG_GET_REQ:
- return "GET_REQ";
-
- case IBLND_MSG_GET_DONE:
- return "GET_DONE";
-
- default:
- return "???";
- }
-}
-
-static int kiblnd_msgtype2size(int type)
-{
- const int hdr_size = offsetof(struct kib_msg, ibm_u);
-
- switch (type) {
- case IBLND_MSG_CONNREQ:
- case IBLND_MSG_CONNACK:
- return hdr_size + sizeof(struct kib_connparams);
-
- case IBLND_MSG_NOOP:
- return hdr_size;
-
- case IBLND_MSG_IMMEDIATE:
- return offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[0]);
-
- case IBLND_MSG_PUT_REQ:
- return hdr_size + sizeof(struct kib_putreq_msg);
-
- case IBLND_MSG_PUT_ACK:
- return hdr_size + sizeof(struct kib_putack_msg);
-
- case IBLND_MSG_GET_REQ:
- return hdr_size + sizeof(struct kib_get_msg);
-
- case IBLND_MSG_PUT_NAK:
- case IBLND_MSG_PUT_DONE:
- case IBLND_MSG_GET_DONE:
- return hdr_size + sizeof(struct kib_completion_msg);
- default:
- return -1;
- }
-}
-
-static int kiblnd_unpack_rd(struct kib_msg *msg, int flip)
-{
- struct kib_rdma_desc *rd;
- int msg_size;
- int nob;
- int n;
- int i;
-
- LASSERT(msg->ibm_type == IBLND_MSG_GET_REQ ||
- msg->ibm_type == IBLND_MSG_PUT_ACK);
-
- rd = msg->ibm_type == IBLND_MSG_GET_REQ ?
- &msg->ibm_u.get.ibgm_rd :
- &msg->ibm_u.putack.ibpam_rd;
-
- if (flip) {
- __swab32s(&rd->rd_key);
- __swab32s(&rd->rd_nfrags);
- }
-
- n = rd->rd_nfrags;
-
- nob = offsetof(struct kib_msg, ibm_u) +
- kiblnd_rd_msg_size(rd, msg->ibm_type, n);
-
- if (msg->ibm_nob < nob) {
- CERROR("Short %s: %d(%d)\n",
- kiblnd_msgtype2str(msg->ibm_type), msg->ibm_nob, nob);
- return 1;
- }
-
- msg_size = kiblnd_rd_size(rd);
- if (msg_size <= 0 || msg_size > LNET_MAX_PAYLOAD) {
- CERROR("Bad msg_size: %d, should be 0 < n <= %d\n",
- msg_size, LNET_MAX_PAYLOAD);
- return 1;
- }
-
- if (!flip)
- return 0;
-
- for (i = 0; i < n; i++) {
- __swab32s(&rd->rd_frags[i].rf_nob);
- __swab64s(&rd->rd_frags[i].rf_addr);
- }
-
- return 0;
-}
-
-void kiblnd_pack_msg(struct lnet_ni *ni, struct kib_msg *msg, int version,
- int credits, lnet_nid_t dstnid, __u64 dststamp)
-{
- struct kib_net *net = ni->ni_data;
-
- /*
- * CAVEAT EMPTOR! all message fields not set here should have been
- * initialised previously.
- */
- msg->ibm_magic = IBLND_MSG_MAGIC;
- msg->ibm_version = version;
- /* ibm_type */
- msg->ibm_credits = credits;
- /* ibm_nob */
- msg->ibm_cksum = 0;
- msg->ibm_srcnid = ni->ni_nid;
- msg->ibm_srcstamp = net->ibn_incarnation;
- msg->ibm_dstnid = dstnid;
- msg->ibm_dststamp = dststamp;
-
- if (*kiblnd_tunables.kib_cksum) {
- /* NB ibm_cksum zero while computing cksum */
- msg->ibm_cksum = kiblnd_cksum(msg, msg->ibm_nob);
- }
-}
-
-int kiblnd_unpack_msg(struct kib_msg *msg, int nob)
-{
- const int hdr_size = offsetof(struct kib_msg, ibm_u);
- __u32 msg_cksum;
- __u16 version;
- int msg_nob;
- int flip;
-
- /* 6 bytes are enough to have received magic + version */
- if (nob < 6) {
- CERROR("Short message: %d\n", nob);
- return -EPROTO;
- }
-
- if (msg->ibm_magic == IBLND_MSG_MAGIC) {
- flip = 0;
- } else if (msg->ibm_magic == __swab32(IBLND_MSG_MAGIC)) {
- flip = 1;
- } else {
- CERROR("Bad magic: %08x\n", msg->ibm_magic);
- return -EPROTO;
- }
-
- version = flip ? __swab16(msg->ibm_version) : msg->ibm_version;
- if (version != IBLND_MSG_VERSION &&
- version != IBLND_MSG_VERSION_1) {
- CERROR("Bad version: %x\n", version);
- return -EPROTO;
- }
-
- if (nob < hdr_size) {
- CERROR("Short message: %d\n", nob);
- return -EPROTO;
- }
-
- msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
- if (msg_nob > nob) {
- CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
- return -EPROTO;
- }
-
- /*
- * checksum must be computed with ibm_cksum zero and BEFORE anything
- * gets flipped
- */
- msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
- msg->ibm_cksum = 0;
- if (msg_cksum &&
- msg_cksum != kiblnd_cksum(msg, msg_nob)) {
- CERROR("Bad checksum\n");
- return -EPROTO;
- }
-
- msg->ibm_cksum = msg_cksum;
-
- if (flip) {
- /* leave magic unflipped as a clue to peer endianness */
- msg->ibm_version = version;
- BUILD_BUG_ON(sizeof(msg->ibm_type) != 1);
- BUILD_BUG_ON(sizeof(msg->ibm_credits) != 1);
- msg->ibm_nob = msg_nob;
- __swab64s(&msg->ibm_srcnid);
- __swab64s(&msg->ibm_srcstamp);
- __swab64s(&msg->ibm_dstnid);
- __swab64s(&msg->ibm_dststamp);
- }
-
- if (msg->ibm_srcnid == LNET_NID_ANY) {
- CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
- return -EPROTO;
- }
-
- if (msg_nob < kiblnd_msgtype2size(msg->ibm_type)) {
- CERROR("Short %s: %d(%d)\n", kiblnd_msgtype2str(msg->ibm_type),
- msg_nob, kiblnd_msgtype2size(msg->ibm_type));
- return -EPROTO;
- }
-
- switch (msg->ibm_type) {
- default:
- CERROR("Unknown message type %x\n", msg->ibm_type);
- return -EPROTO;
-
- case IBLND_MSG_NOOP:
- case IBLND_MSG_IMMEDIATE:
- case IBLND_MSG_PUT_REQ:
- break;
-
- case IBLND_MSG_PUT_ACK:
- case IBLND_MSG_GET_REQ:
- if (kiblnd_unpack_rd(msg, flip))
- return -EPROTO;
- break;
-
- case IBLND_MSG_PUT_NAK:
- case IBLND_MSG_PUT_DONE:
- case IBLND_MSG_GET_DONE:
- if (flip)
- __swab32s(&msg->ibm_u.completion.ibcm_status);
- break;
-
- case IBLND_MSG_CONNREQ:
- case IBLND_MSG_CONNACK:
- if (flip) {
- __swab16s(&msg->ibm_u.connparams.ibcp_queue_depth);
- __swab16s(&msg->ibm_u.connparams.ibcp_max_frags);
- __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size);
- }
- break;
- }
- return 0;
-}
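-
-/* Illustrative sketch, not part of the original source: the checksum
- * convention shared by kiblnd_pack_msg() and kiblnd_unpack_msg(), i.e.
- * the ibm_cksum field must be zero while the sum is computed.
- */
-static inline int kiblnd_msg_cksum_ok(struct kib_msg *msg)
-{
- __u32 saved = msg->ibm_cksum;
- __u32 sum;
-
- msg->ibm_cksum = 0;
- sum = kiblnd_cksum(msg, msg->ibm_nob);
- msg->ibm_cksum = saved;
- return !saved || saved == sum;
-}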
-
-int kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer **peerp,
- lnet_nid_t nid)
-{
- struct kib_peer *peer;
- struct kib_net *net = ni->ni_data;
- int cpt = lnet_cpt_of_nid(nid);
- unsigned long flags;
-
- LASSERT(net);
- LASSERT(nid != LNET_NID_ANY);
-
- peer = kzalloc_cpt(sizeof(*peer), GFP_NOFS, cpt);
- if (!peer) {
- CERROR("Cannot allocate peer\n");
- return -ENOMEM;
- }
-
- peer->ibp_ni = ni;
- peer->ibp_nid = nid;
- peer->ibp_error = 0;
- peer->ibp_last_alive = 0;
- peer->ibp_max_frags = kiblnd_cfg_rdma_frags(peer->ibp_ni);
- peer->ibp_queue_depth = ni->ni_peertxcredits;
- atomic_set(&peer->ibp_refcount, 1); /* 1 ref for caller */
-
- INIT_LIST_HEAD(&peer->ibp_list); /* not in the peer table yet */
- INIT_LIST_HEAD(&peer->ibp_conns);
- INIT_LIST_HEAD(&peer->ibp_tx_queue);
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- /* always called with a ref on ni, which prevents ni being shutdown */
- LASSERT(!net->ibn_shutdown);
-
- /* npeers only grows with the global lock held */
- atomic_inc(&net->ibn_npeers);
-
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- *peerp = peer;
- return 0;
-}
-
-void kiblnd_destroy_peer(struct kib_peer *peer)
-{
- struct kib_net *net = peer->ibp_ni->ni_data;
-
- LASSERT(net);
- LASSERT(!atomic_read(&peer->ibp_refcount));
- LASSERT(!kiblnd_peer_active(peer));
- LASSERT(kiblnd_peer_idle(peer));
- LASSERT(list_empty(&peer->ibp_tx_queue));
-
- kfree(peer);
-
- /*
- * NB a peer's connections keep a reference on their peer until
- * they are destroyed, so we can be assured that _all_ state to do
- * with this peer has been cleaned up when its refcount drops to
- * zero.
- */
- atomic_dec(&net->ibn_npeers);
-}
-
-struct kib_peer *kiblnd_find_peer_locked(lnet_nid_t nid)
-{
- /*
- * the caller is responsible for accounting the additional reference
- * that this creates
- */
- struct list_head *peer_list = kiblnd_nid2peerlist(nid);
- struct list_head *tmp;
- struct kib_peer *peer;
-
- list_for_each(tmp, peer_list) {
- peer = list_entry(tmp, struct kib_peer, ibp_list);
- LASSERT(!kiblnd_peer_idle(peer));
-
- if (peer->ibp_nid != nid)
- continue;
-
- CDEBUG(D_NET, "got peer [%p] -> %s (%d) version: %x\n",
- peer, libcfs_nid2str(nid),
- atomic_read(&peer->ibp_refcount),
- peer->ibp_version);
- return peer;
- }
- return NULL;
-}
-
-void kiblnd_unlink_peer_locked(struct kib_peer *peer)
-{
- LASSERT(list_empty(&peer->ibp_conns));
-
- LASSERT(kiblnd_peer_active(peer));
- list_del_init(&peer->ibp_list);
- /* lose peerlist's ref */
- kiblnd_peer_decref(peer);
-}
-
-static int kiblnd_get_peer_info(struct lnet_ni *ni, int index,
- lnet_nid_t *nidp, int *count)
-{
- struct kib_peer *peer;
- struct list_head *ptmp;
- int i;
- unsigned long flags;
-
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
- list_for_each(ptmp, &kiblnd_data.kib_peers[i]) {
- peer = list_entry(ptmp, struct kib_peer, ibp_list);
- LASSERT(!kiblnd_peer_idle(peer));
-
- if (peer->ibp_ni != ni)
- continue;
-
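- /*
- * 'index' counts down: callers enumerate peers by passing
- * 0, 1, 2, ... until this returns -ENOENT
- */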
- if (index-- > 0)
- continue;
-
- *nidp = peer->ibp_nid;
- *count = atomic_read(&peer->ibp_refcount);
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
- flags);
- return 0;
- }
- }
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- return -ENOENT;
-}
-
-static void kiblnd_del_peer_locked(struct kib_peer *peer)
-{
- struct list_head *ctmp;
- struct list_head *cnxt;
- struct kib_conn *conn;
-
- if (list_empty(&peer->ibp_conns)) {
- kiblnd_unlink_peer_locked(peer);
- } else {
- list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry(ctmp, struct kib_conn, ibc_list);
-
- kiblnd_close_conn_locked(conn, 0);
- }
- /* NB closing peer's last conn unlinked it. */
- }
- /*
- * NB peer now unlinked; might even be freed if the peer table had the
- * last ref on it.
- */
-}
-
-static int kiblnd_del_peer(struct lnet_ni *ni, lnet_nid_t nid)
-{
- LIST_HEAD(zombies);
- struct list_head *ptmp;
- struct list_head *pnxt;
- struct kib_peer *peer;
- int lo;
- int hi;
- int i;
- unsigned long flags;
- int rc = -ENOENT;
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- if (nid != LNET_NID_ANY) {
- lo = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
- hi = lo;
- } else {
- lo = 0;
- hi = kiblnd_data.kib_peer_hash_size - 1;
- }
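- /*
- * a specific NID hashes to exactly one bucket (lo == hi), while
- * LNET_NID_ANY walks the whole peer table
- */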
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
- peer = list_entry(ptmp, struct kib_peer, ibp_list);
- LASSERT(!kiblnd_peer_idle(peer));
-
- if (peer->ibp_ni != ni)
- continue;
-
- if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid))
- continue;
-
- if (!list_empty(&peer->ibp_tx_queue)) {
- LASSERT(list_empty(&peer->ibp_conns));
-
- list_splice_init(&peer->ibp_tx_queue,
- &zombies);
- }
-
- kiblnd_del_peer_locked(peer);
- rc = 0; /* matched something */
- }
- }
-
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
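- /*
- * txs queued on deleted peers were never posted, so complete
- * them with -EIO now that the global lock has been dropped
- */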
- kiblnd_txlist_done(ni, &zombies, -EIO);
-
- return rc;
-}
-
-static struct kib_conn *kiblnd_get_conn_by_idx(struct lnet_ni *ni, int index)
-{
- struct kib_peer *peer;
- struct list_head *ptmp;
- struct kib_conn *conn;
- struct list_head *ctmp;
- int i;
- unsigned long flags;
-
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
- list_for_each(ptmp, &kiblnd_data.kib_peers[i]) {
- peer = list_entry(ptmp, struct kib_peer, ibp_list);
- LASSERT(!kiblnd_peer_idle(peer));
-
- if (peer->ibp_ni != ni)
- continue;
-
- list_for_each(ctmp, &peer->ibp_conns) {
- if (index-- > 0)
- continue;
-
- conn = list_entry(ctmp, struct kib_conn,
- ibc_list);
- kiblnd_conn_addref(conn);
- read_unlock_irqrestore(
- &kiblnd_data.kib_global_lock,
- flags);
- return conn;
- }
- }
- }
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- return NULL;
-}
-
-int kiblnd_translate_mtu(int value)
-{
- switch (value) {
- default:
- return -1;
- case 0:
- return 0;
- case 256:
- return IB_MTU_256;
- case 512:
- return IB_MTU_512;
- case 1024:
- return IB_MTU_1024;
- case 2048:
- return IB_MTU_2048;
- case 4096:
- return IB_MTU_4096;
- }
-}
-
-static void kiblnd_setup_mtu_locked(struct rdma_cm_id *cmid)
-{
- int mtu;
-
- /* XXX There is no path record for iWARP, set by netdev->change_mtu? */
- if (!cmid->route.path_rec)
- return;
-
- mtu = kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu);
- LASSERT(mtu >= 0);
- if (mtu)
- cmid->route.path_rec->mtu = mtu;
-}
-
-static int kiblnd_get_completion_vector(struct kib_conn *conn, int cpt)
-{
- cpumask_var_t *mask;
- int vectors;
- int off;
- int i;
- lnet_nid_t nid = conn->ibc_peer->ibp_nid;
-
- vectors = conn->ibc_cmid->device->num_comp_vectors;
- if (vectors <= 1)
- return 0;
-
- mask = cfs_cpt_cpumask(lnet_cpt_table(), cpt);
- if (!mask)
- return 0;
-
- /* hash NID to CPU id in this partition... */
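- /*
- * do_div() divides 'nid' in place and returns the remainder,
- * giving a stable offset into this partition's CPU mask
- */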
- off = do_div(nid, cpumask_weight(*mask));
- for_each_cpu(i, *mask) {
- if (!off--)
- return i % vectors;
- }
-
- LBUG();
- return 1;
-}
-
-struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cmid,
- int state, int version)
-{
- /*
- * CAVEAT EMPTOR:
- * If the new conn is created successfully it takes over the caller's
- * ref on 'peer'. It also "owns" 'cmid' and destroys it when it itself
- * is destroyed. On failure, the caller's ref on 'peer' remains and
- * the caller must dispose of 'cmid'. (Destroying 'cmid' here would
- * block forever, because this function is called from the CM, which
- * still holds its own ref on 'cmid'.)
- */
- rwlock_t *glock = &kiblnd_data.kib_global_lock;
- struct kib_net *net = peer->ibp_ni->ni_data;
- struct kib_dev *dev;
- struct ib_qp_init_attr *init_qp_attr;
- struct kib_sched_info *sched;
- struct ib_cq_init_attr cq_attr = {};
- struct kib_conn *conn;
- struct ib_cq *cq;
- unsigned long flags;
- int cpt;
- int rc;
- int i;
-
- LASSERT(net);
- LASSERT(!in_interrupt());
-
- dev = net->ibn_dev;
-
- cpt = lnet_cpt_of_nid(peer->ibp_nid);
- sched = kiblnd_data.kib_scheds[cpt];
-
- LASSERT(sched->ibs_nthreads > 0);
-
- init_qp_attr = kzalloc_cpt(sizeof(*init_qp_attr), GFP_NOFS, cpt);
- if (!init_qp_attr) {
- CERROR("Can't allocate qp_attr for %s\n",
- libcfs_nid2str(peer->ibp_nid));
- goto failed_0;
- }
-
- conn = kzalloc_cpt(sizeof(*conn), GFP_NOFS, cpt);
- if (!conn) {
- CERROR("Can't allocate connection for %s\n",
- libcfs_nid2str(peer->ibp_nid));
- goto failed_1;
- }
-
- conn->ibc_state = IBLND_CONN_INIT;
- conn->ibc_version = version;
- conn->ibc_peer = peer; /* takes over the caller's ref */
- cmid->context = conn; /* for future CM callbacks */
- conn->ibc_cmid = cmid;
- conn->ibc_max_frags = peer->ibp_max_frags;
- conn->ibc_queue_depth = peer->ibp_queue_depth;
-
- INIT_LIST_HEAD(&conn->ibc_early_rxs);
- INIT_LIST_HEAD(&conn->ibc_tx_noops);
- INIT_LIST_HEAD(&conn->ibc_tx_queue);
- INIT_LIST_HEAD(&conn->ibc_tx_queue_rsrvd);
- INIT_LIST_HEAD(&conn->ibc_tx_queue_nocred);
- INIT_LIST_HEAD(&conn->ibc_active_txs);
- spin_lock_init(&conn->ibc_lock);
-
- conn->ibc_connvars = kzalloc_cpt(sizeof(*conn->ibc_connvars), GFP_NOFS, cpt);
- if (!conn->ibc_connvars) {
- CERROR("Can't allocate in-progress connection state\n");
- goto failed_2;
- }
-
- write_lock_irqsave(glock, flags);
- if (dev->ibd_failover) {
- write_unlock_irqrestore(glock, flags);
- CERROR("%s: failover in progress\n", dev->ibd_ifname);
- goto failed_2;
- }
-
- if (dev->ibd_hdev->ibh_ibdev != cmid->device) {
- /* wakeup failover thread and teardown connection */
- if (kiblnd_dev_can_failover(dev)) {
- list_add_tail(&dev->ibd_fail_list,
- &kiblnd_data.kib_failed_devs);
- wake_up(&kiblnd_data.kib_failover_waitq);
- }
-
- write_unlock_irqrestore(glock, flags);
- CERROR("cmid HCA(%s), kib_dev(%s) need failover\n",
- cmid->device->name, dev->ibd_ifname);
- goto failed_2;
- }
-
- kiblnd_hdev_addref_locked(dev->ibd_hdev);
- conn->ibc_hdev = dev->ibd_hdev;
-
- kiblnd_setup_mtu_locked(cmid);
-
- write_unlock_irqrestore(glock, flags);
-
- conn->ibc_rxs = kzalloc_cpt(IBLND_RX_MSGS(conn) * sizeof(struct kib_rx),
- GFP_NOFS, cpt);
- if (!conn->ibc_rxs) {
- CERROR("Cannot allocate RX buffers\n");
- goto failed_2;
- }
-
- rc = kiblnd_alloc_pages(&conn->ibc_rx_pages, cpt,
- IBLND_RX_MSG_PAGES(conn));
- if (rc)
- goto failed_2;
-
- kiblnd_map_rx_descs(conn);
-
- cq_attr.cqe = IBLND_CQ_ENTRIES(conn);
- cq_attr.comp_vector = kiblnd_get_completion_vector(conn, cpt);
- cq = ib_create_cq(cmid->device,
- kiblnd_cq_completion, kiblnd_cq_event, conn,
- &cq_attr);
- if (IS_ERR(cq)) {
- CERROR("Failed to create CQ with %d CQEs: %ld\n",
- IBLND_CQ_ENTRIES(conn), PTR_ERR(cq));
- goto failed_2;
- }
-
- conn->ibc_cq = cq;
-
- rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- if (rc) {
- CERROR("Can't request completion notification: %d\n", rc);
- goto failed_2;
- }
-
- init_qp_attr->event_handler = kiblnd_qp_event;
- init_qp_attr->qp_context = conn;
- init_qp_attr->cap.max_send_wr = IBLND_SEND_WRS(conn);
- init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(conn);
- init_qp_attr->cap.max_send_sge = 1;
- init_qp_attr->cap.max_recv_sge = 1;
- init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
- init_qp_attr->qp_type = IB_QPT_RC;
- init_qp_attr->send_cq = cq;
- init_qp_attr->recv_cq = cq;
-
- conn->ibc_sched = sched;
-
- rc = rdma_create_qp(cmid, conn->ibc_hdev->ibh_pd, init_qp_attr);
- if (rc) {
- CERROR("Can't create QP: %d, send_wr: %d, recv_wr: %d\n",
- rc, init_qp_attr->cap.max_send_wr,
- init_qp_attr->cap.max_recv_wr);
- goto failed_2;
- }
-
- kfree(init_qp_attr);
-
- /* 1 ref for caller and each rxmsg */
- atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS(conn));
- conn->ibc_nrx = IBLND_RX_MSGS(conn);
-
- /* post receives */
- for (i = 0; i < IBLND_RX_MSGS(conn); i++) {
- rc = kiblnd_post_rx(&conn->ibc_rxs[i],
- IBLND_POSTRX_NO_CREDIT);
- if (rc) {
- CERROR("Can't post rxmsg: %d\n", rc);
-
- /* Make posted receives complete */
- kiblnd_abort_receives(conn);
-
- /*
- * correct the count of posted buffers
- * NB: locking is needed now that we race with completions
- */
- spin_lock_irqsave(&sched->ibs_lock, flags);
- conn->ibc_nrx -= IBLND_RX_MSGS(conn) - i;
- spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
- /*
- * cmid will be destroyed by CM(ofed) after cm_callback
- * returned, so we can't refer it anymore
- * (by kiblnd_connd()->kiblnd_destroy_conn)
- */
- rdma_destroy_qp(conn->ibc_cmid);
- conn->ibc_cmid = NULL;
-
- /* Drop my own and unused rxbuffer refcounts */
- while (i++ <= IBLND_RX_MSGS(conn))
- kiblnd_conn_decref(conn);
-
- return NULL;
- }
- }
-
- /* Init successful! */
- LASSERT(state == IBLND_CONN_ACTIVE_CONNECT ||
- state == IBLND_CONN_PASSIVE_WAIT);
- conn->ibc_state = state;
-
- /* 1 more conn */
- atomic_inc(&net->ibn_nconns);
- return conn;
-
- failed_2:
- kiblnd_destroy_conn(conn);
- kfree(conn);
- failed_1:
- kfree(init_qp_attr);
- failed_0:
- return NULL;
-}
-
-void kiblnd_destroy_conn(struct kib_conn *conn)
-{
- struct rdma_cm_id *cmid = conn->ibc_cmid;
- struct kib_peer *peer = conn->ibc_peer;
- int rc;
-
- LASSERT(!in_interrupt());
- LASSERT(!atomic_read(&conn->ibc_refcount));
- LASSERT(list_empty(&conn->ibc_early_rxs));
- LASSERT(list_empty(&conn->ibc_tx_noops));
- LASSERT(list_empty(&conn->ibc_tx_queue));
- LASSERT(list_empty(&conn->ibc_tx_queue_rsrvd));
- LASSERT(list_empty(&conn->ibc_tx_queue_nocred));
- LASSERT(list_empty(&conn->ibc_active_txs));
- LASSERT(!conn->ibc_noops_posted);
- LASSERT(!conn->ibc_nsends_posted);
-
- switch (conn->ibc_state) {
- default:
- /* conn must be completely disengaged from the network */
- LBUG();
-
- case IBLND_CONN_DISCONNECTED:
- /* connvars should have been freed already */
- LASSERT(!conn->ibc_connvars);
- break;
-
- case IBLND_CONN_INIT:
- break;
- }
-
- /* conn->ibc_cmid might be destroyed by CM already */
- if (cmid && cmid->qp)
- rdma_destroy_qp(cmid);
-
- if (conn->ibc_cq) {
- rc = ib_destroy_cq(conn->ibc_cq);
- if (rc)
- CWARN("Error destroying CQ: %d\n", rc);
- }
-
- if (conn->ibc_rx_pages)
- kiblnd_unmap_rx_descs(conn);
-
- kfree(conn->ibc_rxs);
- kfree(conn->ibc_connvars);
-
- if (conn->ibc_hdev)
- kiblnd_hdev_decref(conn->ibc_hdev);
-
- /* See CAVEAT EMPTOR above in kiblnd_create_conn */
- if (conn->ibc_state != IBLND_CONN_INIT) {
- struct kib_net *net = peer->ibp_ni->ni_data;
-
- kiblnd_peer_decref(peer);
- rdma_destroy_id(cmid);
- atomic_dec(&net->ibn_nconns);
- }
-}
-
-int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why)
-{
- struct kib_conn *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry(ctmp, struct kib_conn, ibc_list);
-
- CDEBUG(D_NET, "Closing conn -> %s, version: %x, reason: %d\n",
- libcfs_nid2str(peer->ibp_nid),
- conn->ibc_version, why);
-
- kiblnd_close_conn_locked(conn, why);
- count++;
- }
-
- return count;
-}
-
-int kiblnd_close_stale_conns_locked(struct kib_peer *peer,
- int version, __u64 incarnation)
-{
- struct kib_conn *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry(ctmp, struct kib_conn, ibc_list);
-
- if (conn->ibc_version == version &&
- conn->ibc_incarnation == incarnation)
- continue;
-
- CDEBUG(D_NET,
- "Closing stale conn -> %s version: %x, incarnation:%#llx(%x, %#llx)\n",
- libcfs_nid2str(peer->ibp_nid),
- conn->ibc_version, conn->ibc_incarnation,
- version, incarnation);
-
- kiblnd_close_conn_locked(conn, -ESTALE);
- count++;
- }
-
- return count;
-}
-
-static int kiblnd_close_matching_conns(struct lnet_ni *ni, lnet_nid_t nid)
-{
- struct kib_peer *peer;
- struct list_head *ptmp;
- struct list_head *pnxt;
- int lo;
- int hi;
- int i;
- unsigned long flags;
- int count = 0;
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- if (nid != LNET_NID_ANY) {
- lo = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
- hi = lo;
- } else {
- lo = 0;
- hi = kiblnd_data.kib_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
- peer = list_entry(ptmp, struct kib_peer, ibp_list);
- LASSERT(!kiblnd_peer_idle(peer));
-
- if (peer->ibp_ni != ni)
- continue;
-
- if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid))
- continue;
-
- count += kiblnd_close_peer_conns_locked(peer, 0);
- }
- }
-
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- /* wildcards always succeed */
- if (nid == LNET_NID_ANY)
- return 0;
-
- return !count ? -ENOENT : 0;
-}
-
-static int kiblnd_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg)
-{
- struct libcfs_ioctl_data *data = arg;
- int rc = -EINVAL;
-
- switch (cmd) {
- case IOC_LIBCFS_GET_PEER: {
- lnet_nid_t nid = 0;
- int count = 0;
-
- rc = kiblnd_get_peer_info(ni, data->ioc_count,
- &nid, &count);
- data->ioc_nid = nid;
- data->ioc_count = count;
- break;
- }
-
- case IOC_LIBCFS_DEL_PEER: {
- rc = kiblnd_del_peer(ni, data->ioc_nid);
- break;
- }
- case IOC_LIBCFS_GET_CONN: {
- struct kib_conn *conn;
-
- rc = 0;
- conn = kiblnd_get_conn_by_idx(ni, data->ioc_count);
- if (!conn) {
- rc = -ENOENT;
- break;
- }
-
- LASSERT(conn->ibc_cmid);
- data->ioc_nid = conn->ibc_peer->ibp_nid;
- if (!conn->ibc_cmid->route.path_rec)
- data->ioc_u32[0] = 0; /* iWarp has no path MTU */
- else
- data->ioc_u32[0] =
- ib_mtu_enum_to_int(conn->ibc_cmid->route.path_rec->mtu);
- kiblnd_conn_decref(conn);
- break;
- }
- case IOC_LIBCFS_CLOSE_CONNECTION: {
- rc = kiblnd_close_matching_conns(ni, data->ioc_nid);
- break;
- }
-
- default:
- break;
- }
-
- return rc;
-}
-
-static void kiblnd_query(struct lnet_ni *ni, lnet_nid_t nid,
- unsigned long *when)
-{
- unsigned long last_alive = 0;
- unsigned long now = jiffies;
- rwlock_t *glock = &kiblnd_data.kib_global_lock;
- struct kib_peer *peer;
- unsigned long flags;
-
- read_lock_irqsave(glock, flags);
-
- peer = kiblnd_find_peer_locked(nid);
- if (peer)
- last_alive = peer->ibp_last_alive;
-
- read_unlock_irqrestore(glock, flags);
-
- if (last_alive)
- *when = last_alive;
-
- /*
- * peer is not persistent in hash, trigger peer creation
- * and connection establishment with a NULL tx
- */
- if (!peer)
- kiblnd_launch_tx(ni, NULL, nid);
-
- CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago\n",
- libcfs_nid2str(nid), peer,
- last_alive ? (now - last_alive) / HZ : -1);
-}
-
-static void kiblnd_free_pages(struct kib_pages *p)
-{
- int npages = p->ibp_npages;
- int i;
-
- for (i = 0; i < npages; i++) {
- if (p->ibp_pages[i])
- __free_page(p->ibp_pages[i]);
- }
-
- kfree(p);
-}
-
-int kiblnd_alloc_pages(struct kib_pages **pp, int cpt, int npages)
-{
- struct kib_pages *p;
- int i;
-
- p = kzalloc_cpt(offsetof(struct kib_pages, ibp_pages[npages]),
- GFP_NOFS, cpt);
- if (!p) {
- CERROR("Can't allocate descriptor for %d pages\n", npages);
- return -ENOMEM;
- }
-
- p->ibp_npages = npages;
-
- for (i = 0; i < npages; i++) {
- p->ibp_pages[i] = alloc_pages_node(
- cfs_cpt_spread_node(lnet_cpt_table(), cpt),
- GFP_NOFS, 0);
- if (!p->ibp_pages[i]) {
- CERROR("Can't allocate page %d of %d\n", i, npages);
- kiblnd_free_pages(p);
- return -ENOMEM;
- }
- }
-
- *pp = p;
- return 0;
-}
-
-void kiblnd_unmap_rx_descs(struct kib_conn *conn)
-{
- struct kib_rx *rx;
- int i;
-
- LASSERT(conn->ibc_rxs);
- LASSERT(conn->ibc_hdev);
-
- for (i = 0; i < IBLND_RX_MSGS(conn); i++) {
- rx = &conn->ibc_rxs[i];
-
- LASSERT(rx->rx_nob >= 0); /* not posted */
-
- kiblnd_dma_unmap_single(conn->ibc_hdev->ibh_ibdev,
- KIBLND_UNMAP_ADDR(rx, rx_msgunmap,
- rx->rx_msgaddr),
- IBLND_MSG_SIZE, DMA_FROM_DEVICE);
- }
-
- kiblnd_free_pages(conn->ibc_rx_pages);
-
- conn->ibc_rx_pages = NULL;
-}
-
-void kiblnd_map_rx_descs(struct kib_conn *conn)
-{
- struct kib_rx *rx;
- struct page *pg;
- int pg_off;
- int ipg;
- int i;
-
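- /*
- * RX messages are packed back to back: pg_off advances by
- * IBLND_MSG_SIZE and wraps at PAGE_SIZE, so no message ever
- * straddles a page boundary
- */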
- for (pg_off = ipg = i = 0; i < IBLND_RX_MSGS(conn); i++) {
- pg = conn->ibc_rx_pages->ibp_pages[ipg];
- rx = &conn->ibc_rxs[i];
-
- rx->rx_conn = conn;
- rx->rx_msg = (struct kib_msg *)(((char *)page_address(pg)) + pg_off);
-
- rx->rx_msgaddr = kiblnd_dma_map_single(conn->ibc_hdev->ibh_ibdev,
- rx->rx_msg,
- IBLND_MSG_SIZE,
- DMA_FROM_DEVICE);
- LASSERT(!kiblnd_dma_mapping_error(conn->ibc_hdev->ibh_ibdev,
- rx->rx_msgaddr));
- KIBLND_UNMAP_ADDR_SET(rx, rx_msgunmap, rx->rx_msgaddr);
-
- CDEBUG(D_NET, "rx %d: %p %#llx(%#llx)\n",
- i, rx->rx_msg, rx->rx_msgaddr,
- (__u64)(page_to_phys(pg) + pg_off));
-
- pg_off += IBLND_MSG_SIZE;
- LASSERT(pg_off <= PAGE_SIZE);
-
- if (pg_off == PAGE_SIZE) {
- pg_off = 0;
- ipg++;
- LASSERT(ipg <= IBLND_RX_MSG_PAGES(conn));
- }
- }
-}
-
-static void kiblnd_unmap_tx_pool(struct kib_tx_pool *tpo)
-{
- struct kib_hca_dev *hdev = tpo->tpo_hdev;
- struct kib_tx *tx;
- int i;
-
- LASSERT(!tpo->tpo_pool.po_allocated);
-
- if (!hdev)
- return;
-
- for (i = 0; i < tpo->tpo_pool.po_size; i++) {
- tx = &tpo->tpo_tx_descs[i];
- kiblnd_dma_unmap_single(hdev->ibh_ibdev,
- KIBLND_UNMAP_ADDR(tx, tx_msgunmap,
- tx->tx_msgaddr),
- IBLND_MSG_SIZE, DMA_TO_DEVICE);
- }
-
- kiblnd_hdev_decref(hdev);
- tpo->tpo_hdev = NULL;
-}
-
-static struct kib_hca_dev *kiblnd_current_hdev(struct kib_dev *dev)
-{
- struct kib_hca_dev *hdev;
- unsigned long flags;
- int i = 0;
-
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- while (dev->ibd_failover) {
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- if (!(i++ % 50))
- CDEBUG(D_NET, "%s: waiting for failover\n",
- dev->ibd_ifname);
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(HZ / 100);
-
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- }
-
- kiblnd_hdev_addref_locked(dev->ibd_hdev);
- hdev = dev->ibd_hdev;
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- return hdev;
-}
-
-static void kiblnd_map_tx_pool(struct kib_tx_pool *tpo)
-{
- struct kib_pages *txpgs = tpo->tpo_tx_pages;
- struct kib_pool *pool = &tpo->tpo_pool;
- struct kib_net *net = pool->po_owner->ps_net;
- struct kib_dev *dev;
- struct page *page;
- struct kib_tx *tx;
- int page_offset;
- int ipage;
- int i;
-
- LASSERT(net);
-
- dev = net->ibn_dev;
-
- /* pre-mapped messages are not bigger than 1 page */
- BUILD_BUG_ON(IBLND_MSG_SIZE > PAGE_SIZE);
-
- /* No fancy arithmetic when we do the buffer calculations */
- BUILD_BUG_ON(PAGE_SIZE % IBLND_MSG_SIZE);
-
- tpo->tpo_hdev = kiblnd_current_hdev(dev);
-
- for (ipage = page_offset = i = 0; i < pool->po_size; i++) {
- page = txpgs->ibp_pages[ipage];
- tx = &tpo->tpo_tx_descs[i];
-
- tx->tx_msg = (struct kib_msg *)(((char *)page_address(page)) +
- page_offset);
-
- tx->tx_msgaddr = kiblnd_dma_map_single(
- tpo->tpo_hdev->ibh_ibdev, tx->tx_msg,
- IBLND_MSG_SIZE, DMA_TO_DEVICE);
- LASSERT(!kiblnd_dma_mapping_error(tpo->tpo_hdev->ibh_ibdev,
- tx->tx_msgaddr));
- KIBLND_UNMAP_ADDR_SET(tx, tx_msgunmap, tx->tx_msgaddr);
-
- list_add(&tx->tx_list, &pool->po_free_list);
-
- page_offset += IBLND_MSG_SIZE;
- LASSERT(page_offset <= PAGE_SIZE);
-
- if (page_offset == PAGE_SIZE) {
- page_offset = 0;
- ipage++;
- LASSERT(ipage <= txpgs->ibp_npages);
- }
- }
-}
-
-static void kiblnd_destroy_fmr_pool(struct kib_fmr_pool *fpo)
-{
- LASSERT(!fpo->fpo_map_count);
-
- if (fpo->fpo_is_fmr) {
- if (fpo->fmr.fpo_fmr_pool)
- ib_destroy_fmr_pool(fpo->fmr.fpo_fmr_pool);
- } else {
- struct kib_fast_reg_descriptor *frd, *tmp;
- int i = 0;
-
- list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list,
- frd_list) {
- list_del(&frd->frd_list);
- ib_dereg_mr(frd->frd_mr);
- kfree(frd);
- i++;
- }
- if (i < fpo->fast_reg.fpo_pool_size)
- CERROR("FastReg pool still has %d regions registered\n",
- fpo->fast_reg.fpo_pool_size - i);
- }
-
- if (fpo->fpo_hdev)
- kiblnd_hdev_decref(fpo->fpo_hdev);
-
- kfree(fpo);
-}
-
-static void kiblnd_destroy_fmr_pool_list(struct list_head *head)
-{
- struct kib_fmr_pool *fpo, *tmp;
-
- list_for_each_entry_safe(fpo, tmp, head, fpo_list) {
- list_del(&fpo->fpo_list);
- kiblnd_destroy_fmr_pool(fpo);
- }
-}
-
-static int
-kiblnd_fmr_pool_size(struct lnet_ioctl_config_o2iblnd_tunables *tunables,
- int ncpts)
-{
- int size = tunables->lnd_fmr_pool_size / ncpts;
-
- return max(IBLND_FMR_POOL, size);
-}
-
-static int
-kiblnd_fmr_flush_trigger(struct lnet_ioctl_config_o2iblnd_tunables *tunables,
- int ncpts)
-{
- int size = tunables->lnd_fmr_flush_trigger / ncpts;
-
- return max(IBLND_FMR_POOL_FLUSH, size);
-}
-
-static int kiblnd_alloc_fmr_pool(struct kib_fmr_poolset *fps, struct kib_fmr_pool *fpo)
-{
- struct ib_fmr_pool_param param = {
- .max_pages_per_fmr = LNET_MAX_PAYLOAD / PAGE_SIZE,
- .page_shift = PAGE_SHIFT,
- .access = (IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE),
- .pool_size = fps->fps_pool_size,
- .dirty_watermark = fps->fps_flush_trigger,
- .flush_function = NULL,
- .flush_arg = NULL,
- .cache = !!fps->fps_cache };
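- /*
- * NB pool_size caps the number of FMRs; once dirty_watermark
- * unmapped FMRs have accumulated the pool is flushed, and
- * 'cache' lets clean FMRs be reused without remapping
- */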
- int rc = 0;
-
- fpo->fmr.fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd,
- &param);
- if (IS_ERR(fpo->fmr.fpo_fmr_pool)) {
- rc = PTR_ERR(fpo->fmr.fpo_fmr_pool);
- if (rc != -ENOSYS)
- CERROR("Failed to create FMR pool: %d\n", rc);
- else
- CERROR("FMRs are not supported\n");
- }
-
- return rc;
-}
-
-static int kiblnd_alloc_freg_pool(struct kib_fmr_poolset *fps, struct kib_fmr_pool *fpo)
-{
- struct kib_fast_reg_descriptor *frd, *tmp;
- int i, rc;
-
- INIT_LIST_HEAD(&fpo->fast_reg.fpo_pool_list);
- fpo->fast_reg.fpo_pool_size = 0;
- for (i = 0; i < fps->fps_pool_size; i++) {
- frd = kzalloc_cpt(sizeof(*frd), GFP_NOFS, fps->fps_cpt);
- if (!frd) {
- CERROR("Failed to allocate a new fast_reg descriptor\n");
- rc = -ENOMEM;
- goto out;
- }
-
- frd->frd_mr = ib_alloc_mr(fpo->fpo_hdev->ibh_pd,
- IB_MR_TYPE_MEM_REG,
- LNET_MAX_PAYLOAD / PAGE_SIZE);
- if (IS_ERR(frd->frd_mr)) {
- rc = PTR_ERR(frd->frd_mr);
- CERROR("ib_alloc_mr() failed: %d\n", rc);
- frd->frd_mr = NULL;
- goto out_middle;
- }
-
- frd->frd_valid = true;
-
- list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list);
- fpo->fast_reg.fpo_pool_size++;
- }
-
- return 0;
-
-out_middle:
- if (frd->frd_mr)
- ib_dereg_mr(frd->frd_mr);
- kfree(frd);
-
-out:
- list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list,
- frd_list) {
- list_del(&frd->frd_list);
- ib_dereg_mr(frd->frd_mr);
- kfree(frd);
- }
-
- return rc;
-}
-
-static int kiblnd_create_fmr_pool(struct kib_fmr_poolset *fps,
- struct kib_fmr_pool **pp_fpo)
-{
- struct kib_dev *dev = fps->fps_net->ibn_dev;
- struct ib_device_attr *dev_attr;
- struct kib_fmr_pool *fpo;
- int rc;
-
- fpo = kzalloc_cpt(sizeof(*fpo), GFP_NOFS, fps->fps_cpt);
- if (!fpo)
- return -ENOMEM;
-
- fpo->fpo_hdev = kiblnd_current_hdev(dev);
- dev_attr = &fpo->fpo_hdev->ibh_ibdev->attrs;
-
- /* Check for FMR or FastReg support */
- fpo->fpo_is_fmr = 0;
- if (fpo->fpo_hdev->ibh_ibdev->alloc_fmr &&
- fpo->fpo_hdev->ibh_ibdev->dealloc_fmr &&
- fpo->fpo_hdev->ibh_ibdev->map_phys_fmr &&
- fpo->fpo_hdev->ibh_ibdev->unmap_fmr) {
- LCONSOLE_INFO("Using FMR for registration\n");
- fpo->fpo_is_fmr = 1;
- } else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
- LCONSOLE_INFO("Using FastReg for registration\n");
- } else {
- rc = -ENOSYS;
- LCONSOLE_ERROR_MSG(rc, "IB device supports neither FMRs nor FastReg, can't register memory\n");
- goto out_fpo;
- }
-
- if (fpo->fpo_is_fmr)
- rc = kiblnd_alloc_fmr_pool(fps, fpo);
- else
- rc = kiblnd_alloc_freg_pool(fps, fpo);
- if (rc)
- goto out_fpo;
-
- fpo->fpo_deadline = jiffies + IBLND_POOL_DEADLINE * HZ;
- fpo->fpo_owner = fps;
- *pp_fpo = fpo;
-
- return 0;
-
-out_fpo:
- kiblnd_hdev_decref(fpo->fpo_hdev);
- kfree(fpo);
- return rc;
-}
-
-static void kiblnd_fail_fmr_poolset(struct kib_fmr_poolset *fps,
- struct list_head *zombies)
-{
- if (!fps->fps_net) /* initialized? */
- return;
-
- spin_lock(&fps->fps_lock);
-
- while (!list_empty(&fps->fps_pool_list)) {
- struct kib_fmr_pool *fpo = list_entry(fps->fps_pool_list.next,
- struct kib_fmr_pool, fpo_list);
- fpo->fpo_failed = 1;
- list_del(&fpo->fpo_list);
- if (!fpo->fpo_map_count)
- list_add(&fpo->fpo_list, zombies);
- else
- list_add(&fpo->fpo_list, &fps->fps_failed_pool_list);
- }
-
- spin_unlock(&fps->fps_lock);
-}
-
-static void kiblnd_fini_fmr_poolset(struct kib_fmr_poolset *fps)
-{
- if (fps->fps_net) { /* initialized? */
- kiblnd_destroy_fmr_pool_list(&fps->fps_failed_pool_list);
- kiblnd_destroy_fmr_pool_list(&fps->fps_pool_list);
- }
-}
-
-static int
-kiblnd_init_fmr_poolset(struct kib_fmr_poolset *fps, int cpt, int ncpts,
- struct kib_net *net,
- struct lnet_ioctl_config_o2iblnd_tunables *tunables)
-{
- struct kib_fmr_pool *fpo;
- int rc;
-
- memset(fps, 0, sizeof(*fps));
-
- fps->fps_net = net;
- fps->fps_cpt = cpt;
-
- fps->fps_pool_size = kiblnd_fmr_pool_size(tunables, ncpts);
- fps->fps_flush_trigger = kiblnd_fmr_flush_trigger(tunables, ncpts);
- fps->fps_cache = tunables->lnd_fmr_cache;
-
- spin_lock_init(&fps->fps_lock);
- INIT_LIST_HEAD(&fps->fps_pool_list);
- INIT_LIST_HEAD(&fps->fps_failed_pool_list);
-
- rc = kiblnd_create_fmr_pool(fps, &fpo);
- if (!rc)
- list_add_tail(&fpo->fpo_list, &fps->fps_pool_list);
-
- return rc;
-}
-
-static int kiblnd_fmr_pool_is_idle(struct kib_fmr_pool *fpo, unsigned long now)
-{
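- /*
- * a pool is reaped only when nothing is mapped on it and it has
- * either failed or sat past its deadline; every map pushes the
- * deadline out again
- */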
- if (fpo->fpo_map_count) /* still in use */
- return 0;
- if (fpo->fpo_failed)
- return 1;
- return time_after_eq(now, fpo->fpo_deadline);
-}
-
-static int
-kiblnd_map_tx_pages(struct kib_tx *tx, struct kib_rdma_desc *rd)
-{
- __u64 *pages = tx->tx_pages;
- struct kib_hca_dev *hdev;
- int npages;
- int size;
- int i;
-
- hdev = tx->tx_pool->tpo_hdev;
-
- for (i = 0, npages = 0; i < rd->rd_nfrags; i++) {
- for (size = 0; size < rd->rd_frags[i].rf_nob;
- size += hdev->ibh_page_size) {
- pages[npages++] = (rd->rd_frags[i].rf_addr &
- hdev->ibh_page_mask) + size;
- }
- }
-
- return npages;
-}
-
-void kiblnd_fmr_pool_unmap(struct kib_fmr *fmr, int status)
-{
- LIST_HEAD(zombies);
- struct kib_fmr_pool *fpo = fmr->fmr_pool;
- struct kib_fmr_poolset *fps;
- unsigned long now = jiffies;
- struct kib_fmr_pool *tmp;
- int rc;
-
- if (!fpo)
- return;
-
- fps = fpo->fpo_owner;
- if (fpo->fpo_is_fmr) {
- if (fmr->fmr_pfmr) {
- rc = ib_fmr_pool_unmap(fmr->fmr_pfmr);
- LASSERT(!rc);
- fmr->fmr_pfmr = NULL;
- }
-
- if (status) {
- rc = ib_flush_fmr_pool(fpo->fmr.fpo_fmr_pool);
- LASSERT(!rc);
- }
- } else {
- struct kib_fast_reg_descriptor *frd = fmr->fmr_frd;
-
- if (frd) {
- frd->frd_valid = false;
- spin_lock(&fps->fps_lock);
- list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list);
- spin_unlock(&fps->fps_lock);
- fmr->fmr_frd = NULL;
- }
- }
- fmr->fmr_pool = NULL;
-
- spin_lock(&fps->fps_lock);
- fpo->fpo_map_count--; /* decref the pool */
-
- list_for_each_entry_safe(fpo, tmp, &fps->fps_pool_list, fpo_list) {
- /* the first pool is persistent */
- if (fps->fps_pool_list.next == &fpo->fpo_list)
- continue;
-
- if (kiblnd_fmr_pool_is_idle(fpo, now)) {
- list_move(&fpo->fpo_list, &zombies);
- fps->fps_version++;
- }
- }
- spin_unlock(&fps->fps_lock);
-
- if (!list_empty(&zombies))
- kiblnd_destroy_fmr_pool_list(&zombies);
-}
-
-int kiblnd_fmr_pool_map(struct kib_fmr_poolset *fps, struct kib_tx *tx,
- struct kib_rdma_desc *rd, __u32 nob, __u64 iov,
- struct kib_fmr *fmr)
-{
- __u64 *pages = tx->tx_pages;
- bool is_rx = (rd != tx->tx_rd);
- bool tx_pages_mapped = false;
- struct kib_fmr_pool *fpo;
- int npages = 0;
- __u64 version;
- int rc;
-
- again:
- spin_lock(&fps->fps_lock);
- version = fps->fps_version;
- list_for_each_entry(fpo, &fps->fps_pool_list, fpo_list) {
- fpo->fpo_deadline = jiffies + IBLND_POOL_DEADLINE * HZ;
- fpo->fpo_map_count++;
-
- if (fpo->fpo_is_fmr) {
- struct ib_pool_fmr *pfmr;
-
- spin_unlock(&fps->fps_lock);
-
- if (!tx_pages_mapped) {
- npages = kiblnd_map_tx_pages(tx, rd);
- tx_pages_mapped = true;
- }
-
- pfmr = ib_fmr_pool_map_phys(fpo->fmr.fpo_fmr_pool,
- pages, npages, iov);
- if (likely(!IS_ERR(pfmr))) {
- fmr->fmr_key = is_rx ? pfmr->fmr->rkey :
- pfmr->fmr->lkey;
- fmr->fmr_frd = NULL;
- fmr->fmr_pfmr = pfmr;
- fmr->fmr_pool = fpo;
- return 0;
- }
- rc = PTR_ERR(pfmr);
- } else {
- if (!list_empty(&fpo->fast_reg.fpo_pool_list)) {
- struct kib_fast_reg_descriptor *frd;
- struct ib_reg_wr *wr;
- struct ib_mr *mr;
- int n;
-
- frd = list_first_entry(&fpo->fast_reg.fpo_pool_list,
- struct kib_fast_reg_descriptor,
- frd_list);
- list_del(&frd->frd_list);
- spin_unlock(&fps->fps_lock);
-
- mr = frd->frd_mr;
-
- if (!frd->frd_valid) {
- __u32 key = is_rx ? mr->rkey : mr->lkey;
- struct ib_send_wr *inv_wr;
-
- inv_wr = &frd->frd_inv_wr;
- memset(inv_wr, 0, sizeof(*inv_wr));
- inv_wr->opcode = IB_WR_LOCAL_INV;
- inv_wr->wr_id = IBLND_WID_MR;
- inv_wr->ex.invalidate_rkey = key;
-
- /* Bump the key */
- key = ib_inc_rkey(key);
- ib_update_fast_reg_key(mr, key);
- }
-
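- /*
- * NB the invalidate WR is only prepared here; the send path
- * posts it chained ahead of the REG_MR WR, so the stale
- * mapping is torn down before the bumped key goes live
- */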
- n = ib_map_mr_sg(mr, tx->tx_frags,
- tx->tx_nfrags, NULL, PAGE_SIZE);
- if (unlikely(n != tx->tx_nfrags)) {
- CERROR("Failed to map mr %d/%d elements\n",
- n, tx->tx_nfrags);
- return n < 0 ? n : -EINVAL;
- }
-
- mr->iova = iov;
-
- /* Prepare FastReg WR */
- wr = &frd->frd_fastreg_wr;
- memset(wr, 0, sizeof(*wr));
- wr->wr.opcode = IB_WR_REG_MR;
- wr->wr.wr_id = IBLND_WID_MR;
- wr->wr.num_sge = 0;
- wr->wr.send_flags = 0;
- wr->mr = mr;
- wr->key = is_rx ? mr->rkey : mr->lkey;
- wr->access = (IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE);
-
- fmr->fmr_key = is_rx ? mr->rkey : mr->lkey;
- fmr->fmr_frd = frd;
- fmr->fmr_pfmr = NULL;
- fmr->fmr_pool = fpo;
- return 0;
- }
- spin_unlock(&fps->fps_lock);
- rc = -EAGAIN;
- }
-
- spin_lock(&fps->fps_lock);
- fpo->fpo_map_count--;
- if (rc != -EAGAIN) {
- spin_unlock(&fps->fps_lock);
- return rc;
- }
-
- /* -EAGAIN: rescan if the pool list changed while unlocked */
- if (version != fps->fps_version) {
- spin_unlock(&fps->fps_lock);
- goto again;
- }
- }
-
- if (fps->fps_increasing) {
- spin_unlock(&fps->fps_lock);
- CDEBUG(D_NET, "Another thread is allocating new FMR pool, waiting for it to complete\n");
- schedule();
- goto again;
- }
-
- if (time_before(jiffies, fps->fps_next_retry)) {
- /* someone failed recently */
- spin_unlock(&fps->fps_lock);
- return -EAGAIN;
- }
-
- fps->fps_increasing = 1;
- spin_unlock(&fps->fps_lock);
-
- CDEBUG(D_NET, "Allocate new FMR pool\n");
- rc = kiblnd_create_fmr_pool(fps, &fpo);
- spin_lock(&fps->fps_lock);
- fps->fps_increasing = 0;
- if (!rc) {
- fps->fps_version++;
- list_add_tail(&fpo->fpo_list, &fps->fps_pool_list);
- } else {
- fps->fps_next_retry = jiffies + IBLND_POOL_RETRY * HZ;
- }
- spin_unlock(&fps->fps_lock);
-
- goto again;
-}
-
-static void kiblnd_fini_pool(struct kib_pool *pool)
-{
- LASSERT(list_empty(&pool->po_free_list));
- LASSERT(!pool->po_allocated);
-
- CDEBUG(D_NET, "Finalize %s pool\n", pool->po_owner->ps_name);
-}
-
-static void kiblnd_init_pool(struct kib_poolset *ps, struct kib_pool *pool, int size)
-{
- CDEBUG(D_NET, "Initialize %s pool\n", ps->ps_name);
-
- memset(pool, 0, sizeof(*pool));
- INIT_LIST_HEAD(&pool->po_free_list);
- pool->po_deadline = jiffies + IBLND_POOL_DEADLINE * HZ;
- pool->po_owner = ps;
- pool->po_size = size;
-}
-
-static void kiblnd_destroy_pool_list(struct list_head *head)
-{
- struct kib_pool *pool;
-
- while (!list_empty(head)) {
- pool = list_entry(head->next, struct kib_pool, po_list);
- list_del(&pool->po_list);
-
- LASSERT(pool->po_owner);
- pool->po_owner->ps_pool_destroy(pool);
- }
-}
-
-static void kiblnd_fail_poolset(struct kib_poolset *ps, struct list_head *zombies)
-{
- if (!ps->ps_net) /* initialized? */
- return;
-
- spin_lock(&ps->ps_lock);
- while (!list_empty(&ps->ps_pool_list)) {
- struct kib_pool *po = list_entry(ps->ps_pool_list.next,
- struct kib_pool, po_list);
- po->po_failed = 1;
- list_del(&po->po_list);
- if (!po->po_allocated)
- list_add(&po->po_list, zombies);
- else
- list_add(&po->po_list, &ps->ps_failed_pool_list);
- }
- spin_unlock(&ps->ps_lock);
-}
-
-static void kiblnd_fini_poolset(struct kib_poolset *ps)
-{
- if (ps->ps_net) { /* initialized? */
- kiblnd_destroy_pool_list(&ps->ps_failed_pool_list);
- kiblnd_destroy_pool_list(&ps->ps_pool_list);
- }
-}
-
-static int kiblnd_init_poolset(struct kib_poolset *ps, int cpt,
- struct kib_net *net, char *name, int size,
- kib_ps_pool_create_t po_create,
- kib_ps_pool_destroy_t po_destroy,
- kib_ps_node_init_t nd_init,
- kib_ps_node_fini_t nd_fini)
-{
- struct kib_pool *pool;
- int rc;
-
- memset(ps, 0, sizeof(*ps));
-
- ps->ps_cpt = cpt;
- ps->ps_net = net;
- ps->ps_pool_create = po_create;
- ps->ps_pool_destroy = po_destroy;
- ps->ps_node_init = nd_init;
- ps->ps_node_fini = nd_fini;
- ps->ps_pool_size = size;
- if (strlcpy(ps->ps_name, name, sizeof(ps->ps_name))
- >= sizeof(ps->ps_name))
- return -E2BIG;
- spin_lock_init(&ps->ps_lock);
- INIT_LIST_HEAD(&ps->ps_pool_list);
- INIT_LIST_HEAD(&ps->ps_failed_pool_list);
-
- rc = ps->ps_pool_create(ps, size, &pool);
- if (!rc)
- list_add(&pool->po_list, &ps->ps_pool_list);
- else
- CERROR("Failed to create the first pool for %s\n", ps->ps_name);
-
- return rc;
-}
-
-static int kiblnd_pool_is_idle(struct kib_pool *pool, unsigned long now)
-{
- if (pool->po_allocated) /* still in use */
- return 0;
- if (pool->po_failed)
- return 1;
- return time_after_eq(now, pool->po_deadline);
-}
-
-void kiblnd_pool_free_node(struct kib_pool *pool, struct list_head *node)
-{
- LIST_HEAD(zombies);
- struct kib_poolset *ps = pool->po_owner;
- struct kib_pool *tmp;
- unsigned long now = jiffies;
-
- spin_lock(&ps->ps_lock);
-
- if (ps->ps_node_fini)
- ps->ps_node_fini(pool, node);
-
- LASSERT(pool->po_allocated > 0);
- list_add(node, &pool->po_free_list);
- pool->po_allocated--;
-
- list_for_each_entry_safe(pool, tmp, &ps->ps_pool_list, po_list) {
- /* the first pool is persistent */
- if (ps->ps_pool_list.next == &pool->po_list)
- continue;
-
- if (kiblnd_pool_is_idle(pool, now))
- list_move(&pool->po_list, &zombies);
- }
- spin_unlock(&ps->ps_lock);
-
- if (!list_empty(&zombies))
- kiblnd_destroy_pool_list(&zombies);
-}
-
-struct list_head *kiblnd_pool_alloc_node(struct kib_poolset *ps)
-{
- struct list_head *node;
- struct kib_pool *pool;
- unsigned int interval = 1;
- unsigned long before;
- unsigned int trips = 0;
- int rc;
-
- again:
- spin_lock(&ps->ps_lock);
- list_for_each_entry(pool, &ps->ps_pool_list, po_list) {
- if (list_empty(&pool->po_free_list))
- continue;
-
- pool->po_allocated++;
- pool->po_deadline = jiffies + IBLND_POOL_DEADLINE * HZ;
- node = pool->po_free_list.next;
- list_del(node);
-
- if (ps->ps_node_init) {
- /* still hold the lock */
- ps->ps_node_init(pool, node);
- }
- spin_unlock(&ps->ps_lock);
- return node;
- }
-
- /* no available tx pool and ... */
- if (ps->ps_increasing) {
- /* another thread is allocating a new pool */
- spin_unlock(&ps->ps_lock);
- trips++;
- CDEBUG(D_NET, "Another thread is allocating new %s pool, waiting %d jiffies for it to complete; trips = %d\n",
- ps->ps_name, interval, trips);
-
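- /*
- * exponential backoff: sleep 1, 2, 4, ... jiffies (capped at
- * roughly HZ) until the other thread finishes creating the pool
- */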
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(interval);
- if (interval < HZ)
- interval *= 2;
-
- goto again;
- }
-
- if (time_before(jiffies, ps->ps_next_retry)) {
- /* someone failed recently */
- spin_unlock(&ps->ps_lock);
- return NULL;
- }
-
- ps->ps_increasing = 1;
- spin_unlock(&ps->ps_lock);
-
- CDEBUG(D_NET, "%s pool exhausted, allocating a new one\n", ps->ps_name);
- before = jiffies;
- rc = ps->ps_pool_create(ps, ps->ps_pool_size, &pool);
- CDEBUG(D_NET, "ps_pool_create took %lu jiffies to complete\n",
- jiffies - before);
-
- spin_lock(&ps->ps_lock);
- ps->ps_increasing = 0;
- if (!rc) {
- list_add_tail(&pool->po_list, &ps->ps_pool_list);
- } else {
- ps->ps_next_retry = jiffies + IBLND_POOL_RETRY * HZ;
- CERROR("Can't allocate new %s pool: out of memory\n",
- ps->ps_name);
- }
- spin_unlock(&ps->ps_lock);
-
- goto again;
-}
-
-static void kiblnd_destroy_tx_pool(struct kib_pool *pool)
-{
- struct kib_tx_pool *tpo = container_of(pool, struct kib_tx_pool, tpo_pool);
- int i;
-
- LASSERT(!pool->po_allocated);
-
- if (tpo->tpo_tx_pages) {
- kiblnd_unmap_tx_pool(tpo);
- kiblnd_free_pages(tpo->tpo_tx_pages);
- }
-
- if (!tpo->tpo_tx_descs)
- goto out;
-
- for (i = 0; i < pool->po_size; i++) {
- struct kib_tx *tx = &tpo->tpo_tx_descs[i];
-
- list_del(&tx->tx_list);
- kfree(tx->tx_pages);
- kfree(tx->tx_frags);
- kfree(tx->tx_wrq);
- kfree(tx->tx_sge);
- kfree(tx->tx_rd);
- }
-
- kfree(tpo->tpo_tx_descs);
-out:
- kiblnd_fini_pool(pool);
- kfree(tpo);
-}
-
-static int kiblnd_tx_pool_size(int ncpts)
-{
- int ntx = *kiblnd_tunables.kib_ntx / ncpts;
-
- return max(IBLND_TX_POOL, ntx);
-}
-
-static int kiblnd_create_tx_pool(struct kib_poolset *ps, int size,
- struct kib_pool **pp_po)
-{
- int i;
- int npg;
- struct kib_pool *pool;
- struct kib_tx_pool *tpo;
-
- tpo = kzalloc_cpt(sizeof(*tpo), GFP_NOFS, ps->ps_cpt);
- if (!tpo) {
- CERROR("Failed to allocate TX pool\n");
- return -ENOMEM;
- }
-
- pool = &tpo->tpo_pool;
- kiblnd_init_pool(ps, pool, size);
- tpo->tpo_tx_descs = NULL;
- tpo->tpo_tx_pages = NULL;
-
- npg = DIV_ROUND_UP(size * IBLND_MSG_SIZE, PAGE_SIZE);
- if (kiblnd_alloc_pages(&tpo->tpo_tx_pages, ps->ps_cpt, npg)) {
- CERROR("Can't allocate tx pages: %d\n", npg);
- kfree(tpo);
- return -ENOMEM;
- }
-
- tpo->tpo_tx_descs = kzalloc_cpt(size * sizeof(struct kib_tx),
- GFP_NOFS, ps->ps_cpt);
- if (!tpo->tpo_tx_descs) {
- CERROR("Can't allocate %d tx descriptors\n", size);
- ps->ps_pool_destroy(pool);
- return -ENOMEM;
- }
-
- memset(tpo->tpo_tx_descs, 0, size * sizeof(struct kib_tx));
-
- for (i = 0; i < size; i++) {
- struct kib_tx *tx = &tpo->tpo_tx_descs[i];
-
- tx->tx_pool = tpo;
- if (ps->ps_net->ibn_fmr_ps) {
- tx->tx_pages = kzalloc_cpt(LNET_MAX_IOV * sizeof(*tx->tx_pages),
- GFP_NOFS, ps->ps_cpt);
- if (!tx->tx_pages)
- break;
- }
-
- tx->tx_frags = kzalloc_cpt((1 + IBLND_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_frags),
- GFP_NOFS, ps->ps_cpt);
- if (!tx->tx_frags)
- break;
-
- sg_init_table(tx->tx_frags, IBLND_MAX_RDMA_FRAGS + 1);
-
- tx->tx_wrq = kzalloc_cpt((1 + IBLND_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_wrq),
- GFP_NOFS, ps->ps_cpt);
- if (!tx->tx_wrq)
- break;
-
- tx->tx_sge = kzalloc_cpt((1 + IBLND_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_sge),
- GFP_NOFS, ps->ps_cpt);
- if (!tx->tx_sge)
- break;
-
- tx->tx_rd = kzalloc_cpt(offsetof(struct kib_rdma_desc,
- rd_frags[IBLND_MAX_RDMA_FRAGS]),
- GFP_NOFS, ps->ps_cpt);
- if (!tx->tx_rd)
- break;
- }
-
- if (i == size) {
- kiblnd_map_tx_pool(tpo);
- *pp_po = pool;
- return 0;
- }
-
- ps->ps_pool_destroy(pool);
- return -ENOMEM;
-}
-
-static void kiblnd_tx_init(struct kib_pool *pool, struct list_head *node)
-{
- struct kib_tx_poolset *tps = container_of(pool->po_owner,
- struct kib_tx_poolset,
- tps_poolset);
- struct kib_tx *tx = list_entry(node, struct kib_tx, tx_list);
-
- tx->tx_cookie = tps->tps_next_tx_cookie++;
-}
-
-static void kiblnd_net_fini_pools(struct kib_net *net)
-{
- int i;
-
- cfs_cpt_for_each(i, lnet_cpt_table()) {
- struct kib_tx_poolset *tps;
- struct kib_fmr_poolset *fps;
-
- if (net->ibn_tx_ps) {
- tps = net->ibn_tx_ps[i];
- kiblnd_fini_poolset(&tps->tps_poolset);
- }
-
- if (net->ibn_fmr_ps) {
- fps = net->ibn_fmr_ps[i];
- kiblnd_fini_fmr_poolset(fps);
- }
- }
-
- if (net->ibn_tx_ps) {
- cfs_percpt_free(net->ibn_tx_ps);
- net->ibn_tx_ps = NULL;
- }
-
- if (net->ibn_fmr_ps) {
- cfs_percpt_free(net->ibn_fmr_ps);
- net->ibn_fmr_ps = NULL;
- }
-}
-
-static int kiblnd_net_init_pools(struct kib_net *net, struct lnet_ni *ni,
- __u32 *cpts, int ncpts)
-{
- struct lnet_ioctl_config_o2iblnd_tunables *tunables;
- int cpt;
- int rc;
- int i;
-
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
-
- if (tunables->lnd_fmr_pool_size < *kiblnd_tunables.kib_ntx / 4) {
- CERROR("Can't set fmr pool size (%d) < ntx / 4(%d)\n",
- tunables->lnd_fmr_pool_size,
- *kiblnd_tunables.kib_ntx / 4);
- rc = -EINVAL;
- goto failed;
- }
-
- /*
- * TX pool must be created later than FMR, see LU-2268
- * for details
- */
- LASSERT(!net->ibn_tx_ps);
-
- /*
- * premapping can fail if ibd_nmr > 1, so we always create an
- * FMR pool and map on demand if premapping fails
- *
- * cfs_percpt_alloc() creates an array of struct kib_fmr_poolset,
- * one per CPT that exists, i.e. net->ibn_fmr_ps[cpt].
- */
- net->ibn_fmr_ps = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(struct kib_fmr_poolset));
- if (!net->ibn_fmr_ps) {
- CERROR("Failed to allocate FMR pool array\n");
- rc = -ENOMEM;
- goto failed;
- }
-
- for (i = 0; i < ncpts; i++) {
- cpt = !cpts ? i : cpts[i];
- rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, ncpts,
- net, tunables);
- if (rc) {
- CERROR("Can't initialize FMR pool for CPT %d: %d\n",
- cpt, rc);
- goto failed;
- }
- }
-
- if (i > 0)
- LASSERT(i == ncpts);
-
- /*
- * cfs_percpt_alloc() creates an array of struct kib_tx_poolset,
- * one per CPT that exists, i.e. net->ibn_tx_ps[cpt].
- */
- net->ibn_tx_ps = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(struct kib_tx_poolset));
- if (!net->ibn_tx_ps) {
- CERROR("Failed to allocate tx pool array\n");
- rc = -ENOMEM;
- goto failed;
- }
-
- for (i = 0; i < ncpts; i++) {
- cpt = !cpts ? i : cpts[i];
- rc = kiblnd_init_poolset(&net->ibn_tx_ps[cpt]->tps_poolset,
- cpt, net, "TX",
- kiblnd_tx_pool_size(ncpts),
- kiblnd_create_tx_pool,
- kiblnd_destroy_tx_pool,
- kiblnd_tx_init, NULL);
- if (rc) {
- CERROR("Can't initialize TX pool for CPT %d: %d\n",
- cpt, rc);
- goto failed;
- }
- }
-
- return 0;
- failed:
- kiblnd_net_fini_pools(net);
- LASSERT(rc);
- return rc;
-}
-
-static int kiblnd_hdev_get_attr(struct kib_hca_dev *hdev)
-{
- /*
- * It's safe to assume an HCA can handle a page size
- * matching that of the native system
- */
- hdev->ibh_page_shift = PAGE_SHIFT;
- hdev->ibh_page_size = 1 << PAGE_SHIFT;
- hdev->ibh_page_mask = ~((__u64)hdev->ibh_page_size - 1);
-
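- /*
- * only HCAs that advertise an unlimited max_mr_size are
- * supported; any bounded value is rejected below
- */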
- hdev->ibh_mr_size = hdev->ibh_ibdev->attrs.max_mr_size;
- if (hdev->ibh_mr_size == ~0ULL) {
- hdev->ibh_mr_shift = 64;
- return 0;
- }
-
- CERROR("Invalid mr size: %#llx\n", hdev->ibh_mr_size);
- return -EINVAL;
-}
-
-void kiblnd_hdev_destroy(struct kib_hca_dev *hdev)
-{
- if (hdev->ibh_pd)
- ib_dealloc_pd(hdev->ibh_pd);
-
- if (hdev->ibh_cmid)
- rdma_destroy_id(hdev->ibh_cmid);
-
- kfree(hdev);
-}
-
- /* dummy CM callback: ignore all events */
-static int kiblnd_dummy_callback(struct rdma_cm_id *cmid,
- struct rdma_cm_event *event)
-{
- return 0;
-}
-
-static int kiblnd_dev_need_failover(struct kib_dev *dev)
-{
- struct rdma_cm_id *cmid;
- struct sockaddr_in srcaddr;
- struct sockaddr_in dstaddr;
- int rc;
-
- if (!dev->ibd_hdev || /* initializing */
- !dev->ibd_hdev->ibh_cmid || /* listener is dead */
- *kiblnd_tunables.kib_dev_failover > 1) /* debugging */
- return 1;
-
- /*
- * XXX: it's ugly, but there is no better way to detect
- * ib-bonding HCA failover because:
- *
- * a. no reliable CM event for HCA failover...
- * b. no OFED API to get ib_device for current net_device...
- *
- * We have only two choices at this point:
- *
- * a. rdma_bind_addr() would conflict with the listener cmid
- * b. rdma_resolve_addr() to a zero addr
- */
- cmid = kiblnd_rdma_create_id(kiblnd_dummy_callback, dev, RDMA_PS_TCP,
- IB_QPT_RC);
- if (IS_ERR(cmid)) {
- rc = PTR_ERR(cmid);
- CERROR("Failed to create cmid for failover: %d\n", rc);
- return rc;
- }
-
- memset(&srcaddr, 0, sizeof(srcaddr));
- srcaddr.sin_family = AF_INET;
- srcaddr.sin_addr.s_addr = htonl(dev->ibd_ifip);
-
- memset(&dstaddr, 0, sizeof(dstaddr));
- dstaddr.sin_family = AF_INET;
- rc = rdma_resolve_addr(cmid, (struct sockaddr *)&srcaddr,
- (struct sockaddr *)&dstaddr, 1);
- if (rc || !cmid->device) {
- CERROR("Failed to bind %s:%pI4h to device(%p): %d\n",
- dev->ibd_ifname, &dev->ibd_ifip,
- cmid->device, rc);
- rdma_destroy_id(cmid);
- return rc;
- }
-
- rc = dev->ibd_hdev->ibh_ibdev != cmid->device; /* true for failover */
- rdma_destroy_id(cmid);
-
- return rc;
-}
-
-int kiblnd_dev_failover(struct kib_dev *dev)
-{
- LIST_HEAD(zombie_tpo);
- LIST_HEAD(zombie_ppo);
- LIST_HEAD(zombie_fpo);
- struct rdma_cm_id *cmid = NULL;
- struct kib_hca_dev *hdev = NULL;
- struct ib_pd *pd;
- struct kib_net *net;
- struct sockaddr_in addr;
- unsigned long flags;
- int rc = 0;
- int i;
-
- LASSERT(*kiblnd_tunables.kib_dev_failover > 1 ||
- dev->ibd_can_failover || !dev->ibd_hdev);
-
- rc = kiblnd_dev_need_failover(dev);
- if (rc <= 0)
- goto out;
-
- if (dev->ibd_hdev &&
- dev->ibd_hdev->ibh_cmid) {
- /*
- * XXX it's not ideal to close the old listener here, because
- * creating the new listener can still fail. But we have to
- * close it now, otherwise rdma_bind_addr() would return
- * EADDRINUSE.
- */
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- cmid = dev->ibd_hdev->ibh_cmid;
- /*
- * make the next pass of kiblnd_dev_need_failover()
- * return 1
- */
- dev->ibd_hdev->ibh_cmid = NULL;
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- rdma_destroy_id(cmid);
- }
-
- cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, dev, RDMA_PS_TCP,
- IB_QPT_RC);
- if (IS_ERR(cmid)) {
- rc = PTR_ERR(cmid);
- CERROR("Failed to create cmid for failover: %d\n", rc);
- goto out;
- }
-
- memset(&addr, 0, sizeof(addr));
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(dev->ibd_ifip);
- addr.sin_port = htons(*kiblnd_tunables.kib_service);
-
- /* Bind to failover device or port */
- rc = rdma_bind_addr(cmid, (struct sockaddr *)&addr);
- if (rc || !cmid->device) {
- CERROR("Failed to bind %s:%pI4h to device(%p): %d\n",
- dev->ibd_ifname, &dev->ibd_ifip,
- cmid->device, rc);
- rdma_destroy_id(cmid);
- goto out;
- }
-
- hdev = kzalloc(sizeof(*hdev), GFP_NOFS);
- if (!hdev) {
- CERROR("Failed to allocate kib_hca_dev\n");
- rdma_destroy_id(cmid);
- rc = -ENOMEM;
- goto out;
- }
-
- atomic_set(&hdev->ibh_ref, 1);
- hdev->ibh_dev = dev;
- hdev->ibh_cmid = cmid;
- hdev->ibh_ibdev = cmid->device;
-
- pd = ib_alloc_pd(cmid->device, 0);
- if (IS_ERR(pd)) {
- rc = PTR_ERR(pd);
- CERROR("Can't allocate PD: %d\n", rc);
- goto out;
- }
-
- hdev->ibh_pd = pd;
-
- rc = rdma_listen(cmid, 0);
- if (rc) {
- CERROR("Can't start new listener: %d\n", rc);
- goto out;
- }
-
- rc = kiblnd_hdev_get_attr(hdev);
- if (rc) {
- CERROR("Can't get device attributes: %d\n", rc);
- goto out;
- }
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- swap(dev->ibd_hdev, hdev); /* take over the refcount */
-
- list_for_each_entry(net, &dev->ibd_nets, ibn_list) {
- cfs_cpt_for_each(i, lnet_cpt_table()) {
- kiblnd_fail_poolset(&net->ibn_tx_ps[i]->tps_poolset,
- &zombie_tpo);
-
- if (net->ibn_fmr_ps)
- kiblnd_fail_fmr_poolset(net->ibn_fmr_ps[i],
- &zombie_fpo);
- }
- }
-
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- out:
- if (!list_empty(&zombie_tpo))
- kiblnd_destroy_pool_list(&zombie_tpo);
- if (!list_empty(&zombie_ppo))
- kiblnd_destroy_pool_list(&zombie_ppo);
- if (!list_empty(&zombie_fpo))
- kiblnd_destroy_fmr_pool_list(&zombie_fpo);
- if (hdev)
- kiblnd_hdev_decref(hdev);
-
- if (rc)
- dev->ibd_failed_failover++;
- else
- dev->ibd_failed_failover = 0;
-
- return rc;
-}
-
-void kiblnd_destroy_dev(struct kib_dev *dev)
-{
- LASSERT(!dev->ibd_nnets);
- LASSERT(list_empty(&dev->ibd_nets));
-
- list_del(&dev->ibd_fail_list);
- list_del(&dev->ibd_list);
-
- if (dev->ibd_hdev)
- kiblnd_hdev_decref(dev->ibd_hdev);
-
- kfree(dev);
-}
-
-static struct kib_dev *kiblnd_create_dev(char *ifname)
-{
- struct net_device *netdev;
- struct kib_dev *dev;
- __u32 netmask;
- __u32 ip;
- int up;
- int rc;
-
- rc = lnet_ipif_query(ifname, &up, &ip, &netmask);
- if (rc) {
- CERROR("Can't query IPoIB interface %s: %d\n",
- ifname, rc);
- return NULL;
- }
-
- if (!up) {
- CERROR("Can't query IPoIB interface %s: it's down\n", ifname);
- return NULL;
- }
-
- dev = kzalloc(sizeof(*dev), GFP_NOFS);
- if (!dev)
- return NULL;
-
- netdev = dev_get_by_name(&init_net, ifname);
- if (!netdev) {
- dev->ibd_can_failover = 0;
- } else {
- dev->ibd_can_failover = !!(netdev->flags & IFF_MASTER);
- dev_put(netdev);
- }
-
- INIT_LIST_HEAD(&dev->ibd_nets);
- INIT_LIST_HEAD(&dev->ibd_list); /* not yet in kib_devs */
- INIT_LIST_HEAD(&dev->ibd_fail_list);
- dev->ibd_ifip = ip;
- strcpy(&dev->ibd_ifname[0], ifname);
-
- /* initialize the device */
- rc = kiblnd_dev_failover(dev);
- if (rc) {
- CERROR("Can't initialize device: %d\n", rc);
- kfree(dev);
- return NULL;
- }
-
- list_add_tail(&dev->ibd_list, &kiblnd_data.kib_devs);
- return dev;
-}
-
-static void kiblnd_base_shutdown(void)
-{
- struct kib_sched_info *sched;
- int i;
-
- LASSERT(list_empty(&kiblnd_data.kib_devs));
-
- switch (kiblnd_data.kib_init) {
- default:
- LBUG();
-
- case IBLND_INIT_ALL:
- case IBLND_INIT_DATA:
- LASSERT(kiblnd_data.kib_peers);
- for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++)
- LASSERT(list_empty(&kiblnd_data.kib_peers[i]));
- LASSERT(list_empty(&kiblnd_data.kib_connd_zombies));
- LASSERT(list_empty(&kiblnd_data.kib_connd_conns));
- LASSERT(list_empty(&kiblnd_data.kib_reconn_list));
- LASSERT(list_empty(&kiblnd_data.kib_reconn_wait));
-
- /* flag threads to terminate; wake and wait for them to die */
- kiblnd_data.kib_shutdown = 1;
-
- /*
- * NB: we really want to stop scheduler threads net by net
- * instead of for the whole module; this should be improved
- * with dynamic LNet configuration
- */
- cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds)
- wake_up_all(&sched->ibs_waitq);
-
- wake_up_all(&kiblnd_data.kib_connd_waitq);
- wake_up_all(&kiblnd_data.kib_failover_waitq);
-
- i = 2;
- while (atomic_read(&kiblnd_data.kib_nthreads)) {
- i++;
- /* power of 2 ? */
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
- "Waiting for %d threads to terminate\n",
- atomic_read(&kiblnd_data.kib_nthreads));
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- }
-
- /* fall through */
-
- case IBLND_INIT_NOTHING:
- break;
- }
-
- kvfree(kiblnd_data.kib_peers);
-
- if (kiblnd_data.kib_scheds)
- cfs_percpt_free(kiblnd_data.kib_scheds);
-
- kiblnd_data.kib_init = IBLND_INIT_NOTHING;
- module_put(THIS_MODULE);
-}
-
-static void kiblnd_shutdown(struct lnet_ni *ni)
-{
- struct kib_net *net = ni->ni_data;
- rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
- int i;
- unsigned long flags;
-
- LASSERT(kiblnd_data.kib_init == IBLND_INIT_ALL);
-
- if (!net)
- goto out;
-
- write_lock_irqsave(g_lock, flags);
- net->ibn_shutdown = 1;
- write_unlock_irqrestore(g_lock, flags);
-
- switch (net->ibn_init) {
- default:
- LBUG();
-
- case IBLND_INIT_ALL:
- /* nuke all existing peers within this net */
- kiblnd_del_peer(ni, LNET_NID_ANY);
-
- /* Wait for all peer state to clean up */
- i = 2;
- while (atomic_read(&net->ibn_npeers)) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n? */
- "%s: waiting for %d peers to disconnect\n",
- libcfs_nid2str(ni->ni_nid),
- atomic_read(&net->ibn_npeers));
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- }
-
- kiblnd_net_fini_pools(net);
-
- write_lock_irqsave(g_lock, flags);
- LASSERT(net->ibn_dev->ibd_nnets > 0);
- net->ibn_dev->ibd_nnets--;
- list_del(&net->ibn_list);
- write_unlock_irqrestore(g_lock, flags);
-
- /* fall through */
-
- case IBLND_INIT_NOTHING:
- LASSERT(!atomic_read(&net->ibn_nconns));
-
- if (net->ibn_dev && !net->ibn_dev->ibd_nnets)
- kiblnd_destroy_dev(net->ibn_dev);
-
- break;
- }
-
- net->ibn_init = IBLND_INIT_NOTHING;
- ni->ni_data = NULL;
-
- kfree(net);
-
-out:
- if (list_empty(&kiblnd_data.kib_devs))
- kiblnd_base_shutdown();
-}
-
-static int kiblnd_base_startup(void)
-{
- struct kib_sched_info *sched;
- int rc;
- int i;
-
- LASSERT(kiblnd_data.kib_init == IBLND_INIT_NOTHING);
-
- try_module_get(THIS_MODULE);
- /* zero pointers, flags etc */
- memset(&kiblnd_data, 0, sizeof(kiblnd_data));
-
- rwlock_init(&kiblnd_data.kib_global_lock);
-
- INIT_LIST_HEAD(&kiblnd_data.kib_devs);
- INIT_LIST_HEAD(&kiblnd_data.kib_failed_devs);
-
- kiblnd_data.kib_peer_hash_size = IBLND_PEER_HASH_SIZE;
- kiblnd_data.kib_peers = kvmalloc_array(kiblnd_data.kib_peer_hash_size,
- sizeof(struct list_head),
- GFP_KERNEL);
- if (!kiblnd_data.kib_peers)
- goto failed;
- for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++)
- INIT_LIST_HEAD(&kiblnd_data.kib_peers[i]);
-
- spin_lock_init(&kiblnd_data.kib_connd_lock);
- INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns);
- INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies);
- INIT_LIST_HEAD(&kiblnd_data.kib_reconn_list);
- INIT_LIST_HEAD(&kiblnd_data.kib_reconn_wait);
-
- init_waitqueue_head(&kiblnd_data.kib_connd_waitq);
- init_waitqueue_head(&kiblnd_data.kib_failover_waitq);
-
- kiblnd_data.kib_scheds = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(*sched));
- if (!kiblnd_data.kib_scheds)
- goto failed;
-
- cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds) {
- int nthrs;
-
- spin_lock_init(&sched->ibs_lock);
- INIT_LIST_HEAD(&sched->ibs_conns);
- init_waitqueue_head(&sched->ibs_waitq);
-
- nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
- if (*kiblnd_tunables.kib_nscheds > 0) {
- nthrs = min(nthrs, *kiblnd_tunables.kib_nscheds);
- } else {
- /*
- * max to half of CPUs, another half is reserved for
- * upper layer modules
- */
- nthrs = min(max(IBLND_N_SCHED, nthrs >> 1), nthrs);
- }
-
- sched->ibs_nthreads_max = nthrs;
- sched->ibs_cpt = i;
- }
-
- kiblnd_data.kib_error_qpa.qp_state = IB_QPS_ERR;
-
- /* lists/ptrs/locks initialised */
- kiblnd_data.kib_init = IBLND_INIT_DATA;
- /*****************************************************/
-
- rc = kiblnd_thread_start(kiblnd_connd, NULL, "kiblnd_connd");
- if (rc) {
- CERROR("Can't spawn o2iblnd connd: %d\n", rc);
- goto failed;
- }
-
- if (*kiblnd_tunables.kib_dev_failover)
- rc = kiblnd_thread_start(kiblnd_failover_thread, NULL,
- "kiblnd_failover");
-
- if (rc) {
- CERROR("Can't spawn o2iblnd failover thread: %d\n", rc);
- goto failed;
- }
-
- /* flag everything initialised */
- kiblnd_data.kib_init = IBLND_INIT_ALL;
- /*****************************************************/
-
- return 0;
-
- failed:
- kiblnd_base_shutdown();
- return -ENETDOWN;
-}
-
-static int kiblnd_start_schedulers(struct kib_sched_info *sched)
-{
- int rc = 0;
- int nthrs;
- int i;
-
- if (!sched->ibs_nthreads) {
- if (*kiblnd_tunables.kib_nscheds > 0) {
- nthrs = sched->ibs_nthreads_max;
- } else {
- nthrs = cfs_cpt_weight(lnet_cpt_table(),
- sched->ibs_cpt);
- nthrs = min(max(IBLND_N_SCHED, nthrs >> 1), nthrs);
- nthrs = min(IBLND_N_SCHED_HIGH, nthrs);
- }
- } else {
- LASSERT(sched->ibs_nthreads <= sched->ibs_nthreads_max);
- /* increase one thread if there is new interface */
- nthrs = sched->ibs_nthreads < sched->ibs_nthreads_max;
- }
-
- for (i = 0; i < nthrs; i++) {
- long id;
- char name[20];
-
- id = KIB_THREAD_ID(sched->ibs_cpt, sched->ibs_nthreads + i);
- snprintf(name, sizeof(name), "kiblnd_sd_%02ld_%02ld",
- KIB_THREAD_CPT(id), KIB_THREAD_TID(id));
- rc = kiblnd_thread_start(kiblnd_scheduler, (void *)id, name);
- if (!rc)
- continue;
-
- CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
- sched->ibs_cpt, sched->ibs_nthreads + i, rc);
- break;
- }
-
- sched->ibs_nthreads += i;
- return rc;
-}
-
-static int kiblnd_dev_start_threads(struct kib_dev *dev, int newdev, __u32 *cpts,
- int ncpts)
-{
- int cpt;
- int rc;
- int i;
-
- for (i = 0; i < ncpts; i++) {
- struct kib_sched_info *sched;
-
- cpt = !cpts ? i : cpts[i];
- sched = kiblnd_data.kib_scheds[cpt];
-
- if (!newdev && sched->ibs_nthreads > 0)
- continue;
-
- rc = kiblnd_start_schedulers(sched);
- if (rc) {
- CERROR("Failed to start scheduler threads for %s\n",
- dev->ibd_ifname);
- return rc;
- }
- }
- return 0;
-}
-
-static struct kib_dev *kiblnd_dev_search(char *ifname)
-{
- struct kib_dev *alias = NULL;
- struct kib_dev *dev;
- char *colon;
- char *colon2;
-
- colon = strchr(ifname, ':');
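- /*
- * compare base names with any ':alias' suffix temporarily
- * NUL-terminated away: an exact match wins immediately,
- * otherwise remember the first alias match
- */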
- list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) {
- if (!strcmp(&dev->ibd_ifname[0], ifname))
- return dev;
-
- if (alias)
- continue;
-
- colon2 = strchr(dev->ibd_ifname, ':');
- if (colon)
- *colon = 0;
- if (colon2)
- *colon2 = 0;
-
- if (!strcmp(&dev->ibd_ifname[0], ifname))
- alias = dev;
-
- if (colon)
- *colon = ':';
- if (colon2)
- *colon2 = ':';
- }
- return alias;
-}
-
-static int kiblnd_startup(struct lnet_ni *ni)
-{
- char *ifname;
- struct kib_dev *ibdev = NULL;
- struct kib_net *net;
- struct timespec64 tv;
- unsigned long flags;
- int rc;
- int newdev;
-
- LASSERT(ni->ni_lnd == &the_o2iblnd);
-
- if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) {
- rc = kiblnd_base_startup();
- if (rc)
- return rc;
- }
-
- net = kzalloc(sizeof(*net), GFP_NOFS);
- ni->ni_data = net;
- if (!net)
- goto net_failed;
-
- ktime_get_real_ts64(&tv);
- net->ibn_incarnation = tv.tv_sec * USEC_PER_SEC +
- tv.tv_nsec / NSEC_PER_USEC;
-
- rc = kiblnd_tunables_setup(ni);
- if (rc)
- goto net_failed;
-
- if (ni->ni_interfaces[0]) {
- /* Use the IPoIB interface specified in 'networks=' */
-
- BUILD_BUG_ON(LNET_MAX_INTERFACES <= 1);
- if (ni->ni_interfaces[1]) {
- CERROR("Multiple interfaces not supported\n");
- goto failed;
- }
-
- ifname = ni->ni_interfaces[0];
- } else {
- ifname = *kiblnd_tunables.kib_default_ipif;
- }
-
- if (strlen(ifname) >= sizeof(ibdev->ibd_ifname)) {
- CERROR("IPoIB interface name too long: %s\n", ifname);
- goto failed;
- }
-
- ibdev = kiblnd_dev_search(ifname);
-
- newdev = !ibdev;
- /* hmm...create kib_dev even for alias */
- if (!ibdev || strcmp(&ibdev->ibd_ifname[0], ifname))
- ibdev = kiblnd_create_dev(ifname);
-
- if (!ibdev)
- goto failed;
-
- net->ibn_dev = ibdev;
- ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ibdev->ibd_ifip);
-
- rc = kiblnd_dev_start_threads(ibdev, newdev,
- ni->ni_cpts, ni->ni_ncpts);
- if (rc)
- goto failed;
-
- rc = kiblnd_net_init_pools(net, ni, ni->ni_cpts, ni->ni_ncpts);
- if (rc) {
- CERROR("Failed to initialize NI pools: %d\n", rc);
- goto failed;
- }
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- ibdev->ibd_nnets++;
- list_add_tail(&net->ibn_list, &ibdev->ibd_nets);
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- net->ibn_init = IBLND_INIT_ALL;
-
- return 0;
-
-failed:
- if (!net->ibn_dev && ibdev)
- kiblnd_destroy_dev(ibdev);
-
-net_failed:
- kiblnd_shutdown(ni);
-
- CDEBUG(D_NET, "%s failed\n", __func__);
- return -ENETDOWN;
-}
-
-static struct lnet_lnd the_o2iblnd = {
- .lnd_type = O2IBLND,
- .lnd_startup = kiblnd_startup,
- .lnd_shutdown = kiblnd_shutdown,
- .lnd_ctl = kiblnd_ctl,
- .lnd_query = kiblnd_query,
- .lnd_send = kiblnd_send,
- .lnd_recv = kiblnd_recv,
-};
-
-static void __exit ko2iblnd_exit(void)
-{
- lnet_unregister_lnd(&the_o2iblnd);
-}
-
-static int __init ko2iblnd_init(void)
-{
- int rc;
-
- BUILD_BUG_ON(sizeof(struct kib_msg) > IBLND_MSG_SIZE);
- BUILD_BUG_ON(offsetof(struct kib_msg,
- ibm_u.get.ibgm_rd.rd_frags[IBLND_MAX_RDMA_FRAGS])
- > IBLND_MSG_SIZE);
- BUILD_BUG_ON(offsetof(struct kib_msg,
- ibm_u.putack.ibpam_rd.rd_frags[IBLND_MAX_RDMA_FRAGS])
- > IBLND_MSG_SIZE);
-
- kiblnd_tunables_init();
-
- rc = libcfs_setup();
- if (rc)
- return rc;
-
- lnet_register_lnd(&the_o2iblnd);
-
- return 0;
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("OpenIB gen2 LNet Network Driver");
-MODULE_VERSION("2.7.0");
-MODULE_LICENSE("GPL");
-
-module_init(ko2iblnd_init);
-module_exit(ko2iblnd_exit);
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
deleted file mode 100644
index 217503f125bc..000000000000
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
+++ /dev/null
@@ -1,1048 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/o2iblnd/o2iblnd.h
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/uio.h>
-#include <linux/uaccess.h>
-
-#include <linux/io.h>
-
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-#include <linux/pci.h>
-
-#include <net/sock.h>
-#include <linux/in.h>
-
-#include <rdma/rdma_cm.h>
-#include <rdma/ib_cm.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_fmr_pool.h>
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <linux/lnet/lib-lnet.h>
-
-#define IBLND_PEER_HASH_SIZE 101 /* # peer lists */
-/* # scheduler loops before reschedule */
-#define IBLND_RESCHED 100
-
-#define IBLND_N_SCHED 2
-#define IBLND_N_SCHED_HIGH 4
-
-struct kib_tunables {
- int *kib_dev_failover; /* HCA failover */
- unsigned int *kib_service; /* IB service number */
- int *kib_min_reconnect_interval; /* first failed connection retry... */
- int *kib_max_reconnect_interval; /* exponentially increasing to this */
- int *kib_cksum; /* checksum struct kib_msg? */
- int *kib_timeout; /* comms timeout (seconds) */
- int *kib_keepalive; /* keepalive timeout (seconds) */
- int *kib_ntx; /* # tx descs */
- char **kib_default_ipif; /* default IPoIB interface */
- int *kib_retry_count;
- int *kib_rnr_retry_count;
- int *kib_ib_mtu; /* IB MTU */
- int *kib_require_priv_port; /* accept only privileged ports */
- int *kib_use_priv_port; /* use privileged port for active connect */
- int *kib_nscheds; /* # threads on each CPT */
-};
-
-extern struct kib_tunables kiblnd_tunables;
-
-#define IBLND_MSG_QUEUE_SIZE_V1 8 /* V1 only : # messages/RDMAs in-flight */
-#define IBLND_CREDIT_HIGHWATER_V1 7 /* V1 only : when to eagerly return credits */
-
-#define IBLND_CREDITS_DEFAULT 8 /* default # of peer credits */
-#define IBLND_CREDITS_MAX ((typeof(((struct kib_msg *)0)->ibm_credits)) - 1) /* Max # of peer credits */
-
-/* when to eagerly return credits */
-#define IBLND_CREDITS_HIGHWATER(t, v) ((v) == IBLND_MSG_VERSION_1 ? \
- IBLND_CREDIT_HIGHWATER_V1 : \
-					       (t)->lnd_peercredits_hiw)
-
-#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(current->nsproxy->net_ns, \
- cb, dev, \
- ps, qpt)
-
-/* 2 OOB shall suffice for 1 keepalive and 1 returning credits */
-#define IBLND_OOB_CAPABLE(v) ((v) != IBLND_MSG_VERSION_1)
-#define IBLND_OOB_MSGS(v) (IBLND_OOB_CAPABLE(v) ? 2 : 0)
-
-#define IBLND_FRAG_SHIFT (PAGE_SHIFT - 12) /* frag size on wire is in 4K units */
-#define IBLND_MSG_SIZE (4 << 10) /* max size of queued messages (inc hdr) */
-#define IBLND_MAX_RDMA_FRAGS (LNET_MAX_PAYLOAD >> 12) /* max # of 4K fragments supported */
-
-/************************/
-/* derived constants... */
-/* Pools (shared by connections on each CPT) */
-/* These pools can grow at runtime, so they don't need a very large initial value */
-#define IBLND_TX_POOL 256
-#define IBLND_FMR_POOL 256
-#define IBLND_FMR_POOL_FLUSH 192
-
-#define IBLND_RX_MSGS(c) \
- ((c->ibc_queue_depth) * 2 + IBLND_OOB_MSGS(c->ibc_version))
-#define IBLND_RX_MSG_BYTES(c) (IBLND_RX_MSGS(c) * IBLND_MSG_SIZE)
-#define IBLND_RX_MSG_PAGES(c) \
- ((IBLND_RX_MSG_BYTES(c) + PAGE_SIZE - 1) / PAGE_SIZE)
-
-/* WRs and CQEs (per connection) */
-#define IBLND_RECV_WRS(c) IBLND_RX_MSGS(c)
-#define IBLND_SEND_WRS(c) \
- (((c->ibc_max_frags + 1) << IBLND_FRAG_SHIFT) * \
- kiblnd_concurrent_sends(c->ibc_version, c->ibc_peer->ibp_ni))
-#define IBLND_CQ_ENTRIES(c) (IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c))
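
Editor's worked example: for a version-2 (OOB-capable) connection with ibc_queue_depth == 8, IBLND_RX_MSGS is 8 * 2 + 2 = 18, so the connection posts 18 receive WRs backed by 18 * 4 KiB = 72 KiB (18 pages) of message buffers; IBLND_SEND_WRS additionally depends on ibc_max_frags and the concurrent-sends tunable, and IBLND_CQ_ENTRIES is the sum of the two.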
-
-struct kib_hca_dev;
-
-/* o2iblnd can run over aliased interface */
-#ifdef IFALIASZ
-#define KIB_IFNAME_SIZE IFALIASZ
-#else
-#define KIB_IFNAME_SIZE 256
-#endif
-
-struct kib_dev {
- struct list_head ibd_list; /* chain on kib_devs */
- struct list_head ibd_fail_list; /* chain on kib_failed_devs */
- __u32 ibd_ifip; /* IPoIB interface IP */
-
- /* IPoIB interface name */
- char ibd_ifname[KIB_IFNAME_SIZE];
- int ibd_nnets; /* # nets extant */
-
- unsigned long ibd_next_failover;
- int ibd_failed_failover; /* # failover failures */
- unsigned int ibd_failover; /* failover in progress */
- unsigned int ibd_can_failover; /* IPoIB interface is a bonding master */
- struct list_head ibd_nets;
- struct kib_hca_dev *ibd_hdev;
-};
-
-struct kib_hca_dev {
- struct rdma_cm_id *ibh_cmid; /* listener cmid */
- struct ib_device *ibh_ibdev; /* IB device */
- int ibh_page_shift; /* page shift of current HCA */
- int ibh_page_size; /* page size of current HCA */
- __u64 ibh_page_mask; /* page mask of current HCA */
- int ibh_mr_shift; /* bits shift of max MR size */
- __u64 ibh_mr_size; /* size of MR */
- struct ib_pd *ibh_pd; /* PD */
- struct kib_dev *ibh_dev; /* owner */
- atomic_t ibh_ref; /* refcount */
-};
-
-/** # of seconds to keep pool alive */
-#define IBLND_POOL_DEADLINE 300
-/** # of seconds to retry if allocation failed */
-#define IBLND_POOL_RETRY 1
-
-struct kib_pages {
- int ibp_npages; /* # pages */
- struct page *ibp_pages[0]; /* page array */
-};
-
-struct kib_pool;
-struct kib_poolset;
-
-typedef int (*kib_ps_pool_create_t)(struct kib_poolset *ps,
- int inc, struct kib_pool **pp_po);
-typedef void (*kib_ps_pool_destroy_t)(struct kib_pool *po);
-typedef void (*kib_ps_node_init_t)(struct kib_pool *po, struct list_head *node);
-typedef void (*kib_ps_node_fini_t)(struct kib_pool *po, struct list_head *node);
-
-struct kib_net;
-
-#define IBLND_POOL_NAME_LEN 32
-
-struct kib_poolset {
- spinlock_t ps_lock; /* serialize */
- struct kib_net *ps_net; /* network it belongs to */
- char ps_name[IBLND_POOL_NAME_LEN]; /* pool set name */
- struct list_head ps_pool_list; /* list of pools */
- struct list_head ps_failed_pool_list;/* failed pool list */
-	unsigned long		ps_next_retry;	/* time stamp for retry if */
-						/* allocation failed */
- int ps_increasing; /* is allocating new pool */
- int ps_pool_size; /* new pool size */
- int ps_cpt; /* CPT id */
-
- kib_ps_pool_create_t ps_pool_create; /* create a new pool */
- kib_ps_pool_destroy_t ps_pool_destroy; /* destroy a pool */
- kib_ps_node_init_t ps_node_init; /* initialize new allocated node */
- kib_ps_node_fini_t ps_node_fini; /* finalize node */
-};
-
-struct kib_pool {
- struct list_head po_list; /* chain on pool list */
- struct list_head po_free_list; /* pre-allocated node */
- struct kib_poolset *po_owner; /* pool_set of this pool */
- unsigned long po_deadline; /* deadline of this pool */
- int po_allocated; /* # of elements in use */
- int po_failed; /* pool is created on failed HCA */
- int po_size; /* # of pre-allocated elements */
-};
-
-struct kib_tx_poolset {
- struct kib_poolset tps_poolset; /* pool-set */
- __u64 tps_next_tx_cookie; /* cookie of TX */
-};
-
-struct kib_tx_pool {
- struct kib_pool tpo_pool; /* pool */
- struct kib_hca_dev *tpo_hdev; /* device for this pool */
- struct kib_tx *tpo_tx_descs; /* all the tx descriptors */
- struct kib_pages *tpo_tx_pages; /* premapped tx msg pages */
-};
-
-struct kib_fmr_poolset {
- spinlock_t fps_lock; /* serialize */
- struct kib_net *fps_net; /* IB network */
- struct list_head fps_pool_list; /* FMR pool list */
- struct list_head fps_failed_pool_list;/* FMR pool list */
- __u64 fps_version; /* validity stamp */
- int fps_cpt; /* CPT id */
- int fps_pool_size;
- int fps_flush_trigger;
- int fps_cache;
- int fps_increasing; /* is allocating new pool */
-	unsigned long		fps_next_retry;	/* time stamp for retry if */
-						/* allocation failed */
-};
-
-struct kib_fast_reg_descriptor { /* For fast registration */
- struct list_head frd_list;
- struct ib_send_wr frd_inv_wr;
- struct ib_reg_wr frd_fastreg_wr;
- struct ib_mr *frd_mr;
- bool frd_valid;
-};
-
-struct kib_fmr_pool {
- struct list_head fpo_list; /* chain on pool list */
- struct kib_hca_dev *fpo_hdev; /* device for this pool */
- struct kib_fmr_poolset *fpo_owner; /* owner of this pool */
- union {
- struct {
- struct ib_fmr_pool *fpo_fmr_pool; /* IB FMR pool */
- } fmr;
- struct { /* For fast registration */
- struct list_head fpo_pool_list;
- int fpo_pool_size;
- } fast_reg;
- };
- unsigned long fpo_deadline; /* deadline of this pool */
-	int			fpo_failed;	/* FMR pool has failed */
- int fpo_map_count; /* # of mapped FMR */
- int fpo_is_fmr;
-};
-
-struct kib_fmr {
- struct kib_fmr_pool *fmr_pool; /* pool of FMR */
- struct ib_pool_fmr *fmr_pfmr; /* IB pool fmr */
- struct kib_fast_reg_descriptor *fmr_frd;
- u32 fmr_key;
-};
-
-struct kib_net {
- struct list_head ibn_list; /* chain on struct kib_dev::ibd_nets */
- __u64 ibn_incarnation;/* my epoch */
- int ibn_init; /* initialisation state */
- int ibn_shutdown; /* shutting down? */
-
- atomic_t ibn_npeers; /* # peers extant */
- atomic_t ibn_nconns; /* # connections extant */
-
- struct kib_tx_poolset **ibn_tx_ps; /* tx pool-set */
- struct kib_fmr_poolset **ibn_fmr_ps; /* fmr pool-set */
-
- struct kib_dev *ibn_dev; /* underlying IB device */
-};
-
-#define KIB_THREAD_SHIFT 16
-#define KIB_THREAD_ID(cpt, tid) ((cpt) << KIB_THREAD_SHIFT | (tid))
-#define KIB_THREAD_CPT(id) ((id) >> KIB_THREAD_SHIFT)
-#define KIB_THREAD_TID(id) ((id) & ((1UL << KIB_THREAD_SHIFT) - 1))
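
Editor's sketch (not from the original file): a round trip through the thread-id packing above, with KIB_THREAD_SHIFT == 16.

static inline void kib_thread_id_example(void)
{
	long id = KIB_THREAD_ID(3, 5);		/* (3 << 16) | 5 == 0x30005 */

	LASSERT(KIB_THREAD_CPT(id) == 3);	/* high bits: CPT */
	LASSERT(KIB_THREAD_TID(id) == 5);	/* low 16 bits: thread index */
}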
-
-struct kib_sched_info {
- spinlock_t ibs_lock; /* serialise */
- wait_queue_head_t ibs_waitq; /* schedulers sleep here */
- struct list_head ibs_conns; /* conns to check for rx completions */
- int ibs_nthreads; /* number of scheduler threads */
- int ibs_nthreads_max; /* max allowed scheduler threads */
- int ibs_cpt; /* CPT id */
-};
-
-struct kib_data {
- int kib_init; /* initialisation state */
- int kib_shutdown; /* shut down? */
- struct list_head kib_devs; /* IB devices extant */
- struct list_head kib_failed_devs; /* list head of failed devices */
- wait_queue_head_t kib_failover_waitq; /* schedulers sleep here */
- atomic_t kib_nthreads; /* # live threads */
- rwlock_t kib_global_lock; /* stabilize net/dev/peer/conn ops */
- struct list_head *kib_peers; /* hash table of all my known peers */
- int kib_peer_hash_size; /* size of kib_peers */
- void *kib_connd; /* the connd task (serialisation assertions) */
- struct list_head kib_connd_conns; /* connections to setup/teardown */
- struct list_head kib_connd_zombies; /* connections with zero refcount */
- /* connections to reconnect */
- struct list_head kib_reconn_list;
- /* peers wait for reconnection */
- struct list_head kib_reconn_wait;
- /**
-	 * The time (in seconds) at which peers are pulled off \a kib_reconn_wait
- * for reconnection.
- */
- time64_t kib_reconn_sec;
-
- wait_queue_head_t kib_connd_waitq; /* connection daemon sleeps here */
- spinlock_t kib_connd_lock; /* serialise */
- struct ib_qp_attr kib_error_qpa; /* QP->ERROR */
- struct kib_sched_info **kib_scheds; /* percpt data for schedulers */
-};
-
-#define IBLND_INIT_NOTHING 0
-#define IBLND_INIT_DATA 1
-#define IBLND_INIT_ALL 2
-
-/************************************************************************
- * IB Wire message format.
- * These are sent in sender's byte order (i.e. receiver flips).
- */
-
-struct kib_connparams {
- __u16 ibcp_queue_depth;
- __u16 ibcp_max_frags;
- __u32 ibcp_max_msg_size;
-} WIRE_ATTR;
-
-struct kib_immediate_msg {
- struct lnet_hdr ibim_hdr; /* portals header */
- char ibim_payload[0]; /* piggy-backed payload */
-} WIRE_ATTR;
-
-struct kib_rdma_frag {
- __u32 rf_nob; /* # bytes this frag */
- __u64 rf_addr; /* CAVEAT EMPTOR: misaligned!! */
-} WIRE_ATTR;
-
-struct kib_rdma_desc {
- __u32 rd_key; /* local/remote key */
- __u32 rd_nfrags; /* # fragments */
- struct kib_rdma_frag rd_frags[0]; /* buffer frags */
-} WIRE_ATTR;
-
-struct kib_putreq_msg {
- struct lnet_hdr ibprm_hdr; /* portals header */
- __u64 ibprm_cookie; /* opaque completion cookie */
-} WIRE_ATTR;
-
-struct kib_putack_msg {
- __u64 ibpam_src_cookie; /* reflected completion cookie */
- __u64 ibpam_dst_cookie; /* opaque completion cookie */
- struct kib_rdma_desc ibpam_rd; /* sender's sink buffer */
-} WIRE_ATTR;
-
-struct kib_get_msg {
- struct lnet_hdr ibgm_hdr; /* portals header */
- __u64 ibgm_cookie; /* opaque completion cookie */
- struct kib_rdma_desc ibgm_rd; /* rdma descriptor */
-} WIRE_ATTR;
-
-struct kib_completion_msg {
- __u64 ibcm_cookie; /* opaque completion cookie */
-	__s32 ibcm_status; /* < 0 failure; >= 0 length */
-} WIRE_ATTR;
-
-struct kib_msg {
- /* First 2 fields fixed FOR ALL TIME */
- __u32 ibm_magic; /* I'm an ibnal message */
- __u16 ibm_version; /* this is my version number */
-
- __u8 ibm_type; /* msg type */
- __u8 ibm_credits; /* returned credits */
- __u32 ibm_nob; /* # bytes in whole message */
- __u32 ibm_cksum; /* checksum (0 == no checksum) */
- __u64 ibm_srcnid; /* sender's NID */
- __u64 ibm_srcstamp; /* sender's incarnation */
- __u64 ibm_dstnid; /* destination's NID */
- __u64 ibm_dststamp; /* destination's incarnation */
-
- union {
- struct kib_connparams connparams;
- struct kib_immediate_msg immediate;
- struct kib_putreq_msg putreq;
- struct kib_putack_msg putack;
- struct kib_get_msg get;
- struct kib_completion_msg completion;
- } WIRE_ATTR ibm_u;
-} WIRE_ATTR;
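
Editor's sketch: ibm_nob counts the fixed header up to the union plus however much of the body is actually used, which is how kiblnd_init_msg() below computes it. For an IBLND_MSG_IMMEDIATE carrying payload_nob bytes of piggy-backed payload (helper name is hypothetical, not from the original file):

static inline int kib_immediate_msg_nob(int payload_nob)
{
	return offsetof(struct kib_msg, ibm_u) +
	       offsetof(struct kib_immediate_msg, ibim_payload[payload_nob]);
}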
-
-#define IBLND_MSG_MAGIC LNET_PROTO_IB_MAGIC /* unique magic */
-
-#define IBLND_MSG_VERSION_1 0x11
-#define IBLND_MSG_VERSION_2 0x12
-#define IBLND_MSG_VERSION IBLND_MSG_VERSION_2
-
-#define IBLND_MSG_CONNREQ 0xc0 /* connection request */
-#define IBLND_MSG_CONNACK 0xc1 /* connection acknowledge */
-#define IBLND_MSG_NOOP 0xd0 /* nothing (just credits) */
-#define IBLND_MSG_IMMEDIATE 0xd1 /* immediate */
-#define IBLND_MSG_PUT_REQ 0xd2 /* putreq (src->sink) */
-#define IBLND_MSG_PUT_NAK 0xd3 /* completion (sink->src) */
-#define IBLND_MSG_PUT_ACK 0xd4 /* putack (sink->src) */
-#define IBLND_MSG_PUT_DONE 0xd5 /* completion (src->sink) */
-#define IBLND_MSG_GET_REQ 0xd6 /* getreq (sink->src) */
-#define IBLND_MSG_GET_DONE 0xd7 /* completion (src->sink: all OK) */
-
-struct kib_rej {
- __u32 ibr_magic; /* sender's magic */
- __u16 ibr_version; /* sender's version */
- __u8 ibr_why; /* reject reason */
- __u8 ibr_padding; /* padding */
- __u64 ibr_incarnation; /* incarnation of peer */
- struct kib_connparams ibr_cp; /* connection parameters */
-} WIRE_ATTR;
-
-/* connection rejection reasons */
-#define IBLND_REJECT_CONN_RACE 1 /* You lost connection race */
-#define IBLND_REJECT_NO_RESOURCES 2 /* Out of memory/conns etc */
-#define IBLND_REJECT_FATAL 3 /* Anything else */
-#define IBLND_REJECT_CONN_UNCOMPAT 4 /* peer has an incompatible version */
-#define IBLND_REJECT_CONN_STALE 5 /* stale peer */
-/* peer's rdma frags don't match mine */
-#define IBLND_REJECT_RDMA_FRAGS 6
-/* peer's msg queue size doesn't match mine */
-#define IBLND_REJECT_MSG_QUEUE_SIZE 7
-
-/***********************************************************************/
-
-struct kib_rx { /* receive message */
- struct list_head rx_list; /* queue for attention */
- struct kib_conn *rx_conn; /* owning conn */
- int rx_nob; /* # bytes received (-1 while posted) */
- enum ib_wc_status rx_status; /* completion status */
- struct kib_msg *rx_msg; /* message buffer (host vaddr) */
- __u64 rx_msgaddr; /* message buffer (I/O addr) */
- DECLARE_PCI_UNMAP_ADDR(rx_msgunmap); /* for dma_unmap_single() */
- struct ib_recv_wr rx_wrq; /* receive work item... */
- struct ib_sge rx_sge; /* ...and its memory */
-};
-
-#define IBLND_POSTRX_DONT_POST 0 /* don't post */
-#define IBLND_POSTRX_NO_CREDIT 1 /* post: no credits */
-#define IBLND_POSTRX_PEER_CREDIT 2 /* post: give peer back 1 credit */
-#define IBLND_POSTRX_RSRVD_CREDIT 3 /* post: give self back 1 reserved credit */
-
-struct kib_tx { /* transmit message */
- struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. */
- struct kib_tx_pool *tx_pool; /* pool I'm from */
- struct kib_conn *tx_conn; /* owning conn */
- short tx_sending; /* # tx callbacks outstanding */
- short tx_queued; /* queued for sending */
- short tx_waiting; /* waiting for peer */
- int tx_status; /* LNET completion status */
- unsigned long tx_deadline; /* completion deadline */
- __u64 tx_cookie; /* completion cookie */
- struct lnet_msg *tx_lntmsg[2]; /* lnet msgs to finalize on completion */
- struct kib_msg *tx_msg; /* message buffer (host vaddr) */
- __u64 tx_msgaddr; /* message buffer (I/O addr) */
- DECLARE_PCI_UNMAP_ADDR(tx_msgunmap); /* for dma_unmap_single() */
- int tx_nwrq; /* # send work items */
- struct ib_rdma_wr *tx_wrq; /* send work items... */
- struct ib_sge *tx_sge; /* ...and their memory */
- struct kib_rdma_desc *tx_rd; /* rdma descriptor */
- int tx_nfrags; /* # entries in... */
- struct scatterlist *tx_frags; /* dma_map_sg descriptor */
- __u64 *tx_pages; /* rdma phys page addrs */
- struct kib_fmr fmr; /* FMR */
- int tx_dmadir; /* dma direction */
-};
-
-struct kib_connvars {
- struct kib_msg cv_msg; /* connection-in-progress variables */
-};
-
-struct kib_conn {
- struct kib_sched_info *ibc_sched; /* scheduler information */
- struct kib_peer *ibc_peer; /* owning peer */
- struct kib_hca_dev *ibc_hdev; /* HCA bound on */
- struct list_head ibc_list; /* stash on peer's conn list */
- struct list_head ibc_sched_list; /* schedule for attention */
- __u16 ibc_version; /* version of connection */
- /* reconnect later */
- __u16 ibc_reconnect:1;
- __u64 ibc_incarnation; /* which instance of the peer */
- atomic_t ibc_refcount; /* # users */
- int ibc_state; /* what's happening */
- int ibc_nsends_posted; /* # uncompleted sends */
- int ibc_noops_posted; /* # uncompleted NOOPs */
- int ibc_credits; /* # credits I have */
- int ibc_outstanding_credits; /* # credits to return */
- int ibc_reserved_credits; /* # ACK/DONE msg credits */
- int ibc_comms_error; /* set on comms error */
- /* connections queue depth */
- __u16 ibc_queue_depth;
- /* connections max frags */
- __u16 ibc_max_frags;
- unsigned int ibc_nrx:16; /* receive buffers owned */
- unsigned int ibc_scheduled:1; /* scheduled for attention */
- unsigned int ibc_ready:1; /* CQ callback fired */
- unsigned long ibc_last_send; /* time of last send */
- struct list_head ibc_connd_list; /* link chain for */
- /* kiblnd_check_conns only */
- struct list_head ibc_early_rxs; /* rxs completed before ESTABLISHED */
- struct list_head ibc_tx_noops; /* IBLND_MSG_NOOPs for */
- /* IBLND_MSG_VERSION_1 */
- struct list_head ibc_tx_queue; /* sends that need a credit */
- struct list_head ibc_tx_queue_nocred; /* sends that don't need a */
- /* credit */
- struct list_head ibc_tx_queue_rsrvd; /* sends that need to */
- /* reserve an ACK/DONE msg */
- struct list_head ibc_active_txs; /* active tx awaiting completion */
- spinlock_t ibc_lock; /* serialise */
- struct kib_rx *ibc_rxs; /* the rx descs */
- struct kib_pages *ibc_rx_pages; /* premapped rx msg pages */
-
- struct rdma_cm_id *ibc_cmid; /* CM id */
- struct ib_cq *ibc_cq; /* completion queue */
-
- struct kib_connvars *ibc_connvars; /* in-progress connection state */
-};
-
-#define IBLND_CONN_INIT 0 /* being initialised */
-#define IBLND_CONN_ACTIVE_CONNECT 1 /* active sending req */
-#define IBLND_CONN_PASSIVE_WAIT 2 /* passive waiting for rtu */
-#define IBLND_CONN_ESTABLISHED 3 /* connection established */
-#define IBLND_CONN_CLOSING 4 /* being closed */
-#define IBLND_CONN_DISCONNECTED 5 /* disconnected */
-
-struct kib_peer {
- struct list_head ibp_list; /* stash on global peer list */
- lnet_nid_t ibp_nid; /* who's on the other end(s) */
- struct lnet_ni *ibp_ni; /* LNet interface */
- struct list_head ibp_conns; /* all active connections */
- struct kib_conn *ibp_next_conn; /* next connection to send on for
- * round robin */
- struct list_head ibp_tx_queue; /* msgs waiting for a conn */
- __u64 ibp_incarnation; /* incarnation of peer */
- /* when (in jiffies) I was last alive */
- unsigned long ibp_last_alive;
- /* # users */
- atomic_t ibp_refcount;
- /* version of peer */
- __u16 ibp_version;
- /* current passive connection attempts */
- unsigned short ibp_accepting;
- /* current active connection attempts */
- unsigned short ibp_connecting;
- /* reconnect this peer later */
- unsigned char ibp_reconnecting;
- /* counter of how many times we triggered a conn race */
- unsigned char ibp_races;
- /* # consecutive reconnection attempts to this peer */
- unsigned int ibp_reconnected;
- /* errno on closing this peer */
- int ibp_error;
- /* max map_on_demand */
- __u16 ibp_max_frags;
- /* max_peer_credits */
- __u16 ibp_queue_depth;
-};
-
-extern struct kib_data kiblnd_data;
-
-void kiblnd_hdev_destroy(struct kib_hca_dev *hdev);
-
-int kiblnd_msg_queue_size(int version, struct lnet_ni *ni);
-
-/* max # of fragments configured by user */
-static inline int
-kiblnd_cfg_rdma_frags(struct lnet_ni *ni)
-{
- struct lnet_ioctl_config_o2iblnd_tunables *tunables;
- int mod;
-
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
- mod = tunables->lnd_map_on_demand;
- return mod ? mod : IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT;
-}
-
-static inline int
-kiblnd_rdma_frags(int version, struct lnet_ni *ni)
-{
- return version == IBLND_MSG_VERSION_1 ?
- (IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT) :
- kiblnd_cfg_rdma_frags(ni);
-}
-
-static inline int
-kiblnd_concurrent_sends(int version, struct lnet_ni *ni)
-{
- struct lnet_ioctl_config_o2iblnd_tunables *tunables;
- int concurrent_sends;
-
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
- concurrent_sends = tunables->lnd_concurrent_sends;
-
- if (version == IBLND_MSG_VERSION_1) {
- if (concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2)
- return IBLND_MSG_QUEUE_SIZE_V1 * 2;
-
- if (concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2)
- return IBLND_MSG_QUEUE_SIZE_V1 / 2;
- }
-
- return concurrent_sends;
-}
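
Editor's worked example: with IBLND_MSG_QUEUE_SIZE_V1 == 8, a version-1 connection clamps the tunable to [4, 16]: lnd_concurrent_sends of 2 returns 4, 8 returns 8, and 64 returns 16; version 2 uses the tunable unclamped.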
-
-static inline void
-kiblnd_hdev_addref_locked(struct kib_hca_dev *hdev)
-{
- LASSERT(atomic_read(&hdev->ibh_ref) > 0);
- atomic_inc(&hdev->ibh_ref);
-}
-
-static inline void
-kiblnd_hdev_decref(struct kib_hca_dev *hdev)
-{
- LASSERT(atomic_read(&hdev->ibh_ref) > 0);
- if (atomic_dec_and_test(&hdev->ibh_ref))
- kiblnd_hdev_destroy(hdev);
-}
-
-static inline int
-kiblnd_dev_can_failover(struct kib_dev *dev)
-{
- if (!list_empty(&dev->ibd_fail_list)) /* already scheduled */
- return 0;
-
- if (!*kiblnd_tunables.kib_dev_failover) /* disabled */
- return 0;
-
- if (*kiblnd_tunables.kib_dev_failover > 1) /* force failover */
- return 1;
-
- return dev->ibd_can_failover;
-}
-
-#define kiblnd_conn_addref(conn) \
-do { \
- CDEBUG(D_NET, "conn[%p] (%d)++\n", \
- (conn), atomic_read(&(conn)->ibc_refcount)); \
- atomic_inc(&(conn)->ibc_refcount); \
-} while (0)
-
-#define kiblnd_conn_decref(conn) \
-do { \
- unsigned long flags; \
- \
- CDEBUG(D_NET, "conn[%p] (%d)--\n", \
- (conn), atomic_read(&(conn)->ibc_refcount)); \
- LASSERT_ATOMIC_POS(&(conn)->ibc_refcount); \
- if (atomic_dec_and_test(&(conn)->ibc_refcount)) { \
- spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags); \
- list_add_tail(&(conn)->ibc_list, \
- &kiblnd_data.kib_connd_zombies); \
- wake_up(&kiblnd_data.kib_connd_waitq); \
- spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);\
- } \
-} while (0)
-
-#define kiblnd_peer_addref(peer) \
-do { \
- CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n", \
- (peer), libcfs_nid2str((peer)->ibp_nid), \
- atomic_read(&(peer)->ibp_refcount)); \
- atomic_inc(&(peer)->ibp_refcount); \
-} while (0)
-
-#define kiblnd_peer_decref(peer) \
-do { \
- CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n", \
- (peer), libcfs_nid2str((peer)->ibp_nid), \
- atomic_read(&(peer)->ibp_refcount)); \
- LASSERT_ATOMIC_POS(&(peer)->ibp_refcount); \
- if (atomic_dec_and_test(&(peer)->ibp_refcount)) \
- kiblnd_destroy_peer(peer); \
-} while (0)
-
-static inline bool
-kiblnd_peer_connecting(struct kib_peer *peer)
-{
- return peer->ibp_connecting ||
- peer->ibp_reconnecting ||
- peer->ibp_accepting;
-}
-
-static inline bool
-kiblnd_peer_idle(struct kib_peer *peer)
-{
- return !kiblnd_peer_connecting(peer) && list_empty(&peer->ibp_conns);
-}
-
-static inline struct list_head *
-kiblnd_nid2peerlist(lnet_nid_t nid)
-{
- unsigned int hash =
- ((unsigned int)nid) % kiblnd_data.kib_peer_hash_size;
-
- return &kiblnd_data.kib_peers[hash];
-}
-
-static inline int
-kiblnd_peer_active(struct kib_peer *peer)
-{
- /* Am I in the peer hash table? */
- return !list_empty(&peer->ibp_list);
-}
-
-static inline struct kib_conn *
-kiblnd_get_conn_locked(struct kib_peer *peer)
-{
- struct list_head *next;
-
- LASSERT(!list_empty(&peer->ibp_conns));
-
- /* Advance to next connection, be sure to skip the head node */
- if (!peer->ibp_next_conn ||
- peer->ibp_next_conn->ibc_list.next == &peer->ibp_conns)
- next = peer->ibp_conns.next;
- else
- next = peer->ibp_next_conn->ibc_list.next;
- peer->ibp_next_conn = list_entry(next, struct kib_conn, ibc_list);
-
- return peer->ibp_next_conn;
-}
-
-static inline int
-kiblnd_send_keepalive(struct kib_conn *conn)
-{
- return (*kiblnd_tunables.kib_keepalive > 0) &&
- time_after(jiffies, conn->ibc_last_send +
- msecs_to_jiffies(*kiblnd_tunables.kib_keepalive *
- MSEC_PER_SEC));
-}
-
-static inline int
-kiblnd_need_noop(struct kib_conn *conn)
-{
- struct lnet_ioctl_config_o2iblnd_tunables *tunables;
- struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
-
- LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
-
- if (conn->ibc_outstanding_credits <
- IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) &&
- !kiblnd_send_keepalive(conn))
- return 0; /* No need to send NOOP */
-
- if (IBLND_OOB_CAPABLE(conn->ibc_version)) {
- if (!list_empty(&conn->ibc_tx_queue_nocred))
- return 0; /* NOOP can be piggybacked */
-
- /* No tx to piggyback NOOP onto or no credit to send a tx */
- return (list_empty(&conn->ibc_tx_queue) ||
- !conn->ibc_credits);
- }
-
- if (!list_empty(&conn->ibc_tx_noops) || /* NOOP already queued */
- !list_empty(&conn->ibc_tx_queue_nocred) || /* piggyback NOOP */
- !conn->ibc_credits) /* no credit */
- return 0;
-
- if (conn->ibc_credits == 1 && /* last credit reserved for */
- !conn->ibc_outstanding_credits) /* giving back credits */
- return 0;
-
- /* No tx to piggyback NOOP onto or no credit to send a tx */
- return (list_empty(&conn->ibc_tx_queue) || conn->ibc_credits == 1);
-}
-
-static inline void
-kiblnd_abort_receives(struct kib_conn *conn)
-{
- ib_modify_qp(conn->ibc_cmid->qp,
- &kiblnd_data.kib_error_qpa, IB_QP_STATE);
-}
-
-static inline const char *
-kiblnd_queue2str(struct kib_conn *conn, struct list_head *q)
-{
- if (q == &conn->ibc_tx_queue)
- return "tx_queue";
-
- if (q == &conn->ibc_tx_queue_rsrvd)
- return "tx_queue_rsrvd";
-
- if (q == &conn->ibc_tx_queue_nocred)
- return "tx_queue_nocred";
-
- if (q == &conn->ibc_active_txs)
- return "active_txs";
-
- LBUG();
- return NULL;
-}
-
-/* CAVEAT EMPTOR: We rely on descriptor alignment to allow us to use the */
-/* lowest bits of the work request id to stash the work item type. */
-
-#define IBLND_WID_INVAL 0
-#define IBLND_WID_TX 1
-#define IBLND_WID_RX 2
-#define IBLND_WID_RDMA 3
-#define IBLND_WID_MR 4
-#define IBLND_WID_MASK 7UL
-
-static inline __u64
-kiblnd_ptr2wreqid(void *ptr, int type)
-{
- unsigned long lptr = (unsigned long)ptr;
-
- LASSERT(!(lptr & IBLND_WID_MASK));
- LASSERT(!(type & ~IBLND_WID_MASK));
- return (__u64)(lptr | type);
-}
-
-static inline void *
-kiblnd_wreqid2ptr(__u64 wreqid)
-{
- return (void *)(((unsigned long)wreqid) & ~IBLND_WID_MASK);
-}
-
-static inline int
-kiblnd_wreqid2type(__u64 wreqid)
-{
- return wreqid & IBLND_WID_MASK;
-}
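
Editor's sketch (not from the original file): a tag/untag round trip through the helpers above. Descriptors are at least 8-byte aligned, so the low three bits of the pointer are free to carry the IBLND_WID_* type, as the LASSERTs in kiblnd_ptr2wreqid() check.

static inline void kib_wreqid_example(struct kib_tx *tx)
{
	__u64 wrid = kiblnd_ptr2wreqid(tx, IBLND_WID_TX);

	/* the completion handler recovers both the pointer and the type */
	LASSERT(kiblnd_wreqid2ptr(wrid) == tx);
	LASSERT(kiblnd_wreqid2type(wrid) == IBLND_WID_TX);
}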
-
-static inline void
-kiblnd_set_conn_state(struct kib_conn *conn, int state)
-{
- conn->ibc_state = state;
-	mb(); /* memory barrier: order the state update with surrounding accesses */
-}
-
-static inline void
-kiblnd_init_msg(struct kib_msg *msg, int type, int body_nob)
-{
- msg->ibm_type = type;
- msg->ibm_nob = offsetof(struct kib_msg, ibm_u) + body_nob;
-}
-
-static inline int
-kiblnd_rd_size(struct kib_rdma_desc *rd)
-{
- int i;
- int size;
-
- for (i = size = 0; i < rd->rd_nfrags; i++)
- size += rd->rd_frags[i].rf_nob;
-
- return size;
-}
-
-static inline __u64
-kiblnd_rd_frag_addr(struct kib_rdma_desc *rd, int index)
-{
- return rd->rd_frags[index].rf_addr;
-}
-
-static inline __u32
-kiblnd_rd_frag_size(struct kib_rdma_desc *rd, int index)
-{
- return rd->rd_frags[index].rf_nob;
-}
-
-static inline __u32
-kiblnd_rd_frag_key(struct kib_rdma_desc *rd, int index)
-{
- return rd->rd_key;
-}
-
-static inline int
-kiblnd_rd_consume_frag(struct kib_rdma_desc *rd, int index, __u32 nob)
-{
- if (nob < rd->rd_frags[index].rf_nob) {
- rd->rd_frags[index].rf_addr += nob;
- rd->rd_frags[index].rf_nob -= nob;
- } else {
- index++;
- }
-
- return index;
-}
-
-static inline int
-kiblnd_rd_msg_size(struct kib_rdma_desc *rd, int msgtype, int n)
-{
- LASSERT(msgtype == IBLND_MSG_GET_REQ ||
- msgtype == IBLND_MSG_PUT_ACK);
-
- return msgtype == IBLND_MSG_GET_REQ ?
- offsetof(struct kib_get_msg, ibgm_rd.rd_frags[n]) :
- offsetof(struct kib_putack_msg, ibpam_rd.rd_frags[n]);
-}
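
Editor's worked example: because rd_frags[] is a variable-length trailer, the wire size of a GET_REQ describing n fragments is offsetof(struct kib_get_msg, ibgm_rd.rd_frags[n]), i.e. the lnet_hdr, the 8-byte cookie, the 8-byte rd_key/rd_nfrags header, and then 12 packed bytes per fragment (each kib_rdma_frag is 4 + 8 bytes under WIRE_ATTR); the BUILD_BUG_ONs in ko2iblnd_init() verify that even n == IBLND_MAX_RDMA_FRAGS fits inside the 4 KiB IBLND_MSG_SIZE.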
-
-static inline __u64
-kiblnd_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
- return ib_dma_mapping_error(dev, dma_addr);
-}
-
-static inline __u64 kiblnd_dma_map_single(struct ib_device *dev,
- void *msg, size_t size,
- enum dma_data_direction direction)
-{
- return ib_dma_map_single(dev, msg, size, direction);
-}
-
-static inline void kiblnd_dma_unmap_single(struct ib_device *dev,
- __u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- ib_dma_unmap_single(dev, addr, size, direction);
-}
-
-#define KIBLND_UNMAP_ADDR_SET(p, m, a) do {} while (0)
-#define KIBLND_UNMAP_ADDR(p, m, a) (a)
-
-static inline int kiblnd_dma_map_sg(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- return ib_dma_map_sg(dev, sg, nents, direction);
-}
-
-static inline void kiblnd_dma_unmap_sg(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- ib_dma_unmap_sg(dev, sg, nents, direction);
-}
-
-static inline __u64 kiblnd_sg_dma_address(struct ib_device *dev,
- struct scatterlist *sg)
-{
- return ib_sg_dma_address(dev, sg);
-}
-
-static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
- struct scatterlist *sg)
-{
- return ib_sg_dma_len(dev, sg);
-}
-
-/* XXX We use KIBLND_CONN_PARAM(e) as a writable buffer; this is not */
-/* strictly right because OFED 1.2 declares it const, so we have to */
-/* add a (void *) cast to strip the const before using it. */
-
-#define KIBLND_CONN_PARAM(e) ((e)->param.conn.private_data)
-#define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len)
-
-void kiblnd_map_rx_descs(struct kib_conn *conn);
-void kiblnd_unmap_rx_descs(struct kib_conn *conn);
-void kiblnd_pool_free_node(struct kib_pool *pool, struct list_head *node);
-struct list_head *kiblnd_pool_alloc_node(struct kib_poolset *ps);
-
-int kiblnd_fmr_pool_map(struct kib_fmr_poolset *fps, struct kib_tx *tx,
- struct kib_rdma_desc *rd, __u32 nob, __u64 iov,
- struct kib_fmr *fmr);
-void kiblnd_fmr_pool_unmap(struct kib_fmr *fmr, int status);
-
-int kiblnd_tunables_setup(struct lnet_ni *ni);
-void kiblnd_tunables_init(void);
-
-int kiblnd_connd(void *arg);
-int kiblnd_scheduler(void *arg);
-int kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name);
-int kiblnd_failover_thread(void *arg);
-
-int kiblnd_alloc_pages(struct kib_pages **pp, int cpt, int npages);
-
-int kiblnd_cm_callback(struct rdma_cm_id *cmid,
- struct rdma_cm_event *event);
-int kiblnd_translate_mtu(int value);
-
-int kiblnd_dev_failover(struct kib_dev *dev);
-int kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer **peerp,
- lnet_nid_t nid);
-void kiblnd_destroy_peer(struct kib_peer *peer);
-bool kiblnd_reconnect_peer(struct kib_peer *peer);
-void kiblnd_destroy_dev(struct kib_dev *dev);
-void kiblnd_unlink_peer_locked(struct kib_peer *peer);
-struct kib_peer *kiblnd_find_peer_locked(lnet_nid_t nid);
-int kiblnd_close_stale_conns_locked(struct kib_peer *peer,
- int version, __u64 incarnation);
-int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why);
-
-struct kib_conn *kiblnd_create_conn(struct kib_peer *peer,
- struct rdma_cm_id *cmid,
- int state, int version);
-void kiblnd_destroy_conn(struct kib_conn *conn);
-void kiblnd_close_conn(struct kib_conn *conn, int error);
-void kiblnd_close_conn_locked(struct kib_conn *conn, int error);
-
-void kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid);
-void kiblnd_txlist_done(struct lnet_ni *ni, struct list_head *txlist,
- int status);
-
-void kiblnd_qp_event(struct ib_event *event, void *arg);
-void kiblnd_cq_event(struct ib_event *event, void *arg);
-void kiblnd_cq_completion(struct ib_cq *cq, void *arg);
-
-void kiblnd_pack_msg(struct lnet_ni *ni, struct kib_msg *msg, int version,
- int credits, lnet_nid_t dstnid, __u64 dststamp);
-int kiblnd_unpack_msg(struct kib_msg *msg, int nob);
-int kiblnd_post_rx(struct kib_rx *rx, int credit);
-
-int kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg);
-int kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
- int delayed, struct iov_iter *to, unsigned int rlen);
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
deleted file mode 100644
index 65b7a62943ad..000000000000
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ /dev/null
@@ -1,3763 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/o2iblnd/o2iblnd_cb.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include <linux/highmem.h>
-#include "o2iblnd.h"
-
-#define MAX_CONN_RACES_BEFORE_ABORT 20
-
-static void kiblnd_peer_alive(struct kib_peer *peer);
-static void kiblnd_peer_connect_failed(struct kib_peer *peer, int active, int error);
-static void kiblnd_init_tx_msg(struct lnet_ni *ni, struct kib_tx *tx,
- int type, int body_nob);
-static int kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
- int resid, struct kib_rdma_desc *dstrd,
- __u64 dstcookie);
-static void kiblnd_queue_tx_locked(struct kib_tx *tx, struct kib_conn *conn);
-static void kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn);
-static void kiblnd_unmap_tx(struct kib_tx *tx);
-static void kiblnd_check_sends_locked(struct kib_conn *conn);
-
-static void
-kiblnd_tx_done(struct lnet_ni *ni, struct kib_tx *tx)
-{
- struct lnet_msg *lntmsg[2];
- struct kib_net *net = ni->ni_data;
- int rc;
- int i;
-
- LASSERT(net);
- LASSERT(!in_interrupt());
- LASSERT(!tx->tx_queued); /* mustn't be queued for sending */
- LASSERT(!tx->tx_sending); /* mustn't be awaiting sent callback */
- LASSERT(!tx->tx_waiting); /* mustn't be awaiting peer response */
- LASSERT(tx->tx_pool);
-
- kiblnd_unmap_tx(tx);
-
- /* tx may have up to 2 lnet msgs to finalise */
- lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL;
- lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL;
- rc = tx->tx_status;
-
- if (tx->tx_conn) {
- LASSERT(ni == tx->tx_conn->ibc_peer->ibp_ni);
-
- kiblnd_conn_decref(tx->tx_conn);
- tx->tx_conn = NULL;
- }
-
- tx->tx_nwrq = 0;
- tx->tx_status = 0;
-
- kiblnd_pool_free_node(&tx->tx_pool->tpo_pool, &tx->tx_list);
-
- /* delay finalize until my descs have been freed */
- for (i = 0; i < 2; i++) {
- if (!lntmsg[i])
- continue;
-
- lnet_finalize(ni, lntmsg[i], rc);
- }
-}
-
-void
-kiblnd_txlist_done(struct lnet_ni *ni, struct list_head *txlist, int status)
-{
- struct kib_tx *tx;
-
- while (!list_empty(txlist)) {
- tx = list_entry(txlist->next, struct kib_tx, tx_list);
-
- list_del(&tx->tx_list);
- /* complete now */
- tx->tx_waiting = 0;
- tx->tx_status = status;
- kiblnd_tx_done(ni, tx);
- }
-}
-
-static struct kib_tx *
-kiblnd_get_idle_tx(struct lnet_ni *ni, lnet_nid_t target)
-{
- struct kib_net *net = (struct kib_net *)ni->ni_data;
- struct list_head *node;
- struct kib_tx *tx;
- struct kib_tx_poolset *tps;
-
- tps = net->ibn_tx_ps[lnet_cpt_of_nid(target)];
- node = kiblnd_pool_alloc_node(&tps->tps_poolset);
- if (!node)
- return NULL;
- tx = list_entry(node, struct kib_tx, tx_list);
-
- LASSERT(!tx->tx_nwrq);
- LASSERT(!tx->tx_queued);
- LASSERT(!tx->tx_sending);
- LASSERT(!tx->tx_waiting);
- LASSERT(!tx->tx_status);
- LASSERT(!tx->tx_conn);
- LASSERT(!tx->tx_lntmsg[0]);
- LASSERT(!tx->tx_lntmsg[1]);
- LASSERT(!tx->tx_nfrags);
-
- return tx;
-}
-
-static void
-kiblnd_drop_rx(struct kib_rx *rx)
-{
- struct kib_conn *conn = rx->rx_conn;
- struct kib_sched_info *sched = conn->ibc_sched;
- unsigned long flags;
-
- spin_lock_irqsave(&sched->ibs_lock, flags);
- LASSERT(conn->ibc_nrx > 0);
- conn->ibc_nrx--;
- spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
- kiblnd_conn_decref(conn);
-}
-
-int
-kiblnd_post_rx(struct kib_rx *rx, int credit)
-{
- struct kib_conn *conn = rx->rx_conn;
- struct kib_net *net = conn->ibc_peer->ibp_ni->ni_data;
- struct ib_recv_wr *bad_wrq = NULL;
- int rc;
-
- LASSERT(net);
- LASSERT(!in_interrupt());
- LASSERT(credit == IBLND_POSTRX_NO_CREDIT ||
- credit == IBLND_POSTRX_PEER_CREDIT ||
- credit == IBLND_POSTRX_RSRVD_CREDIT);
-
- rx->rx_sge.lkey = conn->ibc_hdev->ibh_pd->local_dma_lkey;
- rx->rx_sge.addr = rx->rx_msgaddr;
- rx->rx_sge.length = IBLND_MSG_SIZE;
-
- rx->rx_wrq.next = NULL;
- rx->rx_wrq.sg_list = &rx->rx_sge;
- rx->rx_wrq.num_sge = 1;
- rx->rx_wrq.wr_id = kiblnd_ptr2wreqid(rx, IBLND_WID_RX);
-
- LASSERT(conn->ibc_state >= IBLND_CONN_INIT);
- LASSERT(rx->rx_nob >= 0); /* not posted */
-
- if (conn->ibc_state > IBLND_CONN_ESTABLISHED) {
- kiblnd_drop_rx(rx); /* No more posts for this rx */
- return 0;
- }
-
- rx->rx_nob = -1; /* flag posted */
-
- /* NB: need an extra reference after ib_post_recv because we don't
- * own this rx (and rx::rx_conn) anymore, LU-5678.
- */
- kiblnd_conn_addref(conn);
- rc = ib_post_recv(conn->ibc_cmid->qp, &rx->rx_wrq, &bad_wrq);
- if (unlikely(rc)) {
- CERROR("Can't post rx for %s: %d, bad_wrq: %p\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc, bad_wrq);
- rx->rx_nob = 0;
- }
-
- if (conn->ibc_state < IBLND_CONN_ESTABLISHED) /* Initial post */
- goto out;
-
- if (unlikely(rc)) {
- kiblnd_close_conn(conn, rc);
- kiblnd_drop_rx(rx); /* No more posts for this rx */
- goto out;
- }
-
- if (credit == IBLND_POSTRX_NO_CREDIT)
- goto out;
-
- spin_lock(&conn->ibc_lock);
- if (credit == IBLND_POSTRX_PEER_CREDIT)
- conn->ibc_outstanding_credits++;
- else
- conn->ibc_reserved_credits++;
- kiblnd_check_sends_locked(conn);
- spin_unlock(&conn->ibc_lock);
-
-out:
- kiblnd_conn_decref(conn);
- return rc;
-}
-
-static struct kib_tx *
-kiblnd_find_waiting_tx_locked(struct kib_conn *conn, int txtype, __u64 cookie)
-{
- struct list_head *tmp;
-
- list_for_each(tmp, &conn->ibc_active_txs) {
- struct kib_tx *tx = list_entry(tmp, struct kib_tx, tx_list);
-
- LASSERT(!tx->tx_queued);
- LASSERT(tx->tx_sending || tx->tx_waiting);
-
- if (tx->tx_cookie != cookie)
- continue;
-
- if (tx->tx_waiting &&
- tx->tx_msg->ibm_type == txtype)
- return tx;
-
- CWARN("Bad completion: %swaiting, type %x (wanted %x)\n",
- tx->tx_waiting ? "" : "NOT ",
- tx->tx_msg->ibm_type, txtype);
- }
- return NULL;
-}
-
-static void
-kiblnd_handle_completion(struct kib_conn *conn, int txtype, int status, __u64 cookie)
-{
- struct kib_tx *tx;
- struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
- int idle;
-
- spin_lock(&conn->ibc_lock);
-
- tx = kiblnd_find_waiting_tx_locked(conn, txtype, cookie);
- if (!tx) {
- spin_unlock(&conn->ibc_lock);
-
- CWARN("Unmatched completion type %x cookie %#llx from %s\n",
- txtype, cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kiblnd_close_conn(conn, -EPROTO);
- return;
- }
-
- if (!tx->tx_status) { /* success so far */
- if (status < 0) /* failed? */
- tx->tx_status = status;
- else if (txtype == IBLND_MSG_GET_REQ)
- lnet_set_reply_msg_len(ni, tx->tx_lntmsg[1], status);
- }
-
- tx->tx_waiting = 0;
-
- idle = !tx->tx_queued && !tx->tx_sending;
- if (idle)
- list_del(&tx->tx_list);
-
- spin_unlock(&conn->ibc_lock);
-
- if (idle)
- kiblnd_tx_done(ni, tx);
-}
-
-static void
-kiblnd_send_completion(struct kib_conn *conn, int type, int status, __u64 cookie)
-{
- struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
- struct kib_tx *tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid);
-
- if (!tx) {
- CERROR("Can't get tx for completion %x for %s\n",
- type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- return;
- }
-
- tx->tx_msg->ibm_u.completion.ibcm_status = status;
- tx->tx_msg->ibm_u.completion.ibcm_cookie = cookie;
- kiblnd_init_tx_msg(ni, tx, type, sizeof(struct kib_completion_msg));
-
- kiblnd_queue_tx(tx, conn);
-}
-
-static void
-kiblnd_handle_rx(struct kib_rx *rx)
-{
- struct kib_msg *msg = rx->rx_msg;
- struct kib_conn *conn = rx->rx_conn;
- struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
- int credits = msg->ibm_credits;
- struct kib_tx *tx;
- int rc = 0;
- int rc2;
- int post_credit;
-
- LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
- CDEBUG(D_NET, "Received %x[%d] from %s\n",
- msg->ibm_type, credits,
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- if (credits) {
- /* Have I received credits that will let me send? */
- spin_lock(&conn->ibc_lock);
-
- if (conn->ibc_credits + credits >
- conn->ibc_queue_depth) {
- rc2 = conn->ibc_credits;
- spin_unlock(&conn->ibc_lock);
-
- CERROR("Bad credits from %s: %d + %d > %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- rc2, credits, conn->ibc_queue_depth);
-
- kiblnd_close_conn(conn, -EPROTO);
- kiblnd_post_rx(rx, IBLND_POSTRX_NO_CREDIT);
- return;
- }
-
- conn->ibc_credits += credits;
-
- /* This ensures the credit taken by NOOP can be returned */
- if (msg->ibm_type == IBLND_MSG_NOOP &&
- !IBLND_OOB_CAPABLE(conn->ibc_version)) /* v1 only */
- conn->ibc_outstanding_credits++;
-
- kiblnd_check_sends_locked(conn);
- spin_unlock(&conn->ibc_lock);
- }
-
- switch (msg->ibm_type) {
- default:
- CERROR("Bad IBLND message type %x from %s\n",
- msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- post_credit = IBLND_POSTRX_NO_CREDIT;
- rc = -EPROTO;
- break;
-
- case IBLND_MSG_NOOP:
- if (IBLND_OOB_CAPABLE(conn->ibc_version)) {
- post_credit = IBLND_POSTRX_NO_CREDIT;
- break;
- }
-
- if (credits) /* credit already posted */
- post_credit = IBLND_POSTRX_NO_CREDIT;
- else /* a keepalive NOOP */
- post_credit = IBLND_POSTRX_PEER_CREDIT;
- break;
-
- case IBLND_MSG_IMMEDIATE:
- post_credit = IBLND_POSTRX_DONT_POST;
- rc = lnet_parse(ni, &msg->ibm_u.immediate.ibim_hdr,
- msg->ibm_srcnid, rx, 0);
- if (rc < 0) /* repost on error */
- post_credit = IBLND_POSTRX_PEER_CREDIT;
- break;
-
- case IBLND_MSG_PUT_REQ:
- post_credit = IBLND_POSTRX_DONT_POST;
- rc = lnet_parse(ni, &msg->ibm_u.putreq.ibprm_hdr,
- msg->ibm_srcnid, rx, 1);
- if (rc < 0) /* repost on error */
- post_credit = IBLND_POSTRX_PEER_CREDIT;
- break;
-
- case IBLND_MSG_PUT_NAK:
- CWARN("PUT_NACK from %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- post_credit = IBLND_POSTRX_RSRVD_CREDIT;
- kiblnd_handle_completion(conn, IBLND_MSG_PUT_REQ,
- msg->ibm_u.completion.ibcm_status,
- msg->ibm_u.completion.ibcm_cookie);
- break;
-
- case IBLND_MSG_PUT_ACK:
- post_credit = IBLND_POSTRX_RSRVD_CREDIT;
-
- spin_lock(&conn->ibc_lock);
- tx = kiblnd_find_waiting_tx_locked(conn, IBLND_MSG_PUT_REQ,
- msg->ibm_u.putack.ibpam_src_cookie);
- if (tx)
- list_del(&tx->tx_list);
- spin_unlock(&conn->ibc_lock);
-
- if (!tx) {
- CERROR("Unmatched PUT_ACK from %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- rc = -EPROTO;
- break;
- }
-
- LASSERT(tx->tx_waiting);
- /*
- * CAVEAT EMPTOR: I could be racing with tx_complete, but...
- * (a) I can overwrite tx_msg since my peer has received it!
-		 * (b) setting tx_waiting tells tx_complete() it's not done.
- */
- tx->tx_nwrq = 0; /* overwrite PUT_REQ */
-
- rc2 = kiblnd_init_rdma(conn, tx, IBLND_MSG_PUT_DONE,
- kiblnd_rd_size(&msg->ibm_u.putack.ibpam_rd),
- &msg->ibm_u.putack.ibpam_rd,
- msg->ibm_u.putack.ibpam_dst_cookie);
- if (rc2 < 0)
- CERROR("Can't setup rdma for PUT to %s: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc2);
-
- spin_lock(&conn->ibc_lock);
- tx->tx_waiting = 0; /* clear waiting and queue atomically */
- kiblnd_queue_tx_locked(tx, conn);
- spin_unlock(&conn->ibc_lock);
- break;
-
- case IBLND_MSG_PUT_DONE:
- post_credit = IBLND_POSTRX_PEER_CREDIT;
- kiblnd_handle_completion(conn, IBLND_MSG_PUT_ACK,
- msg->ibm_u.completion.ibcm_status,
- msg->ibm_u.completion.ibcm_cookie);
- break;
-
- case IBLND_MSG_GET_REQ:
- post_credit = IBLND_POSTRX_DONT_POST;
- rc = lnet_parse(ni, &msg->ibm_u.get.ibgm_hdr,
- msg->ibm_srcnid, rx, 1);
- if (rc < 0) /* repost on error */
- post_credit = IBLND_POSTRX_PEER_CREDIT;
- break;
-
- case IBLND_MSG_GET_DONE:
- post_credit = IBLND_POSTRX_RSRVD_CREDIT;
- kiblnd_handle_completion(conn, IBLND_MSG_GET_REQ,
- msg->ibm_u.completion.ibcm_status,
- msg->ibm_u.completion.ibcm_cookie);
- break;
- }
-
- if (rc < 0) /* protocol error */
- kiblnd_close_conn(conn, rc);
-
- if (post_credit != IBLND_POSTRX_DONT_POST)
- kiblnd_post_rx(rx, post_credit);
-}
-
-static void
-kiblnd_rx_complete(struct kib_rx *rx, int status, int nob)
-{
- struct kib_msg *msg = rx->rx_msg;
- struct kib_conn *conn = rx->rx_conn;
- struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
- struct kib_net *net = ni->ni_data;
- int rc;
- int err = -EIO;
-
- LASSERT(net);
- LASSERT(rx->rx_nob < 0); /* was posted */
- rx->rx_nob = 0; /* isn't now */
-
- if (conn->ibc_state > IBLND_CONN_ESTABLISHED)
- goto ignore;
-
- if (status != IB_WC_SUCCESS) {
- CNETERR("Rx from %s failed: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), status);
- goto failed;
- }
-
- LASSERT(nob >= 0);
- rx->rx_nob = nob;
-
- rc = kiblnd_unpack_msg(msg, rx->rx_nob);
- if (rc) {
- CERROR("Error %d unpacking rx from %s\n",
- rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- goto failed;
- }
-
- if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid ||
- msg->ibm_dstnid != ni->ni_nid ||
- msg->ibm_srcstamp != conn->ibc_incarnation ||
- msg->ibm_dststamp != net->ibn_incarnation) {
- CERROR("Stale rx from %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- err = -ESTALE;
- goto failed;
- }
-
- /* set time last known alive */
- kiblnd_peer_alive(conn->ibc_peer);
-
- /* racing with connection establishment/teardown! */
-
- if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
- rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
- unsigned long flags;
-
- write_lock_irqsave(g_lock, flags);
- /* must check holding global lock to eliminate race */
- if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
- list_add_tail(&rx->rx_list, &conn->ibc_early_rxs);
- write_unlock_irqrestore(g_lock, flags);
- return;
- }
- write_unlock_irqrestore(g_lock, flags);
- }
- kiblnd_handle_rx(rx);
- return;
-
- failed:
- CDEBUG(D_NET, "rx %p conn %p\n", rx, conn);
- kiblnd_close_conn(conn, err);
- ignore:
- kiblnd_drop_rx(rx); /* Don't re-post rx. */
-}
-
-static struct page *
-kiblnd_kvaddr_to_page(unsigned long vaddr)
-{
- struct page *page;
-
- if (is_vmalloc_addr((void *)vaddr)) {
- page = vmalloc_to_page((void *)vaddr);
- LASSERT(page);
- return page;
- }
-#ifdef CONFIG_HIGHMEM
- if (vaddr >= PKMAP_BASE &&
- vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) {
-		/* No highmem addresses expected: highmem pages are only used for bulk (kiov) I/O */
- CERROR("find page for address in highmem\n");
- LBUG();
- }
-#endif
- page = virt_to_page(vaddr);
- LASSERT(page);
- return page;
-}
-
-static int
-kiblnd_fmr_map_tx(struct kib_net *net, struct kib_tx *tx, struct kib_rdma_desc *rd, __u32 nob)
-{
- struct kib_hca_dev *hdev;
- struct kib_fmr_poolset *fps;
- int cpt;
- int rc;
-
- LASSERT(tx->tx_pool);
- LASSERT(tx->tx_pool->tpo_pool.po_owner);
-
- hdev = tx->tx_pool->tpo_hdev;
- cpt = tx->tx_pool->tpo_pool.po_owner->ps_cpt;
-
- fps = net->ibn_fmr_ps[cpt];
- rc = kiblnd_fmr_pool_map(fps, tx, rd, nob, 0, &tx->fmr);
- if (rc) {
- CERROR("Can't map %u bytes: %d\n", nob, rc);
- return rc;
- }
-
- /*
- * If rd is not tx_rd, it's going to get sent to a peer, who will need
- * the rkey
- */
- rd->rd_key = tx->fmr.fmr_key;
- rd->rd_frags[0].rf_addr &= ~hdev->ibh_page_mask;
- rd->rd_frags[0].rf_nob = nob;
- rd->rd_nfrags = 1;
-
- return 0;
-}
-
-static void kiblnd_unmap_tx(struct kib_tx *tx)
-{
- if (tx->fmr.fmr_pfmr || tx->fmr.fmr_frd)
- kiblnd_fmr_pool_unmap(&tx->fmr, tx->tx_status);
-
- if (tx->tx_nfrags) {
- kiblnd_dma_unmap_sg(tx->tx_pool->tpo_hdev->ibh_ibdev,
- tx->tx_frags, tx->tx_nfrags, tx->tx_dmadir);
- tx->tx_nfrags = 0;
- }
-}
-
-static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
- struct kib_rdma_desc *rd, int nfrags)
-{
- struct kib_net *net = ni->ni_data;
- struct kib_hca_dev *hdev = net->ibn_dev->ibd_hdev;
- __u32 nob;
- int i;
-
- /*
- * If rd is not tx_rd, it's going to get sent to a peer and I'm the
- * RDMA sink
- */
- tx->tx_dmadir = (rd != tx->tx_rd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
- tx->tx_nfrags = nfrags;
-
- rd->rd_nfrags = kiblnd_dma_map_sg(hdev->ibh_ibdev, tx->tx_frags,
- tx->tx_nfrags, tx->tx_dmadir);
-
- for (i = 0, nob = 0; i < rd->rd_nfrags; i++) {
- rd->rd_frags[i].rf_nob = kiblnd_sg_dma_len(
- hdev->ibh_ibdev, &tx->tx_frags[i]);
- rd->rd_frags[i].rf_addr = kiblnd_sg_dma_address(
- hdev->ibh_ibdev, &tx->tx_frags[i]);
- nob += rd->rd_frags[i].rf_nob;
- }
-
- if (net->ibn_fmr_ps)
- return kiblnd_fmr_map_tx(net, tx, rd, nob);
-
- return -EINVAL;
-}
-
-static int
-kiblnd_setup_rd_iov(struct lnet_ni *ni, struct kib_tx *tx,
- struct kib_rdma_desc *rd, unsigned int niov,
- const struct kvec *iov, int offset, int nob)
-{
- struct kib_net *net = ni->ni_data;
- struct page *page;
- struct scatterlist *sg;
- unsigned long vaddr;
- int fragnob;
- int page_offset;
-
- LASSERT(nob > 0);
- LASSERT(niov > 0);
- LASSERT(net);
-
- while (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- niov--;
- iov++;
- LASSERT(niov > 0);
- }
-
- sg = tx->tx_frags;
- do {
- LASSERT(niov > 0);
-
- vaddr = ((unsigned long)iov->iov_base) + offset;
- page_offset = vaddr & (PAGE_SIZE - 1);
- page = kiblnd_kvaddr_to_page(vaddr);
- if (!page) {
- CERROR("Can't find page\n");
- return -EFAULT;
- }
-
- fragnob = min((int)(iov->iov_len - offset), nob);
- fragnob = min(fragnob, (int)PAGE_SIZE - page_offset);
-
- sg_set_page(sg, page, fragnob, page_offset);
- sg = sg_next(sg);
- if (!sg) {
- CERROR("lacking enough sg entries to map tx\n");
- return -EFAULT;
- }
-
- if (offset + fragnob < iov->iov_len) {
- offset += fragnob;
- } else {
- offset = 0;
- iov++;
- niov--;
- }
- nob -= fragnob;
- } while (nob > 0);
-
- return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags);
-}
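
Editor's sketch: each scatterlist entry above covers at most one page, so the fragment size is the minimum of what remains in the current iovec, what remains of the transfer, and what remains of the page. Isolated as a pure function (hypothetical helper, not from the original file):

static int kib_iov_fragnob(size_t iov_len, int offset, int nob,
			   int page_offset)
{
	int fragnob = min((int)(iov_len - offset), nob);

	return min(fragnob, (int)PAGE_SIZE - page_offset);
}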
-
-static int
-kiblnd_setup_rd_kiov(struct lnet_ni *ni, struct kib_tx *tx,
- struct kib_rdma_desc *rd, int nkiov,
- const struct bio_vec *kiov, int offset, int nob)
-{
- struct kib_net *net = ni->ni_data;
- struct scatterlist *sg;
- int fragnob;
-
- CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
-
- LASSERT(nob > 0);
- LASSERT(nkiov > 0);
- LASSERT(net);
-
- while (offset >= kiov->bv_len) {
- offset -= kiov->bv_len;
- nkiov--;
- kiov++;
- LASSERT(nkiov > 0);
- }
-
- sg = tx->tx_frags;
- do {
- LASSERT(nkiov > 0);
-
- fragnob = min((int)(kiov->bv_len - offset), nob);
-
- sg_set_page(sg, kiov->bv_page, fragnob,
- kiov->bv_offset + offset);
- sg = sg_next(sg);
- if (!sg) {
- CERROR("lacking enough sg entries to map tx\n");
- return -EFAULT;
- }
-
- offset = 0;
- kiov++;
- nkiov--;
- nob -= fragnob;
- } while (nob > 0);
-
- return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags);
-}
-
-static int
-kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit)
- __must_hold(&conn->ibc_lock)
-{
- struct kib_msg *msg = tx->tx_msg;
- struct kib_peer *peer = conn->ibc_peer;
- struct lnet_ni *ni = peer->ibp_ni;
- int ver = conn->ibc_version;
- int rc;
- int done;
-
- LASSERT(tx->tx_queued);
- /* We rely on this for QP sizing */
- LASSERT(tx->tx_nwrq > 0);
-
- LASSERT(!credit || credit == 1);
- LASSERT(conn->ibc_outstanding_credits >= 0);
- LASSERT(conn->ibc_outstanding_credits <= conn->ibc_queue_depth);
- LASSERT(conn->ibc_credits >= 0);
- LASSERT(conn->ibc_credits <= conn->ibc_queue_depth);
-
- if (conn->ibc_nsends_posted == kiblnd_concurrent_sends(ver, ni)) {
- /* tx completions outstanding... */
- CDEBUG(D_NET, "%s: posted enough\n",
- libcfs_nid2str(peer->ibp_nid));
- return -EAGAIN;
- }
-
- if (credit && !conn->ibc_credits) { /* no credits */
- CDEBUG(D_NET, "%s: no credits\n",
- libcfs_nid2str(peer->ibp_nid));
- return -EAGAIN;
- }
-
- if (credit && !IBLND_OOB_CAPABLE(ver) &&
- conn->ibc_credits == 1 && /* last credit reserved */
- msg->ibm_type != IBLND_MSG_NOOP) { /* for NOOP */
- CDEBUG(D_NET, "%s: not using last credit\n",
- libcfs_nid2str(peer->ibp_nid));
- return -EAGAIN;
- }
-
- /* NB don't drop ibc_lock before bumping tx_sending */
- list_del(&tx->tx_list);
- tx->tx_queued = 0;
-
- if (msg->ibm_type == IBLND_MSG_NOOP &&
- (!kiblnd_need_noop(conn) || /* redundant NOOP */
- (IBLND_OOB_CAPABLE(ver) && /* posted enough NOOP */
- conn->ibc_noops_posted == IBLND_OOB_MSGS(ver)))) {
- /*
- * OK to drop when posted enough NOOPs, since
- * kiblnd_check_sends_locked will queue NOOP again when
- * posted NOOPs complete
- */
- spin_unlock(&conn->ibc_lock);
- kiblnd_tx_done(peer->ibp_ni, tx);
- spin_lock(&conn->ibc_lock);
- CDEBUG(D_NET, "%s(%d): redundant or enough NOOP\n",
- libcfs_nid2str(peer->ibp_nid),
- conn->ibc_noops_posted);
- return 0;
- }
-
- kiblnd_pack_msg(peer->ibp_ni, msg, ver, conn->ibc_outstanding_credits,
- peer->ibp_nid, conn->ibc_incarnation);
-
- conn->ibc_credits -= credit;
- conn->ibc_outstanding_credits = 0;
- conn->ibc_nsends_posted++;
- if (msg->ibm_type == IBLND_MSG_NOOP)
- conn->ibc_noops_posted++;
-
- /*
- * CAVEAT EMPTOR! This tx could be the PUT_DONE of an RDMA
- * PUT. If so, it was first queued here as a PUT_REQ, sent and
- * stashed on ibc_active_txs, matched by an incoming PUT_ACK,
- * and then re-queued here. It's (just) possible that
- * tx_sending is non-zero if we've not done the tx_complete()
- * from the first send; hence the ++ rather than = below.
- */
- tx->tx_sending++;
- list_add(&tx->tx_list, &conn->ibc_active_txs);
-
- /* I'm still holding ibc_lock! */
- if (conn->ibc_state != IBLND_CONN_ESTABLISHED) {
- rc = -ECONNABORTED;
- } else if (tx->tx_pool->tpo_pool.po_failed ||
- conn->ibc_hdev != tx->tx_pool->tpo_hdev) {
- /* close_conn will launch failover */
- rc = -ENETDOWN;
- } else {
- struct kib_fast_reg_descriptor *frd = tx->fmr.fmr_frd;
- struct ib_send_wr *bad = &tx->tx_wrq[tx->tx_nwrq - 1].wr;
- struct ib_send_wr *wrq = &tx->tx_wrq[0].wr;
-
- if (frd) {
- if (!frd->frd_valid) {
- wrq = &frd->frd_inv_wr;
- wrq->next = &frd->frd_fastreg_wr.wr;
- } else {
- wrq = &frd->frd_fastreg_wr.wr;
- }
- frd->frd_fastreg_wr.wr.next = &tx->tx_wrq[0].wr;
- }
-
- LASSERTF(bad->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX),
- "bad wr_id %llx, opc %d, flags %d, peer: %s\n",
- bad->wr_id, bad->opcode, bad->send_flags,
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- bad = NULL;
- rc = ib_post_send(conn->ibc_cmid->qp, wrq, &bad);
- }
-
- conn->ibc_last_send = jiffies;
-
- if (!rc)
- return 0;
-
- /*
- * NB credits are transferred in the actual
- * message, which can only be the last work item
- */
- conn->ibc_credits += credit;
- conn->ibc_outstanding_credits += msg->ibm_credits;
- conn->ibc_nsends_posted--;
- if (msg->ibm_type == IBLND_MSG_NOOP)
- conn->ibc_noops_posted--;
-
- tx->tx_status = rc;
- tx->tx_waiting = 0;
- tx->tx_sending--;
-
- done = !tx->tx_sending;
- if (done)
- list_del(&tx->tx_list);
-
- spin_unlock(&conn->ibc_lock);
-
- if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
- CERROR("Error %d posting transmit to %s\n",
- rc, libcfs_nid2str(peer->ibp_nid));
- else
- CDEBUG(D_NET, "Error %d posting transmit to %s\n",
- rc, libcfs_nid2str(peer->ibp_nid));
-
- kiblnd_close_conn(conn, rc);
-
- if (done)
- kiblnd_tx_done(peer->ibp_ni, tx);
-
- spin_lock(&conn->ibc_lock);
-
- return -EIO;
-}
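/*
 * A minimal sketch of the send-credit rule enforced above, assuming a
 * non-OOB-capable connection: a message needs a credit, and a normal
 * message may not consume the last one, which stays reserved for a
 * NOOP that returns credits to the peer. Names are illustrative.
 */
#include <stdbool.h>
#include <stdio.h>

enum msg_type_sketch { MSG_NORMAL, MSG_NOOP };

static bool may_post(int credits, bool oob_capable, enum msg_type_sketch type)
{
	if (!credits)
		return false;	/* no credits at all */
	if (!oob_capable && credits == 1 && type != MSG_NOOP)
		return false;	/* last credit reserved for NOOP */
	return true;
}

int main(void)
{
	printf("2 credits, normal: %d\n", may_post(2, false, MSG_NORMAL));
	printf("1 credit,  normal: %d\n", may_post(1, false, MSG_NORMAL));
	printf("1 credit,  NOOP:   %d\n", may_post(1, false, MSG_NOOP));
	printf("0 credits, NOOP:   %d\n", may_post(0, false, MSG_NOOP));
	return 0;
}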
-
-static void
-kiblnd_check_sends_locked(struct kib_conn *conn)
-{
- int ver = conn->ibc_version;
- struct lnet_ni *ni = conn->ibc_peer->ibp_ni;
- struct kib_tx *tx;
-
- /* Don't send anything until after the connection is established */
- if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
- CDEBUG(D_NET, "%s too soon\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- return;
- }
-
- LASSERT(conn->ibc_nsends_posted <= kiblnd_concurrent_sends(ver, ni));
- LASSERT(!IBLND_OOB_CAPABLE(ver) ||
- conn->ibc_noops_posted <= IBLND_OOB_MSGS(ver));
- LASSERT(conn->ibc_reserved_credits >= 0);
-
- while (conn->ibc_reserved_credits > 0 &&
- !list_empty(&conn->ibc_tx_queue_rsrvd)) {
- tx = list_entry(conn->ibc_tx_queue_rsrvd.next,
- struct kib_tx, tx_list);
- list_del(&tx->tx_list);
- list_add_tail(&tx->tx_list, &conn->ibc_tx_queue);
- conn->ibc_reserved_credits--;
- }
-
- if (kiblnd_need_noop(conn)) {
- spin_unlock(&conn->ibc_lock);
-
- tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid);
- if (tx)
- kiblnd_init_tx_msg(ni, tx, IBLND_MSG_NOOP, 0);
-
- spin_lock(&conn->ibc_lock);
- if (tx)
- kiblnd_queue_tx_locked(tx, conn);
- }
-
- for (;;) {
- int credit;
-
- if (!list_empty(&conn->ibc_tx_queue_nocred)) {
- credit = 0;
- tx = list_entry(conn->ibc_tx_queue_nocred.next,
- struct kib_tx, tx_list);
- } else if (!list_empty(&conn->ibc_tx_noops)) {
- LASSERT(!IBLND_OOB_CAPABLE(ver));
- credit = 1;
- tx = list_entry(conn->ibc_tx_noops.next,
- struct kib_tx, tx_list);
- } else if (!list_empty(&conn->ibc_tx_queue)) {
- credit = 1;
- tx = list_entry(conn->ibc_tx_queue.next,
- struct kib_tx, tx_list);
- } else {
- break;
- }
-
- if (kiblnd_post_tx_locked(conn, tx, credit))
- break;
- }
-}
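/*
 * A sketch of the dequeue priority in the loop above: credit-free
 * completions drain first, then queued NOOPs (non-OOB connections
 * only), then normal sends. Purely illustrative.
 */
#include <stdio.h>

static const char *pick_queue(int nocred, int noops, int normal)
{
	if (nocred)
		return "ibc_tx_queue_nocred";
	if (noops)
		return "ibc_tx_noops";
	if (normal)
		return "ibc_tx_queue";
	return "idle";
}

int main(void)
{
	printf("%s\n", pick_queue(1, 1, 1));
	printf("%s\n", pick_queue(0, 1, 1));
	printf("%s\n", pick_queue(0, 0, 1));
	printf("%s\n", pick_queue(0, 0, 0));
	return 0;
}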
-
-static void
-kiblnd_tx_complete(struct kib_tx *tx, int status)
-{
- int failed = (status != IB_WC_SUCCESS);
- struct kib_conn *conn = tx->tx_conn;
- int idle;
-
- LASSERT(tx->tx_sending > 0);
-
- if (failed) {
- if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
- CNETERR("Tx -> %s cookie %#llx sending %d waiting %d: failed %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- tx->tx_cookie, tx->tx_sending, tx->tx_waiting,
- status);
-
- kiblnd_close_conn(conn, -EIO);
- } else {
- kiblnd_peer_alive(conn->ibc_peer);
- }
-
- spin_lock(&conn->ibc_lock);
-
- /*
- * I could be racing with rdma completion. Whoever makes 'tx' idle
- * gets to free it, which also drops its ref on 'conn'.
- */
- tx->tx_sending--;
- conn->ibc_nsends_posted--;
- if (tx->tx_msg->ibm_type == IBLND_MSG_NOOP)
- conn->ibc_noops_posted--;
-
- if (failed) {
- tx->tx_waiting = 0; /* don't wait for peer */
- tx->tx_status = -EIO;
- }
-
- idle = !tx->tx_sending && /* This is the final callback */
- !tx->tx_waiting && /* Not waiting for peer */
- !tx->tx_queued; /* Not re-queued (PUT_DONE) */
- if (idle)
- list_del(&tx->tx_list);
-
- kiblnd_check_sends_locked(conn);
- spin_unlock(&conn->ibc_lock);
-
- if (idle)
- kiblnd_tx_done(conn->ibc_peer->ibp_ni, tx);
-}
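/*
 * A sketch of the tx-idle test above: a tx may be freed only when its
 * last send completion has run, it is not waiting for a peer reply,
 * and it has not been re-queued (e.g. as a PUT_DONE). Illustrative.
 */
#include <stdbool.h>
#include <stdio.h>

static bool tx_idle(int sending, bool waiting, bool queued)
{
	return !sending && !waiting && !queued;
}

int main(void)
{
	printf("done, no reply due:  %d\n", tx_idle(0, false, false));
	printf("awaiting peer reply: %d\n", tx_idle(0, true, false));
	printf("send still posted:   %d\n", tx_idle(1, false, false));
	return 0;
}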
-
-static void
-kiblnd_init_tx_msg(struct lnet_ni *ni, struct kib_tx *tx, int type,
- int body_nob)
-{
- struct kib_hca_dev *hdev = tx->tx_pool->tpo_hdev;
- struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq];
- struct ib_rdma_wr *wrq = &tx->tx_wrq[tx->tx_nwrq];
- int nob = offsetof(struct kib_msg, ibm_u) + body_nob;
-
- LASSERT(tx->tx_nwrq >= 0);
- LASSERT(tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1);
- LASSERT(nob <= IBLND_MSG_SIZE);
-
- kiblnd_init_msg(tx->tx_msg, type, body_nob);
-
- sge->lkey = hdev->ibh_pd->local_dma_lkey;
- sge->addr = tx->tx_msgaddr;
- sge->length = nob;
-
- memset(wrq, 0, sizeof(*wrq));
-
- wrq->wr.next = NULL;
- wrq->wr.wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_TX);
- wrq->wr.sg_list = sge;
- wrq->wr.num_sge = 1;
- wrq->wr.opcode = IB_WR_SEND;
- wrq->wr.send_flags = IB_SEND_SIGNALED;
-
- tx->tx_nwrq++;
-}
-
-static int
-kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
- int resid, struct kib_rdma_desc *dstrd, __u64 dstcookie)
-{
- struct kib_msg *ibmsg = tx->tx_msg;
- struct kib_rdma_desc *srcrd = tx->tx_rd;
- struct ib_sge *sge = &tx->tx_sge[0];
- struct ib_rdma_wr *wrq, *next;
- int rc = resid;
- int srcidx = 0;
- int dstidx = 0;
- int wrknob;
-
- LASSERT(!in_interrupt());
- LASSERT(!tx->tx_nwrq);
- LASSERT(type == IBLND_MSG_GET_DONE ||
- type == IBLND_MSG_PUT_DONE);
-
- if (kiblnd_rd_size(srcrd) > conn->ibc_max_frags << PAGE_SHIFT) {
- CERROR("RDMA is too large for peer %s (%d), src size: %d dst size: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- conn->ibc_max_frags << PAGE_SHIFT,
- kiblnd_rd_size(srcrd), kiblnd_rd_size(dstrd));
- rc = -EMSGSIZE;
- goto too_big;
- }
-
- while (resid > 0) {
- if (srcidx >= srcrd->rd_nfrags) {
- CERROR("Src buffer exhausted: %d frags\n", srcidx);
- rc = -EPROTO;
- break;
- }
-
- if (dstidx == dstrd->rd_nfrags) {
- CERROR("Dst buffer exhausted: %d frags\n", dstidx);
- rc = -EPROTO;
- break;
- }
-
- if (tx->tx_nwrq >= IBLND_MAX_RDMA_FRAGS) {
- CERROR("RDMA has too many fragments for peer %s (%d), src idx/frags: %d/%d dst idx/frags: %d/%d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- IBLND_MAX_RDMA_FRAGS,
- srcidx, srcrd->rd_nfrags,
- dstidx, dstrd->rd_nfrags);
- rc = -EMSGSIZE;
- break;
- }
-
- wrknob = min3(kiblnd_rd_frag_size(srcrd, srcidx),
- kiblnd_rd_frag_size(dstrd, dstidx),
- (__u32)resid);
-
- sge = &tx->tx_sge[tx->tx_nwrq];
- sge->addr = kiblnd_rd_frag_addr(srcrd, srcidx);
- sge->lkey = kiblnd_rd_frag_key(srcrd, srcidx);
- sge->length = wrknob;
-
- wrq = &tx->tx_wrq[tx->tx_nwrq];
- next = wrq + 1;
-
- wrq->wr.next = &next->wr;
- wrq->wr.wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA);
- wrq->wr.sg_list = sge;
- wrq->wr.num_sge = 1;
- wrq->wr.opcode = IB_WR_RDMA_WRITE;
- wrq->wr.send_flags = 0;
-
- wrq->remote_addr = kiblnd_rd_frag_addr(dstrd, dstidx);
- wrq->rkey = kiblnd_rd_frag_key(dstrd, dstidx);
-
- srcidx = kiblnd_rd_consume_frag(srcrd, srcidx, wrknob);
- dstidx = kiblnd_rd_consume_frag(dstrd, dstidx, wrknob);
-
- resid -= wrknob;
-
- tx->tx_nwrq++;
- wrq++;
- sge++;
- }
-too_big:
- if (rc < 0) /* no RDMA if completing with failure */
- tx->tx_nwrq = 0;
-
- ibmsg->ibm_u.completion.ibcm_status = rc;
- ibmsg->ibm_u.completion.ibcm_cookie = dstcookie;
- kiblnd_init_tx_msg(conn->ibc_peer->ibp_ni, tx,
- type, sizeof(struct kib_completion_msg));
-
- return rc;
-}
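/*
 * A standalone sketch of the fragment-pairing loop above: each RDMA
 * write covers min(source fragment left, sink fragment left, bytes
 * remaining), and a fragment advances once fully consumed. Sizes are
 * hypothetical.
 */
#include <stdio.h>

static int min3i(int a, int b, int c)
{
	int m = a < b ? a : b;

	return m < c ? m : c;
}

int main(void)
{
	int src[] = { 4096, 4096 };	/* source fragment sizes */
	int dst[] = { 2048, 6144 };	/* sink fragment sizes */
	int si = 0, di = 0, resid = 8192, nwrq = 0;

	while (resid > 0) {
		int nob = min3i(src[si], dst[di], resid);

		printf("wrq %d: %d bytes (src frag %d, dst frag %d)\n",
		       nwrq++, nob, si, di);
		src[si] -= nob;
		dst[di] -= nob;
		resid -= nob;
		if (!src[si])
			si++;
		if (!dst[di])
			di++;
	}
	return 0;
}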
-
-static void
-kiblnd_queue_tx_locked(struct kib_tx *tx, struct kib_conn *conn)
-{
- struct list_head *q;
-
- LASSERT(tx->tx_nwrq > 0); /* work items set up */
- LASSERT(!tx->tx_queued); /* not queued for sending already */
- LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
- tx->tx_queued = 1;
- tx->tx_deadline = jiffies +
- msecs_to_jiffies(*kiblnd_tunables.kib_timeout *
- MSEC_PER_SEC);
-
- if (!tx->tx_conn) {
- kiblnd_conn_addref(conn);
- tx->tx_conn = conn;
- LASSERT(tx->tx_msg->ibm_type != IBLND_MSG_PUT_DONE);
- } else {
- /* PUT_DONE first attached to conn as a PUT_REQ */
- LASSERT(tx->tx_conn == conn);
- LASSERT(tx->tx_msg->ibm_type == IBLND_MSG_PUT_DONE);
- }
-
- switch (tx->tx_msg->ibm_type) {
- default:
- LBUG();
-
- case IBLND_MSG_PUT_REQ:
- case IBLND_MSG_GET_REQ:
- q = &conn->ibc_tx_queue_rsrvd;
- break;
-
- case IBLND_MSG_PUT_NAK:
- case IBLND_MSG_PUT_ACK:
- case IBLND_MSG_PUT_DONE:
- case IBLND_MSG_GET_DONE:
- q = &conn->ibc_tx_queue_nocred;
- break;
-
- case IBLND_MSG_NOOP:
- if (IBLND_OOB_CAPABLE(conn->ibc_version))
- q = &conn->ibc_tx_queue_nocred;
- else
- q = &conn->ibc_tx_noops;
- break;
-
- case IBLND_MSG_IMMEDIATE:
- q = &conn->ibc_tx_queue;
- break;
- }
-
- list_add_tail(&tx->tx_list, q);
-}
-
-static void
-kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn)
-{
- spin_lock(&conn->ibc_lock);
- kiblnd_queue_tx_locked(tx, conn);
- kiblnd_check_sends_locked(conn);
- spin_unlock(&conn->ibc_lock);
-}
-
-static int kiblnd_resolve_addr(struct rdma_cm_id *cmid,
- struct sockaddr_in *srcaddr,
- struct sockaddr_in *dstaddr,
- int timeout_ms)
-{
- unsigned short port;
- int rc;
-
- /* allow the port to be reused */
- rc = rdma_set_reuseaddr(cmid, 1);
- if (rc) {
- CERROR("Unable to set reuse on cmid: %d\n", rc);
- return rc;
- }
-
- /* look for a free privileged port */
- for (port = PROT_SOCK - 1; port > 0; port--) {
- srcaddr->sin_port = htons(port);
- rc = rdma_resolve_addr(cmid,
- (struct sockaddr *)srcaddr,
- (struct sockaddr *)dstaddr,
- timeout_ms);
- if (!rc) {
- CDEBUG(D_NET, "bound to port %hu\n", port);
- return 0;
- } else if (rc == -EADDRINUSE || rc == -EADDRNOTAVAIL) {
- CDEBUG(D_NET, "bind to port %hu failed: %d\n",
- port, rc);
- } else {
- return rc;
- }
- }
-
- CERROR("Failed to bind to a free privileged port\n");
- return rc;
-}
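/*
 * A sketch of the privileged-port walk above: try every port below
 * 1024 from the top down and keep the first that binds. try_bind()
 * is a hypothetical stand-in for rdma_resolve_addr() with a bound
 * source port.
 */
#include <stdio.h>

#define PROT_SOCK_SKETCH 1024

static int try_bind(unsigned short port)
{
	/* pretend ports 1000 and above are already in use */
	return port >= 1000 ? -1 : 0;
}

int main(void)
{
	unsigned short port;

	for (port = PROT_SOCK_SKETCH - 1; port > 0; port--) {
		if (!try_bind(port)) {
			printf("bound to port %hu\n", port);
			return 0;
		}
	}
	fprintf(stderr, "no free privileged port\n");
	return 1;
}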
-
-static void
-kiblnd_connect_peer(struct kib_peer *peer)
-{
- struct rdma_cm_id *cmid;
- struct kib_dev *dev;
- struct kib_net *net = peer->ibp_ni->ni_data;
- struct sockaddr_in srcaddr;
- struct sockaddr_in dstaddr;
- int rc;
-
- LASSERT(net);
- LASSERT(peer->ibp_connecting > 0);
-
- cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, peer, RDMA_PS_TCP,
- IB_QPT_RC);
-
- if (IS_ERR(cmid)) {
- CERROR("Can't create CMID for %s: %ld\n",
- libcfs_nid2str(peer->ibp_nid), PTR_ERR(cmid));
- rc = PTR_ERR(cmid);
- goto failed;
- }
-
- dev = net->ibn_dev;
- memset(&srcaddr, 0, sizeof(srcaddr));
- srcaddr.sin_family = AF_INET;
- srcaddr.sin_addr.s_addr = htonl(dev->ibd_ifip);
-
- memset(&dstaddr, 0, sizeof(dstaddr));
- dstaddr.sin_family = AF_INET;
- dstaddr.sin_port = htons(*kiblnd_tunables.kib_service);
- dstaddr.sin_addr.s_addr = htonl(LNET_NIDADDR(peer->ibp_nid));
-
- kiblnd_peer_addref(peer); /* cmid's ref */
-
- if (*kiblnd_tunables.kib_use_priv_port) {
- rc = kiblnd_resolve_addr(cmid, &srcaddr, &dstaddr,
- *kiblnd_tunables.kib_timeout * 1000);
- } else {
- rc = rdma_resolve_addr(cmid,
- (struct sockaddr *)&srcaddr,
- (struct sockaddr *)&dstaddr,
- *kiblnd_tunables.kib_timeout * 1000);
- }
- if (rc) {
- /* Can't initiate address resolution */
- CERROR("Can't resolve addr for %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc);
- goto failed2;
- }
-
- return;
-
- failed2:
- kiblnd_peer_connect_failed(peer, 1, rc);
- kiblnd_peer_decref(peer); /* cmid's ref */
- rdma_destroy_id(cmid);
- return;
- failed:
- kiblnd_peer_connect_failed(peer, 1, rc);
-}
-
-bool
-kiblnd_reconnect_peer(struct kib_peer *peer)
-{
- rwlock_t *glock = &kiblnd_data.kib_global_lock;
- char *reason = NULL;
- struct list_head txs;
- unsigned long flags;
-
- INIT_LIST_HEAD(&txs);
-
- write_lock_irqsave(glock, flags);
- if (!peer->ibp_reconnecting) {
- if (peer->ibp_accepting)
- reason = "accepting";
- else if (peer->ibp_connecting)
- reason = "connecting";
- else if (!list_empty(&peer->ibp_conns))
- reason = "connected";
- else /* connected then closed */
- reason = "closed";
-
- goto no_reconnect;
- }
-
- LASSERT(!peer->ibp_accepting && !peer->ibp_connecting &&
- list_empty(&peer->ibp_conns));
- peer->ibp_reconnecting--;
-
- if (!kiblnd_peer_active(peer)) {
- list_splice_init(&peer->ibp_tx_queue, &txs);
- reason = "unlinked";
- goto no_reconnect;
- }
-
- peer->ibp_connecting++;
- peer->ibp_reconnected++;
- write_unlock_irqrestore(glock, flags);
-
- kiblnd_connect_peer(peer);
- return true;
-
-no_reconnect:
- write_unlock_irqrestore(glock, flags);
-
- CWARN("Abort reconnection of %s: %s\n",
- libcfs_nid2str(peer->ibp_nid), reason);
- kiblnd_txlist_done(peer->ibp_ni, &txs, -ECONNABORTED);
- return false;
-}
-
-void
-kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid)
-{
- struct kib_peer *peer;
- struct kib_peer *peer2;
- struct kib_conn *conn;
- rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
- unsigned long flags;
- int rc;
- int i;
- struct lnet_ioctl_config_o2iblnd_tunables *tunables;
-
- /*
- * If I get here, I've committed to send, so I complete the tx with
- * failure on any problems
- */
- LASSERT(!tx || !tx->tx_conn); /* only set when assigned a conn */
- LASSERT(!tx || tx->tx_nwrq > 0); /* work items have been set up */
-
- /*
- * First time, just use a read lock since I expect to find my peer
- * connected
- */
- read_lock_irqsave(g_lock, flags);
-
- peer = kiblnd_find_peer_locked(nid);
- if (peer && !list_empty(&peer->ibp_conns)) {
- /* Found a peer with an established connection */
- conn = kiblnd_get_conn_locked(peer);
- kiblnd_conn_addref(conn); /* 1 ref for me... */
-
- read_unlock_irqrestore(g_lock, flags);
-
- if (tx)
- kiblnd_queue_tx(tx, conn);
- kiblnd_conn_decref(conn); /* ...to here */
- return;
- }
-
- read_unlock(g_lock);
- /* Re-try with a write lock */
- write_lock(g_lock);
-
- peer = kiblnd_find_peer_locked(nid);
- if (peer) {
- if (list_empty(&peer->ibp_conns)) {
- /* found a peer, but it's still connecting... */
- LASSERT(kiblnd_peer_connecting(peer));
- if (tx)
- list_add_tail(&tx->tx_list,
- &peer->ibp_tx_queue);
- write_unlock_irqrestore(g_lock, flags);
- } else {
- conn = kiblnd_get_conn_locked(peer);
- kiblnd_conn_addref(conn); /* 1 ref for me... */
-
- write_unlock_irqrestore(g_lock, flags);
-
- if (tx)
- kiblnd_queue_tx(tx, conn);
- kiblnd_conn_decref(conn); /* ...to here */
- }
- return;
- }
-
- write_unlock_irqrestore(g_lock, flags);
-
- /* Allocate a peer ready to add to the peer table and retry */
- rc = kiblnd_create_peer(ni, &peer, nid);
- if (rc) {
- CERROR("Can't create peer %s\n", libcfs_nid2str(nid));
- if (tx) {
- tx->tx_status = -EHOSTUNREACH;
- tx->tx_waiting = 0;
- kiblnd_tx_done(ni, tx);
- }
- return;
- }
-
- write_lock_irqsave(g_lock, flags);
-
- peer2 = kiblnd_find_peer_locked(nid);
- if (peer2) {
- if (list_empty(&peer2->ibp_conns)) {
- /* found a peer, but it's still connecting... */
- LASSERT(kiblnd_peer_connecting(peer2));
- if (tx)
- list_add_tail(&tx->tx_list,
- &peer2->ibp_tx_queue);
- write_unlock_irqrestore(g_lock, flags);
- } else {
- conn = kiblnd_get_conn_locked(peer2);
- kiblnd_conn_addref(conn); /* 1 ref for me... */
-
- write_unlock_irqrestore(g_lock, flags);
-
- if (tx)
- kiblnd_queue_tx(tx, conn);
- kiblnd_conn_decref(conn); /* ...to here */
- }
-
- kiblnd_peer_decref(peer);
- return;
- }
-
- /* Brand new peer */
- LASSERT(!peer->ibp_connecting);
- tunables = &peer->ibp_ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
- peer->ibp_connecting = tunables->lnd_conns_per_peer;
-
- /* always called with a ref on ni, which prevents ni being shutdown */
- LASSERT(!((struct kib_net *)ni->ni_data)->ibn_shutdown);
-
- if (tx)
- list_add_tail(&tx->tx_list, &peer->ibp_tx_queue);
-
- kiblnd_peer_addref(peer);
- list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid));
-
- write_unlock_irqrestore(g_lock, flags);
-
- for (i = 0; i < tunables->lnd_conns_per_peer; i++)
- kiblnd_connect_peer(peer);
- kiblnd_peer_decref(peer);
-}
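/*
 * A minimal sketch of the lookup pattern above: an optimistic
 * read-locked lookup, allocation outside the lock, then a
 * write-locked re-check before insert so a racing creator wins
 * cleanly. A single slot stands in for the peer hash table; all
 * names are illustrative. Build with -lpthread.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_rwlock_t g_lock = PTHREAD_RWLOCK_INITIALIZER;
static int *peer_slot;			/* stand-in for the peer table */

static int *find_or_create_peer(void)
{
	int *peer;

	pthread_rwlock_rdlock(&g_lock);	/* fast path: expect a hit */
	peer = peer_slot;
	pthread_rwlock_unlock(&g_lock);
	if (peer)
		return peer;

	peer = malloc(sizeof(*peer));	/* allocate outside the lock */
	if (!peer)
		return NULL;
	*peer = 42;

	pthread_rwlock_wrlock(&g_lock);	/* slow path: re-check, insert */
	if (peer_slot) {
		free(peer);		/* lost the race; use the winner */
		peer = peer_slot;
	} else {
		peer_slot = peer;
	}
	pthread_rwlock_unlock(&g_lock);
	return peer;
}

int main(void)
{
	int *p = find_or_create_peer();

	if (!p)
		return 1;
	printf("first lookup:  %d\n", *p);
	printf("second lookup: %d\n", *find_or_create_peer());
	free(peer_slot);
	return 0;
}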
-
-int
-kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
-{
- struct lnet_hdr *hdr = &lntmsg->msg_hdr;
- int type = lntmsg->msg_type;
- struct lnet_process_id target = lntmsg->msg_target;
- int target_is_router = lntmsg->msg_target_is_router;
- int routing = lntmsg->msg_routing;
- unsigned int payload_niov = lntmsg->msg_niov;
- struct kvec *payload_iov = lntmsg->msg_iov;
- struct bio_vec *payload_kiov = lntmsg->msg_kiov;
- unsigned int payload_offset = lntmsg->msg_offset;
- unsigned int payload_nob = lntmsg->msg_len;
- struct iov_iter from;
- struct kib_msg *ibmsg;
- struct kib_rdma_desc *rd;
- struct kib_tx *tx;
- int nob;
- int rc;
-
- /* NB 'private' is different depending on what we're sending.... */
-
- CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n",
- payload_nob, payload_niov, libcfs_id2str(target));
-
- LASSERT(!payload_nob || payload_niov > 0);
- LASSERT(payload_niov <= LNET_MAX_IOV);
-
- /* Thread context */
- LASSERT(!in_interrupt());
- /* payload is either all vaddrs or all pages */
- LASSERT(!(payload_kiov && payload_iov));
-
- if (payload_kiov)
- iov_iter_bvec(&from, ITER_BVEC | WRITE,
- payload_kiov, payload_niov,
- payload_nob + payload_offset);
- else
- iov_iter_kvec(&from, ITER_KVEC | WRITE,
- payload_iov, payload_niov,
- payload_nob + payload_offset);
-
- iov_iter_advance(&from, payload_offset);
-
- switch (type) {
- default:
- LBUG();
- return -EIO;
-
- case LNET_MSG_ACK:
- LASSERT(!payload_nob);
- break;
-
- case LNET_MSG_GET:
- if (routing || target_is_router)
- break; /* send IMMEDIATE */
-
- /* is the REPLY message too small for RDMA? */
- nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]);
- if (nob <= IBLND_MSG_SIZE)
- break; /* send IMMEDIATE */
-
- tx = kiblnd_get_idle_tx(ni, target.nid);
- if (!tx) {
- CERROR("Can't allocate txd for GET to %s\n",
- libcfs_nid2str(target.nid));
- return -ENOMEM;
- }
-
- ibmsg = tx->tx_msg;
- rd = &ibmsg->ibm_u.get.ibgm_rd;
- if (!(lntmsg->msg_md->md_options & LNET_MD_KIOV))
- rc = kiblnd_setup_rd_iov(ni, tx, rd,
- lntmsg->msg_md->md_niov,
- lntmsg->msg_md->md_iov.iov,
- 0, lntmsg->msg_md->md_length);
- else
- rc = kiblnd_setup_rd_kiov(ni, tx, rd,
- lntmsg->msg_md->md_niov,
- lntmsg->msg_md->md_iov.kiov,
- 0, lntmsg->msg_md->md_length);
- if (rc) {
- CERROR("Can't setup GET sink for %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- kiblnd_tx_done(ni, tx);
- return -EIO;
- }
-
- nob = offsetof(struct kib_get_msg, ibgm_rd.rd_frags[rd->rd_nfrags]);
- ibmsg->ibm_u.get.ibgm_cookie = tx->tx_cookie;
- ibmsg->ibm_u.get.ibgm_hdr = *hdr;
-
- kiblnd_init_tx_msg(ni, tx, IBLND_MSG_GET_REQ, nob);
-
- tx->tx_lntmsg[1] = lnet_create_reply_msg(ni, lntmsg);
- if (!tx->tx_lntmsg[1]) {
- CERROR("Can't create reply for GET -> %s\n",
- libcfs_nid2str(target.nid));
- kiblnd_tx_done(ni, tx);
- return -EIO;
- }
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg[0,1] on completion */
- tx->tx_waiting = 1; /* waiting for GET_DONE */
- kiblnd_launch_tx(ni, tx, target.nid);
- return 0;
-
- case LNET_MSG_REPLY:
- case LNET_MSG_PUT:
- /* Is the payload small enough not to need RDMA? */
- nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[payload_nob]);
- if (nob <= IBLND_MSG_SIZE)
- break; /* send IMMEDIATE */
-
- tx = kiblnd_get_idle_tx(ni, target.nid);
- if (!tx) {
- CERROR("Can't allocate %s txd for %s\n",
- type == LNET_MSG_PUT ? "PUT" : "REPLY",
- libcfs_nid2str(target.nid));
- return -ENOMEM;
- }
-
- if (!payload_kiov)
- rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd,
- payload_niov, payload_iov,
- payload_offset, payload_nob);
- else
- rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd,
- payload_niov, payload_kiov,
- payload_offset, payload_nob);
- if (rc) {
- CERROR("Can't setup PUT src for %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- kiblnd_tx_done(ni, tx);
- return -EIO;
- }
-
- ibmsg = tx->tx_msg;
- ibmsg->ibm_u.putreq.ibprm_hdr = *hdr;
- ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie;
- kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_REQ, sizeof(struct kib_putreq_msg));
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
- tx->tx_waiting = 1; /* waiting for PUT_{ACK,NAK} */
- kiblnd_launch_tx(ni, tx, target.nid);
- return 0;
- }
-
- /* send IMMEDIATE */
-
- LASSERT(offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[payload_nob])
- <= IBLND_MSG_SIZE);
-
- tx = kiblnd_get_idle_tx(ni, target.nid);
- if (!tx) {
- CERROR("Can't send %d to %s: tx descs exhausted\n",
- type, libcfs_nid2str(target.nid));
- return -ENOMEM;
- }
-
- ibmsg = tx->tx_msg;
- ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
-
- rc = copy_from_iter(&ibmsg->ibm_u.immediate.ibim_payload, payload_nob,
- &from);
- if (rc != payload_nob) {
- kiblnd_pool_free_node(&tx->tx_pool->tpo_pool, &tx->tx_list);
- return -EFAULT;
- }
-
- nob = offsetof(struct kib_immediate_msg, ibim_payload[payload_nob]);
- kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob);
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
- kiblnd_launch_tx(ni, tx, target.nid);
- return 0;
-}
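/*
 * A sketch of the IMMEDIATE-vs-RDMA decision above: the payload goes
 * inline only if header plus payload fits in the fixed message size.
 * The 4096-byte limit and 80-byte header are illustrative stand-ins
 * for IBLND_MSG_SIZE and the kib_msg header.
 */
#include <stdio.h>

#define SKETCH_MSG_SIZE 4096
#define SKETCH_HDR_SIZE 80

static const char *path_for(int payload_nob)
{
	return SKETCH_HDR_SIZE + payload_nob <= SKETCH_MSG_SIZE ?
	       "IMMEDIATE" : "RDMA";
}

int main(void)
{
	printf("1 KiB payload:  %s\n", path_for(1024));
	printf("64 KiB payload: %s\n", path_for(65536));
	return 0;
}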
-
-static void
-kiblnd_reply(struct lnet_ni *ni, struct kib_rx *rx, struct lnet_msg *lntmsg)
-{
- struct lnet_process_id target = lntmsg->msg_target;
- unsigned int niov = lntmsg->msg_niov;
- struct kvec *iov = lntmsg->msg_iov;
- struct bio_vec *kiov = lntmsg->msg_kiov;
- unsigned int offset = lntmsg->msg_offset;
- unsigned int nob = lntmsg->msg_len;
- struct kib_tx *tx;
- int rc;
-
- tx = kiblnd_get_idle_tx(ni, rx->rx_conn->ibc_peer->ibp_nid);
- if (!tx) {
- CERROR("Can't get tx for REPLY to %s\n",
- libcfs_nid2str(target.nid));
- goto failed_0;
- }
-
- if (!nob)
- rc = 0;
- else if (!kiov)
- rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd,
- niov, iov, offset, nob);
- else
- rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd,
- niov, kiov, offset, nob);
-
- if (rc) {
- CERROR("Can't setup GET src for %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- goto failed_1;
- }
-
- rc = kiblnd_init_rdma(rx->rx_conn, tx,
- IBLND_MSG_GET_DONE, nob,
- &rx->rx_msg->ibm_u.get.ibgm_rd,
- rx->rx_msg->ibm_u.get.ibgm_cookie);
- if (rc < 0) {
- CERROR("Can't setup rdma for GET from %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- goto failed_1;
- }
-
- if (!nob) {
- /* No RDMA: local completion may happen now! */
- lnet_finalize(ni, lntmsg, 0);
- } else {
- /* RDMA: lnet_finalize(lntmsg) when it completes */
- tx->tx_lntmsg[0] = lntmsg;
- }
-
- kiblnd_queue_tx(tx, rx->rx_conn);
- return;
-
- failed_1:
- kiblnd_tx_done(ni, tx);
- failed_0:
- lnet_finalize(ni, lntmsg, -EIO);
-}
-
-int
-kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
- int delayed, struct iov_iter *to, unsigned int rlen)
-{
- struct kib_rx *rx = private;
- struct kib_msg *rxmsg = rx->rx_msg;
- struct kib_conn *conn = rx->rx_conn;
- struct kib_tx *tx;
- int nob;
- int post_credit = IBLND_POSTRX_PEER_CREDIT;
- int rc = 0;
-
- LASSERT(iov_iter_count(to) <= rlen);
- LASSERT(!in_interrupt());
- /* Either all pages or all vaddrs */
-
- switch (rxmsg->ibm_type) {
- default:
- LBUG();
-
- case IBLND_MSG_IMMEDIATE:
- nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[rlen]);
- if (nob > rx->rx_nob) {
- CERROR("Immediate message from %s too big: %d(%d)\n",
- libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid),
- nob, rx->rx_nob);
- rc = -EPROTO;
- break;
- }
-
- rc = copy_to_iter(&rxmsg->ibm_u.immediate.ibim_payload, rlen,
- to);
- if (rc != rlen) {
- rc = -EFAULT;
- break;
- }
-
- rc = 0;
- lnet_finalize(ni, lntmsg, 0);
- break;
-
- case IBLND_MSG_PUT_REQ: {
- struct kib_msg *txmsg;
- struct kib_rdma_desc *rd;
-
- if (!iov_iter_count(to)) {
- lnet_finalize(ni, lntmsg, 0);
- kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, 0,
- rxmsg->ibm_u.putreq.ibprm_cookie);
- break;
- }
-
- tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid);
- if (!tx) {
- CERROR("Can't allocate tx for %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- /* Not replying will break the connection */
- rc = -ENOMEM;
- break;
- }
-
- txmsg = tx->tx_msg;
- rd = &txmsg->ibm_u.putack.ibpam_rd;
- if (!(to->type & ITER_BVEC))
- rc = kiblnd_setup_rd_iov(ni, tx, rd,
- to->nr_segs, to->kvec,
- to->iov_offset,
- iov_iter_count(to));
- else
- rc = kiblnd_setup_rd_kiov(ni, tx, rd,
- to->nr_segs, to->bvec,
- to->iov_offset,
- iov_iter_count(to));
- if (rc) {
- CERROR("Can't setup PUT sink for %s: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
- kiblnd_tx_done(ni, tx);
- /* tell peer it's over */
- kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, rc,
- rxmsg->ibm_u.putreq.ibprm_cookie);
- break;
- }
-
- nob = offsetof(struct kib_putack_msg, ibpam_rd.rd_frags[rd->rd_nfrags]);
- txmsg->ibm_u.putack.ibpam_src_cookie = rxmsg->ibm_u.putreq.ibprm_cookie;
- txmsg->ibm_u.putack.ibpam_dst_cookie = tx->tx_cookie;
-
- kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_ACK, nob);
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
- tx->tx_waiting = 1; /* waiting for PUT_DONE */
- kiblnd_queue_tx(tx, conn);
-
- /* reposted buffer reserved for PUT_DONE */
- post_credit = IBLND_POSTRX_NO_CREDIT;
- break;
- }
-
- case IBLND_MSG_GET_REQ:
- if (lntmsg) {
- /* Optimized GET; RDMA lntmsg's payload */
- kiblnd_reply(ni, rx, lntmsg);
- } else {
- /* GET didn't match anything */
- kiblnd_send_completion(rx->rx_conn, IBLND_MSG_GET_DONE,
- -ENODATA,
- rxmsg->ibm_u.get.ibgm_cookie);
- }
- break;
- }
-
- kiblnd_post_rx(rx, post_credit);
- return rc;
-}
-
-int
-kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name)
-{
- struct task_struct *task = kthread_run(fn, arg, "%s", name);
-
- if (IS_ERR(task))
- return PTR_ERR(task);
-
- atomic_inc(&kiblnd_data.kib_nthreads);
- return 0;
-}
-
-static void
-kiblnd_thread_fini(void)
-{
- atomic_dec(&kiblnd_data.kib_nthreads);
-}
-
-static void
-kiblnd_peer_alive(struct kib_peer *peer)
-{
- /* This is racy, but everyone's only writing jiffies */
- peer->ibp_last_alive = jiffies;
- mb();
-}
-
-static void
-kiblnd_peer_notify(struct kib_peer *peer)
-{
- int error = 0;
- unsigned long last_alive = 0;
- unsigned long flags;
-
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- if (kiblnd_peer_idle(peer) && peer->ibp_error) {
- error = peer->ibp_error;
- peer->ibp_error = 0;
-
- last_alive = peer->ibp_last_alive;
- }
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- if (error)
- lnet_notify(peer->ibp_ni,
- peer->ibp_nid, 0, last_alive);
-}
-
-void
-kiblnd_close_conn_locked(struct kib_conn *conn, int error)
-{
- /*
- * This just does the immediate housekeeping. 'error' is zero for a
- * normal shutdown which can happen only after the connection has been
- * established. If the connection is established, schedule the
- * connection to be finished off by the connd. Otherwise the connd is
- * already dealing with it (either to set it up or tear it down).
- * Caller holds kib_global_lock exclusively in irq context
- */
- struct kib_peer *peer = conn->ibc_peer;
- struct kib_dev *dev;
- unsigned long flags;
-
- LASSERT(error || conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
- if (error && !conn->ibc_comms_error)
- conn->ibc_comms_error = error;
-
- if (conn->ibc_state != IBLND_CONN_ESTABLISHED)
- return; /* already being handled */
-
- if (!error &&
- list_empty(&conn->ibc_tx_noops) &&
- list_empty(&conn->ibc_tx_queue) &&
- list_empty(&conn->ibc_tx_queue_rsrvd) &&
- list_empty(&conn->ibc_tx_queue_nocred) &&
- list_empty(&conn->ibc_active_txs)) {
- CDEBUG(D_NET, "closing conn to %s\n",
- libcfs_nid2str(peer->ibp_nid));
- } else {
- CNETERR("Closing conn to %s: error %d%s%s%s%s%s\n",
- libcfs_nid2str(peer->ibp_nid), error,
- list_empty(&conn->ibc_tx_queue) ? "" : "(sending)",
- list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)",
- list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)",
- list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)",
- list_empty(&conn->ibc_active_txs) ? "" : "(waiting)");
- }
-
- dev = ((struct kib_net *)peer->ibp_ni->ni_data)->ibn_dev;
- if (peer->ibp_next_conn == conn)
- /* clear next_conn so it won't be used */
- peer->ibp_next_conn = NULL;
- list_del(&conn->ibc_list);
- /* connd (see below) takes over ibc_list's ref */
-
- if (list_empty(&peer->ibp_conns) && /* no more conns */
- kiblnd_peer_active(peer)) { /* still in peer table */
- kiblnd_unlink_peer_locked(peer);
-
- /* set/clear error on last conn */
- peer->ibp_error = conn->ibc_comms_error;
- }
-
- kiblnd_set_conn_state(conn, IBLND_CONN_CLOSING);
-
- if (error &&
- kiblnd_dev_can_failover(dev)) {
- list_add_tail(&dev->ibd_fail_list,
- &kiblnd_data.kib_failed_devs);
- wake_up(&kiblnd_data.kib_failover_waitq);
- }
-
- spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
-
- list_add_tail(&conn->ibc_list, &kiblnd_data.kib_connd_conns);
- wake_up(&kiblnd_data.kib_connd_waitq);
-
- spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);
-}
-
-void
-kiblnd_close_conn(struct kib_conn *conn, int error)
-{
- unsigned long flags;
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- kiblnd_close_conn_locked(conn, error);
-
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-}
-
-static void
-kiblnd_handle_early_rxs(struct kib_conn *conn)
-{
- unsigned long flags;
- struct kib_rx *rx;
-
- LASSERT(!in_interrupt());
- LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- while (!list_empty(&conn->ibc_early_rxs)) {
- rx = list_entry(conn->ibc_early_rxs.next,
- struct kib_rx, rx_list);
- list_del(&rx->rx_list);
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- kiblnd_handle_rx(rx);
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- }
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-}
-
-static void
-kiblnd_abort_txs(struct kib_conn *conn, struct list_head *txs)
-{
- LIST_HEAD(zombies);
- struct list_head *tmp;
- struct list_head *nxt;
- struct kib_tx *tx;
-
- spin_lock(&conn->ibc_lock);
-
- list_for_each_safe(tmp, nxt, txs) {
- tx = list_entry(tmp, struct kib_tx, tx_list);
-
- if (txs == &conn->ibc_active_txs) {
- LASSERT(!tx->tx_queued);
- LASSERT(tx->tx_waiting || tx->tx_sending);
- } else {
- LASSERT(tx->tx_queued);
- }
-
- tx->tx_status = -ECONNABORTED;
- tx->tx_waiting = 0;
-
- if (!tx->tx_sending) {
- tx->tx_queued = 0;
- list_del(&tx->tx_list);
- list_add(&tx->tx_list, &zombies);
- }
- }
-
- spin_unlock(&conn->ibc_lock);
-
- kiblnd_txlist_done(conn->ibc_peer->ibp_ni, &zombies, -ECONNABORTED);
-}
-
-static void
-kiblnd_finalise_conn(struct kib_conn *conn)
-{
- LASSERT(!in_interrupt());
- LASSERT(conn->ibc_state > IBLND_CONN_INIT);
-
- kiblnd_set_conn_state(conn, IBLND_CONN_DISCONNECTED);
-
- /*
- * abort_receives moves QP state to IB_QPS_ERR. This is only required
- * for connections that didn't get as far as being connected, because
- * rdma_disconnect() does this for free.
- */
- kiblnd_abort_receives(conn);
-
- /*
- * Complete all tx descs not waiting for sends to complete.
- * NB we should be safe from RDMA now that the QP has changed state
- */
- kiblnd_abort_txs(conn, &conn->ibc_tx_noops);
- kiblnd_abort_txs(conn, &conn->ibc_tx_queue);
- kiblnd_abort_txs(conn, &conn->ibc_tx_queue_rsrvd);
- kiblnd_abort_txs(conn, &conn->ibc_tx_queue_nocred);
- kiblnd_abort_txs(conn, &conn->ibc_active_txs);
-
- kiblnd_handle_early_rxs(conn);
-}
-
-static void
-kiblnd_peer_connect_failed(struct kib_peer *peer, int active, int error)
-{
- LIST_HEAD(zombies);
- unsigned long flags;
-
- LASSERT(error);
- LASSERT(!in_interrupt());
-
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- if (active) {
- LASSERT(peer->ibp_connecting > 0);
- peer->ibp_connecting--;
- } else {
- LASSERT(peer->ibp_accepting > 0);
- peer->ibp_accepting--;
- }
-
- if (kiblnd_peer_connecting(peer)) {
- /* another connection attempt under way... */
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock,
- flags);
- return;
- }
-
- peer->ibp_reconnected = 0;
- if (list_empty(&peer->ibp_conns)) {
- /* Take peer's blocked transmits to complete with error */
- list_add(&zombies, &peer->ibp_tx_queue);
- list_del_init(&peer->ibp_tx_queue);
-
- if (kiblnd_peer_active(peer))
- kiblnd_unlink_peer_locked(peer);
-
- peer->ibp_error = error;
- } else {
- /* Can't have blocked transmits if there are connections */
- LASSERT(list_empty(&peer->ibp_tx_queue));
- }
-
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- kiblnd_peer_notify(peer);
-
- if (list_empty(&zombies))
- return;
-
- CNETERR("Deleting messages for %s: connection failed\n",
- libcfs_nid2str(peer->ibp_nid));
-
- kiblnd_txlist_done(peer->ibp_ni, &zombies, -EHOSTUNREACH);
-}
-
-static void
-kiblnd_connreq_done(struct kib_conn *conn, int status)
-{
- struct kib_peer *peer = conn->ibc_peer;
- struct kib_tx *tx;
- struct kib_tx *tmp;
- struct list_head txs;
- unsigned long flags;
- int active;
-
- active = (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
-
- CDEBUG(D_NET, "%s: active(%d), version(%x), status(%d)\n",
- libcfs_nid2str(peer->ibp_nid), active,
- conn->ibc_version, status);
-
- LASSERT(!in_interrupt());
- LASSERT((conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT &&
- peer->ibp_connecting > 0) ||
- (conn->ibc_state == IBLND_CONN_PASSIVE_WAIT &&
- peer->ibp_accepting > 0));
-
- kfree(conn->ibc_connvars);
- conn->ibc_connvars = NULL;
-
- if (status) {
- /* failed to establish connection */
- kiblnd_peer_connect_failed(peer, active, status);
- kiblnd_finalise_conn(conn);
- return;
- }
-
- /* connection established */
- write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- conn->ibc_last_send = jiffies;
- kiblnd_set_conn_state(conn, IBLND_CONN_ESTABLISHED);
- kiblnd_peer_alive(peer);
-
- /*
- * Add conn to peer's list and nuke any dangling conns from a different
- * peer instance...
- */
- kiblnd_conn_addref(conn); /* +1 ref for ibc_list */
- list_add(&conn->ibc_list, &peer->ibp_conns);
- peer->ibp_reconnected = 0;
- if (active)
- peer->ibp_connecting--;
- else
- peer->ibp_accepting--;
-
- if (!peer->ibp_version) {
- peer->ibp_version = conn->ibc_version;
- peer->ibp_incarnation = conn->ibc_incarnation;
- }
-
- if (peer->ibp_version != conn->ibc_version ||
- peer->ibp_incarnation != conn->ibc_incarnation) {
- kiblnd_close_stale_conns_locked(peer, conn->ibc_version,
- conn->ibc_incarnation);
- peer->ibp_version = conn->ibc_version;
- peer->ibp_incarnation = conn->ibc_incarnation;
- }
-
- /* grab pending txs while I have the lock */
- list_add(&txs, &peer->ibp_tx_queue);
- list_del_init(&peer->ibp_tx_queue);
-
- if (!kiblnd_peer_active(peer) || /* peer has been deleted */
- conn->ibc_comms_error) { /* error has happened already */
- struct lnet_ni *ni = peer->ibp_ni;
-
- /* start to shut down connection */
- kiblnd_close_conn_locked(conn, -ECONNABORTED);
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- kiblnd_txlist_done(ni, &txs, -ECONNABORTED);
-
- return;
- }
-
- /*
- * +1 ref for myself, this connection is visible to other threads
- * now, refcount of peer:ibp_conns can be released by connection
- * close from either a different thread, or the calling of
- * kiblnd_check_sends_locked() below. See bz21911 for details.
- */
- kiblnd_conn_addref(conn);
- write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- /* Schedule blocked txs
- * Note: if we are running with conns_per_peer > 1, these blocked
- * txs will all get scheduled to the first connection which gets
- * scheduled. We won't be using round robin on this first batch.
- */
- spin_lock(&conn->ibc_lock);
- list_for_each_entry_safe(tx, tmp, &txs, tx_list) {
- list_del(&tx->tx_list);
-
- kiblnd_queue_tx_locked(tx, conn);
- }
- kiblnd_check_sends_locked(conn);
- spin_unlock(&conn->ibc_lock);
-
- /* schedule blocked rxs */
- kiblnd_handle_early_rxs(conn);
-
- kiblnd_conn_decref(conn);
-}
-
-static void
-kiblnd_reject(struct rdma_cm_id *cmid, struct kib_rej *rej)
-{
- int rc;
-
- rc = rdma_reject(cmid, rej, sizeof(*rej));
-
- if (rc)
- CWARN("Error %d sending reject\n", rc);
-}
-
-static int
-kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
-{
- rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
- struct kib_msg *reqmsg = priv;
- struct kib_msg *ackmsg;
- struct kib_dev *ibdev;
- struct kib_peer *peer;
- struct kib_peer *peer2;
- struct kib_conn *conn;
- struct lnet_ni *ni = NULL;
- struct kib_net *net = NULL;
- lnet_nid_t nid;
- struct rdma_conn_param cp;
- struct kib_rej rej;
- int version = IBLND_MSG_VERSION;
- unsigned long flags;
- int max_frags;
- int rc;
- struct sockaddr_in *peer_addr;
-
- LASSERT(!in_interrupt());
-
- /* cmid inherits 'context' from the corresponding listener id */
- ibdev = (struct kib_dev *)cmid->context;
- LASSERT(ibdev);
-
- memset(&rej, 0, sizeof(rej));
- rej.ibr_magic = IBLND_MSG_MAGIC;
- rej.ibr_why = IBLND_REJECT_FATAL;
- rej.ibr_cp.ibcp_max_msg_size = IBLND_MSG_SIZE;
-
- peer_addr = (struct sockaddr_in *)&cmid->route.addr.dst_addr;
- if (*kiblnd_tunables.kib_require_priv_port &&
- ntohs(peer_addr->sin_port) >= PROT_SOCK) {
- __u32 ip = ntohl(peer_addr->sin_addr.s_addr);
-
- CERROR("Peer's port (%pI4h:%hu) is not privileged\n",
- &ip, ntohs(peer_addr->sin_port));
- goto failed;
- }
-
- if (priv_nob < offsetof(struct kib_msg, ibm_type)) {
- CERROR("Short connection request\n");
- goto failed;
- }
-
- /*
- * Future protocol version compatibility support! If the
- * o2iblnd-specific protocol changes, or when LNET unifies
- * protocols over all LNDs, the initial connection will
- * negotiate a protocol version. I trap this here to avoid
- * console errors; the reject tells the peer which protocol I
- * speak.
- */
- if (reqmsg->ibm_magic == LNET_PROTO_MAGIC ||
- reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC))
- goto failed;
- if (reqmsg->ibm_magic == IBLND_MSG_MAGIC &&
- reqmsg->ibm_version != IBLND_MSG_VERSION &&
- reqmsg->ibm_version != IBLND_MSG_VERSION_1)
- goto failed;
- if (reqmsg->ibm_magic == __swab32(IBLND_MSG_MAGIC) &&
- reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION) &&
- reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION_1))
- goto failed;
-
- rc = kiblnd_unpack_msg(reqmsg, priv_nob);
- if (rc) {
- CERROR("Can't parse connection request: %d\n", rc);
- goto failed;
- }
-
- nid = reqmsg->ibm_srcnid;
- ni = lnet_net2ni(LNET_NIDNET(reqmsg->ibm_dstnid));
-
- if (ni) {
- net = (struct kib_net *)ni->ni_data;
- rej.ibr_incarnation = net->ibn_incarnation;
- }
-
- if (!ni || /* no matching net */
- ni->ni_nid != reqmsg->ibm_dstnid || /* right NET, wrong NID! */
- net->ibn_dev != ibdev) { /* wrong device */
- CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): bad dst nid %s\n",
- libcfs_nid2str(nid),
- !ni ? "NA" : libcfs_nid2str(ni->ni_nid),
- ibdev->ibd_ifname, ibdev->ibd_nnets,
- &ibdev->ibd_ifip,
- libcfs_nid2str(reqmsg->ibm_dstnid));
-
- goto failed;
- }
-
- /* check time stamp as soon as possible */
- if (reqmsg->ibm_dststamp &&
- reqmsg->ibm_dststamp != net->ibn_incarnation) {
- CWARN("Stale connection request\n");
- rej.ibr_why = IBLND_REJECT_CONN_STALE;
- goto failed;
- }
-
- /* I can accept peer's version */
- version = reqmsg->ibm_version;
-
- if (reqmsg->ibm_type != IBLND_MSG_CONNREQ) {
- CERROR("Unexpected connreq msg type: %x from %s\n",
- reqmsg->ibm_type, libcfs_nid2str(nid));
- goto failed;
- }
-
- if (reqmsg->ibm_u.connparams.ibcp_queue_depth >
- kiblnd_msg_queue_size(version, ni)) {
- CERROR("Can't accept conn from %s, queue depth too large: %d (<=%d wanted)\n",
- libcfs_nid2str(nid),
- reqmsg->ibm_u.connparams.ibcp_queue_depth,
- kiblnd_msg_queue_size(version, ni));
-
- if (version == IBLND_MSG_VERSION)
- rej.ibr_why = IBLND_REJECT_MSG_QUEUE_SIZE;
-
- goto failed;
- }
-
- max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT;
- if (max_frags > kiblnd_rdma_frags(version, ni)) {
- CWARN("Can't accept conn from %s (version %x): max_frags %d too large (%d wanted)\n",
- libcfs_nid2str(nid), version, max_frags,
- kiblnd_rdma_frags(version, ni));
-
- if (version >= IBLND_MSG_VERSION)
- rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
-
- goto failed;
- } else if (max_frags < kiblnd_rdma_frags(version, ni) &&
- !net->ibn_fmr_ps) {
- CWARN("Can't accept conn from %s (version %x): max_frags %d incompatible without FMR pool (%d wanted)\n",
- libcfs_nid2str(nid), version, max_frags,
- kiblnd_rdma_frags(version, ni));
-
- if (version == IBLND_MSG_VERSION)
- rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
-
- goto failed;
- }
-
- if (reqmsg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) {
- CERROR("Can't accept %s: message size %d too big (%d max)\n",
- libcfs_nid2str(nid),
- reqmsg->ibm_u.connparams.ibcp_max_msg_size,
- IBLND_MSG_SIZE);
- goto failed;
- }
-
- /* assume 'nid' is a new peer; create */
- rc = kiblnd_create_peer(ni, &peer, nid);
- if (rc) {
- CERROR("Can't create peer for %s\n", libcfs_nid2str(nid));
- rej.ibr_why = IBLND_REJECT_NO_RESOURCES;
- goto failed;
- }
-
- /* We have validated the peer's parameters so use those */
- peer->ibp_max_frags = max_frags;
- peer->ibp_queue_depth = reqmsg->ibm_u.connparams.ibcp_queue_depth;
-
- write_lock_irqsave(g_lock, flags);
-
- peer2 = kiblnd_find_peer_locked(nid);
- if (peer2) {
- if (!peer2->ibp_version) {
- peer2->ibp_version = version;
- peer2->ibp_incarnation = reqmsg->ibm_srcstamp;
- }
-
- /* not the guy I've talked with */
- if (peer2->ibp_incarnation != reqmsg->ibm_srcstamp ||
- peer2->ibp_version != version) {
- kiblnd_close_peer_conns_locked(peer2, -ESTALE);
-
- if (kiblnd_peer_active(peer2)) {
- peer2->ibp_incarnation = reqmsg->ibm_srcstamp;
- peer2->ibp_version = version;
- }
- write_unlock_irqrestore(g_lock, flags);
-
- CWARN("Conn stale %s version %x/%x incarnation %llu/%llu\n",
- libcfs_nid2str(nid), peer2->ibp_version, version,
- peer2->ibp_incarnation, reqmsg->ibm_srcstamp);
-
- kiblnd_peer_decref(peer);
- rej.ibr_why = IBLND_REJECT_CONN_STALE;
- goto failed;
- }
-
- /*
- * Tie-break connection race in favour of the higher NID.
- * If we keep running into a race condition multiple times,
- * we have to assume that the connection attempt with the
- * higher NID is stuck in a connecting state and will never
- * recover. As such, we pass through this if-block and let
- * the lower NID connection win so we can move forward.
- */
- if (peer2->ibp_connecting &&
- nid < ni->ni_nid && peer2->ibp_races <
- MAX_CONN_RACES_BEFORE_ABORT) {
- peer2->ibp_races++;
- write_unlock_irqrestore(g_lock, flags);
-
- CDEBUG(D_NET, "Conn race %s\n",
- libcfs_nid2str(peer2->ibp_nid));
-
- kiblnd_peer_decref(peer);
- rej.ibr_why = IBLND_REJECT_CONN_RACE;
- goto failed;
- }
- if (peer2->ibp_races >= MAX_CONN_RACES_BEFORE_ABORT)
- CNETERR("Conn race %s: unresolved after %d attempts, letting lower NID win\n",
- libcfs_nid2str(peer2->ibp_nid),
- MAX_CONN_RACES_BEFORE_ABORT);
- /**
- * A passive connection is allowed even if this peer is
- * waiting for reconnection.
- */
- peer2->ibp_reconnecting = 0;
- peer2->ibp_races = 0;
- peer2->ibp_accepting++;
- kiblnd_peer_addref(peer2);
-
- /**
- * Race with kiblnd_launch_tx (active connect) to create the
- * peer, so copy the validated parameters now that we know
- * the peer's limits.
- */
- peer2->ibp_max_frags = peer->ibp_max_frags;
- peer2->ibp_queue_depth = peer->ibp_queue_depth;
-
- write_unlock_irqrestore(g_lock, flags);
- kiblnd_peer_decref(peer);
- peer = peer2;
- } else {
- /* Brand new peer */
- LASSERT(!peer->ibp_accepting);
- LASSERT(!peer->ibp_version &&
- !peer->ibp_incarnation);
-
- peer->ibp_accepting = 1;
- peer->ibp_version = version;
- peer->ibp_incarnation = reqmsg->ibm_srcstamp;
-
- /* I have a ref on ni that prevents it being shutdown */
- LASSERT(!net->ibn_shutdown);
-
- kiblnd_peer_addref(peer);
- list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid));
-
- write_unlock_irqrestore(g_lock, flags);
- }
-
- conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT,
- version);
- if (!conn) {
- kiblnd_peer_connect_failed(peer, 0, -ENOMEM);
- kiblnd_peer_decref(peer);
- rej.ibr_why = IBLND_REJECT_NO_RESOURCES;
- goto failed;
- }
-
- /*
- * conn now "owns" cmid, so I return success from here on to ensure the
- * CM callback doesn't destroy cmid.
- */
- conn->ibc_incarnation = reqmsg->ibm_srcstamp;
- conn->ibc_credits = conn->ibc_queue_depth;
- conn->ibc_reserved_credits = conn->ibc_queue_depth;
- LASSERT(conn->ibc_credits + conn->ibc_reserved_credits +
- IBLND_OOB_MSGS(version) <= IBLND_RX_MSGS(conn));
-
- ackmsg = &conn->ibc_connvars->cv_msg;
- memset(ackmsg, 0, sizeof(*ackmsg));
-
- kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK,
- sizeof(ackmsg->ibm_u.connparams));
- ackmsg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth;
- ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT;
- ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
-
- kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp);
-
- memset(&cp, 0, sizeof(cp));
- cp.private_data = ackmsg;
- cp.private_data_len = ackmsg->ibm_nob;
- cp.responder_resources = 0; /* No atomic ops or RDMA reads */
- cp.initiator_depth = 0;
- cp.flow_control = 1;
- cp.retry_count = *kiblnd_tunables.kib_retry_count;
- cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count;
-
- CDEBUG(D_NET, "Accept %s\n", libcfs_nid2str(nid));
-
- rc = rdma_accept(cmid, &cp);
- if (rc) {
- CERROR("Can't accept %s: %d\n", libcfs_nid2str(nid), rc);
- rej.ibr_version = version;
- rej.ibr_why = IBLND_REJECT_FATAL;
-
- kiblnd_reject(cmid, &rej);
- kiblnd_connreq_done(conn, rc);
- kiblnd_conn_decref(conn);
- }
-
- lnet_ni_decref(ni);
- return 0;
-
- failed:
- if (ni) {
- rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni);
- rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni);
- lnet_ni_decref(ni);
- }
-
- rej.ibr_version = version;
- kiblnd_reject(cmid, &rej);
-
- return -ECONNREFUSED;
-}
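/*
 * A sketch of the connection-race tie-break above: when both peers
 * dial simultaneously, the higher NID normally wins and the passive
 * attempt from the lower NID is rejected, but after enough unresolved
 * races the lower NID is allowed through. Constants are illustrative.
 */
#include <stdbool.h>
#include <stdio.h>

#define MAX_RACES_SKETCH 20

static bool reject_passive(unsigned long long peer_nid,
			   unsigned long long my_nid,
			   bool peer_connecting, int *races)
{
	if (peer_connecting && peer_nid < my_nid &&
	    *races < MAX_RACES_SKETCH) {
		(*races)++;	/* higher NID keeps its active attempt */
		return true;
	}
	return false;		/* accept; let the lower NID win now */
}

int main(void)
{
	int races = 0, rejected = 0, i;

	for (i = 0; i < 25; i++)
		rejected += reject_passive(10, 20, true, &races);

	printf("rejected %d of 25 racing attempts\n", rejected);
	return 0;
}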
-
-static void
-kiblnd_check_reconnect(struct kib_conn *conn, int version,
- __u64 incarnation, int why, struct kib_connparams *cp)
-{
- rwlock_t *glock = &kiblnd_data.kib_global_lock;
- struct kib_peer *peer = conn->ibc_peer;
- char *reason;
- int msg_size = IBLND_MSG_SIZE;
- int frag_num = -1;
- int queue_dep = -1;
- bool reconnect;
- unsigned long flags;
-
- LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
- LASSERT(peer->ibp_connecting > 0); /* 'conn' at least */
-
- if (cp) {
- msg_size = cp->ibcp_max_msg_size;
- frag_num = cp->ibcp_max_frags << IBLND_FRAG_SHIFT;
- queue_dep = cp->ibcp_queue_depth;
- }
-
- write_lock_irqsave(glock, flags);
- /**
- * retry connection if it's still needed and no other connection
- * attempts (active or passive) are in progress
- * NB: reconnect is still needed even when ibp_tx_queue is
- * empty if ibp_version != version because reconnect may be
- * initiated by kiblnd_query()
- */
- reconnect = (!list_empty(&peer->ibp_tx_queue) ||
- peer->ibp_version != version) &&
- peer->ibp_connecting &&
- !peer->ibp_accepting;
- if (!reconnect) {
- reason = "no need";
- goto out;
- }
-
- switch (why) {
- default:
- reason = "Unknown";
- break;
-
- case IBLND_REJECT_RDMA_FRAGS: {
- struct lnet_ioctl_config_lnd_tunables *tunables;
-
- if (!cp) {
- reason = "can't negotiate max frags";
- goto out;
- }
- tunables = peer->ibp_ni->ni_lnd_tunables;
- if (!tunables->lt_tun_u.lt_o2ib.lnd_map_on_demand) {
- reason = "map_on_demand must be enabled";
- goto out;
- }
- if (conn->ibc_max_frags <= frag_num) {
- reason = "unsupported max frags";
- goto out;
- }
-
- peer->ibp_max_frags = frag_num;
- reason = "rdma fragments";
- break;
- }
- case IBLND_REJECT_MSG_QUEUE_SIZE:
- if (!cp) {
- reason = "can't negotiate queue depth";
- goto out;
- }
- if (conn->ibc_queue_depth <= queue_dep) {
- reason = "unsupported queue depth";
- goto out;
- }
-
- peer->ibp_queue_depth = queue_dep;
- reason = "queue depth";
- break;
-
- case IBLND_REJECT_CONN_STALE:
- reason = "stale";
- break;
-
- case IBLND_REJECT_CONN_RACE:
- reason = "conn race";
- break;
-
- case IBLND_REJECT_CONN_UNCOMPAT:
- reason = "version negotiation";
- break;
- }
-
- conn->ibc_reconnect = 1;
- peer->ibp_reconnecting++;
- peer->ibp_version = version;
- if (incarnation)
- peer->ibp_incarnation = incarnation;
-out:
- write_unlock_irqrestore(glock, flags);
-
- CNETERR("%s: %s (%s), %x, %x, msg_size: %d, queue_depth: %d/%d, max_frags: %d/%d\n",
- libcfs_nid2str(peer->ibp_nid),
- reconnect ? "reconnect" : "don't reconnect",
- reason, IBLND_MSG_VERSION, version, msg_size,
- conn->ibc_queue_depth, queue_dep,
- conn->ibc_max_frags, frag_num);
- /**
- * if conn::ibc_reconnect is TRUE, connd will reconnect to the peer
- * while destroying the zombie
- */
-}
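/*
 * A sketch of the reject-driven renegotiation above: on a queue-depth
 * or max-frags reject, adopt the peer's advertised limit and retry,
 * but only when that limit is genuinely smaller than ours. Names and
 * values are hypothetical.
 */
#include <stdbool.h>
#include <stdio.h>

static bool renegotiate(int *mine, int peers)
{
	if (*mine <= peers)
		return false;	/* peer asked for more: unsupported */
	*mine = peers;		/* adopt the smaller limit and retry */
	return true;
}

int main(void)
{
	int queue_depth = 128;

	if (renegotiate(&queue_depth, 8))
		printf("reconnect with queue depth %d\n", queue_depth);
	return 0;
}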
-
-static void
-kiblnd_rejected(struct kib_conn *conn, int reason, void *priv, int priv_nob)
-{
- struct kib_peer *peer = conn->ibc_peer;
-
- LASSERT(!in_interrupt());
- LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
-
- switch (reason) {
- case IB_CM_REJ_STALE_CONN:
- kiblnd_check_reconnect(conn, IBLND_MSG_VERSION, 0,
- IBLND_REJECT_CONN_STALE, NULL);
- break;
-
- case IB_CM_REJ_INVALID_SERVICE_ID:
- CNETERR("%s rejected: no listener at %d\n",
- libcfs_nid2str(peer->ibp_nid),
- *kiblnd_tunables.kib_service);
- break;
-
- case IB_CM_REJ_CONSUMER_DEFINED:
- if (priv_nob >= offsetof(struct kib_rej, ibr_padding)) {
- struct kib_rej *rej = priv;
- struct kib_connparams *cp = NULL;
- int flip = 0;
- __u64 incarnation = -1;
-
- /* NB. default incarnation is -1 because:
- * a) V1 will ignore dst incarnation in connreq.
- * b) V2 will provide incarnation while rejecting me,
- * and -1 will be overwritten.
- *
- * If I try to connect to a V1 peer with the V2 protocol,
- * it rejects me and then upgrades to V2; I know nothing
- * about the upgrade and try to reconnect with V1, in
- * which case the upgraded V2 peer can tell I'm trying to
- * talk to the old version and rejects me (incarnation is -1).
- */
-
- if (rej->ibr_magic == __swab32(IBLND_MSG_MAGIC) ||
- rej->ibr_magic == __swab32(LNET_PROTO_MAGIC)) {
- __swab32s(&rej->ibr_magic);
- __swab16s(&rej->ibr_version);
- flip = 1;
- }
-
- if (priv_nob >= sizeof(struct kib_rej) &&
- rej->ibr_version > IBLND_MSG_VERSION_1) {
- /*
- * priv_nob is always 148 (IB_CM_REJ_PRIVATE_DATA_SIZE)
- * in current versions of OFED, so we still need to
- * check the version.
- */
- cp = &rej->ibr_cp;
-
- if (flip) {
- __swab64s(&rej->ibr_incarnation);
- __swab16s(&cp->ibcp_queue_depth);
- __swab16s(&cp->ibcp_max_frags);
- __swab32s(&cp->ibcp_max_msg_size);
- }
-
- incarnation = rej->ibr_incarnation;
- }
-
- if (rej->ibr_magic != IBLND_MSG_MAGIC &&
- rej->ibr_magic != LNET_PROTO_MAGIC) {
- CERROR("%s rejected: consumer defined fatal error\n",
- libcfs_nid2str(peer->ibp_nid));
- break;
- }
-
- if (rej->ibr_version != IBLND_MSG_VERSION &&
- rej->ibr_version != IBLND_MSG_VERSION_1) {
- CERROR("%s rejected: o2iblnd version %x error\n",
- libcfs_nid2str(peer->ibp_nid),
- rej->ibr_version);
- break;
- }
-
- if (rej->ibr_why == IBLND_REJECT_FATAL &&
- rej->ibr_version == IBLND_MSG_VERSION_1) {
- CDEBUG(D_NET, "rejected by old version peer %s: %x\n",
- libcfs_nid2str(peer->ibp_nid), rej->ibr_version);
-
- if (conn->ibc_version != IBLND_MSG_VERSION_1)
- rej->ibr_why = IBLND_REJECT_CONN_UNCOMPAT;
- }
-
- switch (rej->ibr_why) {
- case IBLND_REJECT_CONN_RACE:
- case IBLND_REJECT_CONN_STALE:
- case IBLND_REJECT_CONN_UNCOMPAT:
- case IBLND_REJECT_MSG_QUEUE_SIZE:
- case IBLND_REJECT_RDMA_FRAGS:
- kiblnd_check_reconnect(conn, rej->ibr_version,
- incarnation,
- rej->ibr_why, cp);
- break;
-
- case IBLND_REJECT_NO_RESOURCES:
- CERROR("%s rejected: o2iblnd no resources\n",
- libcfs_nid2str(peer->ibp_nid));
- break;
-
- case IBLND_REJECT_FATAL:
- CERROR("%s rejected: o2iblnd fatal error\n",
- libcfs_nid2str(peer->ibp_nid));
- break;
-
- default:
- CERROR("%s rejected: o2iblnd reason %d\n",
- libcfs_nid2str(peer->ibp_nid),
- rej->ibr_why);
- break;
- }
- break;
- }
- /* fall through */
- default:
- CNETERR("%s rejected: reason %d, size %d\n",
- libcfs_nid2str(peer->ibp_nid), reason, priv_nob);
- break;
- }
-
- kiblnd_connreq_done(conn, -ECONNREFUSED);
-}
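/*
 * A sketch of the endianness detection used above: if the magic reads
 * back byte-swapped, the peer is opposite-endian and every multi-byte
 * field in the reject must be swabbed before use. The magic value is
 * illustrative.
 */
#include <stdint.h>
#include <stdio.h>

#define MAGIC_SKETCH 0x0be91b01u

static uint32_t swab32(uint32_t v)
{
	return (v >> 24) | ((v >> 8) & 0xff00) |
	       ((v << 8) & 0xff0000) | (v << 24);
}

int main(void)
{
	/* what an opposite-endian peer's magic looks like on the wire */
	uint32_t wire = swab32(MAGIC_SKETCH);

	if (wire == MAGIC_SKETCH)
		printf("same endianness\n");
	else if (swab32(wire) == MAGIC_SKETCH)
		printf("opposite-endian peer: swab all fields\n");
	else
		printf("not our protocol\n");
	return 0;
}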
-
-static void
-kiblnd_check_connreply(struct kib_conn *conn, void *priv, int priv_nob)
-{
- struct kib_peer *peer = conn->ibc_peer;
- struct lnet_ni *ni = peer->ibp_ni;
- struct kib_net *net = ni->ni_data;
- struct kib_msg *msg = priv;
- int ver = conn->ibc_version;
- int rc = kiblnd_unpack_msg(msg, priv_nob);
- unsigned long flags;
-
- LASSERT(net);
-
- if (rc) {
- CERROR("Can't unpack connack from %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc);
- goto failed;
- }
-
- if (msg->ibm_type != IBLND_MSG_CONNACK) {
- CERROR("Unexpected message %d from %s\n",
- msg->ibm_type, libcfs_nid2str(peer->ibp_nid));
- rc = -EPROTO;
- goto failed;
- }
-
- if (ver != msg->ibm_version) {
- CERROR("%s replied version %x differs from requested version %x\n",
- libcfs_nid2str(peer->ibp_nid), msg->ibm_version, ver);
- rc = -EPROTO;
- goto failed;
- }
-
- if (msg->ibm_u.connparams.ibcp_queue_depth >
- conn->ibc_queue_depth) {
- CERROR("%s has incompatible queue depth %d (<=%d wanted)\n",
- libcfs_nid2str(peer->ibp_nid),
- msg->ibm_u.connparams.ibcp_queue_depth,
- conn->ibc_queue_depth);
- rc = -EPROTO;
- goto failed;
- }
-
- if ((msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT) >
- conn->ibc_max_frags) {
- CERROR("%s has incompatible max_frags %d (<=%d wanted)\n",
- libcfs_nid2str(peer->ibp_nid),
- msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT,
- conn->ibc_max_frags);
- rc = -EPROTO;
- goto failed;
- }
-
- if (msg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) {
- CERROR("%s max message size %d too big (%d max)\n",
- libcfs_nid2str(peer->ibp_nid),
- msg->ibm_u.connparams.ibcp_max_msg_size,
- IBLND_MSG_SIZE);
- rc = -EPROTO;
- goto failed;
- }
-
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- if (msg->ibm_dstnid == ni->ni_nid &&
- msg->ibm_dststamp == net->ibn_incarnation)
- rc = 0;
- else
- rc = -ESTALE;
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- if (rc) {
- CERROR("Bad connection reply from %s, rc = %d, version: %x max_frags: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc,
- msg->ibm_version, msg->ibm_u.connparams.ibcp_max_frags);
- goto failed;
- }
-
- conn->ibc_incarnation = msg->ibm_srcstamp;
- conn->ibc_credits = msg->ibm_u.connparams.ibcp_queue_depth;
- conn->ibc_reserved_credits = msg->ibm_u.connparams.ibcp_queue_depth;
- conn->ibc_queue_depth = msg->ibm_u.connparams.ibcp_queue_depth;
- conn->ibc_max_frags = msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT;
- LASSERT(conn->ibc_credits + conn->ibc_reserved_credits +
- IBLND_OOB_MSGS(ver) <= IBLND_RX_MSGS(conn));
-
- kiblnd_connreq_done(conn, 0);
- return;
-
- failed:
- /*
- * NB My QP has already established itself, so I handle anything going
- * wrong here by setting ibc_comms_error.
- * kiblnd_connreq_done(0) moves the conn state to ESTABLISHED, but then
- * immediately tears it down.
- */
- LASSERT(rc);
- conn->ibc_comms_error = rc;
- kiblnd_connreq_done(conn, 0);
-}
-
-static int
-kiblnd_active_connect(struct rdma_cm_id *cmid)
-{
- struct kib_peer *peer = (struct kib_peer *)cmid->context;
- struct kib_conn *conn;
- struct kib_msg *msg;
- struct rdma_conn_param cp;
- int version;
- __u64 incarnation;
- unsigned long flags;
- int rc;
-
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- incarnation = peer->ibp_incarnation;
- version = !peer->ibp_version ? IBLND_MSG_VERSION :
- peer->ibp_version;
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT,
- version);
- if (!conn) {
- kiblnd_peer_connect_failed(peer, 1, -ENOMEM);
- kiblnd_peer_decref(peer); /* lose cmid's ref */
- return -ENOMEM;
- }
-
- /*
- * conn "owns" cmid now, so I return success from here on to ensure the
- * CM callback doesn't destroy cmid. conn also takes over cmid's ref
- * on peer
- */
- msg = &conn->ibc_connvars->cv_msg;
-
- memset(msg, 0, sizeof(*msg));
- kiblnd_init_msg(msg, IBLND_MSG_CONNREQ, sizeof(msg->ibm_u.connparams));
- msg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth;
- msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT;
- msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
-
- kiblnd_pack_msg(peer->ibp_ni, msg, version,
- 0, peer->ibp_nid, incarnation);
-
- memset(&cp, 0, sizeof(cp));
- cp.private_data = msg;
- cp.private_data_len = msg->ibm_nob;
- cp.responder_resources = 0; /* No atomic ops or RDMA reads */
- cp.initiator_depth = 0;
- cp.flow_control = 1;
- cp.retry_count = *kiblnd_tunables.kib_retry_count;
- cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count;
-
- LASSERT(cmid->context == (void *)conn);
- LASSERT(conn->ibc_cmid == cmid);
-
- rc = rdma_connect(cmid, &cp);
- if (rc) {
- CERROR("Can't connect to %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc);
- kiblnd_connreq_done(conn, rc);
- kiblnd_conn_decref(conn);
- }
-
- return 0;
-}
-
-int
-kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
-{
- struct kib_peer *peer;
- struct kib_conn *conn;
- int rc;
-
- switch (event->event) {
- default:
- CERROR("Unexpected event: %d, status: %d\n",
- event->event, event->status);
- LBUG();
-
- case RDMA_CM_EVENT_CONNECT_REQUEST:
- /* destroy cmid on failure */
- rc = kiblnd_passive_connect(cmid,
- (void *)KIBLND_CONN_PARAM(event),
- KIBLND_CONN_PARAM_LEN(event));
- CDEBUG(D_NET, "connreq: %d\n", rc);
- return rc;
-
- case RDMA_CM_EVENT_ADDR_ERROR:
- peer = (struct kib_peer *)cmid->context;
- CNETERR("%s: ADDR ERROR %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
- kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH);
- kiblnd_peer_decref(peer);
- return -EHOSTUNREACH; /* rc destroys cmid */
-
- case RDMA_CM_EVENT_ADDR_RESOLVED:
- peer = (struct kib_peer *)cmid->context;
-
- CDEBUG(D_NET, "%s Addr resolved: %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
-
- if (event->status) {
- CNETERR("Can't resolve address for %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
- rc = event->status;
- } else {
- rc = rdma_resolve_route(
- cmid, *kiblnd_tunables.kib_timeout * 1000);
- if (!rc) {
- struct kib_net *net = peer->ibp_ni->ni_data;
- struct kib_dev *dev = net->ibn_dev;
-
-				CDEBUG(D_NET, "%s: connection bound to %s:%pI4h:%s\n",
-				       libcfs_nid2str(peer->ibp_nid),
-				       dev->ibd_ifname,
-				       &dev->ibd_ifip, cmid->device->name);
-
- return 0;
- }
-
- /* Can't initiate route resolution */
- CERROR("Can't resolve route for %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc);
- }
- kiblnd_peer_connect_failed(peer, 1, rc);
- kiblnd_peer_decref(peer);
- return rc; /* rc destroys cmid */
-
- case RDMA_CM_EVENT_ROUTE_ERROR:
- peer = (struct kib_peer *)cmid->context;
- CNETERR("%s: ROUTE ERROR %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
- kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH);
- kiblnd_peer_decref(peer);
- return -EHOSTUNREACH; /* rc destroys cmid */
-
- case RDMA_CM_EVENT_ROUTE_RESOLVED:
- peer = (struct kib_peer *)cmid->context;
- CDEBUG(D_NET, "%s Route resolved: %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
-
- if (!event->status)
- return kiblnd_active_connect(cmid);
-
- CNETERR("Can't resolve route for %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
- kiblnd_peer_connect_failed(peer, 1, event->status);
- kiblnd_peer_decref(peer);
- return event->status; /* rc destroys cmid */
-
- case RDMA_CM_EVENT_UNREACHABLE:
- conn = (struct kib_conn *)cmid->context;
- LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT ||
- conn->ibc_state == IBLND_CONN_PASSIVE_WAIT);
- CNETERR("%s: UNREACHABLE %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status);
- kiblnd_connreq_done(conn, -ENETDOWN);
- kiblnd_conn_decref(conn);
- return 0;
-
- case RDMA_CM_EVENT_CONNECT_ERROR:
- conn = (struct kib_conn *)cmid->context;
- LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT ||
- conn->ibc_state == IBLND_CONN_PASSIVE_WAIT);
- CNETERR("%s: CONNECT ERROR %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status);
- kiblnd_connreq_done(conn, -ENOTCONN);
- kiblnd_conn_decref(conn);
- return 0;
-
- case RDMA_CM_EVENT_REJECTED:
- conn = (struct kib_conn *)cmid->context;
- switch (conn->ibc_state) {
- default:
- LBUG();
-
- case IBLND_CONN_PASSIVE_WAIT:
- CERROR("%s: REJECTED %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- event->status);
- kiblnd_connreq_done(conn, -ECONNRESET);
- break;
-
- case IBLND_CONN_ACTIVE_CONNECT:
- kiblnd_rejected(conn, event->status,
- (void *)KIBLND_CONN_PARAM(event),
- KIBLND_CONN_PARAM_LEN(event));
- break;
- }
- kiblnd_conn_decref(conn);
- return 0;
-
- case RDMA_CM_EVENT_ESTABLISHED:
- conn = (struct kib_conn *)cmid->context;
- switch (conn->ibc_state) {
- default:
- LBUG();
-
- case IBLND_CONN_PASSIVE_WAIT:
- CDEBUG(D_NET, "ESTABLISHED (passive): %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kiblnd_connreq_done(conn, 0);
- break;
-
- case IBLND_CONN_ACTIVE_CONNECT:
- CDEBUG(D_NET, "ESTABLISHED(active): %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kiblnd_check_connreply(conn,
- (void *)KIBLND_CONN_PARAM(event),
- KIBLND_CONN_PARAM_LEN(event));
- break;
- }
- /* net keeps its ref on conn! */
- return 0;
-
- case RDMA_CM_EVENT_TIMEWAIT_EXIT:
- CDEBUG(D_NET, "Ignore TIMEWAIT_EXIT event\n");
- return 0;
- case RDMA_CM_EVENT_DISCONNECTED:
- conn = (struct kib_conn *)cmid->context;
- if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
- CERROR("%s DISCONNECTED\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kiblnd_connreq_done(conn, -ECONNRESET);
- } else {
- kiblnd_close_conn(conn, 0);
- }
- kiblnd_conn_decref(conn);
- cmid->context = NULL;
- return 0;
-
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
- LCONSOLE_ERROR_MSG(0x131,
- "Received notification of device removal\n"
- "Please shutdown LNET to allow this to proceed\n");
- /*
- * Can't remove network from underneath LNET for now, so I have
- * to ignore this
- */
- return 0;
-
- case RDMA_CM_EVENT_ADDR_CHANGE:
-		LCONSOLE_INFO("Physical link changed (e.g. HCA/port)\n");
- return 0;
- }
-}
-
-static int
-kiblnd_check_txs_locked(struct kib_conn *conn, struct list_head *txs)
-{
- struct kib_tx *tx;
- struct list_head *ttmp;
-
- list_for_each(ttmp, txs) {
- tx = list_entry(ttmp, struct kib_tx, tx_list);
-
- if (txs != &conn->ibc_active_txs) {
- LASSERT(tx->tx_queued);
- } else {
- LASSERT(!tx->tx_queued);
- LASSERT(tx->tx_waiting || tx->tx_sending);
- }
-
- if (time_after_eq(jiffies, tx->tx_deadline)) {
- CERROR("Timed out tx: %s, %lu seconds\n",
- kiblnd_queue2str(conn, txs),
- (jiffies - tx->tx_deadline) / HZ);
- return 1;
- }
- }
-
- return 0;
-}
-
-static int
-kiblnd_conn_timed_out_locked(struct kib_conn *conn)
-{
- return kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue) ||
- kiblnd_check_txs_locked(conn, &conn->ibc_tx_noops) ||
- kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue_rsrvd) ||
- kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue_nocred) ||
- kiblnd_check_txs_locked(conn, &conn->ibc_active_txs);
-}
-
-static void
-kiblnd_check_conns(int idx)
-{
- LIST_HEAD(closes);
- LIST_HEAD(checksends);
- struct list_head *peers = &kiblnd_data.kib_peers[idx];
- struct list_head *ptmp;
- struct kib_peer *peer;
- struct kib_conn *conn;
- struct kib_conn *temp;
- struct kib_conn *tmp;
- struct list_head *ctmp;
- unsigned long flags;
-
- /*
- * NB. We expect to have a look at all the peers and not find any
- * RDMAs to time out, so we just use a shared lock while we
- * take a look...
- */
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-
- list_for_each(ptmp, peers) {
- peer = list_entry(ptmp, struct kib_peer, ibp_list);
-
- list_for_each(ctmp, &peer->ibp_conns) {
- int timedout;
- int sendnoop;
-
- conn = list_entry(ctmp, struct kib_conn, ibc_list);
-
- LASSERT(conn->ibc_state == IBLND_CONN_ESTABLISHED);
-
- spin_lock(&conn->ibc_lock);
-
- sendnoop = kiblnd_need_noop(conn);
- timedout = kiblnd_conn_timed_out_locked(conn);
- if (!sendnoop && !timedout) {
- spin_unlock(&conn->ibc_lock);
- continue;
- }
-
- if (timedout) {
- CERROR("Timed out RDMA with %s (%lu): c: %u, oc: %u, rc: %u\n",
- libcfs_nid2str(peer->ibp_nid),
- (jiffies - peer->ibp_last_alive) / HZ,
- conn->ibc_credits,
- conn->ibc_outstanding_credits,
- conn->ibc_reserved_credits);
- list_add(&conn->ibc_connd_list, &closes);
- } else {
- list_add(&conn->ibc_connd_list, &checksends);
- }
- /* +ref for 'closes' or 'checksends' */
- kiblnd_conn_addref(conn);
-
- spin_unlock(&conn->ibc_lock);
- }
- }
-
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
- /*
- * Handle timeout by closing the whole
- * connection. We can only be sure RDMA activity
- * has ceased once the QP has been modified.
- */
- list_for_each_entry_safe(conn, tmp, &closes, ibc_connd_list) {
- list_del(&conn->ibc_connd_list);
- kiblnd_close_conn(conn, -ETIMEDOUT);
- kiblnd_conn_decref(conn);
- }
-
- /*
- * In case we have enough credits to return via a
- * NOOP, but there were no non-blocking tx descs
- * free to do it last time...
- */
- list_for_each_entry_safe(conn, temp, &checksends, ibc_connd_list) {
- list_del(&conn->ibc_connd_list);
-
- spin_lock(&conn->ibc_lock);
- kiblnd_check_sends_locked(conn);
- spin_unlock(&conn->ibc_lock);
-
- kiblnd_conn_decref(conn);
- }
-}
-
-static void
-kiblnd_disconnect_conn(struct kib_conn *conn)
-{
- LASSERT(!in_interrupt());
- LASSERT(current == kiblnd_data.kib_connd);
- LASSERT(conn->ibc_state == IBLND_CONN_CLOSING);
-
- rdma_disconnect(conn->ibc_cmid);
- kiblnd_finalise_conn(conn);
-
- kiblnd_peer_notify(conn->ibc_peer);
-}
-
-/**
- * High-water mark for reconnections to the same peer: reconnection
- * attempts should be delayed after a peer has raced more than
- * KIB_RECONN_HIGH_RACE times.
- */
-#define KIB_RECONN_HIGH_RACE 10
-/**
- * Allow connd to take a break and handle other things after consecutive
- * reconnection attempts.
- */
-#define KIB_RECONN_BREAK 100
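-
-/*
- * For clarity (behaviour as implemented in kiblnd_connd() below): the
- * kib_reconn_wait list is spliced onto kib_reconn_list at most once per
- * second, and at most KIB_RECONN_BREAK reconnections are attempted per
- * pass, so a peer that has exceeded KIB_RECONN_HIGH_RACE waits for the
- * next splice before it is retried.
- */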
-
-int
-kiblnd_connd(void *arg)
-{
- spinlock_t *lock = &kiblnd_data.kib_connd_lock;
- wait_queue_entry_t wait;
- unsigned long flags;
- struct kib_conn *conn;
- int timeout;
- int i;
- int dropped_lock;
- int peer_index = 0;
- unsigned long deadline = jiffies;
-
- init_waitqueue_entry(&wait, current);
- kiblnd_data.kib_connd = current;
-
- spin_lock_irqsave(lock, flags);
-
- while (!kiblnd_data.kib_shutdown) {
- int reconn = 0;
-
- dropped_lock = 0;
-
- if (!list_empty(&kiblnd_data.kib_connd_zombies)) {
- struct kib_peer *peer = NULL;
-
- conn = list_entry(kiblnd_data.kib_connd_zombies.next,
- struct kib_conn, ibc_list);
- list_del(&conn->ibc_list);
- if (conn->ibc_reconnect) {
- peer = conn->ibc_peer;
- kiblnd_peer_addref(peer);
- }
-
- spin_unlock_irqrestore(lock, flags);
- dropped_lock = 1;
-
- kiblnd_destroy_conn(conn);
-
- spin_lock_irqsave(lock, flags);
- if (!peer) {
- kfree(conn);
- continue;
- }
-
- conn->ibc_peer = peer;
- if (peer->ibp_reconnected < KIB_RECONN_HIGH_RACE)
- list_add_tail(&conn->ibc_list,
- &kiblnd_data.kib_reconn_list);
- else
- list_add_tail(&conn->ibc_list,
- &kiblnd_data.kib_reconn_wait);
- }
-
- if (!list_empty(&kiblnd_data.kib_connd_conns)) {
- conn = list_entry(kiblnd_data.kib_connd_conns.next,
- struct kib_conn, ibc_list);
- list_del(&conn->ibc_list);
-
- spin_unlock_irqrestore(lock, flags);
- dropped_lock = 1;
-
- kiblnd_disconnect_conn(conn);
- kiblnd_conn_decref(conn);
-
- spin_lock_irqsave(lock, flags);
- }
-
- while (reconn < KIB_RECONN_BREAK) {
- if (kiblnd_data.kib_reconn_sec !=
- ktime_get_real_seconds()) {
- kiblnd_data.kib_reconn_sec = ktime_get_real_seconds();
- list_splice_init(&kiblnd_data.kib_reconn_wait,
- &kiblnd_data.kib_reconn_list);
- }
-
- if (list_empty(&kiblnd_data.kib_reconn_list))
- break;
-
- conn = list_entry(kiblnd_data.kib_reconn_list.next,
- struct kib_conn, ibc_list);
- list_del(&conn->ibc_list);
-
- spin_unlock_irqrestore(lock, flags);
- dropped_lock = 1;
-
- reconn += kiblnd_reconnect_peer(conn->ibc_peer);
- kiblnd_peer_decref(conn->ibc_peer);
- kfree(conn);
-
- spin_lock_irqsave(lock, flags);
- }
-
- /* careful with the jiffy wrap... */
- timeout = (int)(deadline - jiffies);
- if (timeout <= 0) {
- const int n = 4;
- const int p = 1;
- int chunk = kiblnd_data.kib_peer_hash_size;
-
- spin_unlock_irqrestore(lock, flags);
- dropped_lock = 1;
-
- /*
- * Time to check for RDMA timeouts on a few more
- * peers: I do checks every 'p' seconds on a
- * proportion of the peer table and I need to check
- * every connection 'n' times within a timeout
- * interval, to ensure I detect a timeout on any
- * connection within (n+1)/n times the timeout
- * interval.
- */
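-			/*
-			 * Worked example (for illustration, with the module
-			 * defaults): timeout = 50, n = 4, p = 1 and a peer
-			 * hash of, say, 128 lists gives chunk = 128 * 4 / 50
-			 * = 10, so about 10 lists are scanned per second and
-			 * the whole table is covered roughly every 13s, i.e.
-			 * about n = 4 times per timeout interval.
-			 */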
- if (*kiblnd_tunables.kib_timeout > n * p)
- chunk = (chunk * n * p) /
- *kiblnd_tunables.kib_timeout;
- if (!chunk)
- chunk = 1;
-
- for (i = 0; i < chunk; i++) {
- kiblnd_check_conns(peer_index);
- peer_index = (peer_index + 1) %
- kiblnd_data.kib_peer_hash_size;
- }
-
- deadline += msecs_to_jiffies(p * MSEC_PER_SEC);
- spin_lock_irqsave(lock, flags);
- }
-
- if (dropped_lock)
- continue;
-
- /* Nothing to do for 'timeout' */
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&kiblnd_data.kib_connd_waitq, &wait);
- spin_unlock_irqrestore(lock, flags);
-
- schedule_timeout(timeout);
-
- remove_wait_queue(&kiblnd_data.kib_connd_waitq, &wait);
- spin_lock_irqsave(lock, flags);
- }
-
- spin_unlock_irqrestore(lock, flags);
-
- kiblnd_thread_fini();
- return 0;
-}
-
-void
-kiblnd_qp_event(struct ib_event *event, void *arg)
-{
- struct kib_conn *conn = arg;
-
- switch (event->event) {
- case IB_EVENT_COMM_EST:
- CDEBUG(D_NET, "%s established\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		/*
-		 * We received a packet but the connection isn't established;
-		 * the handshake packet was probably lost, so we are free to
-		 * force the connection into the established state
-		 */
- rdma_notify(conn->ibc_cmid, IB_EVENT_COMM_EST);
- return;
-
- default:
- CERROR("%s: Async QP event type %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event);
- return;
- }
-}
-
-static void
-kiblnd_complete(struct ib_wc *wc)
-{
- switch (kiblnd_wreqid2type(wc->wr_id)) {
- default:
- LBUG();
-
- case IBLND_WID_MR:
- if (wc->status != IB_WC_SUCCESS &&
- wc->status != IB_WC_WR_FLUSH_ERR)
- CNETERR("FastReg failed: %d\n", wc->status);
- break;
-
- case IBLND_WID_RDMA:
- /*
- * We only get RDMA completion notification if it fails. All
- * subsequent work items, including the final SEND will fail
- * too. However we can't print out any more info about the
- * failing RDMA because 'tx' might be back on the idle list or
- * even reused already if we didn't manage to post all our work
- * items
- */
- CNETERR("RDMA (tx: %p) failed: %d\n",
- kiblnd_wreqid2ptr(wc->wr_id), wc->status);
- return;
-
- case IBLND_WID_TX:
- kiblnd_tx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status);
- return;
-
- case IBLND_WID_RX:
- kiblnd_rx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status,
- wc->byte_len);
- return;
- }
-}
-
-void
-kiblnd_cq_completion(struct ib_cq *cq, void *arg)
-{
- /*
- * NB I'm not allowed to schedule this conn once its refcount has
- * reached 0. Since fundamentally I'm racing with scheduler threads
- * consuming my CQ I could be called after all completions have
- * occurred. But in this case, !ibc_nrx && !ibc_nsends_posted
- * and this CQ is about to be destroyed so I NOOP.
- */
- struct kib_conn *conn = arg;
- struct kib_sched_info *sched = conn->ibc_sched;
- unsigned long flags;
-
- LASSERT(cq == conn->ibc_cq);
-
- spin_lock_irqsave(&sched->ibs_lock, flags);
-
- conn->ibc_ready = 1;
-
- if (!conn->ibc_scheduled &&
- (conn->ibc_nrx > 0 ||
- conn->ibc_nsends_posted > 0)) {
- kiblnd_conn_addref(conn); /* +1 ref for sched_conns */
- conn->ibc_scheduled = 1;
- list_add_tail(&conn->ibc_sched_list, &sched->ibs_conns);
-
- if (waitqueue_active(&sched->ibs_waitq))
- wake_up(&sched->ibs_waitq);
- }
-
- spin_unlock_irqrestore(&sched->ibs_lock, flags);
-}
-
-void
-kiblnd_cq_event(struct ib_event *event, void *arg)
-{
- struct kib_conn *conn = arg;
-
- CERROR("%s: async CQ event type %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event);
-}
-
-int
-kiblnd_scheduler(void *arg)
-{
- long id = (long)arg;
- struct kib_sched_info *sched;
- struct kib_conn *conn;
- wait_queue_entry_t wait;
- unsigned long flags;
- struct ib_wc wc;
- int did_something;
- int busy_loops = 0;
- int rc;
-
- init_waitqueue_entry(&wait, current);
-
- sched = kiblnd_data.kib_scheds[KIB_THREAD_CPT(id)];
-
- rc = cfs_cpt_bind(lnet_cpt_table(), sched->ibs_cpt);
- if (rc) {
-		CWARN("Unable to bind on CPU partition %d, please verify whether all CPUs are healthy and reload modules if necessary, otherwise your system might be at risk of low performance\n",
- sched->ibs_cpt);
- }
-
- spin_lock_irqsave(&sched->ibs_lock, flags);
-
- while (!kiblnd_data.kib_shutdown) {
- if (busy_loops++ >= IBLND_RESCHED) {
- spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
- cond_resched();
- busy_loops = 0;
-
- spin_lock_irqsave(&sched->ibs_lock, flags);
- }
-
- did_something = 0;
-
- if (!list_empty(&sched->ibs_conns)) {
- conn = list_entry(sched->ibs_conns.next, struct kib_conn,
- ibc_sched_list);
- /* take over kib_sched_conns' ref on conn... */
- LASSERT(conn->ibc_scheduled);
- list_del(&conn->ibc_sched_list);
- conn->ibc_ready = 0;
-
- spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
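-			/*
-			 * Note (for clarity): wr_id is primed with the
-			 * IBLND_WID_INVAL sentinel before polling so that
-			 * broken HCA firmware returning a completion without
-			 * filling in wr_id is caught by the check below.
-			 */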
- wc.wr_id = IBLND_WID_INVAL;
-
- rc = ib_poll_cq(conn->ibc_cq, 1, &wc);
- if (!rc) {
- rc = ib_req_notify_cq(conn->ibc_cq,
- IB_CQ_NEXT_COMP);
- if (rc < 0) {
- CWARN("%s: ib_req_notify_cq failed: %d, closing connection\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
- kiblnd_close_conn(conn, -EIO);
- kiblnd_conn_decref(conn);
- spin_lock_irqsave(&sched->ibs_lock,
- flags);
- continue;
- }
-
- rc = ib_poll_cq(conn->ibc_cq, 1, &wc);
- }
-
- if (unlikely(rc > 0 && wc.wr_id == IBLND_WID_INVAL)) {
- LCONSOLE_ERROR("ib_poll_cq (rc: %d) returned invalid wr_id, opcode %d, status: %d, vendor_err: %d, conn: %s status: %d\nplease upgrade firmware and OFED or contact vendor.\n",
- rc, wc.opcode, wc.status,
- wc.vendor_err,
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- conn->ibc_state);
- rc = -EINVAL;
- }
-
- if (rc < 0) {
- CWARN("%s: ib_poll_cq failed: %d, closing connection\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- rc);
- kiblnd_close_conn(conn, -EIO);
- kiblnd_conn_decref(conn);
- spin_lock_irqsave(&sched->ibs_lock, flags);
- continue;
- }
-
- spin_lock_irqsave(&sched->ibs_lock, flags);
-
- if (rc || conn->ibc_ready) {
- /*
- * There may be another completion waiting; get
- * another scheduler to check while I handle
- * this one...
- */
- /* +1 ref for sched_conns */
- kiblnd_conn_addref(conn);
- list_add_tail(&conn->ibc_sched_list,
- &sched->ibs_conns);
- if (waitqueue_active(&sched->ibs_waitq))
- wake_up(&sched->ibs_waitq);
- } else {
- conn->ibc_scheduled = 0;
- }
-
- if (rc) {
- spin_unlock_irqrestore(&sched->ibs_lock, flags);
- kiblnd_complete(&wc);
-
- spin_lock_irqsave(&sched->ibs_lock, flags);
- }
-
- kiblnd_conn_decref(conn); /* ...drop my ref from above */
- did_something = 1;
- }
-
- if (did_something)
- continue;
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue_exclusive(&sched->ibs_waitq, &wait);
- spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
- schedule();
- busy_loops = 0;
-
- remove_wait_queue(&sched->ibs_waitq, &wait);
- spin_lock_irqsave(&sched->ibs_lock, flags);
- }
-
- spin_unlock_irqrestore(&sched->ibs_lock, flags);
-
- kiblnd_thread_fini();
- return 0;
-}
-
-int
-kiblnd_failover_thread(void *arg)
-{
- rwlock_t *glock = &kiblnd_data.kib_global_lock;
- struct kib_dev *dev;
- wait_queue_entry_t wait;
- unsigned long flags;
- int rc;
-
- LASSERT(*kiblnd_tunables.kib_dev_failover);
-
- init_waitqueue_entry(&wait, current);
- write_lock_irqsave(glock, flags);
-
- while (!kiblnd_data.kib_shutdown) {
- int do_failover = 0;
- int long_sleep;
-
- list_for_each_entry(dev, &kiblnd_data.kib_failed_devs,
- ibd_fail_list) {
- if (time_before(jiffies,
- dev->ibd_next_failover))
- continue;
- do_failover = 1;
- break;
- }
-
- if (do_failover) {
- list_del_init(&dev->ibd_fail_list);
- dev->ibd_failover = 1;
- write_unlock_irqrestore(glock, flags);
-
- rc = kiblnd_dev_failover(dev);
-
- write_lock_irqsave(glock, flags);
-
- LASSERT(dev->ibd_failover);
- dev->ibd_failover = 0;
-			if (rc >= 0) { /* Device is OK or failover succeeded */
- dev->ibd_next_failover = jiffies + 3 * HZ;
- continue;
- }
-
-			/* failed to failover; retry later with a capped backoff */
- dev->ibd_next_failover =
- jiffies + min(dev->ibd_failed_failover, 10) * HZ;
- if (kiblnd_dev_can_failover(dev)) {
- list_add_tail(&dev->ibd_fail_list,
- &kiblnd_data.kib_failed_devs);
- }
-
- continue;
- }
-
- /* long sleep if no more pending failover */
- long_sleep = list_empty(&kiblnd_data.kib_failed_devs);
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&kiblnd_data.kib_failover_waitq, &wait);
- write_unlock_irqrestore(glock, flags);
-
- rc = schedule_timeout(long_sleep ? 10 * HZ :
- HZ);
- remove_wait_queue(&kiblnd_data.kib_failover_waitq, &wait);
- write_lock_irqsave(glock, flags);
-
- if (!long_sleep || rc)
- continue;
-
-		/*
-		 * After a long sleep, routinely check all active devices.
-		 * We need a check like this because, with no active
-		 * connection on a device and no local SEND, we may listen
-		 * on the wrong HCA forever while a bonding failover happens
-		 */
- list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) {
- if (kiblnd_dev_can_failover(dev)) {
- list_add_tail(&dev->ibd_fail_list,
- &kiblnd_data.kib_failed_devs);
- }
- }
- }
-
- write_unlock_irqrestore(glock, flags);
-
- kiblnd_thread_fini();
- return 0;
-}
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c
deleted file mode 100644
index 39d07926d603..000000000000
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c
+++ /dev/null
@@ -1,296 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/o2iblnd/o2iblnd_modparams.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include "o2iblnd.h"
-
-static int service = 987;
-module_param(service, int, 0444);
-MODULE_PARM_DESC(service, "service number (within RDMA_PS_TCP)");
-
-static int cksum;
-module_param(cksum, int, 0644);
-MODULE_PARM_DESC(cksum, "set non-zero to enable message (not RDMA) checksums");
-
-static int timeout = 50;
-module_param(timeout, int, 0644);
-MODULE_PARM_DESC(timeout, "timeout (seconds)");
-
-/*
- * Number of threads in each scheduler pool (which is per-CPT); if this is
- * set to zero, a reasonable value is estimated from the number of CPUs.
- */
-static int nscheds;
-module_param(nscheds, int, 0444);
-MODULE_PARM_DESC(nscheds, "number of threads in each scheduler pool");
-
-static unsigned int conns_per_peer = 1;
-module_param(conns_per_peer, uint, 0444);
-MODULE_PARM_DESC(conns_per_peer, "number of connections per peer");
-
-/* NB: this value is shared by all CPTs, it can grow at runtime */
-static int ntx = 512;
-module_param(ntx, int, 0444);
-MODULE_PARM_DESC(ntx, "# of message descriptors allocated for each pool");
-
-/* NB: this value is shared by all CPTs */
-static int credits = 256;
-module_param(credits, int, 0444);
-MODULE_PARM_DESC(credits, "# concurrent sends");
-
-static int peer_credits = 8;
-module_param(peer_credits, int, 0444);
-MODULE_PARM_DESC(peer_credits, "# concurrent sends to 1 peer");
-
-static int peer_credits_hiw;
-module_param(peer_credits_hiw, int, 0444);
-MODULE_PARM_DESC(peer_credits_hiw, "high-water mark for returning credits eagerly");
-
-static int peer_buffer_credits;
-module_param(peer_buffer_credits, int, 0444);
-MODULE_PARM_DESC(peer_buffer_credits, "# per-peer router buffer credits");
-
-static int peer_timeout = 180;
-module_param(peer_timeout, int, 0444);
-MODULE_PARM_DESC(peer_timeout, "Seconds without aliveness news before declaring a peer dead (<=0 to disable)");
-
-static char *ipif_name = "ib0";
-module_param(ipif_name, charp, 0444);
-MODULE_PARM_DESC(ipif_name, "IPoIB interface name");
-
-static int retry_count = 5;
-module_param(retry_count, int, 0644);
-MODULE_PARM_DESC(retry_count, "Retransmissions when no ACK received");
-
-static int rnr_retry_count = 6;
-module_param(rnr_retry_count, int, 0644);
-MODULE_PARM_DESC(rnr_retry_count, "RNR retransmissions");
-
-static int keepalive = 100;
-module_param(keepalive, int, 0644);
-MODULE_PARM_DESC(keepalive, "Idle time in seconds before sending a keepalive");
-
-static int ib_mtu;
-module_param(ib_mtu, int, 0444);
-MODULE_PARM_DESC(ib_mtu, "IB MTU 256/512/1024/2048/4096");
-
-static int concurrent_sends;
-module_param(concurrent_sends, int, 0444);
-MODULE_PARM_DESC(concurrent_sends, "send work-queue sizing");
-
-#define IBLND_DEFAULT_MAP_ON_DEMAND IBLND_MAX_RDMA_FRAGS
-static int map_on_demand = IBLND_DEFAULT_MAP_ON_DEMAND;
-module_param(map_on_demand, int, 0444);
-MODULE_PARM_DESC(map_on_demand, "map on demand");
-
-/* NB: this value is shared by all CPTs, it can grow at runtime */
-static int fmr_pool_size = 512;
-module_param(fmr_pool_size, int, 0444);
-MODULE_PARM_DESC(fmr_pool_size, "size of fmr pool on each CPT (>= ntx / 4)");
-
-/* NB: this value is shared by all CPTs, it can grow at runtime */
-static int fmr_flush_trigger = 384;
-module_param(fmr_flush_trigger, int, 0444);
-MODULE_PARM_DESC(fmr_flush_trigger, "# dirty FMRs that triggers pool flush");
-
-static int fmr_cache = 1;
-module_param(fmr_cache, int, 0444);
-MODULE_PARM_DESC(fmr_cache, "non-zero to enable FMR caching");
-
-/*
- * 0: disable failover
- * 1: enable failover if necessary
- * 2: force to failover (for debug)
- */
-static int dev_failover;
-module_param(dev_failover, int, 0444);
-MODULE_PARM_DESC(dev_failover, "HCA failover for bonding (0 off, 1 on, other values reserved)");
-
-static int require_privileged_port;
-module_param(require_privileged_port, int, 0644);
-MODULE_PARM_DESC(require_privileged_port, "require privileged port when accepting connection");
-
-static int use_privileged_port = 1;
-module_param(use_privileged_port, int, 0644);
-MODULE_PARM_DESC(use_privileged_port, "use privileged port when initiating connection");
-
-struct kib_tunables kiblnd_tunables = {
- .kib_dev_failover = &dev_failover,
- .kib_service = &service,
- .kib_cksum = &cksum,
- .kib_timeout = &timeout,
- .kib_keepalive = &keepalive,
- .kib_ntx = &ntx,
- .kib_default_ipif = &ipif_name,
- .kib_retry_count = &retry_count,
- .kib_rnr_retry_count = &rnr_retry_count,
- .kib_ib_mtu = &ib_mtu,
- .kib_require_priv_port = &require_privileged_port,
- .kib_use_priv_port = &use_privileged_port,
- .kib_nscheds = &nscheds
-};
-
-static struct lnet_ioctl_config_o2iblnd_tunables default_tunables;
-
-/* # messages/RDMAs in-flight */
-int kiblnd_msg_queue_size(int version, struct lnet_ni *ni)
-{
- if (version == IBLND_MSG_VERSION_1)
- return IBLND_MSG_QUEUE_SIZE_V1;
- else if (ni)
- return ni->ni_peertxcredits;
- else
- return peer_credits;
-}
-
-int kiblnd_tunables_setup(struct lnet_ni *ni)
-{
- struct lnet_ioctl_config_o2iblnd_tunables *tunables;
-
-	/*
-	 * if no tunables were specified, set up the tunables with the
-	 * defaults
-	 */
- if (!ni->ni_lnd_tunables) {
- ni->ni_lnd_tunables = kzalloc(sizeof(*ni->ni_lnd_tunables),
- GFP_NOFS);
- if (!ni->ni_lnd_tunables)
- return -ENOMEM;
-
- memcpy(&ni->ni_lnd_tunables->lt_tun_u.lt_o2ib,
- &default_tunables, sizeof(*tunables));
- }
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
-
- /* Current API version */
- tunables->lnd_version = 0;
-
- if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
- CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
- *kiblnd_tunables.kib_ib_mtu);
- return -EINVAL;
- }
-
- if (!ni->ni_peertimeout)
- ni->ni_peertimeout = peer_timeout;
-
- if (!ni->ni_maxtxcredits)
- ni->ni_maxtxcredits = credits;
-
- if (!ni->ni_peertxcredits)
- ni->ni_peertxcredits = peer_credits;
-
- if (!ni->ni_peerrtrcredits)
- ni->ni_peerrtrcredits = peer_buffer_credits;
-
- if (ni->ni_peertxcredits < IBLND_CREDITS_DEFAULT)
- ni->ni_peertxcredits = IBLND_CREDITS_DEFAULT;
-
- if (ni->ni_peertxcredits > IBLND_CREDITS_MAX)
- ni->ni_peertxcredits = IBLND_CREDITS_MAX;
-
- if (ni->ni_peertxcredits > credits)
- ni->ni_peertxcredits = credits;
-
- if (!tunables->lnd_peercredits_hiw)
- tunables->lnd_peercredits_hiw = peer_credits_hiw;
-
- if (tunables->lnd_peercredits_hiw < ni->ni_peertxcredits / 2)
- tunables->lnd_peercredits_hiw = ni->ni_peertxcredits / 2;
-
- if (tunables->lnd_peercredits_hiw >= ni->ni_peertxcredits)
- tunables->lnd_peercredits_hiw = ni->ni_peertxcredits - 1;
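-
-	/*
-	 * Worked example (for illustration, assuming the module defaults
-	 * leave ni_peertxcredits at peer_credits = 8): peer_credits_hiw
-	 * defaults to 0, so lnd_peercredits_hiw first inherits 0, is then
-	 * raised to ni_peertxcredits / 2 = 4, and the upper clamp
-	 * (< ni_peertxcredits) leaves it at 4.
-	 */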
-
- if (tunables->lnd_map_on_demand <= 0 ||
- tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) {
- /* Use the default */
-		CWARN("Invalid map_on_demand (%d), expected 1-%d. Using default of %d\n",
- tunables->lnd_map_on_demand,
- IBLND_MAX_RDMA_FRAGS, IBLND_DEFAULT_MAP_ON_DEMAND);
- tunables->lnd_map_on_demand = IBLND_DEFAULT_MAP_ON_DEMAND;
- }
-
- if (tunables->lnd_map_on_demand == 1) {
-		/* it doesn't make sense to create a map for a single fragment */
- tunables->lnd_map_on_demand = 2;
- }
-
- if (!tunables->lnd_concurrent_sends) {
- if (tunables->lnd_map_on_demand > 0 &&
- tunables->lnd_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) {
- tunables->lnd_concurrent_sends =
- ni->ni_peertxcredits * 2;
- } else {
- tunables->lnd_concurrent_sends = ni->ni_peertxcredits;
- }
- }
-
- if (tunables->lnd_concurrent_sends > ni->ni_peertxcredits * 2)
- tunables->lnd_concurrent_sends = ni->ni_peertxcredits * 2;
-
- if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits / 2)
- tunables->lnd_concurrent_sends = ni->ni_peertxcredits / 2;
-
- if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits) {
- CWARN("Concurrent sends %d is lower than message queue size: %d, performance may drop slightly.\n",
- tunables->lnd_concurrent_sends, ni->ni_peertxcredits);
- }
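-
-	/*
-	 * For illustration (assuming map_on_demand keeps its
-	 * IBLND_MAX_RDMA_FRAGS default): map_on_demand is then larger than
-	 * IBLND_MAX_RDMA_FRAGS / 8, so lnd_concurrent_sends defaults to
-	 * ni_peertxcredits and the clamps above are no-ops.
-	 */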
-
- if (!tunables->lnd_fmr_pool_size)
- tunables->lnd_fmr_pool_size = fmr_pool_size;
- if (!tunables->lnd_fmr_flush_trigger)
- tunables->lnd_fmr_flush_trigger = fmr_flush_trigger;
- if (!tunables->lnd_fmr_cache)
- tunables->lnd_fmr_cache = fmr_cache;
- if (!tunables->lnd_conns_per_peer) {
- tunables->lnd_conns_per_peer = (conns_per_peer) ?
- conns_per_peer : 1;
- }
-
- return 0;
-}
-
-void kiblnd_tunables_init(void)
-{
- default_tunables.lnd_version = 0;
-	default_tunables.lnd_peercredits_hiw = peer_credits_hiw;
- default_tunables.lnd_map_on_demand = map_on_demand;
- default_tunables.lnd_concurrent_sends = concurrent_sends;
- default_tunables.lnd_fmr_pool_size = fmr_pool_size;
- default_tunables.lnd_fmr_flush_trigger = fmr_flush_trigger;
- default_tunables.lnd_fmr_cache = fmr_cache;
- default_tunables.lnd_conns_per_peer = conns_per_peer;
-}
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/Makefile b/drivers/staging/lustre/lnet/klnds/socklnd/Makefile
deleted file mode 100644
index a7da1abfc804..000000000000
--- a/drivers/staging/lustre/lnet/klnds/socklnd/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET) += ksocklnd.o
-
-ksocklnd-y := socklnd.o socklnd_cb.o socklnd_proto.o socklnd_modparams.o socklnd_lib.o
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
deleted file mode 100644
index f01b34ac1a53..000000000000
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
+++ /dev/null
@@ -1,2921 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/socklnd/socklnd.c
- *
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include "socklnd.h"
-
-static struct lnet_lnd the_ksocklnd;
-struct ksock_nal_data ksocknal_data;
-
-static struct ksock_interface *
-ksocknal_ip2iface(struct lnet_ni *ni, __u32 ip)
-{
- struct ksock_net *net = ni->ni_data;
- int i;
- struct ksock_interface *iface;
-
- for (i = 0; i < net->ksnn_ninterfaces; i++) {
- LASSERT(i < LNET_MAX_INTERFACES);
- iface = &net->ksnn_interfaces[i];
-
- if (iface->ksni_ipaddr == ip)
- return iface;
- }
-
- return NULL;
-}
-
-static struct ksock_route *
-ksocknal_create_route(__u32 ipaddr, int port)
-{
- struct ksock_route *route;
-
- route = kzalloc(sizeof(*route), GFP_NOFS);
- if (!route)
- return NULL;
-
- atomic_set(&route->ksnr_refcount, 1);
- route->ksnr_peer = NULL;
- route->ksnr_retry_interval = 0; /* OK to connect at any time */
- route->ksnr_ipaddr = ipaddr;
- route->ksnr_port = port;
- route->ksnr_scheduled = 0;
- route->ksnr_connecting = 0;
- route->ksnr_connected = 0;
- route->ksnr_deleted = 0;
- route->ksnr_conn_count = 0;
- route->ksnr_share_count = 0;
-
- return route;
-}
-
-void
-ksocknal_destroy_route(struct ksock_route *route)
-{
- LASSERT(!atomic_read(&route->ksnr_refcount));
-
- if (route->ksnr_peer)
- ksocknal_peer_decref(route->ksnr_peer);
-
- kfree(route);
-}
-
-static int
-ksocknal_create_peer(struct ksock_peer **peerp, struct lnet_ni *ni,
- struct lnet_process_id id)
-{
- int cpt = lnet_cpt_of_nid(id.nid);
- struct ksock_net *net = ni->ni_data;
- struct ksock_peer *peer;
-
- LASSERT(id.nid != LNET_NID_ANY);
- LASSERT(id.pid != LNET_PID_ANY);
- LASSERT(!in_interrupt());
-
- peer = kzalloc_cpt(sizeof(*peer), GFP_NOFS, cpt);
- if (!peer)
- return -ENOMEM;
-
- peer->ksnp_ni = ni;
- peer->ksnp_id = id;
- atomic_set(&peer->ksnp_refcount, 1); /* 1 ref for caller */
- peer->ksnp_closing = 0;
- peer->ksnp_accepting = 0;
- peer->ksnp_proto = NULL;
- peer->ksnp_last_alive = 0;
- peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
-
- INIT_LIST_HEAD(&peer->ksnp_conns);
- INIT_LIST_HEAD(&peer->ksnp_routes);
- INIT_LIST_HEAD(&peer->ksnp_tx_queue);
- INIT_LIST_HEAD(&peer->ksnp_zc_req_list);
- spin_lock_init(&peer->ksnp_lock);
-
- spin_lock_bh(&net->ksnn_lock);
-
- if (net->ksnn_shutdown) {
- spin_unlock_bh(&net->ksnn_lock);
-
- kfree(peer);
- CERROR("Can't create peer: network shutdown\n");
- return -ESHUTDOWN;
- }
-
- net->ksnn_npeers++;
-
- spin_unlock_bh(&net->ksnn_lock);
-
- *peerp = peer;
- return 0;
-}
-
-void
-ksocknal_destroy_peer(struct ksock_peer *peer)
-{
- struct ksock_net *net = peer->ksnp_ni->ni_data;
-
- CDEBUG(D_NET, "peer %s %p deleted\n",
- libcfs_id2str(peer->ksnp_id), peer);
-
- LASSERT(!atomic_read(&peer->ksnp_refcount));
- LASSERT(!peer->ksnp_accepting);
- LASSERT(list_empty(&peer->ksnp_conns));
- LASSERT(list_empty(&peer->ksnp_routes));
- LASSERT(list_empty(&peer->ksnp_tx_queue));
- LASSERT(list_empty(&peer->ksnp_zc_req_list));
-
- kfree(peer);
-
- /*
- * NB a peer's connections and routes keep a reference on their peer
- * until they are destroyed, so we can be assured that _all_ state to
- * do with this peer has been cleaned up when its refcount drops to
- * zero.
- */
- spin_lock_bh(&net->ksnn_lock);
- net->ksnn_npeers--;
- spin_unlock_bh(&net->ksnn_lock);
-}
-
-struct ksock_peer *
-ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_process_id id)
-{
- struct list_head *peer_list = ksocknal_nid2peerlist(id.nid);
- struct ksock_peer *peer;
-
- list_for_each_entry(peer, peer_list, ksnp_list) {
- LASSERT(!peer->ksnp_closing);
-
- if (peer->ksnp_ni != ni)
- continue;
-
- if (peer->ksnp_id.nid != id.nid ||
- peer->ksnp_id.pid != id.pid)
- continue;
-
- CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
- peer, libcfs_id2str(id),
- atomic_read(&peer->ksnp_refcount));
- return peer;
- }
- return NULL;
-}
-
-struct ksock_peer *
-ksocknal_find_peer(struct lnet_ni *ni, struct lnet_process_id id)
-{
- struct ksock_peer *peer;
-
- read_lock(&ksocknal_data.ksnd_global_lock);
- peer = ksocknal_find_peer_locked(ni, id);
- if (peer) /* +1 ref for caller? */
- ksocknal_peer_addref(peer);
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- return peer;
-}
-
-static void
-ksocknal_unlink_peer_locked(struct ksock_peer *peer)
-{
- int i;
- __u32 ip;
- struct ksock_interface *iface;
-
- for (i = 0; i < peer->ksnp_n_passive_ips; i++) {
- LASSERT(i < LNET_MAX_INTERFACES);
- ip = peer->ksnp_passive_ips[i];
-
- iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
- /*
- * All IPs in peer->ksnp_passive_ips[] come from the
- * interface list, therefore the call must succeed.
- */
- LASSERT(iface);
-
- CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n",
- peer, iface, iface->ksni_nroutes);
- iface->ksni_npeers--;
- }
-
- LASSERT(list_empty(&peer->ksnp_conns));
- LASSERT(list_empty(&peer->ksnp_routes));
- LASSERT(!peer->ksnp_closing);
- peer->ksnp_closing = 1;
- list_del(&peer->ksnp_list);
- /* lose peerlist's ref */
- ksocknal_peer_decref(peer);
-}
-
-static int
-ksocknal_get_peer_info(struct lnet_ni *ni, int index,
- struct lnet_process_id *id, __u32 *myip, __u32 *peer_ip,
- int *port, int *conn_count, int *share_count)
-{
- struct ksock_peer *peer;
- struct list_head *ptmp;
- struct ksock_route *route;
- struct list_head *rtmp;
- int i;
- int j;
- int rc = -ENOENT;
-
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
- list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
-
- if (peer->ksnp_ni != ni)
- continue;
-
- if (!peer->ksnp_n_passive_ips &&
- list_empty(&peer->ksnp_routes)) {
- if (index-- > 0)
- continue;
-
- *id = peer->ksnp_id;
- *myip = 0;
- *peer_ip = 0;
- *port = 0;
- *conn_count = 0;
- *share_count = 0;
- rc = 0;
- goto out;
- }
-
- for (j = 0; j < peer->ksnp_n_passive_ips; j++) {
- if (index-- > 0)
- continue;
-
- *id = peer->ksnp_id;
- *myip = peer->ksnp_passive_ips[j];
- *peer_ip = 0;
- *port = 0;
- *conn_count = 0;
- *share_count = 0;
- rc = 0;
- goto out;
- }
-
- list_for_each(rtmp, &peer->ksnp_routes) {
- if (index-- > 0)
- continue;
-
- route = list_entry(rtmp, struct ksock_route,
- ksnr_list);
-
- *id = peer->ksnp_id;
- *myip = route->ksnr_myipaddr;
- *peer_ip = route->ksnr_ipaddr;
- *port = route->ksnr_port;
- *conn_count = route->ksnr_conn_count;
- *share_count = route->ksnr_share_count;
- rc = 0;
- goto out;
- }
- }
- }
- out:
- read_unlock(&ksocknal_data.ksnd_global_lock);
- return rc;
-}
-
-static void
-ksocknal_associate_route_conn_locked(struct ksock_route *route,
- struct ksock_conn *conn)
-{
- struct ksock_peer *peer = route->ksnr_peer;
- int type = conn->ksnc_type;
- struct ksock_interface *iface;
-
- conn->ksnc_route = route;
- ksocknal_route_addref(route);
-
- if (route->ksnr_myipaddr != conn->ksnc_myipaddr) {
- if (!route->ksnr_myipaddr) {
- /* route wasn't bound locally yet (the initial route) */
- CDEBUG(D_NET, "Binding %s %pI4h to %pI4h\n",
- libcfs_id2str(peer->ksnp_id),
- &route->ksnr_ipaddr,
- &conn->ksnc_myipaddr);
- } else {
- CDEBUG(D_NET, "Rebinding %s %pI4h from %pI4h to %pI4h\n",
- libcfs_id2str(peer->ksnp_id),
- &route->ksnr_ipaddr,
- &route->ksnr_myipaddr,
- &conn->ksnc_myipaddr);
-
- iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
- route->ksnr_myipaddr);
- if (iface)
- iface->ksni_nroutes--;
- }
- route->ksnr_myipaddr = conn->ksnc_myipaddr;
- iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
- route->ksnr_myipaddr);
- if (iface)
- iface->ksni_nroutes++;
- }
-
- route->ksnr_connected |= (1 << type);
- route->ksnr_conn_count++;
-
- /*
- * Successful connection => further attempts can
- * proceed immediately
- */
- route->ksnr_retry_interval = 0;
-}
-
-static void
-ksocknal_add_route_locked(struct ksock_peer *peer, struct ksock_route *route)
-{
- struct list_head *tmp;
- struct ksock_conn *conn;
- struct ksock_route *route2;
-
- LASSERT(!peer->ksnp_closing);
- LASSERT(!route->ksnr_peer);
- LASSERT(!route->ksnr_scheduled);
- LASSERT(!route->ksnr_connecting);
- LASSERT(!route->ksnr_connected);
-
- /* LASSERT(unique) */
- list_for_each(tmp, &peer->ksnp_routes) {
- route2 = list_entry(tmp, struct ksock_route, ksnr_list);
-
- if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
- CERROR("Duplicate route %s %pI4h\n",
- libcfs_id2str(peer->ksnp_id),
- &route->ksnr_ipaddr);
- LBUG();
- }
- }
-
- route->ksnr_peer = peer;
- ksocknal_peer_addref(peer);
- /* peer's routelist takes over my ref on 'route' */
- list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
-
- list_for_each(tmp, &peer->ksnp_conns) {
- conn = list_entry(tmp, struct ksock_conn, ksnc_list);
-
- if (conn->ksnc_ipaddr != route->ksnr_ipaddr)
- continue;
-
- ksocknal_associate_route_conn_locked(route, conn);
- /* keep going (typed routes) */
- }
-}
-
-static void
-ksocknal_del_route_locked(struct ksock_route *route)
-{
- struct ksock_peer *peer = route->ksnr_peer;
- struct ksock_interface *iface;
- struct ksock_conn *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
-
- LASSERT(!route->ksnr_deleted);
-
- /* Close associated conns */
- list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) {
- conn = list_entry(ctmp, struct ksock_conn, ksnc_list);
-
- if (conn->ksnc_route != route)
- continue;
-
- ksocknal_close_conn_locked(conn, 0);
- }
-
- if (route->ksnr_myipaddr) {
- iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
- route->ksnr_myipaddr);
- if (iface)
- iface->ksni_nroutes--;
- }
-
- route->ksnr_deleted = 1;
- list_del(&route->ksnr_list);
- ksocknal_route_decref(route); /* drop peer's ref */
-
- if (list_empty(&peer->ksnp_routes) &&
- list_empty(&peer->ksnp_conns)) {
- /*
- * I've just removed the last route to a peer with no active
- * connections
- */
- ksocknal_unlink_peer_locked(peer);
- }
-}
-
-int
-ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr,
- int port)
-{
- struct ksock_peer *peer;
- struct ksock_peer *peer2;
- struct ksock_route *route;
- struct ksock_route *route2;
- int rc;
-
- if (id.nid == LNET_NID_ANY ||
- id.pid == LNET_PID_ANY)
- return -EINVAL;
-
- /* Have a brand new peer ready... */
- rc = ksocknal_create_peer(&peer, ni, id);
- if (rc)
- return rc;
-
- route = ksocknal_create_route(ipaddr, port);
- if (!route) {
- ksocknal_peer_decref(peer);
- return -ENOMEM;
- }
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- /* always called with a ref on ni, so shutdown can't have started */
- LASSERT(!((struct ksock_net *)ni->ni_data)->ksnn_shutdown);
-
- peer2 = ksocknal_find_peer_locked(ni, id);
- if (peer2) {
- ksocknal_peer_decref(peer);
- peer = peer2;
- } else {
- /* peer table takes my ref on peer */
- list_add_tail(&peer->ksnp_list,
- ksocknal_nid2peerlist(id.nid));
- }
-
- list_for_each_entry(route2, &peer->ksnp_routes, ksnr_list) {
- if (route2->ksnr_ipaddr == ipaddr) {
- /* Route already exists, use the old one */
- ksocknal_route_decref(route);
- route2->ksnr_share_count++;
- goto out;
- }
- }
- /* Route doesn't already exist, add the new one */
- ksocknal_add_route_locked(peer, route);
- route->ksnr_share_count++;
-out:
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- return 0;
-}
-
-static void
-ksocknal_del_peer_locked(struct ksock_peer *peer, __u32 ip)
-{
- struct ksock_conn *conn;
- struct ksock_route *route;
- struct list_head *tmp;
- struct list_head *nxt;
- int nshared;
-
- LASSERT(!peer->ksnp_closing);
-
- /* Extra ref prevents peer disappearing until I'm done with it */
- ksocknal_peer_addref(peer);
-
- list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
- route = list_entry(tmp, struct ksock_route, ksnr_list);
-
- /* no match */
- if (!(!ip || route->ksnr_ipaddr == ip))
- continue;
-
- route->ksnr_share_count = 0;
- /* This deletes associated conns too */
- ksocknal_del_route_locked(route);
- }
-
- nshared = 0;
- list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
- route = list_entry(tmp, struct ksock_route, ksnr_list);
- nshared += route->ksnr_share_count;
- }
-
- if (!nshared) {
- /*
- * remove everything else if there are no explicit entries
- * left
- */
- list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
- route = list_entry(tmp, struct ksock_route, ksnr_list);
-
- /* we should only be removing auto-entries */
- LASSERT(!route->ksnr_share_count);
- ksocknal_del_route_locked(route);
- }
-
- list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
- conn = list_entry(tmp, struct ksock_conn, ksnc_list);
-
- ksocknal_close_conn_locked(conn, 0);
- }
- }
-
- ksocknal_peer_decref(peer);
- /* NB peer unlinks itself when last conn/route is removed */
-}
-
-static int
-ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip)
-{
- LIST_HEAD(zombies);
- struct list_head *ptmp;
- struct list_head *pnxt;
- struct ksock_peer *peer;
- int lo;
- int hi;
- int i;
- int rc = -ENOENT;
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- if (id.nid != LNET_NID_ANY) {
- lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
- hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
- } else {
- lo = 0;
- hi = ksocknal_data.ksnd_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe(ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
-
- if (peer->ksnp_ni != ni)
- continue;
-
- if (!((id.nid == LNET_NID_ANY || peer->ksnp_id.nid == id.nid) &&
- (id.pid == LNET_PID_ANY || peer->ksnp_id.pid == id.pid)))
- continue;
-
- ksocknal_peer_addref(peer); /* a ref for me... */
-
- ksocknal_del_peer_locked(peer, ip);
-
- if (peer->ksnp_closing &&
- !list_empty(&peer->ksnp_tx_queue)) {
- LASSERT(list_empty(&peer->ksnp_conns));
- LASSERT(list_empty(&peer->ksnp_routes));
-
- list_splice_init(&peer->ksnp_tx_queue,
- &zombies);
- }
-
- ksocknal_peer_decref(peer); /* ...till here */
-
- rc = 0; /* matched! */
- }
- }
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- ksocknal_txlist_done(ni, &zombies, 1);
-
- return rc;
-}
-
-static struct ksock_conn *
-ksocknal_get_conn_by_idx(struct lnet_ni *ni, int index)
-{
- struct ksock_peer *peer;
- struct list_head *ptmp;
- struct ksock_conn *conn;
- struct list_head *ctmp;
- int i;
-
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
- list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
-
- LASSERT(!peer->ksnp_closing);
-
- if (peer->ksnp_ni != ni)
- continue;
-
- list_for_each(ctmp, &peer->ksnp_conns) {
- if (index-- > 0)
- continue;
-
- conn = list_entry(ctmp, struct ksock_conn,
- ksnc_list);
- ksocknal_conn_addref(conn);
- read_unlock(&ksocknal_data.ksnd_global_lock);
- return conn;
- }
- }
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
- return NULL;
-}
-
-static struct ksock_sched *
-ksocknal_choose_scheduler_locked(unsigned int cpt)
-{
- struct ksock_sched_info *info = ksocknal_data.ksnd_sched_info[cpt];
- struct ksock_sched *sched;
- int i;
-
- LASSERT(info->ksi_nthreads > 0);
-
- sched = &info->ksi_scheds[0];
-	/*
-	 * NB: this is safe so far, but info->ksi_nthreads could change at
-	 * runtime once we have dynamic LNet configuration; we will need to
-	 * take care of this then.
-	 */
- for (i = 1; i < info->ksi_nthreads; i++) {
- if (sched->kss_nconns > info->ksi_scheds[i].kss_nconns)
- sched = &info->ksi_scheds[i];
- }
-
- return sched;
-}
-
-static int
-ksocknal_local_ipvec(struct lnet_ni *ni, __u32 *ipaddrs)
-{
- struct ksock_net *net = ni->ni_data;
- int i;
- int nip;
-
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- nip = net->ksnn_ninterfaces;
- LASSERT(nip <= LNET_MAX_INTERFACES);
-
- /*
- * Only offer interfaces for additional connections if I have
- * more than one.
- */
- if (nip < 2) {
- read_unlock(&ksocknal_data.ksnd_global_lock);
- return 0;
- }
-
- for (i = 0; i < nip; i++) {
- ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr;
- LASSERT(ipaddrs[i]);
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
- return nip;
-}
-
-static int
-ksocknal_match_peerip(struct ksock_interface *iface, __u32 *ips, int nips)
-{
- int best_netmatch = 0;
- int best_xor = 0;
- int best = -1;
- int this_xor;
- int this_netmatch;
- int i;
-
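-	/*
-	 * For clarity: a peer IP on the same subnet (netmask match) always
-	 * wins; ties are broken by the smaller XOR distance, i.e. the longer
-	 * shared address prefix.
-	 */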
- for (i = 0; i < nips; i++) {
- if (!ips[i])
- continue;
-
- this_xor = ips[i] ^ iface->ksni_ipaddr;
- this_netmatch = !(this_xor & iface->ksni_netmask) ? 1 : 0;
-
- if (!(best < 0 ||
- best_netmatch < this_netmatch ||
- (best_netmatch == this_netmatch &&
- best_xor > this_xor)))
- continue;
-
- best = i;
- best_netmatch = this_netmatch;
- best_xor = this_xor;
- }
-
- LASSERT(best >= 0);
- return best;
-}
-
-static int
-ksocknal_select_ips(struct ksock_peer *peer, __u32 *peerips, int n_peerips)
-{
- rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
- struct ksock_net *net = peer->ksnp_ni->ni_data;
- struct ksock_interface *iface;
- struct ksock_interface *best_iface;
- int n_ips;
- int i;
- int j;
- int k;
- __u32 ip;
- __u32 xor;
- int this_netmatch;
- int best_netmatch;
- int best_npeers;
-
- /*
- * CAVEAT EMPTOR: We do all our interface matching with an
- * exclusive hold of global lock at IRQ priority. We're only
- * expecting to be dealing with small numbers of interfaces, so the
- * O(n**3)-ness shouldn't matter
- */
- /*
- * Also note that I'm not going to return more than n_peerips
- * interfaces, even if I have more myself
- */
- write_lock_bh(global_lock);
-
- LASSERT(n_peerips <= LNET_MAX_INTERFACES);
- LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
-
- /*
- * Only match interfaces for additional connections
- * if I have > 1 interface
- */
- n_ips = (net->ksnn_ninterfaces < 2) ? 0 :
- min(n_peerips, net->ksnn_ninterfaces);
-
- for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) {
- /* ^ yes really... */
-
- /*
- * If we have any new interfaces, first tick off all the
- * peer IPs that match old interfaces, then choose new
- * interfaces to match the remaining peer IPS.
- * We don't forget interfaces we've stopped using; we might
- * start using them again...
- */
- if (i < peer->ksnp_n_passive_ips) {
- /* Old interface. */
- ip = peer->ksnp_passive_ips[i];
- best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
-
- /* peer passive ips are kept up to date */
- LASSERT(best_iface);
- } else {
- /* choose a new interface */
- LASSERT(i == peer->ksnp_n_passive_ips);
-
- best_iface = NULL;
- best_netmatch = 0;
- best_npeers = 0;
-
- for (j = 0; j < net->ksnn_ninterfaces; j++) {
- iface = &net->ksnn_interfaces[j];
- ip = iface->ksni_ipaddr;
-
- for (k = 0; k < peer->ksnp_n_passive_ips; k++)
- if (peer->ksnp_passive_ips[k] == ip)
- break;
-
- if (k < peer->ksnp_n_passive_ips) /* using it already */
- continue;
-
- k = ksocknal_match_peerip(iface, peerips,
- n_peerips);
- xor = ip ^ peerips[k];
- this_netmatch = !(xor & iface->ksni_netmask) ? 1 : 0;
-
- if (!(!best_iface ||
- best_netmatch < this_netmatch ||
- (best_netmatch == this_netmatch &&
- best_npeers > iface->ksni_npeers)))
- continue;
-
- best_iface = iface;
- best_netmatch = this_netmatch;
- best_npeers = iface->ksni_npeers;
- }
-
- LASSERT(best_iface);
-
- best_iface->ksni_npeers++;
- ip = best_iface->ksni_ipaddr;
- peer->ksnp_passive_ips[i] = ip;
- peer->ksnp_n_passive_ips = i + 1;
- }
-
- /* mark the best matching peer IP used */
- j = ksocknal_match_peerip(best_iface, peerips, n_peerips);
- peerips[j] = 0;
- }
-
- /* Overwrite input peer IP addresses */
- memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips));
-
- write_unlock_bh(global_lock);
-
- return n_ips;
-}
-
-static void
-ksocknal_create_routes(struct ksock_peer *peer, int port,
- __u32 *peer_ipaddrs, int npeer_ipaddrs)
-{
- struct ksock_route *newroute = NULL;
- rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
- struct lnet_ni *ni = peer->ksnp_ni;
- struct ksock_net *net = ni->ni_data;
- struct list_head *rtmp;
- struct ksock_route *route;
- struct ksock_interface *iface;
- struct ksock_interface *best_iface;
- int best_netmatch;
- int this_netmatch;
- int best_nroutes;
- int i;
- int j;
-
- /*
- * CAVEAT EMPTOR: We do all our interface matching with an
- * exclusive hold of global lock at IRQ priority. We're only
- * expecting to be dealing with small numbers of interfaces, so the
- * O(n**3)-ness here shouldn't matter
- */
- write_lock_bh(global_lock);
-
- if (net->ksnn_ninterfaces < 2) {
- /*
- * Only create additional connections
- * if I have > 1 interface
- */
- write_unlock_bh(global_lock);
- return;
- }
-
- LASSERT(npeer_ipaddrs <= LNET_MAX_INTERFACES);
-
- for (i = 0; i < npeer_ipaddrs; i++) {
- if (newroute) {
- newroute->ksnr_ipaddr = peer_ipaddrs[i];
- } else {
- write_unlock_bh(global_lock);
-
- newroute = ksocknal_create_route(peer_ipaddrs[i], port);
- if (!newroute)
- return;
-
- write_lock_bh(global_lock);
- }
-
- if (peer->ksnp_closing) {
- /* peer got closed under me */
- break;
- }
-
- /* Already got a route? */
- route = NULL;
- list_for_each(rtmp, &peer->ksnp_routes) {
- route = list_entry(rtmp, struct ksock_route, ksnr_list);
-
- if (route->ksnr_ipaddr == newroute->ksnr_ipaddr)
- break;
-
- route = NULL;
- }
- if (route)
- continue;
-
- best_iface = NULL;
- best_nroutes = 0;
- best_netmatch = 0;
-
- LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
-
- /* Select interface to connect from */
- for (j = 0; j < net->ksnn_ninterfaces; j++) {
- iface = &net->ksnn_interfaces[j];
-
- /* Using this interface already? */
- list_for_each(rtmp, &peer->ksnp_routes) {
- route = list_entry(rtmp, struct ksock_route,
- ksnr_list);
-
- if (route->ksnr_myipaddr == iface->ksni_ipaddr)
- break;
-
- route = NULL;
- }
- if (route)
- continue;
-
- this_netmatch = (!((iface->ksni_ipaddr ^
- newroute->ksnr_ipaddr) &
- iface->ksni_netmask)) ? 1 : 0;
-
- if (!(!best_iface ||
- best_netmatch < this_netmatch ||
- (best_netmatch == this_netmatch &&
- best_nroutes > iface->ksni_nroutes)))
- continue;
-
- best_iface = iface;
- best_netmatch = this_netmatch;
- best_nroutes = iface->ksni_nroutes;
- }
-
- if (!best_iface)
- continue;
-
- newroute->ksnr_myipaddr = best_iface->ksni_ipaddr;
- best_iface->ksni_nroutes++;
-
- ksocknal_add_route_locked(peer, newroute);
- newroute = NULL;
- }
-
- write_unlock_bh(global_lock);
- if (newroute)
- ksocknal_route_decref(newroute);
-}
-
-int
-ksocknal_accept(struct lnet_ni *ni, struct socket *sock)
-{
- struct ksock_connreq *cr;
- int rc;
- __u32 peer_ip;
- int peer_port;
-
- rc = lnet_sock_getaddr(sock, 1, &peer_ip, &peer_port);
- LASSERT(!rc); /* we succeeded before */
-
- cr = kzalloc(sizeof(*cr), GFP_NOFS);
- if (!cr) {
- LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from %pI4h: memory exhausted\n",
- &peer_ip);
- return -ENOMEM;
- }
-
- lnet_ni_addref(ni);
- cr->ksncr_ni = ni;
- cr->ksncr_sock = sock;
-
- spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
-
- list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs);
- wake_up(&ksocknal_data.ksnd_connd_waitq);
-
- spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
- return 0;
-}
-
-static int
-ksocknal_connecting(struct ksock_peer *peer, __u32 ipaddr)
-{
- struct ksock_route *route;
-
- list_for_each_entry(route, &peer->ksnp_routes, ksnr_list) {
- if (route->ksnr_ipaddr == ipaddr)
- return route->ksnr_connecting;
- }
- return 0;
-}
-
-int
-ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route,
- struct socket *sock, int type)
-{
- rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
- LIST_HEAD(zombies);
- struct lnet_process_id peerid;
- struct list_head *tmp;
- __u64 incarnation;
- struct ksock_conn *conn;
- struct ksock_conn *conn2;
- struct ksock_peer *peer = NULL;
- struct ksock_peer *peer2;
- struct ksock_sched *sched;
- struct ksock_hello_msg *hello;
- int cpt;
- struct ksock_tx *tx;
- struct ksock_tx *txtmp;
- int rc;
- int active;
- char *warn = NULL;
-
- active = !!route;
-
- LASSERT(active == (type != SOCKLND_CONN_NONE));
-
- conn = kzalloc(sizeof(*conn), GFP_NOFS);
- if (!conn) {
- rc = -ENOMEM;
- goto failed_0;
- }
-
- conn->ksnc_peer = NULL;
- conn->ksnc_route = NULL;
- conn->ksnc_sock = sock;
- /*
-	 * 2 refs: 1 for the conn itself, plus an extra ref that prevents
-	 * the socket from being closed before the connection is established
- */
- atomic_set(&conn->ksnc_sock_refcount, 2);
- conn->ksnc_type = type;
- ksocknal_lib_save_callback(sock, conn);
- atomic_set(&conn->ksnc_conn_refcount, 1); /* 1 ref for me */
-
- conn->ksnc_rx_ready = 0;
- conn->ksnc_rx_scheduled = 0;
-
- INIT_LIST_HEAD(&conn->ksnc_tx_queue);
- conn->ksnc_tx_ready = 0;
- conn->ksnc_tx_scheduled = 0;
- conn->ksnc_tx_carrier = NULL;
- atomic_set(&conn->ksnc_tx_nob, 0);
-
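-	/*
-	 * Allocate the variable-length HELLO buffer: the fixed header
-	 * plus room for up to LNET_MAX_INTERFACES interface IPs.
-	 */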
- hello = kvzalloc(offsetof(struct ksock_hello_msg,
- kshm_ips[LNET_MAX_INTERFACES]),
- GFP_KERNEL);
- if (!hello) {
- rc = -ENOMEM;
- goto failed_1;
- }
-
- /* stash conn's local and remote addrs */
- rc = ksocknal_lib_get_conn_addrs(conn);
- if (rc)
- goto failed_1;
-
- /*
- * Find out/confirm peer's NID and connection type and get the
- * vector of interfaces she's willing to let me connect to.
- * Passive connections use the listener timeout since the peer sends
- * eagerly
- */
- if (active) {
- peer = route->ksnr_peer;
- LASSERT(ni == peer->ksnp_ni);
-
- /* Active connection sends HELLO eagerly */
- hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips);
- peerid = peer->ksnp_id;
-
- write_lock_bh(global_lock);
- conn->ksnc_proto = peer->ksnp_proto;
- write_unlock_bh(global_lock);
-
- if (!conn->ksnc_proto) {
- conn->ksnc_proto = &ksocknal_protocol_v3x;
-#if SOCKNAL_VERSION_DEBUG
- if (*ksocknal_tunables.ksnd_protocol == 2)
- conn->ksnc_proto = &ksocknal_protocol_v2x;
- else if (*ksocknal_tunables.ksnd_protocol == 1)
- conn->ksnc_proto = &ksocknal_protocol_v1x;
-#endif
- }
-
- rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
- if (rc)
- goto failed_1;
- } else {
- peerid.nid = LNET_NID_ANY;
- peerid.pid = LNET_PID_ANY;
-
- /* Passive, get protocol from peer */
- conn->ksnc_proto = NULL;
- }
-
- rc = ksocknal_recv_hello(ni, conn, hello, &peerid, &incarnation);
- if (rc < 0)
- goto failed_1;
-
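-	/*
-	 * NB rc > 0 is only possible on an active connect (e.g. the peer
-	 * asked for a different protocol version); passive accepts must
-	 * have rc == 0 here.
-	 */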
- LASSERT(!rc || active);
- LASSERT(conn->ksnc_proto);
- LASSERT(peerid.nid != LNET_NID_ANY);
-
- cpt = lnet_cpt_of_nid(peerid.nid);
-
- if (active) {
- ksocknal_peer_addref(peer);
- write_lock_bh(global_lock);
- } else {
- rc = ksocknal_create_peer(&peer, ni, peerid);
- if (rc)
- goto failed_1;
-
- write_lock_bh(global_lock);
-
- /* called with a ref on ni, so shutdown can't have started */
- LASSERT(!((struct ksock_net *)ni->ni_data)->ksnn_shutdown);
-
- peer2 = ksocknal_find_peer_locked(ni, peerid);
- if (!peer2) {
- /*
- * NB this puts an "empty" peer in the peer
- * table (which takes my ref)
- */
- list_add_tail(&peer->ksnp_list,
- ksocknal_nid2peerlist(peerid.nid));
- } else {
- ksocknal_peer_decref(peer);
- peer = peer2;
- }
-
- /* +1 ref for me */
- ksocknal_peer_addref(peer);
- peer->ksnp_accepting++;
-
- /*
- * Am I already connecting to this guy? Resolve in
- * favour of higher NID...
- */
- if (peerid.nid < ni->ni_nid &&
- ksocknal_connecting(peer, conn->ksnc_ipaddr)) {
- rc = EALREADY;
- warn = "connection race resolution";
- goto failed_2;
- }
- }
-
- if (peer->ksnp_closing ||
- (active && route->ksnr_deleted)) {
- /* peer/route got closed under me */
- rc = -ESTALE;
- warn = "peer/route removed";
- goto failed_2;
- }
-
- if (!peer->ksnp_proto) {
- /*
- * Never connected before.
- * NB recv_hello may have returned EPROTO to signal my peer
- * wants a different protocol than the one I asked for.
- */
- LASSERT(list_empty(&peer->ksnp_conns));
-
- peer->ksnp_proto = conn->ksnc_proto;
- peer->ksnp_incarnation = incarnation;
- }
-
- if (peer->ksnp_proto != conn->ksnc_proto ||
- peer->ksnp_incarnation != incarnation) {
- /* Peer rebooted or I've got the wrong protocol version */
- ksocknal_close_peer_conns_locked(peer, 0, 0);
-
- peer->ksnp_proto = NULL;
- rc = ESTALE;
- warn = peer->ksnp_incarnation != incarnation ?
- "peer rebooted" :
- "wrong proto version";
- goto failed_2;
- }
-
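-	/*
-	 * At this point rc is 0 or a positive hint (EALREADY/EPROTO)
-	 * from the hello exchange; anything else is a bug.
-	 */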
- switch (rc) {
- default:
- LBUG();
- case 0:
- break;
- case EALREADY:
- warn = "lost conn race";
- goto failed_2;
- case EPROTO:
- warn = "retry with different protocol version";
- goto failed_2;
- }
-
- /*
- * Refuse to duplicate an existing connection, unless this is a
- * loopback connection
- */
- if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
- list_for_each(tmp, &peer->ksnp_conns) {
- conn2 = list_entry(tmp, struct ksock_conn, ksnc_list);
-
- if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr ||
- conn2->ksnc_myipaddr != conn->ksnc_myipaddr ||
- conn2->ksnc_type != conn->ksnc_type)
- continue;
-
- /*
- * Reply on a passive connection attempt so the peer
- * realises we're connected.
- */
- LASSERT(!rc);
- if (!active)
- rc = EALREADY;
-
- warn = "duplicate";
- goto failed_2;
- }
- }
-
- /*
- * If the connection created by this route didn't bind to the IP
- * address the route connected to, the connection/route matching
- * code below probably isn't going to work.
- */
- if (active &&
- route->ksnr_ipaddr != conn->ksnc_ipaddr) {
- CERROR("Route %s %pI4h connected to %pI4h\n",
- libcfs_id2str(peer->ksnp_id),
- &route->ksnr_ipaddr,
- &conn->ksnc_ipaddr);
- }
-
- /*
- * Search for a route corresponding to the new connection and
- * create an association. This allows incoming connections created
- * by routes in my peer to match my own route entries so I don't
- * continually create duplicate routes.
- */
- list_for_each(tmp, &peer->ksnp_routes) {
- route = list_entry(tmp, struct ksock_route, ksnr_list);
-
- if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
- continue;
-
- ksocknal_associate_route_conn_locked(route, conn);
- break;
- }
-
- conn->ksnc_peer = peer; /* conn takes my ref on peer */
- peer->ksnp_last_alive = jiffies;
- peer->ksnp_send_keepalive = 0;
- peer->ksnp_error = 0;
-
- sched = ksocknal_choose_scheduler_locked(cpt);
- sched->kss_nconns++;
- conn->ksnc_scheduler = sched;
-
- conn->ksnc_tx_last_post = jiffies;
- /* Set the deadline for the outgoing HELLO to drain */
- conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued;
- conn->ksnc_tx_deadline = jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
- mb(); /* order with adding to peer's conn list */
-
- list_add(&conn->ksnc_list, &peer->ksnp_conns);
- ksocknal_conn_addref(conn);
-
- ksocknal_new_packet(conn, 0);
-
- conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn);
-
- /* Take packets blocking for this connection. */
- list_for_each_entry_safe(tx, txtmp, &peer->ksnp_tx_queue, tx_list) {
- int match = conn->ksnc_proto->pro_match_tx(conn, tx,
- tx->tx_nonblk);
-
- if (match == SOCKNAL_MATCH_NO)
- continue;
-
- list_del(&tx->tx_list);
- ksocknal_queue_tx_locked(tx, conn);
- }
-
- write_unlock_bh(global_lock);
-
- /*
- * We've now got a new connection. Any errors from here on are just
- * like "normal" comms errors and we close the connection normally.
- * NB (a) we still have to send the reply HELLO for passive
- * connections,
-	 * (b) normal I/O on the conn is blocked until I set up and call the
- * socket callbacks.
- */
- CDEBUG(D_NET, "New conn %s p %d.x %pI4h -> %pI4h/%d incarnation:%lld sched[%d:%d]\n",
- libcfs_id2str(peerid), conn->ksnc_proto->pro_version,
- &conn->ksnc_myipaddr, &conn->ksnc_ipaddr,
- conn->ksnc_port, incarnation, cpt,
- (int)(sched - &sched->kss_info->ksi_scheds[0]));
-
- if (active) {
- /* additional routes after interface exchange? */
- ksocknal_create_routes(peer, conn->ksnc_port,
- hello->kshm_ips, hello->kshm_nips);
- } else {
- hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips,
- hello->kshm_nips);
- rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
- }
-
- kvfree(hello);
-
- /*
-	 * Set up the socket AFTER I've received hello (it disables
- * SO_LINGER). I might call back to the acceptor who may want
- * to send a protocol version response and then close the
- * socket; this ensures the socket only tears down after the
- * response has been sent.
- */
- if (!rc)
- rc = ksocknal_lib_setup_sock(sock);
-
- write_lock_bh(global_lock);
-
- /* NB my callbacks block while I hold ksnd_global_lock */
- ksocknal_lib_set_callback(sock, conn);
-
- if (!active)
- peer->ksnp_accepting--;
-
- write_unlock_bh(global_lock);
-
- if (rc) {
- write_lock_bh(global_lock);
- if (!conn->ksnc_closing) {
- /* could be closed by another thread */
- ksocknal_close_conn_locked(conn, rc);
- }
- write_unlock_bh(global_lock);
- } else if (!ksocknal_connsock_addref(conn)) {
- /* Allow I/O to proceed. */
- ksocknal_read_callback(conn);
- ksocknal_write_callback(conn);
- ksocknal_connsock_decref(conn);
- }
-
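-	/*
-	 * Drop the extra sock ref taken at conn creation (the refcount
-	 * started at 2) now that connection setup is complete.
-	 */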
- ksocknal_connsock_decref(conn);
- ksocknal_conn_decref(conn);
- return rc;
-
- failed_2:
- if (!peer->ksnp_closing &&
- list_empty(&peer->ksnp_conns) &&
- list_empty(&peer->ksnp_routes)) {
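-		/* steal the peer's queued txs onto the local zombies list */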
- list_add(&zombies, &peer->ksnp_tx_queue);
- list_del_init(&peer->ksnp_tx_queue);
- ksocknal_unlink_peer_locked(peer);
- }
-
- write_unlock_bh(global_lock);
-
- if (warn) {
- if (rc < 0)
- CERROR("Not creating conn %s type %d: %s\n",
- libcfs_id2str(peerid), conn->ksnc_type, warn);
- else
- CDEBUG(D_NET, "Not creating conn %s type %d: %s\n",
- libcfs_id2str(peerid), conn->ksnc_type, warn);
- }
-
- if (!active) {
- if (rc > 0) {
- /*
-		 * Request a retry by replying with CONN_NONE;
-		 * ksnc_proto has already been set
- */
- conn->ksnc_type = SOCKLND_CONN_NONE;
- hello->kshm_nips = 0;
- ksocknal_send_hello(ni, conn, peerid.nid, hello);
- }
-
- write_lock_bh(global_lock);
- peer->ksnp_accepting--;
- write_unlock_bh(global_lock);
- }
-
- ksocknal_txlist_done(ni, &zombies, 1);
- ksocknal_peer_decref(peer);
-
-failed_1:
- kvfree(hello);
-
- kfree(conn);
-
-failed_0:
- sock_release(sock);
- return rc;
-}
-
-void
-ksocknal_close_conn_locked(struct ksock_conn *conn, int error)
-{
- /*
-	 * This just does the immediate housekeeping, and queues the
- * connection for the reaper to terminate.
- * Caller holds ksnd_global_lock exclusively in irq context
- */
- struct ksock_peer *peer = conn->ksnc_peer;
- struct ksock_route *route;
- struct ksock_conn *conn2;
- struct list_head *tmp;
-
- LASSERT(!peer->ksnp_error);
- LASSERT(!conn->ksnc_closing);
- conn->ksnc_closing = 1;
-
- /* ksnd_deathrow_conns takes over peer's ref */
- list_del(&conn->ksnc_list);
-
- route = conn->ksnc_route;
- if (route) {
- /* dissociate conn from route... */
- LASSERT(!route->ksnr_deleted);
- LASSERT(route->ksnr_connected & (1 << conn->ksnc_type));
-
- conn2 = NULL;
- list_for_each(tmp, &peer->ksnp_conns) {
- conn2 = list_entry(tmp, struct ksock_conn, ksnc_list);
-
- if (conn2->ksnc_route == route &&
- conn2->ksnc_type == conn->ksnc_type)
- break;
-
- conn2 = NULL;
- }
- if (!conn2)
- route->ksnr_connected &= ~(1 << conn->ksnc_type);
-
- conn->ksnc_route = NULL;
-
- ksocknal_route_decref(route); /* drop conn's ref on route */
- }
-
- if (list_empty(&peer->ksnp_conns)) {
- /* No more connections to this peer */
-
- if (!list_empty(&peer->ksnp_tx_queue)) {
- struct ksock_tx *tx;
-
- LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x);
-
- /*
- * throw them to the last connection...,
-			 * these TXs will be sent to /dev/null by the scheduler
- */
- list_for_each_entry(tx, &peer->ksnp_tx_queue,
- tx_list)
- ksocknal_tx_prep(conn, tx);
-
- spin_lock_bh(&conn->ksnc_scheduler->kss_lock);
- list_splice_init(&peer->ksnp_tx_queue,
- &conn->ksnc_tx_queue);
- spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
- }
-
- peer->ksnp_proto = NULL; /* renegotiate protocol version */
- peer->ksnp_error = error; /* stash last conn close reason */
-
- if (list_empty(&peer->ksnp_routes)) {
- /*
-			 * I've just closed the last conn belonging to a
- * peer with no routes to it
- */
- ksocknal_unlink_peer_locked(peer);
- }
- }
-
- spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-
- list_add_tail(&conn->ksnc_list,
- &ksocknal_data.ksnd_deathrow_conns);
- wake_up(&ksocknal_data.ksnd_reaper_waitq);
-
- spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-}
-
-void
-ksocknal_peer_failed(struct ksock_peer *peer)
-{
- int notify = 0;
- unsigned long last_alive = 0;
-
- /*
-	 * There has been a connection failure or comms error, but I'll only
- * tell LNET I think the peer is dead if it's to another kernel and
- * there are no connections or connection attempts in existence.
- */
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- if (!(peer->ksnp_id.pid & LNET_PID_USERFLAG) &&
- list_empty(&peer->ksnp_conns) &&
- !peer->ksnp_accepting &&
- !ksocknal_find_connecting_route_locked(peer)) {
- notify = 1;
- last_alive = peer->ksnp_last_alive;
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- if (notify)
- lnet_notify(peer->ksnp_ni, peer->ksnp_id.nid, 0,
- last_alive);
-}
-
-void
-ksocknal_finalize_zcreq(struct ksock_conn *conn)
-{
- struct ksock_peer *peer = conn->ksnc_peer;
- struct ksock_tx *tx;
- struct ksock_tx *temp;
- struct ksock_tx *tmp;
- LIST_HEAD(zlist);
-
- /*
- * NB safe to finalize TXs because closing of socket will
- * abort all buffered data
- */
- LASSERT(!conn->ksnc_sock);
-
- spin_lock(&peer->ksnp_lock);
-
- list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, tx_zc_list) {
- if (tx->tx_conn != conn)
- continue;
-
- LASSERT(tx->tx_msg.ksm_zc_cookies[0]);
-
- tx->tx_msg.ksm_zc_cookies[0] = 0;
- tx->tx_zc_aborted = 1; /* mark it as not-acked */
- list_del(&tx->tx_zc_list);
- list_add(&tx->tx_zc_list, &zlist);
- }
-
- spin_unlock(&peer->ksnp_lock);
-
- list_for_each_entry_safe(tx, temp, &zlist, tx_zc_list) {
- list_del(&tx->tx_zc_list);
- ksocknal_tx_decref(tx);
- }
-}
-
-void
-ksocknal_terminate_conn(struct ksock_conn *conn)
-{
- /*
- * This gets called by the reaper (guaranteed thread context) to
- * disengage the socket from its callbacks and close it.
-	 * ksnc_conn_refcount will eventually hit zero, and then the reaper will
- * destroy it.
- */
- struct ksock_peer *peer = conn->ksnc_peer;
- struct ksock_sched *sched = conn->ksnc_scheduler;
- int failed = 0;
-
- LASSERT(conn->ksnc_closing);
-
- /* wake up the scheduler to "send" all remaining packets to /dev/null */
- spin_lock_bh(&sched->kss_lock);
-
- /* a closing conn is always ready to tx */
- conn->ksnc_tx_ready = 1;
-
- if (!conn->ksnc_tx_scheduled &&
- !list_empty(&conn->ksnc_tx_queue)) {
- list_add_tail(&conn->ksnc_tx_list,
- &sched->kss_tx_conns);
- conn->ksnc_tx_scheduled = 1;
- /* extra ref for scheduler */
- ksocknal_conn_addref(conn);
-
- wake_up(&sched->kss_waitq);
- }
-
- spin_unlock_bh(&sched->kss_lock);
-
- /* serialise with callbacks */
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- ksocknal_lib_reset_callback(conn->ksnc_sock, conn);
-
- /*
- * OK, so this conn may not be completely disengaged from its
- * scheduler yet, but it _has_ committed to terminate...
- */
- conn->ksnc_scheduler->kss_nconns--;
-
- if (peer->ksnp_error) {
- /* peer's last conn closed in error */
- LASSERT(list_empty(&peer->ksnp_conns));
- failed = 1;
- peer->ksnp_error = 0; /* avoid multiple notifications */
- }
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- if (failed)
- ksocknal_peer_failed(peer);
-
- /*
- * The socket is closed on the final put; either here, or in
- * ksocknal_{send,recv}msg(). Since we set up the linger2 option
- * when the connection was established, this will close the socket
- * immediately, aborting anything buffered in it. Any hung
- * zero-copy transmits will therefore complete in finite time.
- */
- ksocknal_connsock_decref(conn);
-}
-
-void
-ksocknal_queue_zombie_conn(struct ksock_conn *conn)
-{
- /* Queue the conn for the reaper to destroy */
-
- LASSERT(!atomic_read(&conn->ksnc_conn_refcount));
- spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-
- list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
- wake_up(&ksocknal_data.ksnd_reaper_waitq);
-
- spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-}
-
-void
-ksocknal_destroy_conn(struct ksock_conn *conn)
-{
- unsigned long last_rcv;
-
- /* Final coup-de-grace of the reaper */
- CDEBUG(D_NET, "connection %p\n", conn);
-
- LASSERT(!atomic_read(&conn->ksnc_conn_refcount));
- LASSERT(!atomic_read(&conn->ksnc_sock_refcount));
- LASSERT(!conn->ksnc_sock);
- LASSERT(!conn->ksnc_route);
- LASSERT(!conn->ksnc_tx_scheduled);
- LASSERT(!conn->ksnc_rx_scheduled);
- LASSERT(list_empty(&conn->ksnc_tx_queue));
-
- /* complete current receive if any */
- switch (conn->ksnc_rx_state) {
- case SOCKNAL_RX_LNET_PAYLOAD:
- last_rcv = conn->ksnc_rx_deadline -
- *ksocknal_tunables.ksnd_timeout * HZ;
- CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %zd, left: %d, last alive is %ld secs ago\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type,
- &conn->ksnc_ipaddr, conn->ksnc_port,
- iov_iter_count(&conn->ksnc_rx_to), conn->ksnc_rx_nob_left,
- (jiffies - last_rcv) / HZ);
- lnet_finalize(conn->ksnc_peer->ksnp_ni,
- conn->ksnc_cookie, -EIO);
- break;
- case SOCKNAL_RX_LNET_HEADER:
- if (conn->ksnc_rx_started)
- CERROR("Incomplete receive of lnet header from %s, ip %pI4h:%d, with error, protocol: %d.x.\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- &conn->ksnc_ipaddr, conn->ksnc_port,
- conn->ksnc_proto->pro_version);
- break;
- case SOCKNAL_RX_KSM_HEADER:
- if (conn->ksnc_rx_started)
- CERROR("Incomplete receive of ksock message from %s, ip %pI4h:%d, with error, protocol: %d.x.\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- &conn->ksnc_ipaddr, conn->ksnc_port,
- conn->ksnc_proto->pro_version);
- break;
- case SOCKNAL_RX_SLOP:
- if (conn->ksnc_rx_started)
- CERROR("Incomplete receive of slops from %s, ip %pI4h:%d, with error\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- &conn->ksnc_ipaddr, conn->ksnc_port);
- break;
- default:
- LBUG();
- break;
- }
-
- ksocknal_peer_decref(conn->ksnc_peer);
-
- kfree(conn);
-}
-
-int
-ksocknal_close_peer_conns_locked(struct ksock_peer *peer, __u32 ipaddr, int why)
-{
- struct ksock_conn *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) {
- conn = list_entry(ctmp, struct ksock_conn, ksnc_list);
-
- if (!ipaddr || conn->ksnc_ipaddr == ipaddr) {
- count++;
- ksocknal_close_conn_locked(conn, why);
- }
- }
-
- return count;
-}
-
-int
-ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why)
-{
- struct ksock_peer *peer = conn->ksnc_peer;
- __u32 ipaddr = conn->ksnc_ipaddr;
- int count;
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- count = ksocknal_close_peer_conns_locked(peer, ipaddr, why);
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- return count;
-}
-
-int
-ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr)
-{
- struct ksock_peer *peer;
- struct list_head *ptmp;
- struct list_head *pnxt;
- int lo;
- int hi;
- int i;
- int count = 0;
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- if (id.nid != LNET_NID_ANY) {
- lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
- hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
- } else {
- lo = 0;
- hi = ksocknal_data.ksnd_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe(ptmp, pnxt,
- &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(ptmp, struct ksock_peer, ksnp_list);
-
- if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) &&
- (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid)))
- continue;
-
- count += ksocknal_close_peer_conns_locked(peer, ipaddr,
- 0);
- }
- }
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- /* wildcards always succeed */
- if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || !ipaddr)
- return 0;
-
- if (!count)
- return -ENOENT;
- else
- return 0;
-}
-
-void
-ksocknal_notify(struct lnet_ni *ni, lnet_nid_t gw_nid, int alive)
-{
- /*
- * The router is telling me she's been notified of a change in
-	 * gateway state...
- */
- struct lnet_process_id id = {0};
-
- id.nid = gw_nid;
- id.pid = LNET_PID_ANY;
-
- CDEBUG(D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid),
- alive ? "up" : "down");
-
- if (!alive) {
- /* If the gateway crashed, close all open connections... */
- ksocknal_close_matching_conns(id, 0);
- return;
- }
-
- /*
- * ...otherwise do nothing. We can only establish new connections
-	 * if we have autoroutes, and these connect on demand.
- */
-}
-
-void
-ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when)
-{
- int connect = 1;
- unsigned long last_alive = 0;
- unsigned long now = jiffies;
- struct ksock_peer *peer = NULL;
- rwlock_t *glock = &ksocknal_data.ksnd_global_lock;
- struct lnet_process_id id = {
- .nid = nid,
- .pid = LNET_PID_LUSTRE,
- };
-
- read_lock(glock);
-
- peer = ksocknal_find_peer_locked(ni, id);
- if (peer) {
- struct ksock_conn *conn;
- int bufnob;
-
- list_for_each_entry(conn, &peer->ksnp_conns, ksnc_list) {
- bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
-
- if (bufnob < conn->ksnc_tx_bufnob) {
- /* something got ACKed */
- conn->ksnc_tx_deadline =
- jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
- peer->ksnp_last_alive = now;
- conn->ksnc_tx_bufnob = bufnob;
- }
- }
-
- last_alive = peer->ksnp_last_alive;
- if (!ksocknal_find_connectable_route_locked(peer))
- connect = 0;
- }
-
- read_unlock(glock);
-
- if (last_alive)
- *when = last_alive;
-
- CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago, connect %d\n",
- libcfs_nid2str(nid), peer,
- last_alive ? (now - last_alive) / HZ : -1,
- connect);
-
- if (!connect)
- return;
-
- ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port());
-
- write_lock_bh(glock);
-
- peer = ksocknal_find_peer_locked(ni, id);
- if (peer)
- ksocknal_launch_all_connections_locked(peer);
-
- write_unlock_bh(glock);
-}
-
-static void
-ksocknal_push_peer(struct ksock_peer *peer)
-{
- int index;
- int i;
- struct list_head *tmp;
- struct ksock_conn *conn;
-
- for (index = 0; ; index++) {
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- i = 0;
- conn = NULL;
-
- list_for_each(tmp, &peer->ksnp_conns) {
- if (i++ == index) {
- conn = list_entry(tmp, struct ksock_conn,
- ksnc_list);
- ksocknal_conn_addref(conn);
- break;
- }
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- if (!conn)
- break;
-
- ksocknal_lib_push_conn(conn);
- ksocknal_conn_decref(conn);
- }
-}
-
-static int ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id)
-{
- struct list_head *start;
- struct list_head *end;
- struct list_head *tmp;
- int rc = -ENOENT;
- unsigned int hsize = ksocknal_data.ksnd_peer_hash_size;
-
- if (id.nid == LNET_NID_ANY) {
- start = &ksocknal_data.ksnd_peers[0];
- end = &ksocknal_data.ksnd_peers[hsize - 1];
- } else {
- start = ksocknal_nid2peerlist(id.nid);
- end = ksocknal_nid2peerlist(id.nid);
- }
-
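-	/*
-	 * start..end bound the hash buckets to scan: the whole table
-	 * for a wildcard NID, or the single bucket the NID hashes to.
-	 */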
- for (tmp = start; tmp <= end; tmp++) {
-		int peer_off; /* search offset in the peer hash table */
-
- for (peer_off = 0; ; peer_off++) {
- struct ksock_peer *peer;
- int i = 0;
-
- read_lock(&ksocknal_data.ksnd_global_lock);
- list_for_each_entry(peer, tmp, ksnp_list) {
- if (!((id.nid == LNET_NID_ANY ||
- id.nid == peer->ksnp_id.nid) &&
- (id.pid == LNET_PID_ANY ||
- id.pid == peer->ksnp_id.pid)))
- continue;
-
- if (i++ == peer_off) {
- ksocknal_peer_addref(peer);
- break;
- }
- }
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- if (!i) /* no match */
- break;
-
- rc = 0;
- ksocknal_push_peer(peer);
- ksocknal_peer_decref(peer);
- }
- }
- return rc;
-}
-
-static int
-ksocknal_add_interface(struct lnet_ni *ni, __u32 ipaddress, __u32 netmask)
-{
- struct ksock_net *net = ni->ni_data;
- struct ksock_interface *iface;
- int rc;
- int i;
- int j;
- struct list_head *ptmp;
- struct ksock_peer *peer;
- struct list_head *rtmp;
- struct ksock_route *route;
-
- if (!ipaddress || !netmask)
- return -EINVAL;
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- iface = ksocknal_ip2iface(ni, ipaddress);
- if (iface) {
- /* silently ignore dups */
- rc = 0;
- } else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) {
- rc = -ENOSPC;
- } else {
- iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++];
-
- iface->ksni_ipaddr = ipaddress;
- iface->ksni_netmask = netmask;
- iface->ksni_nroutes = 0;
- iface->ksni_npeers = 0;
-
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
- list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(ptmp, struct ksock_peer,
- ksnp_list);
-
- for (j = 0; j < peer->ksnp_n_passive_ips; j++)
- if (peer->ksnp_passive_ips[j] == ipaddress)
- iface->ksni_npeers++;
-
- list_for_each(rtmp, &peer->ksnp_routes) {
- route = list_entry(rtmp, struct ksock_route,
- ksnr_list);
-
- if (route->ksnr_myipaddr == ipaddress)
- iface->ksni_nroutes++;
- }
- }
- }
-
- rc = 0;
- /*
- * NB only new connections will pay attention to the
- * new interface!
- */
- }
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- return rc;
-}
-
-static void
-ksocknal_peer_del_interface_locked(struct ksock_peer *peer, __u32 ipaddr)
-{
- struct list_head *tmp;
- struct list_head *nxt;
- struct ksock_route *route;
- struct ksock_conn *conn;
- int i;
- int j;
-
- for (i = 0; i < peer->ksnp_n_passive_ips; i++)
- if (peer->ksnp_passive_ips[i] == ipaddr) {
- for (j = i + 1; j < peer->ksnp_n_passive_ips; j++)
- peer->ksnp_passive_ips[j - 1] =
- peer->ksnp_passive_ips[j];
- peer->ksnp_n_passive_ips--;
- break;
- }
-
- list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
- route = list_entry(tmp, struct ksock_route, ksnr_list);
-
- if (route->ksnr_myipaddr != ipaddr)
- continue;
-
- if (route->ksnr_share_count) {
- /* Manually created; keep, but unbind */
- route->ksnr_myipaddr = 0;
- } else {
- ksocknal_del_route_locked(route);
- }
- }
-
- list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
- conn = list_entry(tmp, struct ksock_conn, ksnc_list);
-
- if (conn->ksnc_myipaddr == ipaddr)
- ksocknal_close_conn_locked(conn, 0);
- }
-}
-
-static int
-ksocknal_del_interface(struct lnet_ni *ni, __u32 ipaddress)
-{
- struct ksock_net *net = ni->ni_data;
- int rc = -ENOENT;
- struct list_head *tmp;
- struct list_head *nxt;
- struct ksock_peer *peer;
- __u32 this_ip;
- int i;
- int j;
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- for (i = 0; i < net->ksnn_ninterfaces; i++) {
- this_ip = net->ksnn_interfaces[i].ksni_ipaddr;
-
- if (!(!ipaddress || ipaddress == this_ip))
- continue;
-
- rc = 0;
-
- for (j = i + 1; j < net->ksnn_ninterfaces; j++)
- net->ksnn_interfaces[j - 1] =
- net->ksnn_interfaces[j];
-
- net->ksnn_ninterfaces--;
-
- for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) {
- list_for_each_safe(tmp, nxt,
- &ksocknal_data.ksnd_peers[j]) {
- peer = list_entry(tmp, struct ksock_peer, ksnp_list);
-
- if (peer->ksnp_ni != ni)
- continue;
-
- ksocknal_peer_del_interface_locked(peer, this_ip);
- }
- }
- }
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- return rc;
-}
-
-int
-ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg)
-{
- struct lnet_process_id id = {0};
- struct libcfs_ioctl_data *data = arg;
- int rc;
-
- switch (cmd) {
- case IOC_LIBCFS_GET_INTERFACE: {
- struct ksock_net *net = ni->ni_data;
- struct ksock_interface *iface;
-
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- if (data->ioc_count >= (__u32)net->ksnn_ninterfaces) {
- rc = -ENOENT;
- } else {
- rc = 0;
- iface = &net->ksnn_interfaces[data->ioc_count];
-
- data->ioc_u32[0] = iface->ksni_ipaddr;
- data->ioc_u32[1] = iface->ksni_netmask;
- data->ioc_u32[2] = iface->ksni_npeers;
- data->ioc_u32[3] = iface->ksni_nroutes;
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
- return rc;
- }
-
- case IOC_LIBCFS_ADD_INTERFACE:
- return ksocknal_add_interface(ni,
- data->ioc_u32[0], /* IP address */
- data->ioc_u32[1]); /* net mask */
-
- case IOC_LIBCFS_DEL_INTERFACE:
- return ksocknal_del_interface(ni,
- data->ioc_u32[0]); /* IP address */
-
- case IOC_LIBCFS_GET_PEER: {
- __u32 myip = 0;
- __u32 ip = 0;
- int port = 0;
- int conn_count = 0;
- int share_count = 0;
-
- rc = ksocknal_get_peer_info(ni, data->ioc_count,
- &id, &myip, &ip, &port,
- &conn_count, &share_count);
- if (rc)
- return rc;
-
- data->ioc_nid = id.nid;
- data->ioc_count = share_count;
- data->ioc_u32[0] = ip;
- data->ioc_u32[1] = port;
- data->ioc_u32[2] = myip;
- data->ioc_u32[3] = conn_count;
- data->ioc_u32[4] = id.pid;
- return 0;
- }
-
- case IOC_LIBCFS_ADD_PEER:
- id.nid = data->ioc_nid;
- id.pid = LNET_PID_LUSTRE;
- return ksocknal_add_peer(ni, id,
- data->ioc_u32[0], /* IP */
- data->ioc_u32[1]); /* port */
-
- case IOC_LIBCFS_DEL_PEER:
- id.nid = data->ioc_nid;
- id.pid = LNET_PID_ANY;
- return ksocknal_del_peer(ni, id,
- data->ioc_u32[0]); /* IP */
-
- case IOC_LIBCFS_GET_CONN: {
- int txmem;
- int rxmem;
- int nagle;
- struct ksock_conn *conn;
-
- conn = ksocknal_get_conn_by_idx(ni, data->ioc_count);
- if (!conn)
- return -ENOENT;
-
- ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle);
-
- data->ioc_count = txmem;
- data->ioc_nid = conn->ksnc_peer->ksnp_id.nid;
- data->ioc_flags = nagle;
- data->ioc_u32[0] = conn->ksnc_ipaddr;
- data->ioc_u32[1] = conn->ksnc_port;
- data->ioc_u32[2] = conn->ksnc_myipaddr;
- data->ioc_u32[3] = conn->ksnc_type;
- data->ioc_u32[4] = conn->ksnc_scheduler->kss_info->ksi_cpt;
- data->ioc_u32[5] = rxmem;
- data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
- ksocknal_conn_decref(conn);
- return 0;
- }
-
- case IOC_LIBCFS_CLOSE_CONNECTION:
- id.nid = data->ioc_nid;
- id.pid = LNET_PID_ANY;
- return ksocknal_close_matching_conns(id,
- data->ioc_u32[0]);
-
- case IOC_LIBCFS_REGISTER_MYNID:
- /* Ignore if this is a noop */
- if (data->ioc_nid == ni->ni_nid)
- return 0;
-
- CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
- libcfs_nid2str(data->ioc_nid),
- libcfs_nid2str(ni->ni_nid));
- return -EINVAL;
-
- case IOC_LIBCFS_PUSH_CONNECTION:
- id.nid = data->ioc_nid;
- id.pid = LNET_PID_ANY;
- return ksocknal_push(ni, id);
-
- default:
- return -EINVAL;
- }
- /* not reached */
-}
-
-static void
-ksocknal_free_buffers(void)
-{
- LASSERT(!atomic_read(&ksocknal_data.ksnd_nactive_txs));
-
- if (ksocknal_data.ksnd_sched_info) {
- struct ksock_sched_info *info;
- int i;
-
- cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info)
- kfree(info->ksi_scheds);
- cfs_percpt_free(ksocknal_data.ksnd_sched_info);
- }
-
- kvfree(ksocknal_data.ksnd_peers);
-
- spin_lock(&ksocknal_data.ksnd_tx_lock);
-
- if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
- struct list_head zlist;
- struct ksock_tx *tx;
- struct ksock_tx *temp;
-
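-		/*
-		 * Move the whole idle-noop list onto the local zlist head
-		 * and empty the global list, so the txs can be freed after
-		 * dropping the lock.
-		 */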
- list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs);
- list_del_init(&ksocknal_data.ksnd_idle_noop_txs);
- spin_unlock(&ksocknal_data.ksnd_tx_lock);
-
- list_for_each_entry_safe(tx, temp, &zlist, tx_list) {
- list_del(&tx->tx_list);
- kfree(tx);
- }
- } else {
- spin_unlock(&ksocknal_data.ksnd_tx_lock);
- }
-}
-
-static void
-ksocknal_base_shutdown(void)
-{
- struct ksock_sched_info *info;
- struct ksock_sched *sched;
- int i;
- int j;
-
- LASSERT(!ksocknal_data.ksnd_nnets);
-
- switch (ksocknal_data.ksnd_init) {
- default:
- LASSERT(0);
- /* fall through */
- case SOCKNAL_INIT_ALL:
- case SOCKNAL_INIT_DATA:
- LASSERT(ksocknal_data.ksnd_peers);
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
- LASSERT(list_empty(&ksocknal_data.ksnd_peers[i]));
-
- LASSERT(list_empty(&ksocknal_data.ksnd_nets));
- LASSERT(list_empty(&ksocknal_data.ksnd_enomem_conns));
- LASSERT(list_empty(&ksocknal_data.ksnd_zombie_conns));
- LASSERT(list_empty(&ksocknal_data.ksnd_connd_connreqs));
- LASSERT(list_empty(&ksocknal_data.ksnd_connd_routes));
-
- if (ksocknal_data.ksnd_sched_info) {
- cfs_percpt_for_each(info, i,
- ksocknal_data.ksnd_sched_info) {
- if (!info->ksi_scheds)
- continue;
-
- for (j = 0; j < info->ksi_nthreads_max; j++) {
- sched = &info->ksi_scheds[j];
- LASSERT(list_empty(
- &sched->kss_tx_conns));
- LASSERT(list_empty(
- &sched->kss_rx_conns));
- LASSERT(list_empty(
- &sched->kss_zombie_noop_txs));
- LASSERT(!sched->kss_nconns);
- }
- }
- }
-
- /* flag threads to terminate; wake and wait for them to die */
- ksocknal_data.ksnd_shuttingdown = 1;
- wake_up_all(&ksocknal_data.ksnd_connd_waitq);
- wake_up_all(&ksocknal_data.ksnd_reaper_waitq);
-
- if (ksocknal_data.ksnd_sched_info) {
- cfs_percpt_for_each(info, i,
- ksocknal_data.ksnd_sched_info) {
- if (!info->ksi_scheds)
- continue;
-
- for (j = 0; j < info->ksi_nthreads_max; j++) {
- sched = &info->ksi_scheds[j];
- wake_up_all(&sched->kss_waitq);
- }
- }
- }
-
- i = 4;
- read_lock(&ksocknal_data.ksnd_global_lock);
- while (ksocknal_data.ksnd_nthreads) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "waiting for %d threads to terminate\n",
- ksocknal_data.ksnd_nthreads);
- read_unlock(&ksocknal_data.ksnd_global_lock);
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- read_lock(&ksocknal_data.ksnd_global_lock);
- }
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- ksocknal_free_buffers();
-
- ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
- break;
- }
-
- module_put(THIS_MODULE);
-}
-
-static __u64
-ksocknal_new_incarnation(void)
-{
-	/* The incarnation number is the time this module was loaded and it
- * identifies this particular instance of the socknal.
- */
- return ktime_get_ns();
-}
-
-static int
-ksocknal_base_startup(void)
-{
- struct ksock_sched_info *info;
- int rc;
- int i;
-
- LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
- LASSERT(!ksocknal_data.ksnd_nnets);
-
- memset(&ksocknal_data, 0, sizeof(ksocknal_data)); /* zero pointers */
-
- ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE;
- ksocknal_data.ksnd_peers = kvmalloc_array(ksocknal_data.ksnd_peer_hash_size,
- sizeof(struct list_head),
- GFP_KERNEL);
- if (!ksocknal_data.ksnd_peers)
- return -ENOMEM;
-
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
- INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
-
- rwlock_init(&ksocknal_data.ksnd_global_lock);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_nets);
-
- spin_lock_init(&ksocknal_data.ksnd_reaper_lock);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_enomem_conns);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_zombie_conns);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_deathrow_conns);
- init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq);
-
- spin_lock_init(&ksocknal_data.ksnd_connd_lock);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_connreqs);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_routes);
- init_waitqueue_head(&ksocknal_data.ksnd_connd_waitq);
-
- spin_lock_init(&ksocknal_data.ksnd_tx_lock);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_noop_txs);
-
- /* NB memset above zeros whole of ksocknal_data */
-
- /* flag lists/ptrs/locks initialised */
- ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
- try_module_get(THIS_MODULE);
-
- ksocknal_data.ksnd_sched_info = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(*info));
- if (!ksocknal_data.ksnd_sched_info)
- goto failed;
-
- cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
- struct ksock_sched *sched;
- int nthrs;
-
- nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
- if (*ksocknal_tunables.ksnd_nscheds > 0) {
- nthrs = min(nthrs, *ksocknal_tunables.ksnd_nscheds);
- } else {
- /*
-			 * cap at half of the CPUs; assume the other half
-			 * should be reserved for upper-layer modules
- */
- nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
- }
-
- info->ksi_nthreads_max = nthrs;
- info->ksi_cpt = i;
-
- info->ksi_scheds = kzalloc_cpt(info->ksi_nthreads_max * sizeof(*sched),
- GFP_NOFS, i);
- if (!info->ksi_scheds)
- goto failed;
-
- for (; nthrs > 0; nthrs--) {
- sched = &info->ksi_scheds[nthrs - 1];
-
- sched->kss_info = info;
- spin_lock_init(&sched->kss_lock);
- INIT_LIST_HEAD(&sched->kss_rx_conns);
- INIT_LIST_HEAD(&sched->kss_tx_conns);
- INIT_LIST_HEAD(&sched->kss_zombie_noop_txs);
- init_waitqueue_head(&sched->kss_waitq);
- }
- }
-
- ksocknal_data.ksnd_connd_starting = 0;
- ksocknal_data.ksnd_connd_failed_stamp = 0;
- ksocknal_data.ksnd_connd_starting_stamp = ktime_get_real_seconds();
- /*
- * must have at least 2 connds to remain responsive to accepts while
- * connecting
- */
- if (*ksocknal_tunables.ksnd_nconnds < SOCKNAL_CONND_RESV + 1)
- *ksocknal_tunables.ksnd_nconnds = SOCKNAL_CONND_RESV + 1;
-
- if (*ksocknal_tunables.ksnd_nconnds_max <
- *ksocknal_tunables.ksnd_nconnds) {
- ksocknal_tunables.ksnd_nconnds_max =
- ksocknal_tunables.ksnd_nconnds;
- }
-
- for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) {
- char name[16];
-
- spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
- ksocknal_data.ksnd_connd_starting++;
- spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
-
- snprintf(name, sizeof(name), "socknal_cd%02d", i);
- rc = ksocknal_thread_start(ksocknal_connd,
- (void *)((uintptr_t)i), name);
- if (rc) {
- spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
- ksocknal_data.ksnd_connd_starting--;
- spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
- CERROR("Can't spawn socknal connd: %d\n", rc);
- goto failed;
- }
- }
-
- rc = ksocknal_thread_start(ksocknal_reaper, NULL, "socknal_reaper");
- if (rc) {
- CERROR("Can't spawn socknal reaper: %d\n", rc);
- goto failed;
- }
-
- /* flag everything initialised */
- ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
-
- return 0;
-
- failed:
- ksocknal_base_shutdown();
- return -ENETDOWN;
-}
-
-static void
-ksocknal_debug_peerhash(struct lnet_ni *ni)
-{
- struct ksock_peer *peer = NULL;
- struct list_head *tmp;
- int i;
-
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
- list_for_each(tmp, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(tmp, struct ksock_peer, ksnp_list);
-
- if (peer->ksnp_ni == ni)
- break;
-
- peer = NULL;
- }
- }
-
- if (peer) {
- struct ksock_route *route;
- struct ksock_conn *conn;
-
- CWARN("Active peer on shutdown: %s, ref %d, scnt %d, closing %d, accepting %d, err %d, zcookie %llu, txq %d, zc_req %d\n",
- libcfs_id2str(peer->ksnp_id),
- atomic_read(&peer->ksnp_refcount),
- peer->ksnp_sharecount, peer->ksnp_closing,
- peer->ksnp_accepting, peer->ksnp_error,
- peer->ksnp_zc_next_cookie,
- !list_empty(&peer->ksnp_tx_queue),
- !list_empty(&peer->ksnp_zc_req_list));
-
- list_for_each(tmp, &peer->ksnp_routes) {
- route = list_entry(tmp, struct ksock_route, ksnr_list);
- CWARN("Route: ref %d, schd %d, conn %d, cnted %d, del %d\n",
- atomic_read(&route->ksnr_refcount),
- route->ksnr_scheduled, route->ksnr_connecting,
- route->ksnr_connected, route->ksnr_deleted);
- }
-
- list_for_each(tmp, &peer->ksnp_conns) {
- conn = list_entry(tmp, struct ksock_conn, ksnc_list);
- CWARN("Conn: ref %d, sref %d, t %d, c %d\n",
- atomic_read(&conn->ksnc_conn_refcount),
- atomic_read(&conn->ksnc_sock_refcount),
- conn->ksnc_type, conn->ksnc_closing);
- }
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
-}
-
-void
-ksocknal_shutdown(struct lnet_ni *ni)
-{
- struct ksock_net *net = ni->ni_data;
- int i;
- struct lnet_process_id anyid = {0};
-
- anyid.nid = LNET_NID_ANY;
- anyid.pid = LNET_PID_ANY;
-
- LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
- LASSERT(ksocknal_data.ksnd_nnets > 0);
-
- spin_lock_bh(&net->ksnn_lock);
- net->ksnn_shutdown = 1; /* prevent new peers */
- spin_unlock_bh(&net->ksnn_lock);
-
- /* Delete all peers */
- ksocknal_del_peer(ni, anyid, 0);
-
- /* Wait for all peer state to clean up */
- i = 2;
- spin_lock_bh(&net->ksnn_lock);
- while (net->ksnn_npeers) {
- spin_unlock_bh(&net->ksnn_lock);
-
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "waiting for %d peers to disconnect\n",
- net->ksnn_npeers);
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
-
- ksocknal_debug_peerhash(ni);
-
- spin_lock_bh(&net->ksnn_lock);
- }
- spin_unlock_bh(&net->ksnn_lock);
-
- for (i = 0; i < net->ksnn_ninterfaces; i++) {
- LASSERT(!net->ksnn_interfaces[i].ksni_npeers);
- LASSERT(!net->ksnn_interfaces[i].ksni_nroutes);
- }
-
- list_del(&net->ksnn_list);
- kfree(net);
-
- ksocknal_data.ksnd_nnets--;
- if (!ksocknal_data.ksnd_nnets)
- ksocknal_base_shutdown();
-}
-
-static int
-ksocknal_enumerate_interfaces(struct ksock_net *net)
-{
- char **names;
- int i;
- int j;
- int rc;
- int n;
-
- n = lnet_ipif_enumerate(&names);
- if (n <= 0) {
- CERROR("Can't enumerate interfaces: %d\n", n);
- return n;
- }
-
- for (i = j = 0; i < n; i++) {
- int up;
- __u32 ip;
- __u32 mask;
-
- if (!strcmp(names[i], "lo")) /* skip the loopback IF */
- continue;
-
- rc = lnet_ipif_query(names[i], &up, &ip, &mask);
- if (rc) {
- CWARN("Can't get interface %s info: %d\n",
- names[i], rc);
- continue;
- }
-
- if (!up) {
- CWARN("Ignoring interface %s (down)\n",
- names[i]);
- continue;
- }
-
- if (j == LNET_MAX_INTERFACES) {
- CWARN("Ignoring interface %s (too many interfaces)\n",
- names[i]);
- continue;
- }
-
- net->ksnn_interfaces[j].ksni_ipaddr = ip;
- net->ksnn_interfaces[j].ksni_netmask = mask;
- strlcpy(net->ksnn_interfaces[j].ksni_name,
- names[i], sizeof(net->ksnn_interfaces[j].ksni_name));
- j++;
- }
-
- lnet_ipif_free_enumeration(names, n);
-
- if (!j)
- CERROR("Can't find any usable interfaces\n");
-
- return j;
-}
-
-static int
-ksocknal_search_new_ipif(struct ksock_net *net)
-{
- int new_ipif = 0;
- int i;
-
- for (i = 0; i < net->ksnn_ninterfaces; i++) {
- char *ifnam = &net->ksnn_interfaces[i].ksni_name[0];
- char *colon = strchr(ifnam, ':');
- int found = 0;
- struct ksock_net *tmp;
- int j;
-
-		if (colon) /* ignore alias devices (e.g. eth0:1) */
- *colon = 0;
-
- list_for_each_entry(tmp, &ksocknal_data.ksnd_nets, ksnn_list) {
- for (j = 0; !found && j < tmp->ksnn_ninterfaces; j++) {
- char *ifnam2 =
- &tmp->ksnn_interfaces[j].ksni_name[0];
- char *colon2 = strchr(ifnam2, ':');
-
- if (colon2)
- *colon2 = 0;
-
- found = !strcmp(ifnam, ifnam2);
- if (colon2)
- *colon2 = ':';
- }
- if (found)
- break;
- }
-
- new_ipif += !found;
- if (colon)
- *colon = ':';
- }
-
- return new_ipif;
-}
-
-static int
-ksocknal_start_schedulers(struct ksock_sched_info *info)
-{
- int nthrs;
- int rc = 0;
- int i;
-
- if (!info->ksi_nthreads) {
- if (*ksocknal_tunables.ksnd_nscheds > 0) {
- nthrs = info->ksi_nthreads_max;
- } else {
- nthrs = cfs_cpt_weight(lnet_cpt_table(),
- info->ksi_cpt);
- nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
- nthrs = min(SOCKNAL_NSCHEDS_HIGH, nthrs);
- }
- nthrs = min(nthrs, info->ksi_nthreads_max);
- } else {
- LASSERT(info->ksi_nthreads <= info->ksi_nthreads_max);
-		/* start up to two more threads if there is a new interface */
- nthrs = min(2, info->ksi_nthreads_max - info->ksi_nthreads);
- }
-
- for (i = 0; i < nthrs; i++) {
- long id;
- char name[20];
- struct ksock_sched *sched;
-
- id = KSOCK_THREAD_ID(info->ksi_cpt, info->ksi_nthreads + i);
- sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)];
- snprintf(name, sizeof(name), "socknal_sd%02d_%02d",
- info->ksi_cpt, (int)(sched - &info->ksi_scheds[0]));
-
- rc = ksocknal_thread_start(ksocknal_scheduler,
- (void *)id, name);
- if (!rc)
- continue;
-
- CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
- info->ksi_cpt, info->ksi_nthreads + i, rc);
- break;
- }
-
- info->ksi_nthreads += i;
- return rc;
-}
-
-static int
-ksocknal_net_start_threads(struct ksock_net *net, __u32 *cpts, int ncpts)
-{
- int newif = ksocknal_search_new_ipif(net);
- int rc;
- int i;
-
- LASSERT(ncpts > 0 && ncpts <= cfs_cpt_number(lnet_cpt_table()));
-
- for (i = 0; i < ncpts; i++) {
- struct ksock_sched_info *info;
- int cpt = !cpts ? i : cpts[i];
-
- LASSERT(cpt < cfs_cpt_number(lnet_cpt_table()));
- info = ksocknal_data.ksnd_sched_info[cpt];
-
- if (!newif && info->ksi_nthreads > 0)
- continue;
-
- rc = ksocknal_start_schedulers(info);
- if (rc)
- return rc;
- }
- return 0;
-}
-
-int
-ksocknal_startup(struct lnet_ni *ni)
-{
- struct ksock_net *net;
- int rc;
- int i;
-
- LASSERT(ni->ni_lnd == &the_ksocklnd);
-
- if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
- rc = ksocknal_base_startup();
- if (rc)
- return rc;
- }
-
- net = kzalloc(sizeof(*net), GFP_NOFS);
- if (!net)
- goto fail_0;
-
- spin_lock_init(&net->ksnn_lock);
- net->ksnn_incarnation = ksocknal_new_incarnation();
- ni->ni_data = net;
- ni->ni_peertimeout = *ksocknal_tunables.ksnd_peertimeout;
- ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits;
- ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peertxcredits;
- ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits;
-
- if (!ni->ni_interfaces[0]) {
- rc = ksocknal_enumerate_interfaces(net);
- if (rc <= 0)
- goto fail_1;
-
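-		/* no interfaces were specified; use only the first one found */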
- net->ksnn_ninterfaces = 1;
- } else {
- for (i = 0; i < LNET_MAX_INTERFACES; i++) {
- int up;
-
- if (!ni->ni_interfaces[i])
- break;
-
- rc = lnet_ipif_query(ni->ni_interfaces[i], &up,
- &net->ksnn_interfaces[i].ksni_ipaddr,
- &net->ksnn_interfaces[i].ksni_netmask);
-
- if (rc) {
- CERROR("Can't get interface %s info: %d\n",
- ni->ni_interfaces[i], rc);
- goto fail_1;
- }
-
- if (!up) {
- CERROR("Interface %s is down\n",
- ni->ni_interfaces[i]);
- goto fail_1;
- }
-
- strlcpy(net->ksnn_interfaces[i].ksni_name,
- ni->ni_interfaces[i],
- sizeof(net->ksnn_interfaces[i].ksni_name));
- }
- net->ksnn_ninterfaces = i;
- }
-
-	/* call this before adding the net to ksocknal_data.ksnd_nets */
- rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts);
- if (rc)
- goto fail_1;
-
- ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
- net->ksnn_interfaces[0].ksni_ipaddr);
- list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);
-
- ksocknal_data.ksnd_nnets++;
-
- return 0;
-
- fail_1:
- kfree(net);
- fail_0:
- if (!ksocknal_data.ksnd_nnets)
- ksocknal_base_shutdown();
-
- return -ENETDOWN;
-}
-
-static void __exit ksocklnd_exit(void)
-{
- lnet_unregister_lnd(&the_ksocklnd);
-}
-
-static int __init ksocklnd_init(void)
-{
- int rc;
-
-	/* check that the ksnr_connected/connecting bitfields are large enough */
- BUILD_BUG_ON(SOCKLND_CONN_NTYPES > 4);
- BUILD_BUG_ON(SOCKLND_CONN_ACK != SOCKLND_CONN_BULK_IN);
-
- /* initialize the_ksocklnd */
- the_ksocklnd.lnd_type = SOCKLND;
- the_ksocklnd.lnd_startup = ksocknal_startup;
- the_ksocklnd.lnd_shutdown = ksocknal_shutdown;
- the_ksocklnd.lnd_ctl = ksocknal_ctl;
- the_ksocklnd.lnd_send = ksocknal_send;
- the_ksocklnd.lnd_recv = ksocknal_recv;
- the_ksocklnd.lnd_notify = ksocknal_notify;
- the_ksocklnd.lnd_query = ksocknal_query;
- the_ksocklnd.lnd_accept = ksocknal_accept;
-
- rc = ksocknal_tunables_init();
- if (rc)
- return rc;
-
- rc = libcfs_setup();
- if (rc)
- return rc;
-
- lnet_register_lnd(&the_ksocklnd);
-
- return 0;
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("TCP Socket LNet Network Driver");
-MODULE_VERSION("2.7.0");
-MODULE_LICENSE("GPL");
-
-module_init(ksocklnd_init);
-module_exit(ksocklnd_exit);
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
deleted file mode 100644
index 4e5c89a692a3..000000000000
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
+++ /dev/null
@@ -1,704 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- *
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Lustre, http://www.lustre.org
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#ifndef _SOCKLND_SOCKLND_H_
-#define _SOCKLND_SOCKLND_H_
-
-#define DEBUG_PORTAL_ALLOC
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <linux/crc32.h>
-#include <linux/errno.h>
-#include <linux/if.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/kmod.h>
-#include <linux/list.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/stat.h>
-#include <linux/string.h>
-#include <linux/syscalls.h>
-#include <linux/sysctl.h>
-#include <linux/uio.h>
-#include <linux/unistd.h>
-#include <asm/irq.h>
-#include <net/sock.h>
-#include <net/tcp.h>
-
-#include <linux/lnet/lib-lnet.h>
-#include <linux/lnet/socklnd.h>
-
-/* assume one thread for each connection type */
-#define SOCKNAL_NSCHEDS 3
-#define SOCKNAL_NSCHEDS_HIGH (SOCKNAL_NSCHEDS << 1)
-
-#define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */
-#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
-#define SOCKNAL_INSANITY_RECONN 5000 /* connd is trying on reconn infinitely */
-#define SOCKNAL_ENOMEM_RETRY 1 /* jiffies between retries */
-
-#define SOCKNAL_SINGLE_FRAG_TX 0 /* disable multi-fragment sends */
-#define SOCKNAL_SINGLE_FRAG_RX 0 /* disable multi-fragment receives */
-
-#define SOCKNAL_VERSION_DEBUG 0 /* enable protocol version debugging */
-
-/*
- * risk kmap deadlock on multi-frag I/O (backs off to single-frag if disabled).
- * no risk if we're not running on a CONFIG_HIGHMEM platform.
- */
-#ifdef CONFIG_HIGHMEM
-# define SOCKNAL_RISK_KMAP_DEADLOCK 0
-#else
-# define SOCKNAL_RISK_KMAP_DEADLOCK 1
-#endif
-
-struct ksock_sched_info;
-
-struct ksock_sched { /* per scheduler state */
- spinlock_t kss_lock; /* serialise */
- struct list_head kss_rx_conns; /* conn waiting to be read */
- struct list_head kss_tx_conns; /* conn waiting to be written */
- struct list_head kss_zombie_noop_txs; /* zombie noop tx list */
- wait_queue_head_t kss_waitq; /* where scheduler sleeps */
- int kss_nconns; /* # connections assigned to
- * this scheduler
- */
- struct ksock_sched_info *kss_info; /* owner of it */
-};
-
-struct ksock_sched_info {
- int ksi_nthreads_max; /* max allowed threads */
- int ksi_nthreads; /* number of threads */
- int ksi_cpt; /* CPT id */
- struct ksock_sched *ksi_scheds; /* array of schedulers */
-};
-
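-/*
- * A scheduler thread id packs the CPT number into the high bits and the
- * thread's index within that CPT's scheduler array into the low 16 bits.
- */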
-#define KSOCK_CPT_SHIFT 16
-#define KSOCK_THREAD_ID(cpt, sid) (((cpt) << KSOCK_CPT_SHIFT) | (sid))
-#define KSOCK_THREAD_CPT(id) ((id) >> KSOCK_CPT_SHIFT)
-#define KSOCK_THREAD_SID(id) ((id) & ((1UL << KSOCK_CPT_SHIFT) - 1))
-
-struct ksock_interface { /* in-use interface */
- __u32 ksni_ipaddr; /* interface's IP address */
- __u32 ksni_netmask; /* interface's network mask */
- int ksni_nroutes; /* # routes using (active) */
- int ksni_npeers; /* # peers using (passive) */
- char ksni_name[IFNAMSIZ]; /* interface name */
-};
-
-struct ksock_tunables {
- int *ksnd_timeout; /* "stuck" socket timeout
- * (seconds)
- */
- int *ksnd_nscheds; /* # scheduler threads in each
- * pool while starting
- */
- int *ksnd_nconnds; /* # connection daemons */
- int *ksnd_nconnds_max; /* max # connection daemons */
- int *ksnd_min_reconnectms; /* first connection retry after
- * (ms)...
- */
- int *ksnd_max_reconnectms; /* ...exponentially increasing to
- * this
- */
- int *ksnd_eager_ack; /* make TCP ack eagerly? */
- int *ksnd_typed_conns; /* drive sockets by type? */
- int *ksnd_min_bulk; /* smallest "large" message */
- int *ksnd_tx_buffer_size; /* socket tx buffer size */
- int *ksnd_rx_buffer_size; /* socket rx buffer size */
- int *ksnd_nagle; /* enable NAGLE? */
- int *ksnd_round_robin; /* round robin for multiple
- * interfaces
- */
- int *ksnd_keepalive; /* # secs for sending keepalive
- * NOOP
- */
- int *ksnd_keepalive_idle; /* # idle secs before 1st probe
- */
- int *ksnd_keepalive_count; /* # probes */
- int *ksnd_keepalive_intvl; /* time between probes */
- int *ksnd_credits; /* # concurrent sends */
- int *ksnd_peertxcredits; /* # concurrent sends to 1 peer
- */
- int *ksnd_peerrtrcredits; /* # per-peer router buffer
- * credits
- */
- int *ksnd_peertimeout; /* seconds to consider peer dead
- */
- int *ksnd_enable_csum; /* enable check sum */
- int *ksnd_inject_csum_error; /* set non-zero to inject
- * checksum error
- */
- int *ksnd_nonblk_zcack; /* always send zc-ack on
- * non-blocking connection
- */
- unsigned int *ksnd_zc_min_payload; /* minimum zero copy payload
- * size
- */
- int *ksnd_zc_recv; /* enable ZC receive (for
- * Chelsio TOE)
- */
- int *ksnd_zc_recv_min_nfrags; /* minimum # of fragments to
- * enable ZC receive
- */
-};
-
-struct ksock_net {
- __u64 ksnn_incarnation; /* my epoch */
- spinlock_t ksnn_lock; /* serialise */
- struct list_head ksnn_list; /* chain on global list */
- int ksnn_npeers; /* # peers */
- int ksnn_shutdown; /* shutting down? */
- int ksnn_ninterfaces; /* IP interfaces */
- struct ksock_interface ksnn_interfaces[LNET_MAX_INTERFACES];
-};
-
-/** connd timeout */
-#define SOCKNAL_CONND_TIMEOUT 120
-/** reserved thread for accepting & creating new connd */
-#define SOCKNAL_CONND_RESV 1
-
-struct ksock_nal_data {
- int ksnd_init; /* initialisation state
- */
- int ksnd_nnets; /* # networks set up */
- struct list_head ksnd_nets; /* list of nets */
- rwlock_t ksnd_global_lock; /* stabilize peer/conn
- * ops
- */
- struct list_head *ksnd_peers; /* hash table of all my
- * known peers
- */
- int ksnd_peer_hash_size; /* size of ksnd_peers */
-
- int ksnd_nthreads; /* # live threads */
- int ksnd_shuttingdown; /* tell threads to exit
- */
- struct ksock_sched_info **ksnd_sched_info; /* schedulers info */
-
- atomic_t ksnd_nactive_txs; /* #active txs */
-
- struct list_head ksnd_deathrow_conns; /* conns to close:
- * reaper_lock
- */
- struct list_head ksnd_zombie_conns; /* conns to free:
- * reaper_lock
- */
- struct list_head ksnd_enomem_conns; /* conns to retry:
- * reaper_lock
- */
- wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */
- unsigned long ksnd_reaper_waketime; /* when reaper will wake
- */
- spinlock_t ksnd_reaper_lock; /* serialise */
-
- int ksnd_enomem_tx; /* test ENOMEM sender */
- int ksnd_stall_tx; /* test sluggish sender
- */
- int ksnd_stall_rx; /* test sluggish
- * receiver
- */
- struct list_head ksnd_connd_connreqs; /* incoming connection
- * requests
- */
- struct list_head ksnd_connd_routes; /* routes waiting to be
- * connected
- */
- wait_queue_head_t ksnd_connd_waitq; /* connds sleep here */
- int ksnd_connd_connecting; /* # connds connecting
- */
- time64_t ksnd_connd_failed_stamp;/* time stamp of the
- * last failed
- * connecting attempt
- */
- time64_t ksnd_connd_starting_stamp;/* time stamp of the
- * last starting connd
- */
- unsigned int ksnd_connd_starting; /* # starting connd */
- unsigned int ksnd_connd_running; /* # running connd */
- spinlock_t ksnd_connd_lock; /* serialise */
-
- struct list_head ksnd_idle_noop_txs; /* list head for freed
- * noop tx
- */
- spinlock_t ksnd_tx_lock; /* serialise, g_lock
- * unsafe
- */
-};
-
-#define SOCKNAL_INIT_NOTHING 0
-#define SOCKNAL_INIT_DATA 1
-#define SOCKNAL_INIT_ALL 2
-
-/*
- * A packet just assembled for transmission is represented by 1 or more
- * struct kvec fragments (the first frag contains the portals header),
- * followed by 0 or more struct bio_vec fragments.
- *
- * On the receive side, initially 1 struct kvec fragment is posted for
- * receive (the header). Once the header has been received, the payload is
- * received into either struct kvec or struct bio_vec fragments, depending on
- * what the header matched or whether the message needs forwarding.
- */
-struct ksock_conn; /* forward ref */
-struct ksock_peer; /* forward ref */
-struct ksock_route; /* forward ref */
-struct ksock_proto; /* forward ref */
-
-struct ksock_tx { /* transmit packet */
- struct list_head tx_list; /* queue on conn for transmission etc
- */
- struct list_head tx_zc_list; /* queue on peer for ZC request */
- atomic_t tx_refcount; /* tx reference count */
- int tx_nob; /* # packet bytes */
- int tx_resid; /* residual bytes */
- int tx_niov; /* # packet iovec frags */
- struct kvec *tx_iov; /* packet iovec frags */
- int tx_nkiov; /* # packet page frags */
- unsigned short tx_zc_aborted; /* aborted ZC request */
- unsigned short tx_zc_capable:1; /* payload is large enough for ZC */
- unsigned short tx_zc_checked:1; /* Have I checked if I should ZC? */
- unsigned short tx_nonblk:1; /* it's a non-blocking ACK */
- struct bio_vec *tx_kiov; /* packet page frags */
- struct ksock_conn *tx_conn; /* owning conn */
- struct lnet_msg *tx_lnetmsg; /* lnet message for lnet_finalize()
- */
- unsigned long tx_deadline; /* when (in jiffies) tx times out */
- struct ksock_msg tx_msg; /* socklnd message buffer */
- int tx_desc_size; /* size of this descriptor */
- union {
- struct {
- struct kvec iov; /* virt hdr */
- struct bio_vec kiov[0]; /* paged payload */
- } paged;
- struct {
- struct kvec iov[1]; /* virt hdr + payload */
- } virt;
- } tx_frags;
-};
-
-#define KSOCK_NOOP_TX_SIZE (offsetof(struct ksock_tx, tx_frags.paged.kiov[0]))
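-
-/*
- * Sizing sketch (see ksocknal_send() for the general case): a tx embeds its
- * fragment descriptors, so one carrying N page frags is allocated with
- *	offsetof(struct ksock_tx, tx_frags.paged.kiov[N])
- * bytes; KSOCK_NOOP_TX_SIZE is just the N == 0 case, used by
- * ksocknal_alloc_tx_noop().
- */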
-
-/* network zero copy callback descriptor embedded in struct ksock_tx */
-
-#define SOCKNAL_RX_KSM_HEADER 1 /* reading ksock message header */
-#define SOCKNAL_RX_LNET_HEADER 2 /* reading lnet message header */
-#define SOCKNAL_RX_PARSE 3 /* Calling lnet_parse() */
-#define SOCKNAL_RX_PARSE_WAIT 4 /* waiting to be told to read the body */
-#define SOCKNAL_RX_LNET_PAYLOAD 5 /* reading lnet payload (to deliver here) */
-#define SOCKNAL_RX_SLOP 6 /* skipping body */
-
-struct ksock_conn {
- struct ksock_peer *ksnc_peer; /* owning peer */
- struct ksock_route *ksnc_route; /* owning route */
- struct list_head ksnc_list; /* stash on peer's conn list */
- struct socket *ksnc_sock; /* actual socket */
- void *ksnc_saved_data_ready; /* socket's original
- * data_ready() callback
- */
- void *ksnc_saved_write_space; /* socket's original
- * write_space() callback
- */
- atomic_t ksnc_conn_refcount;/* conn refcount */
- atomic_t ksnc_sock_refcount;/* sock refcount */
- struct ksock_sched *ksnc_scheduler; /* who schedules this connection
- */
- __u32 ksnc_myipaddr; /* my IP */
- __u32 ksnc_ipaddr; /* peer's IP */
- int ksnc_port; /* peer's port */
- signed int ksnc_type:3; /* type of connection, should be
- * signed value
- */
- unsigned int ksnc_closing:1; /* being shut down */
- unsigned int ksnc_flip:1; /* flip or not, only for V2.x */
-	unsigned int	ksnc_zc_capable:1;	/* enabled for ZC */
- struct ksock_proto *ksnc_proto; /* protocol for the connection */
-
- /* reader */
- struct list_head ksnc_rx_list; /* where I enq waiting input or a
- * forwarding descriptor
- */
- unsigned long ksnc_rx_deadline; /* when (in jiffies) receive times
- * out
- */
- __u8 ksnc_rx_started; /* started receiving a message */
- __u8 ksnc_rx_ready; /* data ready to read */
- __u8 ksnc_rx_scheduled; /* being progressed */
- __u8 ksnc_rx_state; /* what is being read */
- int ksnc_rx_nob_left; /* # bytes to next hdr/body */
- struct iov_iter ksnc_rx_to; /* copy destination */
- struct kvec ksnc_rx_iov_space[LNET_MAX_IOV]; /* space for frag descriptors */
- __u32 ksnc_rx_csum; /* partial checksum for incoming
- * data
- */
- void *ksnc_cookie; /* rx lnet_finalize passthru arg
- */
- struct ksock_msg ksnc_msg; /* incoming message buffer:
- * V2.x message takes the
- * whole struct
- * V1.x message is a bare
- * struct lnet_hdr, it's stored in
- * ksnc_msg.ksm_u.lnetmsg
- */
- /* WRITER */
- struct list_head ksnc_tx_list; /* where I enq waiting for output
- * space
- */
- struct list_head ksnc_tx_queue; /* packets waiting to be sent */
- struct ksock_tx *ksnc_tx_carrier; /* next TX that can carry a LNet
- * message or ZC-ACK
- */
- unsigned long ksnc_tx_deadline; /* when (in jiffies) tx times out
- */
- int ksnc_tx_bufnob; /* send buffer marker */
- atomic_t ksnc_tx_nob; /* # bytes queued */
- int ksnc_tx_ready; /* write space */
- int ksnc_tx_scheduled; /* being progressed */
- unsigned long ksnc_tx_last_post; /* time stamp of the last posted
- * TX
- */
-};
-
-struct ksock_route {
- struct list_head ksnr_list; /* chain on peer route list */
- struct list_head ksnr_connd_list; /* chain on ksnr_connd_routes */
- struct ksock_peer *ksnr_peer; /* owning peer */
- atomic_t ksnr_refcount; /* # users */
- unsigned long ksnr_timeout; /* when (in jiffies) reconnection
- * can happen next
- */
- long ksnr_retry_interval; /* how long between retries */
- __u32 ksnr_myipaddr; /* my IP */
- __u32 ksnr_ipaddr; /* IP address to connect to */
- int ksnr_port; /* port to connect to */
- unsigned int ksnr_scheduled:1; /* scheduled for attention */
- unsigned int ksnr_connecting:1; /* connection establishment in
- * progress
- */
- unsigned int ksnr_connected:4; /* connections established by
- * type
- */
- unsigned int ksnr_deleted:1; /* been removed from peer? */
- unsigned int ksnr_share_count; /* created explicitly? */
- int ksnr_conn_count; /* # conns established by this
- * route
- */
-};
-
-#define SOCKNAL_KEEPALIVE_PING 1 /* cookie for keepalive ping */
-
-struct ksock_peer {
- struct list_head ksnp_list; /* stash on global peer list */
- unsigned long ksnp_last_alive; /* when (in jiffies) I was last
- * alive
- */
- struct lnet_process_id ksnp_id; /* who's on the other end(s) */
- atomic_t ksnp_refcount; /* # users */
- int ksnp_sharecount; /* lconf usage counter */
- int ksnp_closing; /* being closed */
- int ksnp_accepting; /* # passive connections pending
- */
- int ksnp_error; /* errno on closing last conn */
- __u64 ksnp_zc_next_cookie; /* ZC completion cookie */
- __u64 ksnp_incarnation; /* latest known peer incarnation
- */
- struct ksock_proto *ksnp_proto; /* latest known peer protocol */
- struct list_head ksnp_conns; /* all active connections */
- struct list_head ksnp_routes; /* routes */
- struct list_head ksnp_tx_queue; /* waiting packets */
- spinlock_t ksnp_lock; /* serialize, g_lock unsafe */
- struct list_head ksnp_zc_req_list; /* zero copy requests wait for
- * ACK
- */
- unsigned long ksnp_send_keepalive; /* time to send keepalive */
- struct lnet_ni *ksnp_ni; /* which network */
-	int		ksnp_n_passive_ips;	/* # preferred local IPs */
-
- /* preferred local interfaces */
- __u32 ksnp_passive_ips[LNET_MAX_INTERFACES];
-};
-
-struct ksock_connreq {
- struct list_head ksncr_list; /* stash on ksnd_connd_connreqs */
- struct lnet_ni *ksncr_ni; /* chosen NI */
- struct socket *ksncr_sock; /* accepted socket */
-};
-
-extern struct ksock_nal_data ksocknal_data;
-extern struct ksock_tunables ksocknal_tunables;
-
-#define SOCKNAL_MATCH_NO 0 /* TX can't match type of connection */
-#define SOCKNAL_MATCH_YES 1 /* TX matches type of connection */
-#define SOCKNAL_MATCH_MAY 2 /* TX can be sent on the connection, but not
- * preferred
- */
-
-struct ksock_proto {
- /* version number of protocol */
- int pro_version;
-
- /* handshake function */
- int (*pro_send_hello)(struct ksock_conn *, struct ksock_hello_msg *);
-
- /* handshake function */
- int (*pro_recv_hello)(struct ksock_conn *, struct ksock_hello_msg *, int);
-
- /* message pack */
- void (*pro_pack)(struct ksock_tx *);
-
- /* message unpack */
- void (*pro_unpack)(struct ksock_msg *);
-
- /* queue tx on the connection */
- struct ksock_tx *(*pro_queue_tx_msg)(struct ksock_conn *, struct ksock_tx *);
-
- /* queue ZC ack on the connection */
- int (*pro_queue_tx_zcack)(struct ksock_conn *, struct ksock_tx *, __u64);
-
- /* handle ZC request */
- int (*pro_handle_zcreq)(struct ksock_conn *, __u64, int);
-
- /* handle ZC ACK */
- int (*pro_handle_zcack)(struct ksock_conn *, __u64, __u64);
-
-	/*
-	 * does the msg type match the connection type?
-	 * returns SOCKNAL_MATCH_NO (no), SOCKNAL_MATCH_YES (matching type)
-	 * or SOCKNAL_MATCH_MAY (usable, but only as a backup)
-	 */
- int (*pro_match_tx)(struct ksock_conn *, struct ksock_tx *, int);
-};
-
-extern struct ksock_proto ksocknal_protocol_v1x;
-extern struct ksock_proto ksocknal_protocol_v2x;
-extern struct ksock_proto ksocknal_protocol_v3x;
-
-#define KSOCK_PROTO_V1_MAJOR LNET_PROTO_TCP_VERSION_MAJOR
-#define KSOCK_PROTO_V1_MINOR LNET_PROTO_TCP_VERSION_MINOR
-#define KSOCK_PROTO_V1 KSOCK_PROTO_V1_MAJOR
-
-#ifndef CPU_MASK_NONE
-#define CPU_MASK_NONE 0UL
-#endif
-
-static inline int
-ksocknal_route_mask(void)
-{
- if (!*ksocknal_tunables.ksnd_typed_conns)
- return (1 << SOCKLND_CONN_ANY);
-
- return ((1 << SOCKLND_CONN_CONTROL) |
- (1 << SOCKLND_CONN_BULK_IN) |
- (1 << SOCKLND_CONN_BULK_OUT));
-}
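-
-/*
- * Usage sketch: a route still wants connecting while any bit in this mask
- * is missing from ksnr_connected, e.g.
- *	if (ksocknal_route_mask() & ~route->ksnr_connected)
- *		... launch another connection ...
- */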
-
-static inline struct list_head *
-ksocknal_nid2peerlist(lnet_nid_t nid)
-{
- unsigned int hash = ((unsigned int)nid) % ksocknal_data.ksnd_peer_hash_size;
-
- return &ksocknal_data.ksnd_peers[hash];
-}
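-
-/*
- * Lookup sketch (caller holds ksnd_global_lock); this is roughly what
- * ksocknal_find_peer_locked() does:
- *	list_for_each_entry(peer, ksocknal_nid2peerlist(id.nid), ksnp_list)
- *		if (peer->ksnp_id.nid == id.nid)
- *			... found ...
- */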
-
-static inline void
-ksocknal_conn_addref(struct ksock_conn *conn)
-{
- LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0);
- atomic_inc(&conn->ksnc_conn_refcount);
-}
-
-void ksocknal_queue_zombie_conn(struct ksock_conn *conn);
-void ksocknal_finalize_zcreq(struct ksock_conn *conn);
-
-static inline void
-ksocknal_conn_decref(struct ksock_conn *conn)
-{
- LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0);
- if (atomic_dec_and_test(&conn->ksnc_conn_refcount))
- ksocknal_queue_zombie_conn(conn);
-}
-
-static inline int
-ksocknal_connsock_addref(struct ksock_conn *conn)
-{
- int rc = -ESHUTDOWN;
-
- read_lock(&ksocknal_data.ksnd_global_lock);
- if (!conn->ksnc_closing) {
- LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0);
- atomic_inc(&conn->ksnc_sock_refcount);
- rc = 0;
- }
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- return rc;
-}
-
-static inline void
-ksocknal_connsock_decref(struct ksock_conn *conn)
-{
- LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0);
- if (atomic_dec_and_test(&conn->ksnc_sock_refcount)) {
- LASSERT(conn->ksnc_closing);
- sock_release(conn->ksnc_sock);
- conn->ksnc_sock = NULL;
- ksocknal_finalize_zcreq(conn);
- }
-}
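-
-/*
- * Typical I/O-path pattern (cf. ksocknal_transmit()/ksocknal_receive()):
- *
- *	if (ksocknal_connsock_addref(conn))
- *		return -ESHUTDOWN;	...conn is already closing...
- *	...use conn->ksnc_sock...
- *	ksocknal_connsock_decref(conn);
- *
- * The final decref (only reachable once ksnc_closing is set) releases the
- * socket and fails any ZC requests still outstanding on the conn.
- */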
-
-static inline void
-ksocknal_tx_addref(struct ksock_tx *tx)
-{
- LASSERT(atomic_read(&tx->tx_refcount) > 0);
- atomic_inc(&tx->tx_refcount);
-}
-
-void ksocknal_tx_prep(struct ksock_conn *, struct ksock_tx *tx);
-void ksocknal_tx_done(struct lnet_ni *ni, struct ksock_tx *tx);
-
-static inline void
-ksocknal_tx_decref(struct ksock_tx *tx)
-{
- LASSERT(atomic_read(&tx->tx_refcount) > 0);
- if (atomic_dec_and_test(&tx->tx_refcount))
- ksocknal_tx_done(NULL, tx);
-}
-
-static inline void
-ksocknal_route_addref(struct ksock_route *route)
-{
- LASSERT(atomic_read(&route->ksnr_refcount) > 0);
- atomic_inc(&route->ksnr_refcount);
-}
-
-void ksocknal_destroy_route(struct ksock_route *route);
-
-static inline void
-ksocknal_route_decref(struct ksock_route *route)
-{
- LASSERT(atomic_read(&route->ksnr_refcount) > 0);
- if (atomic_dec_and_test(&route->ksnr_refcount))
- ksocknal_destroy_route(route);
-}
-
-static inline void
-ksocknal_peer_addref(struct ksock_peer *peer)
-{
- LASSERT(atomic_read(&peer->ksnp_refcount) > 0);
- atomic_inc(&peer->ksnp_refcount);
-}
-
-void ksocknal_destroy_peer(struct ksock_peer *peer);
-
-static inline void
-ksocknal_peer_decref(struct ksock_peer *peer)
-{
- LASSERT(atomic_read(&peer->ksnp_refcount) > 0);
- if (atomic_dec_and_test(&peer->ksnp_refcount))
- ksocknal_destroy_peer(peer);
-}
-
-int ksocknal_startup(struct lnet_ni *ni);
-void ksocknal_shutdown(struct lnet_ni *ni);
-int ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg);
-int ksocknal_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg);
-int ksocknal_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
- int delayed, struct iov_iter *to, unsigned int rlen);
-int ksocknal_accept(struct lnet_ni *ni, struct socket *sock);
-
-int ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip,
- int port);
-struct ksock_peer *ksocknal_find_peer_locked(struct lnet_ni *ni,
- struct lnet_process_id id);
-struct ksock_peer *ksocknal_find_peer(struct lnet_ni *ni,
- struct lnet_process_id id);
-void ksocknal_peer_failed(struct ksock_peer *peer);
-int ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route,
- struct socket *sock, int type);
-void ksocknal_close_conn_locked(struct ksock_conn *conn, int why);
-void ksocknal_terminate_conn(struct ksock_conn *conn);
-void ksocknal_destroy_conn(struct ksock_conn *conn);
-int ksocknal_close_peer_conns_locked(struct ksock_peer *peer,
- __u32 ipaddr, int why);
-int ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why);
-int ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr);
-struct ksock_conn *ksocknal_find_conn_locked(struct ksock_peer *peer,
- struct ksock_tx *tx, int nonblk);
-
-int ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx,
- struct lnet_process_id id);
-struct ksock_tx *ksocknal_alloc_tx(int type, int size);
-void ksocknal_free_tx(struct ksock_tx *tx);
-struct ksock_tx *ksocknal_alloc_tx_noop(__u64 cookie, int nonblk);
-void ksocknal_next_tx_carrier(struct ksock_conn *conn);
-void ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn);
-void ksocknal_txlist_done(struct lnet_ni *ni, struct list_head *txlist, int error);
-void ksocknal_notify(struct lnet_ni *ni, lnet_nid_t gw_nid, int alive);
-void ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when);
-int ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name);
-void ksocknal_thread_fini(void);
-void ksocknal_launch_all_connections_locked(struct ksock_peer *peer);
-struct ksock_route *ksocknal_find_connectable_route_locked(struct ksock_peer *peer);
-struct ksock_route *ksocknal_find_connecting_route_locked(struct ksock_peer *peer);
-int ksocknal_new_packet(struct ksock_conn *conn, int skip);
-int ksocknal_scheduler(void *arg);
-int ksocknal_connd(void *arg);
-int ksocknal_reaper(void *arg);
-int ksocknal_send_hello(struct lnet_ni *ni, struct ksock_conn *conn,
- lnet_nid_t peer_nid, struct ksock_hello_msg *hello);
-int ksocknal_recv_hello(struct lnet_ni *ni, struct ksock_conn *conn,
- struct ksock_hello_msg *hello,
- struct lnet_process_id *id,
- __u64 *incarnation);
-void ksocknal_read_callback(struct ksock_conn *conn);
-void ksocknal_write_callback(struct ksock_conn *conn);
-
-int ksocknal_lib_zc_capable(struct ksock_conn *conn);
-void ksocknal_lib_save_callback(struct socket *sock, struct ksock_conn *conn);
-void ksocknal_lib_set_callback(struct socket *sock, struct ksock_conn *conn);
-void ksocknal_lib_reset_callback(struct socket *sock, struct ksock_conn *conn);
-void ksocknal_lib_push_conn(struct ksock_conn *conn);
-int ksocknal_lib_get_conn_addrs(struct ksock_conn *conn);
-int ksocknal_lib_setup_sock(struct socket *so);
-int ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx);
-int ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx);
-void ksocknal_lib_eager_ack(struct ksock_conn *conn);
-int ksocknal_lib_recv(struct ksock_conn *conn);
-int ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem,
- int *rxmem, int *nagle);
-
-int ksocknal_tunables_init(void);
-
-void ksocknal_lib_csum_tx(struct ksock_tx *tx);
-
-int ksocknal_lib_memory_pressure(struct ksock_conn *conn);
-int ksocknal_lib_bind_thread_to_cpu(int id);
-
-#endif /* _SOCKLND_SOCKLND_H_ */
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
deleted file mode 100644
index 01b31a6bb588..000000000000
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
+++ /dev/null
@@ -1,2586 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- *
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#include <linux/sched/mm.h>
-#include "socklnd.h"
-
-struct ksock_tx *
-ksocknal_alloc_tx(int type, int size)
-{
- struct ksock_tx *tx = NULL;
-
- if (type == KSOCK_MSG_NOOP) {
- LASSERT(size == KSOCK_NOOP_TX_SIZE);
-
- /* searching for a noop tx in free list */
- spin_lock(&ksocknal_data.ksnd_tx_lock);
-
- if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
- tx = list_entry(ksocknal_data.ksnd_idle_noop_txs.next,
- struct ksock_tx, tx_list);
- LASSERT(tx->tx_desc_size == size);
- list_del(&tx->tx_list);
- }
-
- spin_unlock(&ksocknal_data.ksnd_tx_lock);
- }
-
- if (!tx)
- tx = kzalloc(size, GFP_NOFS);
-
- if (!tx)
- return NULL;
-
- atomic_set(&tx->tx_refcount, 1);
- tx->tx_zc_aborted = 0;
- tx->tx_zc_capable = 0;
- tx->tx_zc_checked = 0;
- tx->tx_desc_size = size;
-
- atomic_inc(&ksocknal_data.ksnd_nactive_txs);
-
- return tx;
-}
-
-struct ksock_tx *
-ksocknal_alloc_tx_noop(__u64 cookie, int nonblk)
-{
- struct ksock_tx *tx;
-
- tx = ksocknal_alloc_tx(KSOCK_MSG_NOOP, KSOCK_NOOP_TX_SIZE);
- if (!tx) {
- CERROR("Can't allocate noop tx desc\n");
- return NULL;
- }
-
- tx->tx_conn = NULL;
- tx->tx_lnetmsg = NULL;
- tx->tx_kiov = NULL;
- tx->tx_nkiov = 0;
- tx->tx_iov = tx->tx_frags.virt.iov;
- tx->tx_niov = 1;
- tx->tx_nonblk = nonblk;
-
- tx->tx_msg.ksm_csum = 0;
- tx->tx_msg.ksm_type = KSOCK_MSG_NOOP;
- tx->tx_msg.ksm_zc_cookies[0] = 0;
- tx->tx_msg.ksm_zc_cookies[1] = cookie;
-
- return tx;
-}
-
-void
-ksocknal_free_tx(struct ksock_tx *tx)
-{
- atomic_dec(&ksocknal_data.ksnd_nactive_txs);
-
- if (!tx->tx_lnetmsg && tx->tx_desc_size == KSOCK_NOOP_TX_SIZE) {
- /* it's a noop tx */
- spin_lock(&ksocknal_data.ksnd_tx_lock);
-
- list_add(&tx->tx_list, &ksocknal_data.ksnd_idle_noop_txs);
-
- spin_unlock(&ksocknal_data.ksnd_tx_lock);
- } else {
- kfree(tx);
- }
-}
-
-static int
-ksocknal_send_iov(struct ksock_conn *conn, struct ksock_tx *tx)
-{
- struct kvec *iov = tx->tx_iov;
- int nob;
- int rc;
-
- LASSERT(tx->tx_niov > 0);
-
- /* Never touch tx->tx_iov inside ksocknal_lib_send_iov() */
- rc = ksocknal_lib_send_iov(conn, tx);
-
- if (rc <= 0) /* sent nothing? */
- return rc;
-
- nob = rc;
- LASSERT(nob <= tx->tx_resid);
- tx->tx_resid -= nob;
-
- /* "consume" iov */
- do {
- LASSERT(tx->tx_niov > 0);
-
- if (nob < (int)iov->iov_len) {
- iov->iov_base = (void *)((char *)iov->iov_base + nob);
- iov->iov_len -= nob;
- return rc;
- }
-
- nob -= iov->iov_len;
- tx->tx_iov = ++iov;
- tx->tx_niov--;
- } while (nob);
-
- return rc;
-}
-
-static int
-ksocknal_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
-{
- struct bio_vec *kiov = tx->tx_kiov;
- int nob;
- int rc;
-
- LASSERT(!tx->tx_niov);
- LASSERT(tx->tx_nkiov > 0);
-
- /* Never touch tx->tx_kiov inside ksocknal_lib_send_kiov() */
- rc = ksocknal_lib_send_kiov(conn, tx);
-
- if (rc <= 0) /* sent nothing? */
- return rc;
-
- nob = rc;
- LASSERT(nob <= tx->tx_resid);
- tx->tx_resid -= nob;
-
- /* "consume" kiov */
- do {
- LASSERT(tx->tx_nkiov > 0);
-
- if (nob < (int)kiov->bv_len) {
- kiov->bv_offset += nob;
- kiov->bv_len -= nob;
- return rc;
- }
-
- nob -= (int)kiov->bv_len;
- tx->tx_kiov = ++kiov;
- tx->tx_nkiov--;
- } while (nob);
-
- return rc;
-}
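-
-/*
- * Both senders above return what the socket accepted and advance the
- * fragment cursor past fully-sent frags, trimming a partially-sent one in
- * place, so the next call resumes where this one stopped. E.g. after
- * sending 100 bytes of a 150-byte first kiov frag:
- *	kiov->bv_offset += 100; kiov->bv_len -= 100;
- */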
-
-static int
-ksocknal_transmit(struct ksock_conn *conn, struct ksock_tx *tx)
-{
- int rc;
- int bufnob;
-
- if (ksocknal_data.ksnd_stall_tx) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(ksocknal_data.ksnd_stall_tx * HZ);
- }
-
- LASSERT(tx->tx_resid);
-
- rc = ksocknal_connsock_addref(conn);
- if (rc) {
- LASSERT(conn->ksnc_closing);
- return -ESHUTDOWN;
- }
-
- do {
- if (ksocknal_data.ksnd_enomem_tx > 0) {
- /* testing... */
- ksocknal_data.ksnd_enomem_tx--;
- rc = -EAGAIN;
- } else if (tx->tx_niov) {
- rc = ksocknal_send_iov(conn, tx);
- } else {
- rc = ksocknal_send_kiov(conn, tx);
- }
-
- bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
- if (rc > 0) /* sent something? */
- conn->ksnc_tx_bufnob += rc; /* account it */
-
- if (bufnob < conn->ksnc_tx_bufnob) {
- /*
- * allocated send buffer bytes < computed; infer
- * something got ACKed
- */
- conn->ksnc_tx_deadline =
- jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
- conn->ksnc_peer->ksnp_last_alive = jiffies;
- conn->ksnc_tx_bufnob = bufnob;
- mb();
- }
-
- if (rc <= 0) { /* Didn't write anything? */
-
- if (!rc) /* some stacks return 0 instead of -EAGAIN */
- rc = -EAGAIN;
-
- /* Check if EAGAIN is due to memory pressure */
- if (rc == -EAGAIN && ksocknal_lib_memory_pressure(conn))
- rc = -ENOMEM;
-
- break;
- }
-
- /* socket's wmem_queued now includes 'rc' bytes */
- atomic_sub(rc, &conn->ksnc_tx_nob);
- rc = 0;
-
- } while (tx->tx_resid);
-
- ksocknal_connsock_decref(conn);
- return rc;
-}
-
-static int
-ksocknal_recv_iter(struct ksock_conn *conn)
-{
- int nob;
- int rc;
-
- /*
- * Never touch conn->ksnc_rx_to or change connection
- * status inside ksocknal_lib_recv
- */
- rc = ksocknal_lib_recv(conn);
-
- if (rc <= 0)
- return rc;
-
- /* received something... */
- nob = rc;
-
- conn->ksnc_peer->ksnp_last_alive = jiffies;
- conn->ksnc_rx_deadline =
- jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
- mb(); /* order with setting rx_started */
- conn->ksnc_rx_started = 1;
-
- conn->ksnc_rx_nob_left -= nob;
-
- iov_iter_advance(&conn->ksnc_rx_to, nob);
- if (iov_iter_count(&conn->ksnc_rx_to))
- return -EAGAIN;
-
- return 1;
-}
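-
-/*
- * Return convention: 1 once the whole ksnc_rx_to iterator has been filled,
- * -EAGAIN after a partial read (more to come), and ksocknal_lib_recv()'s
- * 0 (EOF) or negative error unchanged.
- */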
-
-static int
-ksocknal_receive(struct ksock_conn *conn)
-{
- /*
- * Return 1 on success, 0 on EOF, < 0 on error.
- * Caller checks ksnc_rx_to to determine
- * progress/completion.
- */
- int rc;
-
- if (ksocknal_data.ksnd_stall_rx) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(ksocknal_data.ksnd_stall_rx * HZ);
- }
-
- rc = ksocknal_connsock_addref(conn);
- if (rc) {
- LASSERT(conn->ksnc_closing);
- return -ESHUTDOWN;
- }
-
- for (;;) {
- rc = ksocknal_recv_iter(conn);
- if (rc <= 0) {
- /* error/EOF or partial receive */
- if (rc == -EAGAIN) {
- rc = 1;
- } else if (!rc && conn->ksnc_rx_started) {
- /* EOF in the middle of a message */
- rc = -EPROTO;
- }
- break;
- }
-
- /* Completed a fragment */
-
- if (!iov_iter_count(&conn->ksnc_rx_to)) {
- rc = 1;
- break;
- }
- }
-
- ksocknal_connsock_decref(conn);
- return rc;
-}
-
-void
-ksocknal_tx_done(struct lnet_ni *ni, struct ksock_tx *tx)
-{
- struct lnet_msg *lnetmsg = tx->tx_lnetmsg;
- int rc = (!tx->tx_resid && !tx->tx_zc_aborted) ? 0 : -EIO;
-
-	LASSERT(ni || tx->tx_conn);
-
-	/*
-	 * capture ni before dropping the conn ref: the tx's ref may be the
-	 * last thing keeping the conn (and hence its peer) alive
-	 */
-	if (!ni && tx->tx_conn)
-		ni = tx->tx_conn->ksnc_peer->ksnp_ni;
-
-	if (tx->tx_conn)
-		ksocknal_conn_decref(tx->tx_conn);
-
- ksocknal_free_tx(tx);
-	if (lnetmsg) /* KSOCK_MSG_NOOP txs carry no lnetmsg */
- lnet_finalize(ni, lnetmsg, rc);
-}
-
-void
-ksocknal_txlist_done(struct lnet_ni *ni, struct list_head *txlist, int error)
-{
- struct ksock_tx *tx;
-
- while (!list_empty(txlist)) {
- tx = list_entry(txlist->next, struct ksock_tx, tx_list);
-
- if (error && tx->tx_lnetmsg) {
- CNETERR("Deleting packet type %d len %d %s->%s\n",
- le32_to_cpu(tx->tx_lnetmsg->msg_hdr.type),
- le32_to_cpu(tx->tx_lnetmsg->msg_hdr.payload_length),
- libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.src_nid)),
- libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.dest_nid)));
- } else if (error) {
- CNETERR("Deleting noop packet\n");
- }
-
- list_del(&tx->tx_list);
-
- LASSERT(atomic_read(&tx->tx_refcount) == 1);
- ksocknal_tx_done(ni, tx);
- }
-}
-
-static void
-ksocknal_check_zc_req(struct ksock_tx *tx)
-{
- struct ksock_conn *conn = tx->tx_conn;
- struct ksock_peer *peer = conn->ksnc_peer;
-
- /*
- * Set tx_msg.ksm_zc_cookies[0] to a unique non-zero cookie and add tx
- * to ksnp_zc_req_list if some fragment of this message should be sent
- * zero-copy. Our peer will send an ACK containing this cookie when
- * she has received this message to tell us we can signal completion.
- * tx_msg.ksm_zc_cookies[0] remains non-zero while tx is on
- * ksnp_zc_req_list.
- */
- LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
- LASSERT(tx->tx_zc_capable);
-
- tx->tx_zc_checked = 1;
-
- if (conn->ksnc_proto == &ksocknal_protocol_v1x ||
- !conn->ksnc_zc_capable)
- return;
-
-	/*
-	 * assign a cookie and queue the tx on the pending list; it will be
-	 * released when a matching ACK arrives. See ksocknal_handle_zcack()
-	 */
- ksocknal_tx_addref(tx);
-
- spin_lock(&peer->ksnp_lock);
-
- /* ZC_REQ is going to be pinned to the peer */
- tx->tx_deadline =
- jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
-
- LASSERT(!tx->tx_msg.ksm_zc_cookies[0]);
-
- tx->tx_msg.ksm_zc_cookies[0] = peer->ksnp_zc_next_cookie++;
-
- if (!peer->ksnp_zc_next_cookie)
- peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
-
- list_add_tail(&tx->tx_zc_list, &peer->ksnp_zc_req_list);
-
- spin_unlock(&peer->ksnp_lock);
-}
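-
-/*
- * Cookie note: ksnp_zc_next_cookie is a per-peer counter; when it wraps to
- * 0 it restarts at SOCKNAL_KEEPALIVE_PING + 1, so 0 keeps meaning "no
- * cookie" and 1 stays reserved for keepalive pings.
- */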
-
-static void
-ksocknal_uncheck_zc_req(struct ksock_tx *tx)
-{
- struct ksock_peer *peer = tx->tx_conn->ksnc_peer;
-
- LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
- LASSERT(tx->tx_zc_capable);
-
- tx->tx_zc_checked = 0;
-
- spin_lock(&peer->ksnp_lock);
-
- if (!tx->tx_msg.ksm_zc_cookies[0]) {
- /* Not waiting for an ACK */
- spin_unlock(&peer->ksnp_lock);
- return;
- }
-
- tx->tx_msg.ksm_zc_cookies[0] = 0;
- list_del(&tx->tx_zc_list);
-
- spin_unlock(&peer->ksnp_lock);
-
- ksocknal_tx_decref(tx);
-}
-
-static int
-ksocknal_process_transmit(struct ksock_conn *conn, struct ksock_tx *tx)
-{
- int rc;
-
- if (tx->tx_zc_capable && !tx->tx_zc_checked)
- ksocknal_check_zc_req(tx);
-
- rc = ksocknal_transmit(conn, tx);
-
- CDEBUG(D_NET, "send(%d) %d\n", tx->tx_resid, rc);
-
- if (!tx->tx_resid) {
- /* Sent everything OK */
- LASSERT(!rc);
-
- return 0;
- }
-
- if (rc == -EAGAIN)
- return rc;
-
- if (rc == -ENOMEM) {
- static int counter;
-
-		counter++; /* exponential backoff: warn only at powers of 2 */
- if ((counter & (-counter)) == counter)
- CWARN("%u ENOMEM tx %p\n", counter, conn);
-
- /* Queue on ksnd_enomem_conns for retry after a timeout */
- spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-
- /* enomem list takes over scheduler's ref... */
- LASSERT(conn->ksnc_tx_scheduled);
- list_add_tail(&conn->ksnc_tx_list,
- &ksocknal_data.ksnd_enomem_conns);
- if (!time_after_eq(jiffies + SOCKNAL_ENOMEM_RETRY,
- ksocknal_data.ksnd_reaper_waketime))
- wake_up(&ksocknal_data.ksnd_reaper_waitq);
-
- spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
- return rc;
- }
-
- /* Actual error */
- LASSERT(rc < 0);
-
- if (!conn->ksnc_closing) {
- switch (rc) {
- case -ECONNRESET:
- LCONSOLE_WARN("Host %pI4h reset our connection while we were sending data; it may have rebooted.\n",
- &conn->ksnc_ipaddr);
- break;
- default:
- LCONSOLE_WARN("There was an unexpected network error while writing to %pI4h: %d.\n",
- &conn->ksnc_ipaddr, rc);
- break;
- }
- CDEBUG(D_NET, "[%p] Error %d on write to %s ip %pI4h:%d\n",
- conn, rc,
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- &conn->ksnc_ipaddr,
- conn->ksnc_port);
- }
-
- if (tx->tx_zc_checked)
- ksocknal_uncheck_zc_req(tx);
-
- /* it's not an error if conn is being closed */
- ksocknal_close_conn_and_siblings(conn, (conn->ksnc_closing) ? 0 : rc);
-
- return rc;
-}
-
-static void
-ksocknal_launch_connection_locked(struct ksock_route *route)
-{
- /* called holding write lock on ksnd_global_lock */
-
- LASSERT(!route->ksnr_scheduled);
- LASSERT(!route->ksnr_connecting);
- LASSERT(ksocknal_route_mask() & ~route->ksnr_connected);
-
- route->ksnr_scheduled = 1; /* scheduling conn for connd */
- ksocknal_route_addref(route); /* extra ref for connd */
-
- spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
-
- list_add_tail(&route->ksnr_connd_list,
- &ksocknal_data.ksnd_connd_routes);
- wake_up(&ksocknal_data.ksnd_connd_waitq);
-
- spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
-}
-
-void
-ksocknal_launch_all_connections_locked(struct ksock_peer *peer)
-{
- struct ksock_route *route;
-
- /* called holding write lock on ksnd_global_lock */
- for (;;) {
- /* launch any/all connections that need it */
- route = ksocknal_find_connectable_route_locked(peer);
- if (!route)
- return;
-
- ksocknal_launch_connection_locked(route);
- }
-}
-
-struct ksock_conn *
-ksocknal_find_conn_locked(struct ksock_peer *peer, struct ksock_tx *tx,
- int nonblk)
-{
- struct list_head *tmp;
- struct ksock_conn *conn;
- struct ksock_conn *typed = NULL;
- struct ksock_conn *fallback = NULL;
- int tnob = 0;
- int fnob = 0;
-
- list_for_each(tmp, &peer->ksnp_conns) {
- struct ksock_conn *c;
- int nob, rc;
-
- c = list_entry(tmp, struct ksock_conn, ksnc_list);
- nob = atomic_read(&c->ksnc_tx_nob) +
- c->ksnc_sock->sk->sk_wmem_queued;
-
- LASSERT(!c->ksnc_closing);
- LASSERT(c->ksnc_proto &&
- c->ksnc_proto->pro_match_tx);
-
- rc = c->ksnc_proto->pro_match_tx(c, tx, nonblk);
-
- switch (rc) {
- default:
- LBUG();
- case SOCKNAL_MATCH_NO: /* protocol rejected the tx */
- continue;
-
- case SOCKNAL_MATCH_YES: /* typed connection */
- if (!typed || tnob > nob ||
- (tnob == nob && *ksocknal_tunables.ksnd_round_robin &&
- time_after(typed->ksnc_tx_last_post, c->ksnc_tx_last_post))) {
- typed = c;
- tnob = nob;
- }
- break;
-
- case SOCKNAL_MATCH_MAY: /* fallback connection */
- if (!fallback || fnob > nob ||
- (fnob == nob && *ksocknal_tunables.ksnd_round_robin &&
- time_after(fallback->ksnc_tx_last_post, c->ksnc_tx_last_post))) {
- fallback = c;
- fnob = nob;
- }
- break;
- }
- }
-
- /* prefer the typed selection */
- conn = (typed) ? typed : fallback;
-
- if (conn)
- conn->ksnc_tx_last_post = jiffies;
-
- return conn;
-}
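-
-/*
- * Selection policy, as coded above: prefer a SOCKNAL_MATCH_YES (typed) conn
- * over a SOCKNAL_MATCH_MAY fallback; within each class pick the least
- * loaded one (ksnc_tx_nob + socket wmem queued), breaking ties by
- * least-recently-posted when ksnd_round_robin is set.
- */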
-
-void
-ksocknal_tx_prep(struct ksock_conn *conn, struct ksock_tx *tx)
-{
- conn->ksnc_proto->pro_pack(tx);
-
- atomic_add(tx->tx_nob, &conn->ksnc_tx_nob);
- ksocknal_conn_addref(conn); /* +1 ref for tx */
- tx->tx_conn = conn;
-}
-
-void
-ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn)
-{
- struct ksock_sched *sched = conn->ksnc_scheduler;
- struct ksock_msg *msg = &tx->tx_msg;
- struct ksock_tx *ztx = NULL;
- int bufnob = 0;
-
- /*
- * called holding global lock (read or irq-write) and caller may
- * not have dropped this lock between finding conn and calling me,
- * so we don't need the {get,put}connsock dance to deref
- * ksnc_sock...
- */
- LASSERT(!conn->ksnc_closing);
-
- CDEBUG(D_NET, "Sending to %s ip %pI4h:%d\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- &conn->ksnc_ipaddr, conn->ksnc_port);
-
- ksocknal_tx_prep(conn, tx);
-
- /*
- * Ensure the frags we've been given EXACTLY match the number of
- * bytes we want to send. Many TCP/IP stacks disregard any total
- * size parameters passed to them and just look at the frags.
- *
- * We always expect at least 1 mapped fragment containing the
- * complete ksocknal message header.
- */
- LASSERT(lnet_iov_nob(tx->tx_niov, tx->tx_iov) +
- lnet_kiov_nob(tx->tx_nkiov, tx->tx_kiov) ==
- (unsigned int)tx->tx_nob);
- LASSERT(tx->tx_niov >= 1);
- LASSERT(tx->tx_resid == tx->tx_nob);
-
- CDEBUG(D_NET, "Packet %p type %d, nob %d niov %d nkiov %d\n",
- tx, (tx->tx_lnetmsg) ? tx->tx_lnetmsg->msg_hdr.type :
- KSOCK_MSG_NOOP,
- tx->tx_nob, tx->tx_niov, tx->tx_nkiov);
-
- /*
- * FIXME: SOCK_WMEM_QUEUED and SOCK_ERROR could block in __DARWIN8__
- * but they're used inside spinlocks a lot.
- */
- bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
- spin_lock_bh(&sched->kss_lock);
-
- if (list_empty(&conn->ksnc_tx_queue) && !bufnob) {
- /* First packet starts the timeout */
- conn->ksnc_tx_deadline =
- jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
- if (conn->ksnc_tx_bufnob > 0) /* something got ACKed */
- conn->ksnc_peer->ksnp_last_alive = jiffies;
- conn->ksnc_tx_bufnob = 0;
- mb(); /* order with adding to tx_queue */
- }
-
- if (msg->ksm_type == KSOCK_MSG_NOOP) {
- /*
- * The packet is noop ZC ACK, try to piggyback the ack_cookie
- * on a normal packet so I don't need to send it
- */
- LASSERT(msg->ksm_zc_cookies[1]);
- LASSERT(conn->ksnc_proto->pro_queue_tx_zcack);
-
- /* ZC ACK piggybacked on ztx release tx later */
- if (conn->ksnc_proto->pro_queue_tx_zcack(conn, tx, 0))
- ztx = tx;
- } else {
- /*
-		 * It's a normal packet - can it piggyback a noop zc-ack that
- * has been queued already?
- */
- LASSERT(!msg->ksm_zc_cookies[1]);
- LASSERT(conn->ksnc_proto->pro_queue_tx_msg);
-
- ztx = conn->ksnc_proto->pro_queue_tx_msg(conn, tx);
- /* ztx will be released later */
- }
-
- if (ztx) {
- atomic_sub(ztx->tx_nob, &conn->ksnc_tx_nob);
- list_add_tail(&ztx->tx_list, &sched->kss_zombie_noop_txs);
- }
-
- if (conn->ksnc_tx_ready && /* able to send */
- !conn->ksnc_tx_scheduled) { /* not scheduled to send */
- /* +1 ref for scheduler */
- ksocknal_conn_addref(conn);
- list_add_tail(&conn->ksnc_tx_list, &sched->kss_tx_conns);
- conn->ksnc_tx_scheduled = 1;
- wake_up(&sched->kss_waitq);
- }
-
- spin_unlock_bh(&sched->kss_lock);
-}
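-
-/*
- * Piggyback example: a NOOP carrying only a zc-ack cookie can be absorbed
- * by a message already queued (and vice versa); the displaced noop tx
- * (ztx above) is parked on kss_zombie_noop_txs and recycled in bulk by the
- * scheduler via ksocknal_txlist_done().
- */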
-
-struct ksock_route *
-ksocknal_find_connectable_route_locked(struct ksock_peer *peer)
-{
- unsigned long now = jiffies;
- struct list_head *tmp;
- struct ksock_route *route;
-
- list_for_each(tmp, &peer->ksnp_routes) {
- route = list_entry(tmp, struct ksock_route, ksnr_list);
-
- LASSERT(!route->ksnr_connecting || route->ksnr_scheduled);
-
- /* connections being established */
- if (route->ksnr_scheduled)
- continue;
-
- /* all route types connected ? */
- if (!(ksocknal_route_mask() & ~route->ksnr_connected))
- continue;
-
-		if (route->ksnr_retry_interval && /* not the first attempt */
-		    time_before(now, route->ksnr_timeout)) {
- CDEBUG(D_NET,
- "Too soon to retry route %pI4h (cnted %d, interval %ld, %ld secs later)\n",
- &route->ksnr_ipaddr,
- route->ksnr_connected,
- route->ksnr_retry_interval,
- (route->ksnr_timeout - now) / HZ);
- continue;
- }
-
- return route;
- }
-
- return NULL;
-}
-
-struct ksock_route *
-ksocknal_find_connecting_route_locked(struct ksock_peer *peer)
-{
- struct list_head *tmp;
- struct ksock_route *route;
-
- list_for_each(tmp, &peer->ksnp_routes) {
- route = list_entry(tmp, struct ksock_route, ksnr_list);
-
- LASSERT(!route->ksnr_connecting || route->ksnr_scheduled);
-
- if (route->ksnr_scheduled)
- return route;
- }
-
- return NULL;
-}
-
-int
-ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx,
- struct lnet_process_id id)
-{
- struct ksock_peer *peer;
- struct ksock_conn *conn;
- rwlock_t *g_lock;
- int retry;
- int rc;
-
- LASSERT(!tx->tx_conn);
-
- g_lock = &ksocknal_data.ksnd_global_lock;
-
- for (retry = 0;; retry = 1) {
- read_lock(g_lock);
- peer = ksocknal_find_peer_locked(ni, id);
- if (peer) {
- if (!ksocknal_find_connectable_route_locked(peer)) {
- conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk);
- if (conn) {
- /*
- * I've got no routes that need to be
- * connecting and I do have an actual
- * connection...
- */
- ksocknal_queue_tx_locked(tx, conn);
- read_unlock(g_lock);
- return 0;
- }
- }
- }
-
- /* I'll need a write lock... */
- read_unlock(g_lock);
-
- write_lock_bh(g_lock);
-
- peer = ksocknal_find_peer_locked(ni, id);
- if (peer)
- break;
-
- write_unlock_bh(g_lock);
-
- if (id.pid & LNET_PID_USERFLAG) {
- CERROR("Refusing to create a connection to userspace process %s\n",
- libcfs_id2str(id));
- return -EHOSTUNREACH;
- }
-
- if (retry) {
- CERROR("Can't find peer %s\n", libcfs_id2str(id));
- return -EHOSTUNREACH;
- }
-
- rc = ksocknal_add_peer(ni, id,
- LNET_NIDADDR(id.nid),
- lnet_acceptor_port());
- if (rc) {
- CERROR("Can't add peer %s: %d\n",
- libcfs_id2str(id), rc);
- return rc;
- }
- }
-
- ksocknal_launch_all_connections_locked(peer);
-
- conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk);
- if (conn) {
- /* Connection exists; queue message on it */
- ksocknal_queue_tx_locked(tx, conn);
- write_unlock_bh(g_lock);
- return 0;
- }
-
- if (peer->ksnp_accepting > 0 ||
- ksocknal_find_connecting_route_locked(peer)) {
- /* the message is going to be pinned to the peer */
- tx->tx_deadline =
- jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
-
- /* Queue the message until a connection is established */
- list_add_tail(&tx->tx_list, &peer->ksnp_tx_queue);
- write_unlock_bh(g_lock);
- return 0;
- }
-
- write_unlock_bh(g_lock);
-
- /* NB Routes may be ignored if connections to them failed recently */
- CNETERR("No usable routes to %s\n", libcfs_id2str(id));
- return -EHOSTUNREACH;
-}
-
-int
-ksocknal_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
-{
- unsigned int mpflag = 0;
- int type = lntmsg->msg_type;
- struct lnet_process_id target = lntmsg->msg_target;
- unsigned int payload_niov = lntmsg->msg_niov;
- struct kvec *payload_iov = lntmsg->msg_iov;
- struct bio_vec *payload_kiov = lntmsg->msg_kiov;
- unsigned int payload_offset = lntmsg->msg_offset;
- unsigned int payload_nob = lntmsg->msg_len;
- struct ksock_tx *tx;
- int desc_size;
- int rc;
-
- /*
- * NB 'private' is different depending on what we're sending.
- * Just ignore it...
- */
- CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n",
- payload_nob, payload_niov, libcfs_id2str(target));
-
- LASSERT(!payload_nob || payload_niov > 0);
- LASSERT(payload_niov <= LNET_MAX_IOV);
- /* payload is either all vaddrs or all pages */
- LASSERT(!(payload_kiov && payload_iov));
- LASSERT(!in_interrupt());
-
- if (payload_iov)
- desc_size = offsetof(struct ksock_tx,
- tx_frags.virt.iov[1 + payload_niov]);
- else
- desc_size = offsetof(struct ksock_tx,
- tx_frags.paged.kiov[payload_niov]);
-
- if (lntmsg->msg_vmflush)
- mpflag = memalloc_noreclaim_save();
- tx = ksocknal_alloc_tx(KSOCK_MSG_LNET, desc_size);
- if (!tx) {
- CERROR("Can't allocate tx desc type %d size %d\n",
- type, desc_size);
- if (lntmsg->msg_vmflush)
- memalloc_noreclaim_restore(mpflag);
- return -ENOMEM;
- }
-
- tx->tx_conn = NULL; /* set when assigned a conn */
- tx->tx_lnetmsg = lntmsg;
-
- if (payload_iov) {
- tx->tx_kiov = NULL;
- tx->tx_nkiov = 0;
- tx->tx_iov = tx->tx_frags.virt.iov;
- tx->tx_niov = 1 +
- lnet_extract_iov(payload_niov, &tx->tx_iov[1],
- payload_niov, payload_iov,
- payload_offset, payload_nob);
- } else {
- tx->tx_niov = 1;
- tx->tx_iov = &tx->tx_frags.paged.iov;
- tx->tx_kiov = tx->tx_frags.paged.kiov;
- tx->tx_nkiov = lnet_extract_kiov(payload_niov, tx->tx_kiov,
- payload_niov, payload_kiov,
- payload_offset, payload_nob);
-
- if (payload_nob >= *ksocknal_tunables.ksnd_zc_min_payload)
- tx->tx_zc_capable = 1;
- }
-
- tx->tx_msg.ksm_csum = 0;
- tx->tx_msg.ksm_type = KSOCK_MSG_LNET;
- tx->tx_msg.ksm_zc_cookies[0] = 0;
- tx->tx_msg.ksm_zc_cookies[1] = 0;
-
- /* The first fragment will be set later in pro_pack */
- rc = ksocknal_launch_packet(ni, tx, target);
- if (mpflag)
- memalloc_noreclaim_restore(mpflag);
-
- if (!rc)
- return 0;
-
- ksocknal_free_tx(tx);
- return -EIO;
-}
-
-int
-ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name)
-{
- struct task_struct *task = kthread_run(fn, arg, "%s", name);
-
- if (IS_ERR(task))
- return PTR_ERR(task);
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
- ksocknal_data.ksnd_nthreads++;
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
- return 0;
-}
-
-void
-ksocknal_thread_fini(void)
-{
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
- ksocknal_data.ksnd_nthreads--;
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-}
-
-int
-ksocknal_new_packet(struct ksock_conn *conn, int nob_to_skip)
-{
- static char ksocknal_slop_buffer[4096];
- struct kvec *kvec = conn->ksnc_rx_iov_space;
-
- int nob;
- unsigned int niov;
- int skipped;
-
- LASSERT(conn->ksnc_proto);
-
- if (*ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) {
- /* Remind the socket to ack eagerly... */
- ksocknal_lib_eager_ack(conn);
- }
-
- if (!nob_to_skip) { /* right at next packet boundary now */
- conn->ksnc_rx_started = 0;
- mb(); /* racing with timeout thread */
-
- switch (conn->ksnc_proto->pro_version) {
- case KSOCK_PROTO_V2:
- case KSOCK_PROTO_V3:
- conn->ksnc_rx_state = SOCKNAL_RX_KSM_HEADER;
- kvec->iov_base = &conn->ksnc_msg;
- kvec->iov_len = offsetof(struct ksock_msg, ksm_u);
- conn->ksnc_rx_nob_left = offsetof(struct ksock_msg, ksm_u);
- iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec,
- 1, offsetof(struct ksock_msg, ksm_u));
- break;
-
- case KSOCK_PROTO_V1:
- /* Receiving bare struct lnet_hdr */
- conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
- kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg;
- kvec->iov_len = sizeof(struct lnet_hdr);
- conn->ksnc_rx_nob_left = sizeof(struct lnet_hdr);
- iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec,
- 1, sizeof(struct lnet_hdr));
- break;
-
- default:
- LBUG();
- }
- conn->ksnc_rx_csum = ~0;
- return 1;
- }
-
- /*
- * Set up to skip as much as possible now. If there's more left
- * (ran out of iov entries) we'll get called again
- */
- conn->ksnc_rx_state = SOCKNAL_RX_SLOP;
- conn->ksnc_rx_nob_left = nob_to_skip;
- skipped = 0;
- niov = 0;
-
- do {
- nob = min_t(int, nob_to_skip, sizeof(ksocknal_slop_buffer));
-
- kvec[niov].iov_base = ksocknal_slop_buffer;
- kvec[niov].iov_len = nob;
- niov++;
- skipped += nob;
- nob_to_skip -= nob;
-
-	} while (nob_to_skip &&	/* mustn't overflow conn's rx iov */
-		 niov < ARRAY_SIZE(conn->ksnc_rx_iov_space));
-
- iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, niov, skipped);
- return 0;
-}
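-
-/*
- * Returns 1 when the conn is set up to read the next message header, 0
- * when it is mid-skip (SOCKNAL_RX_SLOP) and ksocknal_process_receive()
- * should keep consuming slop bytes.
- */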
-
-static int
-ksocknal_process_receive(struct ksock_conn *conn)
-{
- struct kvec *kvec = conn->ksnc_rx_iov_space;
- struct lnet_hdr *lhdr;
- struct lnet_process_id *id;
- int rc;
-
- LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0);
-
- /* NB: sched lock NOT held */
- /* SOCKNAL_RX_LNET_HEADER is here for backward compatibility */
- LASSERT(conn->ksnc_rx_state == SOCKNAL_RX_KSM_HEADER ||
- conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD ||
- conn->ksnc_rx_state == SOCKNAL_RX_LNET_HEADER ||
- conn->ksnc_rx_state == SOCKNAL_RX_SLOP);
- again:
- if (iov_iter_count(&conn->ksnc_rx_to)) {
- rc = ksocknal_receive(conn);
-
- if (rc <= 0) {
- LASSERT(rc != -EAGAIN);
-
- if (!rc)
- CDEBUG(D_NET, "[%p] EOF from %s ip %pI4h:%d\n",
- conn,
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- &conn->ksnc_ipaddr,
- conn->ksnc_port);
- else if (!conn->ksnc_closing)
- CERROR("[%p] Error %d on read from %s ip %pI4h:%d\n",
- conn, rc,
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- &conn->ksnc_ipaddr,
- conn->ksnc_port);
-
- /* it's not an error if conn is being closed */
- ksocknal_close_conn_and_siblings(conn,
- (conn->ksnc_closing) ? 0 : rc);
- return (!rc ? -ESHUTDOWN : rc);
- }
-
- if (iov_iter_count(&conn->ksnc_rx_to)) {
- /* short read */
- return -EAGAIN;
- }
- }
- switch (conn->ksnc_rx_state) {
- case SOCKNAL_RX_KSM_HEADER:
- if (conn->ksnc_flip) {
- __swab32s(&conn->ksnc_msg.ksm_type);
- __swab32s(&conn->ksnc_msg.ksm_csum);
- __swab64s(&conn->ksnc_msg.ksm_zc_cookies[0]);
- __swab64s(&conn->ksnc_msg.ksm_zc_cookies[1]);
- }
-
- if (conn->ksnc_msg.ksm_type != KSOCK_MSG_NOOP &&
- conn->ksnc_msg.ksm_type != KSOCK_MSG_LNET) {
- CERROR("%s: Unknown message type: %x\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- conn->ksnc_msg.ksm_type);
- ksocknal_new_packet(conn, 0);
- ksocknal_close_conn_and_siblings(conn, -EPROTO);
- return -EPROTO;
- }
-
- if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP &&
- conn->ksnc_msg.ksm_csum && /* has checksum */
- conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
- /* NOOP Checksum error */
- CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum);
- ksocknal_new_packet(conn, 0);
- ksocknal_close_conn_and_siblings(conn, -EPROTO);
- return -EIO;
- }
-
- if (conn->ksnc_msg.ksm_zc_cookies[1]) {
- __u64 cookie = 0;
-
- LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x);
-
- if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP)
- cookie = conn->ksnc_msg.ksm_zc_cookies[0];
-
- rc = conn->ksnc_proto->pro_handle_zcack(conn, cookie,
- conn->ksnc_msg.ksm_zc_cookies[1]);
-
- if (rc) {
- CERROR("%s: Unknown ZC-ACK cookie: %llu, %llu\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- cookie, conn->ksnc_msg.ksm_zc_cookies[1]);
- ksocknal_new_packet(conn, 0);
- ksocknal_close_conn_and_siblings(conn, -EPROTO);
- return rc;
- }
- }
-
- if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) {
- ksocknal_new_packet(conn, 0);
-			return 0; /* NOOP is done; nothing left to read */
- }
-
- conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
- conn->ksnc_rx_nob_left = sizeof(struct ksock_lnet_msg);
-
- kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg;
- kvec->iov_len = sizeof(struct ksock_lnet_msg);
-
- iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec,
- 1, sizeof(struct ksock_lnet_msg));
-
- goto again; /* read lnet header now */
-
- case SOCKNAL_RX_LNET_HEADER:
- /* unpack message header */
- conn->ksnc_proto->pro_unpack(&conn->ksnc_msg);
-
- if (conn->ksnc_peer->ksnp_id.pid & LNET_PID_USERFLAG) {
- /* Userspace peer */
- lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr;
- id = &conn->ksnc_peer->ksnp_id;
-
- /* Substitute process ID assigned at connection time */
- lhdr->src_pid = cpu_to_le32(id->pid);
- lhdr->src_nid = cpu_to_le64(id->nid);
- }
-
- conn->ksnc_rx_state = SOCKNAL_RX_PARSE;
- ksocknal_conn_addref(conn); /* ++ref while parsing */
-
- rc = lnet_parse(conn->ksnc_peer->ksnp_ni,
- &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr,
- conn->ksnc_peer->ksnp_id.nid, conn, 0);
- if (rc < 0) {
- /* I just received garbage: give up on this conn */
- ksocknal_new_packet(conn, 0);
- ksocknal_close_conn_and_siblings(conn, rc);
- ksocknal_conn_decref(conn);
- return -EPROTO;
- }
-
- /* I'm racing with ksocknal_recv() */
- LASSERT(conn->ksnc_rx_state == SOCKNAL_RX_PARSE ||
- conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD);
-
- if (conn->ksnc_rx_state != SOCKNAL_RX_LNET_PAYLOAD)
- return 0;
-
- /* ksocknal_recv() got called */
- goto again;
-
- case SOCKNAL_RX_LNET_PAYLOAD:
- /* payload all received */
- rc = 0;
-
- if (!conn->ksnc_rx_nob_left && /* not truncating */
- conn->ksnc_msg.ksm_csum && /* has checksum */
- conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
- CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id),
- conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum);
- rc = -EIO;
- }
-
- if (!rc && conn->ksnc_msg.ksm_zc_cookies[0]) {
- LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x);
-
- lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr;
- id = &conn->ksnc_peer->ksnp_id;
-
- rc = conn->ksnc_proto->pro_handle_zcreq(conn,
- conn->ksnc_msg.ksm_zc_cookies[0],
- *ksocknal_tunables.ksnd_nonblk_zcack ||
- le64_to_cpu(lhdr->src_nid) != id->nid);
- }
-
- lnet_finalize(conn->ksnc_peer->ksnp_ni, conn->ksnc_cookie, rc);
-
- if (rc) {
- ksocknal_new_packet(conn, 0);
- ksocknal_close_conn_and_siblings(conn, rc);
- return -EPROTO;
- }
- /* Fall through */
-
- case SOCKNAL_RX_SLOP:
- /* starting new packet? */
- if (ksocknal_new_packet(conn, conn->ksnc_rx_nob_left))
- return 0; /* come back later */
- goto again; /* try to finish reading slop now */
-
- default:
- break;
- }
-
- /* Not Reached */
- LBUG();
- return -EINVAL; /* keep gcc happy */
-}
-
-int
-ksocknal_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
- int delayed, struct iov_iter *to, unsigned int rlen)
-{
- struct ksock_conn *conn = private;
- struct ksock_sched *sched = conn->ksnc_scheduler;
-
- LASSERT(iov_iter_count(to) <= rlen);
- LASSERT(to->nr_segs <= LNET_MAX_IOV);
-
- conn->ksnc_cookie = msg;
- conn->ksnc_rx_nob_left = rlen;
-
- conn->ksnc_rx_to = *to;
-
- LASSERT(conn->ksnc_rx_scheduled);
-
- spin_lock_bh(&sched->kss_lock);
-
- switch (conn->ksnc_rx_state) {
- case SOCKNAL_RX_PARSE_WAIT:
- list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns);
- wake_up(&sched->kss_waitq);
- LASSERT(conn->ksnc_rx_ready);
- break;
-
- case SOCKNAL_RX_PARSE:
- /* scheduler hasn't noticed I'm parsing yet */
- break;
- }
-
- conn->ksnc_rx_state = SOCKNAL_RX_LNET_PAYLOAD;
-
- spin_unlock_bh(&sched->kss_lock);
- ksocknal_conn_decref(conn);
- return 0;
-}
-
-static inline int
-ksocknal_sched_cansleep(struct ksock_sched *sched)
-{
- int rc;
-
- spin_lock_bh(&sched->kss_lock);
-
- rc = !ksocknal_data.ksnd_shuttingdown &&
- list_empty(&sched->kss_rx_conns) &&
- list_empty(&sched->kss_tx_conns);
-
- spin_unlock_bh(&sched->kss_lock);
- return rc;
-}
-
-int ksocknal_scheduler(void *arg)
-{
- struct ksock_sched_info *info;
- struct ksock_sched *sched;
- struct ksock_conn *conn;
- struct ksock_tx *tx;
- int rc;
- int nloops = 0;
- long id = (long)arg;
-
- info = ksocknal_data.ksnd_sched_info[KSOCK_THREAD_CPT(id)];
- sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)];
-
- rc = cfs_cpt_bind(lnet_cpt_table(), info->ksi_cpt);
- if (rc) {
- CWARN("Can't set CPU partition affinity to %d: %d\n",
- info->ksi_cpt, rc);
- }
-
- spin_lock_bh(&sched->kss_lock);
-
- while (!ksocknal_data.ksnd_shuttingdown) {
- int did_something = 0;
-
- /* Ensure I progress everything semi-fairly */
-
- if (!list_empty(&sched->kss_rx_conns)) {
- conn = list_entry(sched->kss_rx_conns.next,
- struct ksock_conn, ksnc_rx_list);
- list_del(&conn->ksnc_rx_list);
-
- LASSERT(conn->ksnc_rx_scheduled);
- LASSERT(conn->ksnc_rx_ready);
-
- /*
- * clear rx_ready in case receive isn't complete.
- * Do it BEFORE we call process_recv, since
- * data_ready can set it any time after we release
- * kss_lock.
- */
- conn->ksnc_rx_ready = 0;
- spin_unlock_bh(&sched->kss_lock);
-
- rc = ksocknal_process_receive(conn);
-
- spin_lock_bh(&sched->kss_lock);
-
- /* I'm the only one that can clear this flag */
- LASSERT(conn->ksnc_rx_scheduled);
-
- /* Did process_receive get everything it wanted? */
- if (!rc)
- conn->ksnc_rx_ready = 1;
-
- if (conn->ksnc_rx_state == SOCKNAL_RX_PARSE) {
- /*
- * Conn blocked waiting for ksocknal_recv()
- * I change its state (under lock) to signal
- * it can be rescheduled
- */
- conn->ksnc_rx_state = SOCKNAL_RX_PARSE_WAIT;
- } else if (conn->ksnc_rx_ready) {
- /* reschedule for rx */
- list_add_tail(&conn->ksnc_rx_list,
- &sched->kss_rx_conns);
- } else {
- conn->ksnc_rx_scheduled = 0;
- /* drop my ref */
- ksocknal_conn_decref(conn);
- }
-
- did_something = 1;
- }
-
- if (!list_empty(&sched->kss_tx_conns)) {
- LIST_HEAD(zlist);
-
- if (!list_empty(&sched->kss_zombie_noop_txs)) {
- list_add(&zlist, &sched->kss_zombie_noop_txs);
- list_del_init(&sched->kss_zombie_noop_txs);
- }
-
- conn = list_entry(sched->kss_tx_conns.next,
- struct ksock_conn, ksnc_tx_list);
- list_del(&conn->ksnc_tx_list);
-
- LASSERT(conn->ksnc_tx_scheduled);
- LASSERT(conn->ksnc_tx_ready);
- LASSERT(!list_empty(&conn->ksnc_tx_queue));
-
- tx = list_entry(conn->ksnc_tx_queue.next,
- struct ksock_tx, tx_list);
-
- if (conn->ksnc_tx_carrier == tx)
- ksocknal_next_tx_carrier(conn);
-
- /* dequeue now so empty list => more to send */
- list_del(&tx->tx_list);
-
- /*
- * Clear tx_ready in case send isn't complete. Do
- * it BEFORE we call process_transmit, since
- * write_space can set it any time after we release
- * kss_lock.
- */
- conn->ksnc_tx_ready = 0;
- spin_unlock_bh(&sched->kss_lock);
-
- if (!list_empty(&zlist)) {
-				/*
-				 * freeing zombie noop txs is fast: they
-				 * just go back on the freelist
-				 */
- ksocknal_txlist_done(NULL, &zlist, 0);
- }
-
- rc = ksocknal_process_transmit(conn, tx);
-
- if (rc == -ENOMEM || rc == -EAGAIN) {
- /*
- * Incomplete send: replace tx on HEAD of
- * tx_queue
- */
- spin_lock_bh(&sched->kss_lock);
- list_add(&tx->tx_list, &conn->ksnc_tx_queue);
- } else {
- /* Complete send; tx -ref */
- ksocknal_tx_decref(tx);
-
- spin_lock_bh(&sched->kss_lock);
- /* assume space for more */
- conn->ksnc_tx_ready = 1;
- }
-
- if (rc == -ENOMEM) {
- /*
- * Do nothing; after a short timeout, this
- * conn will be reposted on kss_tx_conns.
- */
- } else if (conn->ksnc_tx_ready &&
- !list_empty(&conn->ksnc_tx_queue)) {
- /* reschedule for tx */
- list_add_tail(&conn->ksnc_tx_list,
- &sched->kss_tx_conns);
- } else {
- conn->ksnc_tx_scheduled = 0;
- /* drop my ref */
- ksocknal_conn_decref(conn);
- }
-
- did_something = 1;
- }
- if (!did_something || /* nothing to do */
- ++nloops == SOCKNAL_RESCHED) { /* hogging CPU? */
- spin_unlock_bh(&sched->kss_lock);
-
- nloops = 0;
-
- if (!did_something) { /* wait for something to do */
- rc = wait_event_interruptible_exclusive(
- sched->kss_waitq,
- !ksocknal_sched_cansleep(sched));
- LASSERT(!rc);
- } else {
- cond_resched();
- }
-
- spin_lock_bh(&sched->kss_lock);
- }
- }
-
- spin_unlock_bh(&sched->kss_lock);
- ksocknal_thread_fini();
- return 0;
-}
-
-/*
- * Add connection to kss_rx_conns of scheduler
- * and wakeup the scheduler.
- */
-void ksocknal_read_callback(struct ksock_conn *conn)
-{
- struct ksock_sched *sched;
-
- sched = conn->ksnc_scheduler;
-
- spin_lock_bh(&sched->kss_lock);
-
- conn->ksnc_rx_ready = 1;
-
- if (!conn->ksnc_rx_scheduled) { /* not being progressed */
- list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns);
- conn->ksnc_rx_scheduled = 1;
- /* extra ref for scheduler */
- ksocknal_conn_addref(conn);
-
- wake_up(&sched->kss_waitq);
- }
- spin_unlock_bh(&sched->kss_lock);
-}
-
-/*
- * Add connection to kss_tx_conns of scheduler
- * and wakeup the scheduler.
- */
-void ksocknal_write_callback(struct ksock_conn *conn)
-{
- struct ksock_sched *sched;
-
- sched = conn->ksnc_scheduler;
-
- spin_lock_bh(&sched->kss_lock);
-
- conn->ksnc_tx_ready = 1;
-
- if (!conn->ksnc_tx_scheduled && /* not being progressed */
- !list_empty(&conn->ksnc_tx_queue)) { /* packets to send */
- list_add_tail(&conn->ksnc_tx_list, &sched->kss_tx_conns);
- conn->ksnc_tx_scheduled = 1;
- /* extra ref for scheduler */
- ksocknal_conn_addref(conn);
-
- wake_up(&sched->kss_waitq);
- }
-
- spin_unlock_bh(&sched->kss_lock);
-}
-
-static struct ksock_proto *
-ksocknal_parse_proto_version(struct ksock_hello_msg *hello)
-{
- __u32 version = 0;
-
- if (hello->kshm_magic == LNET_PROTO_MAGIC)
- version = hello->kshm_version;
- else if (hello->kshm_magic == __swab32(LNET_PROTO_MAGIC))
- version = __swab32(hello->kshm_version);
-
- if (version) {
-#if SOCKNAL_VERSION_DEBUG
- if (*ksocknal_tunables.ksnd_protocol == 1)
- return NULL;
-
- if (*ksocknal_tunables.ksnd_protocol == 2 &&
- version == KSOCK_PROTO_V3)
- return NULL;
-#endif
- if (version == KSOCK_PROTO_V2)
- return &ksocknal_protocol_v2x;
-
- if (version == KSOCK_PROTO_V3)
- return &ksocknal_protocol_v3x;
-
- return NULL;
- }
-
- if (hello->kshm_magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC)) {
- struct lnet_magicversion *hmv = (struct lnet_magicversion *)hello;
-
- BUILD_BUG_ON(sizeof(struct lnet_magicversion) !=
- offsetof(struct ksock_hello_msg, kshm_src_nid));
-
- if (hmv->version_major == cpu_to_le16(KSOCK_PROTO_V1_MAJOR) &&
- hmv->version_minor == cpu_to_le16(KSOCK_PROTO_V1_MINOR))
- return &ksocknal_protocol_v1x;
- }
-
- return NULL;
-}
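-
-/*
- * Dispatch summary: LNET_PROTO_MAGIC (native or byte-swapped) carries an
- * explicit version word selecting V2/V3; the legacy LNET_PROTO_TCP_MAGIC
- * hello has no version word, so V1 is recognised from the major/minor in
- * its lnet_magicversion prefix. NULL means no protocol we speak.
- */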
-
-int
-ksocknal_send_hello(struct lnet_ni *ni, struct ksock_conn *conn,
- lnet_nid_t peer_nid, struct ksock_hello_msg *hello)
-{
- /* CAVEAT EMPTOR: this byte flips 'ipaddrs' */
- struct ksock_net *net = (struct ksock_net *)ni->ni_data;
-
- LASSERT(hello->kshm_nips <= LNET_MAX_INTERFACES);
-
-	/* rely on caller to hold a ref on the socket so it won't disappear */
- LASSERT(conn->ksnc_proto);
-
- hello->kshm_src_nid = ni->ni_nid;
- hello->kshm_dst_nid = peer_nid;
- hello->kshm_src_pid = the_lnet.ln_pid;
-
- hello->kshm_src_incarnation = net->ksnn_incarnation;
- hello->kshm_ctype = conn->ksnc_type;
-
- return conn->ksnc_proto->pro_send_hello(conn, hello);
-}
-
-static int
-ksocknal_invert_type(int type)
-{
- switch (type) {
- case SOCKLND_CONN_ANY:
- case SOCKLND_CONN_CONTROL:
- return type;
- case SOCKLND_CONN_BULK_IN:
- return SOCKLND_CONN_BULK_OUT;
- case SOCKLND_CONN_BULK_OUT:
- return SOCKLND_CONN_BULK_IN;
- default:
- return SOCKLND_CONN_NONE;
- }
-}
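-
-/*
- * i.e. what the far end of a connection of 'type' must look like:
- * ANY <-> ANY, CONTROL <-> CONTROL, BULK_IN <-> BULK_OUT; anything else
- * maps to SOCKLND_CONN_NONE and is rejected as a protocol error.
- */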
-
-int
-ksocknal_recv_hello(struct lnet_ni *ni, struct ksock_conn *conn,
- struct ksock_hello_msg *hello,
- struct lnet_process_id *peerid,
- __u64 *incarnation)
-{
- /* Return < 0 fatal error
- * 0 success
- * EALREADY lost connection race
- * EPROTO protocol version mismatch
- */
- struct socket *sock = conn->ksnc_sock;
- int active = !!conn->ksnc_proto;
- int timeout;
- int proto_match;
- int rc;
- struct ksock_proto *proto;
- struct lnet_process_id recv_id;
-
- /* socket type set on active connections - not set on passive */
- LASSERT(!active == !(conn->ksnc_type != SOCKLND_CONN_NONE));
-
- timeout = active ? *ksocknal_tunables.ksnd_timeout :
- lnet_acceptor_timeout();
-
- rc = lnet_sock_read(sock, &hello->kshm_magic,
- sizeof(hello->kshm_magic), timeout);
- if (rc) {
- CERROR("Error %d reading HELLO from %pI4h\n",
- rc, &conn->ksnc_ipaddr);
- LASSERT(rc < 0);
- return rc;
- }
-
- if (hello->kshm_magic != LNET_PROTO_MAGIC &&
- hello->kshm_magic != __swab32(LNET_PROTO_MAGIC) &&
- hello->kshm_magic != le32_to_cpu(LNET_PROTO_TCP_MAGIC)) {
- /* Unexpected magic! */
- CERROR("Bad magic(1) %#08x (%#08x expected) from %pI4h\n",
- __cpu_to_le32(hello->kshm_magic),
- LNET_PROTO_TCP_MAGIC,
- &conn->ksnc_ipaddr);
- return -EPROTO;
- }
-
- rc = lnet_sock_read(sock, &hello->kshm_version,
- sizeof(hello->kshm_version), timeout);
- if (rc) {
- CERROR("Error %d reading HELLO from %pI4h\n",
- rc, &conn->ksnc_ipaddr);
- LASSERT(rc < 0);
- return rc;
- }
-
- proto = ksocknal_parse_proto_version(hello);
- if (!proto) {
- if (!active) {
- /* unknown protocol from peer, tell peer my protocol */
- conn->ksnc_proto = &ksocknal_protocol_v3x;
-#if SOCKNAL_VERSION_DEBUG
- if (*ksocknal_tunables.ksnd_protocol == 2)
- conn->ksnc_proto = &ksocknal_protocol_v2x;
- else if (*ksocknal_tunables.ksnd_protocol == 1)
- conn->ksnc_proto = &ksocknal_protocol_v1x;
-#endif
- hello->kshm_nips = 0;
- ksocknal_send_hello(ni, conn, ni->ni_nid, hello);
- }
-
- CERROR("Unknown protocol version (%d.x expected) from %pI4h\n",
- conn->ksnc_proto->pro_version,
- &conn->ksnc_ipaddr);
-
- return -EPROTO;
- }
-
- proto_match = (conn->ksnc_proto == proto);
- conn->ksnc_proto = proto;
-
- /* receive the rest of hello message anyway */
- rc = conn->ksnc_proto->pro_recv_hello(conn, hello, timeout);
- if (rc) {
-		CERROR("Error %d reading or checking hello from %pI4h\n",
- rc, &conn->ksnc_ipaddr);
- LASSERT(rc < 0);
- return rc;
- }
-
- *incarnation = hello->kshm_src_incarnation;
-
- if (hello->kshm_src_nid == LNET_NID_ANY) {
- CERROR("Expecting a HELLO hdr with a NID, but got LNET_NID_ANY from %pI4h\n",
- &conn->ksnc_ipaddr);
- return -EPROTO;
- }
-
- if (!active &&
- conn->ksnc_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
- /* Userspace NAL assigns peer process ID from socket */
- recv_id.pid = conn->ksnc_port | LNET_PID_USERFLAG;
- recv_id.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
- conn->ksnc_ipaddr);
- } else {
- recv_id.nid = hello->kshm_src_nid;
- recv_id.pid = hello->kshm_src_pid;
- }
-
- if (!active) {
- *peerid = recv_id;
-
- /* peer determines type */
- conn->ksnc_type = ksocknal_invert_type(hello->kshm_ctype);
- if (conn->ksnc_type == SOCKLND_CONN_NONE) {
- CERROR("Unexpected type %d from %s ip %pI4h\n",
- hello->kshm_ctype, libcfs_id2str(*peerid),
- &conn->ksnc_ipaddr);
- return -EPROTO;
- }
-
- return 0;
- }
-
- if (peerid->pid != recv_id.pid ||
- peerid->nid != recv_id.nid) {
- LCONSOLE_ERROR_MSG(0x130, "Connected successfully to %s on host %pI4h, but they claimed they were %s; please check your Lustre configuration.\n",
- libcfs_id2str(*peerid),
- &conn->ksnc_ipaddr,
- libcfs_id2str(recv_id));
- return -EPROTO;
- }
-
- if (hello->kshm_ctype == SOCKLND_CONN_NONE) {
- /* Possible protocol mismatch or I lost the connection race */
- return proto_match ? EALREADY : EPROTO;
- }
-
- if (ksocknal_invert_type(hello->kshm_ctype) != conn->ksnc_type) {
- CERROR("Mismatched types: me %d, %s ip %pI4h %d\n",
- conn->ksnc_type, libcfs_id2str(*peerid),
- &conn->ksnc_ipaddr, hello->kshm_ctype);
- return -EPROTO;
- }
-
- return 0;
-}
-
-static int
-ksocknal_connect(struct ksock_route *route)
-{
- LIST_HEAD(zombies);
- struct ksock_peer *peer = route->ksnr_peer;
- int type;
- int wanted;
- struct socket *sock;
- unsigned long deadline;
- int retry_later = 0;
- int rc = 0;
-
- deadline = jiffies + *ksocknal_tunables.ksnd_timeout * HZ;
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- LASSERT(route->ksnr_scheduled);
- LASSERT(!route->ksnr_connecting);
-
- route->ksnr_connecting = 1;
-
- for (;;) {
- wanted = ksocknal_route_mask() & ~route->ksnr_connected;
-
- /*
- * stop connecting if peer/route got closed under me, or
- * route got connected while queued
- */
- if (peer->ksnp_closing || route->ksnr_deleted ||
- !wanted) {
- retry_later = 0;
- break;
- }
-
- /* reschedule if peer is connecting to me */
- if (peer->ksnp_accepting > 0) {
- CDEBUG(D_NET,
- "peer %s(%d) already connecting to me, retry later.\n",
- libcfs_nid2str(peer->ksnp_id.nid),
- peer->ksnp_accepting);
- retry_later = 1;
- }
-
- if (retry_later) /* needs reschedule */
- break;
-
- if (wanted & BIT(SOCKLND_CONN_ANY)) {
- type = SOCKLND_CONN_ANY;
- } else if (wanted & BIT(SOCKLND_CONN_CONTROL)) {
- type = SOCKLND_CONN_CONTROL;
- } else if (wanted & BIT(SOCKLND_CONN_BULK_IN)) {
- type = SOCKLND_CONN_BULK_IN;
- } else {
- LASSERT(wanted & BIT(SOCKLND_CONN_BULK_OUT));
- type = SOCKLND_CONN_BULK_OUT;
- }
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- if (time_after_eq(jiffies, deadline)) {
- rc = -ETIMEDOUT;
- lnet_connect_console_error(rc, peer->ksnp_id.nid,
- route->ksnr_ipaddr,
- route->ksnr_port);
- goto failed;
- }
-
- rc = lnet_connect(&sock, peer->ksnp_id.nid,
- route->ksnr_myipaddr,
- route->ksnr_ipaddr, route->ksnr_port);
- if (rc)
- goto failed;
-
- rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type);
- if (rc < 0) {
- lnet_connect_console_error(rc, peer->ksnp_id.nid,
- route->ksnr_ipaddr,
- route->ksnr_port);
- goto failed;
- }
-
- /*
- * A +ve RC means I have to retry because I lost the connection
- * race or I have to renegotiate protocol version
- */
-		retry_later = rc;
- if (retry_later)
- CDEBUG(D_NET, "peer %s: conn race, retry later.\n",
- libcfs_nid2str(peer->ksnp_id.nid));
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
- }
-
- route->ksnr_scheduled = 0;
- route->ksnr_connecting = 0;
-
- if (retry_later) {
- /*
- * re-queue for attention; this frees me up to handle
- * the peer's incoming connection request
- */
- if (rc == EALREADY ||
- (!rc && peer->ksnp_accepting > 0)) {
- /*
- * We want to introduce a delay before next
-			 * attempt to connect if we lost the conn race,
-			 * but the race is usually resolved quickly,
-			 * so min_reconnectms should be a good heuristic
- */
- route->ksnr_retry_interval =
- *ksocknal_tunables.ksnd_min_reconnectms * HZ / 1000;
- route->ksnr_timeout = jiffies + route->ksnr_retry_interval;
- }
-
- ksocknal_launch_connection_locked(route);
- }
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
- return retry_later;
-
- failed:
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- route->ksnr_scheduled = 0;
- route->ksnr_connecting = 0;
-
- /* This is a retry rather than a new connection */
- route->ksnr_retry_interval *= 2;
- route->ksnr_retry_interval =
- max(route->ksnr_retry_interval,
- (long)*ksocknal_tunables.ksnd_min_reconnectms * HZ / 1000);
- route->ksnr_retry_interval =
- min(route->ksnr_retry_interval,
- (long)*ksocknal_tunables.ksnd_max_reconnectms * HZ / 1000);
-
- LASSERT(route->ksnr_retry_interval);
- route->ksnr_timeout = jiffies + route->ksnr_retry_interval;
-
- if (!list_empty(&peer->ksnp_tx_queue) &&
- !peer->ksnp_accepting &&
- !ksocknal_find_connecting_route_locked(peer)) {
- struct ksock_conn *conn;
-
- /*
- * ksnp_tx_queue is queued on a conn on successful
- * connection for V1.x and V2.x
- */
- if (!list_empty(&peer->ksnp_conns)) {
- conn = list_entry(peer->ksnp_conns.next,
- struct ksock_conn, ksnc_list);
- LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x);
- }
-
- /*
- * take all the blocked packets while I've got the lock and
- * complete below...
- */
- list_splice_init(&peer->ksnp_tx_queue, &zombies);
- }
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- ksocknal_peer_failed(peer);
- ksocknal_txlist_done(peer->ksnp_ni, &zombies, 1);
- return 0;
-}
-
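-/*
- * Illustrative sketch (not part of the original file) of the retry
- * back-off in the failure path above: the interval doubles on each
- * failure and is clamped to [min_reconnectms, max_reconnectms]
- * converted to jiffies, so with the default 1000ms/60000ms tunables
- * consecutive failures wait 1s, 2s, 4s, ... capped at 60s.
- */
-static inline long ksocknal_next_retry_interval(long cur_jiffies,
-						long min_ms, long max_ms)
-{
-	long next = cur_jiffies * 2;
-
-	next = max(next, min_ms * HZ / 1000);
-	next = min(next, max_ms * HZ / 1000);
-	return next;
-}
-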
-/*
- * Check whether we need to create more connds.
- * Try to create a new thread if necessary; @timeout is updated when
- * thread creation fails, so the caller won't keep retrying while
- * resources are exhausted.
- */
-static int
-ksocknal_connd_check_start(time64_t sec, long *timeout)
-{
- char name[16];
- int rc;
- int total = ksocknal_data.ksnd_connd_starting +
- ksocknal_data.ksnd_connd_running;
-
- if (unlikely(ksocknal_data.ksnd_init < SOCKNAL_INIT_ALL)) {
- /* still in initializing */
- return 0;
- }
-
- if (total >= *ksocknal_tunables.ksnd_nconnds_max ||
- total > ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV) {
- /*
-		 * can't create more connds, or we still have enough
-		 * threads to handle more connection requests
- */
- return 0;
- }
-
- if (list_empty(&ksocknal_data.ksnd_connd_routes)) {
- /* no pending connecting request */
- return 0;
- }
-
- if (sec - ksocknal_data.ksnd_connd_failed_stamp <= 1) {
- /* may run out of resource, retry later */
- *timeout = HZ;
- return 0;
- }
-
- if (ksocknal_data.ksnd_connd_starting > 0) {
- /* serialize starting to avoid flood */
- return 0;
- }
-
- ksocknal_data.ksnd_connd_starting_stamp = sec;
- ksocknal_data.ksnd_connd_starting++;
- spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
-
- /* NB: total is the next id */
- snprintf(name, sizeof(name), "socknal_cd%02d", total);
- rc = ksocknal_thread_start(ksocknal_connd, NULL, name);
-
- spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
- if (!rc)
- return 1;
-
- /* we tried ... */
- LASSERT(ksocknal_data.ksnd_connd_starting > 0);
- ksocknal_data.ksnd_connd_starting--;
- ksocknal_data.ksnd_connd_failed_stamp = ktime_get_real_seconds();
-
- return 1;
-}
-
-/*
- * Check whether the current thread can exit: return 1 if there are too
- * many threads and none was created in the past 120 seconds.
- * This function may also update @timeout so the caller comes back
- * to recheck these conditions.
- */
-static int
-ksocknal_connd_check_stop(time64_t sec, long *timeout)
-{
- int val;
-
- if (unlikely(ksocknal_data.ksnd_init < SOCKNAL_INIT_ALL)) {
- /* still in initializing */
- return 0;
- }
-
- if (ksocknal_data.ksnd_connd_starting > 0) {
- /* in progress of starting new thread */
- return 0;
- }
-
- if (ksocknal_data.ksnd_connd_running <=
- *ksocknal_tunables.ksnd_nconnds) { /* can't shrink */
- return 0;
- }
-
- /* created thread in past 120 seconds? */
- val = (int)(ksocknal_data.ksnd_connd_starting_stamp +
- SOCKNAL_CONND_TIMEOUT - sec);
-
- *timeout = (val > 0) ? val * HZ :
- SOCKNAL_CONND_TIMEOUT * HZ;
- if (val > 0)
- return 0;
-
-	/* no thread created in the past 120 seconds */
-
- return ksocknal_data.ksnd_connd_running >
- ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV;
-}
-
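-/*
- * Sketch (illustration only) of the elastic-pool policy implemented by
- * the two checks above: grow while under nconnds_max and every thread
- * beyond the SOCKNAL_CONND_RESV reserve is busy connecting; shrink once
- * more than nconnds threads run and none was started for
- * SOCKNAL_CONND_TIMEOUT (120) seconds.
- */
-static inline bool ksocknal_connd_may_grow(int total, int connecting, int max)
-{
-	return total < max && total <= connecting + SOCKNAL_CONND_RESV;
-}
-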
-/*
- * Go through the connd_routes queue looking for a route we can process
- * right now; @timeout_p is updated if we need to come back later
- */
-static struct ksock_route *
-ksocknal_connd_get_route_locked(signed long *timeout_p)
-{
- struct ksock_route *route;
- unsigned long now;
-
- now = jiffies;
-
- /* connd_routes can contain both pending and ordinary routes */
- list_for_each_entry(route, &ksocknal_data.ksnd_connd_routes,
- ksnr_connd_list) {
- if (!route->ksnr_retry_interval ||
- time_after_eq(now, route->ksnr_timeout))
- return route;
-
- if (*timeout_p == MAX_SCHEDULE_TIMEOUT ||
- (int)*timeout_p > (int)(route->ksnr_timeout - now))
- *timeout_p = (int)(route->ksnr_timeout - now);
- }
-
- return NULL;
-}
-
-int
-ksocknal_connd(void *arg)
-{
- spinlock_t *connd_lock = &ksocknal_data.ksnd_connd_lock;
- struct ksock_connreq *cr;
- wait_queue_entry_t wait;
- int nloops = 0;
- int cons_retry = 0;
-
- init_waitqueue_entry(&wait, current);
-
- spin_lock_bh(connd_lock);
-
- LASSERT(ksocknal_data.ksnd_connd_starting > 0);
- ksocknal_data.ksnd_connd_starting--;
- ksocknal_data.ksnd_connd_running++;
-
- while (!ksocknal_data.ksnd_shuttingdown) {
- struct ksock_route *route = NULL;
- time64_t sec = ktime_get_real_seconds();
- long timeout = MAX_SCHEDULE_TIMEOUT;
- int dropped_lock = 0;
-
- if (ksocknal_connd_check_stop(sec, &timeout)) {
- /* wakeup another one to check stop */
- wake_up(&ksocknal_data.ksnd_connd_waitq);
- break;
- }
-
- if (ksocknal_connd_check_start(sec, &timeout)) {
- /* created new thread */
- dropped_lock = 1;
- }
-
- if (!list_empty(&ksocknal_data.ksnd_connd_connreqs)) {
- /* Connection accepted by the listener */
- cr = list_entry(ksocknal_data.ksnd_connd_connreqs.next,
- struct ksock_connreq, ksncr_list);
-
- list_del(&cr->ksncr_list);
- spin_unlock_bh(connd_lock);
- dropped_lock = 1;
-
- ksocknal_create_conn(cr->ksncr_ni, NULL,
- cr->ksncr_sock, SOCKLND_CONN_NONE);
- lnet_ni_decref(cr->ksncr_ni);
- kfree(cr);
-
- spin_lock_bh(connd_lock);
- }
-
- /*
- * Only handle an outgoing connection request if there
- * is a thread left to handle incoming connections and
-		 * to create new connds
- */
- if (ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV <
- ksocknal_data.ksnd_connd_running) {
- route = ksocknal_connd_get_route_locked(&timeout);
- }
- if (route) {
- list_del(&route->ksnr_connd_list);
- ksocknal_data.ksnd_connd_connecting++;
- spin_unlock_bh(connd_lock);
- dropped_lock = 1;
-
- if (ksocknal_connect(route)) {
- /* consecutive retry */
- if (cons_retry++ > SOCKNAL_INSANITY_RECONN) {
-				CWARN("Massive consecutive reconnection attempts to %pI4h\n",
- &route->ksnr_ipaddr);
- cons_retry = 0;
- }
- } else {
- cons_retry = 0;
- }
-
- ksocknal_route_decref(route);
-
- spin_lock_bh(connd_lock);
- ksocknal_data.ksnd_connd_connecting--;
- }
-
- if (dropped_lock) {
- if (++nloops < SOCKNAL_RESCHED)
- continue;
- spin_unlock_bh(connd_lock);
- nloops = 0;
- cond_resched();
- spin_lock_bh(connd_lock);
- continue;
- }
-
- /* Nothing to do for 'timeout' */
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue_exclusive(&ksocknal_data.ksnd_connd_waitq,
- &wait);
- spin_unlock_bh(connd_lock);
-
- nloops = 0;
- schedule_timeout(timeout);
-
- remove_wait_queue(&ksocknal_data.ksnd_connd_waitq, &wait);
- spin_lock_bh(connd_lock);
- }
- ksocknal_data.ksnd_connd_running--;
- spin_unlock_bh(connd_lock);
-
- ksocknal_thread_fini();
- return 0;
-}
-
-static struct ksock_conn *
-ksocknal_find_timed_out_conn(struct ksock_peer *peer)
-{
- /* We're called with a shared lock on ksnd_global_lock */
- struct ksock_conn *conn;
- struct list_head *ctmp;
-
- list_for_each(ctmp, &peer->ksnp_conns) {
- int error;
-
- conn = list_entry(ctmp, struct ksock_conn, ksnc_list);
-
- /* Don't need the {get,put}connsock dance to deref ksnc_sock */
- LASSERT(!conn->ksnc_closing);
-
- /*
-		 * SOCK_ERROR will reset the socket's error code on
-		 * some platforms (like Darwin 8.x)
- */
- error = conn->ksnc_sock->sk->sk_err;
- if (error) {
- ksocknal_conn_addref(conn);
-
- switch (error) {
- case ECONNRESET:
- CNETERR("A connection with %s (%pI4h:%d) was reset; it may have rebooted.\n",
- libcfs_id2str(peer->ksnp_id),
- &conn->ksnc_ipaddr,
- conn->ksnc_port);
- break;
- case ETIMEDOUT:
- CNETERR("A connection with %s (%pI4h:%d) timed out; the network or node may be down.\n",
- libcfs_id2str(peer->ksnp_id),
- &conn->ksnc_ipaddr,
- conn->ksnc_port);
- break;
- default:
-				CNETERR("An unexpected network error %d occurred with %s (%pI4h:%d)\n",
- error,
- libcfs_id2str(peer->ksnp_id),
- &conn->ksnc_ipaddr,
- conn->ksnc_port);
- break;
- }
-
- return conn;
- }
-
- if (conn->ksnc_rx_started &&
- time_after_eq(jiffies,
- conn->ksnc_rx_deadline)) {
- /* Timed out incomplete incoming message */
- ksocknal_conn_addref(conn);
- CNETERR("Timeout receiving from %s (%pI4h:%d), state %d wanted %zd left %d\n",
- libcfs_id2str(peer->ksnp_id),
- &conn->ksnc_ipaddr,
- conn->ksnc_port,
- conn->ksnc_rx_state,
- iov_iter_count(&conn->ksnc_rx_to),
- conn->ksnc_rx_nob_left);
- return conn;
- }
-
- if ((!list_empty(&conn->ksnc_tx_queue) ||
- conn->ksnc_sock->sk->sk_wmem_queued) &&
- time_after_eq(jiffies,
- conn->ksnc_tx_deadline)) {
- /*
- * Timed out messages queued for sending or
- * buffered in the socket's send buffer
- */
- ksocknal_conn_addref(conn);
-			CNETERR("Timeout sending data to %s (%pI4h:%d); the network or that node may be down.\n",
- libcfs_id2str(peer->ksnp_id),
- &conn->ksnc_ipaddr,
- conn->ksnc_port);
- return conn;
- }
- }
-
- return NULL;
-}
-
-static inline void
-ksocknal_flush_stale_txs(struct ksock_peer *peer)
-{
- struct ksock_tx *tx;
- struct ksock_tx *tmp;
- LIST_HEAD(stale_txs);
-
- write_lock_bh(&ksocknal_data.ksnd_global_lock);
-
- list_for_each_entry_safe(tx, tmp, &peer->ksnp_tx_queue, tx_list) {
- if (!time_after_eq(jiffies,
- tx->tx_deadline))
- break;
-
- list_del(&tx->tx_list);
- list_add_tail(&tx->tx_list, &stale_txs);
- }
-
- write_unlock_bh(&ksocknal_data.ksnd_global_lock);
-
- ksocknal_txlist_done(peer->ksnp_ni, &stale_txs, 1);
-}
-
-static int
-ksocknal_send_keepalive_locked(struct ksock_peer *peer)
- __must_hold(&ksocknal_data.ksnd_global_lock)
-{
- struct ksock_sched *sched;
- struct ksock_conn *conn;
- struct ksock_tx *tx;
-
- /* last_alive will be updated by create_conn */
- if (list_empty(&peer->ksnp_conns))
- return 0;
-
- if (peer->ksnp_proto != &ksocknal_protocol_v3x)
- return 0;
-
- if (*ksocknal_tunables.ksnd_keepalive <= 0 ||
- time_before(jiffies,
- peer->ksnp_last_alive + *ksocknal_tunables.ksnd_keepalive * HZ))
- return 0;
-
- if (time_before(jiffies, peer->ksnp_send_keepalive))
- return 0;
-
- /*
-	 * retry 10 seconds later, so we don't put pressure
-	 * on this peer if we failed to send a keepalive this time
- */
- peer->ksnp_send_keepalive = jiffies + 10 * HZ;
-
- conn = ksocknal_find_conn_locked(peer, NULL, 1);
- if (conn) {
- sched = conn->ksnc_scheduler;
-
- spin_lock_bh(&sched->kss_lock);
- if (!list_empty(&conn->ksnc_tx_queue)) {
- spin_unlock_bh(&sched->kss_lock);
-			/* there is a queued ACK; no keepalive needed */
- return 0;
- }
-
- spin_unlock_bh(&sched->kss_lock);
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- /* cookie = 1 is reserved for keepalive PING */
- tx = ksocknal_alloc_tx_noop(1, 1);
- if (!tx) {
- read_lock(&ksocknal_data.ksnd_global_lock);
- return -ENOMEM;
- }
-
- if (!ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id)) {
- read_lock(&ksocknal_data.ksnd_global_lock);
- return 1;
- }
-
- ksocknal_free_tx(tx);
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- return -EIO;
-}
-
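-/*
- * Sketch (illustration only): the 10-second re-arm of
- * ksnp_send_keepalive above is a plain jiffies-based rate limiter;
- * the same pattern in stand-alone form:
- */
-static inline bool ksocknal_rate_limit_ok(unsigned long *next_jiffies,
-					  int interval_secs)
-{
-	if (time_before(jiffies, *next_jiffies))
-		return false;	/* too soon, skip this attempt */
-	*next_jiffies = jiffies + interval_secs * HZ;
-	return true;
-}
-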
-static void
-ksocknal_check_peer_timeouts(int idx)
-{
- struct list_head *peers = &ksocknal_data.ksnd_peers[idx];
- struct ksock_peer *peer;
- struct ksock_conn *conn;
- struct ksock_tx *tx;
-
- again:
- /*
- * NB. We expect to have a look at all the peers and not find any
- * connections to time out, so we just use a shared lock while we
- * take a look...
- */
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- list_for_each_entry(peer, peers, ksnp_list) {
- unsigned long deadline = 0;
- struct ksock_tx *tx_stale;
- int resid = 0;
- int n = 0;
-
- if (ksocknal_send_keepalive_locked(peer)) {
- read_unlock(&ksocknal_data.ksnd_global_lock);
- goto again;
- }
-
- conn = ksocknal_find_timed_out_conn(peer);
-
- if (conn) {
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- ksocknal_close_conn_and_siblings(conn, -ETIMEDOUT);
-
- /*
- * NB we won't find this one again, but we can't
- * just proceed with the next peer, since we dropped
- * ksnd_global_lock and it might be dead already!
- */
- ksocknal_conn_decref(conn);
- goto again;
- }
-
- /*
- * we can't process stale txs right here because we're
- * holding only shared lock
- */
- if (!list_empty(&peer->ksnp_tx_queue)) {
- tx = list_entry(peer->ksnp_tx_queue.next,
- struct ksock_tx, tx_list);
-
- if (time_after_eq(jiffies,
- tx->tx_deadline)) {
- ksocknal_peer_addref(peer);
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- ksocknal_flush_stale_txs(peer);
-
- ksocknal_peer_decref(peer);
- goto again;
- }
- }
-
- if (list_empty(&peer->ksnp_zc_req_list))
- continue;
-
- tx_stale = NULL;
- spin_lock(&peer->ksnp_lock);
- list_for_each_entry(tx, &peer->ksnp_zc_req_list, tx_zc_list) {
- if (!time_after_eq(jiffies,
- tx->tx_deadline))
- break;
- /* ignore the TX if connection is being closed */
- if (tx->tx_conn->ksnc_closing)
- continue;
- if (!tx_stale)
- tx_stale = tx;
- n++;
- }
-
- if (!tx_stale) {
- spin_unlock(&peer->ksnp_lock);
- continue;
- }
-
- deadline = tx_stale->tx_deadline;
- resid = tx_stale->tx_resid;
- conn = tx_stale->tx_conn;
- ksocknal_conn_addref(conn);
-
- spin_unlock(&peer->ksnp_lock);
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
-		CERROR("Total %d stale ZC_REQs for peer %s detected; the oldest (%p) timed out %ld secs ago, resid: %d, wmem: %d\n",
- n, libcfs_nid2str(peer->ksnp_id.nid), tx_stale,
- (jiffies - deadline) / HZ,
- resid, conn->ksnc_sock->sk->sk_wmem_queued);
-
- ksocknal_close_conn_and_siblings(conn, -ETIMEDOUT);
- ksocknal_conn_decref(conn);
- goto again;
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
-}
-
-int
-ksocknal_reaper(void *arg)
-{
- wait_queue_entry_t wait;
- struct ksock_conn *conn;
- struct ksock_sched *sched;
- struct list_head enomem_conns;
- int nenomem_conns;
- long timeout;
- int i;
- int peer_index = 0;
- unsigned long deadline = jiffies;
-
- INIT_LIST_HEAD(&enomem_conns);
- init_waitqueue_entry(&wait, current);
-
- spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
-
- while (!ksocknal_data.ksnd_shuttingdown) {
- if (!list_empty(&ksocknal_data.ksnd_deathrow_conns)) {
- conn = list_entry(ksocknal_data.ksnd_deathrow_conns.next,
- struct ksock_conn, ksnc_list);
- list_del(&conn->ksnc_list);
-
- spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-
- ksocknal_terminate_conn(conn);
- ksocknal_conn_decref(conn);
-
- spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
- continue;
- }
-
- if (!list_empty(&ksocknal_data.ksnd_zombie_conns)) {
- conn = list_entry(ksocknal_data.ksnd_zombie_conns.next,
- struct ksock_conn, ksnc_list);
- list_del(&conn->ksnc_list);
-
- spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-
- ksocknal_destroy_conn(conn);
-
- spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
- continue;
- }
-
- if (!list_empty(&ksocknal_data.ksnd_enomem_conns)) {
- list_add(&enomem_conns,
- &ksocknal_data.ksnd_enomem_conns);
- list_del_init(&ksocknal_data.ksnd_enomem_conns);
- }
-
- spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-
- /* reschedule all the connections that stalled with ENOMEM... */
- nenomem_conns = 0;
- while (!list_empty(&enomem_conns)) {
- conn = list_entry(enomem_conns.next, struct ksock_conn,
- ksnc_tx_list);
- list_del(&conn->ksnc_tx_list);
-
- sched = conn->ksnc_scheduler;
-
- spin_lock_bh(&sched->kss_lock);
-
- LASSERT(conn->ksnc_tx_scheduled);
- conn->ksnc_tx_ready = 1;
- list_add_tail(&conn->ksnc_tx_list,
- &sched->kss_tx_conns);
- wake_up(&sched->kss_waitq);
-
- spin_unlock_bh(&sched->kss_lock);
- nenomem_conns++;
- }
-
- /* careful with the jiffy wrap... */
- while ((timeout = deadline - jiffies) <= 0) {
- const int n = 4;
- const int p = 1;
- int chunk = ksocknal_data.ksnd_peer_hash_size;
-
- /*
- * Time to check for timeouts on a few more peers: I do
- * checks every 'p' seconds on a proportion of the peer
- * table and I need to check every connection 'n' times
- * within a timeout interval, to ensure I detect a
- * timeout on any connection within (n+1)/n times the
- * timeout interval.
- */
- if (*ksocknal_tunables.ksnd_timeout > n * p)
- chunk = (chunk * n * p) /
- *ksocknal_tunables.ksnd_timeout;
- if (!chunk)
- chunk = 1;
-
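-			/*
-			 * Worked example (illustrative; the hash size is
-			 * assumed): with the default sock_timeout of 50s,
-			 * n = 4 and p = 1, a table of, say, 251 peer lists
-			 * gives chunk = 251 * 4 * 1 / 50 = 20 lists per
-			 * pass, so the whole table is covered roughly
-			 * every 13 seconds -- about n times per timeout
-			 * interval, as intended.
-			 */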
- for (i = 0; i < chunk; i++) {
- ksocknal_check_peer_timeouts(peer_index);
- peer_index = (peer_index + 1) %
- ksocknal_data.ksnd_peer_hash_size;
- }
-
- deadline = deadline + p * HZ;
- }
-
- if (nenomem_conns) {
- /*
- * Reduce my timeout if I rescheduled ENOMEM conns.
- * This also prevents me getting woken immediately
- * if any go back on my enomem list.
- */
- timeout = SOCKNAL_ENOMEM_RETRY;
- }
- ksocknal_data.ksnd_reaper_waketime = jiffies + timeout;
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&ksocknal_data.ksnd_reaper_waitq, &wait);
-
- if (!ksocknal_data.ksnd_shuttingdown &&
- list_empty(&ksocknal_data.ksnd_deathrow_conns) &&
- list_empty(&ksocknal_data.ksnd_zombie_conns))
- schedule_timeout(timeout);
-
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&ksocknal_data.ksnd_reaper_waitq, &wait);
-
- spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
- }
-
- spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
-
- ksocknal_thread_fini();
- return 0;
-}
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
deleted file mode 100644
index 93a02cd6b6b5..000000000000
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
+++ /dev/null
@@ -1,534 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/highmem.h>
-#include "socklnd.h"
-
-int
-ksocknal_lib_get_conn_addrs(struct ksock_conn *conn)
-{
- int rc = lnet_sock_getaddr(conn->ksnc_sock, 1, &conn->ksnc_ipaddr,
- &conn->ksnc_port);
-
- /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
- LASSERT(!conn->ksnc_closing);
-
- if (rc) {
- CERROR("Error %d getting sock peer IP\n", rc);
- return rc;
- }
-
- rc = lnet_sock_getaddr(conn->ksnc_sock, 0, &conn->ksnc_myipaddr, NULL);
- if (rc) {
- CERROR("Error %d getting sock local IP\n", rc);
- return rc;
- }
-
- return 0;
-}
-
-int
-ksocknal_lib_zc_capable(struct ksock_conn *conn)
-{
- int caps = conn->ksnc_sock->sk->sk_route_caps;
-
- if (conn->ksnc_proto == &ksocknal_protocol_v1x)
- return 0;
-
- /*
- * ZC if the socket supports scatter/gather and doesn't need software
- * checksums
- */
- return ((caps & NETIF_F_SG) && (caps & NETIF_F_CSUM_MASK));
-}
-
-int
-ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx)
-{
- struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
- struct socket *sock = conn->ksnc_sock;
- int nob, i;
-
- if (*ksocknal_tunables.ksnd_enable_csum && /* checksum enabled */
- conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection */
-	    tx->tx_nob == tx->tx_resid && /* first sending */
- !tx->tx_msg.ksm_csum) /* not checksummed */
- ksocknal_lib_csum_tx(tx);
-
- for (nob = i = 0; i < tx->tx_niov; i++)
- nob += tx->tx_iov[i].iov_len;
-
- if (!list_empty(&conn->ksnc_tx_queue) ||
- nob < tx->tx_resid)
- msg.msg_flags |= MSG_MORE;
-
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC,
- tx->tx_iov, tx->tx_niov, nob);
- return sock_sendmsg(sock, &msg);
-}
-
-int
-ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
-{
- struct socket *sock = conn->ksnc_sock;
- struct bio_vec *kiov = tx->tx_kiov;
- int rc;
- int nob;
-
- /* Not NOOP message */
- LASSERT(tx->tx_lnetmsg);
-
- if (tx->tx_msg.ksm_zc_cookies[0]) {
- /* Zero copy is enabled */
- struct sock *sk = sock->sk;
- struct page *page = kiov->bv_page;
- int offset = kiov->bv_offset;
- int fragsize = kiov->bv_len;
- int msgflg = MSG_DONTWAIT;
-
- CDEBUG(D_NET, "page %p + offset %x for %d\n",
- page, offset, kiov->bv_len);
-
- if (!list_empty(&conn->ksnc_tx_queue) ||
- fragsize < tx->tx_resid)
- msgflg |= MSG_MORE;
-
- if (sk->sk_prot->sendpage) {
- rc = sk->sk_prot->sendpage(sk, page,
- offset, fragsize, msgflg);
- } else {
- rc = tcp_sendpage(sk, page, offset, fragsize, msgflg);
- }
- } else {
- struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
- int i;
-
- for (nob = i = 0; i < tx->tx_nkiov; i++)
- nob += kiov[i].bv_len;
-
- if (!list_empty(&conn->ksnc_tx_queue) ||
- nob < tx->tx_resid)
- msg.msg_flags |= MSG_MORE;
-
- iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC,
- kiov, tx->tx_nkiov, nob);
- rc = sock_sendmsg(sock, &msg);
- }
- return rc;
-}
-
-void
-ksocknal_lib_eager_ack(struct ksock_conn *conn)
-{
- int opt = 1;
- struct socket *sock = conn->ksnc_sock;
-
- /*
- * Remind the socket to ACK eagerly. If I don't, the socket might
- * think I'm about to send something it could piggy-back the ACK
- * on, introducing delay in completing zero-copy sends in my
- * peer.
- */
- kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK, (char *)&opt,
- sizeof(opt));
-}
-
-static int lustre_csum(struct kvec *v, void *context)
-{
-	struct ksock_conn *conn = context;
-
-	conn->ksnc_rx_csum = crc32_le(conn->ksnc_rx_csum,
- v->iov_base, v->iov_len);
- return 0;
-}
-
-int
-ksocknal_lib_recv(struct ksock_conn *conn)
-{
- struct msghdr msg = { .msg_iter = conn->ksnc_rx_to };
- __u32 saved_csum;
- int rc;
-
- rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT);
- if (rc <= 0)
- return rc;
-
- saved_csum = conn->ksnc_msg.ksm_csum;
- if (!saved_csum)
- return rc;
-
-	/* the header is checksummed only in V2; V3 checksums only the bulk data */
- if (!(conn->ksnc_rx_to.type & ITER_BVEC) &&
- conn->ksnc_proto != &ksocknal_protocol_v2x)
- return rc;
-
- /* accumulate checksum */
- conn->ksnc_msg.ksm_csum = 0;
- iov_iter_for_each_range(&conn->ksnc_rx_to, rc, lustre_csum, conn);
- conn->ksnc_msg.ksm_csum = saved_csum;
-
- return rc;
-}
-
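-/*
- * Illustrative sketch (not part of the original file): the receive
- * checksum above is just crc32_le folded over every kvec segment,
- * seeded with the running value kept in ksnc_rx_csum.  Stand-alone
- * equivalent of the accumulation step:
- */
-static inline __u32 ksocknal_fold_csum(__u32 csum, const struct kvec *v,
-				       int nsegs)
-{
-	int i;
-
-	for (i = 0; i < nsegs; i++)
-		csum = crc32_le(csum, v[i].iov_base, v[i].iov_len);
-	return csum;
-}
-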
-void
-ksocknal_lib_csum_tx(struct ksock_tx *tx)
-{
- int i;
- __u32 csum;
- void *base;
-
- LASSERT(tx->tx_iov[0].iov_base == &tx->tx_msg);
- LASSERT(tx->tx_conn);
- LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x);
-
- tx->tx_msg.ksm_csum = 0;
-
- csum = crc32_le(~0, tx->tx_iov[0].iov_base,
- tx->tx_iov[0].iov_len);
-
- if (tx->tx_kiov) {
- for (i = 0; i < tx->tx_nkiov; i++) {
- base = kmap(tx->tx_kiov[i].bv_page) +
- tx->tx_kiov[i].bv_offset;
-
- csum = crc32_le(csum, base, tx->tx_kiov[i].bv_len);
-
- kunmap(tx->tx_kiov[i].bv_page);
- }
- } else {
- for (i = 1; i < tx->tx_niov; i++)
- csum = crc32_le(csum, tx->tx_iov[i].iov_base,
- tx->tx_iov[i].iov_len);
- }
-
- if (*ksocknal_tunables.ksnd_inject_csum_error) {
- csum++;
- *ksocknal_tunables.ksnd_inject_csum_error = 0;
- }
-
- tx->tx_msg.ksm_csum = csum;
-}
-
-int
-ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem,
- int *rxmem, int *nagle)
-{
- struct socket *sock = conn->ksnc_sock;
- int len;
- int rc;
-
- rc = ksocknal_connsock_addref(conn);
- if (rc) {
- LASSERT(conn->ksnc_closing);
- *txmem = *rxmem = *nagle = 0;
- return -ESHUTDOWN;
- }
-
- rc = lnet_sock_getbuf(sock, txmem, rxmem);
- if (!rc) {
- len = sizeof(*nagle);
- rc = kernel_getsockopt(sock, SOL_TCP, TCP_NODELAY,
- (char *)nagle, &len);
- }
-
- ksocknal_connsock_decref(conn);
-
- if (!rc)
- *nagle = !*nagle;
- else
- *txmem = *rxmem = *nagle = 0;
-
- return rc;
-}
-
-int
-ksocknal_lib_setup_sock(struct socket *sock)
-{
- int rc;
- int option;
- int keep_idle;
- int keep_intvl;
- int keep_count;
- int do_keepalive;
- struct linger linger;
-
- sock->sk->sk_allocation = GFP_NOFS;
-
- /*
- * Ensure this socket aborts active sends immediately when we close
- * it.
- */
- linger.l_onoff = 0;
- linger.l_linger = 0;
-
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, (char *)&linger,
- sizeof(linger));
- if (rc) {
- CERROR("Can't set SO_LINGER: %d\n", rc);
- return rc;
- }
-
- option = -1;
- rc = kernel_setsockopt(sock, SOL_TCP, TCP_LINGER2, (char *)&option,
- sizeof(option));
- if (rc) {
-		CERROR("Can't set TCP_LINGER2: %d\n", rc);
- return rc;
- }
-
- if (!*ksocknal_tunables.ksnd_nagle) {
- option = 1;
-
- rc = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
- (char *)&option, sizeof(option));
- if (rc) {
- CERROR("Can't disable nagle: %d\n", rc);
- return rc;
- }
- }
-
- rc = lnet_sock_setbuf(sock, *ksocknal_tunables.ksnd_tx_buffer_size,
- *ksocknal_tunables.ksnd_rx_buffer_size);
- if (rc) {
- CERROR("Can't set buffer tx %d, rx %d buffers: %d\n",
- *ksocknal_tunables.ksnd_tx_buffer_size,
- *ksocknal_tunables.ksnd_rx_buffer_size, rc);
- return rc;
- }
-
-/* TCP_BACKOFF_* sockopt tunables unsupported in stock kernels */
-
- /* snapshot tunables */
- keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
- keep_count = *ksocknal_tunables.ksnd_keepalive_count;
- keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
-
- do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
-
- option = (do_keepalive ? 1 : 0);
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&option,
- sizeof(option));
- if (rc) {
- CERROR("Can't set SO_KEEPALIVE: %d\n", rc);
- return rc;
- }
-
- if (!do_keepalive)
- return 0;
-
- rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, (char *)&keep_idle,
- sizeof(keep_idle));
- if (rc) {
- CERROR("Can't set TCP_KEEPIDLE: %d\n", rc);
- return rc;
- }
-
- rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
- (char *)&keep_intvl, sizeof(keep_intvl));
- if (rc) {
- CERROR("Can't set TCP_KEEPINTVL: %d\n", rc);
- return rc;
- }
-
- rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, (char *)&keep_count,
- sizeof(keep_count));
- if (rc) {
- CERROR("Can't set TCP_KEEPCNT: %d\n", rc);
- return rc;
- }
-
- return 0;
-}
-
-void
-ksocknal_lib_push_conn(struct ksock_conn *conn)
-{
- struct sock *sk;
- struct tcp_sock *tp;
- int nonagle;
- int val = 1;
- int rc;
-
- rc = ksocknal_connsock_addref(conn);
- if (rc) /* being shut down */
- return;
-
- sk = conn->ksnc_sock->sk;
- tp = tcp_sk(sk);
-
- lock_sock(sk);
- nonagle = tp->nonagle;
- tp->nonagle = 1;
- release_sock(sk);
-
- rc = kernel_setsockopt(conn->ksnc_sock, SOL_TCP, TCP_NODELAY,
- (char *)&val, sizeof(val));
- LASSERT(!rc);
-
- lock_sock(sk);
- tp->nonagle = nonagle;
- release_sock(sk);
-
- ksocknal_connsock_decref(conn);
-}
-
-/*
- * socket call back in Linux
- */
-static void
-ksocknal_data_ready(struct sock *sk)
-{
- struct ksock_conn *conn;
-
- /* interleave correctly with closing sockets... */
- LASSERT(!in_irq());
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- conn = sk->sk_user_data;
- if (!conn) { /* raced with ksocknal_terminate_conn */
- LASSERT(sk->sk_data_ready != &ksocknal_data_ready);
- sk->sk_data_ready(sk);
- } else {
- ksocknal_read_callback(conn);
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
-}
-
-static void
-ksocknal_write_space(struct sock *sk)
-{
- struct ksock_conn *conn;
- int wspace;
-	int min_wspace;
-
- /* interleave correctly with closing sockets... */
- LASSERT(!in_irq());
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- conn = sk->sk_user_data;
- wspace = sk_stream_wspace(sk);
-	min_wspace = sk_stream_min_wspace(sk);
-
- CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n",
-	       sk, wspace, min_wspace, conn,
- !conn ? "" : (conn->ksnc_tx_ready ?
- " ready" : " blocked"),
- !conn ? "" : (conn->ksnc_tx_scheduled ?
- " scheduled" : " idle"),
- !conn ? "" : (list_empty(&conn->ksnc_tx_queue) ?
- " empty" : " queued"));
-
- if (!conn) { /* raced with ksocknal_terminate_conn */
- LASSERT(sk->sk_write_space != &ksocknal_write_space);
- sk->sk_write_space(sk);
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
- return;
- }
-
-	if (wspace >= min_wspace) { /* got enough space */
- ksocknal_write_callback(conn);
-
- /*
- * Clear SOCK_NOSPACE _after_ ksocknal_write_callback so the
- * ENOMEM check in ksocknal_transmit is race-free (think about
- * it).
- */
- clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
-}
-
-void
-ksocknal_lib_save_callback(struct socket *sock, struct ksock_conn *conn)
-{
- conn->ksnc_saved_data_ready = sock->sk->sk_data_ready;
- conn->ksnc_saved_write_space = sock->sk->sk_write_space;
-}
-
-void
-ksocknal_lib_set_callback(struct socket *sock, struct ksock_conn *conn)
-{
- sock->sk->sk_user_data = conn;
- sock->sk->sk_data_ready = ksocknal_data_ready;
- sock->sk->sk_write_space = ksocknal_write_space;
-}
-
-void
-ksocknal_lib_reset_callback(struct socket *sock, struct ksock_conn *conn)
-{
- /*
- * Remove conn's network callbacks.
- * NB I _have_ to restore the callback, rather than storing a noop,
- * since the socket could survive past this module being unloaded!!
- */
- sock->sk->sk_data_ready = conn->ksnc_saved_data_ready;
- sock->sk->sk_write_space = conn->ksnc_saved_write_space;
-
- /*
- * A callback could be in progress already; they hold a read lock
- * on ksnd_global_lock (to serialise with me) and NOOP if
- * sk_user_data is NULL.
- */
- sock->sk->sk_user_data = NULL;
-}
-
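-/*
- * Usage sketch (call order inferred from this file; illustration only):
- * the three helpers above bracket a connection's lifetime --
- *
- *	ksocknal_lib_save_callback(sock, conn);   remember the originals
- *	ksocknal_lib_set_callback(sock, conn);    hook data_ready/write_space
- *	...                                       connection in service
- *	ksocknal_lib_reset_callback(sock, conn);  restore before teardown
- *
- * Restoring (rather than installing a noop) matters because the socket
- * may outlive this module, as the comment in reset_callback notes.
- */
-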
-int
-ksocknal_lib_memory_pressure(struct ksock_conn *conn)
-{
- int rc = 0;
- struct ksock_sched *sched;
-
- sched = conn->ksnc_scheduler;
- spin_lock_bh(&sched->kss_lock);
-
- if (!test_bit(SOCK_NOSPACE, &conn->ksnc_sock->flags) &&
- !conn->ksnc_tx_ready) {
- /*
- * SOCK_NOSPACE is set when the socket fills
- * and cleared in the write_space callback
- * (which also sets ksnc_tx_ready). If
- * SOCK_NOSPACE and ksnc_tx_ready are BOTH
- * zero, I didn't fill the socket and
- * write_space won't reschedule me, so I
- * return -ENOMEM to get my caller to retry
- * after a timeout
- */
- rc = -ENOMEM;
- }
-
- spin_unlock_bh(&sched->kss_lock);
-
- return rc;
-}
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c
deleted file mode 100644
index 5663a4ca94d4..000000000000
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c
+++ /dev/null
@@ -1,184 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#include "socklnd.h"
-
-static int sock_timeout = 50;
-module_param(sock_timeout, int, 0644);
-MODULE_PARM_DESC(sock_timeout, "dead socket timeout (seconds)");
-
-static int credits = 256;
-module_param(credits, int, 0444);
-MODULE_PARM_DESC(credits, "# concurrent sends");
-
-static int peer_credits = 8;
-module_param(peer_credits, int, 0444);
-MODULE_PARM_DESC(peer_credits, "# concurrent sends to 1 peer");
-
-static int peer_buffer_credits;
-module_param(peer_buffer_credits, int, 0444);
-MODULE_PARM_DESC(peer_buffer_credits, "# per-peer router buffer credits");
-
-static int peer_timeout = 180;
-module_param(peer_timeout, int, 0444);
-MODULE_PARM_DESC(peer_timeout, "Seconds without aliveness news to declare peer dead (<=0 to disable)");
-
-/*
- * Number of daemons in each per-CPU-partition (percpt) thread pool;
- * a reasonable value is estimated from the CPU count if it's not set.
- */
-static unsigned int nscheds;
-module_param(nscheds, int, 0444);
-MODULE_PARM_DESC(nscheds, "# scheduler daemons in each pool while starting");
-
-static int nconnds = 4;
-module_param(nconnds, int, 0444);
-MODULE_PARM_DESC(nconnds, "# connection daemons while starting");
-
-static int nconnds_max = 64;
-module_param(nconnds_max, int, 0444);
-MODULE_PARM_DESC(nconnds_max, "max # connection daemons");
-
-static int min_reconnectms = 1000;
-module_param(min_reconnectms, int, 0644);
-MODULE_PARM_DESC(min_reconnectms, "min connection retry interval (mS)");
-
-static int max_reconnectms = 60000;
-module_param(max_reconnectms, int, 0644);
-MODULE_PARM_DESC(max_reconnectms, "max connection retry interval (mS)");
-
-# define DEFAULT_EAGER_ACK 0
-static int eager_ack = DEFAULT_EAGER_ACK;
-module_param(eager_ack, int, 0644);
-MODULE_PARM_DESC(eager_ack, "send tcp ack packets eagerly");
-
-static int typed_conns = 1;
-module_param(typed_conns, int, 0444);
-MODULE_PARM_DESC(typed_conns, "use different sockets for bulk");
-
-static int min_bulk = 1 << 10;
-module_param(min_bulk, int, 0644);
-MODULE_PARM_DESC(min_bulk, "smallest 'large' message");
-
-# define DEFAULT_BUFFER_SIZE 0
-static int tx_buffer_size = DEFAULT_BUFFER_SIZE;
-module_param(tx_buffer_size, int, 0644);
-MODULE_PARM_DESC(tx_buffer_size, "socket tx buffer size (0 for system default)");
-
-static int rx_buffer_size = DEFAULT_BUFFER_SIZE;
-module_param(rx_buffer_size, int, 0644);
-MODULE_PARM_DESC(rx_buffer_size, "socket rx buffer size (0 for system default)");
-
-static int nagle;
-module_param(nagle, int, 0644);
-MODULE_PARM_DESC(nagle, "enable NAGLE?");
-
-static int round_robin = 1;
-module_param(round_robin, int, 0644);
-MODULE_PARM_DESC(round_robin, "Round robin for multiple interfaces");
-
-static int keepalive = 30;
-module_param(keepalive, int, 0644);
-MODULE_PARM_DESC(keepalive, "# seconds before send keepalive");
-
-static int keepalive_idle = 30;
-module_param(keepalive_idle, int, 0644);
-MODULE_PARM_DESC(keepalive_idle, "# idle seconds before probe");
-
-#define DEFAULT_KEEPALIVE_COUNT 5
-static int keepalive_count = DEFAULT_KEEPALIVE_COUNT;
-module_param(keepalive_count, int, 0644);
-MODULE_PARM_DESC(keepalive_count, "# missed probes == dead");
-
-static int keepalive_intvl = 5;
-module_param(keepalive_intvl, int, 0644);
-MODULE_PARM_DESC(keepalive_intvl, "seconds between probes");
-
-static int enable_csum;
-module_param(enable_csum, int, 0644);
-MODULE_PARM_DESC(enable_csum, "enable check sum");
-
-static int inject_csum_error;
-module_param(inject_csum_error, int, 0644);
-MODULE_PARM_DESC(inject_csum_error, "set non-zero to inject a checksum error");
-
-static int nonblk_zcack = 1;
-module_param(nonblk_zcack, int, 0644);
-MODULE_PARM_DESC(nonblk_zcack, "always send ZC-ACK on non-blocking connection");
-
-static unsigned int zc_min_payload = 16 << 10;
-module_param(zc_min_payload, int, 0644);
-MODULE_PARM_DESC(zc_min_payload, "minimum payload size to zero copy");
-
-static unsigned int zc_recv;
-module_param(zc_recv, int, 0644);
-MODULE_PARM_DESC(zc_recv, "enable ZC recv for Chelsio driver");
-
-static unsigned int zc_recv_min_nfrags = 16;
-module_param(zc_recv_min_nfrags, int, 0644);
-MODULE_PARM_DESC(zc_recv_min_nfrags, "minimum # of fragments to enable ZC recv");
-
-#if SOCKNAL_VERSION_DEBUG
-static int protocol = 3;
-module_param(protocol, int, 0644);
-MODULE_PARM_DESC(protocol, "protocol version");
-#endif
-
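-/*
- * Usage sketch (module name assumed to be ksocklnd; illustration only):
- * the tunables above are ordinary module parameters, e.g.
- *
- *	modprobe ksocklnd sock_timeout=100 nconnds=8 enable_csum=1
- *
- * 0444 parameters take effect only at load time; 0644 ones can also be
- * changed later via /sys/module/ksocklnd/parameters/.
- */
-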
-struct ksock_tunables ksocknal_tunables;
-
-int ksocknal_tunables_init(void)
-{
- /* initialize ksocknal_tunables structure */
- ksocknal_tunables.ksnd_timeout = &sock_timeout;
- ksocknal_tunables.ksnd_nscheds = &nscheds;
- ksocknal_tunables.ksnd_nconnds = &nconnds;
- ksocknal_tunables.ksnd_nconnds_max = &nconnds_max;
- ksocknal_tunables.ksnd_min_reconnectms = &min_reconnectms;
- ksocknal_tunables.ksnd_max_reconnectms = &max_reconnectms;
- ksocknal_tunables.ksnd_eager_ack = &eager_ack;
- ksocknal_tunables.ksnd_typed_conns = &typed_conns;
- ksocknal_tunables.ksnd_min_bulk = &min_bulk;
- ksocknal_tunables.ksnd_tx_buffer_size = &tx_buffer_size;
- ksocknal_tunables.ksnd_rx_buffer_size = &rx_buffer_size;
- ksocknal_tunables.ksnd_nagle = &nagle;
- ksocknal_tunables.ksnd_round_robin = &round_robin;
- ksocknal_tunables.ksnd_keepalive = &keepalive;
- ksocknal_tunables.ksnd_keepalive_idle = &keepalive_idle;
- ksocknal_tunables.ksnd_keepalive_count = &keepalive_count;
- ksocknal_tunables.ksnd_keepalive_intvl = &keepalive_intvl;
- ksocknal_tunables.ksnd_credits = &credits;
- ksocknal_tunables.ksnd_peertxcredits = &peer_credits;
- ksocknal_tunables.ksnd_peerrtrcredits = &peer_buffer_credits;
- ksocknal_tunables.ksnd_peertimeout = &peer_timeout;
- ksocknal_tunables.ksnd_enable_csum = &enable_csum;
- ksocknal_tunables.ksnd_inject_csum_error = &inject_csum_error;
- ksocknal_tunables.ksnd_nonblk_zcack = &nonblk_zcack;
- ksocknal_tunables.ksnd_zc_min_payload = &zc_min_payload;
- ksocknal_tunables.ksnd_zc_recv = &zc_recv;
- ksocknal_tunables.ksnd_zc_recv_min_nfrags = &zc_recv_min_nfrags;
-
-#if SOCKNAL_VERSION_DEBUG
- ksocknal_tunables.ksnd_protocol = &protocol;
-#endif
-
- if (*ksocknal_tunables.ksnd_zc_min_payload < (2 << 10))
- *ksocknal_tunables.ksnd_zc_min_payload = 2 << 10;
-
- return 0;
-}
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c
deleted file mode 100644
index 05982dac781c..000000000000
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c
+++ /dev/null
@@ -1,810 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2012, Intel Corporation.
- *
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#include "socklnd.h"
-
-/*
- * Protocol entries :
- * pro_send_hello : send hello message
- * pro_recv_hello : receive hello message
- * pro_pack : pack message header
- * pro_unpack : unpack message header
- * pro_queue_tx_zcack() : Called holding BH lock: kss_lock
- * return 1 if ACK is piggybacked, otherwise return 0
- * pro_queue_tx_msg() : Called holding BH lock: kss_lock
- * return the ACK that piggybacked by my message, or NULL
- * pro_handle_zcreq() : handler of incoming ZC-REQ
- * pro_handle_zcack() : handler of incoming ZC-ACK
- * pro_match_tx() : Called holding glock
- */
-
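-/*
- * Sketch (field and handler names assumed from the uses in this file;
- * illustration only): each protocol version is one instance of the ops
- * table described above, roughly
- *
- *	struct ksock_proto ksocknal_protocol_v2x = {
- *		.pro_version        = KSOCK_PROTO_V2,
- *		.pro_send_hello     = ksocknal_send_hello_v2,
- *		.pro_recv_hello     = ksocknal_recv_hello_v2,
- *		.pro_queue_tx_msg   = ksocknal_queue_tx_msg_v2,
- *		.pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v2,
- *	};
- *
- * so the rest of socklnd can dispatch through conn->ksnc_proto without
- * knowing which version was negotiated.
- */
-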
-static struct ksock_tx *
-ksocknal_queue_tx_msg_v1(struct ksock_conn *conn, struct ksock_tx *tx_msg)
-{
- /* V1.x, just enqueue it */
- list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
- return NULL;
-}
-
-void
-ksocknal_next_tx_carrier(struct ksock_conn *conn)
-{
- struct ksock_tx *tx = conn->ksnc_tx_carrier;
-
- /* Called holding BH lock: conn->ksnc_scheduler->kss_lock */
- LASSERT(!list_empty(&conn->ksnc_tx_queue));
- LASSERT(tx);
-
- /* Next TX that can carry ZC-ACK or LNet message */
- if (tx->tx_list.next == &conn->ksnc_tx_queue) {
- /* no more packets queued */
- conn->ksnc_tx_carrier = NULL;
- } else {
- conn->ksnc_tx_carrier = list_next_entry(tx, tx_list);
- LASSERT(conn->ksnc_tx_carrier->tx_msg.ksm_type == tx->tx_msg.ksm_type);
- }
-}
-
-static int
-ksocknal_queue_tx_zcack_v2(struct ksock_conn *conn,
- struct ksock_tx *tx_ack, __u64 cookie)
-{
- struct ksock_tx *tx = conn->ksnc_tx_carrier;
-
- LASSERT(!tx_ack ||
- tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);
-
- /*
- * Enqueue or piggyback tx_ack / cookie
-	 * . If no tx can piggyback the cookie of tx_ack (or cookie), just
-	 *   enqueue tx_ack (if tx_ack != NULL) and return 0.
-	 * . If a tx can piggyback the cookie of tx_ack (or cookie),
-	 *   piggyback the cookie and return 1.
- */
- if (!tx) {
- if (tx_ack) {
- list_add_tail(&tx_ack->tx_list,
- &conn->ksnc_tx_queue);
- conn->ksnc_tx_carrier = tx_ack;
- }
- return 0;
- }
-
- if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) {
- /* tx is noop zc-ack, can't piggyback zc-ack cookie */
- if (tx_ack)
- list_add_tail(&tx_ack->tx_list,
- &conn->ksnc_tx_queue);
- return 0;
- }
-
- LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_LNET);
- LASSERT(!tx->tx_msg.ksm_zc_cookies[1]);
-
- if (tx_ack)
- cookie = tx_ack->tx_msg.ksm_zc_cookies[1];
-
- /* piggyback the zc-ack cookie */
- tx->tx_msg.ksm_zc_cookies[1] = cookie;
- /* move on to the next TX which can carry cookie */
- ksocknal_next_tx_carrier(conn);
-
- return 1;
-}
-
-static struct ksock_tx *
-ksocknal_queue_tx_msg_v2(struct ksock_conn *conn, struct ksock_tx *tx_msg)
-{
- struct ksock_tx *tx = conn->ksnc_tx_carrier;
-
- /*
- * Enqueue tx_msg:
- * . If there is no NOOP on the connection, just enqueue
- * tx_msg and return NULL
- * . If there is NOOP on the connection, piggyback the cookie
- * and replace the NOOP tx, and return the NOOP tx.
- */
- if (!tx) { /* nothing on queue */
- list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
- conn->ksnc_tx_carrier = tx_msg;
- return NULL;
- }
-
- if (tx->tx_msg.ksm_type == KSOCK_MSG_LNET) { /* nothing to carry */
- list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
- return NULL;
- }
-
- LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
-
-	/* There is a noop zc-ack whose cookie can be piggybacked */
- tx_msg->tx_msg.ksm_zc_cookies[1] = tx->tx_msg.ksm_zc_cookies[1];
- ksocknal_next_tx_carrier(conn);
-
- /* use new_tx to replace the noop zc-ack packet */
- list_add(&tx_msg->tx_list, &tx->tx_list);
- list_del(&tx->tx_list);
-
- return tx;
-}
-
-static int
-ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn,
- struct ksock_tx *tx_ack, __u64 cookie)
-{
- struct ksock_tx *tx;
-
- if (conn->ksnc_type != SOCKLND_CONN_ACK)
- return ksocknal_queue_tx_zcack_v2(conn, tx_ack, cookie);
-
- /* non-blocking ZC-ACK (to router) */
- LASSERT(!tx_ack ||
- tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);
-
- tx = conn->ksnc_tx_carrier;
- if (!tx) {
- if (tx_ack) {
- list_add_tail(&tx_ack->tx_list,
- &conn->ksnc_tx_queue);
- conn->ksnc_tx_carrier = tx_ack;
- }
- return 0;
- }
-
-	/* tx == conn->ksnc_tx_carrier: try to piggyback on it */
-
- if (tx_ack)
- cookie = tx_ack->tx_msg.ksm_zc_cookies[1];
-
- if (cookie == SOCKNAL_KEEPALIVE_PING) /* ignore keepalive PING */
- return 1;
-
- if (tx->tx_msg.ksm_zc_cookies[1] == SOCKNAL_KEEPALIVE_PING) {
- /* replace the keepalive PING with a real ACK */
- LASSERT(!tx->tx_msg.ksm_zc_cookies[0]);
- tx->tx_msg.ksm_zc_cookies[1] = cookie;
- return 1;
- }
-
- if (cookie == tx->tx_msg.ksm_zc_cookies[0] ||
- cookie == tx->tx_msg.ksm_zc_cookies[1]) {
- CWARN("%s: duplicated ZC cookie: %llu\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
- return 1; /* XXX return error in the future */
- }
-
- if (!tx->tx_msg.ksm_zc_cookies[0]) {
- /*
- * NOOP tx has only one ZC-ACK cookie,
- * can carry at least one more
- */
- if (tx->tx_msg.ksm_zc_cookies[1] > cookie) {
- tx->tx_msg.ksm_zc_cookies[0] = tx->tx_msg.ksm_zc_cookies[1];
- tx->tx_msg.ksm_zc_cookies[1] = cookie;
- } else {
- tx->tx_msg.ksm_zc_cookies[0] = cookie;
- }
-
- if (tx->tx_msg.ksm_zc_cookies[0] - tx->tx_msg.ksm_zc_cookies[1] > 2) {
- /*
- * not likely to carry more ACKs, skip it
- * to simplify logic
- */
- ksocknal_next_tx_carrier(conn);
- }
-
- return 1;
- }
-
- /* takes two or more cookies already */
-
- if (tx->tx_msg.ksm_zc_cookies[0] > tx->tx_msg.ksm_zc_cookies[1]) {
- __u64 tmp = 0;
-
- /* two separated cookies: (a+2, a) or (a+1, a) */
- LASSERT(tx->tx_msg.ksm_zc_cookies[0] -
- tx->tx_msg.ksm_zc_cookies[1] <= 2);
-
- if (tx->tx_msg.ksm_zc_cookies[0] -
- tx->tx_msg.ksm_zc_cookies[1] == 2) {
- if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1)
- tmp = cookie;
- } else if (cookie == tx->tx_msg.ksm_zc_cookies[1] - 1) {
- tmp = tx->tx_msg.ksm_zc_cookies[1];
- } else if (cookie == tx->tx_msg.ksm_zc_cookies[0] + 1) {
- tmp = tx->tx_msg.ksm_zc_cookies[0];
- }
-
- if (tmp) {
- /* range of cookies */
- tx->tx_msg.ksm_zc_cookies[0] = tmp - 1;
- tx->tx_msg.ksm_zc_cookies[1] = tmp + 1;
- return 1;
- }
-
- } else {
- /*
- * ksm_zc_cookies[0] < ksm_zc_cookies[1],
- * it is range of cookies
- */
- if (cookie >= tx->tx_msg.ksm_zc_cookies[0] &&
- cookie <= tx->tx_msg.ksm_zc_cookies[1]) {
- CWARN("%s: duplicated ZC cookie: %llu\n",
- libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
- return 1; /* XXX: return error in the future */
- }
-
- if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) {
- tx->tx_msg.ksm_zc_cookies[1] = cookie;
- return 1;
- }
-
- if (cookie == tx->tx_msg.ksm_zc_cookies[0] - 1) {
- tx->tx_msg.ksm_zc_cookies[0] = cookie;
- return 1;
- }
- }
-
- /* failed to piggyback ZC-ACK */
- if (tx_ack) {
- list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue);
- /* the next tx can piggyback at least 1 ACK */
- ksocknal_next_tx_carrier(conn);
- }
-
- return 0;
-}
-
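-/*
- * Worked example (illustration only) of the coalescing above: a NOOP
- * carrying the single cookie 7 (cookies = {0, 7}) that absorbs cookie 8
- * becomes the separated pair {8, 7}; absorbing 6 next matches
- * "cookies[1] - 1", so the pair collapses into the contiguous range
- * stored as {6, 8} (cookies[0] < cookies[1] means a range).
- */
-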
-static int
-ksocknal_match_tx(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk)
-{
- int nob;
-
-#if SOCKNAL_VERSION_DEBUG
- if (!*ksocknal_tunables.ksnd_typed_conns)
- return SOCKNAL_MATCH_YES;
-#endif
-
- if (!tx || !tx->tx_lnetmsg) {
- /* noop packet */
- nob = offsetof(struct ksock_msg, ksm_u);
- } else {
- nob = tx->tx_lnetmsg->msg_len +
- ((conn->ksnc_proto == &ksocknal_protocol_v1x) ?
- sizeof(struct lnet_hdr) : sizeof(struct ksock_msg));
- }
-
- /* default checking for typed connection */
- switch (conn->ksnc_type) {
- default:
- CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
- LBUG();
- case SOCKLND_CONN_ANY:
- return SOCKNAL_MATCH_YES;
-
- case SOCKLND_CONN_BULK_IN:
- return SOCKNAL_MATCH_MAY;
-
- case SOCKLND_CONN_BULK_OUT:
- if (nob < *ksocknal_tunables.ksnd_min_bulk)
- return SOCKNAL_MATCH_MAY;
- else
- return SOCKNAL_MATCH_YES;
-
- case SOCKLND_CONN_CONTROL:
- if (nob >= *ksocknal_tunables.ksnd_min_bulk)
- return SOCKNAL_MATCH_MAY;
- else
- return SOCKNAL_MATCH_YES;
- }
-}
-
-static int
-ksocknal_match_tx_v3(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk)
-{
- int nob;
-
- if (!tx || !tx->tx_lnetmsg)
- nob = offsetof(struct ksock_msg, ksm_u);
- else
- nob = tx->tx_lnetmsg->msg_len + sizeof(struct ksock_msg);
-
- switch (conn->ksnc_type) {
- default:
- CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
- LBUG();
- case SOCKLND_CONN_ANY:
- return SOCKNAL_MATCH_NO;
-
- case SOCKLND_CONN_ACK:
- if (nonblk)
- return SOCKNAL_MATCH_YES;
- else if (!tx || !tx->tx_lnetmsg)
- return SOCKNAL_MATCH_MAY;
- else
- return SOCKNAL_MATCH_NO;
-
- case SOCKLND_CONN_BULK_OUT:
- if (nonblk)
- return SOCKNAL_MATCH_NO;
- else if (nob < *ksocknal_tunables.ksnd_min_bulk)
- return SOCKNAL_MATCH_MAY;
- else
- return SOCKNAL_MATCH_YES;
-
- case SOCKLND_CONN_CONTROL:
- if (nonblk)
- return SOCKNAL_MATCH_NO;
- else if (nob >= *ksocknal_tunables.ksnd_min_bulk)
- return SOCKNAL_MATCH_MAY;
- else
- return SOCKNAL_MATCH_YES;
- }
-}
-
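-/*
- * Summary (illustrative) of the v3 matching policy above, by connection
- * type; "small"/"large" is relative to the min_bulk tunable:
- *
- *	type		nonblk		noop		small lnet	large lnet
- *	ANY		NO		NO		NO		NO
- *	ACK		YES		MAY		NO		NO
- *	BULK_OUT	NO		MAY		MAY		YES
- *	CONTROL		NO		YES		YES		MAY
- */
-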
-/* (Sink) handle incoming ZC request from sender */
-static int
-ksocknal_handle_zcreq(struct ksock_conn *c, __u64 cookie, int remote)
-{
- struct ksock_peer *peer = c->ksnc_peer;
- struct ksock_conn *conn;
- struct ksock_tx *tx;
- int rc;
-
- read_lock(&ksocknal_data.ksnd_global_lock);
-
- conn = ksocknal_find_conn_locked(peer, NULL, !!remote);
- if (conn) {
- struct ksock_sched *sched = conn->ksnc_scheduler;
-
- LASSERT(conn->ksnc_proto->pro_queue_tx_zcack);
-
- spin_lock_bh(&sched->kss_lock);
-
- rc = conn->ksnc_proto->pro_queue_tx_zcack(conn, NULL, cookie);
-
- spin_unlock_bh(&sched->kss_lock);
-
- if (rc) { /* piggybacked */
- read_unlock(&ksocknal_data.ksnd_global_lock);
- return 0;
- }
- }
-
- read_unlock(&ksocknal_data.ksnd_global_lock);
-
- /* ACK connection is not ready, or can't piggyback the ACK */
- tx = ksocknal_alloc_tx_noop(cookie, !!remote);
- if (!tx)
- return -ENOMEM;
-
- rc = ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id);
- if (!rc)
- return 0;
-
- ksocknal_free_tx(tx);
- return rc;
-}
-
-/* (Sender) handle ZC_ACK from sink */
-static int
-ksocknal_handle_zcack(struct ksock_conn *conn, __u64 cookie1, __u64 cookie2)
-{
- struct ksock_peer *peer = conn->ksnc_peer;
- struct ksock_tx *tx;
- struct ksock_tx *temp;
- struct ksock_tx *tmp;
- LIST_HEAD(zlist);
- int count;
-
- if (!cookie1)
- cookie1 = cookie2;
-
- count = (cookie1 > cookie2) ? 2 : (cookie2 - cookie1 + 1);
-
- if (cookie2 == SOCKNAL_KEEPALIVE_PING &&
- conn->ksnc_proto == &ksocknal_protocol_v3x) {
- /* keepalive PING for V3.x, just ignore it */
- return count == 1 ? 0 : -EPROTO;
- }
-
- spin_lock(&peer->ksnp_lock);
-
- list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list,
- tx_zc_list) {
- __u64 c = tx->tx_msg.ksm_zc_cookies[0];
-
- if (c == cookie1 || c == cookie2 ||
- (cookie1 < c && c < cookie2)) {
- tx->tx_msg.ksm_zc_cookies[0] = 0;
- list_del(&tx->tx_zc_list);
- list_add(&tx->tx_zc_list, &zlist);
-
- if (!--count)
- break;
- }
- }
-
- spin_unlock(&peer->ksnp_lock);
-
- list_for_each_entry_safe(tx, temp, &zlist, tx_zc_list) {
- list_del(&tx->tx_zc_list);
- ksocknal_tx_decref(tx);
- }
-
- return !count ? 0 : -EPROTO;
-}
-
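-/*
- * Worked example (illustration only) of the count computation above:
- * (cookie1, cookie2) = (5, 9) acknowledges the inclusive range [5, 9],
- * so count = 9 - 5 + 1 = 5; a "wrapped" pair with cookie1 > cookie2
- * instead encodes exactly two discrete cookies, so count = 2.
- */
-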
-static int
-ksocknal_send_hello_v1(struct ksock_conn *conn, struct ksock_hello_msg *hello)
-{
- struct socket *sock = conn->ksnc_sock;
- struct lnet_hdr *hdr;
- struct lnet_magicversion *hmv;
- int rc;
- int i;
-
- BUILD_BUG_ON(sizeof(struct lnet_magicversion) != offsetof(struct lnet_hdr, src_nid));
-
- hdr = kzalloc(sizeof(*hdr), GFP_NOFS);
- if (!hdr) {
- CERROR("Can't allocate struct lnet_hdr\n");
- return -ENOMEM;
- }
-
- hmv = (struct lnet_magicversion *)&hdr->dest_nid;
-
- /*
- * Re-organize the V2.x message header into a V1.x header
- * (struct lnet_hdr) and send it out
- */
- hmv->magic = cpu_to_le32(LNET_PROTO_TCP_MAGIC);
- hmv->version_major = cpu_to_le16(KSOCK_PROTO_V1_MAJOR);
- hmv->version_minor = cpu_to_le16(KSOCK_PROTO_V1_MINOR);
-
- if (the_lnet.ln_testprotocompat) {
- /* single-shot proto check */
- LNET_LOCK();
- if (the_lnet.ln_testprotocompat & 1) {
- hmv->version_major++; /* just different! */
- the_lnet.ln_testprotocompat &= ~1;
- }
- if (the_lnet.ln_testprotocompat & 2) {
- hmv->magic = LNET_PROTO_MAGIC;
- the_lnet.ln_testprotocompat &= ~2;
- }
- LNET_UNLOCK();
- }
-
- hdr->src_nid = cpu_to_le64(hello->kshm_src_nid);
- hdr->src_pid = cpu_to_le32(hello->kshm_src_pid);
- hdr->type = cpu_to_le32(LNET_MSG_HELLO);
- hdr->payload_length = cpu_to_le32(hello->kshm_nips * sizeof(__u32));
- hdr->msg.hello.type = cpu_to_le32(hello->kshm_ctype);
- hdr->msg.hello.incarnation = cpu_to_le64(hello->kshm_src_incarnation);
-
- rc = lnet_sock_write(sock, hdr, sizeof(*hdr), lnet_acceptor_timeout());
- if (rc) {
- CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n",
- rc, &conn->ksnc_ipaddr, conn->ksnc_port);
- goto out;
- }
-
- if (!hello->kshm_nips)
- goto out;
-
- for (i = 0; i < (int)hello->kshm_nips; i++)
- hello->kshm_ips[i] = __cpu_to_le32(hello->kshm_ips[i]);
-
- rc = lnet_sock_write(sock, hello->kshm_ips,
- hello->kshm_nips * sizeof(__u32),
- lnet_acceptor_timeout());
- if (rc) {
- CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n",
- rc, hello->kshm_nips,
- &conn->ksnc_ipaddr, conn->ksnc_port);
- }
-out:
- kfree(hdr);
-
- return rc;
-}
-
-static int
-ksocknal_send_hello_v2(struct ksock_conn *conn, struct ksock_hello_msg *hello)
-{
- struct socket *sock = conn->ksnc_sock;
- int rc;
-
- hello->kshm_magic = LNET_PROTO_MAGIC;
- hello->kshm_version = conn->ksnc_proto->pro_version;
-
- if (the_lnet.ln_testprotocompat) {
- /* single-shot proto check */
- LNET_LOCK();
- if (the_lnet.ln_testprotocompat & 1) {
- hello->kshm_version++; /* just different! */
- the_lnet.ln_testprotocompat &= ~1;
- }
- LNET_UNLOCK();
- }
-
- rc = lnet_sock_write(sock, hello, offsetof(struct ksock_hello_msg, kshm_ips),
- lnet_acceptor_timeout());
- if (rc) {
- CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n",
- rc, &conn->ksnc_ipaddr, conn->ksnc_port);
- return rc;
- }
-
- if (!hello->kshm_nips)
- return 0;
-
- rc = lnet_sock_write(sock, hello->kshm_ips,
- hello->kshm_nips * sizeof(__u32),
- lnet_acceptor_timeout());
- if (rc) {
- CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n",
- rc, hello->kshm_nips,
- &conn->ksnc_ipaddr, conn->ksnc_port);
- }
-
- return rc;
-}
-
-static int
-ksocknal_recv_hello_v1(struct ksock_conn *conn, struct ksock_hello_msg *hello,
- int timeout)
-{
- struct socket *sock = conn->ksnc_sock;
- struct lnet_hdr *hdr;
- int rc;
- int i;
-
- hdr = kzalloc(sizeof(*hdr), GFP_NOFS);
- if (!hdr) {
- CERROR("Can't allocate struct lnet_hdr\n");
- return -ENOMEM;
- }
-
- rc = lnet_sock_read(sock, &hdr->src_nid,
- sizeof(*hdr) - offsetof(struct lnet_hdr, src_nid),
- timeout);
- if (rc) {
- CERROR("Error %d reading rest of HELLO hdr from %pI4h\n",
- rc, &conn->ksnc_ipaddr);
- LASSERT(rc < 0 && rc != -EALREADY);
- goto out;
- }
-
- /* ...and check we got what we expected */
- if (hdr->type != cpu_to_le32(LNET_MSG_HELLO)) {
- CERROR("Expecting a HELLO hdr, but got type %d from %pI4h\n",
- le32_to_cpu(hdr->type),
- &conn->ksnc_ipaddr);
- rc = -EPROTO;
- goto out;
- }
-
- hello->kshm_src_nid = le64_to_cpu(hdr->src_nid);
- hello->kshm_src_pid = le32_to_cpu(hdr->src_pid);
- hello->kshm_src_incarnation = le64_to_cpu(hdr->msg.hello.incarnation);
- hello->kshm_ctype = le32_to_cpu(hdr->msg.hello.type);
- hello->kshm_nips = le32_to_cpu(hdr->payload_length) /
- sizeof(__u32);
-
- if (hello->kshm_nips > LNET_MAX_INTERFACES) {
- CERROR("Bad nips %d from ip %pI4h\n",
- hello->kshm_nips, &conn->ksnc_ipaddr);
- rc = -EPROTO;
- goto out;
- }
-
- if (!hello->kshm_nips)
- goto out;
-
- rc = lnet_sock_read(sock, hello->kshm_ips,
- hello->kshm_nips * sizeof(__u32), timeout);
- if (rc) {
- CERROR("Error %d reading IPs from ip %pI4h\n",
- rc, &conn->ksnc_ipaddr);
- LASSERT(rc < 0 && rc != -EALREADY);
- goto out;
- }
-
- for (i = 0; i < (int)hello->kshm_nips; i++) {
- hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]);
-
- if (!hello->kshm_ips[i]) {
- CERROR("Zero IP[%d] from ip %pI4h\n",
- i, &conn->ksnc_ipaddr);
- rc = -EPROTO;
- break;
- }
- }
-out:
- kfree(hdr);
-
- return rc;
-}
-
-static int
-ksocknal_recv_hello_v2(struct ksock_conn *conn, struct ksock_hello_msg *hello,
- int timeout)
-{
- struct socket *sock = conn->ksnc_sock;
- int rc;
- int i;
-
- if (hello->kshm_magic == LNET_PROTO_MAGIC)
- conn->ksnc_flip = 0;
- else
- conn->ksnc_flip = 1;
-
- rc = lnet_sock_read(sock, &hello->kshm_src_nid,
- offsetof(struct ksock_hello_msg, kshm_ips) -
- offsetof(struct ksock_hello_msg, kshm_src_nid),
- timeout);
- if (rc) {
- CERROR("Error %d reading HELLO from %pI4h\n",
- rc, &conn->ksnc_ipaddr);
- LASSERT(rc < 0 && rc != -EALREADY);
- return rc;
- }
-
- if (conn->ksnc_flip) {
- __swab32s(&hello->kshm_src_pid);
- __swab64s(&hello->kshm_src_nid);
- __swab32s(&hello->kshm_dst_pid);
- __swab64s(&hello->kshm_dst_nid);
- __swab64s(&hello->kshm_src_incarnation);
- __swab64s(&hello->kshm_dst_incarnation);
- __swab32s(&hello->kshm_ctype);
- __swab32s(&hello->kshm_nips);
- }
-
- if (hello->kshm_nips > LNET_MAX_INTERFACES) {
- CERROR("Bad nips %d from ip %pI4h\n",
- hello->kshm_nips, &conn->ksnc_ipaddr);
- return -EPROTO;
- }
-
- if (!hello->kshm_nips)
- return 0;
-
- rc = lnet_sock_read(sock, hello->kshm_ips,
- hello->kshm_nips * sizeof(__u32), timeout);
- if (rc) {
- CERROR("Error %d reading IPs from ip %pI4h\n",
- rc, &conn->ksnc_ipaddr);
- LASSERT(rc < 0 && rc != -EALREADY);
- return rc;
- }
-
- for (i = 0; i < (int)hello->kshm_nips; i++) {
- if (conn->ksnc_flip)
- __swab32s(&hello->kshm_ips[i]);
-
- if (!hello->kshm_ips[i]) {
- CERROR("Zero IP[%d] from ip %pI4h\n",
- i, &conn->ksnc_ipaddr);
- return -EPROTO;
- }
- }
-
- return 0;
-}
-
-static void
-ksocknal_pack_msg_v1(struct ksock_tx *tx)
-{
- /* V1.x has no KSOCK_MSG_NOOP */
- LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
- LASSERT(tx->tx_lnetmsg);
-
- tx->tx_iov[0].iov_base = &tx->tx_lnetmsg->msg_hdr;
- tx->tx_iov[0].iov_len = sizeof(struct lnet_hdr);
-
- tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(struct lnet_hdr);
- tx->tx_resid = tx->tx_lnetmsg->msg_len + sizeof(struct lnet_hdr);
-}
-
-static void
-ksocknal_pack_msg_v2(struct ksock_tx *tx)
-{
- tx->tx_iov[0].iov_base = &tx->tx_msg;
-
- if (tx->tx_lnetmsg) {
- LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
-
- tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr;
- tx->tx_iov[0].iov_len = sizeof(struct ksock_msg);
- tx->tx_nob = sizeof(struct ksock_msg) + tx->tx_lnetmsg->msg_len;
- tx->tx_resid = sizeof(struct ksock_msg) + tx->tx_lnetmsg->msg_len;
- } else {
- LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
-
- tx->tx_iov[0].iov_len = offsetof(struct ksock_msg, ksm_u.lnetmsg.ksnm_hdr);
- tx->tx_nob = offsetof(struct ksock_msg, ksm_u.lnetmsg.ksnm_hdr);
- tx->tx_resid = offsetof(struct ksock_msg, ksm_u.lnetmsg.ksnm_hdr);
- }
- /*
- * Don't checksum before start sending, because packet can be
- * piggybacked with ACK
- */
-}
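-
-/*
- * On the wire, a V2+ NOOP is therefore just the struct ksock_msg
- * fields up to (but excluding) ksm_u.lnetmsg.ksnm_hdr, while an LNET
- * message carries the full struct ksock_msg followed by msg_len
- * payload bytes.
- */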
-
-static void
-ksocknal_unpack_msg_v1(struct ksock_msg *msg)
-{
- msg->ksm_csum = 0;
- msg->ksm_type = KSOCK_MSG_LNET;
- msg->ksm_zc_cookies[0] = 0;
- msg->ksm_zc_cookies[1] = 0;
-}
-
-static void
-ksocknal_unpack_msg_v2(struct ksock_msg *msg)
-{
- /* Do nothing */
-}
-
-struct ksock_proto ksocknal_protocol_v1x = {
- .pro_version = KSOCK_PROTO_V1,
- .pro_send_hello = ksocknal_send_hello_v1,
- .pro_recv_hello = ksocknal_recv_hello_v1,
- .pro_pack = ksocknal_pack_msg_v1,
- .pro_unpack = ksocknal_unpack_msg_v1,
- .pro_queue_tx_msg = ksocknal_queue_tx_msg_v1,
- .pro_handle_zcreq = NULL,
- .pro_handle_zcack = NULL,
- .pro_queue_tx_zcack = NULL,
- .pro_match_tx = ksocknal_match_tx
-};
-
-struct ksock_proto ksocknal_protocol_v2x = {
- .pro_version = KSOCK_PROTO_V2,
- .pro_send_hello = ksocknal_send_hello_v2,
- .pro_recv_hello = ksocknal_recv_hello_v2,
- .pro_pack = ksocknal_pack_msg_v2,
- .pro_unpack = ksocknal_unpack_msg_v2,
- .pro_queue_tx_msg = ksocknal_queue_tx_msg_v2,
- .pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v2,
- .pro_handle_zcreq = ksocknal_handle_zcreq,
- .pro_handle_zcack = ksocknal_handle_zcack,
- .pro_match_tx = ksocknal_match_tx
-};
-
-struct ksock_proto ksocknal_protocol_v3x = {
- .pro_version = KSOCK_PROTO_V3,
- .pro_send_hello = ksocknal_send_hello_v2,
- .pro_recv_hello = ksocknal_recv_hello_v2,
- .pro_pack = ksocknal_pack_msg_v2,
- .pro_unpack = ksocknal_unpack_msg_v2,
- .pro_queue_tx_msg = ksocknal_queue_tx_msg_v2,
- .pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v3,
- .pro_handle_zcreq = ksocknal_handle_zcreq,
- .pro_handle_zcack = ksocknal_handle_zcack,
- .pro_match_tx = ksocknal_match_tx_v3
-};
diff --git a/drivers/staging/lustre/lnet/libcfs/Makefile b/drivers/staging/lustre/lnet/libcfs/Makefile
deleted file mode 100644
index 6a1b232da495..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET) += libcfs.o
-
-libcfs-obj-y += linux-tracefile.o linux-debug.o
-libcfs-obj-y += linux-crypto.o
-libcfs-obj-y += linux-crypto-adler.o
-
-libcfs-obj-y += debug.o fail.o module.o tracefile.o
-libcfs-obj-y += libcfs_string.o hash.o
-libcfs-obj-$(CONFIG_SMP) += libcfs_cpu.o
-libcfs-obj-y += libcfs_mem.o libcfs_lock.o
-
-libcfs-objs := $(libcfs-obj-y)
diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c
deleted file mode 100644
index 06f694f6a28f..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/debug.c
+++ /dev/null
@@ -1,461 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/debug.c
- *
- * Author: Phil Schwan <phil@clusterfs.com>
- *
- */
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/module.h>
-#include <linux/ctype.h>
-#include <linux/libcfs/libcfs_string.h>
-#include <linux/kthread.h>
-#include "tracefile.h"
-
-static char debug_file_name[1024];
-
-unsigned int libcfs_subsystem_debug = ~0;
-EXPORT_SYMBOL(libcfs_subsystem_debug);
-module_param(libcfs_subsystem_debug, int, 0644);
-MODULE_PARM_DESC(libcfs_subsystem_debug, "Lustre kernel debug subsystem mask");
-
-unsigned int libcfs_debug = (D_CANTMASK |
- D_NETERROR | D_HA | D_CONFIG | D_IOCTL);
-EXPORT_SYMBOL(libcfs_debug);
-module_param(libcfs_debug, int, 0644);
-MODULE_PARM_DESC(libcfs_debug, "Lustre kernel debug mask");
-
-static int libcfs_param_debug_mb_set(const char *val,
- const struct kernel_param *kp)
-{
- int rc;
- unsigned int num;
-
- rc = kstrtouint(val, 0, &num);
- if (rc < 0)
- return rc;
-
- if (!*((unsigned int *)kp->arg)) {
- *((unsigned int *)kp->arg) = num;
- return 0;
- }
-
- rc = cfs_trace_set_debug_mb(num);
-
- if (!rc)
- *((unsigned int *)kp->arg) = cfs_trace_get_debug_mb();
-
- return rc;
-}
-
-/* While the debug_mb setting looks like an unsigned int, it actually
- * needs quite a bit of extra processing, so we define a special
- * debugmb parameter type with corresponding methods to handle this case
- */
-static const struct kernel_param_ops param_ops_debugmb = {
- .set = libcfs_param_debug_mb_set,
- .get = param_get_uint,
-};
-
-#define param_check_debugmb(name, p) \
- __param_check(name, p, unsigned int)
-
-static unsigned int libcfs_debug_mb;
-module_param(libcfs_debug_mb, debugmb, 0644);
-MODULE_PARM_DESC(libcfs_debug_mb, "Total debug buffer size.");
-
-unsigned int libcfs_printk = D_CANTMASK;
-module_param(libcfs_printk, uint, 0644);
-MODULE_PARM_DESC(libcfs_printk, "Lustre kernel debug console mask");
-
-unsigned int libcfs_console_ratelimit = 1;
-module_param(libcfs_console_ratelimit, uint, 0644);
-MODULE_PARM_DESC(libcfs_console_ratelimit, "Lustre kernel debug console ratelimit (0 to disable)");
-
-static int param_set_delay_minmax(const char *val,
- const struct kernel_param *kp,
- long min, long max)
-{
- long d;
- int sec;
- int rc;
-
- rc = kstrtoint(val, 0, &sec);
- if (rc)
- return -EINVAL;
-
- d = sec * HZ / 100;
- if (d < min || d > max)
- return -EINVAL;
-
- *((unsigned int *)kp->arg) = d;
-
- return 0;
-}
-
-static int param_get_delay(char *buffer, const struct kernel_param *kp)
-{
- unsigned int d = *(unsigned int *)kp->arg;
-
- return sprintf(buffer, "%u", (unsigned int)(d * 100) / HZ);
-}
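-
-/*
- * NB: despite the "(jiffies)" wording in the parameter descriptions
- * below, the value exchanged with userspace here is in hundredths of
- * a second: param_set_delay_minmax() stores sec * HZ / 100 jiffies and
- * param_get_delay() prints d * 100 / HZ.  E.g. with HZ == 250, writing
- * 100 (i.e. 1s) stores 250 jiffies, which reads back as 100.
- */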
-
-unsigned int libcfs_console_max_delay;
-unsigned int libcfs_console_min_delay;
-
-static int param_set_console_max_delay(const char *val,
- const struct kernel_param *kp)
-{
- return param_set_delay_minmax(val, kp,
- libcfs_console_min_delay, INT_MAX);
-}
-
-static const struct kernel_param_ops param_ops_console_max_delay = {
- .set = param_set_console_max_delay,
- .get = param_get_delay,
-};
-
-#define param_check_console_max_delay(name, p) \
- __param_check(name, p, unsigned int)
-
-module_param(libcfs_console_max_delay, console_max_delay, 0644);
-MODULE_PARM_DESC(libcfs_console_max_delay, "Lustre kernel debug console max delay (jiffies)");
-
-static int param_set_console_min_delay(const char *val,
- const struct kernel_param *kp)
-{
- return param_set_delay_minmax(val, kp,
- 1, libcfs_console_max_delay);
-}
-
-static const struct kernel_param_ops param_ops_console_min_delay = {
- .set = param_set_console_min_delay,
- .get = param_get_delay,
-};
-
-#define param_check_console_min_delay(name, p) \
- __param_check(name, p, unsigned int)
-
-module_param(libcfs_console_min_delay, console_min_delay, 0644);
-MODULE_PARM_DESC(libcfs_console_min_delay, "Lustre kernel debug console min delay (jiffies)");
-
-static int param_set_uint_minmax(const char *val,
- const struct kernel_param *kp,
- unsigned int min, unsigned int max)
-{
- unsigned int num;
- int ret;
-
- if (!val)
- return -EINVAL;
- ret = kstrtouint(val, 0, &num);
- if (ret < 0 || num < min || num > max)
- return -EINVAL;
- *((unsigned int *)kp->arg) = num;
- return 0;
-}
-
-static int param_set_uintpos(const char *val, const struct kernel_param *kp)
-{
- return param_set_uint_minmax(val, kp, 1, -1);
-}
-
-static const struct kernel_param_ops param_ops_uintpos = {
- .set = param_set_uintpos,
- .get = param_get_uint,
-};
-
-#define param_check_uintpos(name, p) \
- __param_check(name, p, unsigned int)
-
-unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF;
-module_param(libcfs_console_backoff, uintpos, 0644);
-MODULE_PARM_DESC(libcfs_console_backoff, "Lustre kernel debug console backoff factor");
-
-unsigned int libcfs_debug_binary = 1;
-
-unsigned int libcfs_stack = 3 * THREAD_SIZE / 4;
-EXPORT_SYMBOL(libcfs_stack);
-
-unsigned int libcfs_catastrophe;
-EXPORT_SYMBOL(libcfs_catastrophe);
-
-unsigned int libcfs_panic_on_lbug = 1;
-module_param(libcfs_panic_on_lbug, uint, 0644);
-MODULE_PARM_DESC(libcfs_panic_on_lbug, "Lustre kernel panic on LBUG");
-
-static wait_queue_head_t debug_ctlwq;
-
-char libcfs_debug_file_path_arr[PATH_MAX] = LIBCFS_DEBUG_FILE_PATH_DEFAULT;
-
-/* We need to pass a pointer here, but elsewhere this must be a const */
-static char *libcfs_debug_file_path;
-module_param(libcfs_debug_file_path, charp, 0644);
-MODULE_PARM_DESC(libcfs_debug_file_path,
- "Path for dumping debug logs, set 'NONE' to prevent log dumping");
-
-int libcfs_panic_in_progress;
-
-/* libcfs_debug_token2mask() expects the returned string in lower-case */
-static const char *
-libcfs_debug_subsys2str(int subsys)
-{
- static const char * const libcfs_debug_subsystems[] =
- LIBCFS_DEBUG_SUBSYS_NAMES;
-
- if (subsys >= ARRAY_SIZE(libcfs_debug_subsystems))
- return NULL;
-
- return libcfs_debug_subsystems[subsys];
-}
-
-/* libcfs_debug_token2mask() expects the returned string in lower-case */
-static const char *
-libcfs_debug_dbg2str(int debug)
-{
- static const char * const libcfs_debug_masks[] =
- LIBCFS_DEBUG_MASKS_NAMES;
-
- if (debug >= ARRAY_SIZE(libcfs_debug_masks))
- return NULL;
-
- return libcfs_debug_masks[debug];
-}
-
-int
-libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys)
-{
- const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
- libcfs_debug_dbg2str;
- int len = 0;
- const char *token;
- int i;
-
- if (!mask) { /* "0" */
- if (size > 0)
- str[0] = '0';
- len = 1;
- } else { /* space-separated tokens */
- for (i = 0; i < 32; i++) {
- if (!(mask & (1 << i)))
- continue;
-
- token = fn(i);
- if (!token) /* unused bit */
- continue;
-
- if (len > 0) { /* separator? */
- if (len < size)
- str[len] = ' ';
- len++;
- }
-
- while (*token) {
- if (len < size)
- str[len] = *token;
- token++;
- len++;
- }
- }
- }
-
- /* terminate 'str' */
- if (len < size)
- str[len] = 0;
- else
- str[size - 1] = 0;
-
- return len;
-}
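-
-/*
- * Example (assuming the usual lower-case token names): for
- * mask == (D_NETERROR | D_HA), libcfs_debug_mask2str() writes
- * "neterror ha" into str and returns the length written;
- * libcfs_debug_str2mask() below performs the reverse mapping and, for
- * compatibility, still accepts a plain number such as "0x200".
- */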
-
-int
-libcfs_debug_str2mask(int *mask, const char *str, int is_subsys)
-{
- const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
- libcfs_debug_dbg2str;
- int m = 0;
- int matched;
- int n;
- int t;
-
- /* Allow a number for backwards compatibility */
-
- for (n = strlen(str); n > 0; n--)
- if (!isspace(str[n - 1]))
- break;
- matched = n;
- t = sscanf(str, "%i%n", &m, &matched);
- if (t >= 1 && matched == n) {
- /* don't print warning for lctl set_param debug=0 or -1 */
- if (m && m != -1)
- CWARN("You are trying to use a numerical value for the mask - this will be deprecated in a future release.\n");
- *mask = m;
- return 0;
- }
-
- return cfs_str2mask(str, fn, mask, is_subsys ? 0 : D_CANTMASK,
- 0xffffffff);
-}
-
-/**
- * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages()
- */
-void libcfs_debug_dumplog_internal(void *arg)
-{
- static time64_t last_dump_time;
- time64_t current_time;
- void *journal_info;
-
- journal_info = current->journal_info;
- current->journal_info = NULL;
- current_time = ktime_get_real_seconds();
-
- if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) &&
- current_time > last_dump_time) {
- last_dump_time = current_time;
- snprintf(debug_file_name, sizeof(debug_file_name) - 1,
- "%s.%lld.%ld", libcfs_debug_file_path_arr,
- (s64)current_time, (long)arg);
- pr_alert("LustreError: dumping log to %s\n", debug_file_name);
- cfs_tracefile_dump_all_pages(debug_file_name);
- libcfs_run_debug_log_upcall(debug_file_name);
- }
-
- current->journal_info = journal_info;
-}
-
-static int libcfs_debug_dumplog_thread(void *arg)
-{
- libcfs_debug_dumplog_internal(arg);
- wake_up(&debug_ctlwq);
- return 0;
-}
-
-void libcfs_debug_dumplog(void)
-{
- wait_queue_entry_t wait;
- struct task_struct *dumper;
-
- /* we're being careful to ensure that the kernel thread is
- * able to set our state to running as it exits before we
- * get to schedule()
- */
- init_waitqueue_entry(&wait, current);
- add_wait_queue(&debug_ctlwq, &wait);
-
- dumper = kthread_run(libcfs_debug_dumplog_thread,
- (void *)(long)current->pid,
- "libcfs_debug_dumper");
- set_current_state(TASK_INTERRUPTIBLE);
- if (IS_ERR(dumper))
- pr_err("LustreError: cannot start log dump thread: %ld\n",
- PTR_ERR(dumper));
- else
- schedule();
-
- /* be sure to tear down if kthread_run() failed */
- remove_wait_queue(&debug_ctlwq, &wait);
- set_current_state(TASK_RUNNING);
-}
-EXPORT_SYMBOL(libcfs_debug_dumplog);
-
-int libcfs_debug_init(unsigned long bufsize)
-{
- unsigned int max = libcfs_debug_mb;
- int rc = 0;
-
- init_waitqueue_head(&debug_ctlwq);
-
- if (libcfs_console_max_delay <= 0 || /* not set by user or */
- libcfs_console_min_delay <= 0 || /* set to invalid values */
- libcfs_console_min_delay >= libcfs_console_max_delay) {
- libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY;
- libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY;
- }
-
- if (libcfs_debug_file_path) {
- strlcpy(libcfs_debug_file_path_arr,
- libcfs_debug_file_path,
- sizeof(libcfs_debug_file_path_arr));
- }
-
- /* If libcfs_debug_mb is uninitialized or set to an invalid value,
- * just make the total buffers smp_num_cpus * TCD_MAX_PAGES
- */
- if (max > cfs_trace_max_debug_mb() || max < num_possible_cpus()) {
- max = TCD_MAX_PAGES;
- } else {
- max = max / num_possible_cpus();
- max <<= (20 - PAGE_SHIFT);
- }
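- /*
- * e.g. with libcfs_debug_mb == 400, 8 possible CPUs and 4KiB pages:
- * max = 400 / 8 = 50 MiB per CPU, then 50 << (20 - 12) = 12800 pages
- * per CPU are handed to cfs_tracefile_init().
- */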
-
- rc = cfs_tracefile_init(max);
- if (!rc) {
- libcfs_register_panic_notifier();
- libcfs_debug_mb = cfs_trace_get_debug_mb();
- }
-
- return rc;
-}
-
-int libcfs_debug_cleanup(void)
-{
- libcfs_unregister_panic_notifier();
- cfs_tracefile_exit();
- return 0;
-}
-
-int libcfs_debug_clear_buffer(void)
-{
- cfs_trace_flush_pages();
- return 0;
-}
-
-/* Debug markers, although printed by S_LNET, should not be marked as such. */
-#undef DEBUG_SUBSYSTEM
-#define DEBUG_SUBSYSTEM S_UNDEFINED
-int libcfs_debug_mark_buffer(const char *text)
-{
- CDEBUG(D_TRACE,
- "***************************************************\n");
- LCONSOLE(D_WARNING, "DEBUG MARKER: %s\n", text);
- CDEBUG(D_TRACE,
- "***************************************************\n");
-
- return 0;
-}
-
-#undef DEBUG_SUBSYSTEM
-#define DEBUG_SUBSYSTEM S_LNET
diff --git a/drivers/staging/lustre/lnet/libcfs/fail.c b/drivers/staging/lustre/lnet/libcfs/fail.c
deleted file mode 100644
index bd86b3b5bc34..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/fail.c
+++ /dev/null
@@ -1,146 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Oracle Corporation, Inc.
- */
-
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/libcfs/libcfs.h>
-#include <linux/random.h>
-
-unsigned long cfs_fail_loc;
-EXPORT_SYMBOL(cfs_fail_loc);
-
-unsigned int cfs_fail_val;
-EXPORT_SYMBOL(cfs_fail_val);
-
-int cfs_fail_err;
-EXPORT_SYMBOL(cfs_fail_err);
-
-DECLARE_WAIT_QUEUE_HEAD(cfs_race_waitq);
-EXPORT_SYMBOL(cfs_race_waitq);
-
-int cfs_race_state;
-EXPORT_SYMBOL(cfs_race_state);
-
-int __cfs_fail_check_set(u32 id, u32 value, int set)
-{
- static atomic_t cfs_fail_count = ATOMIC_INIT(0);
-
- LASSERT(!(id & CFS_FAIL_ONCE));
-
- if ((cfs_fail_loc & (CFS_FAILED | CFS_FAIL_ONCE)) ==
- (CFS_FAILED | CFS_FAIL_ONCE)) {
- atomic_set(&cfs_fail_count, 0); /* paranoia */
- return 0;
- }
-
- /* Fail with probability 1/cfs_fail_val */
- if (cfs_fail_loc & CFS_FAIL_RAND) {
- if (cfs_fail_val < 2 || prandom_u32_max(cfs_fail_val) > 0)
- return 0;
- }
-
- /* Skip the first cfs_fail_val occurrences, then fail */
- if (cfs_fail_loc & CFS_FAIL_SKIP) {
- if (atomic_inc_return(&cfs_fail_count) <= cfs_fail_val)
- return 0;
- }
-
- /* check cfs_fail_val... */
- if (set == CFS_FAIL_LOC_VALUE) {
- if (cfs_fail_val != -1 && cfs_fail_val != value)
- return 0;
- }
-
- /* Fail cfs_fail_val times, overridden by FAIL_ONCE */
- if (cfs_fail_loc & CFS_FAIL_SOME &&
- (!(cfs_fail_loc & CFS_FAIL_ONCE) || cfs_fail_val <= 1)) {
- int count = atomic_inc_return(&cfs_fail_count);
-
- if (count >= cfs_fail_val) {
- set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc);
- atomic_set(&cfs_fail_count, 0);
- /* we lost the race to increment */
- if (count > cfs_fail_val)
- return 0;
- }
- }
-
- /* Take the current call into account for FAIL_ONCE with ORSET only;
- * as RESET sets a new fail_loc, it does not change the current call
- */
- if ((set == CFS_FAIL_LOC_ORSET) && (value & CFS_FAIL_ONCE))
- set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc);
- /* Lost race to set CFS_FAILED_BIT. */
- if (test_and_set_bit(CFS_FAILED_BIT, &cfs_fail_loc)) {
- /* If CFS_FAIL_ONCE is valid, only one process can fail,
- * otherwise multi-process can fail at the same time.
- */
- if (cfs_fail_loc & CFS_FAIL_ONCE)
- return 0;
- }
-
- switch (set) {
- case CFS_FAIL_LOC_NOSET:
- case CFS_FAIL_LOC_VALUE:
- break;
- case CFS_FAIL_LOC_ORSET:
- cfs_fail_loc |= value & ~(CFS_FAILED | CFS_FAIL_ONCE);
- break;
- case CFS_FAIL_LOC_RESET:
- cfs_fail_loc = value;
- atomic_set(&cfs_fail_count, 0);
- break;
- default:
- LASSERTF(0, "called with bad set %u\n", set);
- break;
- }
-
- return 1;
-}
-EXPORT_SYMBOL(__cfs_fail_check_set);
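-
-/*
- * Typical use, via the CFS_FAIL_CHECK() wrapper from libcfs_fail.h
- * (OBD_FAIL_SOME_LOC is a made-up example location):
- *
- *	if (CFS_FAIL_CHECK(OBD_FAIL_SOME_LOC))
- *		return -EIO;
- *
- * With cfs_fail_loc = OBD_FAIL_SOME_LOC | CFS_FAIL_ONCE the injection
- * fires exactly once; with CFS_FAIL_RAND set it fires with probability
- * 1/cfs_fail_val on each check.
- */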
-
-int __cfs_fail_timeout_set(u32 id, u32 value, int ms, int set)
-{
- int ret;
-
- ret = __cfs_fail_check_set(id, value, set);
- if (ret && likely(ms > 0)) {
- CERROR("cfs_fail_timeout id %x sleeping for %dms\n",
- id, ms);
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(ms * HZ / 1000);
- CERROR("cfs_fail_timeout id %x awake\n", id);
- }
- return ret;
-}
-EXPORT_SYMBOL(__cfs_fail_timeout_set);
diff --git a/drivers/staging/lustre/lnet/libcfs/hash.c b/drivers/staging/lustre/lnet/libcfs/hash.c
deleted file mode 100644
index 48be66f0d654..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/hash.c
+++ /dev/null
@@ -1,2065 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/hash.c
- *
- * Implement a hash class for hash processing in the lustre system.
- *
- * Author: YuZhangyong <yzy@clusterfs.com>
- *
- * 2008-08-15: Brian Behlendorf <behlendorf1@llnl.gov>
- * - Simplified API and improved documentation
- * - Added per-hash feature flags:
- * * CFS_HASH_DEBUG additional validation
- * * CFS_HASH_REHASH dynamic rehashing
- * - Added per-hash statistics
- * - General performance enhancements
- *
- * 2009-07-31: Liang Zhen <zhen.liang@sun.com>
- * - move all stuff to libcfs
- * - don't allow cur_bits != max_bits without setting of CFS_HASH_REHASH
- * - ignore hs_rwlock if without CFS_HASH_REHASH setting
- * - buckets are allocated one by one (instead of as contiguous memory),
- * to avoid unnecessary cacheline conflicts
- *
- * 2010-03-01: Liang Zhen <zhen.liang@sun.com>
- * - "bucket" is a group of hlist_head now, user can specify bucket size
- * by bkt_bits of cfs_hash_create(), all hlist_heads in a bucket share
- * one lock for reducing memory overhead.
- *
- * - support lockless hash, caller will take care of locks:
- * avoid lock overhead for hash tables that are already protected
- * by locking in the caller for another reason
- *
- * - support both spin_lock/rwlock for bucket:
- * overhead of spinlock contention is lower than read/write
- * contention of rwlock, so using spinlock to serialize operations on
- * bucket is more reasonable for those frequently changed hash tables
- *
- * - support one-single lock mode:
- * one lock to protect all hash operations to avoid overhead of
- * multiple locks if hash table is always small
- *
- * - removed a lot of unnecessary addref & decref on hash elements:
- * addref & decref are atomic operations, which are expensive in
- * many use-cases.
- *
- * - support non-blocking cfs_hash_add() and cfs_hash_findadd():
- * some lustre use-cases require these functions to be strictly
- * non-blocking, we need to schedule required rehash on a different
- * thread on those cases.
- *
- * - safer rehash on large hash tables
- * In the old implementation, the rehash function would exclusively
- * lock the hash table and finish the rehash in one batch; that's
- * dangerous on an SMP system because rehashing millions of elements
- * could take a long time. The new implementation can release the
- * lock and relax the CPU in the middle of a rehash, so it's safe for
- * another thread to search/change the hash table even while it's
- * rehashing.
- *
- * - support two different refcount modes
- * . hash table has refcount on element
- * . hash table doesn't change refcount on adding/removing element
- *
- * - support long name hash table (for param-tree)
- *
- * - fix a bug for cfs_hash_rehash_key:
- * in the old implementation, cfs_hash_rehash_key could screw up the
- * hash-table because @key was overwritten without any protection.
- * Now we require the user to define hs_keycpy for rehash-enabled
- * hash tables, and cfs_hash_rehash_key will overwrite the hash key
- * inside the lock by calling hs_keycpy.
- *
- * - better hash iteration:
- * Now we support both locked iteration & lockless iteration of hash
- * table. Also, user can break the iteration by return 1 in callback.
- */
-#include <linux/seq_file.h>
-#include <linux/log2.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/libcfs/libcfs_hash.h>
-
-#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
-static unsigned int warn_on_depth = 8;
-module_param(warn_on_depth, uint, 0644);
-MODULE_PARM_DESC(warn_on_depth, "warning when hash depth is high.");
-#endif
-
-struct workqueue_struct *cfs_rehash_wq;
-
-static inline void
-cfs_hash_nl_lock(union cfs_hash_lock *lock, int exclusive) {}
-
-static inline void
-cfs_hash_nl_unlock(union cfs_hash_lock *lock, int exclusive) {}
-
-static inline void
-cfs_hash_spin_lock(union cfs_hash_lock *lock, int exclusive)
- __acquires(&lock->spin)
-{
- spin_lock(&lock->spin);
-}
-
-static inline void
-cfs_hash_spin_unlock(union cfs_hash_lock *lock, int exclusive)
- __releases(&lock->spin)
-{
- spin_unlock(&lock->spin);
-}
-
-static inline void
-cfs_hash_rw_lock(union cfs_hash_lock *lock, int exclusive)
- __acquires(&lock->rw)
-{
- if (!exclusive)
- read_lock(&lock->rw);
- else
- write_lock(&lock->rw);
-}
-
-static inline void
-cfs_hash_rw_unlock(union cfs_hash_lock *lock, int exclusive)
- __releases(&lock->rw)
-{
- if (!exclusive)
- read_unlock(&lock->rw);
- else
- write_unlock(&lock->rw);
-}
-
-/** No lock hash */
-static struct cfs_hash_lock_ops cfs_hash_nl_lops = {
- .hs_lock = cfs_hash_nl_lock,
- .hs_unlock = cfs_hash_nl_unlock,
- .hs_bkt_lock = cfs_hash_nl_lock,
- .hs_bkt_unlock = cfs_hash_nl_unlock,
-};
-
-/** no bucket lock, one spinlock to protect everything */
-static struct cfs_hash_lock_ops cfs_hash_nbl_lops = {
- .hs_lock = cfs_hash_spin_lock,
- .hs_unlock = cfs_hash_spin_unlock,
- .hs_bkt_lock = cfs_hash_nl_lock,
- .hs_bkt_unlock = cfs_hash_nl_unlock,
-};
-
-/** spin bucket lock, rehash is enabled */
-static struct cfs_hash_lock_ops cfs_hash_bkt_spin_lops = {
- .hs_lock = cfs_hash_rw_lock,
- .hs_unlock = cfs_hash_rw_unlock,
- .hs_bkt_lock = cfs_hash_spin_lock,
- .hs_bkt_unlock = cfs_hash_spin_unlock,
-};
-
-/** rw bucket lock, rehash is enabled */
-static struct cfs_hash_lock_ops cfs_hash_bkt_rw_lops = {
- .hs_lock = cfs_hash_rw_lock,
- .hs_unlock = cfs_hash_rw_unlock,
- .hs_bkt_lock = cfs_hash_rw_lock,
- .hs_bkt_unlock = cfs_hash_rw_unlock,
-};
-
-/** spin bucket lock, rehash is disabled */
-static struct cfs_hash_lock_ops cfs_hash_nr_bkt_spin_lops = {
- .hs_lock = cfs_hash_nl_lock,
- .hs_unlock = cfs_hash_nl_unlock,
- .hs_bkt_lock = cfs_hash_spin_lock,
- .hs_bkt_unlock = cfs_hash_spin_unlock,
-};
-
-/** rw bucket lock, rehash is disabled */
-static struct cfs_hash_lock_ops cfs_hash_nr_bkt_rw_lops = {
- .hs_lock = cfs_hash_nl_lock,
- .hs_unlock = cfs_hash_nl_unlock,
- .hs_bkt_lock = cfs_hash_rw_lock,
- .hs_bkt_unlock = cfs_hash_rw_unlock,
-};
-
-static void
-cfs_hash_lock_setup(struct cfs_hash *hs)
-{
- if (cfs_hash_with_no_lock(hs)) {
- hs->hs_lops = &cfs_hash_nl_lops;
-
- } else if (cfs_hash_with_no_bktlock(hs)) {
- hs->hs_lops = &cfs_hash_nbl_lops;
- spin_lock_init(&hs->hs_lock.spin);
-
- } else if (cfs_hash_with_rehash(hs)) {
- rwlock_init(&hs->hs_lock.rw);
-
- if (cfs_hash_with_rw_bktlock(hs))
- hs->hs_lops = &cfs_hash_bkt_rw_lops;
- else if (cfs_hash_with_spin_bktlock(hs))
- hs->hs_lops = &cfs_hash_bkt_spin_lops;
- else
- LBUG();
- } else {
- if (cfs_hash_with_rw_bktlock(hs))
- hs->hs_lops = &cfs_hash_nr_bkt_rw_lops;
- else if (cfs_hash_with_spin_bktlock(hs))
- hs->hs_lops = &cfs_hash_nr_bkt_spin_lops;
- else
- LBUG();
- }
-}
-
-/**
- * Simple hash head without depth tracking
- * new element is always added to head of hlist
- */
-struct cfs_hash_head {
- struct hlist_head hh_head; /**< entries list */
-};
-
-static int
-cfs_hash_hh_hhead_size(struct cfs_hash *hs)
-{
- return sizeof(struct cfs_hash_head);
-}
-
-static struct hlist_head *
-cfs_hash_hh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
- struct cfs_hash_head *head;
-
- head = (struct cfs_hash_head *)&bd->bd_bucket->hsb_head[0];
- return &head[bd->bd_offset].hh_head;
-}
-
-static int
-cfs_hash_hh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode)
-{
- hlist_add_head(hnode, cfs_hash_hh_hhead(hs, bd));
- return -1; /* unknown depth */
-}
-
-static int
-cfs_hash_hh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode)
-{
- hlist_del_init(hnode);
- return -1; /* unknown depth */
-}
-
-/**
- * Simple hash head with depth tracking
- * new element is always added to head of hlist
- */
-struct cfs_hash_head_dep {
- struct hlist_head hd_head; /**< entries list */
- unsigned int hd_depth; /**< list length */
-};
-
-static int
-cfs_hash_hd_hhead_size(struct cfs_hash *hs)
-{
- return sizeof(struct cfs_hash_head_dep);
-}
-
-static struct hlist_head *
-cfs_hash_hd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
- struct cfs_hash_head_dep *head;
-
- head = (struct cfs_hash_head_dep *)&bd->bd_bucket->hsb_head[0];
- return &head[bd->bd_offset].hd_head;
-}
-
-static int
-cfs_hash_hd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode)
-{
- struct cfs_hash_head_dep *hh;
-
- hh = container_of(cfs_hash_hd_hhead(hs, bd),
- struct cfs_hash_head_dep, hd_head);
- hlist_add_head(hnode, &hh->hd_head);
- return ++hh->hd_depth;
-}
-
-static int
-cfs_hash_hd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode)
-{
- struct cfs_hash_head_dep *hh;
-
- hh = container_of(cfs_hash_hd_hhead(hs, bd),
- struct cfs_hash_head_dep, hd_head);
- hlist_del_init(hnode);
- return --hh->hd_depth;
-}
-
-/**
- * double links hash head without depth tracking
- * new element is always added to tail of hlist
- */
-struct cfs_hash_dhead {
- struct hlist_head dh_head; /**< entries list */
- struct hlist_node *dh_tail; /**< the last entry */
-};
-
-static int
-cfs_hash_dh_hhead_size(struct cfs_hash *hs)
-{
- return sizeof(struct cfs_hash_dhead);
-}
-
-static struct hlist_head *
-cfs_hash_dh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
- struct cfs_hash_dhead *head;
-
- head = (struct cfs_hash_dhead *)&bd->bd_bucket->hsb_head[0];
- return &head[bd->bd_offset].dh_head;
-}
-
-static int
-cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode)
-{
- struct cfs_hash_dhead *dh;
-
- dh = container_of(cfs_hash_dh_hhead(hs, bd),
- struct cfs_hash_dhead, dh_head);
- if (dh->dh_tail) /* not empty */
- hlist_add_behind(hnode, dh->dh_tail);
- else /* empty list */
- hlist_add_head(hnode, &dh->dh_head);
- dh->dh_tail = hnode;
- return -1; /* unknown depth */
-}
-
-static int
-cfs_hash_dh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnd)
-{
- struct cfs_hash_dhead *dh;
-
- dh = container_of(cfs_hash_dh_hhead(hs, bd),
- struct cfs_hash_dhead, dh_head);
- if (!hnd->next) { /* it's the tail */
- dh->dh_tail = (hnd->pprev == &dh->dh_head.first) ? NULL :
- container_of(hnd->pprev, struct hlist_node, next);
- }
- hlist_del_init(hnd);
- return -1; /* unknown depth */
-}
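-
-/*
- * NB: recovering the new tail above relies on hlist layout: hnd->pprev
- * points at the previous node's 'next' field ('next' is at offset 0 in
- * struct hlist_node, so container_of() yields the previous node), and
- * the explicit comparison against &dh_head.first catches the case
- * where the deleted entry was the only one on the chain.
- */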
-
-/**
- * double links hash head with depth tracking
- * new element is always added to tail of hlist
- */
-struct cfs_hash_dhead_dep {
- struct hlist_head dd_head; /**< entries list */
- struct hlist_node *dd_tail; /**< the last entry */
- unsigned int dd_depth; /**< list length */
-};
-
-static int
-cfs_hash_dd_hhead_size(struct cfs_hash *hs)
-{
- return sizeof(struct cfs_hash_dhead_dep);
-}
-
-static struct hlist_head *
-cfs_hash_dd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
-{
- struct cfs_hash_dhead_dep *head;
-
- head = (struct cfs_hash_dhead_dep *)&bd->bd_bucket->hsb_head[0];
- return &head[bd->bd_offset].dd_head;
-}
-
-static int
-cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode)
-{
- struct cfs_hash_dhead_dep *dh;
-
- dh = container_of(cfs_hash_dd_hhead(hs, bd),
- struct cfs_hash_dhead_dep, dd_head);
- if (dh->dd_tail) /* not empty */
- hlist_add_behind(hnode, dh->dd_tail);
- else /* empty list */
- hlist_add_head(hnode, &dh->dd_head);
- dh->dd_tail = hnode;
- return ++dh->dd_depth;
-}
-
-static int
-cfs_hash_dd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnd)
-{
- struct cfs_hash_dhead_dep *dh;
-
- dh = container_of(cfs_hash_dd_hhead(hs, bd),
- struct cfs_hash_dhead_dep, dd_head);
- if (!hnd->next) { /* it's the tail */
- dh->dd_tail = (hnd->pprev == &dh->dd_head.first) ? NULL :
- container_of(hnd->pprev, struct hlist_node, next);
- }
- hlist_del_init(hnd);
- return --dh->dd_depth;
-}
-
-static struct cfs_hash_hlist_ops cfs_hash_hh_hops = {
- .hop_hhead = cfs_hash_hh_hhead,
- .hop_hhead_size = cfs_hash_hh_hhead_size,
- .hop_hnode_add = cfs_hash_hh_hnode_add,
- .hop_hnode_del = cfs_hash_hh_hnode_del,
-};
-
-static struct cfs_hash_hlist_ops cfs_hash_hd_hops = {
- .hop_hhead = cfs_hash_hd_hhead,
- .hop_hhead_size = cfs_hash_hd_hhead_size,
- .hop_hnode_add = cfs_hash_hd_hnode_add,
- .hop_hnode_del = cfs_hash_hd_hnode_del,
-};
-
-static struct cfs_hash_hlist_ops cfs_hash_dh_hops = {
- .hop_hhead = cfs_hash_dh_hhead,
- .hop_hhead_size = cfs_hash_dh_hhead_size,
- .hop_hnode_add = cfs_hash_dh_hnode_add,
- .hop_hnode_del = cfs_hash_dh_hnode_del,
-};
-
-static struct cfs_hash_hlist_ops cfs_hash_dd_hops = {
- .hop_hhead = cfs_hash_dd_hhead,
- .hop_hhead_size = cfs_hash_dd_hhead_size,
- .hop_hnode_add = cfs_hash_dd_hnode_add,
- .hop_hnode_del = cfs_hash_dd_hnode_del,
-};
-
-static void
-cfs_hash_hlist_setup(struct cfs_hash *hs)
-{
- if (cfs_hash_with_add_tail(hs)) {
- hs->hs_hops = cfs_hash_with_depth(hs) ?
- &cfs_hash_dd_hops : &cfs_hash_dh_hops;
- } else {
- hs->hs_hops = cfs_hash_with_depth(hs) ?
- &cfs_hash_hd_hops : &cfs_hash_hh_hops;
- }
-}
-
-static void
-cfs_hash_bd_from_key(struct cfs_hash *hs, struct cfs_hash_bucket **bkts,
- unsigned int bits, const void *key, struct cfs_hash_bd *bd)
-{
- unsigned int index = cfs_hash_id(hs, key, (1U << bits) - 1);
-
- LASSERT(bits == hs->hs_cur_bits || bits == hs->hs_rehash_bits);
-
- bd->bd_bucket = bkts[index & ((1U << (bits - hs->hs_bkt_bits)) - 1)];
- bd->bd_offset = index >> (bits - hs->hs_bkt_bits);
-}
-
-void
-cfs_hash_bd_get(struct cfs_hash *hs, const void *key, struct cfs_hash_bd *bd)
-{
- /* NB: caller should hold hs->hs_lock.rw if REHASH is set */
- if (likely(!hs->hs_rehash_buckets)) {
- cfs_hash_bd_from_key(hs, hs->hs_buckets,
- hs->hs_cur_bits, key, bd);
- } else {
- LASSERT(hs->hs_rehash_bits);
- cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
- hs->hs_rehash_bits, key, bd);
- }
-}
-EXPORT_SYMBOL(cfs_hash_bd_get);
-
-static inline void
-cfs_hash_bd_dep_record(struct cfs_hash *hs, struct cfs_hash_bd *bd, int dep_cur)
-{
- if (likely(dep_cur <= bd->bd_bucket->hsb_depmax))
- return;
-
- bd->bd_bucket->hsb_depmax = dep_cur;
-# if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
- if (likely(!warn_on_depth ||
- max(warn_on_depth, hs->hs_dep_max) >= dep_cur))
- return;
-
- spin_lock(&hs->hs_dep_lock);
- hs->hs_dep_max = dep_cur;
- hs->hs_dep_bkt = bd->bd_bucket->hsb_index;
- hs->hs_dep_off = bd->bd_offset;
- hs->hs_dep_bits = hs->hs_cur_bits;
- spin_unlock(&hs->hs_dep_lock);
-
- queue_work(cfs_rehash_wq, &hs->hs_dep_work);
-# endif
-}
-
-void
-cfs_hash_bd_add_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode)
-{
- int rc;
-
- rc = hs->hs_hops->hop_hnode_add(hs, bd, hnode);
- cfs_hash_bd_dep_record(hs, bd, rc);
- bd->bd_bucket->hsb_version++;
- if (unlikely(!bd->bd_bucket->hsb_version))
- bd->bd_bucket->hsb_version++;
- bd->bd_bucket->hsb_count++;
-
- if (cfs_hash_with_counter(hs))
- atomic_inc(&hs->hs_count);
- if (!cfs_hash_with_no_itemref(hs))
- cfs_hash_get(hs, hnode);
-}
-EXPORT_SYMBOL(cfs_hash_bd_add_locked);
-
-void
-cfs_hash_bd_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode)
-{
- hs->hs_hops->hop_hnode_del(hs, bd, hnode);
-
- LASSERT(bd->bd_bucket->hsb_count > 0);
- bd->bd_bucket->hsb_count--;
- bd->bd_bucket->hsb_version++;
- if (unlikely(!bd->bd_bucket->hsb_version))
- bd->bd_bucket->hsb_version++;
-
- if (cfs_hash_with_counter(hs)) {
- LASSERT(atomic_read(&hs->hs_count) > 0);
- atomic_dec(&hs->hs_count);
- }
- if (!cfs_hash_with_no_itemref(hs))
- cfs_hash_put_locked(hs, hnode);
-}
-EXPORT_SYMBOL(cfs_hash_bd_del_locked);
-
-void
-cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old,
- struct cfs_hash_bd *bd_new, struct hlist_node *hnode)
-{
- struct cfs_hash_bucket *obkt = bd_old->bd_bucket;
- struct cfs_hash_bucket *nbkt = bd_new->bd_bucket;
- int rc;
-
- if (!cfs_hash_bd_compare(bd_old, bd_new))
- return;
-
- /* use cfs_hash_bd_hnode_add/del, to avoid atomic & refcount ops
- * in cfs_hash_bd_del/add_locked
- */
- hs->hs_hops->hop_hnode_del(hs, bd_old, hnode);
- rc = hs->hs_hops->hop_hnode_add(hs, bd_new, hnode);
- cfs_hash_bd_dep_record(hs, bd_new, rc);
-
- LASSERT(obkt->hsb_count > 0);
- obkt->hsb_count--;
- obkt->hsb_version++;
- if (unlikely(!obkt->hsb_version))
- obkt->hsb_version++;
- nbkt->hsb_count++;
- nbkt->hsb_version++;
- if (unlikely(!nbkt->hsb_version))
- nbkt->hsb_version++;
-}
-
-enum {
- /** always set, for sanity (avoid ZERO intent) */
- CFS_HS_LOOKUP_MASK_FIND = BIT(0),
- /** return entry with a ref */
- CFS_HS_LOOKUP_MASK_REF = BIT(1),
- /** add entry if not existing */
- CFS_HS_LOOKUP_MASK_ADD = BIT(2),
- /** delete entry, ignore other masks */
- CFS_HS_LOOKUP_MASK_DEL = BIT(3),
-};
-
-enum cfs_hash_lookup_intent {
- /** return item w/o refcount */
- CFS_HS_LOOKUP_IT_PEEK = CFS_HS_LOOKUP_MASK_FIND,
- /** return item with refcount */
- CFS_HS_LOOKUP_IT_FIND = (CFS_HS_LOOKUP_MASK_FIND |
- CFS_HS_LOOKUP_MASK_REF),
- /** return item w/o refcount if existed, otherwise add */
- CFS_HS_LOOKUP_IT_ADD = (CFS_HS_LOOKUP_MASK_FIND |
- CFS_HS_LOOKUP_MASK_ADD),
- /** return item with refcount if existed, otherwise add */
- CFS_HS_LOOKUP_IT_FINDADD = (CFS_HS_LOOKUP_IT_FIND |
- CFS_HS_LOOKUP_MASK_ADD),
- /** delete if existed */
- CFS_HS_LOOKUP_IT_FINDDEL = (CFS_HS_LOOKUP_MASK_FIND |
- CFS_HS_LOOKUP_MASK_DEL)
-};
-
-static struct hlist_node *
-cfs_hash_bd_lookup_intent(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- const void *key, struct hlist_node *hnode,
- enum cfs_hash_lookup_intent intent)
-
-{
- struct hlist_head *hhead = cfs_hash_bd_hhead(hs, bd);
- struct hlist_node *ehnode;
- struct hlist_node *match;
- int intent_add = intent & CFS_HS_LOOKUP_MASK_ADD;
-
- /* with this function, we can avoid a lot of useless refcount ops,
- * which are expensive atomic operations most of the time.
- */
- match = intent_add ? NULL : hnode;
- hlist_for_each(ehnode, hhead) {
- if (!cfs_hash_keycmp(hs, key, ehnode))
- continue;
-
- if (match && match != ehnode) /* can't match */
- continue;
-
- /* match and ... */
- if (intent & CFS_HS_LOOKUP_MASK_DEL) {
- cfs_hash_bd_del_locked(hs, bd, ehnode);
- return ehnode;
- }
-
- /* caller wants refcount? */
- if (intent & CFS_HS_LOOKUP_MASK_REF)
- cfs_hash_get(hs, ehnode);
- return ehnode;
- }
- /* no match item */
- if (!intent_add)
- return NULL;
-
- LASSERT(hnode);
- cfs_hash_bd_add_locked(hs, bd, hnode);
- return hnode;
-}
-
-struct hlist_node *
-cfs_hash_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- const void *key)
-{
- return cfs_hash_bd_lookup_intent(hs, bd, key, NULL,
- CFS_HS_LOOKUP_IT_FIND);
-}
-EXPORT_SYMBOL(cfs_hash_bd_lookup_locked);
-
-struct hlist_node *
-cfs_hash_bd_peek_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- const void *key)
-{
- return cfs_hash_bd_lookup_intent(hs, bd, key, NULL,
- CFS_HS_LOOKUP_IT_PEEK);
-}
-EXPORT_SYMBOL(cfs_hash_bd_peek_locked);
-
-static void
-cfs_hash_multi_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- unsigned int n, int excl)
-{
- struct cfs_hash_bucket *prev = NULL;
- int i;
-
- /**
- * bds must be ordered by ascending bd->bd_bucket->hsb_index.
- * NB: it's possible that several bds point to the same bucket but
- * have different bd::bd_offset, so we need to take care to avoid
- * deadlock.
- */
- cfs_hash_for_each_bd(bds, n, i) {
- if (prev == bds[i].bd_bucket)
- continue;
-
- LASSERT(!prev || prev->hsb_index < bds[i].bd_bucket->hsb_index);
- cfs_hash_bd_lock(hs, &bds[i], excl);
- prev = bds[i].bd_bucket;
- }
-}
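-
-/*
- * Ordered locking is the deadlock-avoidance rule here: because every
- * caller locks buckets in ascending hsb_index order (skipping
- * duplicates), two threads can never each hold one bucket while
- * waiting for the other's.
- */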
-
-static void
-cfs_hash_multi_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- unsigned int n, int excl)
-{
- struct cfs_hash_bucket *prev = NULL;
- int i;
-
- cfs_hash_for_each_bd(bds, n, i) {
- if (prev != bds[i].bd_bucket) {
- cfs_hash_bd_unlock(hs, &bds[i], excl);
- prev = bds[i].bd_bucket;
- }
- }
-}
-
-static struct hlist_node *
-cfs_hash_multi_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- unsigned int n, const void *key)
-{
- struct hlist_node *ehnode;
- unsigned int i;
-
- cfs_hash_for_each_bd(bds, n, i) {
- ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, NULL,
- CFS_HS_LOOKUP_IT_FIND);
- if (ehnode)
- return ehnode;
- }
- return NULL;
-}
-
-static struct hlist_node *
-cfs_hash_multi_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- unsigned int n, const void *key,
- struct hlist_node *hnode, int noref)
-{
- struct hlist_node *ehnode;
- int intent;
- unsigned int i;
-
- LASSERT(hnode);
- intent = (!noref * CFS_HS_LOOKUP_MASK_REF) | CFS_HS_LOOKUP_IT_PEEK;
-
- cfs_hash_for_each_bd(bds, n, i) {
- ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key,
- NULL, intent);
- if (ehnode)
- return ehnode;
- }
-
- if (i == 1) { /* only one bucket */
- cfs_hash_bd_add_locked(hs, &bds[0], hnode);
- } else {
- struct cfs_hash_bd mybd;
-
- cfs_hash_bd_get(hs, key, &mybd);
- cfs_hash_bd_add_locked(hs, &mybd, hnode);
- }
-
- return hnode;
-}
-
-static struct hlist_node *
-cfs_hash_multi_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- unsigned int n, const void *key,
- struct hlist_node *hnode)
-{
- struct hlist_node *ehnode;
- unsigned int i;
-
- cfs_hash_for_each_bd(bds, n, i) {
- ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, hnode,
- CFS_HS_LOOKUP_IT_FINDDEL);
- if (ehnode)
- return ehnode;
- }
- return NULL;
-}
-
-static void
-cfs_hash_bd_order(struct cfs_hash_bd *bd1, struct cfs_hash_bd *bd2)
-{
- int rc;
-
- if (!bd2->bd_bucket)
- return;
-
- if (!bd1->bd_bucket) {
- *bd1 = *bd2;
- bd2->bd_bucket = NULL;
- return;
- }
-
- rc = cfs_hash_bd_compare(bd1, bd2);
- if (!rc)
- bd2->bd_bucket = NULL;
- else if (rc > 0)
- swap(*bd1, *bd2); /* swap bd1 and bd2 */
-}
-
-void
-cfs_hash_dual_bd_get(struct cfs_hash *hs, const void *key,
- struct cfs_hash_bd *bds)
-{
- /* NB: caller should hold hs_lock.rw if REHASH is set */
- cfs_hash_bd_from_key(hs, hs->hs_buckets,
- hs->hs_cur_bits, key, &bds[0]);
- if (likely(!hs->hs_rehash_buckets)) {
- /* no rehash or not rehashing */
- bds[1].bd_bucket = NULL;
- return;
- }
-
- LASSERT(hs->hs_rehash_bits);
- cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
- hs->hs_rehash_bits, key, &bds[1]);
-
- cfs_hash_bd_order(&bds[0], &bds[1]);
-}
-
-void
-cfs_hash_dual_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl)
-{
- cfs_hash_multi_bd_lock(hs, bds, 2, excl);
-}
-
-void
-cfs_hash_dual_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl)
-{
- cfs_hash_multi_bd_unlock(hs, bds, 2, excl);
-}
-
-struct hlist_node *
-cfs_hash_dual_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- const void *key)
-{
- return cfs_hash_multi_bd_lookup_locked(hs, bds, 2, key);
-}
-
-struct hlist_node *
-cfs_hash_dual_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- const void *key, struct hlist_node *hnode,
- int noref)
-{
- return cfs_hash_multi_bd_findadd_locked(hs, bds, 2, key,
- hnode, noref);
-}
-
-struct hlist_node *
-cfs_hash_dual_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- const void *key, struct hlist_node *hnode)
-{
- return cfs_hash_multi_bd_finddel_locked(hs, bds, 2, key, hnode);
-}
-
-static void
-cfs_hash_buckets_free(struct cfs_hash_bucket **buckets,
- int bkt_size, int prev_size, int size)
-{
- int i;
-
- for (i = prev_size; i < size; i++)
- kfree(buckets[i]);
-
- kvfree(buckets);
-}
-
-/*
- * Create or grow bucket memory. Return old_buckets if no allocation was
- * needed, the newly allocated buckets if allocation was needed and
- * successful, and NULL on error.
- */
-static struct cfs_hash_bucket **
-cfs_hash_buckets_realloc(struct cfs_hash *hs, struct cfs_hash_bucket **old_bkts,
- unsigned int old_size, unsigned int new_size)
-{
- struct cfs_hash_bucket **new_bkts;
- int i;
-
- LASSERT(!old_size || old_bkts);
-
- if (old_bkts && old_size == new_size)
- return old_bkts;
-
- new_bkts = kvmalloc_array(new_size, sizeof(new_bkts[0]), GFP_KERNEL);
- if (!new_bkts)
- return NULL;
-
- if (old_bkts) {
- memcpy(new_bkts, old_bkts,
- min(old_size, new_size) * sizeof(*old_bkts));
- }
-
- for (i = old_size; i < new_size; i++) {
- struct hlist_head *hhead;
- struct cfs_hash_bd bd;
-
- new_bkts[i] = kzalloc(cfs_hash_bkt_size(hs), GFP_KERNEL);
- if (!new_bkts[i]) {
- cfs_hash_buckets_free(new_bkts, cfs_hash_bkt_size(hs),
- old_size, new_size);
- return NULL;
- }
-
- new_bkts[i]->hsb_index = i;
- new_bkts[i]->hsb_version = 1; /* shouldn't be zero */
- new_bkts[i]->hsb_depmax = -1; /* unknown */
- bd.bd_bucket = new_bkts[i];
- cfs_hash_bd_for_each_hlist(hs, &bd, hhead)
- INIT_HLIST_HEAD(hhead);
-
- if (cfs_hash_with_no_lock(hs) ||
- cfs_hash_with_no_bktlock(hs))
- continue;
-
- if (cfs_hash_with_rw_bktlock(hs))
- rwlock_init(&new_bkts[i]->hsb_lock.rw);
- else if (cfs_hash_with_spin_bktlock(hs))
- spin_lock_init(&new_bkts[i]->hsb_lock.spin);
- else
- LBUG(); /* invalid use-case */
- }
- return new_bkts;
-}
-
-/**
- * Initialize new libcfs hash, where:
- * @name - Descriptive hash name
- * @cur_bits - Initial hash table size, in bits
- * @max_bits - Maximum allowed hash table resize, in bits
- * @ops - Registered hash table operations
- * @flags - CFS_HASH_REHASH enable dynamic hash resizing
- * - CFS_HASH_SORT enable chained hash sort
- */
-static void cfs_hash_rehash_worker(struct work_struct *work);
-
-#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
-static void cfs_hash_dep_print(struct work_struct *work)
-{
- struct cfs_hash *hs = container_of(work, struct cfs_hash, hs_dep_work);
- int dep;
- int bkt;
- int off;
- int bits;
-
- spin_lock(&hs->hs_dep_lock);
- dep = hs->hs_dep_max;
- bkt = hs->hs_dep_bkt;
- off = hs->hs_dep_off;
- bits = hs->hs_dep_bits;
- spin_unlock(&hs->hs_dep_lock);
-
- LCONSOLE_WARN("#### HASH %s (bits: %d): max depth %d at bucket %d/%d\n",
- hs->hs_name, bits, dep, bkt, off);
- spin_lock(&hs->hs_dep_lock);
- hs->hs_dep_bits = 0; /* mark as workitem done */
- spin_unlock(&hs->hs_dep_lock);
-}
-
-static void cfs_hash_depth_wi_init(struct cfs_hash *hs)
-{
- spin_lock_init(&hs->hs_dep_lock);
- INIT_WORK(&hs->hs_dep_work, cfs_hash_dep_print);
-}
-
-static void cfs_hash_depth_wi_cancel(struct cfs_hash *hs)
-{
- cancel_work_sync(&hs->hs_dep_work);
-}
-
-#else /* CFS_HASH_DEBUG_LEVEL < CFS_HASH_DEBUG_1 */
-
-static inline void cfs_hash_depth_wi_init(struct cfs_hash *hs) {}
-static inline void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) {}
-
-#endif /* CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 */
-
-struct cfs_hash *
-cfs_hash_create(char *name, unsigned int cur_bits, unsigned int max_bits,
- unsigned int bkt_bits, unsigned int extra_bytes,
- unsigned int min_theta, unsigned int max_theta,
- struct cfs_hash_ops *ops, unsigned int flags)
-{
- struct cfs_hash *hs;
- int len;
-
- BUILD_BUG_ON(CFS_HASH_THETA_BITS >= 15);
-
- LASSERT(name);
- LASSERT(ops->hs_key);
- LASSERT(ops->hs_hash);
- LASSERT(ops->hs_object);
- LASSERT(ops->hs_keycmp);
- LASSERT(ops->hs_get);
- LASSERT(ops->hs_put || ops->hs_put_locked);
-
- if (flags & CFS_HASH_REHASH)
- flags |= CFS_HASH_COUNTER; /* must have counter */
-
- LASSERT(cur_bits > 0);
- LASSERT(cur_bits >= bkt_bits);
- LASSERT(max_bits >= cur_bits && max_bits < 31);
- LASSERT(ergo(!(flags & CFS_HASH_REHASH), cur_bits == max_bits));
- LASSERT(ergo(flags & CFS_HASH_REHASH, !(flags & CFS_HASH_NO_LOCK)));
- LASSERT(ergo(flags & CFS_HASH_REHASH_KEY, ops->hs_keycpy));
-
- len = !(flags & CFS_HASH_BIGNAME) ?
- CFS_HASH_NAME_LEN : CFS_HASH_BIGNAME_LEN;
- hs = kzalloc(offsetof(struct cfs_hash, hs_name[len]), GFP_KERNEL);
- if (!hs)
- return NULL;
-
- strlcpy(hs->hs_name, name, len);
- hs->hs_flags = flags;
-
- atomic_set(&hs->hs_refcount, 1);
- atomic_set(&hs->hs_count, 0);
-
- cfs_hash_lock_setup(hs);
- cfs_hash_hlist_setup(hs);
-
- hs->hs_cur_bits = (u8)cur_bits;
- hs->hs_min_bits = (u8)cur_bits;
- hs->hs_max_bits = (u8)max_bits;
- hs->hs_bkt_bits = (u8)bkt_bits;
-
- hs->hs_ops = ops;
- hs->hs_extra_bytes = extra_bytes;
- hs->hs_rehash_bits = 0;
- INIT_WORK(&hs->hs_rehash_work, cfs_hash_rehash_worker);
- cfs_hash_depth_wi_init(hs);
-
- if (cfs_hash_with_rehash(hs))
- __cfs_hash_set_theta(hs, min_theta, max_theta);
-
- hs->hs_buckets = cfs_hash_buckets_realloc(hs, NULL, 0,
- CFS_HASH_NBKT(hs));
- if (hs->hs_buckets)
- return hs;
-
- kfree(hs);
- return NULL;
-}
-EXPORT_SYMBOL(cfs_hash_create);
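-
-/*
- * Creation sketch (the my_* names are purely illustrative, as are the
- * theta and flags arguments):
- *
- *	static struct cfs_hash_ops my_hash_ops = {
- *		.hs_hash	= my_hash,
- *		.hs_key		= my_key,
- *		.hs_keycmp	= my_keycmp,
- *		.hs_object	= my_object,
- *		.hs_get		= my_get,
- *		.hs_put_locked	= my_put_locked,
- *	};
- *
- *	hs = cfs_hash_create("my_hash", 5, 10, 3, 0,
- *			     CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
- *			     &my_hash_ops, CFS_HASH_DEFAULT);
- *
- * i.e. 2^5 hash slots initially, growable to 2^10, with 2^3 hlist
- * heads per bucket and no extra bytes per bucket.
- */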
-
-/**
- * Cleanup libcfs hash @hs.
- */
-static void
-cfs_hash_destroy(struct cfs_hash *hs)
-{
- struct hlist_node *hnode;
- struct hlist_node *pos;
- struct cfs_hash_bd bd;
- int i;
-
- LASSERT(hs);
- LASSERT(!cfs_hash_is_exiting(hs) &&
- !cfs_hash_is_iterating(hs));
-
- /**
- * prohibit further rehashes; no lock is needed because
- * I'm the only (last) one who can change it.
- */
- hs->hs_exiting = 1;
- if (cfs_hash_with_rehash(hs))
- cfs_hash_rehash_cancel(hs);
-
- cfs_hash_depth_wi_cancel(hs);
- /* rehash should be done/canceled */
- LASSERT(hs->hs_buckets && !hs->hs_rehash_buckets);
-
- cfs_hash_for_each_bucket(hs, &bd, i) {
- struct hlist_head *hhead;
-
- LASSERT(bd.bd_bucket);
- /* no need to take this lock, just for consistent code */
- cfs_hash_bd_lock(hs, &bd, 1);
-
- cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
- hlist_for_each_safe(hnode, pos, hhead) {
- LASSERTF(!cfs_hash_with_assert_empty(hs),
- "hash %s bucket %u(%u) is not empty: %u items left\n",
- hs->hs_name, bd.bd_bucket->hsb_index,
- bd.bd_offset, bd.bd_bucket->hsb_count);
- /* can't assert key validity, because we
- * may have interrupted a rehash
- */
- cfs_hash_bd_del_locked(hs, &bd, hnode);
- cfs_hash_exit(hs, hnode);
- }
- }
- LASSERT(!bd.bd_bucket->hsb_count);
- cfs_hash_bd_unlock(hs, &bd, 1);
- cond_resched();
- }
-
- LASSERT(!atomic_read(&hs->hs_count));
-
- cfs_hash_buckets_free(hs->hs_buckets, cfs_hash_bkt_size(hs),
- 0, CFS_HASH_NBKT(hs));
- kfree(hs);
-}
-
-struct cfs_hash *cfs_hash_getref(struct cfs_hash *hs)
-{
- if (atomic_inc_not_zero(&hs->hs_refcount))
- return hs;
- return NULL;
-}
-EXPORT_SYMBOL(cfs_hash_getref);
-
-void cfs_hash_putref(struct cfs_hash *hs)
-{
- if (atomic_dec_and_test(&hs->hs_refcount))
- cfs_hash_destroy(hs);
-}
-EXPORT_SYMBOL(cfs_hash_putref);
-
-static inline int
-cfs_hash_rehash_bits(struct cfs_hash *hs)
-{
- if (cfs_hash_with_no_lock(hs) ||
- !cfs_hash_with_rehash(hs))
- return -EOPNOTSUPP;
-
- if (unlikely(cfs_hash_is_exiting(hs)))
- return -ESRCH;
-
- if (unlikely(cfs_hash_is_rehashing(hs)))
- return -EALREADY;
-
- if (unlikely(cfs_hash_is_iterating(hs)))
- return -EAGAIN;
-
- /* XXX: need to handle case with max_theta != 2.0
- * and the case with min_theta != 0.5
- */
- if ((hs->hs_cur_bits < hs->hs_max_bits) &&
- (__cfs_hash_theta(hs) > hs->hs_max_theta))
- return hs->hs_cur_bits + 1;
-
- if (!cfs_hash_with_shrink(hs))
- return 0;
-
- if ((hs->hs_cur_bits > hs->hs_min_bits) &&
- (__cfs_hash_theta(hs) < hs->hs_min_theta))
- return hs->hs_cur_bits - 1;
-
- return 0;
-}
-
-/**
- * don't allow inline rehash if:
- * - user wants non-blocking change (add/del) on hash table
- * - too many elements
- */
-static inline int
-cfs_hash_rehash_inline(struct cfs_hash *hs)
-{
- return !cfs_hash_with_nblk_change(hs) &&
- atomic_read(&hs->hs_count) < CFS_HASH_LOOP_HOG;
-}
-
-/**
- * Add item @hnode to libcfs hash @hs using @key. The registered
- * ops->hs_get function will be called when the item is added.
- */
-void
-cfs_hash_add(struct cfs_hash *hs, const void *key, struct hlist_node *hnode)
-{
- struct cfs_hash_bd bd;
- int bits;
-
- LASSERT(hlist_unhashed(hnode));
-
- cfs_hash_lock(hs, 0);
- cfs_hash_bd_get_and_lock(hs, key, &bd, 1);
-
- cfs_hash_key_validate(hs, key, hnode);
- cfs_hash_bd_add_locked(hs, &bd, hnode);
-
- cfs_hash_bd_unlock(hs, &bd, 1);
-
- bits = cfs_hash_rehash_bits(hs);
- cfs_hash_unlock(hs, 0);
- if (bits > 0)
- cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
-}
-EXPORT_SYMBOL(cfs_hash_add);
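-
-/*
- * Sketch (continuing the hypothetical my_obj example from above):
- * insertion. cfs_hash_add() calls ops->hs_get, so the table holds its
- * own reference on the object.
- */
-static void my_obj_insert(struct cfs_hash *hash, struct my_obj *obj)
-{
- cfs_hash_add(hash, &obj->mo_id, &obj->mo_hnode);
-}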
-
-static struct hlist_node *
-cfs_hash_find_or_add(struct cfs_hash *hs, const void *key,
- struct hlist_node *hnode, int noref)
-{
- struct hlist_node *ehnode;
- struct cfs_hash_bd bds[2];
- int bits = 0;
-
- LASSERTF(hlist_unhashed(hnode), "hnode = %p\n", hnode);
-
- cfs_hash_lock(hs, 0);
- cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1);
-
- cfs_hash_key_validate(hs, key, hnode);
- ehnode = cfs_hash_dual_bd_findadd_locked(hs, bds, key,
- hnode, noref);
- cfs_hash_dual_bd_unlock(hs, bds, 1);
-
- if (ehnode == hnode) /* new item added */
- bits = cfs_hash_rehash_bits(hs);
- cfs_hash_unlock(hs, 0);
- if (bits > 0)
- cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
-
- return ehnode;
-}
-
-/**
- * Add item @hnode to libcfs hash @hs using @key. The registered
- * ops->hs_get function will be called if the item was added.
- * Returns 0 on success or -EALREADY on key collisions.
- */
-int
-cfs_hash_add_unique(struct cfs_hash *hs, const void *key,
- struct hlist_node *hnode)
-{
- return cfs_hash_find_or_add(hs, key, hnode, 1) != hnode ?
- -EALREADY : 0;
-}
-EXPORT_SYMBOL(cfs_hash_add_unique);
-
-/**
- * Add item @hnode to libcfs hash @hs using @key. If this @key
- * already exists in the hash then ops->hs_get will be called on the
- * conflicting entry and that entry will be returned to the caller.
- * Otherwise ops->hs_get is called on the item which was added.
- */
-void *
-cfs_hash_findadd_unique(struct cfs_hash *hs, const void *key,
- struct hlist_node *hnode)
-{
- hnode = cfs_hash_find_or_add(hs, key, hnode, 0);
-
- return cfs_hash_object(hs, hnode);
-}
-EXPORT_SYMBOL(cfs_hash_findadd_unique);
-
-/**
- * Delete item @hnode from the libcfs hash @hs using @key. The @key
- * is required to ensure the correct hash bucket is locked since there
- * is no direct linkage from the item to the bucket. The object
- * removed from the hash will be returned and ops->hs_put is called
- * on the removed object.
- */
-void *
-cfs_hash_del(struct cfs_hash *hs, const void *key, struct hlist_node *hnode)
-{
- void *obj = NULL;
- int bits = 0;
- struct cfs_hash_bd bds[2];
-
- cfs_hash_lock(hs, 0);
- cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1);
-
- /* NB: do nothing if @hnode is not in hash table */
- if (!hnode || !hlist_unhashed(hnode)) {
- if (!bds[1].bd_bucket && hnode) {
- cfs_hash_bd_del_locked(hs, &bds[0], hnode);
- } else {
- hnode = cfs_hash_dual_bd_finddel_locked(hs, bds,
- key, hnode);
- }
- }
-
- if (hnode) {
- obj = cfs_hash_object(hs, hnode);
- bits = cfs_hash_rehash_bits(hs);
- }
-
- cfs_hash_dual_bd_unlock(hs, bds, 1);
- cfs_hash_unlock(hs, 0);
- if (bits > 0)
- cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
-
- return obj;
-}
-EXPORT_SYMBOL(cfs_hash_del);
-
-/**
- * Delete the item matching @key in the libcfs hash @hs. The first @key
- * found in the hash will be removed; if the key exists multiple times
- * in @hs, this function must be called once per key. The removed object
- * will be returned and ops->hs_put is called on the removed object.
- */
-void *
-cfs_hash_del_key(struct cfs_hash *hs, const void *key)
-{
- return cfs_hash_del(hs, key, NULL);
-}
-EXPORT_SYMBOL(cfs_hash_del_key);
-
-/**
- * Lookup an item using @key in the libcfs hash @hs and return it.
- * If the @key is found in the hash, hs->hs_get() is called and the
- * matching object is returned. It is the caller's responsibility
- * to call the counterpart ops->hs_put using the cfs_hash_put() macro
- * when finished with the object. If the @key was not found
- * in the hash @hs, NULL is returned.
- */
-void *
-cfs_hash_lookup(struct cfs_hash *hs, const void *key)
-{
- void *obj = NULL;
- struct hlist_node *hnode;
- struct cfs_hash_bd bds[2];
-
- cfs_hash_lock(hs, 0);
- cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0);
-
- hnode = cfs_hash_dual_bd_lookup_locked(hs, bds, key);
- if (hnode)
- obj = cfs_hash_object(hs, hnode);
-
- cfs_hash_dual_bd_unlock(hs, bds, 0);
- cfs_hash_unlock(hs, 0);
-
- return obj;
-}
-EXPORT_SYMBOL(cfs_hash_lookup);
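-
-/*
- * Sketch (hypothetical my_obj example continued): a successful lookup
- * takes a reference via ops->hs_get, which the caller must drop with
- * cfs_hash_put() once done with the object.
- */
-static bool my_obj_exists(struct cfs_hash *hash, u64 id)
-{
- struct my_obj *obj = cfs_hash_lookup(hash, &id);
-
- if (!obj)
- return false;
- cfs_hash_put(hash, &obj->mo_hnode);
- return true;
-}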
-
-static void
-cfs_hash_for_each_enter(struct cfs_hash *hs)
-{
- LASSERT(!cfs_hash_is_exiting(hs));
-
- if (!cfs_hash_with_rehash(hs))
- return;
- /*
- * NB: there is a race on struct cfs_hash::hs_iterating, but it
- * doesn't matter because it's just an unreliable signal to the
- * rehash thread, which will try to finish the rehash ASAP on
- * seeing it.
- */
- hs->hs_iterating = 1;
-
- cfs_hash_lock(hs, 1);
- hs->hs_iterators++;
- cfs_hash_unlock(hs, 1);
-
- /* NB: iteration is mostly started by service threads, so we
- * prefer to cancel a pending rehash request instead of blocking
- * the service thread; the rehash request is relaunched after the
- * iteration finishes.
- */
- if (cfs_hash_is_rehashing(hs))
- cfs_hash_rehash_cancel(hs);
-}
-
-static void
-cfs_hash_for_each_exit(struct cfs_hash *hs)
-{
- int remained;
- int bits;
-
- if (!cfs_hash_with_rehash(hs))
- return;
- cfs_hash_lock(hs, 1);
- remained = --hs->hs_iterators;
- bits = cfs_hash_rehash_bits(hs);
- cfs_hash_unlock(hs, 1);
- /* NB: there is a race on struct cfs_hash::hs_iterating, see above */
- if (!remained)
- hs->hs_iterating = 0;
- if (bits > 0) {
- cfs_hash_rehash(hs, atomic_read(&hs->hs_count) <
- CFS_HASH_LOOP_HOG);
- }
-}
-
-/**
- * For each item in the libcfs hash @hs call the passed callback @func
- * and pass to it as an argument each hash item and the private @data.
- *
- * a) the iteration as a whole may sleep (it reschedules between buckets)!
- * b) during the callback:
- * . the bucket lock is held, so the callback itself must never sleep.
- * . if @remove_safe is true, the user can remove the current item with
- * cfs_hash_bd_del_locked
-static u64
-cfs_hash_for_each_tight(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
- void *data, int remove_safe)
-{
- struct hlist_node *hnode;
- struct hlist_node *pos;
- struct cfs_hash_bd bd;
- u64 count = 0;
- int excl = !!remove_safe;
- int loop = 0;
- int i;
-
- cfs_hash_for_each_enter(hs);
-
- cfs_hash_lock(hs, 0);
- LASSERT(!cfs_hash_is_rehashing(hs));
-
- cfs_hash_for_each_bucket(hs, &bd, i) {
- struct hlist_head *hhead;
-
- cfs_hash_bd_lock(hs, &bd, excl);
- if (!func) { /* only glimpse size */
- count += bd.bd_bucket->hsb_count;
- cfs_hash_bd_unlock(hs, &bd, excl);
- continue;
- }
-
- cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
- hlist_for_each_safe(hnode, pos, hhead) {
- cfs_hash_bucket_validate(hs, &bd, hnode);
- count++;
- loop++;
- if (func(hs, &bd, hnode, data)) {
- cfs_hash_bd_unlock(hs, &bd, excl);
- goto out;
- }
- }
- }
- cfs_hash_bd_unlock(hs, &bd, excl);
- if (loop < CFS_HASH_LOOP_HOG)
- continue;
- loop = 0;
- cfs_hash_unlock(hs, 0);
- cond_resched();
- cfs_hash_lock(hs, 0);
- }
- out:
- cfs_hash_unlock(hs, 0);
-
- cfs_hash_for_each_exit(hs);
- return count;
-}
-
-struct cfs_hash_cond_arg {
- cfs_hash_cond_opt_cb_t func;
- void *arg;
-};
-
-static int
-cfs_hash_cond_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode, void *data)
-{
- struct cfs_hash_cond_arg *cond = data;
-
- if (cond->func(cfs_hash_object(hs, hnode), cond->arg))
- cfs_hash_bd_del_locked(hs, bd, hnode);
- return 0;
-}
-
-/**
- * Delete items from the libcfs hash @hs for which @func returns true.
- * The write lock is held while looping over each bucket, so no object
- * can gain a new reference during the scan.
- */
-void
-cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t func, void *data)
-{
- struct cfs_hash_cond_arg arg = {
- .func = func,
- .arg = data,
- };
-
- cfs_hash_for_each_tight(hs, cfs_hash_cond_del_locked, &arg, 1);
-}
-EXPORT_SYMBOL(cfs_hash_cond_del);
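-
-/*
- * Sketch: conditional deletion. The predicate signature is inferred
- * from cfs_hash_cond_del_locked() above: it receives the object pointer
- * and the opaque argument, and a true result deletes the item. The
- * my_obj type and staleness rule are hypothetical.
- */
-static int my_obj_is_stale(void *obj, void *arg)
-{
- return ((struct my_obj *)obj)->mo_id < *(u64 *)arg;
-}
-
-static void my_obj_prune(struct cfs_hash *hash, u64 cutoff)
-{
- cfs_hash_cond_del(hash, my_obj_is_stale, &cutoff);
-}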
-
-void
-cfs_hash_for_each(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
- void *data)
-{
- cfs_hash_for_each_tight(hs, func, data, 0);
-}
-EXPORT_SYMBOL(cfs_hash_for_each);
-
-void
-cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
- void *data)
-{
- cfs_hash_for_each_tight(hs, func, data, 1);
-}
-EXPORT_SYMBOL(cfs_hash_for_each_safe);
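-
-/*
- * Sketch: a cfs_hash_for_each_cb_t callback, with the signature
- * inferred from the call sites above; a non-zero return stops the
- * iteration. The bucket lock is held while it runs, so it must not
- * sleep.
- */
-static int my_obj_count_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode, void *data)
-{
- (*(u64 *)data)++;
- return 0; /* 0 == keep iterating */
-}
-
-static u64 my_obj_count(struct cfs_hash *hash)
-{
- u64 n = 0;
-
- cfs_hash_for_each(hash, my_obj_count_cb, &n);
- return n;
-}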
-
-static int
-cfs_hash_peek(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode, void *data)
-{
- *(int *)data = 0;
- return 1; /* return 1 to break the loop */
-}
-
-int
-cfs_hash_is_empty(struct cfs_hash *hs)
-{
- int empty = 1;
-
- cfs_hash_for_each_tight(hs, cfs_hash_peek, &empty, 0);
- return empty;
-}
-EXPORT_SYMBOL(cfs_hash_is_empty);
-
-u64
-cfs_hash_size_get(struct cfs_hash *hs)
-{
- return cfs_hash_with_counter(hs) ?
- atomic_read(&hs->hs_count) :
- cfs_hash_for_each_tight(hs, NULL, NULL, 0);
-}
-EXPORT_SYMBOL(cfs_hash_size_get);
-
-/*
- * cfs_hash_for_each_relax:
- * Iterate the hash table and call @func on each item without
- * holding any lock. This function cannot guarantee that the
- * iteration will finish if these features are enabled:
- *
- * a. if rehash_key is enabled, an item can be moved from
- * one bucket to another bucket
- * b. the user can remove an item whose refcount is non-zero from the
- * hash table; even worse, the user may have changed the key and
- * reinserted the item into another hash bucket.
- * There is no way for us to finish the iteration correctly in the
- * previous two cases, so the iteration has to stop on any change.
- */
-static int
-cfs_hash_for_each_relax(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
- void *data, int start)
-{
- struct hlist_node *next = NULL;
- struct hlist_node *hnode;
- struct cfs_hash_bd bd;
- u32 version;
- int count = 0;
- int stop_on_change;
- int has_put_locked;
- int end = -1;
- int rc = 0;
- int i;
-
- stop_on_change = cfs_hash_with_rehash_key(hs) ||
- !cfs_hash_with_no_itemref(hs);
- has_put_locked = hs->hs_ops->hs_put_locked != NULL;
- cfs_hash_lock(hs, 0);
-again:
- LASSERT(!cfs_hash_is_rehashing(hs));
-
- cfs_hash_for_each_bucket(hs, &bd, i) {
- struct hlist_head *hhead;
-
- if (i < start)
- continue;
- else if (end > 0 && i >= end)
- break;
-
- cfs_hash_bd_lock(hs, &bd, 0);
- version = cfs_hash_bd_version_get(&bd);
-
- cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
- hnode = hhead->first;
- if (!hnode)
- continue;
- cfs_hash_get(hs, hnode);
-
- for (; hnode; hnode = next) {
- cfs_hash_bucket_validate(hs, &bd, hnode);
- next = hnode->next;
- if (next)
- cfs_hash_get(hs, next);
- cfs_hash_bd_unlock(hs, &bd, 0);
- cfs_hash_unlock(hs, 0);
-
- rc = func(hs, &bd, hnode, data);
- if (stop_on_change || !has_put_locked)
- cfs_hash_put(hs, hnode);
- cond_resched();
- count++;
-
- cfs_hash_lock(hs, 0);
- cfs_hash_bd_lock(hs, &bd, 0);
- if (stop_on_change) {
- if (version !=
- cfs_hash_bd_version_get(&bd))
- rc = -EINTR;
- } else if (has_put_locked) {
- cfs_hash_put_locked(hs, hnode);
- }
- if (rc) /* callback wants to break iteration */
- break;
- }
- if (next) {
- if (has_put_locked) {
- cfs_hash_put_locked(hs, next);
- next = NULL;
- }
- break;
- } else if (rc) {
- break;
- }
- }
- cfs_hash_bd_unlock(hs, &bd, 0);
- if (next && !has_put_locked) {
- cfs_hash_put(hs, next);
- next = NULL;
- }
- if (rc) /* callback wants to break iteration */
- break;
- }
- if (start > 0 && !rc) {
- end = start;
- start = 0;
- goto again;
- }
-
- cfs_hash_unlock(hs, 0);
- return count;
-}
-
-int
-cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
- void *data, int start)
-{
- if (cfs_hash_with_no_lock(hs) ||
- cfs_hash_with_rehash_key(hs) ||
- !cfs_hash_with_no_itemref(hs))
- return -EOPNOTSUPP;
-
- if (!hs->hs_ops->hs_get ||
- (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked))
- return -EOPNOTSUPP;
-
- cfs_hash_for_each_enter(hs);
- cfs_hash_for_each_relax(hs, func, data, start);
- cfs_hash_for_each_exit(hs);
-
- return 0;
-}
-EXPORT_SYMBOL(cfs_hash_for_each_nolock);
-
-/**
- * For each hash bucket in the libcfs hash @hs call the passed callback
- * @func until all the hash buckets are empty. The passed callback @func
- * or the previously registered callback hs->hs_put must remove the item
- * from the hash. You may either use the cfs_hash_del() or hlist_del()
- * functions. No rwlocks will be held during the callback @func, so it
- * is safe to sleep if needed. This function will not terminate until
- * the hash is empty. Note it is still possible to concurrently add new
- * items into the hash. It is the caller's responsibility to ensure
- * the required locking is in place to prevent concurrent insertions.
- */
-int
-cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
- void *data)
-{
- unsigned int i = 0;
-
- if (cfs_hash_with_no_lock(hs))
- return -EOPNOTSUPP;
-
- if (!hs->hs_ops->hs_get ||
- (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked))
- return -EOPNOTSUPP;
-
- cfs_hash_for_each_enter(hs);
- while (cfs_hash_for_each_relax(hs, func, data, 0)) {
- CDEBUG(D_INFO, "Try to empty hash: %s, loop: %u\n",
- hs->hs_name, i++);
- }
- cfs_hash_for_each_exit(hs);
- return 0;
-}
-EXPORT_SYMBOL(cfs_hash_for_each_empty);
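-
-/*
- * Sketch of the teardown pattern implied by the comment above: the
- * callback removes every item, then the final table reference is
- * dropped. The my_obj names are hypothetical.
- */
-static int my_obj_evict_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode, void *data)
-{
- struct my_obj *obj = cfs_hash_object(hs, hnode);
-
- cfs_hash_del(hs, &obj->mo_id, hnode); /* drops the table's ref */
- return 0;
-}
-
-static void my_obj_hash_fini(struct cfs_hash *hash)
-{
- cfs_hash_for_each_empty(hash, my_obj_evict_cb, NULL);
- cfs_hash_putref(hash);
-}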
-
-void
-cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned int hindex,
- cfs_hash_for_each_cb_t func, void *data)
-{
- struct hlist_head *hhead;
- struct hlist_node *hnode;
- struct cfs_hash_bd bd;
-
- cfs_hash_for_each_enter(hs);
- cfs_hash_lock(hs, 0);
- if (hindex >= CFS_HASH_NHLIST(hs))
- goto out;
-
- cfs_hash_bd_index_set(hs, hindex, &bd);
-
- cfs_hash_bd_lock(hs, &bd, 0);
- hhead = cfs_hash_bd_hhead(hs, &bd);
- hlist_for_each(hnode, hhead) {
- if (func(hs, &bd, hnode, data))
- break;
- }
- cfs_hash_bd_unlock(hs, &bd, 0);
-out:
- cfs_hash_unlock(hs, 0);
- cfs_hash_for_each_exit(hs);
-}
-EXPORT_SYMBOL(cfs_hash_hlist_for_each);
-
-/*
- * For each item in the libcfs hash @hs which matches the @key call
- * the passed callback @func and pass to it as an argument each hash
- * item and the private @data. During the callback the bucket lock
- * is held so the callback must never sleep.
- */
-void
-cfs_hash_for_each_key(struct cfs_hash *hs, const void *key,
- cfs_hash_for_each_cb_t func, void *data)
-{
- struct hlist_node *hnode;
- struct cfs_hash_bd bds[2];
- unsigned int i;
-
- cfs_hash_lock(hs, 0);
-
- cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0);
-
- cfs_hash_for_each_bd(bds, 2, i) {
- struct hlist_head *hlist = cfs_hash_bd_hhead(hs, &bds[i]);
-
- hlist_for_each(hnode, hlist) {
- cfs_hash_bucket_validate(hs, &bds[i], hnode);
-
- if (cfs_hash_keycmp(hs, key, hnode)) {
- if (func(hs, &bds[i], hnode, data))
- break;
- }
- }
- }
-
- cfs_hash_dual_bd_unlock(hs, bds, 0);
- cfs_hash_unlock(hs, 0);
-}
-EXPORT_SYMBOL(cfs_hash_for_each_key);
-
-/**
- * Rehash the libcfs hash @hs to the given @bits. This can be used
- * to grow the hash size when excessive chaining is detected, or to
- * shrink the hash when it is larger than needed. When the CFS_HASH_REHASH
- * flag is set in @hs the libcfs hash may be dynamically rehashed
- * during addition or removal if the hash's theta value exceeds
- * either the hs->hs_min_theta or hs->hs_max_theta values. By default
- * these values are tuned to keep the chained hash depth small, and
- * this approach assumes a reasonably uniform hashing function. The
- * theta thresholds for @hs are tunable via cfs_hash_set_theta().
- */
-void
-cfs_hash_rehash_cancel(struct cfs_hash *hs)
-{
- LASSERT(cfs_hash_with_rehash(hs));
- cancel_work_sync(&hs->hs_rehash_work);
-}
-
-void
-cfs_hash_rehash(struct cfs_hash *hs, int do_rehash)
-{
- int rc;
-
- LASSERT(cfs_hash_with_rehash(hs) && !cfs_hash_with_no_lock(hs));
-
- cfs_hash_lock(hs, 1);
-
- rc = cfs_hash_rehash_bits(hs);
- if (rc <= 0) {
- cfs_hash_unlock(hs, 1);
- return;
- }
-
- hs->hs_rehash_bits = rc;
- if (!do_rehash) {
- /* launch and return */
- queue_work(cfs_rehash_wq, &hs->hs_rehash_work);
- cfs_hash_unlock(hs, 1);
- return;
- }
-
- /* rehash right now */
- cfs_hash_unlock(hs, 1);
-
- cfs_hash_rehash_worker(&hs->hs_rehash_work);
-}
-
-static int
-cfs_hash_rehash_bd(struct cfs_hash *hs, struct cfs_hash_bd *old)
-{
- struct cfs_hash_bd new;
- struct hlist_head *hhead;
- struct hlist_node *hnode;
- struct hlist_node *pos;
- void *key;
- int c = 0;
-
- /* hold cfs_hash_lock(hs, 1), so don't need any bucket lock */
- cfs_hash_bd_for_each_hlist(hs, old, hhead) {
- hlist_for_each_safe(hnode, pos, hhead) {
- key = cfs_hash_key(hs, hnode);
- LASSERT(key);
- /* Validate hnode is in the correct bucket. */
- cfs_hash_bucket_validate(hs, old, hnode);
- /*
- * Delete from old hash bucket; move to new bucket.
- * ops->hs_key must be defined.
- */
- cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
- hs->hs_rehash_bits, key, &new);
- cfs_hash_bd_move_locked(hs, old, &new, hnode);
- c++;
- }
- }
-
- return c;
-}
-
-static void
-cfs_hash_rehash_worker(struct work_struct *work)
-{
- struct cfs_hash *hs = container_of(work, struct cfs_hash, hs_rehash_work);
- struct cfs_hash_bucket **bkts;
- struct cfs_hash_bd bd;
- unsigned int old_size;
- unsigned int new_size;
- int bsize;
- int count = 0;
- int rc = 0;
- int i;
-
- LASSERT(hs && cfs_hash_with_rehash(hs));
-
- cfs_hash_lock(hs, 0);
- LASSERT(cfs_hash_is_rehashing(hs));
-
- old_size = CFS_HASH_NBKT(hs);
- new_size = CFS_HASH_RH_NBKT(hs);
-
- cfs_hash_unlock(hs, 0);
-
- /*
- * we don't need hs::hs_rwlock for hs::hs_buckets, because
- * nobody else can change the bucket table while we rehash.
- */
- bkts = cfs_hash_buckets_realloc(hs, hs->hs_buckets,
- old_size, new_size);
- cfs_hash_lock(hs, 1);
- if (!bkts) {
- rc = -ENOMEM;
- goto out;
- }
-
- if (bkts == hs->hs_buckets) {
- bkts = NULL; /* do nothing */
- goto out;
- }
-
- rc = __cfs_hash_theta(hs);
- if ((rc >= hs->hs_min_theta) && (rc <= hs->hs_max_theta)) {
- /* free the new allocated bkt-table */
- old_size = new_size;
- new_size = CFS_HASH_NBKT(hs);
- rc = -EALREADY;
- goto out;
- }
-
- LASSERT(!hs->hs_rehash_buckets);
- hs->hs_rehash_buckets = bkts;
-
- rc = 0;
- cfs_hash_for_each_bucket(hs, &bd, i) {
- if (cfs_hash_is_exiting(hs)) {
- rc = -ESRCH;
- /* someone wants to destroy the hash, abort now */
- if (old_size < new_size) /* OK to free old bkt-table */
- break;
- /* it's shrinking, need free new bkt-table */
- hs->hs_rehash_buckets = NULL;
- old_size = new_size;
- new_size = CFS_HASH_NBKT(hs);
- goto out;
- }
-
- count += cfs_hash_rehash_bd(hs, &bd);
- if (count < CFS_HASH_LOOP_HOG ||
- cfs_hash_is_iterating(hs)) { /* need to finish ASAP */
- continue;
- }
-
- count = 0;
- cfs_hash_unlock(hs, 1);
- cond_resched();
- cfs_hash_lock(hs, 1);
- }
-
- hs->hs_rehash_count++;
-
- bkts = hs->hs_buckets;
- hs->hs_buckets = hs->hs_rehash_buckets;
- hs->hs_rehash_buckets = NULL;
-
- hs->hs_cur_bits = hs->hs_rehash_bits;
-out:
- hs->hs_rehash_bits = 0;
- bsize = cfs_hash_bkt_size(hs);
- cfs_hash_unlock(hs, 1);
- /* can't refer to @hs anymore because it could be destroyed */
- if (bkts)
- cfs_hash_buckets_free(bkts, bsize, new_size, old_size);
- if (rc)
- CDEBUG(D_INFO, "early quit of rehashing: %d\n", rc);
-}
-
-/**
- * Rehash the object referenced by @hnode in the libcfs hash @hs. The
- * @old_key must be provided to locate the objects previous location
- * in the hash, and the @new_key will be used to reinsert the object.
- * Use this function instead of a cfs_hash_add() + cfs_hash_del()
- * combo when it is critical that there is no window in time where the
- * object is missing from the hash. When an object is being rehashed
- * the registered cfs_hash_get() and cfs_hash_put() functions will
- * not be called.
- */
-void cfs_hash_rehash_key(struct cfs_hash *hs, const void *old_key,
- void *new_key, struct hlist_node *hnode)
-{
- struct cfs_hash_bd bds[3];
- struct cfs_hash_bd old_bds[2];
- struct cfs_hash_bd new_bd;
-
- LASSERT(!hlist_unhashed(hnode));
-
- cfs_hash_lock(hs, 0);
-
- cfs_hash_dual_bd_get(hs, old_key, old_bds);
- cfs_hash_bd_get(hs, new_key, &new_bd);
-
- bds[0] = old_bds[0];
- bds[1] = old_bds[1];
- bds[2] = new_bd;
-
- /* NB: bds[0] and bds[1] are ordered already */
- cfs_hash_bd_order(&bds[1], &bds[2]);
- cfs_hash_bd_order(&bds[0], &bds[1]);
-
- cfs_hash_multi_bd_lock(hs, bds, 3, 1);
- if (likely(!old_bds[1].bd_bucket)) {
- cfs_hash_bd_move_locked(hs, &old_bds[0], &new_bd, hnode);
- } else {
- cfs_hash_dual_bd_finddel_locked(hs, old_bds, old_key, hnode);
- cfs_hash_bd_add_locked(hs, &new_bd, hnode);
- }
- /* overwrite the key while holding the locks, otherwise this may
- * conflict with other operations, e.g. rehash
- */
- cfs_hash_keycpy(hs, hnode, new_key);
-
- cfs_hash_multi_bd_unlock(hs, bds, 3, 1);
- cfs_hash_unlock(hs, 0);
-}
-EXPORT_SYMBOL(cfs_hash_rehash_key);
-
-void cfs_hash_debug_header(struct seq_file *m)
-{
- seq_printf(m, "%-*s cur min max theta t-min t-max flags rehash count maxdep maxdepb distribution\n",
- CFS_HASH_BIGNAME_LEN, "name");
-}
-EXPORT_SYMBOL(cfs_hash_debug_header);
-
-static struct cfs_hash_bucket **
-cfs_hash_full_bkts(struct cfs_hash *hs)
-{
- /* NB: caller should hold hs->hs_rwlock if REHASH is set */
- if (!hs->hs_rehash_buckets)
- return hs->hs_buckets;
-
- LASSERT(hs->hs_rehash_bits);
- return hs->hs_rehash_bits > hs->hs_cur_bits ?
- hs->hs_rehash_buckets : hs->hs_buckets;
-}
-
-static unsigned int
-cfs_hash_full_nbkt(struct cfs_hash *hs)
-{
- /* NB: caller should hold hs->hs_rwlock if REHASH is set */
- if (!hs->hs_rehash_buckets)
- return CFS_HASH_NBKT(hs);
-
- LASSERT(hs->hs_rehash_bits);
- return hs->hs_rehash_bits > hs->hs_cur_bits ?
- CFS_HASH_RH_NBKT(hs) : CFS_HASH_NBKT(hs);
-}
-
-void cfs_hash_debug_str(struct cfs_hash *hs, struct seq_file *m)
-{
- int dist[8] = { 0, };
- int maxdep = -1;
- int maxdepb = -1;
- int total = 0;
- int theta;
- int i;
-
- cfs_hash_lock(hs, 0);
- theta = __cfs_hash_theta(hs);
-
- seq_printf(m, "%-*s %5d %5d %5d %d.%03d %d.%03d %d.%03d 0x%02x %6d ",
- CFS_HASH_BIGNAME_LEN, hs->hs_name,
- 1 << hs->hs_cur_bits, 1 << hs->hs_min_bits,
- 1 << hs->hs_max_bits,
- __cfs_hash_theta_int(theta), __cfs_hash_theta_frac(theta),
- __cfs_hash_theta_int(hs->hs_min_theta),
- __cfs_hash_theta_frac(hs->hs_min_theta),
- __cfs_hash_theta_int(hs->hs_max_theta),
- __cfs_hash_theta_frac(hs->hs_max_theta),
- hs->hs_flags, hs->hs_rehash_count);
-
- /*
- * The distribution is a summary of the chained hash depth in
- * each of the libcfs hash buckets. Each bucket's hsb_count is
- * divided by the hash theta value and used to generate a
- * histogram of the hash distribution. A uniform hash will
- * result in all hash buckets being close to the average, thus
- * only the first few entries in the histogram will be non-zero.
- * If your hash function produces a non-uniform hash, this will
- * be observable as outlier buckets in the distribution histogram.
- *
- * Uniform hash distribution: 128/128/0/0/0/0/0/0
- * Non-Uniform hash distribution: 128/125/0/0/0/0/2/1
- */
- for (i = 0; i < cfs_hash_full_nbkt(hs); i++) {
- struct cfs_hash_bd bd;
-
- bd.bd_bucket = cfs_hash_full_bkts(hs)[i];
- cfs_hash_bd_lock(hs, &bd, 0);
- if (maxdep < bd.bd_bucket->hsb_depmax) {
- maxdep = bd.bd_bucket->hsb_depmax;
- maxdepb = ffz(~maxdep);
- }
- total += bd.bd_bucket->hsb_count;
- dist[min(fls(bd.bd_bucket->hsb_count / max(theta, 1)), 7)]++;
- cfs_hash_bd_unlock(hs, &bd, 0);
- }
-
- seq_printf(m, "%7d %7d %7d ", total, maxdep, maxdepb);
- for (i = 0; i < 8; i++)
- seq_printf(m, "%d%c", dist[i], (i == 7) ? '\n' : '/');
-
- cfs_hash_unlock(hs, 0);
-}
-EXPORT_SYMBOL(cfs_hash_debug_str);
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
deleted file mode 100644
index 3d1cf457b286..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ /dev/null
@@ -1,1086 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Please see comments in libcfs/include/libcfs/libcfs_cpu.h for introduction
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/cpu.h>
-#include <linux/sched.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/cache.h>
-
-#include <linux/libcfs/libcfs_cpu.h>
-#include <linux/libcfs/libcfs_string.h>
-#include <linux/libcfs/libcfs.h>
-
-/** Global CPU partition table */
-struct cfs_cpt_table *cfs_cpt_tab __read_mostly;
-EXPORT_SYMBOL(cfs_cpt_tab);
-
-/**
- * modparam for setting number of partitions
- *
- * 0 : estimate best value based on cores or NUMA nodes
- * 1 : disable multiple partitions
- * >1 : specify number of partitions
- */
-static int cpu_npartitions;
-module_param(cpu_npartitions, int, 0444);
-MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions");
-
-/**
- * modparam for setting CPU partition patterns:
- *
- * e.g.: "0[0,1,2,3] 1[4,5,6,7]", the number before a bracket is the CPU
- * partition ID, the numbers inside are processor IDs (core or HT)
- *
- * e.g.: "N 0[0,1] 1[2,3]", the leading 'N' means the numbers inside the
- * brackets are NUMA node IDs, the number before a bracket is the CPU
- * partition ID.
- *
- * e.g.: "N", shortcut expression to create CPTs from NUMA & CPU topology
- *
- * NB: if the user specifies cpu_pattern, cpu_npartitions will be ignored
- */
-static char *cpu_pattern = "N";
-module_param(cpu_pattern, charp, 0444);
-MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
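-
-/*
- * Illustrative values (not from the original source): on an 8-core,
- * 2-node box, cpu_pattern="0[0,1,2,3] 1[4,5,6,7]" pins partition 0 to
- * cores 0-3 and partition 1 to cores 4-7; cpu_pattern="N 0[0] 1[1]"
- * assigns NUMA node 0 to partition 0 and node 1 to partition 1; the
- * default "N" derives everything from the NUMA & CPU topology.
- */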
-
-static struct cfs_cpt_data {
- /* serialize hotplug etc */
- spinlock_t cpt_lock;
- /* reserved for hotplug */
- unsigned long cpt_version;
- /* mutex to protect cpt_cpumask */
- struct mutex cpt_mutex;
- /* scratch buffer for set/unset_node */
- cpumask_var_t cpt_cpumask;
-} cpt_data;
-
-#define CFS_CPU_VERSION_MAGIC 0xbabecafe
-
-struct cfs_cpt_table *
-cfs_cpt_table_alloc(unsigned int ncpt)
-{
- struct cfs_cpt_table *cptab;
- int i;
-
- cptab = kzalloc(sizeof(*cptab), GFP_NOFS);
- if (!cptab)
- return NULL;
-
- cptab->ctb_nparts = ncpt;
-
- cptab->ctb_nodemask = kzalloc(sizeof(*cptab->ctb_nodemask),
- GFP_NOFS);
- if (!zalloc_cpumask_var(&cptab->ctb_cpumask, GFP_NOFS) ||
- !cptab->ctb_nodemask)
- goto failed;
-
- cptab->ctb_cpu2cpt = kvmalloc_array(num_possible_cpus(),
- sizeof(cptab->ctb_cpu2cpt[0]),
- GFP_KERNEL);
- if (!cptab->ctb_cpu2cpt)
- goto failed;
-
- memset(cptab->ctb_cpu2cpt, -1,
- num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
-
- cptab->ctb_parts = kvmalloc_array(ncpt, sizeof(cptab->ctb_parts[0]),
- GFP_KERNEL);
- if (!cptab->ctb_parts)
- goto failed;
-
- for (i = 0; i < ncpt; i++) {
- struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
-
- part->cpt_nodemask = kzalloc(sizeof(*part->cpt_nodemask),
- GFP_NOFS);
- if (!zalloc_cpumask_var(&part->cpt_cpumask, GFP_NOFS) ||
- !part->cpt_nodemask)
- goto failed;
- }
-
- spin_lock(&cpt_data.cpt_lock);
- /* Reserved for hotplug */
- cptab->ctb_version = cpt_data.cpt_version;
- spin_unlock(&cpt_data.cpt_lock);
-
- return cptab;
-
- failed:
- cfs_cpt_table_free(cptab);
- return NULL;
-}
-EXPORT_SYMBOL(cfs_cpt_table_alloc);
-
-void
-cfs_cpt_table_free(struct cfs_cpt_table *cptab)
-{
- int i;
-
- kvfree(cptab->ctb_cpu2cpt);
-
- for (i = 0; cptab->ctb_parts && i < cptab->ctb_nparts; i++) {
- struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
-
- kfree(part->cpt_nodemask);
- free_cpumask_var(part->cpt_cpumask);
- }
-
- kvfree(cptab->ctb_parts);
-
- kfree(cptab->ctb_nodemask);
- free_cpumask_var(cptab->ctb_cpumask);
-
- kfree(cptab);
-}
-EXPORT_SYMBOL(cfs_cpt_table_free);
-
-int
-cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
-{
- char *tmp = buf;
- int rc = 0;
- int i;
- int j;
-
- for (i = 0; i < cptab->ctb_nparts; i++) {
- if (len > 0) {
- rc = snprintf(tmp, len, "%d\t: ", i);
- len -= rc;
- }
-
- if (len <= 0) {
- rc = -EFBIG;
- goto out;
- }
-
- tmp += rc;
- for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) {
- rc = snprintf(tmp, len, "%d ", j);
- len -= rc;
- if (len <= 0) {
- rc = -EFBIG;
- goto out;
- }
- tmp += rc;
- }
-
- *tmp = '\n';
- tmp++;
- len--;
- }
-
- out:
- if (rc < 0)
- return rc;
-
- return tmp - buf;
-}
-EXPORT_SYMBOL(cfs_cpt_table_print);
-
-static void
-cfs_node_to_cpumask(int node, cpumask_t *mask)
-{
- const cpumask_t *tmp = cpumask_of_node(node);
-
- if (tmp)
- cpumask_copy(mask, tmp);
- else
- cpumask_clear(mask);
-}
-
-int
-cfs_cpt_number(struct cfs_cpt_table *cptab)
-{
- return cptab->ctb_nparts;
-}
-EXPORT_SYMBOL(cfs_cpt_number);
-
-int
-cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
-{
- LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
- return cpt == CFS_CPT_ANY ?
- cpumask_weight(cptab->ctb_cpumask) :
- cpumask_weight(cptab->ctb_parts[cpt].cpt_cpumask);
-}
-EXPORT_SYMBOL(cfs_cpt_weight);
-
-int
-cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
-{
- LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
- return cpt == CFS_CPT_ANY ?
- cpumask_any_and(cptab->ctb_cpumask,
- cpu_online_mask) < nr_cpu_ids :
- cpumask_any_and(cptab->ctb_parts[cpt].cpt_cpumask,
- cpu_online_mask) < nr_cpu_ids;
-}
-EXPORT_SYMBOL(cfs_cpt_online);
-
-cpumask_var_t *
-cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
-{
- LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
- return cpt == CFS_CPT_ANY ?
- &cptab->ctb_cpumask : &cptab->ctb_parts[cpt].cpt_cpumask;
-}
-EXPORT_SYMBOL(cfs_cpt_cpumask);
-
-nodemask_t *
-cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
-{
- LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
- return cpt == CFS_CPT_ANY ?
- cptab->ctb_nodemask : cptab->ctb_parts[cpt].cpt_nodemask;
-}
-EXPORT_SYMBOL(cfs_cpt_nodemask);
-
-int
-cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
- int node;
-
- LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);
-
- if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) {
- CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu);
- return 0;
- }
-
- if (cptab->ctb_cpu2cpt[cpu] != -1) {
- CDEBUG(D_INFO, "CPU %d is already in partition %d\n",
- cpu, cptab->ctb_cpu2cpt[cpu]);
- return 0;
- }
-
- cptab->ctb_cpu2cpt[cpu] = cpt;
-
- LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask));
- LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
-
- cpumask_set_cpu(cpu, cptab->ctb_cpumask);
- cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
-
- node = cpu_to_node(cpu);
-
- /* first CPU of @node in this CPT table */
- if (!node_isset(node, *cptab->ctb_nodemask))
- node_set(node, *cptab->ctb_nodemask);
-
- /* first CPU of @node in this partition */
- if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask))
- node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask);
-
- return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_cpu);
-
-void
-cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
-{
- int node;
- int i;
-
- LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
- if (cpu < 0 || cpu >= nr_cpu_ids) {
- CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu);
- return;
- }
-
- if (cpt == CFS_CPT_ANY) {
- /* caller doesn't know the partition ID */
- cpt = cptab->ctb_cpu2cpt[cpu];
- if (cpt < 0) { /* not set in this CPT-table */
- CDEBUG(D_INFO, "Try to unset cpu %d which is not in CPT-table %p\n",
- cpu, cptab);
- return;
- }
-
- } else if (cpt != cptab->ctb_cpu2cpt[cpu]) {
- CDEBUG(D_INFO,
- "CPU %d is not in cpu-partition %d\n", cpu, cpt);
- return;
- }
-
- LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
- LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask));
-
- cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
- cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
- cptab->ctb_cpu2cpt[cpu] = -1;
-
- node = cpu_to_node(cpu);
-
- LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask));
- LASSERT(node_isset(node, *cptab->ctb_nodemask));
-
- for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) {
- /* this CPT has other CPU belonging to this node? */
- if (cpu_to_node(i) == node)
- break;
- }
-
- if (i >= nr_cpu_ids)
- node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask);
-
- for_each_cpu(i, cptab->ctb_cpumask) {
- /* this CPT-table has other CPU belonging to this node? */
- if (cpu_to_node(i) == node)
- break;
- }
-
- if (i >= nr_cpu_ids)
- node_clear(node, *cptab->ctb_nodemask);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_cpu);
-
-int
-cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
- int i;
-
- if (!cpumask_weight(mask) ||
- cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) {
- CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU partition %d\n",
- cpt);
- return 0;
- }
-
- for_each_cpu(i, mask) {
- if (!cfs_cpt_set_cpu(cptab, cpt, i))
- return 0;
- }
-
- return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_cpumask);
-
-void
-cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
-{
- int i;
-
- for_each_cpu(i, mask)
- cfs_cpt_unset_cpu(cptab, cpt, i);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
-
-int
-cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
- int rc;
-
- if (node < 0 || node >= MAX_NUMNODES) {
- CDEBUG(D_INFO,
- "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
- return 0;
- }
-
- mutex_lock(&cpt_data.cpt_mutex);
-
- cfs_node_to_cpumask(node, cpt_data.cpt_cpumask);
-
- rc = cfs_cpt_set_cpumask(cptab, cpt, cpt_data.cpt_cpumask);
-
- mutex_unlock(&cpt_data.cpt_mutex);
-
- return rc;
-}
-EXPORT_SYMBOL(cfs_cpt_set_node);
-
-void
-cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
-{
- if (node < 0 || node >= MAX_NUMNODES) {
- CDEBUG(D_INFO,
- "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
- return;
- }
-
- mutex_lock(&cpt_data.cpt_mutex);
-
- cfs_node_to_cpumask(node, cpt_data.cpt_cpumask);
-
- cfs_cpt_unset_cpumask(cptab, cpt, cpt_data.cpt_cpumask);
-
- mutex_unlock(&cpt_data.cpt_mutex);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_node);
-
-int
-cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
- int i;
-
- for_each_node_mask(i, *mask) {
- if (!cfs_cpt_set_node(cptab, cpt, i))
- return 0;
- }
-
- return 1;
-}
-EXPORT_SYMBOL(cfs_cpt_set_nodemask);
-
-void
-cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
-{
- int i;
-
- for_each_node_mask(i, *mask)
- cfs_cpt_unset_node(cptab, cpt, i);
-}
-EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
-
-void
-cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
-{
- int last;
- int i;
-
- if (cpt == CFS_CPT_ANY) {
- last = cptab->ctb_nparts - 1;
- cpt = 0;
- } else {
- last = cpt;
- }
-
- for (; cpt <= last; cpt++) {
- for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask)
- cfs_cpt_unset_cpu(cptab, cpt, i);
- }
-}
-EXPORT_SYMBOL(cfs_cpt_clear);
-
-int
-cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
-{
- nodemask_t *mask;
- int weight;
- int rotor;
- int node;
-
- /* convert CPU partition ID to HW node id */
-
- if (cpt < 0 || cpt >= cptab->ctb_nparts) {
- mask = cptab->ctb_nodemask;
- rotor = cptab->ctb_spread_rotor++;
- } else {
- mask = cptab->ctb_parts[cpt].cpt_nodemask;
- rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++;
- }
-
- weight = nodes_weight(*mask);
- LASSERT(weight > 0);
-
- rotor %= weight;
-
- for_each_node_mask(node, *mask) {
- if (!rotor--)
- return node;
- }
-
- LBUG();
- return 0;
-}
-EXPORT_SYMBOL(cfs_cpt_spread_node);
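-
-/*
- * Sketch: NUMA-aware allocation for a partition, the same pattern
- * cfs_percpt_alloc() uses in libcfs_mem.c later in this patch. The
- * names are hypothetical.
- */
-static void *my_cpt_alloc(struct cfs_cpt_table *cptab, int cpt, size_t size)
-{
- /* place the buffer on a NUMA node belonging to @cpt */
- return kzalloc_node(size, GFP_KERNEL,
- cfs_cpt_spread_node(cptab, cpt));
-}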
-
-int
-cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
-{
- int cpu;
- int cpt;
-
- preempt_disable();
- cpu = smp_processor_id();
- cpt = cptab->ctb_cpu2cpt[cpu];
-
- if (cpt < 0 && remap) {
- /* don't return a negative value, for the safety of upper
- * layers; instead map the unknown CPU to a valid partition ID
- */
- cpt = cpu % cptab->ctb_nparts;
- }
- preempt_enable();
- return cpt;
-}
-EXPORT_SYMBOL(cfs_cpt_current);
-
-int
-cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
-{
- LASSERT(cpu >= 0 && cpu < nr_cpu_ids);
-
- return cptab->ctb_cpu2cpt[cpu];
-}
-EXPORT_SYMBOL(cfs_cpt_of_cpu);
-
-int
-cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
-{
- cpumask_var_t *cpumask;
- nodemask_t *nodemask;
- int rc;
- int i;
-
- LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
-
- if (cpt == CFS_CPT_ANY) {
- cpumask = &cptab->ctb_cpumask;
- nodemask = cptab->ctb_nodemask;
- } else {
- cpumask = &cptab->ctb_parts[cpt].cpt_cpumask;
- nodemask = cptab->ctb_parts[cpt].cpt_nodemask;
- }
-
- if (cpumask_any_and(*cpumask, cpu_online_mask) >= nr_cpu_ids) {
- CERROR("No online CPU found in CPU partition %d, did someone do CPU hotplug on system? You might need to reload Lustre modules to keep system working well.\n",
- cpt);
- return -EINVAL;
- }
-
- for_each_online_cpu(i) {
- if (cpumask_test_cpu(i, *cpumask))
- continue;
-
- rc = set_cpus_allowed_ptr(current, *cpumask);
- set_mems_allowed(*nodemask);
- if (!rc)
- schedule(); /* switch to allowed CPU */
-
- return rc;
- }
-
- /* don't need to set affinity because all online CPUs are covered */
- return 0;
-}
-EXPORT_SYMBOL(cfs_cpt_bind);
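-
-/*
- * Sketch (hypothetical kthread, assumes <linux/kthread.h>): a
- * per-partition worker binding itself before its main loop, the way
- * Lustre service threads use this API.
- */
-static int my_service_thread(void *arg)
-{
- int cpt = (long)arg;
-
- if (cfs_cpt_bind(cfs_cpt_tab, cpt))
- CWARN("cannot bind to CPU partition %d\n", cpt);
-
- while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule(); /* real request processing would go here */
- }
- return 0;
-}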
-
-/**
- * Choose up to \a number CPUs from \a node and set them in \a cpt.
- * We always prefer to choose CPUs in the same core/socket.
- */
-static int
-cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
- cpumask_t *node, int number)
-{
- cpumask_var_t socket;
- cpumask_var_t core;
- int rc = 0;
- int cpu;
-
- LASSERT(number > 0);
-
- if (number >= cpumask_weight(node)) {
- while (!cpumask_empty(node)) {
- cpu = cpumask_first(node);
-
- rc = cfs_cpt_set_cpu(cptab, cpt, cpu);
- if (!rc)
- return -EINVAL;
- cpumask_clear_cpu(cpu, node);
- }
- return 0;
- }
-
- /*
- * Allocate scratch buffers.
- * Since we cannot tell whether a cpumask_var_t has been
- * initialized, allocate both before risking a free of either.
- */
- if (!zalloc_cpumask_var(&socket, GFP_NOFS))
- rc = -ENOMEM;
- if (!zalloc_cpumask_var(&core, GFP_NOFS))
- rc = -ENOMEM;
- if (rc)
- goto out;
-
- while (!cpumask_empty(node)) {
- cpu = cpumask_first(node);
-
- /* get cpumask for cores in the same socket */
- cpumask_copy(socket, topology_core_cpumask(cpu));
- cpumask_and(socket, socket, node);
-
- LASSERT(!cpumask_empty(socket));
-
- while (!cpumask_empty(socket)) {
- int i;
-
- /* get cpumask for hts in the same core */
- cpumask_copy(core, topology_sibling_cpumask(cpu));
- cpumask_and(core, core, node);
-
- LASSERT(!cpumask_empty(core));
-
- for_each_cpu(i, core) {
- cpumask_clear_cpu(i, socket);
- cpumask_clear_cpu(i, node);
-
- rc = cfs_cpt_set_cpu(cptab, cpt, i);
- if (!rc) {
- rc = -EINVAL;
- goto out;
- }
-
- if (!--number)
- goto out;
- }
- cpu = cpumask_first(socket);
- }
- }
-
-out:
- free_cpumask_var(socket);
- free_cpumask_var(core);
- return rc;
-}
-
-#define CPT_WEIGHT_MIN 4u
-
-static unsigned int
-cfs_cpt_num_estimate(void)
-{
- unsigned int nnode = num_online_nodes();
- unsigned int ncpu = num_online_cpus();
- unsigned int ncpt;
-
- if (ncpu <= CPT_WEIGHT_MIN) {
- ncpt = 1;
- goto out;
- }
-
- /* generate a reasonable number of CPU partitions based on the total
- * number of CPUs. The preferred N is a power of 2 that matches this
- * condition: 2 * (N - 1)^2 < NCPUS <= 2 * N^2
- * (e.g. 32 online CPUs give N = 4)
- */
- for (ncpt = 2; ncpu > 2 * ncpt * ncpt; ncpt <<= 1)
- ;
-
- if (ncpt <= nnode) { /* fat numa system */
- while (nnode > ncpt)
- nnode >>= 1;
-
- } else { /* ncpt > nnode */
- while ((nnode << 1) <= ncpt)
- nnode <<= 1;
- }
-
- ncpt = nnode;
-
-out:
-#if (BITS_PER_LONG == 32)
- /* configuring many CPU partitions on a 32-bit system could
- * consume too much memory
- */
- ncpt = min(2U, ncpt);
-#endif
- while (ncpu % ncpt)
- ncpt--; /* worst case is 1 */
-
- return ncpt;
-}
-
-static struct cfs_cpt_table *
-cfs_cpt_table_create(int ncpt)
-{
- struct cfs_cpt_table *cptab = NULL;
- cpumask_var_t mask;
- int cpt = 0;
- int num;
- int rc;
- int i;
-
- rc = cfs_cpt_num_estimate();
- if (ncpt <= 0)
- ncpt = rc;
-
- if (ncpt > num_online_cpus() || ncpt > 4 * rc) {
- CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n",
- ncpt, rc);
- }
-
- if (num_online_cpus() % ncpt) {
- CERROR("CPU number %d is not multiple of cpu_npartition %d, please try different cpu_npartitions value or set pattern string by cpu_pattern=STRING\n",
- (int)num_online_cpus(), ncpt);
- goto failed;
- }
-
- cptab = cfs_cpt_table_alloc(ncpt);
- if (!cptab) {
- CERROR("Failed to allocate CPU map(%d)\n", ncpt);
- goto failed;
- }
-
- num = num_online_cpus() / ncpt;
- if (!num) {
- CERROR("CPU changed while setting CPU partition\n");
- goto failed;
- }
-
- if (!zalloc_cpumask_var(&mask, GFP_NOFS)) {
- CERROR("Failed to allocate scratch cpumask\n");
- goto failed;
- }
-
- for_each_online_node(i) {
- cfs_node_to_cpumask(i, mask);
-
- while (!cpumask_empty(mask)) {
- struct cfs_cpu_partition *part;
- int n;
-
- /*
- * Each emulated NUMA node has all allowed CPUs in
- * the mask.
- * End loop when all partitions have assigned CPUs.
- */
- if (cpt == ncpt)
- break;
-
- part = &cptab->ctb_parts[cpt];
-
- n = num - cpumask_weight(part->cpt_cpumask);
- LASSERT(n > 0);
-
- rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n);
- if (rc < 0)
- goto failed_mask;
-
- LASSERT(num >= cpumask_weight(part->cpt_cpumask));
- if (num == cpumask_weight(part->cpt_cpumask))
- cpt++;
- }
- }
-
- if (cpt != ncpt ||
- num != cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)) {
- CERROR("Expect %d(%d) CPU partitions but got %d(%d), CPU hotplug/unplug while setting?\n",
- cptab->ctb_nparts, num, cpt,
- cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask));
- goto failed_mask;
- }
-
- free_cpumask_var(mask);
-
- return cptab;
-
- failed_mask:
- free_cpumask_var(mask);
- failed:
- CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, online HW nodes: %d, HW cpus: %d.\n",
- ncpt, num_online_nodes(), num_online_cpus());
-
- if (cptab)
- cfs_cpt_table_free(cptab);
-
- return NULL;
-}
-
-static struct cfs_cpt_table *
-cfs_cpt_table_create_pattern(char *pattern)
-{
- struct cfs_cpt_table *cptab;
- char *str;
- int node = 0;
- int high;
- int ncpt = 0;
- int cpt;
- int rc;
- int c;
- int i;
-
- str = strim(pattern);
- if (*str == 'n' || *str == 'N') {
- pattern = str + 1;
- if (*pattern != '\0') {
- node = 1;
- } else { /* shortcut to create CPT from NUMA & CPU topology */
- node = -1;
- ncpt = num_online_nodes();
- }
- }
-
- if (!ncpt) { /* count brackets; each bracket marks one partition */
- for (str = pattern;; str++, ncpt++) {
- str = strchr(str, '[');
- if (!str)
- break;
- }
- }
-
- if (!ncpt ||
- (node && ncpt > num_online_nodes()) ||
- (!node && ncpt > num_online_cpus())) {
- CERROR("Invalid pattern %s, or too many partitions %d\n",
- pattern, ncpt);
- return NULL;
- }
-
- cptab = cfs_cpt_table_alloc(ncpt);
- if (!cptab) {
- CERROR("Failed to allocate cpu partition table\n");
- return NULL;
- }
-
- if (node < 0) { /* shortcut to create CPT from NUMA & CPU topology */
- cpt = 0;
-
- for_each_online_node(i) {
- if (cpt >= ncpt) {
- CERROR("CPU changed while setting CPU partition table, %d/%d\n",
- cpt, ncpt);
- goto failed;
- }
-
- rc = cfs_cpt_set_node(cptab, cpt++, i);
- if (!rc)
- goto failed;
- }
- return cptab;
- }
-
- high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1;
-
- for (str = strim(pattern), c = 0;; c++) {
- struct cfs_range_expr *range;
- struct cfs_expr_list *el;
- char *bracket = strchr(str, '[');
- int n;
-
- if (!bracket) {
- if (*str) {
- CERROR("Invalid pattern %s\n", str);
- goto failed;
- }
- if (c != ncpt) {
- CERROR("expect %d partitions but found %d\n",
- ncpt, c);
- goto failed;
- }
- break;
- }
-
- if (sscanf(str, "%d%n", &cpt, &n) < 1) {
- CERROR("Invalid cpu pattern %s\n", str);
- goto failed;
- }
-
- if (cpt < 0 || cpt >= ncpt) {
- CERROR("Invalid partition id %d, total partitions %d\n",
- cpt, ncpt);
- goto failed;
- }
-
- if (cfs_cpt_weight(cptab, cpt)) {
- CERROR("Partition %d has already been set.\n", cpt);
- goto failed;
- }
-
- str = strim(str + n);
- if (str != bracket) {
- CERROR("Invalid pattern %s\n", str);
- goto failed;
- }
-
- bracket = strchr(str, ']');
- if (!bracket) {
- CERROR("missing right bracket for cpt %d, %s\n",
- cpt, str);
- goto failed;
- }
-
- if (cfs_expr_list_parse(str, (bracket - str) + 1,
- 0, high, &el)) {
- CERROR("Can't parse number range: %s\n", str);
- goto failed;
- }
-
- list_for_each_entry(range, &el->el_exprs, re_link) {
- for (i = range->re_lo; i <= range->re_hi; i++) {
- if ((i - range->re_lo) % range->re_stride)
- continue;
-
- rc = node ? cfs_cpt_set_node(cptab, cpt, i) :
- cfs_cpt_set_cpu(cptab, cpt, i);
- if (!rc) {
- cfs_expr_list_free(el);
- goto failed;
- }
- }
- }
-
- cfs_expr_list_free(el);
-
- if (!cfs_cpt_online(cptab, cpt)) {
- CERROR("No online CPU is found on partition %d\n", cpt);
- goto failed;
- }
-
- str = strim(bracket + 1);
- }
-
- return cptab;
-
- failed:
- cfs_cpt_table_free(cptab);
- return NULL;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-static enum cpuhp_state lustre_cpu_online;
-
-static void cfs_cpu_incr_cpt_version(void)
-{
- spin_lock(&cpt_data.cpt_lock);
- cpt_data.cpt_version++;
- spin_unlock(&cpt_data.cpt_lock);
-}
-
-static int cfs_cpu_online(unsigned int cpu)
-{
- cfs_cpu_incr_cpt_version();
- return 0;
-}
-
-static int cfs_cpu_dead(unsigned int cpu)
-{
- bool warn;
-
- cfs_cpu_incr_cpt_version();
-
- mutex_lock(&cpt_data.cpt_mutex);
- /* if all HTs in a core are offline, it may break affinity */
- cpumask_copy(cpt_data.cpt_cpumask, topology_sibling_cpumask(cpu));
- warn = cpumask_any_and(cpt_data.cpt_cpumask,
- cpu_online_mask) >= nr_cpu_ids;
- mutex_unlock(&cpt_data.cpt_mutex);
- CDEBUG(warn ? D_WARNING : D_INFO,
- "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u]\n",
- cpu);
- return 0;
-}
-#endif
-
-void
-cfs_cpu_fini(void)
-{
- if (cfs_cpt_tab)
- cfs_cpt_table_free(cfs_cpt_tab);
-
-#ifdef CONFIG_HOTPLUG_CPU
- if (lustre_cpu_online > 0)
- cpuhp_remove_state_nocalls(lustre_cpu_online);
- cpuhp_remove_state_nocalls(CPUHP_LUSTRE_CFS_DEAD);
-#endif
- free_cpumask_var(cpt_data.cpt_cpumask);
-}
-
-int
-cfs_cpu_init(void)
-{
- int ret = 0;
-
- LASSERT(!cfs_cpt_tab);
-
- memset(&cpt_data, 0, sizeof(cpt_data));
-
- if (!zalloc_cpumask_var(&cpt_data.cpt_cpumask, GFP_NOFS)) {
- CERROR("Failed to allocate scratch buffer\n");
- return -1;
- }
-
- spin_lock_init(&cpt_data.cpt_lock);
- mutex_init(&cpt_data.cpt_mutex);
-
-#ifdef CONFIG_HOTPLUG_CPU
- ret = cpuhp_setup_state_nocalls(CPUHP_LUSTRE_CFS_DEAD,
- "staging/lustre/cfe:dead", NULL,
- cfs_cpu_dead);
- if (ret < 0)
- goto failed;
- ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
- "staging/lustre/cfe:online",
- cfs_cpu_online, NULL);
- if (ret < 0)
- goto failed;
- lustre_cpu_online = ret;
-#endif
- ret = -EINVAL;
-
- if (*cpu_pattern) {
- char *cpu_pattern_dup = kstrdup(cpu_pattern, GFP_KERNEL);
-
- if (!cpu_pattern_dup) {
- CERROR("Failed to duplicate cpu_pattern\n");
- goto failed;
- }
-
- cfs_cpt_tab = cfs_cpt_table_create_pattern(cpu_pattern_dup);
- kfree(cpu_pattern_dup);
- if (!cfs_cpt_tab) {
- CERROR("Failed to create cptab from pattern %s\n",
- cpu_pattern);
- goto failed;
- }
-
- } else {
- cfs_cpt_tab = cfs_cpt_table_create(cpu_npartitions);
- if (!cfs_cpt_tab) {
- CERROR("Failed to create ptable with npartitions %d\n",
- cpu_npartitions);
- goto failed;
- }
- }
-
- spin_lock(&cpt_data.cpt_lock);
- if (cfs_cpt_tab->ctb_version != cpt_data.cpt_version) {
- spin_unlock(&cpt_data.cpt_lock);
- CERROR("CPU hotplug/unplug during setup\n");
- goto failed;
- }
- spin_unlock(&cpt_data.cpt_lock);
-
- LCONSOLE(0, "HW nodes: %d, HW CPU cores: %d, npartitions: %d\n",
- num_online_nodes(), num_online_cpus(),
- cfs_cpt_number(cfs_cpt_tab));
- return 0;
-
- failed:
- cfs_cpu_fini();
- return ret;
-}
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c b/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c
deleted file mode 100644
index 223505c37545..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c
+++ /dev/null
@@ -1,155 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/libcfs/libcfs.h>
-#include <linux/libcfs/libcfs_cpu.h>
-
-/** destroy cpu-partition lock, see libcfs_private.h for more detail */
-void
-cfs_percpt_lock_free(struct cfs_percpt_lock *pcl)
-{
- LASSERT(pcl->pcl_locks);
- LASSERT(!pcl->pcl_locked);
-
- cfs_percpt_free(pcl->pcl_locks);
- kfree(pcl);
-}
-EXPORT_SYMBOL(cfs_percpt_lock_free);
-
-/**
- * create cpu-partition lock, see libcfs_private.h for more detail.
- *
- * cpu-partition lock is designed for large-scale SMP systems, so we need
- * to reduce cacheline conflicts as much as we can; that's the reason we
- * always allocate cacheline-aligned memory blocks.
- */
-struct cfs_percpt_lock *
-cfs_percpt_lock_create(struct cfs_cpt_table *cptab,
- struct lock_class_key *keys)
-{
- struct cfs_percpt_lock *pcl;
- spinlock_t *lock;
- int i;
-
- /* NB: cptab can be NULL; in that case pcl will cover HW CPUs */
- pcl = kzalloc(sizeof(*pcl), GFP_NOFS);
- if (!pcl)
- return NULL;
-
- pcl->pcl_cptab = cptab;
- pcl->pcl_locks = cfs_percpt_alloc(cptab, sizeof(*lock));
- if (!pcl->pcl_locks) {
- kfree(pcl);
- return NULL;
- }
-
- if (!keys)
- CWARN("Cannot setup class key for percpt lock, you may see recursive locking warnings which are actually fake.\n");
-
- cfs_percpt_for_each(lock, i, pcl->pcl_locks) {
- spin_lock_init(lock);
- if (keys)
- lockdep_set_class(lock, &keys[i]);
- }
-
- return pcl;
-}
-EXPORT_SYMBOL(cfs_percpt_lock_create);
-
-/**
- * lock a CPU partition
- *
- * \a index != CFS_PERCPT_LOCK_EX
- * hold private lock indexed by \a index
- *
- * \a index == CFS_PERCPT_LOCK_EX
- * exclusively lock @pcl and nobody can take private lock
- */
-void
-cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index)
- __acquires(pcl->pcl_locks)
-{
- int ncpt = cfs_cpt_number(pcl->pcl_cptab);
- int i;
-
- LASSERT(index >= CFS_PERCPT_LOCK_EX && index < ncpt);
-
- if (ncpt == 1) {
- index = 0;
- } else { /* serialize with exclusive lock */
- while (pcl->pcl_locked)
- cpu_relax();
- }
-
- if (likely(index != CFS_PERCPT_LOCK_EX)) {
- spin_lock(pcl->pcl_locks[index]);
- return;
- }
-
- /* exclusive lock request */
- for (i = 0; i < ncpt; i++) {
- spin_lock(pcl->pcl_locks[i]);
- if (!i) {
- LASSERT(!pcl->pcl_locked);
- /* nobody should take a private lock after this,
- * so we won't be starved for too long
- */
- pcl->pcl_locked = 1;
- }
- }
-}
-EXPORT_SYMBOL(cfs_percpt_lock);
-
-/** unlock a CPU partition */
-void
-cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index)
- __releases(pcl->pcl_locks)
-{
- int ncpt = cfs_cpt_number(pcl->pcl_cptab);
- int i;
-
- index = ncpt == 1 ? 0 : index;
-
- if (likely(index != CFS_PERCPT_LOCK_EX)) {
- spin_unlock(pcl->pcl_locks[index]);
- return;
- }
-
- for (i = ncpt - 1; i >= 0; i--) {
- if (!i) {
- LASSERT(pcl->pcl_locked);
- pcl->pcl_locked = 0;
- }
- spin_unlock(pcl->pcl_locks[i]);
- }
-}
-EXPORT_SYMBOL(cfs_percpt_unlock);
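-
-/*
- * Sketch of the intended usage, inferred from the comments above:
- * common paths lock only their own partition, while rare global
- * operations take CFS_PERCPT_LOCK_EX to exclude every partition.
- */
-static void my_state_update(struct cfs_percpt_lock *pcl, int cpt, bool all)
-{
- if (!all) {
- cfs_percpt_lock(pcl, cpt); /* per-partition fast path */
- /* ... modify this partition's private state ... */
- cfs_percpt_unlock(pcl, cpt);
- } else {
- cfs_percpt_lock(pcl, CFS_PERCPT_LOCK_EX);
- /* ... modify state shared across all partitions ... */
- cfs_percpt_unlock(pcl, CFS_PERCPT_LOCK_EX);
- }
-}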
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c b/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c
deleted file mode 100644
index 2d533be9bb30..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c
+++ /dev/null
@@ -1,171 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/libcfs/libcfs_cpu.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-
-struct cfs_var_array {
- unsigned int va_count; /* # of buffers */
- unsigned int va_size; /* size of each var */
- struct cfs_cpt_table *va_cptab; /* cpu partition table */
- void *va_ptrs[0]; /* buffer addresses */
-};
-
-/*
- * free per-cpu-partition data; see more detail in cfs_percpt_alloc
- */
-void
-cfs_percpt_free(void *vars)
-{
- struct cfs_var_array *arr;
- int i;
-
- arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-
- for (i = 0; i < arr->va_count; i++)
- kfree(arr->va_ptrs[i]);
-
- kvfree(arr);
-}
-EXPORT_SYMBOL(cfs_percpt_free);
-
-/*
- * allocate per-CPU-partition variables; the returned value is an array of
- * pointers that can be indexed by CPU partition ID, e.g.:
- *
- * arr = cfs_percpt_alloc(cfs_cpt_tab, size);
- * then the caller can access the memory block for partition 0 via arr[0],
- * the block for partition 1 via arr[1]...
- * the block for partition N via arr[N]...
- *
- * Each block is cacheline aligned.
- */
-void *
-cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size)
-{
- struct cfs_var_array *arr;
- int count;
- int i;
-
- count = cfs_cpt_number(cptab);
-
- arr = kvzalloc(offsetof(struct cfs_var_array, va_ptrs[count]),
- GFP_KERNEL);
- if (!arr)
- return NULL;
-
- size = L1_CACHE_ALIGN(size);
- arr->va_size = size;
- arr->va_count = count;
- arr->va_cptab = cptab;
-
- for (i = 0; i < count; i++) {
- arr->va_ptrs[i] = kzalloc_node(size, GFP_KERNEL,
- cfs_cpt_spread_node(cptab, i));
- if (!arr->va_ptrs[i]) {
- cfs_percpt_free((void *)&arr->va_ptrs[0]);
- return NULL;
- }
- }
-
- return (void *)&arr->va_ptrs[0];
-}
-EXPORT_SYMBOL(cfs_percpt_alloc);
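
A hedged sketch of the allocation pattern (struct my_counter is a
hypothetical payload; cptab is assumed to be a valid CPU partition table):

	struct my_counter { long hits; };	/* hypothetical */
	struct my_counter **counters;
	int i;

	counters = cfs_percpt_alloc(cptab, sizeof(**counters));
	if (!counters)
		return -ENOMEM;

	/* one cacheline-aligned, zeroed block per partition */
	for (i = 0; i < cfs_percpt_number(counters); i++)
		counters[i]->hits = 0;

	cfs_percpt_free(counters);
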
-
-/*
- * return the number of CPU partitions (i.e. the number of elements in the
- * per-CPT array) according to the cptab of @vars
- */
-int
-cfs_percpt_number(void *vars)
-{
- struct cfs_var_array *arr;
-
- arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-
- return arr->va_count;
-}
-EXPORT_SYMBOL(cfs_percpt_number);
-
-/*
- * free variable array, see more detail in cfs_array_alloc
- */
-void
-cfs_array_free(void *vars)
-{
- struct cfs_var_array *arr;
- int i;
-
- arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-
- for (i = 0; i < arr->va_count; i++) {
- if (!arr->va_ptrs[i])
- continue;
-
- kvfree(arr->va_ptrs[i]);
- }
- kvfree(arr);
-}
-EXPORT_SYMBOL(cfs_array_free);
-
-/*
- * allocate a variable array; the returned value is an array of pointers.
- * The caller specifies the array length with @count; @size is the size of
- * each memory block in the array.
- */
-void *
-cfs_array_alloc(int count, unsigned int size)
-{
- struct cfs_var_array *arr;
- int i;
-
- /* zeroed so cfs_array_free() can safely skip unset pointers on error */
- arr = kvzalloc(offsetof(struct cfs_var_array, va_ptrs[count]), GFP_KERNEL);
- if (!arr)
- return NULL;
-
- arr->va_count = count;
- arr->va_size = size;
-
- for (i = 0; i < count; i++) {
- arr->va_ptrs[i] = kvzalloc(size, GFP_KERNEL);
-
- if (!arr->va_ptrs[i]) {
- cfs_array_free((void *)&arr->va_ptrs[0]);
- return NULL;
- }
- }
-
- return (void *)&arr->va_ptrs[0];
-}
-EXPORT_SYMBOL(cfs_array_alloc);
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c
deleted file mode 100644
index e1fb1263e3ae..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c
+++ /dev/null
@@ -1,562 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * String manipulation functions.
- *
- * libcfs/libcfs/libcfs_string.c
- *
- * Author: Nathan Rutman <nathan.rutman@sun.com>
- */
-
-#include <linux/ctype.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/libcfs/libcfs.h>
-#include <linux/libcfs/libcfs_string.h>
-
-/* Convert a text string to a bitmask */
-int cfs_str2mask(const char *str, const char *(*bit2str)(int bit),
- int *oldmask, int minmask, int allmask)
-{
- const char *debugstr;
- char op = '\0';
- int newmask = minmask, i, len, found = 0;
-
- /* <str> must be a list of tokens separated by whitespace
- * and optionally an operator ('+' or '-'). If an operator
- * appears first in <str>, '*oldmask' is used as the starting point
- * (relative), otherwise minmask is used (absolute). An operator
- * applies to all following tokens up to the next operator.
- */
- while (*str != '\0') {
- while (isspace(*str))
- str++;
- if (*str == '\0')
- break;
- if (*str == '+' || *str == '-') {
- op = *str++;
- if (!found)
- /* only if first token is relative */
- newmask = *oldmask;
- while (isspace(*str))
- str++;
- if (*str == '\0') /* trailing op */
- return -EINVAL;
- }
-
- /* find token length */
- len = 0;
- while (str[len] != '\0' && !isspace(str[len]) &&
- str[len] != '+' && str[len] != '-')
- len++;
-
- /* match token */
- found = 0;
- for (i = 0; i < 32; i++) {
- debugstr = bit2str(i);
- if (debugstr && strlen(debugstr) == len &&
- !strncasecmp(str, debugstr, len)) {
- if (op == '-')
- newmask &= ~(1 << i);
- else
- newmask |= (1 << i);
- found = 1;
- break;
- }
- }
- if (!found && len == 3 &&
- !strncasecmp(str, "ALL", len)) {
- if (op == '-')
- newmask = minmask;
- else
- newmask = allmask;
- found = 1;
- }
- if (!found) {
- CWARN("unknown mask '%.*s'.\n"
- "mask usage: [+|-]<all|type> ...\n", len, str);
- return -EINVAL;
- }
- str += len;
- }
-
- *oldmask = newmask;
- return 0;
-}
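
A sketch of the grammar in action (my_bit2str is a hypothetical bit-name
table; the real callers pass libcfs' debug/subsystem name lookups):

	int mask = 0;

	/* absolute: no leading operator, so start from minmask */
	cfs_str2mask("warning", my_bit2str, &mask, 0, ~0);

	/* relative: leading operator, so start from the current mask */
	cfs_str2mask("+neterror -warning", my_bit2str, &mask, 0, ~0);

	/* "ALL" selects allmask ('+') or minmask ('-') */
	cfs_str2mask("ALL", my_bit2str, &mask, 0, ~0);
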
-
-/* get the first string out of @str */
-char *cfs_firststr(char *str, size_t size)
-{
- size_t i = 0;
- char *end;
-
- /* trim leading spaces */
- while (i < size && *str && isspace(*str)) {
- ++i;
- ++str;
- }
-
- /* string with all spaces */
- if (*str == '\0')
- goto out;
-
- end = str;
- while (i < size && *end != '\0' && !isspace(*end)) {
- ++i;
- ++end;
- }
-
- *end = '\0';
-out:
- return str;
-}
-EXPORT_SYMBOL(cfs_firststr);
-
-/**
- * Extracts tokens from strings.
- *
- * Looks for \a delim in string \a next, sets \a res to point to
- * substring before the delimiter, sets \a next right after the found
- * delimiter.
- *
- * \retval 1 if \a res points to a string of non-whitespace characters
- * \retval 0 otherwise
- */
-int
-cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res)
-{
- char *end;
-
- if (!next->ls_str)
- return 0;
-
- /* skip leading white spaces */
- while (next->ls_len) {
- if (!isspace(*next->ls_str))
- break;
- next->ls_str++;
- next->ls_len--;
- }
-
- if (!next->ls_len) /* whitespaces only */
- return 0;
-
- if (*next->ls_str == delim) {
- /* first non-whitespace is the delimiter */
- return 0;
- }
-
- res->ls_str = next->ls_str;
- end = memchr(next->ls_str, delim, next->ls_len);
- if (!end) {
- /* the delimiter is not in the string */
- end = next->ls_str + next->ls_len;
- next->ls_str = NULL;
- } else {
- next->ls_str = end + 1;
- next->ls_len -= (end - res->ls_str + 1);
- }
-
- /* skip ending whitespaces */
- while (--end != res->ls_str) {
- if (!isspace(*end))
- break;
- }
-
- res->ls_len = end - res->ls_str + 1;
- return 1;
-}
-EXPORT_SYMBOL(cfs_gettok);
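
A short tokenizing sketch; whitespace around each token is trimmed as
described above:

	char buf[] = " eth0 , eth1 ";
	struct cfs_lstr next = { .ls_str = buf, .ls_len = sizeof(buf) - 1 };
	struct cfs_lstr tok;

	/* yields "eth0" then "eth1" */
	while (cfs_gettok(&next, ',', &tok))
		pr_info("token: %.*s\n", tok.ls_len, tok.ls_str);
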
-
-/**
- * Converts string to integer.
- *
- * Accepts only decimal digits; the value is parsed base 10.
- *
- * \retval 1 if first \a nob chars of \a str convert to a decimal
- * integer in the range [\a min, \a max]
- * \retval 0 otherwise
- */
-int
-cfs_str2num_check(char *str, int nob, unsigned int *num,
- unsigned int min, unsigned int max)
-{
- bool all_numbers = true;
- char *endp, cache;
- int rc;
-
- /**
- * kstrtouint() can only handle strings composed
- * of only numbers. We need to scan the string
- * passed in for the first non-digit character
- * and end the string at that location. If we
- * don't find any non-digit character we still
- * need to place a '\0' at position nob since
- * we are not interested in the rest of the
- * string which is longer than nob in size.
- * After we are done the character at the
- * position we placed '\0' must be restored.
- */
- for (endp = str; endp < str + nob; endp++) {
- if (!isdigit(*endp)) {
- all_numbers = false;
- break;
- }
- }
- cache = *endp;
- *endp = '\0';
-
- rc = kstrtouint(str, 10, num);
- *endp = cache;
- if (rc || !all_numbers)
- return 0;
-
- return (*num >= min && *num <= max);
-}
-EXPORT_SYMBOL(cfs_str2num_check);
-
-/**
- * Parses a \<range_expr\> token of the syntax. If \a bracketed is false,
- * \a src should only have a single token which can be \<number\> or \*
- *
- * \retval 0 if \a src parses to
- * \<number\> |
- * \<number\> '-' \<number\> |
- * \<number\> '-' \<number\> '/' \<number\>
- * and \a *expr is set to the allocated range_expr with range_expr::re_lo,
- * range_expr::re_hi and range_expr::re_stride initialized
- * \retval -EINVAL or -ENOMEM otherwise
- */
-static int
-cfs_range_expr_parse(struct cfs_lstr *src, unsigned int min, unsigned int max,
- int bracketed, struct cfs_range_expr **expr)
-{
- struct cfs_range_expr *re;
- struct cfs_lstr tok;
-
- re = kzalloc(sizeof(*re), GFP_NOFS);
- if (!re)
- return -ENOMEM;
-
- if (src->ls_len == 1 && src->ls_str[0] == '*') {
- re->re_lo = min;
- re->re_hi = max;
- re->re_stride = 1;
- goto out;
- }
-
- if (cfs_str2num_check(src->ls_str, src->ls_len,
- &re->re_lo, min, max)) {
- /* <number> is parsed */
- re->re_hi = re->re_lo;
- re->re_stride = 1;
- goto out;
- }
-
- if (!bracketed || !cfs_gettok(src, '-', &tok))
- goto failed;
-
- if (!cfs_str2num_check(tok.ls_str, tok.ls_len,
- &re->re_lo, min, max))
- goto failed;
-
- /* <number> - */
- if (cfs_str2num_check(src->ls_str, src->ls_len,
- &re->re_hi, min, max)) {
- /* <number> - <number> is parsed */
- re->re_stride = 1;
- goto out;
- }
-
- /* go to check <number> '-' <number> '/' <number> */
- if (cfs_gettok(src, '/', &tok)) {
- if (!cfs_str2num_check(tok.ls_str, tok.ls_len,
- &re->re_hi, min, max))
- goto failed;
-
- /* <number> - <number> / ... */
- if (cfs_str2num_check(src->ls_str, src->ls_len,
- &re->re_stride, min, max)) {
- /* <number> - <number> / <number> is parsed */
- goto out;
- }
- }
-
- out:
- *expr = re;
- return 0;
-
- failed:
- kfree(re);
- return -EINVAL;
-}
-
-/**
- * Print the range expression \a expr into the specified \a buffer.
- * If \a bracketed is true, the expression does not need additional
- * brackets.
- *
- * \retval number of characters written
- */
-static int
-cfs_range_expr_print(char *buffer, int count, struct cfs_range_expr *expr,
- bool bracketed)
-{
- int i;
- char s[] = "[";
- char e[] = "]";
-
- if (bracketed) {
- s[0] = '\0';
- e[0] = '\0';
- }
-
- if (expr->re_lo == expr->re_hi)
- i = scnprintf(buffer, count, "%u", expr->re_lo);
- else if (expr->re_stride == 1)
- i = scnprintf(buffer, count, "%s%u-%u%s",
- s, expr->re_lo, expr->re_hi, e);
- else
- i = scnprintf(buffer, count, "%s%u-%u/%u%s",
- s, expr->re_lo, expr->re_hi, expr->re_stride, e);
- return i;
-}
-
-/**
- * Print a list of range expressions (\a expr_list) into specified \a buffer.
- * If the list contains several expressions, separate them with comma
- * and surround the list with brackets.
- *
- * \retval number of characters written
- */
-int
-cfs_expr_list_print(char *buffer, int count, struct cfs_expr_list *expr_list)
-{
- struct cfs_range_expr *expr;
- int i = 0, j = 0;
- int numexprs = 0;
-
- if (count <= 0)
- return 0;
-
- list_for_each_entry(expr, &expr_list->el_exprs, re_link)
- numexprs++;
-
- if (numexprs > 1)
- i += scnprintf(buffer + i, count - i, "[");
-
- list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
- if (j++)
- i += scnprintf(buffer + i, count - i, ",");
- i += cfs_range_expr_print(buffer + i, count - i, expr,
- numexprs > 1);
- }
-
- if (numexprs > 1)
- i += scnprintf(buffer + i, count - i, "]");
-
- return i;
-}
-EXPORT_SYMBOL(cfs_expr_list_print);
-
-/**
- * Matches value (\a value) against ranges expression list \a expr_list.
- *
- * \retval 1 if \a value matches
- * \retval 0 otherwise
- */
-int
-cfs_expr_list_match(u32 value, struct cfs_expr_list *expr_list)
-{
- struct cfs_range_expr *expr;
-
- list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
- if (value >= expr->re_lo && value <= expr->re_hi &&
- !((value - expr->re_lo) % expr->re_stride))
- return 1;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(cfs_expr_list_match);
-
-/**
- * Convert an expression list (\a expr_list) to an array of all matched values
- *
- * \retval N where N is the total number of matched values
- * \retval 0 if the expression list is empty
- * \retval < 0 for failure
- */
-int
-cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, u32 **valpp)
-{
- struct cfs_range_expr *expr;
- u32 *val;
- int count = 0;
- int i;
-
- list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
- for (i = expr->re_lo; i <= expr->re_hi; i++) {
- if (!((i - expr->re_lo) % expr->re_stride))
- count++;
- }
- }
-
- if (!count) /* empty expression list */
- return 0;
-
- if (count > max) {
- CERROR("Number of values %d exceeds max allowed %d\n",
- max, count);
- return -EINVAL;
- }
-
- val = kvmalloc_array(count, sizeof(val[0]), GFP_KERNEL | __GFP_ZERO);
- if (!val)
- return -ENOMEM;
-
- count = 0;
- list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
- for (i = expr->re_lo; i <= expr->re_hi; i++) {
- if (!((i - expr->re_lo) % expr->re_stride))
- val[count++] = i;
- }
- }
-
- *valpp = val;
- return count;
-}
-EXPORT_SYMBOL(cfs_expr_list_values);
-
-/**
- * Frees cfs_range_expr structures of \a expr_list.
- *
- * \retval none
- */
-void
-cfs_expr_list_free(struct cfs_expr_list *expr_list)
-{
- while (!list_empty(&expr_list->el_exprs)) {
- struct cfs_range_expr *expr;
-
- expr = list_entry(expr_list->el_exprs.next,
- struct cfs_range_expr, re_link);
- list_del(&expr->re_link);
- kfree(expr);
- }
-
- kfree(expr_list);
-}
-EXPORT_SYMBOL(cfs_expr_list_free);
-
-/**
- * Parses \<cfs_expr_list\> token of the syntax.
- *
- * \retval 0 if \a str parses to \<number\> | \<expr_list\>
- * \retval -errno otherwise
- */
-int
-cfs_expr_list_parse(char *str, int len, unsigned int min, unsigned int max,
- struct cfs_expr_list **elpp)
-{
- struct cfs_expr_list *expr_list;
- struct cfs_range_expr *expr;
- struct cfs_lstr src;
- int rc;
-
- expr_list = kzalloc(sizeof(*expr_list), GFP_NOFS);
- if (!expr_list)
- return -ENOMEM;
-
- src.ls_str = str;
- src.ls_len = len;
-
- INIT_LIST_HEAD(&expr_list->el_exprs);
-
- if (src.ls_str[0] == '[' &&
- src.ls_str[src.ls_len - 1] == ']') {
- src.ls_str++;
- src.ls_len -= 2;
-
- rc = -EINVAL;
- while (src.ls_str) {
- struct cfs_lstr tok;
-
- if (!cfs_gettok(&src, ',', &tok)) {
- rc = -EINVAL;
- break;
- }
-
- rc = cfs_range_expr_parse(&tok, min, max, 1, &expr);
- if (rc)
- break;
-
- list_add_tail(&expr->re_link, &expr_list->el_exprs);
- }
- } else {
- rc = cfs_range_expr_parse(&src, min, max, 0, &expr);
- if (!rc)
- list_add_tail(&expr->re_link, &expr_list->el_exprs);
- }
-
- if (rc)
- cfs_expr_list_free(expr_list);
- else
- *elpp = expr_list;
-
- return rc;
-}
-EXPORT_SYMBOL(cfs_expr_list_parse);
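
Putting the pieces together, a hedged sketch that parses a bracketed list
and tests membership:

	struct cfs_expr_list *el;
	char expr[] = "[0-7/2,10]";
	int rc;

	rc = cfs_expr_list_parse(expr, sizeof(expr) - 1, 0, 255, &el);
	if (!rc) {
		/* the list matches 0, 2, 4, 6 (stride 2) and 10 */
		pr_info("4: %d\n", cfs_expr_list_match(4, el));	/* 1 */
		pr_info("5: %d\n", cfs_expr_list_match(5, el));	/* 0 */
		cfs_expr_list_free(el);
	}
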
-
-/**
- * Frees cfs_expr_list structures of \a list.
- *
- * For each struct cfs_expr_list structure found on \a list it frees
- * range_expr list attached to it and frees the cfs_expr_list itself.
- *
- * \retval none
- */
-void
-cfs_expr_list_free_list(struct list_head *list)
-{
- struct cfs_expr_list *el;
-
- while (!list_empty(list)) {
- el = list_entry(list->next, struct cfs_expr_list, el_link);
- list_del(&el->el_link);
- cfs_expr_list_free(el);
- }
-}
-EXPORT_SYMBOL(cfs_expr_list_free_list);
diff --git a/drivers/staging/lustre/lnet/libcfs/linux-crypto-adler.c b/drivers/staging/lustre/lnet/libcfs/linux-crypto-adler.c
deleted file mode 100644
index db81ed527452..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/linux-crypto-adler.c
+++ /dev/null
@@ -1,139 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- */
-
-/*
- * These are crypto API shash wrappers around zlib_adler32.
- */
-
-#include <linux/module.h>
-#include <linux/zutil.h>
-#include <crypto/internal/hash.h>
-#include "linux-crypto.h"
-
-#define CHKSUM_BLOCK_SIZE 1
-#define CHKSUM_DIGEST_SIZE 4
-
-static int adler32_cra_init(struct crypto_tfm *tfm)
-{
- u32 *key = crypto_tfm_ctx(tfm);
-
- *key = 1;
-
- return 0;
-}
-
-static int adler32_setkey(struct crypto_shash *hash, const u8 *key,
- unsigned int keylen)
-{
- u32 *mctx = crypto_shash_ctx(hash);
-
- if (keylen != sizeof(u32)) {
- crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
- return -EINVAL;
- }
- *mctx = *(u32 *)key;
- return 0;
-}
-
-static int adler32_init(struct shash_desc *desc)
-{
- u32 *mctx = crypto_shash_ctx(desc->tfm);
- u32 *cksump = shash_desc_ctx(desc);
-
- *cksump = *mctx;
-
- return 0;
-}
-
-static int adler32_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- u32 *cksump = shash_desc_ctx(desc);
-
- *cksump = zlib_adler32(*cksump, data, len);
- return 0;
-}
-
-static int __adler32_finup(u32 *cksump, const u8 *data, unsigned int len,
- u8 *out)
-{
- *(u32 *)out = zlib_adler32(*cksump, data, len);
- return 0;
-}
-
-static int adler32_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return __adler32_finup(shash_desc_ctx(desc), data, len, out);
-}
-
-static int adler32_final(struct shash_desc *desc, u8 *out)
-{
- u32 *cksump = shash_desc_ctx(desc);
-
- *(u32 *)out = *cksump;
- return 0;
-}
-
-static int adler32_digest(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return __adler32_finup(crypto_shash_ctx(desc->tfm), data, len,
- out);
-}
-
-static struct shash_alg alg = {
- .setkey = adler32_setkey,
- .init = adler32_init,
- .update = adler32_update,
- .final = adler32_final,
- .finup = adler32_finup,
- .digest = adler32_digest,
- .descsize = sizeof(u32),
- .digestsize = CHKSUM_DIGEST_SIZE,
- .base = {
- .cra_name = "adler32",
- .cra_driver_name = "adler32-zlib",
- .cra_priority = 100,
- .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .cra_blocksize = CHKSUM_BLOCK_SIZE,
- .cra_ctxsize = sizeof(u32),
- .cra_module = THIS_MODULE,
- .cra_init = adler32_cra_init,
- }
-};
-
-int cfs_crypto_adler32_register(void)
-{
- return crypto_register_shash(&alg);
-}
-
-void cfs_crypto_adler32_unregister(void)
-{
- crypto_unregister_shash(&alg);
-}
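
For reference, the checksum this wrapper exposes is the classic Adler-32
recurrence; a self-contained userspace sketch (not the kernel crypto API)
reproducing the arithmetic zlib_adler32 implements:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* a = 1 + sum of bytes, b = sum of running a values, mod 65521 */
	static uint32_t adler32(uint32_t adler, const unsigned char *p,
				size_t len)
	{
		uint32_t a = adler & 0xffff, b = adler >> 16;

		while (len--) {
			a = (a + *p++) % 65521;
			b = (b + a) % 65521;
		}
		return (b << 16) | a;
	}

	int main(void)
	{
		const char *s = "Wikipedia";

		/* seed is 1, matching adler32_cra_init() above */
		printf("%08x\n",
		       adler32(1, (const unsigned char *)s, strlen(s)));
		return 0;	/* prints 11e60398 */
	}
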
diff --git a/drivers/staging/lustre/lnet/libcfs/linux-crypto.c b/drivers/staging/lustre/lnet/libcfs/linux-crypto.c
deleted file mode 100644
index 21ff9bf6da47..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/linux-crypto.c
+++ /dev/null
@@ -1,447 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-
-#include <crypto/hash.h>
-#include <linux/scatterlist.h>
-#include <linux/highmem.h>
-#include <linux/module.h>
-#include <linux/libcfs/libcfs_crypto.h>
-#include <linux/libcfs/libcfs.h>
-#include "linux-crypto.h"
-
-/**
- * Array of hash algorithm speeds in MB/s
- */
-static int cfs_crypto_hash_speeds[CFS_HASH_ALG_MAX];
-
-/**
- * Initialize the state descriptor for the specified hash algorithm.
- *
- * An internal routine to allocate the hash-specific state in \a req for
- * use with cfs_crypto_hash_digest() to compute the hash of a single message,
- * though possibly in multiple chunks. The descriptor internal state should
- * be freed with cfs_crypto_hash_final().
- *
- * \param[in] hash_alg hash algorithm id (CFS_HASH_ALG_*)
- * \param[out] type pointer to the hash description in hash_types[]
- * array
- * \param[in,out] req hash state descriptor to be initialized
- * \param[in] key initial hash value/state, NULL to use default
- * value
- * \param[in] key_len length of \a key
- *
- * \retval 0 on success
- * \retval negative errno on failure
- */
-static int cfs_crypto_hash_alloc(enum cfs_crypto_hash_alg hash_alg,
- const struct cfs_crypto_hash_type **type,
- struct ahash_request **req,
- unsigned char *key,
- unsigned int key_len)
-{
- struct crypto_ahash *tfm;
- int err = 0;
-
- *type = cfs_crypto_hash_type(hash_alg);
-
- if (!*type) {
- CWARN("Unsupported hash algorithm id = %d, max id is %d\n",
- hash_alg, CFS_HASH_ALG_MAX);
- return -EINVAL;
- }
- tfm = crypto_alloc_ahash((*type)->cht_name, 0, CRYPTO_ALG_ASYNC);
-
- if (IS_ERR(tfm)) {
- CDEBUG(D_INFO, "Failed to alloc crypto hash %s\n",
- (*type)->cht_name);
- return PTR_ERR(tfm);
- }
-
- *req = ahash_request_alloc(tfm, GFP_KERNEL);
- if (!*req) {
- CDEBUG(D_INFO, "Failed to alloc ahash_request for %s\n",
- (*type)->cht_name);
- crypto_free_ahash(tfm);
- return -ENOMEM;
- }
-
- ahash_request_set_callback(*req, 0, NULL, NULL);
-
- if (key)
- err = crypto_ahash_setkey(tfm, key, key_len);
- else if ((*type)->cht_key)
- err = crypto_ahash_setkey(tfm,
- (unsigned char *)&((*type)->cht_key),
- (*type)->cht_size);
-
- if (err) {
- ahash_request_free(*req);
- crypto_free_ahash(tfm);
- return err;
- }
-
- CDEBUG(D_INFO, "Using crypto hash: %s (%s) speed %d MB/s\n",
- crypto_ahash_alg_name(tfm), crypto_ahash_driver_name(tfm),
- cfs_crypto_hash_speeds[hash_alg]);
-
- err = crypto_ahash_init(*req);
- if (err) {
- ahash_request_free(*req);
- crypto_free_ahash(tfm);
- }
- return err;
-}
-
-/**
- * Calculate hash digest for the passed buffer.
- *
- * This should be used when computing the hash on a single contiguous buffer.
- * It combines the hash initialization, computation, and cleanup.
- *
- * \param[in] hash_alg id of hash algorithm (CFS_HASH_ALG_*)
- * \param[in] buf data buffer on which to compute hash
- * \param[in] buf_len length of \a buf in bytes
- * \param[in] key initial value/state for algorithm,
- * if \a key = NULL use default initial value
- * \param[in] key_len length of \a key in bytes
- * \param[out] hash pointer to computed hash value,
- * if \a hash is NULL then \a hash_len is set to the
- * digest size in bytes and -ENOSPC is returned
- * \param[in,out] hash_len size of \a hash buffer
- *
- * \retval -EINVAL \a buf, \a buf_len, \a hash_len,
- * \a hash_alg invalid
- * \retval -ENOENT \a hash_alg is unsupported
- * \retval -ENOSPC \a hash is NULL, or \a hash_len less than
- * digest size
- * \retval 0 for success
- * \retval negative errno for other errors from lower
- * layers.
- */
-int cfs_crypto_hash_digest(enum cfs_crypto_hash_alg hash_alg,
- const void *buf, unsigned int buf_len,
- unsigned char *key, unsigned int key_len,
- unsigned char *hash, unsigned int *hash_len)
-{
- struct scatterlist sl;
- struct ahash_request *req;
- int err;
- const struct cfs_crypto_hash_type *type;
-
- if (!buf || !buf_len || !hash_len)
- return -EINVAL;
-
- err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len);
- if (err)
- return err;
-
- if (!hash || *hash_len < type->cht_size) {
- *hash_len = type->cht_size;
- crypto_free_ahash(crypto_ahash_reqtfm(req));
- ahash_request_free(req);
- return -ENOSPC;
- }
- sg_init_one(&sl, buf, buf_len);
-
- ahash_request_set_crypt(req, &sl, hash, sl.length);
- err = crypto_ahash_digest(req);
- crypto_free_ahash(crypto_ahash_reqtfm(req));
- ahash_request_free(req);
-
- return err;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_digest);
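
A one-shot usage sketch (buf/buf_len are assumed caller buffers;
CFS_HASH_ALG_CRC32C is one of the ids from libcfs_crypto.h):

	unsigned char digest[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
	unsigned int dlen = sizeof(digest);
	int rc;

	rc = cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32C, buf, buf_len,
				    NULL, 0, digest, &dlen);
	if (!rc)
		pr_info("computed %u-byte digest\n", dlen);
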
-
-/**
- * Allocate and initialize descriptor for hash algorithm.
- *
- * This should be used to initialize a hash descriptor for multiple calls
- * to a single hash function when computing the hash across multiple
- * separate buffers or pages using cfs_crypto_hash_update{,_page}().
- *
- * The hash descriptor should be freed with cfs_crypto_hash_final().
- *
- * \param[in] hash_alg algorithm id (CFS_HASH_ALG_*)
- * \param[in] key initial value/state for algorithm, if \a key = NULL
- * use default initial value
- * \param[in] key_len length of \a key in bytes
- *
- * \retval pointer to descriptor of hash instance
- * \retval ERR_PTR(errno) in case of error
- */
-struct ahash_request *
-cfs_crypto_hash_init(enum cfs_crypto_hash_alg hash_alg,
- unsigned char *key, unsigned int key_len)
-{
- struct ahash_request *req;
- int err;
- const struct cfs_crypto_hash_type *type;
-
- err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len);
-
- if (err)
- return ERR_PTR(err);
- return req;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_init);
-
-/**
- * Update hash digest computed on data within the given \a page
- *
- * \param[in] req hash state descriptor
- * \param[in] page data page on which to compute the hash
- * \param[in] offset offset within \a page at which to start hash
- * \param[in] len length of data on which to compute hash
- *
- * \retval 0 for success
- * \retval negative errno on failure
- */
-int cfs_crypto_hash_update_page(struct ahash_request *req,
- struct page *page, unsigned int offset,
- unsigned int len)
-{
- struct scatterlist sl;
-
- sg_init_table(&sl, 1);
- sg_set_page(&sl, page, len, offset & ~PAGE_MASK);
-
- ahash_request_set_crypt(req, &sl, NULL, sl.length);
- return crypto_ahash_update(req);
-}
-EXPORT_SYMBOL(cfs_crypto_hash_update_page);
-
-/**
- * Update hash digest computed on the specified data
- *
- * \param[in] req hash state descriptor
- * \param[in] buf data buffer on which to compute the hash
- * \param[in] buf_len length of \a buf on which to compute hash
- *
- * \retval 0 for success
- * \retval negative errno on failure
- */
-int cfs_crypto_hash_update(struct ahash_request *req,
- const void *buf, unsigned int buf_len)
-{
- struct scatterlist sl;
-
- sg_init_one(&sl, buf, buf_len);
-
- ahash_request_set_crypt(req, &sl, NULL, sl.length);
- return crypto_ahash_update(req);
-}
-EXPORT_SYMBOL(cfs_crypto_hash_update);
-
-/**
- * Finish hash calculation, copy hash digest to buffer, clean up hash descriptor
- *
- * \param[in] req hash descriptor
- * \param[out] hash pointer to buffer in which to store the hash digest
- * \param[in,out] hash_len pointer to hash buffer size; if \a hash or
- * \a hash_len is NULL, \a req is only freed and no
- * digest is computed
- *
- * \retval 0 for success
- * \retval -EOVERFLOW if hash_len is too small for the hash digest
- * \retval negative errno for other errors from lower layers
- */
-int cfs_crypto_hash_final(struct ahash_request *req,
- unsigned char *hash, unsigned int *hash_len)
-{
- int err;
- int size = crypto_ahash_digestsize(crypto_ahash_reqtfm(req));
-
- if (!hash || !hash_len) {
- err = 0;
- goto free_ahash;
- }
- if (*hash_len < size) {
- err = -EOVERFLOW;
- goto free_ahash;
- }
-
- ahash_request_set_crypt(req, NULL, hash, 0);
- err = crypto_ahash_final(req);
- if (!err)
- *hash_len = size;
-free_ahash:
- crypto_free_ahash(crypto_ahash_reqtfm(req));
- ahash_request_free(req);
- return err;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_final);
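
And the incremental form, hashing two discontiguous buffers with one
descriptor (a sketch; hdr/body and their lengths are assumed caller state):

	struct ahash_request *req;
	unsigned char digest[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
	unsigned int dlen = sizeof(digest);
	int rc;

	req = cfs_crypto_hash_init(CFS_HASH_ALG_ADLER32, NULL, 0);
	if (IS_ERR(req))
		return PTR_ERR(req);

	rc = cfs_crypto_hash_update(req, hdr, hdr_len);
	if (!rc)
		rc = cfs_crypto_hash_update(req, body, body_len);

	/* _final() always releases req, even with no output buffer */
	if (rc)
		cfs_crypto_hash_final(req, NULL, NULL);
	else
		rc = cfs_crypto_hash_final(req, digest, &dlen);
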
-
-/**
- * Compute the speed of the specified hash function
- *
- * Run a speed test of the given hash algorithm on an internally allocated
- * 1MB buffer. The speed is stored in the cfs_crypto_hash_speeds[]
- * array and is available through the cfs_crypto_hash_speed() function.
- *
- * \param[in] hash_alg hash algorithm id (CFS_HASH_ALG_*)
- */
-static void cfs_crypto_performance_test(enum cfs_crypto_hash_alg hash_alg)
-{
- int buf_len = max(PAGE_SIZE, 1048576UL);
- void *buf;
- unsigned long start, end;
- int bcount, err = 0;
- struct page *page;
- unsigned char hash[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
- unsigned int hash_len = sizeof(hash);
-
- page = alloc_page(GFP_KERNEL);
- if (!page) {
- err = -ENOMEM;
- goto out_err;
- }
-
- buf = kmap(page);
- memset(buf, 0xAD, PAGE_SIZE);
- kunmap(page);
-
- for (start = jiffies, end = start + msecs_to_jiffies(MSEC_PER_SEC),
- bcount = 0; time_before(jiffies, end); bcount++) {
- struct ahash_request *hdesc;
- int i;
-
- hdesc = cfs_crypto_hash_init(hash_alg, NULL, 0);
- if (IS_ERR(hdesc)) {
- err = PTR_ERR(hdesc);
- break;
- }
-
- for (i = 0; i < buf_len / PAGE_SIZE; i++) {
- err = cfs_crypto_hash_update_page(hdesc, page, 0,
- PAGE_SIZE);
- if (err)
- break;
- }
-
- err = cfs_crypto_hash_final(hdesc, hash, &hash_len);
- if (err)
- break;
- }
- end = jiffies;
- __free_page(page);
-out_err:
- if (err) {
- cfs_crypto_hash_speeds[hash_alg] = err;
- CDEBUG(D_INFO, "Crypto hash algorithm %s test error: rc = %d\n",
- cfs_crypto_hash_name(hash_alg), err);
- } else {
- unsigned long tmp;
-
- tmp = ((bcount * buf_len / jiffies_to_msecs(end - start)) *
- 1000) / (1024 * 1024);
- cfs_crypto_hash_speeds[hash_alg] = (int)tmp;
- CDEBUG(D_CONFIG, "Crypto hash algorithm %s speed = %d MB/s\n",
- cfs_crypto_hash_name(hash_alg),
- cfs_crypto_hash_speeds[hash_alg]);
- }
-}
-
-/**
- * hash speed in Mbytes per second for valid hash algorithm
- *
- * Return the performance of the specified \a hash_alg that was previously
- * computed using cfs_crypto_performance_test().
- *
- * \param[in] hash_alg hash algorithm id (CFS_HASH_ALG_*)
- *
- * \retval positive speed of the hash function in MB/s
- * \retval -ENOENT if \a hash_alg is unsupported
- * \retval negative errno if \a hash_alg speed is unavailable
- */
-int cfs_crypto_hash_speed(enum cfs_crypto_hash_alg hash_alg)
-{
- if (hash_alg < CFS_HASH_ALG_MAX)
- return cfs_crypto_hash_speeds[hash_alg];
- return -ENOENT;
-}
-EXPORT_SYMBOL(cfs_crypto_hash_speed);
-
-/**
- * Run the performance test for all hash algorithms.
- *
- * Run the cfs_crypto_performance_test() benchmark for all of the available
- * hash functions using a 1MB buffer size. This is a reasonable buffer size
- * for Lustre RPCs, even if the actual RPC size is larger or smaller.
- *
- * Since the setup cost and computation speed of various hash algorithms is
- * a function of the buffer size (and possibly internal contention of offload
- * engines), this speed only represents an estimate of the actual speed under
- * actual usage, but is reasonable for comparing available algorithms.
- *
- * The actual speeds are available via cfs_crypto_hash_speed() for later
- * comparison.
- *
- * \retval 0 on success
- * \retval -ENOMEM if no memory is available for test buffer
- */
-static int cfs_crypto_test_hashes(void)
-{
- enum cfs_crypto_hash_alg hash_alg;
-
- for (hash_alg = 0; hash_alg < CFS_HASH_ALG_MAX; hash_alg++)
- cfs_crypto_performance_test(hash_alg);
-
- return 0;
-}
-
-static int adler32;
-
-/**
- * Register available hash functions
- *
- * \retval 0
- */
-int cfs_crypto_register(void)
-{
- request_module("crc32c");
-
- if (cfs_crypto_adler32_register() == 0)
- adler32 = 1;
-
- /* check all algorithms and do performance test */
- cfs_crypto_test_hashes();
- return 0;
-}
-
-/**
- * Unregister previously registered hash functions
- */
-void cfs_crypto_unregister(void)
-{
- if (adler32)
- cfs_crypto_adler32_unregister();
- adler32 = 0;
-}
diff --git a/drivers/staging/lustre/lnet/libcfs/linux-crypto.h b/drivers/staging/lustre/lnet/libcfs/linux-crypto.h
deleted file mode 100644
index 5616e9ea1450..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/linux-crypto.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/**
- * Functions to register/unregister the adler32 shash algorithm.
- */
-int cfs_crypto_adler32_register(void);
-void cfs_crypto_adler32_unregister(void);
diff --git a/drivers/staging/lustre/lnet/libcfs/linux-debug.c b/drivers/staging/lustre/lnet/libcfs/linux-debug.c
deleted file mode 100644
index 15ab849374c2..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/linux-debug.c
+++ /dev/null
@@ -1,142 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/linux/linux-debug.c
- *
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/notifier.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/interrupt.h>
-#include <linux/completion.h>
-#include <linux/fs.h>
-#include <linux/uaccess.h>
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include "tracefile.h"
-
-#include <linux/kallsyms.h>
-
-char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall";
-
-/**
- * Upcall function once a Lustre log has been dumped.
- *
- * \param file path of the dumped log
- */
-void libcfs_run_debug_log_upcall(char *file)
-{
- char *argv[3];
- int rc;
- static const char * const envp[] = {
- "HOME=/",
- "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
- NULL
- };
-
- argv[0] = lnet_debug_log_upcall;
-
- LASSERTF(file, "called on a null filename\n");
- argv[1] = file; /* only need to pass the path of the file */
-
- argv[2] = NULL;
-
- rc = call_usermodehelper(argv[0], argv, (char **)envp, 1);
- if (rc < 0 && rc != -ENOENT) {
- CERROR("Error %d invoking LNET debug log upcall %s %s; check /sys/kernel/debug/lnet/debug_log_upcall\n",
- rc, argv[0], argv[1]);
- } else {
- CDEBUG(D_HA, "Invoked LNET debug log upcall %s %s\n",
- argv[0], argv[1]);
- }
-}
-
-/* coverity[+kill] */
-void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata)
-{
- libcfs_catastrophe = 1;
- libcfs_debug_msg(msgdata, "LBUG\n");
-
- if (in_interrupt()) {
- panic("LBUG in interrupt.\n");
- /* not reached */
- }
-
- dump_stack();
- if (!libcfs_panic_on_lbug)
- libcfs_debug_dumplog();
- if (libcfs_panic_on_lbug)
- panic("LBUG");
- set_current_state(TASK_UNINTERRUPTIBLE);
- while (1)
- schedule();
-}
-EXPORT_SYMBOL(lbug_with_loc);
-
-static int panic_notifier(struct notifier_block *self, unsigned long unused1,
- void *unused2)
-{
- if (libcfs_panic_in_progress)
- return 0;
-
- libcfs_panic_in_progress = 1;
- mb();
-
- return 0;
-}
-
-static struct notifier_block libcfs_panic_notifier = {
- .notifier_call = panic_notifier,
- .next = NULL,
- .priority = 10000,
-};
-
-void libcfs_register_panic_notifier(void)
-{
- atomic_notifier_chain_register(&panic_notifier_list,
- &libcfs_panic_notifier);
-}
-
-void libcfs_unregister_panic_notifier(void)
-{
- atomic_notifier_chain_unregister(&panic_notifier_list,
- &libcfs_panic_notifier);
-}
diff --git a/drivers/staging/lustre/lnet/libcfs/linux-tracefile.c b/drivers/staging/lustre/lnet/libcfs/linux-tracefile.c
deleted file mode 100644
index 347138409eba..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/linux-tracefile.c
+++ /dev/null
@@ -1,258 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#define LUSTRE_TRACEFILE_PRIVATE
-
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include "tracefile.h"
-
-/* percents to share the total debug memory for each type */
-static unsigned int pages_factor[CFS_TCD_TYPE_MAX] = {
- 80, /* 80% pages for CFS_TCD_TYPE_PROC */
- 10, /* 10% pages for CFS_TCD_TYPE_SOFTIRQ */
- 10 /* 10% pages for CFS_TCD_TYPE_IRQ */
-};
-
-char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX];
-
-static DECLARE_RWSEM(cfs_tracefile_sem);
-
-int cfs_tracefile_init_arch(void)
-{
- int i;
- int j;
- struct cfs_trace_cpu_data *tcd;
-
- /* initialize trace_data */
- memset(cfs_trace_data, 0, sizeof(cfs_trace_data));
- for (i = 0; i < CFS_TCD_TYPE_MAX; i++) {
- cfs_trace_data[i] =
- kmalloc_array(num_possible_cpus(),
- sizeof(union cfs_trace_data_union),
- GFP_KERNEL);
- if (!cfs_trace_data[i])
- goto out;
- }
-
- /* arch related info initialized */
- cfs_tcd_for_each(tcd, i, j) {
- spin_lock_init(&tcd->tcd_lock);
- tcd->tcd_pages_factor = pages_factor[i];
- tcd->tcd_type = i;
- tcd->tcd_cpu = j;
- }
-
- for (i = 0; i < num_possible_cpus(); i++)
- for (j = 0; j < 3; j++) {
- cfs_trace_console_buffers[i][j] =
- kmalloc(CFS_TRACE_CONSOLE_BUFFER_SIZE,
- GFP_KERNEL);
-
- if (!cfs_trace_console_buffers[i][j])
- goto out;
- }
-
- return 0;
-
-out:
- cfs_tracefile_fini_arch();
- pr_err("lnet: Not enough memory\n");
- return -ENOMEM;
-}
-
-void cfs_tracefile_fini_arch(void)
-{
- int i;
- int j;
-
- for (i = 0; i < num_possible_cpus(); i++)
- for (j = 0; j < 3; j++) {
- kfree(cfs_trace_console_buffers[i][j]);
- cfs_trace_console_buffers[i][j] = NULL;
- }
-
- for (i = 0; cfs_trace_data[i]; i++) {
- kfree(cfs_trace_data[i]);
- cfs_trace_data[i] = NULL;
- }
-}
-
-void cfs_tracefile_read_lock(void)
-{
- down_read(&cfs_tracefile_sem);
-}
-
-void cfs_tracefile_read_unlock(void)
-{
- up_read(&cfs_tracefile_sem);
-}
-
-void cfs_tracefile_write_lock(void)
-{
- down_write(&cfs_tracefile_sem);
-}
-
-void cfs_tracefile_write_unlock(void)
-{
- up_write(&cfs_tracefile_sem);
-}
-
-enum cfs_trace_buf_type cfs_trace_buf_idx_get(void)
-{
- if (in_irq())
- return CFS_TCD_TYPE_IRQ;
- if (in_softirq())
- return CFS_TCD_TYPE_SOFTIRQ;
- return CFS_TCD_TYPE_PROC;
-}
-
-/*
- * The walking argument indicates that the lock is being taken by the
- * iterator that walks all tcd types; in that case we must also disable
- * local irqs to avoid deadlocks with locks taken from interrupt context.
- * See LU-1311 for details.
- */
-int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
- __acquires(&tcd->tc_lock)
-{
- __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
- if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
- spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags);
- else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
- spin_lock_bh(&tcd->tcd_lock);
- else if (unlikely(walking))
- spin_lock_irq(&tcd->tcd_lock);
- else
- spin_lock(&tcd->tcd_lock);
- return 1;
-}
-
-void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
- __releases(&tcd->tcd_lock)
-{
- __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
- if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
- spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags);
- else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
- spin_unlock_bh(&tcd->tcd_lock);
- else if (unlikely(walking))
- spin_unlock_irq(&tcd->tcd_lock);
- else
- spin_unlock(&tcd->tcd_lock);
-}
-
-void
-cfs_set_ptldebug_header(struct ptldebug_header *header,
- struct libcfs_debug_msg_data *msgdata,
- unsigned long stack)
-{
- struct timespec64 ts;
-
- ktime_get_real_ts64(&ts);
-
- header->ph_subsys = msgdata->msg_subsys;
- header->ph_mask = msgdata->msg_mask;
- header->ph_cpu_id = smp_processor_id();
- header->ph_type = cfs_trace_buf_idx_get();
- /* y2038 safe since all user space treats this as unsigned, but
- * will overflow in 2106
- */
- header->ph_sec = (u32)ts.tv_sec;
- header->ph_usec = ts.tv_nsec / NSEC_PER_USEC;
- header->ph_stack = stack;
- header->ph_pid = current->pid;
- header->ph_line_num = msgdata->msg_line;
- header->ph_extern_pid = 0;
-}
-
-static char *
-dbghdr_to_err_string(struct ptldebug_header *hdr)
-{
- switch (hdr->ph_subsys) {
- case S_LND:
- case S_LNET:
- return "LNetError";
- default:
- return "LustreError";
- }
-}
-
-static char *
-dbghdr_to_info_string(struct ptldebug_header *hdr)
-{
- switch (hdr->ph_subsys) {
- case S_LND:
- case S_LNET:
- return "LNet";
- default:
- return "Lustre";
- }
-}
-
-void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
- const char *buf, int len, const char *file,
- const char *fn)
-{
- char *prefix = "Lustre", *ptype = NULL;
-
- if (mask & D_EMERG) {
- prefix = dbghdr_to_err_string(hdr);
- ptype = KERN_EMERG;
- } else if (mask & D_ERROR) {
- prefix = dbghdr_to_err_string(hdr);
- ptype = KERN_ERR;
- } else if (mask & D_WARNING) {
- prefix = dbghdr_to_info_string(hdr);
- ptype = KERN_WARNING;
- } else if (mask & (D_CONSOLE | libcfs_printk)) {
- prefix = dbghdr_to_info_string(hdr);
- ptype = KERN_INFO;
- }
-
- if (mask & D_CONSOLE) {
- pr_info("%s%s: %.*s", ptype, prefix, len, buf);
- } else {
- pr_info("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix,
- hdr->ph_pid, hdr->ph_extern_pid, file,
- hdr->ph_line_num, fn, len, buf);
- }
-}
-
-int cfs_trace_max_debug_mb(void)
-{
- int total_mb = (totalram_pages >> (20 - PAGE_SHIFT));
-
- return max(512, (total_mb * 80) / 100);
-}
diff --git a/drivers/staging/lustre/lnet/libcfs/module.c b/drivers/staging/lustre/lnet/libcfs/module.c
deleted file mode 100644
index 5dc7de9e6478..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/module.c
+++ /dev/null
@@ -1,758 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-#include <linux/miscdevice.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <net/sock.h>
-#include <linux/uio.h>
-
-#include <linux/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/list.h>
-
-#include <linux/sysctl.h>
-#include <linux/debugfs.h>
-
-# define DEBUG_SUBSYSTEM S_LNET
-
-#include <asm/div64.h>
-
-#include <linux/libcfs/libcfs_crypto.h>
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-#include "tracefile.h"
-
-struct lnet_debugfs_symlink_def {
- char *name;
- char *target;
-};
-
-static struct dentry *lnet_debugfs_root;
-
-BLOCKING_NOTIFIER_HEAD(libcfs_ioctl_list);
-EXPORT_SYMBOL(libcfs_ioctl_list);
-
-static inline size_t libcfs_ioctl_packlen(struct libcfs_ioctl_data *data)
-{
- size_t len = sizeof(*data);
-
- len += cfs_size_round(data->ioc_inllen1);
- len += cfs_size_round(data->ioc_inllen2);
- return len;
-}
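
cfs_size_round() pads each inline buffer to an 8-byte boundary, so the
packed length is deterministic; a sketch of the arithmetic (assuming the
usual 8-byte rounding in libcfs):

	/* inline buffers of 5 and 9 bytes pack as 8 + 16 payload bytes */
	size_t len = sizeof(struct libcfs_ioctl_data);

	len += cfs_size_round(5);	/* -> 8 */
	len += cfs_size_round(9);	/* -> 16 */
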
-
-static inline bool libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data)
-{
- if (data->ioc_hdr.ioc_len > BIT(30)) {
- CERROR("LIBCFS ioctl: ioc_len larger than 1<<30\n");
- return true;
- }
- if (data->ioc_inllen1 > BIT(30)) {
- CERROR("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n");
- return true;
- }
- if (data->ioc_inllen2 > BIT(30)) {
- CERROR("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n");
- return true;
- }
- if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
- CERROR("LIBCFS ioctl: inlbuf1 pointer but 0 length\n");
- return true;
- }
- if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
- CERROR("LIBCFS ioctl: inlbuf2 pointer but 0 length\n");
- return true;
- }
- if (data->ioc_pbuf1 && !data->ioc_plen1) {
- CERROR("LIBCFS ioctl: pbuf1 pointer but 0 length\n");
- return true;
- }
- if (data->ioc_pbuf2 && !data->ioc_plen2) {
- CERROR("LIBCFS ioctl: pbuf2 pointer but 0 length\n");
- return true;
- }
- if (data->ioc_plen1 && !data->ioc_pbuf1) {
- CERROR("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n");
- return true;
- }
- if (data->ioc_plen2 && !data->ioc_pbuf2) {
- CERROR("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n");
- return true;
- }
- if ((u32)libcfs_ioctl_packlen(data) != data->ioc_hdr.ioc_len) {
- CERROR("LIBCFS ioctl: packlen != ioc_len\n");
- return true;
- }
- if (data->ioc_inllen1 &&
- data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') {
- CERROR("LIBCFS ioctl: inlbuf1 not 0 terminated\n");
- return true;
- }
- if (data->ioc_inllen2 &&
- data->ioc_bulk[cfs_size_round(data->ioc_inllen1) +
- data->ioc_inllen2 - 1] != '\0') {
- CERROR("LIBCFS ioctl: inlbuf2 not 0 terminated\n");
- return true;
- }
- return false;
-}
-
-static int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data)
-{
- if (libcfs_ioctl_is_invalid(data)) {
- CERROR("libcfs ioctl: parameter not correctly formatted\n");
- return -EINVAL;
- }
-
- if (data->ioc_inllen1)
- data->ioc_inlbuf1 = &data->ioc_bulk[0];
-
- if (data->ioc_inllen2)
- data->ioc_inlbuf2 = &data->ioc_bulk[0] +
- cfs_size_round(data->ioc_inllen1);
-
- return 0;
-}
-
-static int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp,
- const struct libcfs_ioctl_hdr __user *uhdr)
-{
- struct libcfs_ioctl_hdr hdr;
- int err;
-
- if (copy_from_user(&hdr, uhdr, sizeof(hdr)))
- return -EFAULT;
-
- if (hdr.ioc_version != LIBCFS_IOCTL_VERSION &&
- hdr.ioc_version != LIBCFS_IOCTL_VERSION2) {
- CERROR("libcfs ioctl: version mismatch expected %#x, got %#x\n",
- LIBCFS_IOCTL_VERSION, hdr.ioc_version);
- return -EINVAL;
- }
-
- if (hdr.ioc_len < sizeof(hdr)) {
- CERROR("libcfs ioctl: user buffer too small for ioctl\n");
- return -EINVAL;
- }
-
- if (hdr.ioc_len > LIBCFS_IOC_DATA_MAX) {
- CERROR("libcfs ioctl: user buffer is too large %d/%d\n",
- hdr.ioc_len, LIBCFS_IOC_DATA_MAX);
- return -EINVAL;
- }
-
- *hdr_pp = kvmalloc(hdr.ioc_len, GFP_KERNEL);
- if (!*hdr_pp)
- return -ENOMEM;
-
- if (copy_from_user(*hdr_pp, uhdr, hdr.ioc_len)) {
- err = -EFAULT;
- goto free;
- }
-
- if ((*hdr_pp)->ioc_version != hdr.ioc_version ||
- (*hdr_pp)->ioc_len != hdr.ioc_len) {
- err = -EINVAL;
- goto free;
- }
-
- return 0;
-
-free:
- kvfree(*hdr_pp);
- return err;
-}
-
-static int libcfs_ioctl(unsigned long cmd, void __user *uparam)
-{
- struct libcfs_ioctl_data *data = NULL;
- struct libcfs_ioctl_hdr *hdr;
- int err;
-
- /* 'cmd' and permissions get checked in our arch-specific caller */
- err = libcfs_ioctl_getdata(&hdr, uparam);
- if (err) {
- CDEBUG_LIMIT(D_ERROR,
- "libcfs ioctl: data header error %d\n", err);
- return err;
- }
-
- if (hdr->ioc_version == LIBCFS_IOCTL_VERSION) {
- /*
- * The libcfs_ioctl_data_adjust() function performs adjustment
- * operations on the libcfs_ioctl_data structure to make
- * it usable by the code. It does not need to be called
- * for newly added data structures.
- */
- data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr);
- err = libcfs_ioctl_data_adjust(data);
- if (err)
- goto out;
- }
-
- CDEBUG(D_IOCTL, "libcfs ioctl cmd %lu\n", cmd);
- switch (cmd) {
- case IOC_LIBCFS_CLEAR_DEBUG:
- libcfs_debug_clear_buffer();
- break;
-
- case IOC_LIBCFS_MARK_DEBUG:
- if (!data || !data->ioc_inlbuf1 ||
- data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0') {
- err = -EINVAL;
- goto out;
- }
- libcfs_debug_mark_buffer(data->ioc_inlbuf1);
- break;
-
- default:
- err = blocking_notifier_call_chain(&libcfs_ioctl_list,
- cmd, hdr);
- if (!(err & NOTIFY_STOP_MASK))
- /* No-one claimed the ioctl */
- err = -EINVAL;
- else
- err = notifier_to_errno(err);
- if (!err)
- if (copy_to_user(uparam, hdr, hdr->ioc_len))
- err = -EFAULT;
- break;
- }
-out:
- kvfree(hdr);
- return err;
-}
-
-static long
-libcfs_psdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
-
- if (_IOC_TYPE(cmd) != IOC_LIBCFS_TYPE ||
- _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR ||
- _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR) {
- CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
- _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
- return -EINVAL;
- }
-
- return libcfs_ioctl(cmd, (void __user *)arg);
-}
-
-static const struct file_operations libcfs_fops = {
- .owner = THIS_MODULE,
- .unlocked_ioctl = libcfs_psdev_ioctl,
-};
-
-static struct miscdevice libcfs_dev = {
- .minor = MISC_DYNAMIC_MINOR,
- .name = "lnet",
- .fops = &libcfs_fops,
-};
-
-static int libcfs_dev_registered;
-
-int lprocfs_call_handler(void *data, int write, loff_t *ppos,
- void __user *buffer, size_t *lenp,
- int (*handler)(void *data, int write, loff_t pos,
- void __user *buffer, int len))
-{
- int rc = handler(data, write, *ppos, buffer, *lenp);
-
- if (rc < 0)
- return rc;
-
- if (write) {
- *ppos += *lenp;
- } else {
- *lenp = rc;
- *ppos += rc;
- }
- return 0;
-}
-EXPORT_SYMBOL(lprocfs_call_handler);
-
-static int __proc_dobitmasks(void *data, int write,
- loff_t pos, void __user *buffer, int nob)
-{
- const int tmpstrlen = 512;
- char *tmpstr;
- int rc;
- unsigned int *mask = data;
- int is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0;
- int is_printk = (mask == &libcfs_printk) ? 1 : 0;
-
- rc = cfs_trace_allocate_string_buffer(&tmpstr, tmpstrlen);
- if (rc < 0)
- return rc;
-
- if (!write) {
- libcfs_debug_mask2str(tmpstr, tmpstrlen, *mask, is_subsys);
- rc = strlen(tmpstr);
-
- if (pos >= rc) {
- rc = 0;
- } else {
- rc = cfs_trace_copyout_string(buffer, nob,
- tmpstr + pos, "\n");
- }
- } else {
- rc = cfs_trace_copyin_string(tmpstr, tmpstrlen, buffer, nob);
- if (rc < 0) {
- kfree(tmpstr);
- return rc;
- }
-
- rc = libcfs_debug_str2mask(mask, tmpstr, is_subsys);
- /* Always print LBUG/LASSERT to console, so keep this mask */
- if (is_printk)
- *mask |= D_EMERG;
- }
-
- kfree(tmpstr);
- return rc;
-}
-
-static int proc_dobitmasks(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
- __proc_dobitmasks);
-}
-
-static int __proc_dump_kernel(void *data, int write,
- loff_t pos, void __user *buffer, int nob)
-{
- if (!write)
- return 0;
-
- return cfs_trace_dump_debug_buffer_usrstr(buffer, nob);
-}
-
-static int proc_dump_kernel(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
- __proc_dump_kernel);
-}
-
-static int __proc_daemon_file(void *data, int write,
- loff_t pos, void __user *buffer, int nob)
-{
- if (!write) {
- int len = strlen(cfs_tracefile);
-
- if (pos >= len)
- return 0;
-
- return cfs_trace_copyout_string(buffer, nob,
- cfs_tracefile + pos, "\n");
- }
-
- return cfs_trace_daemon_command_usrstr(buffer, nob);
-}
-
-static int proc_daemon_file(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
- __proc_daemon_file);
-}
-
-static int libcfs_force_lbug(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos)
-{
- if (write)
- LBUG();
- return 0;
-}
-
-static int proc_fail_loc(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos)
-{
- int rc;
- long old_fail_loc = cfs_fail_loc;
-
- rc = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
- if (old_fail_loc != cfs_fail_loc)
- wake_up(&cfs_race_waitq);
- return rc;
-}
-
-static int __proc_cpt_table(void *data, int write,
- loff_t pos, void __user *buffer, int nob)
-{
- char *buf = NULL;
- int len = 4096;
- int rc = 0;
-
- if (write)
- return -EPERM;
-
- while (1) {
- buf = kzalloc(len, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- rc = cfs_cpt_table_print(cfs_cpt_tab, buf, len);
- if (rc >= 0)
- break;
-
- if (rc == -EFBIG) {
- kfree(buf);
- len <<= 1;
- continue;
- }
- goto out;
- }
-
- if (pos >= rc) {
- rc = 0;
- goto out;
- }
-
- rc = cfs_trace_copyout_string(buffer, nob, buf + pos, NULL);
- out:
- kfree(buf);
- return rc;
-}
-
-static int proc_cpt_table(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
- __proc_cpt_table);
-}
-
-static struct ctl_table lnet_table[] = {
- {
- .procname = "debug",
- .data = &libcfs_debug,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dobitmasks,
- },
- {
- .procname = "subsystem_debug",
- .data = &libcfs_subsystem_debug,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dobitmasks,
- },
- {
- .procname = "printk",
- .data = &libcfs_printk,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dobitmasks,
- },
- {
- .procname = "cpu_partition_table",
- .maxlen = 128,
- .mode = 0444,
- .proc_handler = &proc_cpt_table,
- },
- {
- .procname = "debug_log_upcall",
- .data = lnet_debug_log_upcall,
- .maxlen = sizeof(lnet_debug_log_upcall),
- .mode = 0644,
- .proc_handler = &proc_dostring,
- },
- {
- .procname = "catastrophe",
- .data = &libcfs_catastrophe,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec,
- },
- {
- .procname = "dump_kernel",
- .maxlen = 256,
- .mode = 0200,
- .proc_handler = &proc_dump_kernel,
- },
- {
- .procname = "daemon_file",
- .mode = 0644,
- .maxlen = 256,
- .proc_handler = &proc_daemon_file,
- },
- {
- .procname = "force_lbug",
- .data = NULL,
- .maxlen = 0,
- .mode = 0200,
- .proc_handler = &libcfs_force_lbug
- },
- {
- .procname = "fail_loc",
- .data = &cfs_fail_loc,
- .maxlen = sizeof(cfs_fail_loc),
- .mode = 0644,
- .proc_handler = &proc_fail_loc
- },
- {
- .procname = "fail_val",
- .data = &cfs_fail_val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .procname = "fail_err",
- .data = &cfs_fail_err,
- .maxlen = sizeof(cfs_fail_err),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- }
-};
-
-static const struct lnet_debugfs_symlink_def lnet_debugfs_symlinks[] = {
- { "console_ratelimit",
- "/sys/module/libcfs/parameters/libcfs_console_ratelimit"},
- { "debug_path",
- "/sys/module/libcfs/parameters/libcfs_debug_file_path"},
- { "panic_on_lbug",
- "/sys/module/libcfs/parameters/libcfs_panic_on_lbug"},
- { "libcfs_console_backoff",
- "/sys/module/libcfs/parameters/libcfs_console_backoff"},
- { "debug_mb",
- "/sys/module/libcfs/parameters/libcfs_debug_mb"},
- { "console_min_delay_centisecs",
- "/sys/module/libcfs/parameters/libcfs_console_min_delay"},
- { "console_max_delay_centisecs",
- "/sys/module/libcfs/parameters/libcfs_console_max_delay"},
- {},
-};
-
-static ssize_t lnet_debugfs_read(struct file *filp, char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct ctl_table *table = filp->private_data;
- int error;
-
- error = table->proc_handler(table, 0, (void __user *)buf, &count, ppos);
- if (!error)
- error = count;
-
- return error;
-}
-
-static ssize_t lnet_debugfs_write(struct file *filp, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct ctl_table *table = filp->private_data;
- int error;
-
- error = table->proc_handler(table, 1, (void __user *)buf, &count, ppos);
- if (!error)
- error = count;
-
- return error;
-}
-
-static const struct file_operations lnet_debugfs_file_operations_rw = {
- .open = simple_open,
- .read = lnet_debugfs_read,
- .write = lnet_debugfs_write,
- .llseek = default_llseek,
-};
-
-static const struct file_operations lnet_debugfs_file_operations_ro = {
- .open = simple_open,
- .read = lnet_debugfs_read,
- .llseek = default_llseek,
-};
-
-static const struct file_operations lnet_debugfs_file_operations_wo = {
- .open = simple_open,
- .write = lnet_debugfs_write,
- .llseek = default_llseek,
-};
-
-static const struct file_operations *lnet_debugfs_fops_select(umode_t mode)
-{
- if (!(mode & 0222))
- return &lnet_debugfs_file_operations_ro;
-
- if (!(mode & 0444))
- return &lnet_debugfs_file_operations_wo;
-
- return &lnet_debugfs_file_operations_rw;
-}
-
-void lustre_insert_debugfs(struct ctl_table *table)
-{
- if (!lnet_debugfs_root)
- lnet_debugfs_root = debugfs_create_dir("lnet", NULL);
-
-	/* Even if we cannot create it, just ignore the failure altogether */
- if (IS_ERR_OR_NULL(lnet_debugfs_root))
- return;
-
- /*
- * We don't save the dentry returned because we don't call
- * debugfs_remove() but rather remove_recursive()
- */
- for (; table->procname; table++)
- debugfs_create_file(table->procname, table->mode,
- lnet_debugfs_root, table,
- lnet_debugfs_fops_select(table->mode));
-}
-EXPORT_SYMBOL_GPL(lustre_insert_debugfs);
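
For orientation, a minimal sketch of how a caller would use this helper to expose its own tunables under /sys/kernel/debug/lnet (the table and its variable are hypothetical; the conventions come from lnet_table above):

	static int example_value;	/* hypothetical tunable */

	static struct ctl_table example_table[] = {
		{
			.procname	= "example_value",
			.data		= &example_value,
			.maxlen		= sizeof(int),
			.mode		= 0644,	/* 0444 and 0222 both set => rw fops */
			.proc_handler	= &proc_dointvec,
		},
		{ }	/* empty sentinel terminates the walk */
	};

	lustre_insert_debugfs(example_table);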
-
-static void lustre_insert_debugfs_links(
- const struct lnet_debugfs_symlink_def *symlinks)
-{
- for (; symlinks && symlinks->name; symlinks++)
- debugfs_create_symlink(symlinks->name, lnet_debugfs_root,
- symlinks->target);
-}
-
-static void lustre_remove_debugfs(void)
-{
- debugfs_remove_recursive(lnet_debugfs_root);
-
- lnet_debugfs_root = NULL;
-}
-
-static DEFINE_MUTEX(libcfs_startup);
-static int libcfs_active;
-
-int libcfs_setup(void)
-{
- int rc = -EINVAL;
-
- mutex_lock(&libcfs_startup);
- if (libcfs_active)
- goto out;
-
- if (!libcfs_dev_registered)
- goto err;
-
- rc = libcfs_debug_init(5 * 1024 * 1024);
- if (rc < 0) {
- pr_err("LustreError: libcfs_debug_init: %d\n", rc);
- goto err;
- }
-
- rc = cfs_cpu_init();
- if (rc)
- goto err;
-
- cfs_rehash_wq = alloc_workqueue("cfs_rh", WQ_SYSFS, 4);
- if (!cfs_rehash_wq) {
- CERROR("Failed to start rehash workqueue.\n");
- rc = -ENOMEM;
- goto err;
- }
-
- rc = cfs_crypto_register();
- if (rc) {
- CERROR("cfs_crypto_register: error %d\n", rc);
- goto err;
- }
-
- lustre_insert_debugfs(lnet_table);
- if (!IS_ERR_OR_NULL(lnet_debugfs_root))
- lustre_insert_debugfs_links(lnet_debugfs_symlinks);
-
- CDEBUG(D_OTHER, "portals setup OK\n");
-out:
- libcfs_active = 1;
- mutex_unlock(&libcfs_startup);
- return 0;
-err:
- cfs_crypto_unregister();
- if (cfs_rehash_wq)
- destroy_workqueue(cfs_rehash_wq);
- cfs_cpu_fini();
- libcfs_debug_cleanup();
- mutex_unlock(&libcfs_startup);
- return rc;
-}
-EXPORT_SYMBOL(libcfs_setup);
-
-static int libcfs_init(void)
-{
- int rc;
-
- rc = misc_register(&libcfs_dev);
- if (rc)
- CERROR("misc_register: error %d\n", rc);
- else
- libcfs_dev_registered = 1;
- return rc;
-}
-
-static void libcfs_exit(void)
-{
- int rc;
-
- lustre_remove_debugfs();
-
- if (cfs_rehash_wq)
- destroy_workqueue(cfs_rehash_wq);
-
- cfs_crypto_unregister();
-
- if (libcfs_dev_registered)
- misc_deregister(&libcfs_dev);
-
- cfs_cpu_fini();
-
- rc = libcfs_debug_cleanup();
- if (rc)
- pr_err("LustreError: libcfs_debug_cleanup: %d\n", rc);
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre helper library");
-MODULE_VERSION(LIBCFS_VERSION);
-MODULE_LICENSE("GPL");
-
-module_init(libcfs_init);
-module_exit(libcfs_exit);
diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.c b/drivers/staging/lustre/lnet/libcfs/tracefile.c
deleted file mode 100644
index 7ca562e156f0..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/tracefile.c
+++ /dev/null
@@ -1,1198 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/libcfs/tracefile.c
- *
- * Author: Zach Brown <zab@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#define LUSTRE_TRACEFILE_PRIVATE
-#define pr_fmt(fmt) "Lustre: " fmt
-
-#include <linux/ratelimit.h>
-#include <linux/highmem.h>
-#include <linux/ctype.h>
-#include <linux/kthread.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include "tracefile.h"
-
-/* XXX move things up to the top, comment */
-union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned;
-
-char cfs_tracefile[TRACEFILE_NAME_SIZE];
-long long cfs_tracefile_size = CFS_TRACEFILE_SIZE;
-static struct tracefiled_ctl trace_tctl;
-static DEFINE_MUTEX(cfs_trace_thread_mutex);
-static int thread_running;
-
-static atomic_t cfs_tage_allocated = ATOMIC_INIT(0);
-
-struct page_collection {
- struct list_head pc_pages;
- /*
- * if this flag is set, collect_pages() will spill both
- * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise,
- * only ->tcd_pages are spilled.
- */
- int pc_want_daemon_pages;
-};
-
-struct tracefiled_ctl {
- struct completion tctl_start;
- struct completion tctl_stop;
- wait_queue_head_t tctl_waitq;
- pid_t tctl_pid;
- atomic_t tctl_shutdown;
-};
-
-/*
- * small data-structure for each page owned by tracefiled.
- */
-struct cfs_trace_page {
- /*
- * page itself
- */
- struct page *page;
- /*
- * linkage into one of the lists in trace_data_union or
- * page_collection
- */
- struct list_head linkage;
- /*
- * number of bytes used within this page
- */
- unsigned int used;
- /*
- * cpu that owns this page
- */
- unsigned short cpu;
- /*
- * type(context) of this page
- */
- unsigned short type;
-};
-
-static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
- struct cfs_trace_cpu_data *tcd);
-
-static inline struct cfs_trace_page *
-cfs_tage_from_list(struct list_head *list)
-{
- return list_entry(list, struct cfs_trace_page, linkage);
-}
-
-static struct cfs_trace_page *cfs_tage_alloc(gfp_t gfp)
-{
- struct page *page;
- struct cfs_trace_page *tage;
-
- /* My caller is trying to free memory */
- if (!in_interrupt() && (current->flags & PF_MEMALLOC))
- return NULL;
-
- /*
- * Don't spam console with allocation failures: they will be reported
- * by upper layer anyway.
- */
- gfp |= __GFP_NOWARN;
- page = alloc_page(gfp);
- if (!page)
- return NULL;
-
- tage = kmalloc(sizeof(*tage), gfp);
- if (!tage) {
- __free_page(page);
- return NULL;
- }
-
- tage->page = page;
- atomic_inc(&cfs_tage_allocated);
- return tage;
-}
-
-static void cfs_tage_free(struct cfs_trace_page *tage)
-{
- __free_page(tage->page);
- kfree(tage);
- atomic_dec(&cfs_tage_allocated);
-}
-
-static void cfs_tage_to_tail(struct cfs_trace_page *tage,
- struct list_head *queue)
-{
- list_move_tail(&tage->linkage, queue);
-}
-
-int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp,
- struct list_head *stock)
-{
- int i;
-
- /*
- * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
-
- for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++i) {
- struct cfs_trace_page *tage;
-
- tage = cfs_tage_alloc(gfp);
- if (!tage)
- break;
- list_add_tail(&tage->linkage, stock);
- }
- return i;
-}
-
-/* return a page that has 'len' bytes left at the end */
-static struct cfs_trace_page *
-cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len)
-{
- struct cfs_trace_page *tage;
-
- if (tcd->tcd_cur_pages > 0) {
- __LASSERT(!list_empty(&tcd->tcd_pages));
- tage = cfs_tage_from_list(tcd->tcd_pages.prev);
- if (tage->used + len <= PAGE_SIZE)
- return tage;
- }
-
- if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
- if (tcd->tcd_cur_stock_pages > 0) {
- tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev);
- --tcd->tcd_cur_stock_pages;
- list_del_init(&tage->linkage);
- } else {
- tage = cfs_tage_alloc(GFP_ATOMIC);
- if (unlikely(!tage)) {
- if (!(current->flags & PF_MEMALLOC) ||
- in_interrupt())
- pr_warn_ratelimited("cannot allocate a tage (%ld)\n",
- tcd->tcd_cur_pages);
- return NULL;
- }
- }
-
- tage->used = 0;
- tage->cpu = smp_processor_id();
- tage->type = tcd->tcd_type;
- list_add_tail(&tage->linkage, &tcd->tcd_pages);
- tcd->tcd_cur_pages++;
-
- if (tcd->tcd_cur_pages > 8 && thread_running) {
- struct tracefiled_ctl *tctl = &trace_tctl;
- /*
- * wake up tracefiled to process some pages.
- */
- wake_up(&tctl->tctl_waitq);
- }
- return tage;
- }
- return NULL;
-}
-
-static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd)
-{
- int pgcount = tcd->tcd_cur_pages / 10;
- struct page_collection pc;
- struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
-
- /*
- * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
-
- pr_warn_ratelimited("debug daemon buffer overflowed; discarding 10%% of pages (%d of %ld)\n",
- pgcount + 1, tcd->tcd_cur_pages);
-
- INIT_LIST_HEAD(&pc.pc_pages);
-
- list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
- if (!pgcount--)
- break;
-
- list_move_tail(&tage->linkage, &pc.pc_pages);
- tcd->tcd_cur_pages--;
- }
- put_pages_on_tcd_daemon_list(&pc, tcd);
-}
-
-/* return a page that has 'len' bytes left at the end */
-static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd,
- unsigned long len)
-{
- struct cfs_trace_page *tage;
-
- /*
- * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
-
- if (len > PAGE_SIZE) {
- pr_err("cowardly refusing to write %lu bytes in a page\n", len);
- return NULL;
- }
-
- tage = cfs_trace_get_tage_try(tcd, len);
- if (tage)
- return tage;
- if (thread_running)
- cfs_tcd_shrink(tcd);
- if (tcd->tcd_cur_pages > 0) {
- tage = cfs_tage_from_list(tcd->tcd_pages.next);
- tage->used = 0;
- cfs_tage_to_tail(tage, &tcd->tcd_pages);
- }
- return tage;
-}
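
Distilling the allocation policy of the two functions above into one list may help; this is an editorial summary, not code from the file:

	/* Where the next trace page comes from, in order:
	 *   1. the tail page, if it still has >= len free bytes;
	 *   2. a preallocated page popped from tcd_stock_pages;
	 *   3. a fresh cfs_tage_alloc(GFP_ATOMIC) as a last resort;
	 *   4. if the buffer is full and nothing can be allocated, the
	 *      OLDEST page (tcd_pages.next) is recycled, silently
	 *      discarding the records it held.
	 */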
-
-int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
- const char *format, ...)
-{
- va_list args;
- int rc;
-
- va_start(args, format);
- rc = libcfs_debug_vmsg2(msgdata, format, args, NULL);
- va_end(args);
-
- return rc;
-}
-EXPORT_SYMBOL(libcfs_debug_msg);
-
-int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
- const char *format1, va_list args,
- const char *format2, ...)
-{
- struct cfs_trace_cpu_data *tcd = NULL;
- struct ptldebug_header header = { 0 };
- struct cfs_trace_page *tage;
- /* string_buf is used only if tcd != NULL, and is always set then */
- char *string_buf = NULL;
- char *debug_buf;
- int known_size;
- int needed = 85; /* average message length */
- int max_nob;
- va_list ap;
- int depth;
- int i;
- int remain;
- int mask = msgdata->msg_mask;
- const char *file = kbasename(msgdata->msg_file);
- struct cfs_debug_limit_state *cdls = msgdata->msg_cdls;
-
- tcd = cfs_trace_get_tcd();
-
- /* cfs_trace_get_tcd() grabs a lock, which disables preemption and
- * pins us to a particular CPU. This avoids an smp_processor_id()
- * warning on Linux when debugging is enabled.
- */
- cfs_set_ptldebug_header(&header, msgdata, CDEBUG_STACK());
-
- if (!tcd) /* arch may not log in IRQ context */
- goto console;
-
- if (!tcd->tcd_cur_pages)
- header.ph_flags |= PH_FLAG_FIRST_RECORD;
-
- if (tcd->tcd_shutting_down) {
- cfs_trace_put_tcd(tcd);
- tcd = NULL;
- goto console;
- }
-
- depth = 0;
- known_size = strlen(file) + 1 + depth;
- if (msgdata->msg_fn)
- known_size += strlen(msgdata->msg_fn) + 1;
-
- if (libcfs_debug_binary)
- known_size += sizeof(header);
-
-	/*
-	 * '2' is used because vsnprintf returns the real size required for
-	 * the output _without_ the terminating NUL, so if 'needed' turns out
-	 * to be too small for this format we retry once with the right size.
-	 */
- for (i = 0; i < 2; i++) {
- tage = cfs_trace_get_tage(tcd, needed + known_size + 1);
- if (!tage) {
- if (needed + known_size > PAGE_SIZE)
- mask |= D_ERROR;
-
- cfs_trace_put_tcd(tcd);
- tcd = NULL;
- goto console;
- }
-
- string_buf = (char *)page_address(tage->page) +
- tage->used + known_size;
-
- max_nob = PAGE_SIZE - tage->used - known_size;
- if (max_nob <= 0) {
- pr_emerg("negative max_nob: %d\n", max_nob);
- mask |= D_ERROR;
- cfs_trace_put_tcd(tcd);
- tcd = NULL;
- goto console;
- }
-
- needed = 0;
- if (format1) {
- va_copy(ap, args);
- needed = vsnprintf(string_buf, max_nob, format1, ap);
- va_end(ap);
- }
-
- if (format2) {
- remain = max_nob - needed;
- if (remain < 0)
- remain = 0;
-
- va_start(ap, format2);
- needed += vsnprintf(string_buf + needed, remain,
- format2, ap);
- va_end(ap);
- }
-
- if (needed < max_nob) /* well. printing ok.. */
- break;
- }
-
- if (*(string_buf + needed - 1) != '\n')
- pr_info("format at %s:%d:%s doesn't end in newline\n", file,
- msgdata->msg_line, msgdata->msg_fn);
-
- header.ph_len = known_size + needed;
- debug_buf = (char *)page_address(tage->page) + tage->used;
-
- if (libcfs_debug_binary) {
- memcpy(debug_buf, &header, sizeof(header));
- tage->used += sizeof(header);
- debug_buf += sizeof(header);
- }
-
- /* indent message according to the nesting level */
- while (depth-- > 0) {
- *(debug_buf++) = '.';
- ++tage->used;
- }
-
- strcpy(debug_buf, file);
- tage->used += strlen(file) + 1;
- debug_buf += strlen(file) + 1;
-
- if (msgdata->msg_fn) {
- strcpy(debug_buf, msgdata->msg_fn);
- tage->used += strlen(msgdata->msg_fn) + 1;
- debug_buf += strlen(msgdata->msg_fn) + 1;
- }
-
- __LASSERT(debug_buf == string_buf);
-
- tage->used += needed;
- __LASSERT(tage->used <= PAGE_SIZE);
-
-console:
- if (!(mask & libcfs_printk)) {
- /* no console output requested */
- if (tcd)
- cfs_trace_put_tcd(tcd);
- return 1;
- }
-
- if (cdls) {
- if (libcfs_console_ratelimit &&
- cdls->cdls_next && /* not first time ever */
- !time_after(jiffies, cdls->cdls_next)) {
- /* skipping a console message */
- cdls->cdls_count++;
- if (tcd)
- cfs_trace_put_tcd(tcd);
- return 1;
- }
-
- if (time_after(jiffies,
- cdls->cdls_next + libcfs_console_max_delay +
- 10 * HZ)) {
- /* last timeout was a long time ago */
- cdls->cdls_delay /= libcfs_console_backoff * 4;
- } else {
- cdls->cdls_delay *= libcfs_console_backoff;
- }
-
- if (cdls->cdls_delay < libcfs_console_min_delay)
- cdls->cdls_delay = libcfs_console_min_delay;
- else if (cdls->cdls_delay > libcfs_console_max_delay)
- cdls->cdls_delay = libcfs_console_max_delay;
-
- /* ensure cdls_next is never zero after it's been seen */
- cdls->cdls_next = (jiffies + cdls->cdls_delay) | 1;
- }
-
- if (tcd) {
- cfs_print_to_console(&header, mask, string_buf, needed, file,
- msgdata->msg_fn);
- cfs_trace_put_tcd(tcd);
- } else {
- string_buf = cfs_trace_get_console_buffer();
-
- needed = 0;
- if (format1) {
- va_copy(ap, args);
- needed = vsnprintf(string_buf,
- CFS_TRACE_CONSOLE_BUFFER_SIZE,
- format1, ap);
- va_end(ap);
- }
- if (format2) {
- remain = CFS_TRACE_CONSOLE_BUFFER_SIZE - needed;
- if (remain > 0) {
- va_start(ap, format2);
- needed += vsnprintf(string_buf + needed, remain,
- format2, ap);
- va_end(ap);
- }
- }
- cfs_print_to_console(&header, mask,
- string_buf, needed, file, msgdata->msg_fn);
-
- put_cpu();
- }
-
- if (cdls && cdls->cdls_count) {
- string_buf = cfs_trace_get_console_buffer();
-
- needed = snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE,
- "Skipped %d previous similar message%s\n",
- cdls->cdls_count,
- (cdls->cdls_count > 1) ? "s" : "");
-
- cfs_print_to_console(&header, mask,
- string_buf, needed, file, msgdata->msg_fn);
-
- put_cpu();
- cdls->cdls_count = 0;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(libcfs_debug_vmsg2);
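
The console rate-limit bookkeeping above is compact enough to obscure its shape; a standalone sketch of just the backoff arithmetic, assuming the delays and backoff factor are the module parameters used above:

	static unsigned long cdls_next_deadline(unsigned long delay, int was_quiet,
						int backoff, unsigned long min_d,
						unsigned long max_d)
	{
		if (was_quiet)
			delay /= backoff * 4;	/* relax quickly after a quiet spell */
		else
			delay *= backoff;	/* keep backing off under pressure */

		delay = clamp(delay, min_d, max_d);

		/* '| 1' keeps the stored deadline non-zero, since zero means
		 * "first time ever" in the check above.
		 */
		return (jiffies + delay) | 1;
	}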
-
-void
-cfs_trace_assertion_failed(const char *str,
- struct libcfs_debug_msg_data *msgdata)
-{
- struct ptldebug_header hdr;
-
- libcfs_panic_in_progress = 1;
- libcfs_catastrophe = 1;
- mb();
-
- cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK());
-
- cfs_print_to_console(&hdr, D_EMERG, str, strlen(str),
- msgdata->msg_file, msgdata->msg_fn);
-
- panic("Lustre debug assertion failure\n");
-
- /* not reached */
-}
-
-static void
-panic_collect_pages(struct page_collection *pc)
-{
- /* Do the collect_pages job on a single CPU: assumes that all other
- * CPUs have been stopped during a panic. If this isn't true for some
- * arch, this will have to be implemented separately in each arch.
- */
- struct cfs_trace_cpu_data *tcd;
- int i;
- int j;
-
- INIT_LIST_HEAD(&pc->pc_pages);
-
- cfs_tcd_for_each(tcd, i, j) {
- list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
- tcd->tcd_cur_pages = 0;
-
- if (pc->pc_want_daemon_pages) {
- list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages);
- tcd->tcd_cur_daemon_pages = 0;
- }
- }
-}
-
-static void collect_pages_on_all_cpus(struct page_collection *pc)
-{
- struct cfs_trace_cpu_data *tcd;
- int i, cpu;
-
- for_each_possible_cpu(cpu) {
- cfs_tcd_for_each_type_lock(tcd, i, cpu) {
- list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
- tcd->tcd_cur_pages = 0;
- if (pc->pc_want_daemon_pages) {
- list_splice_init(&tcd->tcd_daemon_pages,
- &pc->pc_pages);
- tcd->tcd_cur_daemon_pages = 0;
- }
- }
- }
-}
-
-static void collect_pages(struct page_collection *pc)
-{
- INIT_LIST_HEAD(&pc->pc_pages);
-
- if (libcfs_panic_in_progress)
- panic_collect_pages(pc);
- else
- collect_pages_on_all_cpus(pc);
-}
-
-static void put_pages_back_on_all_cpus(struct page_collection *pc)
-{
- struct cfs_trace_cpu_data *tcd;
- struct list_head *cur_head;
- struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
- int i, cpu;
-
- for_each_possible_cpu(cpu) {
- cfs_tcd_for_each_type_lock(tcd, i, cpu) {
- cur_head = tcd->tcd_pages.next;
-
- list_for_each_entry_safe(tage, tmp, &pc->pc_pages,
- linkage) {
- __LASSERT_TAGE_INVARIANT(tage);
-
- if (tage->cpu != cpu || tage->type != i)
- continue;
-
- cfs_tage_to_tail(tage, cur_head);
- tcd->tcd_cur_pages++;
- }
- }
- }
-}
-
-static void put_pages_back(struct page_collection *pc)
-{
- if (!libcfs_panic_in_progress)
- put_pages_back_on_all_cpus(pc);
-}
-
-/* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that
- * we have a good amount of data at all times for dumping during an LBUG, even
- * if we have been steadily writing (and otherwise discarding) pages via the
- * debug daemon.
- */
-static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
- struct cfs_trace_cpu_data *tcd)
-{
- struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
-
- list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
- __LASSERT_TAGE_INVARIANT(tage);
-
- if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type)
- continue;
-
- cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages);
- tcd->tcd_cur_daemon_pages++;
-
- if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
- struct cfs_trace_page *victim;
-
- __LASSERT(!list_empty(&tcd->tcd_daemon_pages));
- victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next);
-
- __LASSERT_TAGE_INVARIANT(victim);
-
- list_del(&victim->linkage);
- cfs_tage_free(victim);
- tcd->tcd_cur_daemon_pages--;
- }
- }
-}
-
-static void put_pages_on_daemon_list(struct page_collection *pc)
-{
- struct cfs_trace_cpu_data *tcd;
- int i, cpu;
-
- for_each_possible_cpu(cpu) {
- cfs_tcd_for_each_type_lock(tcd, i, cpu)
- put_pages_on_tcd_daemon_list(pc, tcd);
- }
-}
-
-void cfs_trace_debug_print(void)
-{
- struct page_collection pc;
- struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
-
- pc.pc_want_daemon_pages = 1;
- collect_pages(&pc);
- list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
- char *p, *file, *fn;
- struct page *page;
-
- __LASSERT_TAGE_INVARIANT(tage);
-
- page = tage->page;
- p = page_address(page);
- while (p < ((char *)page_address(page) + tage->used)) {
- struct ptldebug_header *hdr;
- int len;
-
- hdr = (void *)p;
- p += sizeof(*hdr);
- file = p;
- p += strlen(file) + 1;
- fn = p;
- p += strlen(fn) + 1;
- len = hdr->ph_len - (int)(p - (char *)hdr);
-
- cfs_print_to_console(hdr, D_EMERG, p, len, file, fn);
-
- p += len;
- }
-
- list_del(&tage->linkage);
- cfs_tage_free(tage);
- }
-}
-
-int cfs_tracefile_dump_all_pages(char *filename)
-{
- struct page_collection pc;
- struct file *filp;
- struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
- char *buf;
- mm_segment_t __oldfs;
- int rc;
-
- cfs_tracefile_write_lock();
-
- filp = filp_open(filename, O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE,
- 0600);
- if (IS_ERR(filp)) {
- rc = PTR_ERR(filp);
- filp = NULL;
- pr_err("LustreError: can't open %s for dump: rc %d\n",
- filename, rc);
- goto out;
- }
-
- pc.pc_want_daemon_pages = 1;
- collect_pages(&pc);
- if (list_empty(&pc.pc_pages)) {
- rc = 0;
- goto close;
- }
- __oldfs = get_fs();
- set_fs(get_ds());
-
- /* ok, for now, just write the pages. in the future we'll be building
- * iobufs with the pages and calling generic_direct_IO
- */
- list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
- __LASSERT_TAGE_INVARIANT(tage);
-
- buf = kmap(tage->page);
- rc = kernel_write(filp, buf, tage->used, &filp->f_pos);
- kunmap(tage->page);
-
- if (rc != (int)tage->used) {
- pr_warn("wanted to write %u but wrote %d\n", tage->used,
- rc);
- put_pages_back(&pc);
- __LASSERT(list_empty(&pc.pc_pages));
- break;
- }
- list_del(&tage->linkage);
- cfs_tage_free(tage);
- }
- set_fs(__oldfs);
- rc = vfs_fsync(filp, 1);
- if (rc)
- pr_err("sync returns %d\n", rc);
-close:
- filp_close(filp, NULL);
-out:
- cfs_tracefile_write_unlock();
- return rc;
-}
-
-void cfs_trace_flush_pages(void)
-{
- struct page_collection pc;
- struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
-
- pc.pc_want_daemon_pages = 1;
- collect_pages(&pc);
- list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
- __LASSERT_TAGE_INVARIANT(tage);
-
- list_del(&tage->linkage);
- cfs_tage_free(tage);
- }
-}
-
-int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
- const char __user *usr_buffer, int usr_buffer_nob)
-{
- int nob;
-
- if (usr_buffer_nob > knl_buffer_nob)
- return -EOVERFLOW;
-
- if (copy_from_user((void *)knl_buffer,
- usr_buffer, usr_buffer_nob))
- return -EFAULT;
-
- nob = strnlen(knl_buffer, usr_buffer_nob);
- while (--nob >= 0) /* strip trailing whitespace */
- if (!isspace(knl_buffer[nob]))
- break;
-
- if (nob < 0) /* empty string */
- return -EINVAL;
-
- if (nob == knl_buffer_nob) /* no space to terminate */
- return -EOVERFLOW;
-
- knl_buffer[nob + 1] = 0; /* terminate */
- return 0;
-}
-EXPORT_SYMBOL(cfs_trace_copyin_string);
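
A few worked edge cases for the helper above (editorial illustration, derived from the code rather than copied from it):

	/*   "trace.log\n" into a 32-byte kernel buffer
	 *       -> buffer holds "trace.log\0", returns 0 ('\n' stripped)
	 *   "   \n" (whitespace only)
	 *       -> -EINVAL (empty after stripping trailing whitespace)
	 *   40 user bytes into a 32-byte kernel buffer
	 *       -> -EOVERFLOW (copy refused before touching the buffer)
	 */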
-
-int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
- const char *knl_buffer, char *append)
-{
- /*
- * NB if 'append' != NULL, it's a single character to append to the
- * copied out string - usually "\n" or "" (i.e. a terminating zero byte)
- */
- int nob = strlen(knl_buffer);
-
- if (nob > usr_buffer_nob)
- nob = usr_buffer_nob;
-
- if (copy_to_user(usr_buffer, knl_buffer, nob))
- return -EFAULT;
-
- if (append && nob < usr_buffer_nob) {
- if (copy_to_user(usr_buffer + nob, append, 1))
- return -EFAULT;
-
- nob++;
- }
-
- return nob;
-}
-EXPORT_SYMBOL(cfs_trace_copyout_string);
-
-int cfs_trace_allocate_string_buffer(char **str, int nob)
-{
- if (nob > 2 * PAGE_SIZE) /* string must be "sensible" */
- return -EINVAL;
-
- *str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO);
- if (!*str)
- return -ENOMEM;
-
- return 0;
-}
-
-int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob)
-{
- char *str;
- int rc;
-
- rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
- if (rc)
- return rc;
-
- rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
- usr_str, usr_str_nob);
- if (rc)
- goto out;
-
- if (str[0] != '/') {
- rc = -EINVAL;
- goto out;
- }
- rc = cfs_tracefile_dump_all_pages(str);
-out:
- kfree(str);
- return rc;
-}
-
-int cfs_trace_daemon_command(char *str)
-{
- int rc = 0;
-
- cfs_tracefile_write_lock();
-
- if (!strcmp(str, "stop")) {
- cfs_tracefile_write_unlock();
- cfs_trace_stop_thread();
- cfs_tracefile_write_lock();
- memset(cfs_tracefile, 0, sizeof(cfs_tracefile));
-
- } else if (!strncmp(str, "size=", 5)) {
- unsigned long tmp;
-
- rc = kstrtoul(str + 5, 10, &tmp);
- if (!rc) {
- if (tmp < 10 || tmp > 20480)
- cfs_tracefile_size = CFS_TRACEFILE_SIZE;
- else
- cfs_tracefile_size = tmp << 20;
- }
- } else if (strlen(str) >= sizeof(cfs_tracefile)) {
- rc = -ENAMETOOLONG;
- } else if (str[0] != '/') {
- rc = -EINVAL;
- } else {
- strcpy(cfs_tracefile, str);
-
- pr_info("debug daemon will attempt to start writing to %s (%lukB max)\n",
- cfs_tracefile,
- (long)(cfs_tracefile_size >> 10));
-
- cfs_trace_start_thread();
- }
-
- cfs_tracefile_write_unlock();
- return rc;
-}
-
-int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob)
-{
- char *str;
- int rc;
-
- rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
- if (rc)
- return rc;
-
- rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
- usr_str, usr_str_nob);
- if (!rc)
- rc = cfs_trace_daemon_command(str);
-
- kfree(str);
- return rc;
-}
-
-int cfs_trace_set_debug_mb(int mb)
-{
- int i;
- int j;
- int pages;
- int limit = cfs_trace_max_debug_mb();
- struct cfs_trace_cpu_data *tcd;
-
- if (mb < num_possible_cpus()) {
- pr_warn("%d MB is too small for debug buffer size, setting it to %d MB.\n",
- mb, num_possible_cpus());
- mb = num_possible_cpus();
- }
-
- if (mb > limit) {
- pr_warn("%d MB is too large for debug buffer size, setting it to %d MB.\n",
- mb, limit);
- mb = limit;
- }
-
- mb /= num_possible_cpus();
- pages = mb << (20 - PAGE_SHIFT);
-
- cfs_tracefile_write_lock();
-
- cfs_tcd_for_each(tcd, i, j)
- tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
-
- cfs_tracefile_write_unlock();
-
- return 0;
-}
-
-int cfs_trace_get_debug_mb(void)
-{
- int i;
- int j;
- struct cfs_trace_cpu_data *tcd;
- int total_pages = 0;
-
- cfs_tracefile_read_lock();
-
- cfs_tcd_for_each(tcd, i, j)
- total_pages += tcd->tcd_max_pages;
-
- cfs_tracefile_read_unlock();
-
- return (total_pages >> (20 - PAGE_SHIFT)) + 1;
-}
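
The unit conversions in the two functions above are easy to misread; a worked example, assuming 4 KiB pages (PAGE_SHIFT == 12) and 8 possible CPUs:

	/* cfs_trace_set_debug_mb(256): mb /= 8 -> 32 MB per CPU, then
	 * pages = 32 << (20 - 12) = 8192 pages per CPU, split between the
	 * buffer types by tcd_pages_factor (a percentage).
	 * cfs_trace_get_debug_mb() reverses the shift over the summed
	 * tcd_max_pages and adds 1 so the result never rounds down to 0.
	 */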
-
-static int tracefiled(void *arg)
-{
- struct page_collection pc;
- struct tracefiled_ctl *tctl = arg;
- struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
- struct file *filp;
- char *buf;
- int last_loop = 0;
- int rc;
-
- /* we're started late enough that we pick up init's fs context */
- /* this is so broken in uml? what on earth is going on? */
-
- complete(&tctl->tctl_start);
-
- while (1) {
- wait_queue_entry_t __wait;
-
- pc.pc_want_daemon_pages = 0;
- collect_pages(&pc);
- if (list_empty(&pc.pc_pages))
- goto end_loop;
-
- filp = NULL;
- cfs_tracefile_read_lock();
- if (cfs_tracefile[0]) {
- filp = filp_open(cfs_tracefile,
- O_CREAT | O_RDWR | O_LARGEFILE,
- 0600);
- if (IS_ERR(filp)) {
- rc = PTR_ERR(filp);
- filp = NULL;
- pr_warn("couldn't open %s: %d\n", cfs_tracefile,
- rc);
- }
- }
- cfs_tracefile_read_unlock();
- if (!filp) {
- put_pages_on_daemon_list(&pc);
- __LASSERT(list_empty(&pc.pc_pages));
- goto end_loop;
- }
-
- list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
- static loff_t f_pos;
-
- __LASSERT_TAGE_INVARIANT(tage);
-
- if (f_pos >= (off_t)cfs_tracefile_size)
- f_pos = 0;
- else if (f_pos > i_size_read(file_inode(filp)))
- f_pos = i_size_read(file_inode(filp));
-
- buf = kmap(tage->page);
- rc = kernel_write(filp, buf, tage->used, &f_pos);
- kunmap(tage->page);
-
- if (rc != (int)tage->used) {
- pr_warn("wanted to write %u but wrote %d\n",
- tage->used, rc);
- put_pages_back(&pc);
- __LASSERT(list_empty(&pc.pc_pages));
- break;
- }
- }
-
- filp_close(filp, NULL);
- put_pages_on_daemon_list(&pc);
- if (!list_empty(&pc.pc_pages)) {
- int i;
-
- pr_alert("trace pages aren't empty\n");
- pr_err("total cpus(%d): ", num_possible_cpus());
- for (i = 0; i < num_possible_cpus(); i++)
- if (cpu_online(i))
- pr_cont("%d(on) ", i);
- else
- pr_cont("%d(off) ", i);
- pr_cont("\n");
-
- i = 0;
- list_for_each_entry_safe(tage, tmp, &pc.pc_pages,
- linkage)
- pr_err("page %d belongs to cpu %d\n",
- ++i, tage->cpu);
- pr_err("There are %d pages unwritten\n", i);
- }
- __LASSERT(list_empty(&pc.pc_pages));
-end_loop:
- if (atomic_read(&tctl->tctl_shutdown)) {
- if (!last_loop) {
- last_loop = 1;
- continue;
- } else {
- break;
- }
- }
- init_waitqueue_entry(&__wait, current);
- add_wait_queue(&tctl->tctl_waitq, &__wait);
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(HZ);
- remove_wait_queue(&tctl->tctl_waitq, &__wait);
- }
- complete(&tctl->tctl_stop);
- return 0;
-}
-
-int cfs_trace_start_thread(void)
-{
- struct tracefiled_ctl *tctl = &trace_tctl;
- struct task_struct *task;
- int rc = 0;
-
- mutex_lock(&cfs_trace_thread_mutex);
- if (thread_running)
- goto out;
-
- init_completion(&tctl->tctl_start);
- init_completion(&tctl->tctl_stop);
- init_waitqueue_head(&tctl->tctl_waitq);
- atomic_set(&tctl->tctl_shutdown, 0);
-
- task = kthread_run(tracefiled, tctl, "ktracefiled");
- if (IS_ERR(task)) {
- rc = PTR_ERR(task);
- goto out;
- }
-
- wait_for_completion(&tctl->tctl_start);
- thread_running = 1;
-out:
- mutex_unlock(&cfs_trace_thread_mutex);
- return rc;
-}
-
-void cfs_trace_stop_thread(void)
-{
- struct tracefiled_ctl *tctl = &trace_tctl;
-
- mutex_lock(&cfs_trace_thread_mutex);
- if (thread_running) {
- pr_info("shutting down debug daemon thread...\n");
- atomic_set(&tctl->tctl_shutdown, 1);
- wait_for_completion(&tctl->tctl_stop);
- thread_running = 0;
- }
- mutex_unlock(&cfs_trace_thread_mutex);
-}
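
The start/stop handshake spread across tracefiled(), cfs_trace_start_thread() and cfs_trace_stop_thread() condenses to this (editorial summary):

	/* start: kthread_run() then wait_for_completion(&tctl_start), so
	 *        thread_running is only set once the daemon is live;
	 * stop:  set tctl_shutdown, then wait_for_completion(&tctl_stop);
	 *        the daemon notices the flag, runs ONE more collection
	 *        pass (last_loop) to drain pages queued during shutdown,
	 *        completes tctl_stop and exits.
	 */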
-
-int cfs_tracefile_init(int max_pages)
-{
- struct cfs_trace_cpu_data *tcd;
- int i;
- int j;
- int rc;
- int factor;
-
- rc = cfs_tracefile_init_arch();
- if (rc)
- return rc;
-
- cfs_tcd_for_each(tcd, i, j) {
-		/* tcd_pages_factor is initialized in cfs_tracefile_init_arch(). */
- factor = tcd->tcd_pages_factor;
- INIT_LIST_HEAD(&tcd->tcd_pages);
- INIT_LIST_HEAD(&tcd->tcd_stock_pages);
- INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
- tcd->tcd_cur_pages = 0;
- tcd->tcd_cur_stock_pages = 0;
- tcd->tcd_cur_daemon_pages = 0;
- tcd->tcd_max_pages = (max_pages * factor) / 100;
- LASSERT(tcd->tcd_max_pages > 0);
- tcd->tcd_shutting_down = 0;
- }
-
- return 0;
-}
-
-static void trace_cleanup_on_all_cpus(void)
-{
- struct cfs_trace_cpu_data *tcd;
- struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
- int i, cpu;
-
- for_each_possible_cpu(cpu) {
- cfs_tcd_for_each_type_lock(tcd, i, cpu) {
- tcd->tcd_shutting_down = 1;
-
- list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages,
- linkage) {
- __LASSERT_TAGE_INVARIANT(tage);
-
- list_del(&tage->linkage);
- cfs_tage_free(tage);
- }
-
- tcd->tcd_cur_pages = 0;
- }
- }
-}
-
-static void cfs_trace_cleanup(void)
-{
- struct page_collection pc;
-
- INIT_LIST_HEAD(&pc.pc_pages);
-
- trace_cleanup_on_all_cpus();
-
- cfs_tracefile_fini_arch();
-}
-
-void cfs_tracefile_exit(void)
-{
- cfs_trace_stop_thread();
- cfs_trace_cleanup();
-}
diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.h b/drivers/staging/lustre/lnet/libcfs/tracefile.h
deleted file mode 100644
index 0608240d897f..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/tracefile.h
+++ /dev/null
@@ -1,274 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __LIBCFS_TRACEFILE_H__
-#define __LIBCFS_TRACEFILE_H__
-
-#include <linux/spinlock.h>
-#include <linux/list.h>
-#include <linux/cache.h>
-#include <linux/threads.h>
-#include <linux/limits.h>
-#include <linux/smp.h>
-#include <linux/libcfs/libcfs.h>
-
-enum cfs_trace_buf_type {
- CFS_TCD_TYPE_PROC = 0,
- CFS_TCD_TYPE_SOFTIRQ,
- CFS_TCD_TYPE_IRQ,
- CFS_TCD_TYPE_MAX
-};
-
-/* trace file lock routines */
-
-#define TRACEFILE_NAME_SIZE 1024
-extern char cfs_tracefile[TRACEFILE_NAME_SIZE];
-extern long long cfs_tracefile_size;
-
-/**
- * The path of debug log dump upcall script.
- */
-extern char lnet_debug_log_upcall[1024];
-
-void libcfs_run_debug_log_upcall(char *file);
-
-int cfs_tracefile_init_arch(void);
-void cfs_tracefile_fini_arch(void);
-
-void cfs_tracefile_read_lock(void);
-void cfs_tracefile_read_unlock(void);
-void cfs_tracefile_write_lock(void);
-void cfs_tracefile_write_unlock(void);
-
-int cfs_tracefile_dump_all_pages(char *filename);
-void cfs_trace_debug_print(void);
-void cfs_trace_flush_pages(void);
-int cfs_trace_start_thread(void);
-void cfs_trace_stop_thread(void);
-int cfs_tracefile_init(int max_pages);
-void cfs_tracefile_exit(void);
-
-int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
- const char __user *usr_buffer, int usr_buffer_nob);
-int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
- const char *knl_str, char *append);
-int cfs_trace_allocate_string_buffer(char **str, int nob);
-int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob);
-int cfs_trace_daemon_command(char *str);
-int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob);
-int cfs_trace_set_debug_mb(int mb);
-int cfs_trace_get_debug_mb(void);
-
-void libcfs_debug_dumplog_internal(void *arg);
-void libcfs_register_panic_notifier(void);
-void libcfs_unregister_panic_notifier(void);
-extern int libcfs_panic_in_progress;
-int cfs_trace_max_debug_mb(void);
-
-#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))
-#define TCD_STOCK_PAGES (TCD_MAX_PAGES)
-#define CFS_TRACEFILE_SIZE (500 << 20)
-
-#ifdef LUSTRE_TRACEFILE_PRIVATE
-
-/*
- * Private declare for tracefile
- */
-#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))
-#define TCD_STOCK_PAGES (TCD_MAX_PAGES)
-
-#define CFS_TRACEFILE_SIZE (500 << 20)
-
-/*
- * Size of a buffer for sprinting console messages if we can't get a page
- * from system
- */
-#define CFS_TRACE_CONSOLE_BUFFER_SIZE 1024
-
-union cfs_trace_data_union {
- struct cfs_trace_cpu_data {
- /*
- * Even though this structure is meant to be per-CPU, locking
- * is needed because in some places the data may be accessed
- * from other CPUs. This lock is directly used in trace_get_tcd
- * and trace_put_tcd, which are called in libcfs_debug_vmsg2 and
- * tcd_for_each_type_lock
- */
- spinlock_t tcd_lock;
- unsigned long tcd_lock_flags;
-
- /*
- * pages with trace records not yet processed by tracefiled.
- */
- struct list_head tcd_pages;
- /* number of pages on ->tcd_pages */
- unsigned long tcd_cur_pages;
-
- /*
- * pages with trace records already processed by
- * tracefiled. These pages are kept in memory, so that some
- * portion of log can be written in the event of LBUG. This
- * list is maintained in LRU order.
- *
- * Pages are moved to ->tcd_daemon_pages by tracefiled()
- * (put_pages_on_daemon_list()). LRU pages from this list are
- * discarded when list grows too large.
- */
- struct list_head tcd_daemon_pages;
- /* number of pages on ->tcd_daemon_pages */
- unsigned long tcd_cur_daemon_pages;
-
- /*
- * Maximal number of pages allowed on ->tcd_pages and
- * ->tcd_daemon_pages each.
- * Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current
- * implementation.
- */
- unsigned long tcd_max_pages;
-
- /*
- * preallocated pages to write trace records into. Pages from
- * ->tcd_stock_pages are moved to ->tcd_pages by
- * portals_debug_msg().
- *
- * This list is necessary, because on some platforms it's
- * impossible to perform efficient atomic page allocation in a
- * non-blockable context.
- *
- * Such platforms fill ->tcd_stock_pages "on occasion", when
- * tracing code is entered in blockable context.
- *
- * trace_get_tage_try() tries to get a page from
- * ->tcd_stock_pages first and resorts to atomic page
- * allocation only if this queue is empty. ->tcd_stock_pages
- * is replenished when tracing code is entered in blocking
- * context (darwin-tracefile.c:trace_get_tcd()). We try to
- * maintain TCD_STOCK_PAGES (40 by default) pages in this
- * queue. Atomic allocation is only required if more than
- * TCD_STOCK_PAGES pagesful are consumed by trace records all
- * emitted in non-blocking contexts. Which is quite unlikely.
- */
- struct list_head tcd_stock_pages;
- /* number of pages on ->tcd_stock_pages */
- unsigned long tcd_cur_stock_pages;
-
- unsigned short tcd_shutting_down;
- unsigned short tcd_cpu;
- unsigned short tcd_type;
- /* The factors to share debug memory. */
- unsigned short tcd_pages_factor;
- } tcd;
- char __pad[L1_CACHE_ALIGN(sizeof(struct cfs_trace_cpu_data))];
-};
-
-#define TCD_MAX_TYPES 8
-extern union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS];
-
-#define cfs_tcd_for_each(tcd, i, j) \
- for (i = 0; cfs_trace_data[i]; i++) \
- for (j = 0, ((tcd) = &(*cfs_trace_data[i])[j].tcd); \
- j < num_possible_cpus(); \
- j++, (tcd) = &(*cfs_trace_data[i])[j].tcd)
-
-#define cfs_tcd_for_each_type_lock(tcd, i, cpu) \
- for (i = 0; cfs_trace_data[i] && \
- (tcd = &(*cfs_trace_data[i])[cpu].tcd) && \
- cfs_trace_lock_tcd(tcd, 1); cfs_trace_unlock_tcd(tcd, 1), i++)
-
-void cfs_set_ptldebug_header(struct ptldebug_header *header,
- struct libcfs_debug_msg_data *m,
- unsigned long stack);
-void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
- const char *buf, int len, const char *file,
- const char *fn);
-
-int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking);
-void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking);
-
-extern char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX];
-enum cfs_trace_buf_type cfs_trace_buf_idx_get(void);
-
-static inline char *
-cfs_trace_get_console_buffer(void)
-{
- unsigned int i = get_cpu();
- unsigned int j = cfs_trace_buf_idx_get();
-
- return cfs_trace_console_buffers[i][j];
-}
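
The get_cpu() hidden inside this helper disables preemption, so every caller must balance it with put_cpu(), as libcfs_debug_vmsg2() does earlier in this patch. A hedged usage sketch ('hdr' is a previously filled ptldebug_header, hypothetical here):

	char *buf = cfs_trace_get_console_buffer();
	int len = snprintf(buf, CFS_TRACE_CONSOLE_BUFFER_SIZE, "example\n");

	cfs_print_to_console(&hdr, D_EMERG, buf, len, __FILE__, __func__);
	put_cpu();	/* balances the implicit get_cpu() above */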
-
-static inline struct cfs_trace_cpu_data *
-cfs_trace_get_tcd(void)
-{
- struct cfs_trace_cpu_data *tcd =
- &(*cfs_trace_data[cfs_trace_buf_idx_get()])[get_cpu()].tcd;
-
- cfs_trace_lock_tcd(tcd, 0);
-
- return tcd;
-}
-
-static inline void cfs_trace_put_tcd(struct cfs_trace_cpu_data *tcd)
-{
- cfs_trace_unlock_tcd(tcd, 0);
-
- put_cpu();
-}
-
-int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp,
- struct list_head *stock);
-
-void cfs_trace_assertion_failed(const char *str,
- struct libcfs_debug_msg_data *m);
-
-/* ASSERTION that is safe to use within the debug system */
-#define __LASSERT(cond) \
-do { \
- if (unlikely(!(cond))) { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL); \
- cfs_trace_assertion_failed("ASSERTION("#cond") failed", \
- &msgdata); \
- } \
-} while (0)
-
-#define __LASSERT_TAGE_INVARIANT(tage) \
-do { \
- __LASSERT(tage); \
- __LASSERT(tage->page); \
- __LASSERT(tage->used <= PAGE_SIZE); \
- __LASSERT(page_count(tage->page) > 0); \
-} while (0)
-
-#endif /* LUSTRE_TRACEFILE_PRIVATE */
-
-#endif /* __LIBCFS_TRACEFILE_H__ */
diff --git a/drivers/staging/lustre/lnet/lnet/Makefile b/drivers/staging/lustre/lnet/lnet/Makefile
deleted file mode 100644
index 0a9d70924fe0..000000000000
--- a/drivers/staging/lustre/lnet/lnet/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET) += lnet.o
-
-lnet-y := api-ni.o config.o nidstrings.o net_fault.o \
- lib-me.o lib-msg.o lib-eq.o lib-md.o lib-ptl.o \
- lib-socket.o lib-move.o module.o lo.o \
- router.o router_proc.o acceptor.o peer.o
diff --git a/drivers/staging/lustre/lnet/lnet/acceptor.c b/drivers/staging/lustre/lnet/lnet/acceptor.c
deleted file mode 100644
index 5648f17eddc0..000000000000
--- a/drivers/staging/lustre/lnet/lnet/acceptor.c
+++ /dev/null
@@ -1,501 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <linux/completion.h>
-#include <net/sock.h>
-#include <linux/lnet/lib-lnet.h>
-
-static int accept_port = 988;
-static int accept_backlog = 127;
-static int accept_timeout = 5;
-
-static struct {
- int pta_shutdown;
- struct socket *pta_sock;
- struct completion pta_signal;
-} lnet_acceptor_state = {
- .pta_shutdown = 1
-};
-
-int
-lnet_acceptor_port(void)
-{
- return accept_port;
-}
-EXPORT_SYMBOL(lnet_acceptor_port);
-
-static inline int
-lnet_accept_magic(__u32 magic, __u32 constant)
-{
- return (magic == constant ||
- magic == __swab32(constant));
-}
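
This helper accepts a constant in either byte order; lnet_accept() later derives a 'flip' flag from which spelling arrived. A distilled sketch (helper name hypothetical):

	static int conn_needs_byteswap(__u32 wire_magic)
	{
		if (!lnet_accept_magic(wire_magic, LNET_PROTO_ACCEPTOR_MAGIC))
			return -EPROTO;	/* not an acceptor connection request */
		/* same constant, byte-swapped => peer has opposite
		 * endianness, so every later field needs
		 * __swab32s()/__swab64s()
		 */
		return wire_magic != LNET_PROTO_ACCEPTOR_MAGIC;
	}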
-
-static char *accept = "secure";
-
-module_param(accept, charp, 0444);
-MODULE_PARM_DESC(accept, "Accept connections (secure|all|none)");
-module_param(accept_port, int, 0444);
-MODULE_PARM_DESC(accept_port, "Acceptor's port (same on all nodes)");
-module_param(accept_backlog, int, 0444);
-MODULE_PARM_DESC(accept_backlog, "Acceptor's listen backlog");
-module_param(accept_timeout, int, 0644);
-MODULE_PARM_DESC(accept_timeout, "Acceptor's timeout (seconds)");
-
-static char *accept_type;
-
-static int
-lnet_acceptor_get_tunables(void)
-{
- /*
- * Userland acceptor uses 'accept_type' instead of 'accept', due to
- * conflict with 'accept(2)', but kernel acceptor still uses 'accept'
- * for compatibility. Hence the trick.
- */
- accept_type = accept;
- return 0;
-}
-
-int
-lnet_acceptor_timeout(void)
-{
- return accept_timeout;
-}
-EXPORT_SYMBOL(lnet_acceptor_timeout);
-
-void
-lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
- __u32 peer_ip, int peer_port)
-{
- switch (rc) {
- /* "normal" errors */
- case -ECONNREFUSED:
- CNETERR("Connection to %s at host %pI4h on port %d was refused: check that Lustre is running on that node.\n",
- libcfs_nid2str(peer_nid),
- &peer_ip, peer_port);
- break;
- case -EHOSTUNREACH:
- case -ENETUNREACH:
- CNETERR("Connection to %s at host %pI4h was unreachable: the network or that node may be down, or Lustre may be misconfigured.\n",
- libcfs_nid2str(peer_nid), &peer_ip);
- break;
- case -ETIMEDOUT:
- CNETERR("Connection to %s at host %pI4h on port %d took too long: that node may be hung or experiencing high load.\n",
- libcfs_nid2str(peer_nid),
- &peer_ip, peer_port);
- break;
- case -ECONNRESET:
- LCONSOLE_ERROR_MSG(0x11b, "Connection to %s at host %pI4h on port %d was reset: is it running a compatible version of Lustre and is %s one of its NIDs?\n",
- libcfs_nid2str(peer_nid),
- &peer_ip, peer_port,
- libcfs_nid2str(peer_nid));
- break;
- case -EPROTO:
- LCONSOLE_ERROR_MSG(0x11c, "Protocol error connecting to %s at host %pI4h on port %d: is it running a compatible version of Lustre?\n",
- libcfs_nid2str(peer_nid),
- &peer_ip, peer_port);
- break;
- case -EADDRINUSE:
- LCONSOLE_ERROR_MSG(0x11d, "No privileged ports available to connect to %s at host %pI4h on port %d\n",
- libcfs_nid2str(peer_nid),
- &peer_ip, peer_port);
- break;
- default:
- LCONSOLE_ERROR_MSG(0x11e, "Unexpected error %d connecting to %s at host %pI4h on port %d\n",
- rc, libcfs_nid2str(peer_nid),
- &peer_ip, peer_port);
- break;
- }
-}
-EXPORT_SYMBOL(lnet_connect_console_error);
-
-int
-lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
- __u32 local_ip, __u32 peer_ip, int peer_port)
-{
- struct lnet_acceptor_connreq cr;
- struct socket *sock;
- int rc;
- int port;
- int fatal;
-
- BUILD_BUG_ON(sizeof(cr) > 16); /* too big to be on the stack */
-
- for (port = LNET_ACCEPTOR_MAX_RESERVED_PORT;
- port >= LNET_ACCEPTOR_MIN_RESERVED_PORT;
- --port) {
- /* Iterate through reserved ports. */
-
- rc = lnet_sock_connect(&sock, &fatal, local_ip, port, peer_ip,
- peer_port);
- if (rc) {
- if (fatal)
- goto failed;
- continue;
- }
-
- BUILD_BUG_ON(LNET_PROTO_ACCEPTOR_VERSION != 1);
-
- cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
- cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
- cr.acr_nid = peer_nid;
-
- if (the_lnet.ln_testprotocompat) {
- /* single-shot proto check */
- lnet_net_lock(LNET_LOCK_EX);
- if (the_lnet.ln_testprotocompat & 4) {
- cr.acr_version++;
- the_lnet.ln_testprotocompat &= ~4;
- }
- if (the_lnet.ln_testprotocompat & 8) {
- cr.acr_magic = LNET_PROTO_MAGIC;
- the_lnet.ln_testprotocompat &= ~8;
- }
- lnet_net_unlock(LNET_LOCK_EX);
- }
-
- rc = lnet_sock_write(sock, &cr, sizeof(cr), accept_timeout);
- if (rc)
- goto failed_sock;
-
- *sockp = sock;
- return 0;
- }
-
- rc = -EADDRINUSE;
- goto failed;
-
- failed_sock:
- sock_release(sock);
- failed:
- lnet_connect_console_error(rc, peer_nid, peer_ip, peer_port);
- return rc;
-}
-EXPORT_SYMBOL(lnet_connect);
-
-static int
-lnet_accept(struct socket *sock, __u32 magic)
-{
- struct lnet_acceptor_connreq cr;
- __u32 peer_ip;
- int peer_port;
- int rc;
- int flip;
- struct lnet_ni *ni;
- char *str;
-
- LASSERT(sizeof(cr) <= 16); /* not too big for the stack */
-
- rc = lnet_sock_getaddr(sock, 1, &peer_ip, &peer_port);
- LASSERT(!rc); /* we succeeded before */
-
- if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC)) {
- if (lnet_accept_magic(magic, LNET_PROTO_MAGIC)) {
- /*
- * future version compatibility!
- * When LNET unifies protocols over all LNDs, the first
- * thing sent will be a version query. I send back
- * LNET_PROTO_ACCEPTOR_MAGIC to tell her I'm "old"
- */
- memset(&cr, 0, sizeof(cr));
- cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
- cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
- rc = lnet_sock_write(sock, &cr, sizeof(cr),
- accept_timeout);
-
- if (rc)
- CERROR("Error sending magic+version in response to LNET magic from %pI4h: %d\n",
- &peer_ip, rc);
- return -EPROTO;
- }
-
- if (lnet_accept_magic(magic, LNET_PROTO_TCP_MAGIC))
- str = "'old' socknal/tcpnal";
- else
- str = "unrecognised";
-
- LCONSOLE_ERROR_MSG(0x11f, "Refusing connection from %pI4h magic %08x: %s acceptor protocol\n",
- &peer_ip, magic, str);
- return -EPROTO;
- }
-
- flip = (magic != LNET_PROTO_ACCEPTOR_MAGIC);
-
- rc = lnet_sock_read(sock, &cr.acr_version, sizeof(cr.acr_version),
- accept_timeout);
- if (rc) {
- CERROR("Error %d reading connection request version from %pI4h\n",
- rc, &peer_ip);
- return -EIO;
- }
-
- if (flip)
- __swab32s(&cr.acr_version);
-
- if (cr.acr_version != LNET_PROTO_ACCEPTOR_VERSION) {
- /*
- * future version compatibility!
- * An acceptor-specific protocol rev will first send a version
- * query. I send back my current version to tell her I'm
- * "old".
- */
- int peer_version = cr.acr_version;
-
- memset(&cr, 0, sizeof(cr));
- cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
- cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
-
- rc = lnet_sock_write(sock, &cr, sizeof(cr), accept_timeout);
- if (rc)
- CERROR("Error sending magic+version in response to version %d from %pI4h: %d\n",
- peer_version, &peer_ip, rc);
- return -EPROTO;
- }
-
- rc = lnet_sock_read(sock, &cr.acr_nid,
- sizeof(cr) -
- offsetof(struct lnet_acceptor_connreq, acr_nid),
- accept_timeout);
- if (rc) {
- CERROR("Error %d reading connection request from %pI4h\n",
- rc, &peer_ip);
- return -EIO;
- }
-
- if (flip)
- __swab64s(&cr.acr_nid);
-
- ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid));
- if (!ni || /* no matching net */
- ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! */
- if (ni)
- lnet_ni_decref(ni);
- LCONSOLE_ERROR_MSG(0x120, "Refusing connection from %pI4h for %s: No matching NI\n",
- &peer_ip, libcfs_nid2str(cr.acr_nid));
- return -EPERM;
- }
-
- if (!ni->ni_lnd->lnd_accept) {
- /* This catches a request for the loopback LND */
- lnet_ni_decref(ni);
-		LCONSOLE_ERROR_MSG(0x121, "Refusing connection from %pI4h for %s: NI does not accept IP connections\n",
- &peer_ip, libcfs_nid2str(cr.acr_nid));
- return -EPERM;
- }
-
- CDEBUG(D_NET, "Accept %s from %pI4h\n",
- libcfs_nid2str(cr.acr_nid), &peer_ip);
-
- rc = ni->ni_lnd->lnd_accept(ni, sock);
-
- lnet_ni_decref(ni);
- return rc;
-}
-
-static int
-lnet_acceptor(void *arg)
-{
- struct socket *newsock;
- int rc;
- __u32 magic;
- __u32 peer_ip;
- int peer_port;
- int secure = (int)((long)arg);
-
- LASSERT(!lnet_acceptor_state.pta_sock);
-
- rc = lnet_sock_listen(&lnet_acceptor_state.pta_sock, 0, accept_port,
- accept_backlog);
- if (rc) {
- if (rc == -EADDRINUSE)
- LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port %d: port already in use\n",
- accept_port);
- else
- LCONSOLE_ERROR_MSG(0x123, "Can't start acceptor on port %d: unexpected error %d\n",
- accept_port, rc);
-
- lnet_acceptor_state.pta_sock = NULL;
- } else {
- LCONSOLE(0, "Accept %s, port %d\n", accept_type, accept_port);
- }
-
- /* set init status and unblock parent */
- lnet_acceptor_state.pta_shutdown = rc;
- complete(&lnet_acceptor_state.pta_signal);
-
- if (rc)
- return rc;
-
- while (!lnet_acceptor_state.pta_shutdown) {
- rc = lnet_sock_accept(&newsock, lnet_acceptor_state.pta_sock);
- if (rc) {
- if (rc != -EAGAIN) {
- CWARN("Accept error %d: pausing...\n", rc);
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- }
- continue;
- }
-
-		/* maybe the LNet acceptor thread has been woken */
- if (lnet_acceptor_state.pta_shutdown) {
- sock_release(newsock);
- break;
- }
-
- rc = lnet_sock_getaddr(newsock, 1, &peer_ip, &peer_port);
- if (rc) {
- CERROR("Can't determine new connection's address\n");
- goto failed;
- }
-
- if (secure && peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
- CERROR("Refusing connection from %pI4h: insecure port %d\n",
- &peer_ip, peer_port);
- goto failed;
- }
-
- rc = lnet_sock_read(newsock, &magic, sizeof(magic),
- accept_timeout);
- if (rc) {
- CERROR("Error %d reading connection request from %pI4h\n",
- rc, &peer_ip);
- goto failed;
- }
-
- rc = lnet_accept(newsock, magic);
- if (rc)
- goto failed;
-
- continue;
-
-failed:
- sock_release(newsock);
- }
-
- sock_release(lnet_acceptor_state.pta_sock);
- lnet_acceptor_state.pta_sock = NULL;
-
- CDEBUG(D_NET, "Acceptor stopping\n");
-
- /* unblock lnet_acceptor_stop() */
- complete(&lnet_acceptor_state.pta_signal);
- return 0;
-}
-
-static inline int
-accept2secure(const char *acc, long *sec)
-{
- if (!strcmp(acc, "secure")) {
- *sec = 1;
- return 1;
- } else if (!strcmp(acc, "all")) {
- *sec = 0;
- return 1;
- } else if (!strcmp(acc, "none")) {
- return 0;
- }
-
- LCONSOLE_ERROR_MSG(0x124, "Can't parse 'accept=\"%s\"'\n",
- acc);
- return -EINVAL;
-}
-
-int
-lnet_acceptor_start(void)
-{
- struct task_struct *task;
- int rc;
- long rc2;
- long secure;
-
- /* if acceptor is already running return immediately */
- if (!lnet_acceptor_state.pta_shutdown)
- return 0;
-
- LASSERT(!lnet_acceptor_state.pta_sock);
-
- rc = lnet_acceptor_get_tunables();
- if (rc)
- return rc;
-
- init_completion(&lnet_acceptor_state.pta_signal);
- rc = accept2secure(accept_type, &secure);
- if (rc <= 0)
- return rc;
-
- if (!lnet_count_acceptor_nis()) /* not required */
- return 0;
-
- task = kthread_run(lnet_acceptor, (void *)(uintptr_t)secure,
- "acceptor_%03ld", secure);
- if (IS_ERR(task)) {
- rc2 = PTR_ERR(task);
- CERROR("Can't start acceptor thread: %ld\n", rc2);
-
- return -ESRCH;
- }
-
- /* wait for acceptor to startup */
- wait_for_completion(&lnet_acceptor_state.pta_signal);
-
- if (!lnet_acceptor_state.pta_shutdown) {
- /* started OK */
- LASSERT(lnet_acceptor_state.pta_sock);
- return 0;
- }
-
- LASSERT(!lnet_acceptor_state.pta_sock);
-
- return -ENETDOWN;
-}
-
-void
-lnet_acceptor_stop(void)
-{
- struct sock *sk;
-
- if (lnet_acceptor_state.pta_shutdown) /* not running */
- return;
-
- lnet_acceptor_state.pta_shutdown = 1;
-
- sk = lnet_acceptor_state.pta_sock->sk;
-
-	/* wake any sleepers using a safe method */
- sk->sk_state_change(sk);
-
- /* block until acceptor signals exit */
- wait_for_completion(&lnet_acceptor_state.pta_signal);
-}
diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
deleted file mode 100644
index f9ed6977056c..000000000000
--- a/drivers/staging/lustre/lnet/lnet/api-ni.c
+++ /dev/null
@@ -1,2307 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <linux/log2.h>
-#include <linux/ktime.h>
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-
-#define D_LNI D_CONSOLE
-
-struct lnet the_lnet; /* THE state of the network */
-EXPORT_SYMBOL(the_lnet);
-
-static char *ip2nets = "";
-module_param(ip2nets, charp, 0444);
-MODULE_PARM_DESC(ip2nets, "LNET network <- IP table");
-
-static char *networks = "";
-module_param(networks, charp, 0444);
-MODULE_PARM_DESC(networks, "local networks");
-
-static char *routes = "";
-module_param(routes, charp, 0444);
-MODULE_PARM_DESC(routes, "routes to non-local networks");
-
-static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
-module_param(rnet_htable_size, int, 0444);
-MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
-
-static int lnet_ping(struct lnet_process_id id, int timeout_ms,
- struct lnet_process_id __user *ids, int n_ids);
-
-static char *
-lnet_get_routes(void)
-{
- return routes;
-}
-
-static char *
-lnet_get_networks(void)
-{
- char *nets;
- int rc;
-
- if (*networks && *ip2nets) {
- LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or 'ip2nets' but not both at once\n");
- return NULL;
- }
-
- if (*ip2nets) {
- rc = lnet_parse_ip2nets(&nets, ip2nets);
- return !rc ? nets : NULL;
- }
-
- if (*networks)
- return networks;
-
- return "tcp";
-}
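-
-/*
- * Editor's note (not part of the original source): the precedence here is
- * ip2nets over networks over the built-in "tcp" default, and supplying
- * both ip2nets and networks is rejected.  A hypothetical module load
- * illustrating the networks form:
- *
- *   modprobe lnet networks="o2ib0(ib0),tcp1(eth0)"
- */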
-
-static void
-lnet_init_locks(void)
-{
- spin_lock_init(&the_lnet.ln_eq_wait_lock);
- init_waitqueue_head(&the_lnet.ln_eq_waitq);
- init_waitqueue_head(&the_lnet.ln_rc_waitq);
- mutex_init(&the_lnet.ln_lnd_mutex);
- mutex_init(&the_lnet.ln_api_mutex);
-}
-
-static int
-lnet_create_remote_nets_table(void)
-{
- int i;
- struct list_head *hash;
-
- LASSERT(!the_lnet.ln_remote_nets_hash);
- LASSERT(the_lnet.ln_remote_nets_hbits > 0);
- hash = kvmalloc_array(LNET_REMOTE_NETS_HASH_SIZE, sizeof(*hash),
- GFP_KERNEL);
- if (!hash) {
- CERROR("Failed to create remote nets hash table\n");
- return -ENOMEM;
- }
-
- for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
- INIT_LIST_HEAD(&hash[i]);
- the_lnet.ln_remote_nets_hash = hash;
- return 0;
-}
-
-static void
-lnet_destroy_remote_nets_table(void)
-{
- int i;
-
- if (!the_lnet.ln_remote_nets_hash)
- return;
-
- for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
- LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
-
- kvfree(the_lnet.ln_remote_nets_hash);
- the_lnet.ln_remote_nets_hash = NULL;
-}
-
-static void
-lnet_destroy_locks(void)
-{
- if (the_lnet.ln_res_lock) {
- cfs_percpt_lock_free(the_lnet.ln_res_lock);
- the_lnet.ln_res_lock = NULL;
- }
-
- if (the_lnet.ln_net_lock) {
- cfs_percpt_lock_free(the_lnet.ln_net_lock);
- the_lnet.ln_net_lock = NULL;
- }
-}
-
-static int
-lnet_create_locks(void)
-{
- lnet_init_locks();
-
- the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
- if (!the_lnet.ln_res_lock)
- goto failed;
-
- the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
- if (!the_lnet.ln_net_lock)
- goto failed;
-
- return 0;
-
- failed:
- lnet_destroy_locks();
- return -ENOMEM;
-}
-
-static void lnet_assert_wire_constants(void)
-{
- /*
- * Wire protocol assertions generated by 'wirecheck'
- * running on Linux robert.bartonsoftware.com 2.6.8-1.521
- * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
- * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7)
- */
-
- /* Constants... */
- BUILD_BUG_ON(LNET_PROTO_TCP_MAGIC != 0xeebc0ded);
- BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MAJOR != 1);
- BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MINOR != 0);
- BUILD_BUG_ON(LNET_MSG_ACK != 0);
- BUILD_BUG_ON(LNET_MSG_PUT != 1);
- BUILD_BUG_ON(LNET_MSG_GET != 2);
- BUILD_BUG_ON(LNET_MSG_REPLY != 3);
- BUILD_BUG_ON(LNET_MSG_HELLO != 4);
-
- /* Checks for struct lnet_handle_wire */
- BUILD_BUG_ON((int)sizeof(struct lnet_handle_wire) != 16);
- BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire, wh_interface_cookie) != 0);
- BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_interface_cookie) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire, wh_object_cookie) != 8);
- BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_object_cookie) != 8);
-
- /* Checks for struct lnet_magicversion */
- BUILD_BUG_ON((int)sizeof(struct lnet_magicversion) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, magic) != 0);
- BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->magic) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, version_major) != 4);
- BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_major) != 2);
- BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, version_minor) != 6);
- BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_minor) != 2);
-
- /* Checks for struct lnet_hdr */
- BUILD_BUG_ON((int)sizeof(struct lnet_hdr) != 72);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, dest_nid) != 0);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->dest_nid) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, src_nid) != 8);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->src_nid) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, dest_pid) != 16);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->dest_pid) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, src_pid) != 20);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->src_pid) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, type) != 24);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->type) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, payload_length) != 28);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->payload_length) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg) != 32);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg) != 40);
-
- /* Ack */
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.dst_wmd) != 32);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.dst_wmd) != 16);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.match_bits) != 48);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.match_bits) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.mlength) != 56);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.mlength) != 4);
-
- /* Put */
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.ack_wmd) != 32);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.ack_wmd) != 16);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.match_bits) != 48);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.match_bits) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.hdr_data) != 56);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.hdr_data) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.ptl_index) != 64);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.ptl_index) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.offset) != 68);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.offset) != 4);
-
- /* Get */
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.return_wmd) != 32);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.return_wmd) != 16);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.match_bits) != 48);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.match_bits) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.ptl_index) != 56);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.ptl_index) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.src_offset) != 60);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.src_offset) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.sink_length) != 64);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.sink_length) != 4);
-
- /* Reply */
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.reply.dst_wmd) != 32);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.reply.dst_wmd) != 16);
-
- /* Hello */
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.hello.incarnation) != 32);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.hello.incarnation) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.hello.type) != 40);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.hello.type) != 4);
-}
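-
-/*
- * Editor's sketch (hypothetical struct, not from the original file): the
- * BUILD_BUG_ON() pattern above pins a wire format at compile time.  For a
- * new on-wire struct such as
- *
- *	struct foo_wire {
- *		__u64 fw_cookie;
- *		__u32 fw_flags;
- *	};
- *
- * the equivalent checks, on an ABI where __u64 is 8-byte aligned (as on
- * x86_64), would be
- *
- *	BUILD_BUG_ON((int)sizeof(struct foo_wire) != 16);
- *	BUILD_BUG_ON((int)offsetof(struct foo_wire, fw_flags) != 8);
- *
- * so a field reorder or padding change breaks the build instead of
- * silently breaking interoperability with peers on other nodes.
- */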
-
-static struct lnet_lnd *
-lnet_find_lnd_by_type(__u32 type)
-{
- struct lnet_lnd *lnd;
- struct list_head *tmp;
-
- /* holding lnd mutex */
- list_for_each(tmp, &the_lnet.ln_lnds) {
- lnd = list_entry(tmp, struct lnet_lnd, lnd_list);
-
- if (lnd->lnd_type == type)
- return lnd;
- }
-
- return NULL;
-}
-
-void
-lnet_register_lnd(struct lnet_lnd *lnd)
-{
- mutex_lock(&the_lnet.ln_lnd_mutex);
-
- LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
- LASSERT(!lnet_find_lnd_by_type(lnd->lnd_type));
-
- list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds);
- lnd->lnd_refcount = 0;
-
- CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
-
- mutex_unlock(&the_lnet.ln_lnd_mutex);
-}
-EXPORT_SYMBOL(lnet_register_lnd);
-
-void
-lnet_unregister_lnd(struct lnet_lnd *lnd)
-{
- mutex_lock(&the_lnet.ln_lnd_mutex);
-
- LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
- LASSERT(!lnd->lnd_refcount);
-
- list_del(&lnd->lnd_list);
- CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
-
- mutex_unlock(&the_lnet.ln_lnd_mutex);
-}
-EXPORT_SYMBOL(lnet_unregister_lnd);
-
-void
-lnet_counters_get(struct lnet_counters *counters)
-{
- struct lnet_counters *ctr;
- int i;
-
- memset(counters, 0, sizeof(*counters));
-
- lnet_net_lock(LNET_LOCK_EX);
-
- cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
- counters->msgs_max += ctr->msgs_max;
- counters->msgs_alloc += ctr->msgs_alloc;
- counters->errors += ctr->errors;
- counters->send_count += ctr->send_count;
- counters->recv_count += ctr->recv_count;
- counters->route_count += ctr->route_count;
- counters->drop_count += ctr->drop_count;
- counters->send_length += ctr->send_length;
- counters->recv_length += ctr->recv_length;
- counters->route_length += ctr->route_length;
- counters->drop_length += ctr->drop_length;
- }
- lnet_net_unlock(LNET_LOCK_EX);
-}
-EXPORT_SYMBOL(lnet_counters_get);
-
-void
-lnet_counters_reset(void)
-{
- struct lnet_counters *counters;
- int i;
-
- lnet_net_lock(LNET_LOCK_EX);
-
- cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
- memset(counters, 0, sizeof(struct lnet_counters));
-
- lnet_net_unlock(LNET_LOCK_EX);
-}
-
-static char *
-lnet_res_type2str(int type)
-{
- switch (type) {
- default:
- LBUG();
- case LNET_COOKIE_TYPE_MD:
- return "MD";
- case LNET_COOKIE_TYPE_ME:
- return "ME";
- case LNET_COOKIE_TYPE_EQ:
- return "EQ";
- }
-}
-
-static void
-lnet_res_container_cleanup(struct lnet_res_container *rec)
-{
- int count = 0;
-
- if (!rec->rec_type) /* not set yet, it's uninitialized */
- return;
-
- while (!list_empty(&rec->rec_active)) {
- struct list_head *e = rec->rec_active.next;
-
- list_del_init(e);
- if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
- kfree(list_entry(e, struct lnet_eq, eq_list));
-
- } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
- kfree(list_entry(e, struct lnet_libmd, md_list));
-
- } else { /* NB: Active MEs should be attached on portals */
- LBUG();
- }
- count++;
- }
-
- if (count > 0) {
- /*
- * Found live MDs/MEs/EQs; the user really should have
- * unlinked/freed them all before finalizing LNet, but if
- * someone didn't, we have to recycle the garbage for them.
- */
- CERROR("%d active elements on exit of %s container\n",
- count, lnet_res_type2str(rec->rec_type));
- }
-
- kfree(rec->rec_lh_hash);
- rec->rec_lh_hash = NULL;
-
- rec->rec_type = 0; /* mark it as finalized */
-}
-
-static int
-lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type)
-{
- int rc = 0;
- int i;
-
- LASSERT(!rec->rec_type);
-
- rec->rec_type = type;
- INIT_LIST_HEAD(&rec->rec_active);
- rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
-
- /* Arbitrary choice of hash table size */
- rec->rec_lh_hash = kvmalloc_cpt(LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]),
- GFP_KERNEL, cpt);
- if (!rec->rec_lh_hash) {
- rc = -ENOMEM;
- goto out;
- }
-
- for (i = 0; i < LNET_LH_HASH_SIZE; i++)
- INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
-
- return 0;
-
-out:
- CERROR("Failed to setup %s resource container\n",
- lnet_res_type2str(type));
- lnet_res_container_cleanup(rec);
- return rc;
-}
-
-static void
-lnet_res_containers_destroy(struct lnet_res_container **recs)
-{
- struct lnet_res_container *rec;
- int i;
-
- cfs_percpt_for_each(rec, i, recs)
- lnet_res_container_cleanup(rec);
-
- cfs_percpt_free(recs);
-}
-
-static struct lnet_res_container **
-lnet_res_containers_create(int type)
-{
- struct lnet_res_container **recs;
- struct lnet_res_container *rec;
- int rc;
- int i;
-
- recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
- if (!recs) {
- CERROR("Failed to allocate %s resource containers\n",
- lnet_res_type2str(type));
- return NULL;
- }
-
- cfs_percpt_for_each(rec, i, recs) {
- rc = lnet_res_container_setup(rec, i, type);
- if (rc) {
- lnet_res_containers_destroy(recs);
- return NULL;
- }
- }
-
- return recs;
-}
-
-struct lnet_libhandle *
-lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
-{
- /* ALWAYS called with lnet_res_lock held */
- struct list_head *head;
- struct lnet_libhandle *lh;
- unsigned int hash;
-
- if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
- return NULL;
-
- hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
- head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
-
- list_for_each_entry(lh, head, lh_hash_chain) {
- if (lh->lh_cookie == cookie)
- return lh;
- }
-
- return NULL;
-}
-
-void
-lnet_res_lh_initialize(struct lnet_res_container *rec,
- struct lnet_libhandle *lh)
-{
- /* ALWAYS called with lnet_res_lock held */
- unsigned int ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
- unsigned int hash;
-
- lh->lh_cookie = rec->rec_lh_cookie;
- rec->rec_lh_cookie += 1 << ibits;
-
- hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
-
- list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
-}
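-
-/*
- * Editor's note (not part of the original source): a cookie therefore
- * packs three fields, which lnet_res_lh_lookup() above decodes:
- *
- *	[ sequence ][ cpt: LNET_CPT_BITS ][ type: LNET_COOKIE_TYPE_BITS ]
- *
- * rec_lh_cookie advances by (1 << ibits) per handle, so only the
- * sequence bits change; the hash chain index is taken from those same
- * bits, spreading handles evenly across rec_lh_hash.
- */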
-
-static int lnet_unprepare(void);
-
-static int
-lnet_prepare(lnet_pid_t requested_pid)
-{
- /* Prepare to bring up the network */
- struct lnet_res_container **recs;
- int rc = 0;
-
- if (requested_pid == LNET_PID_ANY) {
- /* Don't instantiate LNET just for me */
- return -ENETDOWN;
- }
-
- LASSERT(!the_lnet.ln_refcount);
-
- the_lnet.ln_routing = 0;
-
- LASSERT(!(requested_pid & LNET_PID_USERFLAG));
- the_lnet.ln_pid = requested_pid;
-
- INIT_LIST_HEAD(&the_lnet.ln_test_peers);
- INIT_LIST_HEAD(&the_lnet.ln_nis);
- INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
- INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
- INIT_LIST_HEAD(&the_lnet.ln_routers);
- INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
- INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
-
- rc = lnet_create_remote_nets_table();
- if (rc)
- goto failed;
- /*
- * NB the interface cookie in wire handles guards against delayed
- * replies and ACKs appearing valid after reboot.
- */
- the_lnet.ln_interface_cookie = ktime_get_ns();
-
- the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(struct lnet_counters));
- if (!the_lnet.ln_counters) {
- CERROR("Failed to allocate counters for LNet\n");
- rc = -ENOMEM;
- goto failed;
- }
-
- rc = lnet_peer_tables_create();
- if (rc)
- goto failed;
-
- rc = lnet_msg_containers_create();
- if (rc)
- goto failed;
-
- rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
- LNET_COOKIE_TYPE_EQ);
- if (rc)
- goto failed;
-
- recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME);
- if (!recs) {
- rc = -ENOMEM;
- goto failed;
- }
-
- the_lnet.ln_me_containers = recs;
-
- recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD);
- if (!recs) {
- rc = -ENOMEM;
- goto failed;
- }
-
- the_lnet.ln_md_containers = recs;
-
- rc = lnet_portals_create();
- if (rc) {
- CERROR("Failed to create portals for LNet: %d\n", rc);
- goto failed;
- }
-
- return 0;
-
- failed:
- lnet_unprepare();
- return rc;
-}
-
-static int
-lnet_unprepare(void)
-{
- /*
- * NB no LNET_LOCK since this is the last reference. All LND instances
- * have shut down already, so it is safe to unlink and free all
- * descriptors, even those that appear committed to a network op (eg MD
- * with non-zero pending count)
- */
- lnet_fail_nid(LNET_NID_ANY, 0);
-
- LASSERT(!the_lnet.ln_refcount);
- LASSERT(list_empty(&the_lnet.ln_test_peers));
- LASSERT(list_empty(&the_lnet.ln_nis));
- LASSERT(list_empty(&the_lnet.ln_nis_cpt));
- LASSERT(list_empty(&the_lnet.ln_nis_zombie));
-
- lnet_portals_destroy();
-
- if (the_lnet.ln_md_containers) {
- lnet_res_containers_destroy(the_lnet.ln_md_containers);
- the_lnet.ln_md_containers = NULL;
- }
-
- if (the_lnet.ln_me_containers) {
- lnet_res_containers_destroy(the_lnet.ln_me_containers);
- the_lnet.ln_me_containers = NULL;
- }
-
- lnet_res_container_cleanup(&the_lnet.ln_eq_container);
-
- lnet_msg_containers_destroy();
- lnet_peer_tables_destroy();
- lnet_rtrpools_free(0);
-
- if (the_lnet.ln_counters) {
- cfs_percpt_free(the_lnet.ln_counters);
- the_lnet.ln_counters = NULL;
- }
- lnet_destroy_remote_nets_table();
-
- return 0;
-}
-
-struct lnet_ni *
-lnet_net2ni_locked(__u32 net, int cpt)
-{
- struct list_head *tmp;
- struct lnet_ni *ni;
-
- LASSERT(cpt != LNET_LOCK_EX);
-
- list_for_each(tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, struct lnet_ni, ni_list);
-
- if (LNET_NIDNET(ni->ni_nid) == net) {
- lnet_ni_addref_locked(ni, cpt);
- return ni;
- }
- }
-
- return NULL;
-}
-
-struct lnet_ni *
-lnet_net2ni(__u32 net)
-{
- struct lnet_ni *ni;
-
- lnet_net_lock(0);
- ni = lnet_net2ni_locked(net, 0);
- lnet_net_unlock(0);
-
- return ni;
-}
-EXPORT_SYMBOL(lnet_net2ni);
-
-static unsigned int
-lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
-{
- __u64 key = nid;
- unsigned int val;
-
- LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
-
- if (number == 1)
- return 0;
-
- val = hash_long(key, LNET_CPT_BITS);
- /* NB: LNET_CPT_NUMBER doesn't have to be a power of 2 */
- if (val < number)
- return val;
-
- return (unsigned int)(key + val + (val >> 1)) % number;
-}
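-
-/*
- * Editor's worked example (hypothetical numbers, not from the original
- * file): hash_long() returns a value in [0, 2^LNET_CPT_BITS), which can
- * overshoot a CPT count that is not a power of two.  With number = 3 and
- * val = 3, the fast path is skipped and the fold
- * (key + 3 + (3 >> 1)) % 3 = (key + 4) % 3 picks the CPT instead.
- */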
-
-int
-lnet_cpt_of_nid_locked(lnet_nid_t nid)
-{
- struct lnet_ni *ni;
-
- /* must be called while holding lnet_net_lock */
- if (LNET_CPT_NUMBER == 1)
- return 0; /* the only one */
-
- /* taking lnet_net_lock(any) would be OK */
- if (!list_empty(&the_lnet.ln_nis_cpt)) {
- list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) {
- if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid))
- continue;
-
- LASSERT(ni->ni_cpts);
- return ni->ni_cpts[lnet_nid_cpt_hash
- (nid, ni->ni_ncpts)];
- }
- }
-
- return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
-}
-
-int
-lnet_cpt_of_nid(lnet_nid_t nid)
-{
- int cpt;
- int cpt2;
-
- if (LNET_CPT_NUMBER == 1)
- return 0; /* the only one */
-
- if (list_empty(&the_lnet.ln_nis_cpt))
- return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
-
- cpt = lnet_net_lock_current();
- cpt2 = lnet_cpt_of_nid_locked(nid);
- lnet_net_unlock(cpt);
-
- return cpt2;
-}
-EXPORT_SYMBOL(lnet_cpt_of_nid);
-
-int
-lnet_islocalnet(__u32 net)
-{
- struct lnet_ni *ni;
- int cpt;
-
- cpt = lnet_net_lock_current();
-
- ni = lnet_net2ni_locked(net, cpt);
- if (ni)
- lnet_ni_decref_locked(ni, cpt);
-
- lnet_net_unlock(cpt);
-
- return !!ni;
-}
-
-struct lnet_ni *
-lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
-{
- struct lnet_ni *ni;
- struct list_head *tmp;
-
- LASSERT(cpt != LNET_LOCK_EX);
-
- list_for_each(tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, struct lnet_ni, ni_list);
-
- if (ni->ni_nid == nid) {
- lnet_ni_addref_locked(ni, cpt);
- return ni;
- }
- }
-
- return NULL;
-}
-
-int
-lnet_islocalnid(lnet_nid_t nid)
-{
- struct lnet_ni *ni;
- int cpt;
-
- cpt = lnet_net_lock_current();
- ni = lnet_nid2ni_locked(nid, cpt);
- if (ni)
- lnet_ni_decref_locked(ni, cpt);
- lnet_net_unlock(cpt);
-
- return !!ni;
-}
-
-int
-lnet_count_acceptor_nis(void)
-{
- /* Return the # of NIs that need the acceptor. */
- int count = 0;
- struct list_head *tmp;
- struct lnet_ni *ni;
- int cpt;
-
- cpt = lnet_net_lock_current();
- list_for_each(tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, struct lnet_ni, ni_list);
-
- if (ni->ni_lnd->lnd_accept)
- count++;
- }
-
- lnet_net_unlock(cpt);
-
- return count;
-}
-
-static struct lnet_ping_info *
-lnet_ping_info_create(int num_ni)
-{
- struct lnet_ping_info *ping_info;
- unsigned int infosz;
-
- infosz = offsetof(struct lnet_ping_info, pi_ni[num_ni]);
- ping_info = kvzalloc(infosz, GFP_KERNEL);
- if (!ping_info) {
- CERROR("Can't allocate ping info[%d]\n", num_ni);
- return NULL;
- }
-
- ping_info->pi_nnis = num_ni;
- ping_info->pi_pid = the_lnet.ln_pid;
- ping_info->pi_magic = LNET_PROTO_PING_MAGIC;
- ping_info->pi_features = LNET_PING_FEAT_NI_STATUS;
-
- return ping_info;
-}
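-
-/*
- * Editor's note (not part of the original source): pi_ni[] is a
- * flexible array member, so the allocation size is computed with
- * offsetof() rather than sizeof(); e.g. num_ni = 2 covers the fixed
- * header plus two struct lnet_ni_status slots, and the same offsetof()
- * expression reappears below wherever the buffer length is needed.
- */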
-
-static inline int
-lnet_get_ni_count(void)
-{
- struct lnet_ni *ni;
- int count = 0;
-
- lnet_net_lock(0);
-
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list)
- count++;
-
- lnet_net_unlock(0);
-
- return count;
-}
-
-static inline void
-lnet_ping_info_free(struct lnet_ping_info *pinfo)
-{
- kvfree(pinfo);
-}
-
-static void
-lnet_ping_info_destroy(void)
-{
- struct lnet_ni *ni;
-
- lnet_net_lock(LNET_LOCK_EX);
-
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
- lnet_ni_lock(ni);
- ni->ni_status = NULL;
- lnet_ni_unlock(ni);
- }
-
- lnet_ping_info_free(the_lnet.ln_ping_info);
- the_lnet.ln_ping_info = NULL;
-
- lnet_net_unlock(LNET_LOCK_EX);
-}
-
-static void
-lnet_ping_event_handler(struct lnet_event *event)
-{
- struct lnet_ping_info *pinfo = event->md.user_ptr;
-
- if (event->unlinked)
- pinfo->pi_features = LNET_PING_FEAT_INVAL;
-}
-
-static int
-lnet_ping_info_setup(struct lnet_ping_info **ppinfo,
- struct lnet_handle_md *md_handle,
- int ni_count, bool set_eq)
-{
- struct lnet_process_id id = {LNET_NID_ANY, LNET_PID_ANY};
- struct lnet_handle_me me_handle;
- struct lnet_md md = { NULL };
- int rc, rc2;
-
- if (set_eq) {
- rc = LNetEQAlloc(0, lnet_ping_event_handler,
- &the_lnet.ln_ping_target_eq);
- if (rc) {
- CERROR("Can't allocate ping EQ: %d\n", rc);
- return rc;
- }
- }
-
- *ppinfo = lnet_ping_info_create(ni_count);
- if (!*ppinfo) {
- rc = -ENOMEM;
- goto failed_0;
- }
-
- rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
- LNET_PROTO_PING_MATCHBITS, 0,
- LNET_UNLINK, LNET_INS_AFTER,
- &me_handle);
- if (rc) {
- CERROR("Can't create ping ME: %d\n", rc);
- goto failed_1;
- }
-
- /* initialize md content */
- md.start = *ppinfo;
- md.length = offsetof(struct lnet_ping_info,
- pi_ni[(*ppinfo)->pi_nnis]);
- md.threshold = LNET_MD_THRESH_INF;
- md.max_size = 0;
- md.options = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
- LNET_MD_MANAGE_REMOTE;
- md.eq_handle = the_lnet.ln_ping_target_eq;
- md.user_ptr = *ppinfo;
-
- rc = LNetMDAttach(me_handle, md, LNET_RETAIN, md_handle);
- if (rc) {
- CERROR("Can't attach ping MD: %d\n", rc);
- goto failed_2;
- }
-
- return 0;
-
-failed_2:
- rc2 = LNetMEUnlink(me_handle);
- LASSERT(!rc2);
-failed_1:
- lnet_ping_info_free(*ppinfo);
- *ppinfo = NULL;
-failed_0:
- if (set_eq)
- LNetEQFree(the_lnet.ln_ping_target_eq);
- return rc;
-}
-
-static void
-lnet_ping_md_unlink(struct lnet_ping_info *pinfo,
- struct lnet_handle_md *md_handle)
-{
- LNetMDUnlink(*md_handle);
- LNetInvalidateMDHandle(md_handle);
-
- /* NB md could be busy; this just starts the unlink */
- while (pinfo->pi_features != LNET_PING_FEAT_INVAL) {
- CDEBUG(D_NET, "Still waiting for ping MD to unlink\n");
- set_current_state(TASK_IDLE);
- schedule_timeout(HZ);
- }
-}
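-
-/*
- * Editor's note (not part of the original source): the loop above works
- * because lnet_ping_event_handler() sets pi_features to
- * LNET_PING_FEAT_INVAL when the unlink event fires, so polling that
- * field is the handshake that tells us the MD is finally idle.
- */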
-
-static void
-lnet_ping_info_install_locked(struct lnet_ping_info *ping_info)
-{
- struct lnet_ni_status *ns;
- struct lnet_ni *ni;
- int i = 0;
-
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
- LASSERT(i < ping_info->pi_nnis);
-
- ns = &ping_info->pi_ni[i];
-
- ns->ns_nid = ni->ni_nid;
-
- lnet_ni_lock(ni);
- ns->ns_status = (ni->ni_status) ?
- ni->ni_status->ns_status : LNET_NI_STATUS_UP;
- ni->ni_status = ns;
- lnet_ni_unlock(ni);
-
- i++;
- }
-}
-
-static void
-lnet_ping_target_update(struct lnet_ping_info *pinfo,
- struct lnet_handle_md md_handle)
-{
- struct lnet_ping_info *old_pinfo = NULL;
- struct lnet_handle_md old_md;
-
- /* switch the NIs to point to the new ping info created */
- lnet_net_lock(LNET_LOCK_EX);
-
- if (!the_lnet.ln_routing)
- pinfo->pi_features |= LNET_PING_FEAT_RTE_DISABLED;
- lnet_ping_info_install_locked(pinfo);
-
- if (the_lnet.ln_ping_info) {
- old_pinfo = the_lnet.ln_ping_info;
- old_md = the_lnet.ln_ping_target_md;
- }
- the_lnet.ln_ping_target_md = md_handle;
- the_lnet.ln_ping_info = pinfo;
-
- lnet_net_unlock(LNET_LOCK_EX);
-
- if (old_pinfo) {
- /* unlink the old ping info */
- lnet_ping_md_unlink(old_pinfo, &old_md);
- lnet_ping_info_free(old_pinfo);
- }
-}
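-
-/*
- * Editor's note (not part of the original source): the new ping info is
- * installed and published before the old one is unlinked, so there is
- * never a window in which a peer's ping GET finds no target attached.
- */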
-
-static void
-lnet_ping_target_fini(void)
-{
- int rc;
-
- lnet_ping_md_unlink(the_lnet.ln_ping_info,
- &the_lnet.ln_ping_target_md);
-
- rc = LNetEQFree(the_lnet.ln_ping_target_eq);
- LASSERT(!rc);
-
- lnet_ping_info_destroy();
-}
-
-static int
-lnet_ni_tq_credits(struct lnet_ni *ni)
-{
- int credits;
-
- LASSERT(ni->ni_ncpts >= 1);
-
- if (ni->ni_ncpts == 1)
- return ni->ni_maxtxcredits;
-
- credits = ni->ni_maxtxcredits / ni->ni_ncpts;
- credits = max(credits, 8 * ni->ni_peertxcredits);
- credits = min(credits, ni->ni_maxtxcredits);
-
- return credits;
-}
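-
-/*
- * Editor's worked example (hypothetical numbers, not from the original
- * file): with ni_maxtxcredits = 256, ni_ncpts = 8 and
- * ni_peertxcredits = 8, the even share is 256 / 8 = 32, raised to
- * max(32, 8 * 8) = 64 and clamped to min(64, 256) = 64 credits per tx
- * queue; i.e. queues may oversubscribe the NI total rather than starve.
- */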
-
-static void
-lnet_ni_unlink_locked(struct lnet_ni *ni)
-{
- if (!list_empty(&ni->ni_cptlist)) {
- list_del_init(&ni->ni_cptlist);
- lnet_ni_decref_locked(ni, 0);
- }
-
- /* move it to the zombie list so nobody can find it anymore */
- LASSERT(!list_empty(&ni->ni_list));
- list_move(&ni->ni_list, &the_lnet.ln_nis_zombie);
- lnet_ni_decref_locked(ni, 0); /* drop ln_nis' ref */
-}
-
-static void
-lnet_clear_zombies_nis_locked(void)
-{
- int i;
- int islo;
- struct lnet_ni *ni;
- struct lnet_ni *temp;
-
- /*
- * Now wait for the NIs I just nuked to show up on ln_nis_zombie
- * and shut them down in guaranteed thread context
- */
- i = 2;
- list_for_each_entry_safe(ni, temp, &the_lnet.ln_nis_zombie, ni_list) {
- int *ref;
- int j;
-
- list_del_init(&ni->ni_list);
- cfs_percpt_for_each(ref, j, ni->ni_refs) {
- if (!*ref)
- continue;
- /* still busy, add it back to zombie list */
- list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
- break;
- }
-
- if (!list_empty(&ni->ni_list)) {
- lnet_net_unlock(LNET_LOCK_EX);
- ++i;
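- /* i is a power of two: warn with exponential backoff */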
- if ((i & (-i)) == i) {
- CDEBUG(D_WARNING, "Waiting for zombie LNI %s\n",
- libcfs_nid2str(ni->ni_nid));
- }
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- lnet_net_lock(LNET_LOCK_EX);
- continue;
- }
-
- ni->ni_lnd->lnd_refcount--;
- lnet_net_unlock(LNET_LOCK_EX);
-
- islo = ni->ni_lnd->lnd_type == LOLND;
-
- LASSERT(!in_interrupt());
- ni->ni_lnd->lnd_shutdown(ni);
-
- /*
- * can't deref lnd anymore now; it might have unregistered
- * itself...
- */
- if (!islo)
- CDEBUG(D_LNI, "Removed LNI %s\n",
- libcfs_nid2str(ni->ni_nid));
-
- lnet_ni_free(ni);
- i = 2;
-
- lnet_net_lock(LNET_LOCK_EX);
- }
-}
-
-static void
-lnet_shutdown_lndnis(void)
-{
- struct lnet_ni *ni;
- struct lnet_ni *temp;
- int i;
-
- /* NB called holding the global mutex */
-
- /* All quiet on the API front */
- LASSERT(!the_lnet.ln_shutdown);
- LASSERT(!the_lnet.ln_refcount);
- LASSERT(list_empty(&the_lnet.ln_nis_zombie));
-
- lnet_net_lock(LNET_LOCK_EX);
- the_lnet.ln_shutdown = 1; /* flag shutdown */
-
- /* Unlink NIs from the global table */
- list_for_each_entry_safe(ni, temp, &the_lnet.ln_nis, ni_list) {
- lnet_ni_unlink_locked(ni);
- }
-
- /* Drop the cached loopback NI. */
- if (the_lnet.ln_loni) {
- lnet_ni_decref_locked(the_lnet.ln_loni, 0);
- the_lnet.ln_loni = NULL;
- }
-
- lnet_net_unlock(LNET_LOCK_EX);
-
- /*
- * Clear lazy portals and drop delayed messages which hold refs
- * on their lnet_msg::msg_rxpeer
- */
- for (i = 0; i < the_lnet.ln_nportals; i++)
- LNetClearLazyPortal(i);
-
- /*
- * Clear the peer table and wait for all peers to go (they hold refs on
- * their NIs)
- */
- lnet_peer_tables_cleanup(NULL);
-
- lnet_net_lock(LNET_LOCK_EX);
-
- lnet_clear_zombies_nis_locked();
- the_lnet.ln_shutdown = 0;
- lnet_net_unlock(LNET_LOCK_EX);
-}
-
-/* shut down the NI and release its refcount */
-static void
-lnet_shutdown_lndni(struct lnet_ni *ni)
-{
- int i;
-
- lnet_net_lock(LNET_LOCK_EX);
- lnet_ni_unlink_locked(ni);
- lnet_net_unlock(LNET_LOCK_EX);
-
- /* clear messages for this NI on the lazy portal */
- for (i = 0; i < the_lnet.ln_nportals; i++)
- lnet_clear_lazy_portal(ni, i, "Shutting down NI");
-
- /* Do peer table cleanup for this ni */
- lnet_peer_tables_cleanup(ni);
-
- lnet_net_lock(LNET_LOCK_EX);
- lnet_clear_zombies_nis_locked();
- lnet_net_unlock(LNET_LOCK_EX);
-}
-
-static int
-lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
-{
- struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
- int rc = -EINVAL;
- int lnd_type;
- struct lnet_lnd *lnd;
- struct lnet_tx_queue *tq;
- int i;
- u32 seed;
-
- lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
-
- LASSERT(libcfs_isknown_lnd(lnd_type));
-
- if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
- lnd_type == IIBLND || lnd_type == VIBLND) {
- CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
- goto failed0;
- }
-
- /* Make sure this new NI is unique. */
- lnet_net_lock(LNET_LOCK_EX);
- rc = lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nis);
- lnet_net_unlock(LNET_LOCK_EX);
- if (!rc) {
- if (lnd_type == LOLND) {
- lnet_ni_free(ni);
- return 0;
- }
-
- CERROR("Net %s is not unique\n",
- libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
- rc = -EEXIST;
- goto failed0;
- }
-
- mutex_lock(&the_lnet.ln_lnd_mutex);
- lnd = lnet_find_lnd_by_type(lnd_type);
-
- if (!lnd) {
- mutex_unlock(&the_lnet.ln_lnd_mutex);
- rc = request_module("%s", libcfs_lnd2modname(lnd_type));
- mutex_lock(&the_lnet.ln_lnd_mutex);
-
- lnd = lnet_find_lnd_by_type(lnd_type);
- if (!lnd) {
- mutex_unlock(&the_lnet.ln_lnd_mutex);
- CERROR("Can't load LND %s, module %s, rc=%d\n",
- libcfs_lnd2str(lnd_type),
- libcfs_lnd2modname(lnd_type), rc);
- rc = -EINVAL;
- goto failed0;
- }
- }
-
- lnet_net_lock(LNET_LOCK_EX);
- lnd->lnd_refcount++;
- lnet_net_unlock(LNET_LOCK_EX);
-
- ni->ni_lnd = lnd;
-
- if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf))
- lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk;
-
- if (lnd_tunables) {
- ni->ni_lnd_tunables = kzalloc(sizeof(*ni->ni_lnd_tunables),
- GFP_NOFS);
- if (!ni->ni_lnd_tunables) {
- mutex_unlock(&the_lnet.ln_lnd_mutex);
- rc = -ENOMEM;
- goto failed0;
- }
- memcpy(ni->ni_lnd_tunables, lnd_tunables,
- sizeof(*ni->ni_lnd_tunables));
- }
-
- /*
- * If given some LND tunable parameters, parse those now to
- * override the values in the NI structure.
- */
- if (conf) {
- if (conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0)
- ni->ni_peerrtrcredits =
- conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
- if (conf->cfg_config_u.cfg_net.net_peer_timeout >= 0)
- ni->ni_peertimeout =
- conf->cfg_config_u.cfg_net.net_peer_timeout;
- if (conf->cfg_config_u.cfg_net.net_peer_tx_credits != -1)
- ni->ni_peertxcredits =
- conf->cfg_config_u.cfg_net.net_peer_tx_credits;
- if (conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0)
- ni->ni_maxtxcredits =
- conf->cfg_config_u.cfg_net.net_max_tx_credits;
- }
-
- rc = lnd->lnd_startup(ni);
-
- mutex_unlock(&the_lnet.ln_lnd_mutex);
-
- if (rc) {
- LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
- rc, libcfs_lnd2str(lnd->lnd_type));
- lnet_net_lock(LNET_LOCK_EX);
- lnd->lnd_refcount--;
- lnet_net_unlock(LNET_LOCK_EX);
- goto failed0;
- }
-
- LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query);
-
- lnet_net_lock(LNET_LOCK_EX);
- /* refcount for ln_nis */
- lnet_ni_addref_locked(ni, 0);
- list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
- if (ni->ni_cpts) {
- lnet_ni_addref_locked(ni, 0);
- list_add_tail(&ni->ni_cptlist, &the_lnet.ln_nis_cpt);
- }
-
- lnet_net_unlock(LNET_LOCK_EX);
-
- if (lnd->lnd_type == LOLND) {
- lnet_ni_addref(ni);
- LASSERT(!the_lnet.ln_loni);
- the_lnet.ln_loni = ni;
- return 0;
- }
-
- if (!ni->ni_peertxcredits || !ni->ni_maxtxcredits) {
- LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
- libcfs_lnd2str(lnd->lnd_type),
- !ni->ni_peertxcredits ?
- "" : "per-peer ");
- /*
- * shut down the NI; if we got here it must already
- * have been started
- */
- lnet_shutdown_lndni(ni);
- return -EINVAL;
- }
-
- cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
- tq->tq_credits_min =
- tq->tq_credits_max =
- tq->tq_credits = lnet_ni_tq_credits(ni);
- }
-
- /* Nodes with small feet have little entropy. The NID for this
- * node gives the most entropy in the low bits.
- */
- seed = LNET_NIDADDR(ni->ni_nid);
- add_device_randomness(&seed, sizeof(seed));
-
- CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
- libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits,
- lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
- ni->ni_peerrtrcredits, ni->ni_peertimeout);
-
- return 0;
-failed0:
- lnet_ni_free(ni);
- return rc;
-}
-
-static int
-lnet_startup_lndnis(struct list_head *nilist)
-{
- struct lnet_ni *ni;
- int rc;
- int ni_count = 0;
-
- while (!list_empty(nilist)) {
- ni = list_entry(nilist->next, struct lnet_ni, ni_list);
- list_del(&ni->ni_list);
- rc = lnet_startup_lndni(ni, NULL);
-
- if (rc < 0)
- goto failed;
-
- ni_count++;
- }
-
- return ni_count;
-failed:
- lnet_shutdown_lndnis();
-
- return rc;
-}
-
-/**
- * Initialize LNet library.
- *
- * Automatically called at module loading time. Caller has to call
- * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the
- * latter returned 0. It must be called exactly once.
- *
- * \retval 0 on success
- * \retval -ve on failures.
- */
-int lnet_lib_init(void)
-{
- int rc;
-
- lnet_assert_wire_constants();
-
- memset(&the_lnet, 0, sizeof(the_lnet));
-
- /* refer to global cfs_cpt_tab for now */
- the_lnet.ln_cpt_table = cfs_cpt_tab;
- the_lnet.ln_cpt_number = cfs_cpt_number(cfs_cpt_tab);
-
- LASSERT(the_lnet.ln_cpt_number > 0);
- if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
- /* we risk consuming all lh_cookie values */
- CERROR("Can't have %d CPTs for LNet (max allowed is %d), please change setting of CPT-table and retry\n",
- the_lnet.ln_cpt_number, LNET_CPT_MAX);
- return -E2BIG;
- }
-
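- /* compute ln_cpt_bits = ceil(log2(ln_cpt_number)) */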
- while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
- the_lnet.ln_cpt_bits++;
-
- rc = lnet_create_locks();
- if (rc) {
- CERROR("Can't create LNet global locks: %d\n", rc);
- return rc;
- }
-
- the_lnet.ln_refcount = 0;
- LNetInvalidateEQHandle(&the_lnet.ln_rc_eqh);
- INIT_LIST_HEAD(&the_lnet.ln_lnds);
- INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
- INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
-
- /*
- * The hash table size is the number of bits it takes to express the
- * set ln_num_routes, minus 1 (better to underestimate than
- * overestimate so we don't waste memory).
- */
- if (rnet_htable_size <= 0)
- rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
- else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
- rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
- the_lnet.ln_remote_nets_hbits = max_t(int, 1,
- order_base_2(rnet_htable_size) - 1);
-
- /*
- * All LNDs apart from the LOLND are in separate modules. They
- * register themselves when their module loads, and unregister
- * themselves when their module is unloaded.
- */
- lnet_register_lnd(&the_lolnd);
- return 0;
-}
-
-/**
- * Finalize LNet library.
- *
- * \pre lnet_lib_init() called with success.
- * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
- */
-void lnet_lib_exit(void)
-{
- LASSERT(!the_lnet.ln_refcount);
-
- while (!list_empty(&the_lnet.ln_lnds))
- lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
- struct lnet_lnd, lnd_list));
- lnet_destroy_locks();
-}
-
-/**
- * Set LNet PID and start LNet interfaces, routing, and forwarding.
- *
- * Users must call this function at least once before any other functions.
- * For each successful call there must be a corresponding call to
- * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
- * ignored.
- *
- * The PID used by LNet may be different from the one requested.
- * See LNetGetId().
- *
- * \param requested_pid PID requested by the caller.
- *
- * \return >= 0 on success, and < 0 error code on failures.
- */
-int
-LNetNIInit(lnet_pid_t requested_pid)
-{
- int im_a_router = 0;
- int rc;
- int ni_count;
- struct lnet_ping_info *pinfo;
- struct lnet_handle_md md_handle;
- struct list_head net_head;
-
- INIT_LIST_HEAD(&net_head);
-
- mutex_lock(&the_lnet.ln_api_mutex);
-
- CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
-
- if (the_lnet.ln_refcount > 0) {
- rc = the_lnet.ln_refcount++;
- mutex_unlock(&the_lnet.ln_api_mutex);
- return rc;
- }
-
- rc = lnet_prepare(requested_pid);
- if (rc) {
- mutex_unlock(&the_lnet.ln_api_mutex);
- return rc;
- }
-
- /* Add in the loopback network */
- if (!lnet_ni_alloc(LNET_MKNET(LOLND, 0), NULL, &net_head)) {
- rc = -ENOMEM;
- goto err_empty_list;
- }
-
- /*
- * If LNet is being initialized via DLC, the user may request that
- * module parameters (those supported by DLC) not be loaded on
- * initialization. In that case make sure not to load networks,
- * routes and forwarding from module parameters. On cleanup after
- * a failure, only clean up routes if they were loaded.
- */
- if (!the_lnet.ln_nis_from_mod_params) {
- rc = lnet_parse_networks(&net_head, lnet_get_networks());
- if (rc < 0)
- goto err_empty_list;
- }
-
- ni_count = lnet_startup_lndnis(&net_head);
- if (ni_count < 0) {
- rc = ni_count;
- goto err_empty_list;
- }
-
- if (!the_lnet.ln_nis_from_mod_params) {
- rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
- if (rc)
- goto err_shutdown_lndnis;
-
- rc = lnet_check_routes();
- if (rc)
- goto err_destroy_routes;
-
- rc = lnet_rtrpools_alloc(im_a_router);
- if (rc)
- goto err_destroy_routes;
- }
-
- rc = lnet_acceptor_start();
- if (rc)
- goto err_destroy_routes;
-
- the_lnet.ln_refcount = 1;
- /* Now I may use my own API functions... */
-
- rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count, true);
- if (rc)
- goto err_acceptor_stop;
-
- lnet_ping_target_update(pinfo, md_handle);
-
- rc = lnet_router_checker_start();
- if (rc)
- goto err_stop_ping;
-
- lnet_fault_init();
- lnet_router_debugfs_init();
-
- mutex_unlock(&the_lnet.ln_api_mutex);
-
- return 0;
-
-err_stop_ping:
- lnet_ping_target_fini();
-err_acceptor_stop:
- the_lnet.ln_refcount = 0;
- lnet_acceptor_stop();
-err_destroy_routes:
- if (!the_lnet.ln_nis_from_mod_params)
- lnet_destroy_routes();
-err_shutdown_lndnis:
- lnet_shutdown_lndnis();
-err_empty_list:
- lnet_unprepare();
- LASSERT(rc < 0);
- mutex_unlock(&the_lnet.ln_api_mutex);
- while (!list_empty(&net_head)) {
- struct lnet_ni *ni;
-
- ni = list_entry(net_head.next, struct lnet_ni, ni_list);
- list_del_init(&ni->ni_list);
- lnet_ni_free(ni);
- }
- return rc;
-}
-EXPORT_SYMBOL(LNetNIInit);
-
-/**
- * Stop LNet interfaces, routing, and forwarding.
- *
- * Users must call this function once for each successful call to LNetNIInit().
- * Once the LNetNIFini() operation has been started, the results of pending
- * API operations are undefined.
- *
- * \return always 0 for current implementation.
- */
-int
-LNetNIFini(void)
-{
- mutex_lock(&the_lnet.ln_api_mutex);
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (the_lnet.ln_refcount != 1) {
- the_lnet.ln_refcount--;
- } else {
- LASSERT(!the_lnet.ln_niinit_self);
-
- lnet_fault_fini();
- lnet_router_debugfs_fini();
- lnet_router_checker_stop();
- lnet_ping_target_fini();
-
- /* Teardown fns that use my own API functions BEFORE here */
- the_lnet.ln_refcount = 0;
-
- lnet_acceptor_stop();
- lnet_destroy_routes();
- lnet_shutdown_lndnis();
- lnet_unprepare();
- }
-
- mutex_unlock(&the_lnet.ln_api_mutex);
- return 0;
-}
-EXPORT_SYMBOL(LNetNIFini);
-
-/**
- * Grabs the NI data from the NI structure and fills in the output
- * parameters.
- *
- * \param[in] ni network interface structure
- * \param[out] config NI configuration
- */
-static void
-lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config)
-{
- struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
- struct lnet_ioctl_net_config *net_config;
- size_t min_size, tunable_size = 0;
- int i;
-
- if (!ni || !config)
- return;
-
- net_config = (struct lnet_ioctl_net_config *)config->cfg_bulk;
- if (!net_config)
- return;
-
- BUILD_BUG_ON(ARRAY_SIZE(ni->ni_interfaces) !=
- ARRAY_SIZE(net_config->ni_interfaces));
-
- for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
- if (!ni->ni_interfaces[i])
- break;
-
- strncpy(net_config->ni_interfaces[i],
- ni->ni_interfaces[i],
- sizeof(net_config->ni_interfaces[i]));
- }
-
- config->cfg_nid = ni->ni_nid;
- config->cfg_config_u.cfg_net.net_peer_timeout = ni->ni_peertimeout;
- config->cfg_config_u.cfg_net.net_max_tx_credits = ni->ni_maxtxcredits;
- config->cfg_config_u.cfg_net.net_peer_tx_credits = ni->ni_peertxcredits;
- config->cfg_config_u.cfg_net.net_peer_rtr_credits = ni->ni_peerrtrcredits;
-
- net_config->ni_status = ni->ni_status->ns_status;
-
- if (ni->ni_cpts) {
- int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT);
-
- for (i = 0; i < num_cpts; i++)
- net_config->ni_cpts[i] = ni->ni_cpts[i];
-
- config->cfg_ncpts = num_cpts;
- }
-
- /*
- * See if user land tools sent in a newer and larger version
- * of struct lnet_tunables than what the kernel uses.
- */
- min_size = sizeof(*config) + sizeof(*net_config);
-
- if (config->cfg_hdr.ioc_len > min_size)
- tunable_size = config->cfg_hdr.ioc_len - min_size;
-
- /* Don't copy too much data to user space */
- min_size = min(tunable_size, sizeof(*ni->ni_lnd_tunables));
- lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
-
- if (ni->ni_lnd_tunables && lnd_cfg && min_size) {
- memcpy(lnd_cfg, ni->ni_lnd_tunables, min_size);
- config->cfg_config_u.cfg_net.net_interface_count = 1;
-
- /* Tell userland that the kernel side has less data */
- if (tunable_size > sizeof(*ni->ni_lnd_tunables)) {
- min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
- config->cfg_hdr.ioc_len -= min_size;
- }
- }
-}
-
-static int
-lnet_get_net_config(struct lnet_ioctl_config_data *config)
-{
- struct lnet_ni *ni;
- struct list_head *tmp;
- int idx = config->cfg_count;
- int cpt, i = 0;
- int rc = -ENOENT;
-
- cpt = lnet_net_lock_current();
-
- list_for_each(tmp, &the_lnet.ln_nis) {
- if (i++ != idx)
- continue;
-
- ni = list_entry(tmp, struct lnet_ni, ni_list);
- lnet_ni_lock(ni);
- lnet_fill_ni_info(ni, config);
- lnet_ni_unlock(ni);
- rc = 0;
- break;
- }
-
- lnet_net_unlock(cpt);
- return rc;
-}
-
-int
-lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
-{
- char *nets = conf->cfg_config_u.cfg_net.net_intf;
- struct lnet_ping_info *pinfo;
- struct lnet_handle_md md_handle;
- struct lnet_ni *ni;
- struct list_head net_head;
- struct lnet_remotenet *rnet;
- int rc;
-
- INIT_LIST_HEAD(&net_head);
-
- /* Create a ni structure for the network string */
- rc = lnet_parse_networks(&net_head, nets);
- if (rc <= 0)
- return !rc ? -EINVAL : rc;
-
- mutex_lock(&the_lnet.ln_api_mutex);
-
- if (rc > 1) {
- rc = -EINVAL; /* only add one interface per call */
- goto failed0;
- }
-
- ni = list_entry(net_head.next, struct lnet_ni, ni_list);
-
- lnet_net_lock(LNET_LOCK_EX);
- rnet = lnet_find_net_locked(LNET_NIDNET(ni->ni_nid));
- lnet_net_unlock(LNET_LOCK_EX);
- /*
- * make sure that the added net doesn't invalidate the current
- * routing configuration LNet is keeping
- */
- if (rnet) {
- CERROR("Adding net %s will invalidate routing configuration\n",
- nets);
- rc = -EUSERS;
- goto failed0;
- }
-
- rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(),
- false);
- if (rc)
- goto failed0;
-
- list_del_init(&ni->ni_list);
-
- rc = lnet_startup_lndni(ni, conf);
- if (rc)
- goto failed1;
-
- if (ni->ni_lnd->lnd_accept) {
- rc = lnet_acceptor_start();
- if (rc < 0) {
- /* shut down the NI that we just started */
- CERROR("Failed to start up acceptor thread\n");
- lnet_shutdown_lndni(ni);
- goto failed1;
- }
- }
-
- lnet_ping_target_update(pinfo, md_handle);
- mutex_unlock(&the_lnet.ln_api_mutex);
-
- return 0;
-
-failed1:
- lnet_ping_md_unlink(pinfo, &md_handle);
- lnet_ping_info_free(pinfo);
-failed0:
- mutex_unlock(&the_lnet.ln_api_mutex);
- while (!list_empty(&net_head)) {
- ni = list_entry(net_head.next, struct lnet_ni, ni_list);
- list_del_init(&ni->ni_list);
- lnet_ni_free(ni);
- }
- return rc;
-}
-
-int
-lnet_dyn_del_ni(__u32 net)
-{
- struct lnet_ni *ni;
- struct lnet_ping_info *pinfo;
- struct lnet_handle_md md_handle;
- int rc;
-
- /* don't allow userspace to shut down the LOLND */
- if (LNET_NETTYP(net) == LOLND)
- return -EINVAL;
-
- mutex_lock(&the_lnet.ln_api_mutex);
- /* create and link a new ping info, before removing the old one */
- rc = lnet_ping_info_setup(&pinfo, &md_handle,
- lnet_get_ni_count() - 1, false);
- if (rc)
- goto out;
-
- ni = lnet_net2ni(net);
- if (!ni) {
- rc = -EINVAL;
- goto failed;
- }
-
- /* decrement the reference counter taken by lnet_net2ni() */
- lnet_ni_decref_locked(ni, 0);
-
- lnet_shutdown_lndni(ni);
-
- if (!lnet_count_acceptor_nis())
- lnet_acceptor_stop();
-
- lnet_ping_target_update(pinfo, md_handle);
- goto out;
-failed:
- lnet_ping_md_unlink(pinfo, &md_handle);
- lnet_ping_info_free(pinfo);
-out:
- mutex_unlock(&the_lnet.ln_api_mutex);
-
- return rc;
-}
-
-/**
- * LNet ioctl handler.
- */
-int
-LNetCtl(unsigned int cmd, void *arg)
-{
- struct libcfs_ioctl_data *data = arg;
- struct lnet_ioctl_config_data *config;
- struct lnet_process_id id = {0};
- struct lnet_ni *ni;
- int rc;
- unsigned long secs_passed;
-
- BUILD_BUG_ON(LIBCFS_IOC_DATA_MAX <
- sizeof(struct lnet_ioctl_net_config) +
- sizeof(struct lnet_ioctl_config_data));
-
- switch (cmd) {
- case IOC_LIBCFS_GET_NI:
- rc = LNetGetId(data->ioc_count, &id);
- data->ioc_nid = id.nid;
- return rc;
-
- case IOC_LIBCFS_FAIL_NID:
- return lnet_fail_nid(data->ioc_nid, data->ioc_count);
-
- case IOC_LIBCFS_ADD_ROUTE:
- config = arg;
-
- if (config->cfg_hdr.ioc_len < sizeof(*config))
- return -EINVAL;
-
- mutex_lock(&the_lnet.ln_api_mutex);
- rc = lnet_add_route(config->cfg_net,
- config->cfg_config_u.cfg_route.rtr_hop,
- config->cfg_nid,
- config->cfg_config_u.cfg_route.rtr_priority);
- if (!rc) {
- rc = lnet_check_routes();
- if (rc)
- lnet_del_route(config->cfg_net,
- config->cfg_nid);
- }
- mutex_unlock(&the_lnet.ln_api_mutex);
- return rc;
-
- case IOC_LIBCFS_DEL_ROUTE:
- config = arg;
-
- if (config->cfg_hdr.ioc_len < sizeof(*config))
- return -EINVAL;
-
- mutex_lock(&the_lnet.ln_api_mutex);
- rc = lnet_del_route(config->cfg_net, config->cfg_nid);
- mutex_unlock(&the_lnet.ln_api_mutex);
- return rc;
-
- case IOC_LIBCFS_GET_ROUTE:
- config = arg;
-
- if (config->cfg_hdr.ioc_len < sizeof(*config))
- return -EINVAL;
-
- return lnet_get_route(config->cfg_count,
- &config->cfg_net,
- &config->cfg_config_u.cfg_route.rtr_hop,
- &config->cfg_nid,
- &config->cfg_config_u.cfg_route.rtr_flags,
- &config->cfg_config_u.cfg_route.rtr_priority);
-
- case IOC_LIBCFS_GET_NET: {
- size_t total = sizeof(*config) +
- sizeof(struct lnet_ioctl_net_config);
- config = arg;
-
- if (config->cfg_hdr.ioc_len < total)
- return -EINVAL;
-
- return lnet_get_net_config(config);
- }
-
- case IOC_LIBCFS_GET_LNET_STATS: {
- struct lnet_ioctl_lnet_stats *lnet_stats = arg;
-
- if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats))
- return -EINVAL;
-
- lnet_counters_get(&lnet_stats->st_cntrs);
- return 0;
- }
-
- case IOC_LIBCFS_CONFIG_RTR:
- config = arg;
-
- if (config->cfg_hdr.ioc_len < sizeof(*config))
- return -EINVAL;
-
- mutex_lock(&the_lnet.ln_api_mutex);
- if (config->cfg_config_u.cfg_buffers.buf_enable) {
- rc = lnet_rtrpools_enable();
- mutex_unlock(&the_lnet.ln_api_mutex);
- return rc;
- }
- lnet_rtrpools_disable();
- mutex_unlock(&the_lnet.ln_api_mutex);
- return 0;
-
- case IOC_LIBCFS_ADD_BUF:
- config = arg;
-
- if (config->cfg_hdr.ioc_len < sizeof(*config))
- return -EINVAL;
-
- mutex_lock(&the_lnet.ln_api_mutex);
- rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.buf_tiny,
- config->cfg_config_u.cfg_buffers.buf_small,
- config->cfg_config_u.cfg_buffers.buf_large);
- mutex_unlock(&the_lnet.ln_api_mutex);
- return rc;
-
- case IOC_LIBCFS_GET_BUF: {
- struct lnet_ioctl_pool_cfg *pool_cfg;
- size_t total = sizeof(*config) + sizeof(*pool_cfg);
-
- config = arg;
-
- if (config->cfg_hdr.ioc_len < total)
- return -EINVAL;
-
- pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
- return lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
- }
-
- case IOC_LIBCFS_GET_PEER_INFO: {
- struct lnet_ioctl_peer *peer_info = arg;
-
- if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info))
- return -EINVAL;
-
- return lnet_get_peer_info(peer_info->pr_count,
- &peer_info->pr_nid,
- peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_rtr_credits,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
- }
-
- case IOC_LIBCFS_NOTIFY_ROUTER:
- secs_passed = (ktime_get_real_seconds() - data->ioc_u64[0]);
- secs_passed *= msecs_to_jiffies(MSEC_PER_SEC);
-
- return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
- jiffies - secs_passed);
-
- case IOC_LIBCFS_LNET_DIST:
- rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
- if (rc < 0 && rc != -EHOSTUNREACH)
- return rc;
-
- data->ioc_u32[0] = rc;
- return 0;
-
- case IOC_LIBCFS_TESTPROTOCOMPAT:
- lnet_net_lock(LNET_LOCK_EX);
- the_lnet.ln_testprotocompat = data->ioc_flags;
- lnet_net_unlock(LNET_LOCK_EX);
- return 0;
-
- case IOC_LIBCFS_LNET_FAULT:
- return lnet_fault_ctl(data->ioc_flags, data);
-
- case IOC_LIBCFS_PING:
- id.nid = data->ioc_nid;
- id.pid = data->ioc_u32[0];
- rc = lnet_ping(id, data->ioc_u32[1], /* timeout */
- data->ioc_pbuf1,
- data->ioc_plen1 / sizeof(struct lnet_process_id));
- if (rc < 0)
- return rc;
- data->ioc_count = rc;
- return 0;
-
- default:
- ni = lnet_net2ni(data->ioc_net);
- if (!ni)
- return -EINVAL;
-
- if (!ni->ni_lnd->lnd_ctl)
- rc = -EINVAL;
- else
- rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
-
- lnet_ni_decref(ni);
- return rc;
- }
- /* not reached */
-}
-EXPORT_SYMBOL(LNetCtl);
-
-void LNetDebugPeer(struct lnet_process_id id)
-{
- lnet_debug_peer(id.nid);
-}
-EXPORT_SYMBOL(LNetDebugPeer);
-
-/**
- * Retrieve the lnet_process_id ID of the LNet interface at \a index. Note
- * that all interfaces share the same PID, as requested by LNetNIInit().
- *
- * \param index Index of the interface to look up.
- * \param id On successful return, this location will hold the
- * lnet_process_id ID of the interface.
- *
- * \retval 0 If an interface exists at \a index.
- * \retval -ENOENT If no interface has been found.
- */
-int
-LNetGetId(unsigned int index, struct lnet_process_id *id)
-{
- struct lnet_ni *ni;
- struct list_head *tmp;
- int cpt;
- int rc = -ENOENT;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- cpt = lnet_net_lock_current();
-
- list_for_each(tmp, &the_lnet.ln_nis) {
- if (index--)
- continue;
-
- ni = list_entry(tmp, struct lnet_ni, ni_list);
-
- id->nid = ni->ni_nid;
- id->pid = the_lnet.ln_pid;
- rc = 0;
- break;
- }
-
- lnet_net_unlock(cpt);
- return rc;
-}
-EXPORT_SYMBOL(LNetGetId);
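-
-/*
- * Editor's sketch (hypothetical caller, not from the original file):
- * enumerating every local interface with LNetGetId(), assuming
- * LNetNIInit() has already been called:
- *
- *	struct lnet_process_id id;
- *	unsigned int i;
- *
- *	for (i = 0; LNetGetId(i, &id) == 0; i++)
- *		CDEBUG(D_NET, "interface %u: %s\n", i, libcfs_id2str(id));
- */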
-
-static int lnet_ping(struct lnet_process_id id, int timeout_ms,
- struct lnet_process_id __user *ids, int n_ids)
-{
- struct lnet_handle_eq eqh;
- struct lnet_handle_md mdh;
- struct lnet_event event;
- struct lnet_md md = { NULL };
- int which;
- int unlinked = 0;
- int replied = 0;
- const int a_long_time = 60000; /* ms */
- int infosz;
- struct lnet_ping_info *info;
- struct lnet_process_id tmpid;
- int i;
- int nob;
- int rc;
- int rc2;
-
- infosz = offsetof(struct lnet_ping_info, pi_ni[n_ids]);
-
- if (n_ids <= 0 ||
- id.nid == LNET_NID_ANY ||
- timeout_ms > 500000 || /* arbitrary limit! */
- n_ids > 20) /* arbitrary limit! */
- return -EINVAL;
-
- if (id.pid == LNET_PID_ANY)
- id.pid = LNET_PID_LUSTRE;
-
- info = kzalloc(infosz, GFP_KERNEL);
- if (!info)
- return -ENOMEM;
-
- /* NB 2 events max (including any unlink event) */
- rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
- if (rc) {
- CERROR("Can't allocate EQ: %d\n", rc);
- goto out_0;
- }
-
- /* initialize md content */
- md.start = info;
- md.length = infosz;
- md.threshold = 2; /* GET/REPLY */
- md.max_size = 0;
- md.options = LNET_MD_TRUNCATE;
- md.user_ptr = NULL;
- md.eq_handle = eqh;
-
- rc = LNetMDBind(md, LNET_UNLINK, &mdh);
- if (rc) {
- CERROR("Can't bind MD: %d\n", rc);
- goto out_1;
- }
-
- rc = LNetGet(LNET_NID_ANY, mdh, id,
- LNET_RESERVED_PORTAL,
- LNET_PROTO_PING_MATCHBITS, 0);
-
- if (rc) {
- /* Don't CERROR; this could be deliberate! */
-
- rc2 = LNetMDUnlink(mdh);
- LASSERT(!rc2);
-
- /* NB must wait for the UNLINK event below... */
- unlinked = 1;
- timeout_ms = a_long_time;
- }
-
- do {
- /* MUST block for unlink to complete */
-
- rc2 = LNetEQPoll(&eqh, 1, timeout_ms, !unlinked,
- &event, &which);
-
- CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
- (rc2 <= 0) ? -1 : event.type,
- (rc2 <= 0) ? -1 : event.status,
- (rc2 > 0 && event.unlinked) ? " unlinked" : "");
-
- LASSERT(rc2 != -EOVERFLOW); /* can't miss anything */
-
- if (rc2 <= 0 || event.status) {
- /* timeout or error */
- if (!replied && !rc)
- rc = (rc2 < 0) ? rc2 :
- !rc2 ? -ETIMEDOUT :
- event.status;
-
- if (!unlinked) {
- /* Ensure completion in finite time... */
- LNetMDUnlink(mdh);
- /* No assertion (racing with network) */
- unlinked = 1;
- timeout_ms = a_long_time;
- } else if (!rc2) {
- /* timed out waiting for unlink */
- CWARN("ping %s: late network completion\n",
- libcfs_id2str(id));
- }
- } else if (event.type == LNET_EVENT_REPLY) {
- replied = 1;
- rc = event.mlength;
- }
-
- } while (rc2 <= 0 || !event.unlinked);
-
- if (!replied) {
- if (rc >= 0)
- CWARN("%s: Unexpected rc >= 0 but no reply!\n",
- libcfs_id2str(id));
- rc = -EIO;
- goto out_1;
- }
-
- nob = rc;
- LASSERT(nob >= 0 && nob <= infosz);
-
- rc = -EPROTO; /* if I can't parse... */
-
- if (nob < 8) {
- /* can't check magic/version */
- CERROR("%s: ping info too short %d\n",
- libcfs_id2str(id), nob);
- goto out_1;
- }
-
- if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
- lnet_swap_pinginfo(info);
- } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
- CERROR("%s: Unexpected magic %08x\n",
- libcfs_id2str(id), info->pi_magic);
- goto out_1;
- }
-
- if (!(info->pi_features & LNET_PING_FEAT_NI_STATUS)) {
- CERROR("%s: ping w/o NI status: 0x%x\n",
- libcfs_id2str(id), info->pi_features);
- goto out_1;
- }
-
- if (nob < offsetof(struct lnet_ping_info, pi_ni[0])) {
- CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
- nob, (int)offsetof(struct lnet_ping_info, pi_ni[0]));
- goto out_1;
- }
-
- if (info->pi_nnis < n_ids)
- n_ids = info->pi_nnis;
-
- if (nob < offsetof(struct lnet_ping_info, pi_ni[n_ids])) {
- CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
- nob, (int)offsetof(struct lnet_ping_info, pi_ni[n_ids]));
- goto out_1;
- }
-
- rc = -EFAULT; /* If I SEGV... */
-
- memset(&tmpid, 0, sizeof(tmpid));
- for (i = 0; i < n_ids; i++) {
- tmpid.pid = info->pi_pid;
- tmpid.nid = info->pi_ni[i].ns_nid;
- if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
- goto out_1;
- }
- rc = info->pi_nnis;
-
- out_1:
- rc2 = LNetEQFree(eqh);
- if (rc2)
- CERROR("rc2 %d\n", rc2);
- LASSERT(!rc2);
-
- out_0:
- kfree(info);
- return rc;
-}
diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c
deleted file mode 100644
index 55ecc1998b7e..000000000000
--- a/drivers/staging/lustre/lnet/lnet/config.c
+++ /dev/null
@@ -1,1235 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <linux/nsproxy.h>
-#include <net/net_namespace.h>
-#include <linux/ctype.h>
-#include <linux/lnet/lib-lnet.h>
-
-struct lnet_text_buf { /* tmp struct for parsing routes */
- struct list_head ltb_list; /* stash on lists */
- int ltb_size; /* allocated size */
- char ltb_text[0]; /* text buffer */
-};
-
-static int lnet_tbnob; /* track text buf allocation */
-#define LNET_MAX_TEXTBUF_NOB (64 << 10) /* bound allocation */
-#define LNET_SINGLE_TEXTBUF_NOB (4 << 10)
-
-static void
-lnet_syntax(char *name, char *str, int offset, int width)
-{
- static char dots[LNET_SINGLE_TEXTBUF_NOB];
- static char dashes[LNET_SINGLE_TEXTBUF_NOB];
-
- memset(dots, '.', sizeof(dots));
- dots[sizeof(dots) - 1] = 0;
- memset(dashes, '-', sizeof(dashes));
- dashes[sizeof(dashes) - 1] = 0;
-
- LCONSOLE_ERROR_MSG(0x10f, "Error parsing '%s=\"%s\"'\n", name, str);
- LCONSOLE_ERROR_MSG(0x110, "here...........%.*s..%.*s|%.*s|\n",
- (int)strlen(name), dots, offset, dots,
- (width < 1) ? 0 : width - 1, dashes);
-}
-
-static int
-lnet_issep(char c)
-{
- switch (c) {
- case '\n':
- case '\r':
- case ';':
- return 1;
- default:
- return 0;
- }
-}
-
-int
-lnet_net_unique(__u32 net, struct list_head *nilist)
-{
- struct list_head *tmp;
- struct lnet_ni *ni;
-
- list_for_each(tmp, nilist) {
- ni = list_entry(tmp, struct lnet_ni, ni_list);
-
- if (LNET_NIDNET(ni->ni_nid) == net)
- return 0;
- }
-
- return 1;
-}
-
-void
-lnet_ni_free(struct lnet_ni *ni)
-{
- int i;
-
- if (ni->ni_refs)
- cfs_percpt_free(ni->ni_refs);
-
- if (ni->ni_tx_queues)
- cfs_percpt_free(ni->ni_tx_queues);
-
- if (ni->ni_cpts)
- cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts);
-
- kfree(ni->ni_lnd_tunables);
-
- for (i = 0; i < LNET_MAX_INTERFACES && ni->ni_interfaces[i]; i++)
- kfree(ni->ni_interfaces[i]);
-
- /* release reference to net namespace */
- if (ni->ni_net_ns)
- put_net(ni->ni_net_ns);
-
- kfree(ni);
-}
-
-struct lnet_ni *
-lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist)
-{
- struct lnet_tx_queue *tq;
- struct lnet_ni *ni;
- int rc;
- int i;
-
- if (!lnet_net_unique(net, nilist)) {
- LCONSOLE_ERROR_MSG(0x111, "Duplicate network specified: %s\n",
- libcfs_net2str(net));
- return NULL;
- }
-
- ni = kzalloc(sizeof(*ni), GFP_NOFS);
- if (!ni) {
- CERROR("Out of memory creating network %s\n",
- libcfs_net2str(net));
- return NULL;
- }
-
- spin_lock_init(&ni->ni_lock);
- INIT_LIST_HEAD(&ni->ni_cptlist);
- ni->ni_refs = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(*ni->ni_refs[0]));
- if (!ni->ni_refs)
- goto failed;
-
- ni->ni_tx_queues = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(*ni->ni_tx_queues[0]));
- if (!ni->ni_tx_queues)
- goto failed;
-
- cfs_percpt_for_each(tq, i, ni->ni_tx_queues)
- INIT_LIST_HEAD(&tq->tq_delayed);
-
- if (!el) {
- ni->ni_cpts = NULL;
- ni->ni_ncpts = LNET_CPT_NUMBER;
- } else {
- rc = cfs_expr_list_values(el, LNET_CPT_NUMBER, &ni->ni_cpts);
- if (rc <= 0) {
- CERROR("Failed to set CPTs for NI %s: %d\n",
- libcfs_net2str(net), rc);
- goto failed;
- }
-
- LASSERT(rc <= LNET_CPT_NUMBER);
- if (rc == LNET_CPT_NUMBER) {
- cfs_expr_list_values_free(ni->ni_cpts, LNET_CPT_NUMBER);
- ni->ni_cpts = NULL;
- }
-
- ni->ni_ncpts = rc;
- }
-
- /* LND will fill in the address part of the NID */
- ni->ni_nid = LNET_MKNID(net, 0);
-
-	/* Store the net namespace in which the current NI is being created */
- if (current->nsproxy->net_ns)
- ni->ni_net_ns = get_net(current->nsproxy->net_ns);
- else
- ni->ni_net_ns = NULL;
-
- ni->ni_last_alive = ktime_get_real_seconds();
- list_add_tail(&ni->ni_list, nilist);
- return ni;
- failed:
- lnet_ni_free(ni);
- return NULL;
-}
-
-int
-lnet_parse_networks(struct list_head *nilist, char *networks)
-{
- struct cfs_expr_list *el = NULL;
- char *tokens;
- char *str;
- char *tmp;
- struct lnet_ni *ni;
- __u32 net;
- int nnets = 0;
- struct list_head *temp_node;
-
- if (!networks) {
- CERROR("networks string is undefined\n");
- return -EINVAL;
- }
-
- if (strlen(networks) > LNET_SINGLE_TEXTBUF_NOB) {
- /* _WAY_ conservative */
- LCONSOLE_ERROR_MSG(0x112,
- "Can't parse networks: string too long\n");
- return -EINVAL;
- }
-
- tokens = kstrdup(networks, GFP_KERNEL);
- if (!tokens) {
- CERROR("Can't allocate net tokens\n");
- return -ENOMEM;
- }
-
- tmp = tokens;
- str = tokens;
-
- while (str && *str) {
- char *comma = strchr(str, ',');
- char *bracket = strchr(str, '(');
- char *square = strchr(str, '[');
- char *iface;
- int niface;
- int rc;
-
- /*
-		 * NB we don't check interface conflicts here; that's the
-		 * LND's responsibility (if it cares at all)
- */
- if (square && (!comma || square < comma)) {
- /*
-			 * e.g. o2ib0(ib0)[1,2]: the numbers between square
-			 * brackets are the CPTs this NI should be bound to
- */
- if (bracket && bracket > square) {
- tmp = square;
- goto failed_syntax;
- }
-
- tmp = strchr(square, ']');
- if (!tmp) {
- tmp = square;
- goto failed_syntax;
- }
-
- rc = cfs_expr_list_parse(square, tmp - square + 1,
- 0, LNET_CPT_NUMBER - 1, &el);
- if (rc) {
- tmp = square;
- goto failed_syntax;
- }
-
- while (square <= tmp)
- *square++ = ' ';
- }
-
- if (!bracket || (comma && comma < bracket)) {
- /* no interface list specified */
-
- if (comma)
- *comma++ = 0;
- net = libcfs_str2net(strim(str));
-
- if (net == LNET_NIDNET(LNET_NID_ANY)) {
- LCONSOLE_ERROR_MSG(0x113,
- "Unrecognised network type\n");
- tmp = str;
- goto failed_syntax;
- }
-
- if (LNET_NETTYP(net) != LOLND && /* LO is implicit */
- !lnet_ni_alloc(net, el, nilist))
- goto failed;
-
- if (el) {
- cfs_expr_list_free(el);
- el = NULL;
- }
-
- str = comma;
- continue;
- }
-
- *bracket = 0;
- net = libcfs_str2net(strim(str));
- if (net == LNET_NIDNET(LNET_NID_ANY)) {
- tmp = str;
- goto failed_syntax;
- }
-
- ni = lnet_ni_alloc(net, el, nilist);
- if (!ni)
- goto failed;
-
- if (el) {
- cfs_expr_list_free(el);
- el = NULL;
- }
-
- niface = 0;
- iface = bracket + 1;
-
- bracket = strchr(iface, ')');
- if (!bracket) {
- tmp = iface;
- goto failed_syntax;
- }
-
- *bracket = 0;
- do {
- comma = strchr(iface, ',');
- if (comma)
- *comma++ = 0;
-
- iface = strim(iface);
- if (!*iface) {
- tmp = iface;
- goto failed_syntax;
- }
-
- if (niface == LNET_MAX_INTERFACES) {
- LCONSOLE_ERROR_MSG(0x115,
- "Too many interfaces for net %s\n",
- libcfs_net2str(net));
- goto failed;
- }
-
- /*
-			 * Allocate a separate piece of memory and copy
-			 * the string into it, so we don't have a
-			 * dependency on the tokens string. This way we
-			 * can free the tokens at the end of the function.
-			 * The newly allocated ni_interfaces[] entries are
-			 * freed when the NI is freed
- */
- ni->ni_interfaces[niface] = kstrdup(iface, GFP_KERNEL);
- if (!ni->ni_interfaces[niface]) {
- CERROR("Can't allocate net interface name\n");
- goto failed;
- }
- niface++;
- iface = comma;
- } while (iface);
-
- str = bracket + 1;
- comma = strchr(bracket + 1, ',');
- if (comma) {
- *comma = 0;
- str = strim(str);
- if (*str) {
- tmp = str;
- goto failed_syntax;
- }
- str = comma + 1;
- continue;
- }
-
- str = strim(str);
- if (*str) {
- tmp = str;
- goto failed_syntax;
- }
- }
-
- list_for_each(temp_node, nilist)
- nnets++;
-
- kfree(tokens);
- return nnets;
-
- failed_syntax:
- lnet_syntax("networks", networks, (int)(tmp - tokens), strlen(tmp));
- failed:
- while (!list_empty(nilist)) {
- ni = list_entry(nilist->next, struct lnet_ni, ni_list);
-
- list_del(&ni->ni_list);
- lnet_ni_free(ni);
- }
-
- if (el)
- cfs_expr_list_free(el);
-
- kfree(tokens);
-
- return -EINVAL;
-}
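-
-/*
- * Illustrative sketch (not part of the original module): how a typical
- * "networks" module parameter string is consumed. The string and the
- * wrapper name below are assumptions for the example only.
- */
-#if 0
-static int example_parse_networks(void)
-{
-	/* two nets: TCP over eth0/eth1, o2ib bound to CPTs 0 and 1 */
-	static char nets[] = "tcp0(eth0,eth1),o2ib0(ib0)[0,1]";
-	LIST_HEAD(nilist);
-	int nnets;
-
-	nnets = lnet_parse_networks(&nilist, nets);
-	/* on success nnets == 2 and nilist holds one lnet_ni per net */
-	return nnets < 0 ? nnets : 0;
-}
-#endif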
-
-static struct lnet_text_buf *
-lnet_new_text_buf(int str_len)
-{
- struct lnet_text_buf *ltb;
- int nob;
-
- /* NB allocate space for the terminating 0 */
- nob = offsetof(struct lnet_text_buf, ltb_text[str_len + 1]);
- if (nob > LNET_SINGLE_TEXTBUF_NOB) {
- /* _way_ conservative for "route net gateway..." */
- CERROR("text buffer too big\n");
- return NULL;
- }
-
- if (lnet_tbnob + nob > LNET_MAX_TEXTBUF_NOB) {
- CERROR("Too many text buffers\n");
- return NULL;
- }
-
- ltb = kzalloc(nob, GFP_KERNEL);
- if (!ltb)
- return NULL;
-
- ltb->ltb_size = nob;
- ltb->ltb_text[0] = 0;
- lnet_tbnob += nob;
- return ltb;
-}
-
-static void
-lnet_free_text_buf(struct lnet_text_buf *ltb)
-{
- lnet_tbnob -= ltb->ltb_size;
- kfree(ltb);
-}
-
-static void
-lnet_free_text_bufs(struct list_head *tbs)
-{
- struct lnet_text_buf *ltb;
-
- while (!list_empty(tbs)) {
- ltb = list_entry(tbs->next, struct lnet_text_buf, ltb_list);
-
- list_del(&ltb->ltb_list);
- lnet_free_text_buf(ltb);
- }
-}
-
-static int
-lnet_str2tbs_sep(struct list_head *tbs, char *str)
-{
- struct list_head pending;
- char *sep;
- int nob;
- int i;
- struct lnet_text_buf *ltb;
-
- INIT_LIST_HEAD(&pending);
-
- /* Split 'str' into separate commands */
- for (;;) {
- /* skip leading whitespace */
- while (isspace(*str))
- str++;
-
- /* scan for separator or comment */
- for (sep = str; *sep; sep++)
- if (lnet_issep(*sep) || *sep == '#')
- break;
-
- nob = (int)(sep - str);
- if (nob > 0) {
- ltb = lnet_new_text_buf(nob);
- if (!ltb) {
- lnet_free_text_bufs(&pending);
- return -ENOMEM;
- }
-
- for (i = 0; i < nob; i++)
- if (isspace(str[i]))
- ltb->ltb_text[i] = ' ';
- else
- ltb->ltb_text[i] = str[i];
-
- ltb->ltb_text[nob] = 0;
-
- list_add_tail(&ltb->ltb_list, &pending);
- }
-
- if (*sep == '#') {
- /* scan for separator */
- do {
- sep++;
- } while (*sep && !lnet_issep(*sep));
- }
-
- if (!*sep)
- break;
-
- str = sep + 1;
- }
-
- list_splice(&pending, tbs->prev);
- return 0;
-}
-
-static int
-lnet_expand1tb(struct list_head *list,
- char *str, char *sep1, char *sep2,
- char *item, int itemlen)
-{
- int len1 = (int)(sep1 - str);
- int len2 = strlen(sep2 + 1);
- struct lnet_text_buf *ltb;
-
- LASSERT(*sep1 == '[');
- LASSERT(*sep2 == ']');
-
- ltb = lnet_new_text_buf(len1 + itemlen + len2);
- if (!ltb)
- return -ENOMEM;
-
- memcpy(ltb->ltb_text, str, len1);
- memcpy(&ltb->ltb_text[len1], item, itemlen);
- memcpy(&ltb->ltb_text[len1 + itemlen], sep2 + 1, len2);
- ltb->ltb_text[len1 + itemlen + len2] = 0;
-
- list_add_tail(&ltb->ltb_list, list);
- return 0;
-}
-
-static int
-lnet_str2tbs_expand(struct list_head *tbs, char *str)
-{
- char num[16];
- struct list_head pending;
- char *sep;
- char *sep2;
- char *parsed;
- char *enditem;
- int lo;
- int hi;
- int stride;
- int i;
- int nob;
- int scanned;
-
- INIT_LIST_HEAD(&pending);
-
- sep = strchr(str, '[');
- if (!sep) /* nothing to expand */
- return 0;
-
- sep2 = strchr(sep, ']');
- if (!sep2)
- goto failed;
-
- for (parsed = sep; parsed < sep2; parsed = enditem) {
- enditem = ++parsed;
- while (enditem < sep2 && *enditem != ',')
- enditem++;
-
- if (enditem == parsed) /* no empty items */
- goto failed;
-
- if (sscanf(parsed, "%d-%d/%d%n", &lo, &hi,
- &stride, &scanned) < 3) {
- if (sscanf(parsed, "%d-%d%n", &lo, &hi, &scanned) < 2) {
- /* simple string enumeration */
- if (lnet_expand1tb(&pending, str, sep, sep2,
- parsed,
- (int)(enditem - parsed))) {
- goto failed;
- }
- continue;
- }
-
- stride = 1;
- }
-
- /* range expansion */
-
- if (enditem != parsed + scanned) /* no trailing junk */
- goto failed;
-
- if (hi < 0 || lo < 0 || stride < 0 || hi < lo ||
- (hi - lo) % stride)
- goto failed;
-
- for (i = lo; i <= hi; i += stride) {
- snprintf(num, sizeof(num), "%d", i);
- nob = strlen(num);
- if (nob + 1 == sizeof(num))
- goto failed;
-
- if (lnet_expand1tb(&pending, str, sep, sep2,
- num, nob))
- goto failed;
- }
- }
-
- list_splice(&pending, tbs->prev);
- return 1;
-
- failed:
- lnet_free_text_bufs(&pending);
- return -EINVAL;
-}
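-
-/*
- * Worked example (added annotation, not in the original): each call
- * expands the first bracket group only, so "tcp[0-4/2]" queues the
- * buffers "tcp0", "tcp2" and "tcp4", while "ib[1,3]" queues "ib1" and
- * "ib3". A minimal sketch of a caller:
- */
-#if 0
-static void example_expand(struct list_head *tbs, char *spec)
-{
-	int rc = lnet_str2tbs_expand(tbs, spec);
-
-	if (rc < 0)		/* bad range syntax */
-		CERROR("cannot expand '%s'\n", spec);
-	else if (!rc)		/* no brackets: nothing queued */
-		CDEBUG(D_NET, "'%s' needs no expansion\n", spec);
-}
-#endif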
-
-static int
-lnet_parse_hops(char *str, unsigned int *hops)
-{
- int len = strlen(str);
- int nob = len;
-
- return (sscanf(str, "%u%n", hops, &nob) >= 1 &&
- nob == len &&
- *hops > 0 && *hops < 256);
-}
-
-#define LNET_PRIORITY_SEPARATOR (':')
-
-static int
-lnet_parse_priority(char *str, unsigned int *priority, char **token)
-{
- int nob;
- char *sep;
- int len;
-
- sep = strchr(str, LNET_PRIORITY_SEPARATOR);
- if (!sep) {
- *priority = 0;
- return 0;
- }
- len = strlen(sep + 1);
-
- if ((sscanf((sep + 1), "%u%n", priority, &nob) < 1) || (len != nob)) {
- /*
- * Update the caller's token pointer so it treats the found
- * priority as the token to report in the error message.
- */
- *token += sep - str + 1;
- return -EINVAL;
- }
-
- CDEBUG(D_NET, "gateway %s, priority %d, nob %d\n", str, *priority, nob);
-
- /*
-	 * Replace the priority separator with \0 so the NID can be parsed
- */
- *sep = '\0';
- return 0;
-}
-
-static int
-lnet_parse_route(char *str, int *im_a_router)
-{
- /* static scratch buffer OK (single threaded) */
- static char cmd[LNET_SINGLE_TEXTBUF_NOB];
-
- struct list_head nets;
- struct list_head gateways;
- struct list_head *tmp1;
- struct list_head *tmp2;
- __u32 net;
- lnet_nid_t nid;
- struct lnet_text_buf *ltb;
- int rc;
- char *sep;
- char *token = str;
- int ntokens = 0;
- int myrc = -1;
- __u32 hops;
- int got_hops = 0;
- unsigned int priority = 0;
-
- INIT_LIST_HEAD(&gateways);
- INIT_LIST_HEAD(&nets);
-
- /* save a copy of the string for error messages */
- strncpy(cmd, str, sizeof(cmd));
- cmd[sizeof(cmd) - 1] = '\0';
-
- sep = str;
- for (;;) {
- /* scan for token start */
- while (isspace(*sep))
- sep++;
- if (!*sep) {
- if (ntokens < (got_hops ? 3 : 2))
- goto token_error;
- break;
- }
-
- ntokens++;
- token = sep++;
-
- /* scan for token end */
- while (*sep && !isspace(*sep))
- sep++;
- if (*sep)
- *sep++ = 0;
-
- if (ntokens == 1) {
- tmp2 = &nets; /* expanding nets */
- } else if (ntokens == 2 &&
- lnet_parse_hops(token, &hops)) {
- got_hops = 1; /* got a hop count */
- continue;
- } else {
- tmp2 = &gateways; /* expanding gateways */
- }
-
- ltb = lnet_new_text_buf(strlen(token));
- if (!ltb)
- goto out;
-
- strcpy(ltb->ltb_text, token);
- tmp1 = &ltb->ltb_list;
- list_add_tail(tmp1, tmp2);
-
- while (tmp1 != tmp2) {
- ltb = list_entry(tmp1, struct lnet_text_buf, ltb_list);
-
- rc = lnet_str2tbs_expand(tmp1->next, ltb->ltb_text);
- if (rc < 0)
- goto token_error;
-
- tmp1 = tmp1->next;
-
- if (rc > 0) { /* expanded! */
- list_del(&ltb->ltb_list);
- lnet_free_text_buf(ltb);
- continue;
- }
-
- if (ntokens == 1) {
- net = libcfs_str2net(ltb->ltb_text);
- if (net == LNET_NIDNET(LNET_NID_ANY) ||
- LNET_NETTYP(net) == LOLND)
- goto token_error;
- } else {
- rc = lnet_parse_priority(ltb->ltb_text,
- &priority, &token);
- if (rc < 0)
- goto token_error;
-
- nid = libcfs_str2nid(ltb->ltb_text);
- if (nid == LNET_NID_ANY ||
- LNET_NETTYP(LNET_NIDNET(nid)) == LOLND)
- goto token_error;
- }
- }
- }
-
-	/*
-	 * hops is an optional parameter; if it wasn't specified, flag
-	 * the value as unset
- */
- if (!got_hops)
- hops = LNET_UNDEFINED_HOPS;
-
- LASSERT(!list_empty(&nets));
- LASSERT(!list_empty(&gateways));
-
- list_for_each(tmp1, &nets) {
- ltb = list_entry(tmp1, struct lnet_text_buf, ltb_list);
- net = libcfs_str2net(ltb->ltb_text);
- LASSERT(net != LNET_NIDNET(LNET_NID_ANY));
-
- list_for_each(tmp2, &gateways) {
- ltb = list_entry(tmp2, struct lnet_text_buf, ltb_list);
- nid = libcfs_str2nid(ltb->ltb_text);
- LASSERT(nid != LNET_NID_ANY);
-
- if (lnet_islocalnid(nid)) {
- *im_a_router = 1;
- continue;
- }
-
- rc = lnet_add_route(net, hops, nid, priority);
- if (rc && rc != -EEXIST && rc != -EHOSTUNREACH) {
- CERROR("Can't create route to %s via %s\n",
- libcfs_net2str(net),
- libcfs_nid2str(nid));
- goto out;
- }
- }
- }
-
- myrc = 0;
- goto out;
-
- token_error:
- lnet_syntax("routes", cmd, (int)(token - str), strlen(token));
- out:
- lnet_free_text_bufs(&nets);
- lnet_free_text_bufs(&gateways);
- return myrc;
-}
-
-static int
-lnet_parse_route_tbs(struct list_head *tbs, int *im_a_router)
-{
- struct lnet_text_buf *ltb;
-
- while (!list_empty(tbs)) {
- ltb = list_entry(tbs->next, struct lnet_text_buf, ltb_list);
-
- if (lnet_parse_route(ltb->ltb_text, im_a_router) < 0) {
- lnet_free_text_bufs(tbs);
- return -EINVAL;
- }
-
- list_del(&ltb->ltb_list);
- lnet_free_text_buf(ltb);
- }
-
- return 0;
-}
-
-int
-lnet_parse_routes(char *routes, int *im_a_router)
-{
- struct list_head tbs;
- int rc = 0;
-
- *im_a_router = 0;
-
- INIT_LIST_HEAD(&tbs);
-
- if (lnet_str2tbs_sep(&tbs, routes) < 0) {
- CERROR("Error parsing routes\n");
- rc = -EINVAL;
- } else {
- rc = lnet_parse_route_tbs(&tbs, im_a_router);
- }
-
- LASSERT(!lnet_tbnob);
- return rc;
-}
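-
-/*
- * Usage sketch (the route string is an assumption for illustration):
- * a route entry is "<remote net> [<hops>] <gateway>[:<priority>]", and
- * gateways may use bracket expansion.
- */
-#if 0
-static int example_add_routes(void)
-{
-	/* reach o2ib0 in one hop via three tcp0 gateways, priority 1 */
-	static char routes[] = "o2ib0 1 192.168.0.[10-12]@tcp0:1";
-	int im_a_router = 0;
-
-	/* im_a_router is set if one of the gateway NIDs is local */
-	return lnet_parse_routes(routes, &im_a_router);
-}
-#endif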
-
-static int
-lnet_match_network_token(char *token, int len, __u32 *ipaddrs, int nip)
-{
- LIST_HEAD(list);
- int rc;
- int i;
-
- rc = cfs_ip_addr_parse(token, len, &list);
- if (rc)
- return rc;
-
- for (rc = i = 0; !rc && i < nip; i++)
- rc = cfs_ip_addr_match(ipaddrs[i], &list);
-
- cfs_expr_list_free_list(&list);
-
- return rc;
-}
-
-static int
-lnet_match_network_tokens(char *net_entry, __u32 *ipaddrs, int nip)
-{
- static char tokens[LNET_SINGLE_TEXTBUF_NOB];
-
- int matched = 0;
- int ntokens = 0;
- int len;
- char *net = NULL;
- char *sep;
- char *token;
- int rc;
-
- LASSERT(strlen(net_entry) < sizeof(tokens));
-
- /* work on a copy of the string */
- strcpy(tokens, net_entry);
- sep = tokens;
- for (;;) {
- /* scan for token start */
- while (isspace(*sep))
- sep++;
- if (!*sep)
- break;
-
- token = sep++;
-
- /* scan for token end */
- while (*sep && !isspace(*sep))
- sep++;
- if (*sep)
- *sep++ = 0;
-
- if (!ntokens++) {
- net = token;
- continue;
- }
-
- len = strlen(token);
-
- rc = lnet_match_network_token(token, len, ipaddrs, nip);
- if (rc < 0) {
- lnet_syntax("ip2nets", net_entry,
- (int)(token - tokens), len);
- return rc;
- }
-
- if (rc)
- matched |= 1;
- }
-
- if (!matched)
- return 0;
-
- strcpy(net_entry, net); /* replace with matched net */
- return 1;
-}
-
-static __u32
-lnet_netspec2net(char *netspec)
-{
- char *bracket = strchr(netspec, '(');
- __u32 net;
-
- if (bracket)
- *bracket = 0;
-
- net = libcfs_str2net(netspec);
-
- if (bracket)
- *bracket = '(';
-
- return net;
-}
-
-static int
-lnet_splitnets(char *source, struct list_head *nets)
-{
- int offset = 0;
- int offset2;
- int len;
- struct lnet_text_buf *tb;
- struct lnet_text_buf *tb2;
- struct list_head *t;
- char *sep;
- char *bracket;
- __u32 net;
-
- LASSERT(!list_empty(nets));
- LASSERT(nets->next == nets->prev); /* single entry */
-
- tb = list_entry(nets->next, struct lnet_text_buf, ltb_list);
-
- for (;;) {
- sep = strchr(tb->ltb_text, ',');
- bracket = strchr(tb->ltb_text, '(');
-
- if (sep && bracket && bracket < sep) {
- /* netspec lists interfaces... */
-
- offset2 = offset + (int)(bracket - tb->ltb_text);
- len = strlen(bracket);
-
- bracket = strchr(bracket + 1, ')');
-
- if (!bracket ||
- !(bracket[1] == ',' || !bracket[1])) {
- lnet_syntax("ip2nets", source, offset2, len);
- return -EINVAL;
- }
-
- sep = !bracket[1] ? NULL : bracket + 1;
- }
-
- if (sep)
- *sep++ = 0;
-
- net = lnet_netspec2net(tb->ltb_text);
- if (net == LNET_NIDNET(LNET_NID_ANY)) {
- lnet_syntax("ip2nets", source, offset,
- strlen(tb->ltb_text));
- return -EINVAL;
- }
-
- list_for_each(t, nets) {
- tb2 = list_entry(t, struct lnet_text_buf, ltb_list);
-
- if (tb2 == tb)
- continue;
-
- if (net == lnet_netspec2net(tb2->ltb_text)) {
- /* duplicate network */
- lnet_syntax("ip2nets", source, offset,
- strlen(tb->ltb_text));
- return -EINVAL;
- }
- }
-
- if (!sep)
- return 0;
-
- offset += (int)(sep - tb->ltb_text);
- len = strlen(sep);
- tb2 = lnet_new_text_buf(len);
- if (!tb2)
- return -ENOMEM;
-
- strncpy(tb2->ltb_text, sep, len);
- tb2->ltb_text[len] = '\0';
- list_add_tail(&tb2->ltb_list, nets);
-
- tb = tb2;
- }
-}
-
-static int
-lnet_match_networks(char **networksp, char *ip2nets, __u32 *ipaddrs, int nip)
-{
- static char networks[LNET_SINGLE_TEXTBUF_NOB];
- static char source[LNET_SINGLE_TEXTBUF_NOB];
-
- struct list_head raw_entries;
- struct list_head matched_nets;
- struct list_head current_nets;
- struct list_head *t;
- struct list_head *t2;
- struct lnet_text_buf *tb;
- struct lnet_text_buf *temp;
- struct lnet_text_buf *tb2;
- __u32 net1;
- __u32 net2;
- int len;
- int count;
- int dup;
- int rc;
-
- INIT_LIST_HEAD(&raw_entries);
- if (lnet_str2tbs_sep(&raw_entries, ip2nets) < 0) {
- CERROR("Error parsing ip2nets\n");
- LASSERT(!lnet_tbnob);
- return -EINVAL;
- }
-
- INIT_LIST_HEAD(&matched_nets);
- INIT_LIST_HEAD(&current_nets);
- networks[0] = 0;
- count = 0;
- len = 0;
- rc = 0;
-
- list_for_each_entry_safe(tb, temp, &raw_entries, ltb_list) {
- strncpy(source, tb->ltb_text, sizeof(source));
- source[sizeof(source) - 1] = '\0';
-
-		/* on a match, replace ltb_text with the matched network(s) */
- rc = lnet_match_network_tokens(tb->ltb_text, ipaddrs, nip);
- if (rc < 0)
- break;
-
- list_del(&tb->ltb_list);
-
- if (!rc) { /* no match */
- lnet_free_text_buf(tb);
- continue;
- }
-
- /* split into separate networks */
- INIT_LIST_HEAD(&current_nets);
- list_add(&tb->ltb_list, &current_nets);
- rc = lnet_splitnets(source, &current_nets);
- if (rc < 0)
- break;
-
- dup = 0;
- list_for_each(t, &current_nets) {
- tb = list_entry(t, struct lnet_text_buf, ltb_list);
- net1 = lnet_netspec2net(tb->ltb_text);
- LASSERT(net1 != LNET_NIDNET(LNET_NID_ANY));
-
- list_for_each(t2, &matched_nets) {
- tb2 = list_entry(t2, struct lnet_text_buf,
- ltb_list);
- net2 = lnet_netspec2net(tb2->ltb_text);
- LASSERT(net2 != LNET_NIDNET(LNET_NID_ANY));
-
- if (net1 == net2) {
- dup = 1;
- break;
- }
- }
-
- if (dup)
- break;
- }
-
- if (dup) {
- lnet_free_text_bufs(&current_nets);
- continue;
- }
-
- list_for_each_safe(t, t2, &current_nets) {
- tb = list_entry(t, struct lnet_text_buf, ltb_list);
-
- list_del(&tb->ltb_list);
- list_add_tail(&tb->ltb_list, &matched_nets);
-
- len += snprintf(networks + len, sizeof(networks) - len,
- "%s%s", !len ? "" : ",",
- tb->ltb_text);
-
- if (len >= sizeof(networks)) {
- CERROR("Too many matched networks\n");
- rc = -E2BIG;
- goto out;
- }
- }
-
- count++;
- }
-
- out:
- lnet_free_text_bufs(&raw_entries);
- lnet_free_text_bufs(&matched_nets);
- lnet_free_text_bufs(&current_nets);
- LASSERT(!lnet_tbnob);
-
- if (rc < 0)
- return rc;
-
- *networksp = networks;
- return count;
-}
-
-static int
-lnet_ipaddr_enumerate(__u32 **ipaddrsp)
-{
- int up;
- __u32 netmask;
- __u32 *ipaddrs;
- __u32 *ipaddrs2;
- int nip;
- char **ifnames;
- int nif = lnet_ipif_enumerate(&ifnames);
- int i;
- int rc;
-
- if (nif <= 0)
- return nif;
-
- ipaddrs = kcalloc(nif, sizeof(*ipaddrs), GFP_KERNEL);
- if (!ipaddrs) {
- CERROR("Can't allocate ipaddrs[%d]\n", nif);
- lnet_ipif_free_enumeration(ifnames, nif);
- return -ENOMEM;
- }
-
- for (i = nip = 0; i < nif; i++) {
- if (!strcmp(ifnames[i], "lo"))
- continue;
-
- rc = lnet_ipif_query(ifnames[i], &up, &ipaddrs[nip], &netmask);
- if (rc) {
- CWARN("Can't query interface %s: %d\n",
- ifnames[i], rc);
- continue;
- }
-
- if (!up) {
- CWARN("Ignoring interface %s: it's down\n",
- ifnames[i]);
- continue;
- }
-
- nip++;
- }
-
- lnet_ipif_free_enumeration(ifnames, nif);
-
- if (nip == nif) {
- *ipaddrsp = ipaddrs;
- } else {
- if (nip > 0) {
- ipaddrs2 = kcalloc(nip, sizeof(*ipaddrs2),
- GFP_KERNEL);
- if (!ipaddrs2) {
- CERROR("Can't allocate ipaddrs[%d]\n", nip);
- nip = -ENOMEM;
- } else {
- memcpy(ipaddrs2, ipaddrs,
- nip * sizeof(*ipaddrs));
- *ipaddrsp = ipaddrs2;
- rc = nip;
- }
- }
- kfree(ipaddrs);
- }
- return nip;
-}
-
-int
-lnet_parse_ip2nets(char **networksp, char *ip2nets)
-{
- __u32 *ipaddrs = NULL;
- int nip = lnet_ipaddr_enumerate(&ipaddrs);
- int rc;
-
- if (nip < 0) {
- LCONSOLE_ERROR_MSG(0x117,
- "Error %d enumerating local IP interfaces for ip2nets to match\n",
- nip);
- return nip;
- }
-
- if (!nip) {
- LCONSOLE_ERROR_MSG(0x118,
- "No local IP interfaces for ip2nets to match\n");
- return -ENOENT;
- }
-
- rc = lnet_match_networks(networksp, ip2nets, ipaddrs, nip);
- kfree(ipaddrs);
-
- if (rc < 0) {
- LCONSOLE_ERROR_MSG(0x119, "Error %d parsing ip2nets\n", rc);
- return rc;
- }
-
- if (!rc) {
- LCONSOLE_ERROR_MSG(0x11a,
- "ip2nets does not match any local IP interfaces\n");
- return -ENOENT;
- }
-
- return 0;
-}
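-
-/*
- * Usage sketch (the ip2nets string is an assumption): "ip2nets" picks
- * network specifications by matching the node's local IP addresses,
- * so one string can configure a whole cluster.
- */
-#if 0
-static int example_ip2nets(void)
-{
-	static char ip2nets[] = "tcp(eth0) 192.168.0.*; o2ib 10.10.*.*";
-	char *networks = NULL;
-
-	/* on a 192.168.0.0/24 node this selects "tcp(eth0)" */
-	return lnet_parse_ip2nets(&networks, ip2nets);
-}
-#endif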
diff --git a/drivers/staging/lustre/lnet/lnet/lib-eq.c b/drivers/staging/lustre/lnet/lnet/lib-eq.c
deleted file mode 100644
index c78e70373ab4..000000000000
--- a/drivers/staging/lustre/lnet/lnet/lib-eq.c
+++ /dev/null
@@ -1,426 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-eq.c
- *
- * Library level Event queue management routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/**
- * Create an event queue that has room for \a count number of events.
- *
- * The event queue is circular and older events will be overwritten by new
- * ones if they are not removed in time by the user using the functions
- * LNetEQGet(), LNetEQWait(), or LNetEQPoll(). It is up to the user to
- * determine the appropriate size of the event queue to prevent this loss
- * of events. Note that when an EQ handler is specified in \a callback, no
- * event loss can happen, since the handler is run for each event deposited
- * into the EQ.
- *
- * \param count The number of events to be stored in the event queue. It
- * will be rounded up to the next power of two.
- * \param callback A handler function that runs when an event is deposited
- * into the EQ. The constant value LNET_EQ_HANDLER_NONE can be used to
- * indicate that no event handler is desired.
- * \param handle On successful return, this location will hold a handle for
- * the newly created EQ.
- *
- * \retval 0 On success.
- * \retval -EINVAL If a parameter is not valid.
- * \retval -ENOMEM If memory for the EQ can't be allocated.
- *
- * \see lnet_eq_handler_t for the discussion on EQ handler semantics.
- */
-int
-LNetEQAlloc(unsigned int count, lnet_eq_handler_t callback,
- struct lnet_handle_eq *handle)
-{
- struct lnet_eq *eq;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- /*
-	 * We need count to be a power of 2, so that when eq_{enq,deq}_seq
-	 * wrap around they don't skip entries and the queue keeps the same
-	 * apparent capacity at all times
- */
- if (count)
- count = roundup_pow_of_two(count);
-
- if (callback != LNET_EQ_HANDLER_NONE && count)
- CWARN("EQ callback is guaranteed to get every event, do you still want to set eqcount %d for polling event which will have locking overhead? Please contact with developer to confirm\n", count);
-
- /*
-	 * count can be 0 if only a callback is needed; that eliminates
-	 * the overhead of enqueuing events
- */
- if (!count && callback == LNET_EQ_HANDLER_NONE)
- return -EINVAL;
-
- eq = kzalloc(sizeof(*eq), GFP_NOFS);
- if (!eq)
- return -ENOMEM;
-
- if (count) {
- eq->eq_events = kvmalloc_array(count, sizeof(struct lnet_event),
- GFP_KERNEL | __GFP_ZERO);
- if (!eq->eq_events)
- goto failed;
- /*
-		 * NB the allocator has set all event sequence numbers to 0,
-		 * so all of them are earlier than eq_deq_seq
- */
- }
-
- eq->eq_deq_seq = 1;
- eq->eq_enq_seq = 1;
- eq->eq_size = count;
- eq->eq_callback = callback;
-
- eq->eq_refs = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(*eq->eq_refs[0]));
- if (!eq->eq_refs)
- goto failed;
-
-	/* MUST hold the lnet_res_lock exclusively */
- lnet_res_lock(LNET_LOCK_EX);
- /*
- * NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do
- * both EQ lookup and poll event with only lnet_eq_wait_lock
- */
- lnet_eq_wait_lock();
-
- lnet_res_lh_initialize(&the_lnet.ln_eq_container, &eq->eq_lh);
- list_add(&eq->eq_list, &the_lnet.ln_eq_container.rec_active);
-
- lnet_eq_wait_unlock();
- lnet_res_unlock(LNET_LOCK_EX);
-
- lnet_eq2handle(handle, eq);
- return 0;
-
-failed:
- kvfree(eq->eq_events);
-
- if (eq->eq_refs)
- cfs_percpt_free(eq->eq_refs);
-
- kfree(eq);
- return -ENOMEM;
-}
-EXPORT_SYMBOL(LNetEQAlloc);
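-
-/*
- * Usage sketch (handler and wrapper names are assumptions): allocate a
- * callback-only EQ; with count == 0 no events are queued, so there is
- * no enqueue locking overhead and no risk of overflow.
- */
-#if 0
-static void my_eq_handler(struct lnet_event *ev)
-{
-	CDEBUG(D_NET, "event type %d, status %d\n", ev->type, ev->status);
-}
-
-static int example_eq_alloc(struct lnet_handle_eq *eqh)
-{
-	return LNetEQAlloc(0, my_eq_handler, eqh);
-}
-#endif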
-
-/**
- * Release the resources associated with an event queue if it's idle;
- * otherwise do nothing and it's up to the user to try again.
- *
- * \param eqh A handle for the event queue to be released.
- *
- * \retval 0 If the EQ is not in use and freed.
- * \retval -ENOENT If \a eqh does not point to a valid EQ.
- * \retval -EBUSY If the EQ is still in use by some MDs.
- */
-int
-LNetEQFree(struct lnet_handle_eq eqh)
-{
- struct lnet_eq *eq;
- struct lnet_event *events = NULL;
- int **refs = NULL;
- int *ref;
- int rc = 0;
- int size = 0;
- int i;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- lnet_res_lock(LNET_LOCK_EX);
- /*
- * NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do
- * both EQ lookup and poll event with only lnet_eq_wait_lock
- */
- lnet_eq_wait_lock();
-
- eq = lnet_handle2eq(&eqh);
- if (!eq) {
- rc = -ENOENT;
- goto out;
- }
-
- cfs_percpt_for_each(ref, i, eq->eq_refs) {
- LASSERT(*ref >= 0);
- if (!*ref)
- continue;
-
- CDEBUG(D_NET, "Event equeue (%d: %d) busy on destroy.\n",
- i, *ref);
- rc = -EBUSY;
- goto out;
- }
-
- /* stash for free after lock dropped */
- events = eq->eq_events;
- size = eq->eq_size;
- refs = eq->eq_refs;
-
- lnet_res_lh_invalidate(&eq->eq_lh);
- list_del(&eq->eq_list);
- kfree(eq);
- out:
- lnet_eq_wait_unlock();
- lnet_res_unlock(LNET_LOCK_EX);
-
- kvfree(events);
- if (refs)
- cfs_percpt_free(refs);
-
- return rc;
-}
-EXPORT_SYMBOL(LNetEQFree);
-
-void
-lnet_eq_enqueue_event(struct lnet_eq *eq, struct lnet_event *ev)
-{
-	/* MUST be called with the resource lock held but w/o lnet_eq_wait_lock */
- int index;
-
- if (!eq->eq_size) {
- LASSERT(eq->eq_callback != LNET_EQ_HANDLER_NONE);
- eq->eq_callback(ev);
- return;
- }
-
- lnet_eq_wait_lock();
- ev->sequence = eq->eq_enq_seq++;
-
- LASSERT(is_power_of_2(eq->eq_size));
- index = ev->sequence & (eq->eq_size - 1);
-
- eq->eq_events[index] = *ev;
-
- if (eq->eq_callback != LNET_EQ_HANDLER_NONE)
- eq->eq_callback(ev);
-
- /* Wake anyone waiting in LNetEQPoll() */
- if (waitqueue_active(&the_lnet.ln_eq_waitq))
- wake_up_all(&the_lnet.ln_eq_waitq);
- lnet_eq_wait_unlock();
-}
-
-static int
-lnet_eq_dequeue_event(struct lnet_eq *eq, struct lnet_event *ev)
-{
- int new_index = eq->eq_deq_seq & (eq->eq_size - 1);
- struct lnet_event *new_event = &eq->eq_events[new_index];
- int rc;
-
-	/* must be called with lnet_eq_wait_lock held */
- if (LNET_SEQ_GT(eq->eq_deq_seq, new_event->sequence))
- return 0;
-
- /* We've got a new event... */
- *ev = *new_event;
-
- CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n",
- new_event, eq->eq_deq_seq, eq->eq_size);
-
- /* ...but did it overwrite an event we've not seen yet? */
- if (eq->eq_deq_seq == new_event->sequence) {
- rc = 1;
- } else {
- /*
- * don't complain with CERROR: some EQs are sized small
- * anyway; if it's important, the caller should complain
- */
- CDEBUG(D_NET, "Event Queue Overflow: eq seq %lu ev seq %lu\n",
- eq->eq_deq_seq, new_event->sequence);
- rc = -EOVERFLOW;
- }
-
- eq->eq_deq_seq = new_event->sequence + 1;
- return rc;
-}
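-
-/*
- * Worked example (added annotation, not in the original): with
- * eq_size == 4 the slot index is sequence & 3. If the consumer is at
- * eq_deq_seq == 5 while the producer has already written sequence 9
- * into slot 1 (9 & 3), the dequeue above returns -EOVERFLOW and
- * resynchronizes eq_deq_seq to 10: events 5..8 were overwritten.
- */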
-
-/**
- * A nonblocking function that can be used to get the next event in an EQ.
- * If an event handler is associated with the EQ, the handler will run before
- * this function returns successfully. The event is removed from the queue.
- *
- * \param eventq A handle for the event queue.
- * \param event On successful return (1 or -EOVERFLOW), this location will
- * hold the next event in the EQ.
- *
- * \retval 0 No pending event in the EQ.
- * \retval 1 Indicates success.
- * \retval -ENOENT If \a eventq does not point to a valid EQ.
- * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
- * at least one event between this event and the last event obtained from the
- * EQ has been dropped due to limited space in the EQ.
- */
-
-/**
- * Block the calling process until there is an event in the EQ.
- * If an event handler is associated with the EQ, the handler will run before
- * this function returns successfully. This function returns the next event
- * in the EQ and removes it from the EQ.
- *
- * \param eventq A handle for the event queue.
- * \param event On successful return (1 or -EOVERFLOW), this location will
- * hold the next event in the EQ.
- *
- * \retval 1 Indicates success.
- * \retval -ENOENT If \a eventq does not point to a valid EQ.
- * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
- * at least one event between this event and the last event obtained from the
- * EQ has been dropped due to limited space in the EQ.
- */
-
-static int
-lnet_eq_wait_locked(int *timeout_ms, long state)
-__must_hold(&the_lnet.ln_eq_wait_lock)
-{
- int tms = *timeout_ms;
- int wait;
- wait_queue_entry_t wl;
- unsigned long now;
-
- if (!tms)
- return -ENXIO; /* don't want to wait and no new event */
-
- init_waitqueue_entry(&wl, current);
- set_current_state(state);
- add_wait_queue(&the_lnet.ln_eq_waitq, &wl);
-
- lnet_eq_wait_unlock();
-
- if (tms < 0) {
- schedule();
- } else {
- now = jiffies;
- schedule_timeout(msecs_to_jiffies(tms));
- tms -= jiffies_to_msecs(jiffies - now);
- if (tms < 0) /* no more wait but may have new event */
- tms = 0;
- }
-
- wait = tms; /* might need to call here again */
- *timeout_ms = tms;
-
- lnet_eq_wait_lock();
- remove_wait_queue(&the_lnet.ln_eq_waitq, &wl);
-
- return wait;
-}
-
-/**
- * Block the calling process until there's an event from a set of EQs or
- * timeout happens.
- *
- * If an event handler is associated with the EQ, the handler will run before
- * this function returns successfully, in which case the corresponding event
- * is consumed.
- *
- * LNetEQPoll() provides a timeout to allow applications to poll, block for a
- * fixed period, or block indefinitely.
- *
- * \param eventqs,neq An array of EQ handles, and size of the array.
- * \param timeout_ms Time in milliseconds to wait for an event to occur on
- * one of the EQs. The constant LNET_TIME_FOREVER can be used to indicate an
- * infinite timeout.
- * \param interruptible If true, use TASK_INTERRUPTIBLE; else TASK_NOLOAD.
- * \param event,which On successful return (1 or -EOVERFLOW), \a event will
- * hold the next event in the EQs, and \a which will contain the index of the
- * EQ from which the event was taken.
- *
- * \retval 0 No pending event in the EQs after timeout.
- * \retval 1 Indicates success.
- * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
- * at least one event between this event and the last event obtained from the
- * EQ indicated by \a which has been dropped due to limited space in the EQ.
- * \retval -ENOENT If there's an invalid handle in \a eventqs.
- */
-int
-LNetEQPoll(struct lnet_handle_eq *eventqs, int neq, int timeout_ms,
- int interruptible,
- struct lnet_event *event, int *which)
-{
- int wait = 1;
- int rc;
- int i;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (neq < 1)
- return -ENOENT;
-
- lnet_eq_wait_lock();
-
- for (;;) {
- for (i = 0; i < neq; i++) {
- struct lnet_eq *eq = lnet_handle2eq(&eventqs[i]);
-
- if (!eq) {
- lnet_eq_wait_unlock();
- return -ENOENT;
- }
-
- rc = lnet_eq_dequeue_event(eq, event);
- if (rc) {
- lnet_eq_wait_unlock();
- *which = i;
- return rc;
- }
- }
-
- if (!wait)
- break;
-
- /*
- * return value of lnet_eq_wait_locked:
-		 * -1 : didn't sleep and there is certainly no new event
-		 *  1 : slept until a new event arrived
-		 *  0 : done waiting, but a new event may have arrived,
-		 *      so dequeue must be called again
- */
- wait = lnet_eq_wait_locked(&timeout_ms,
- interruptible ? TASK_INTERRUPTIBLE
- : TASK_NOLOAD);
- if (wait < 0) /* no new event */
- break;
- }
-
- lnet_eq_wait_unlock();
- return 0;
-}
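-
-/*
- * Usage sketch (illustration only): poll one EQ with a 100 ms timeout,
- * tolerating overflow.
- */
-#if 0
-static int example_poll(struct lnet_handle_eq eqh)
-{
-	struct lnet_event ev;
-	int which;
-	int rc = LNetEQPoll(&eqh, 1, 100, 0, &ev, &which);
-
-	if (!rc)		/* timed out, nothing pending */
-		return -ETIMEDOUT;
-	if (rc == -EOVERFLOW)	/* got ev, but older events were lost */
-		CWARN("EQ overflowed, some events dropped\n");
-	else if (rc < 0)	/* bad handle */
-		return rc;
-	/* ev is valid here */
-	return 0;
-}
-#endif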
diff --git a/drivers/staging/lustre/lnet/lnet/lib-md.c b/drivers/staging/lustre/lnet/lnet/lib-md.c
deleted file mode 100644
index 8a22514aaf71..000000000000
--- a/drivers/staging/lustre/lnet/lnet/lib-md.c
+++ /dev/null
@@ -1,463 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-md.c
- *
- * Memory Descriptor management routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/* must be called with lnet_res_lock held */
-void
-lnet_md_unlink(struct lnet_libmd *md)
-{
- if (!(md->md_flags & LNET_MD_FLAG_ZOMBIE)) {
- /* first unlink attempt... */
- struct lnet_me *me = md->md_me;
-
- md->md_flags |= LNET_MD_FLAG_ZOMBIE;
-
- /*
- * Disassociate from ME (if any),
- * and unlink it if it was created
- * with LNET_UNLINK
- */
- if (me) {
- /* detach MD from portal */
- lnet_ptl_detach_md(me, md);
- if (me->me_unlink == LNET_UNLINK)
- lnet_me_unlink(me);
- }
-
- /* ensure all future handle lookups fail */
- lnet_res_lh_invalidate(&md->md_lh);
- }
-
- if (md->md_refcount) {
- CDEBUG(D_NET, "Queueing unlink of md %p\n", md);
- return;
- }
-
- CDEBUG(D_NET, "Unlinking md %p\n", md);
-
- if (md->md_eq) {
- int cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie);
-
- LASSERT(*md->md_eq->eq_refs[cpt] > 0);
- (*md->md_eq->eq_refs[cpt])--;
- }
-
- LASSERT(!list_empty(&md->md_list));
- list_del_init(&md->md_list);
- kfree(md);
-}
-
-static int
-lnet_md_build(struct lnet_libmd *lmd, struct lnet_md *umd, int unlink)
-{
- int i;
- unsigned int niov;
- int total_length = 0;
-
- lmd->md_me = NULL;
- lmd->md_start = umd->start;
- lmd->md_offset = 0;
- lmd->md_max_size = umd->max_size;
- lmd->md_options = umd->options;
- lmd->md_user_ptr = umd->user_ptr;
- lmd->md_eq = NULL;
- lmd->md_threshold = umd->threshold;
- lmd->md_refcount = 0;
- lmd->md_flags = (unlink == LNET_UNLINK) ? LNET_MD_FLAG_AUTO_UNLINK : 0;
-
- if (umd->options & LNET_MD_IOVEC) {
- if (umd->options & LNET_MD_KIOV) /* Can't specify both */
- return -EINVAL;
-
- niov = umd->length;
- lmd->md_niov = umd->length;
- memcpy(lmd->md_iov.iov, umd->start,
- niov * sizeof(lmd->md_iov.iov[0]));
-
- for (i = 0; i < (int)niov; i++) {
- /* We take the base address on trust */
- /* invalid length */
- if (lmd->md_iov.iov[i].iov_len <= 0)
- return -EINVAL;
-
- total_length += lmd->md_iov.iov[i].iov_len;
- }
-
- lmd->md_length = total_length;
-
- if ((umd->options & LNET_MD_MAX_SIZE) && /* use max size */
- (umd->max_size < 0 ||
- umd->max_size > total_length)) /* illegal max_size */
- return -EINVAL;
-
- } else if (umd->options & LNET_MD_KIOV) {
- niov = umd->length;
- lmd->md_niov = umd->length;
- memcpy(lmd->md_iov.kiov, umd->start,
- niov * sizeof(lmd->md_iov.kiov[0]));
-
- for (i = 0; i < (int)niov; i++) {
- /* We take the page pointer on trust */
- if (lmd->md_iov.kiov[i].bv_offset +
- lmd->md_iov.kiov[i].bv_len > PAGE_SIZE)
- return -EINVAL; /* invalid length */
-
- total_length += lmd->md_iov.kiov[i].bv_len;
- }
-
- lmd->md_length = total_length;
-
- if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */
- (umd->max_size < 0 ||
- umd->max_size > total_length)) /* illegal max_size */
- return -EINVAL;
- } else { /* contiguous */
- lmd->md_length = umd->length;
- niov = 1;
- lmd->md_niov = 1;
- lmd->md_iov.iov[0].iov_base = umd->start;
- lmd->md_iov.iov[0].iov_len = umd->length;
-
- if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */
- (umd->max_size < 0 ||
- umd->max_size > (int)umd->length)) /* illegal max_size */
- return -EINVAL;
- }
-
- return 0;
-}
-
-/* must be called with resource lock held */
-static int
-lnet_md_link(struct lnet_libmd *md, struct lnet_handle_eq eq_handle, int cpt)
-{
- struct lnet_res_container *container = the_lnet.ln_md_containers[cpt];
-
- /*
- * NB we are passed an allocated, but inactive md.
-	 * If we return success, the caller may lnet_md_unlink() it;
-	 * otherwise the caller may only kfree() it.
- */
- /*
- * This implementation doesn't know how to create START events or
- * disable END events. Best to LASSERT our caller is compliant so
- * we find out quickly...
- */
- /*
-	 * TODO - re-evaluate what should be here in light of
-	 * the removal of the start and end events
-	 * (maybe we shouldn't even allow LNET_EQ_NONE!)
- * LASSERT(!eq);
- */
- if (!LNetEQHandleIsInvalid(eq_handle)) {
- md->md_eq = lnet_handle2eq(&eq_handle);
-
- if (!md->md_eq)
- return -ENOENT;
-
- (*md->md_eq->eq_refs[cpt])++;
- }
-
- lnet_res_lh_initialize(container, &md->md_lh);
-
- LASSERT(list_empty(&md->md_list));
- list_add(&md->md_list, &container->rec_active);
-
- return 0;
-}
-
-/* must be called with lnet_res_lock held */
-void
-lnet_md_deconstruct(struct lnet_libmd *lmd, struct lnet_md *umd)
-{
-	/* NB this doesn't copy out all the iov entries, so when a
-	 * discontiguous MD is copied out, the target only learns the
-	 * original iov pointer (in start) and the number of entries it had.
- */
- umd->start = lmd->md_start;
- umd->length = !(lmd->md_options &
- (LNET_MD_IOVEC | LNET_MD_KIOV)) ?
- lmd->md_length : lmd->md_niov;
- umd->threshold = lmd->md_threshold;
- umd->max_size = lmd->md_max_size;
- umd->options = lmd->md_options;
- umd->user_ptr = lmd->md_user_ptr;
- lnet_eq2handle(&umd->eq_handle, lmd->md_eq);
-}
-
-static int
-lnet_md_validate(struct lnet_md *umd)
-{
- if (!umd->start && umd->length) {
- CERROR("MD start pointer can not be NULL with length %u\n",
- umd->length);
- return -EINVAL;
- }
-
- if ((umd->options & (LNET_MD_KIOV | LNET_MD_IOVEC)) &&
- umd->length > LNET_MAX_IOV) {
- CERROR("Invalid option: too many fragments %u, %d max\n",
- umd->length, LNET_MAX_IOV);
- return -EINVAL;
- }
-
- return 0;
-}
-
-/**
- * Create a memory descriptor and attach it to a ME
- *
- * \param meh A handle for a ME to associate the new MD with.
- * \param umd Provides initial values for the user-visible parts of a MD.
- * Other than its use for initialization, there is no linkage between this
- * structure and the MD maintained by the LNet.
- * \param unlink A flag to indicate whether the MD is automatically unlinked
- * when it becomes inactive, either because the operation threshold drops to
- * zero or because the available memory becomes less than \a umd.max_size.
- * (Note that the check for unlinking a MD only occurs after the completion
- * of a successful operation on the MD.) The value LNET_UNLINK enables auto
- * unlinking; the value LNET_RETAIN disables it.
- * \param handle On successful returns, a handle to the newly created MD is
- * saved here. This handle can be used later in LNetMDUnlink().
- *
- * \retval 0 On success.
- * \retval -EINVAL If \a umd is not valid.
- * \retval -ENOMEM If new MD cannot be allocated.
- * \retval -ENOENT Either \a meh or \a umd.eq_handle does not point to a
- * valid object. Note that it's OK to supply a NULL \a umd.eq_handle by
- * calling LNetInvalidateHandle() on it.
- * \retval -EBUSY If the ME pointed to by \a meh is already associated with
- * a MD.
- */
-int
-LNetMDAttach(struct lnet_handle_me meh, struct lnet_md umd,
- enum lnet_unlink unlink, struct lnet_handle_md *handle)
-{
- LIST_HEAD(matches);
- LIST_HEAD(drops);
- struct lnet_me *me;
- struct lnet_libmd *md;
- int cpt;
- int rc;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (lnet_md_validate(&umd))
- return -EINVAL;
-
- if (!(umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) {
- CERROR("Invalid option: no MD_OP set\n");
- return -EINVAL;
- }
-
- md = lnet_md_alloc(&umd);
- if (!md)
- return -ENOMEM;
-
- rc = lnet_md_build(md, &umd, unlink);
- if (rc)
- goto out_free;
-
- cpt = lnet_cpt_of_cookie(meh.cookie);
-
- lnet_res_lock(cpt);
-
- me = lnet_handle2me(&meh);
- if (!me)
- rc = -ENOENT;
- else if (me->me_md)
- rc = -EBUSY;
- else
- rc = lnet_md_link(md, umd.eq_handle, cpt);
-
- if (rc)
- goto out_unlock;
-
- /*
- * attach this MD to portal of ME and check if it matches any
- * blocked msgs on this portal
- */
- lnet_ptl_attach_md(me, md, &matches, &drops);
-
- lnet_md2handle(handle, md);
-
- lnet_res_unlock(cpt);
-
- lnet_drop_delayed_msg_list(&drops, "Bad match");
- lnet_recv_delayed_msg_list(&matches);
-
- return 0;
-
-out_unlock:
- lnet_res_unlock(cpt);
-out_free:
- kfree(md);
- return rc;
-}
-EXPORT_SYMBOL(LNetMDAttach);
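-
-/*
- * Usage sketch (portal index, match bits and names are assumptions):
- * post a passive buffer that accepts a single incoming PUT from any
- * peer and then auto-unlinks.
- */
-#if 0
-static int example_post_buffer(struct lnet_handle_eq eqh, void *buf,
-			       unsigned int len)
-{
-	struct lnet_process_id anyone = {
-		.nid = LNET_NID_ANY, .pid = LNET_PID_ANY };
-	struct lnet_handle_me meh;
-	struct lnet_handle_md mdh;
-	struct lnet_md umd;
-	int rc;
-
-	rc = LNetMEAttach(12, anyone, 0x42ULL, 0, LNET_UNLINK,
-			  LNET_INS_AFTER, &meh);
-	if (rc)
-		return rc;
-
-	memset(&umd, 0, sizeof(umd));
-	umd.start = buf;
-	umd.length = len;
-	umd.threshold = 1;		/* one use, then auto-unlink */
-	umd.options = LNET_MD_OP_PUT;
-	umd.eq_handle = eqh;
-
-	rc = LNetMDAttach(meh, umd, LNET_UNLINK, &mdh);
-	if (rc)
-		LNetMEUnlink(meh);
-	return rc;
-}
-#endif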
-
-/**
- * Create a "free floating" memory descriptor - a MD that is not associated
- * with a ME. Such MDs are usually used in LNetPut() and LNetGet() operations.
- *
- * \param umd,unlink See the discussion for LNetMDAttach().
- * \param handle On successful returns, a handle to the newly created MD is
- * saved here. This handle can be used later in LNetMDUnlink(), LNetPut(),
- * and LNetGet() operations.
- *
- * \retval 0 On success.
- * \retval -EINVAL If \a umd is not valid.
- * \retval -ENOMEM If new MD cannot be allocated.
- * \retval -ENOENT \a umd.eq_handle does not point to a valid EQ. Note that
- * it's OK to supply a NULL \a umd.eq_handle by calling
- * LNetInvalidateHandle() on it.
- */
-int
-LNetMDBind(struct lnet_md umd, enum lnet_unlink unlink,
- struct lnet_handle_md *handle)
-{
- struct lnet_libmd *md;
- int cpt;
- int rc;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (lnet_md_validate(&umd))
- return -EINVAL;
-
- if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) {
- CERROR("Invalid option: GET|PUT illegal on active MDs\n");
- return -EINVAL;
- }
-
- md = lnet_md_alloc(&umd);
- if (!md)
- return -ENOMEM;
-
- rc = lnet_md_build(md, &umd, unlink);
- if (rc)
- goto out_free;
-
- cpt = lnet_res_lock_current();
-
- rc = lnet_md_link(md, umd.eq_handle, cpt);
- if (rc)
- goto out_unlock;
-
- lnet_md2handle(handle, md);
-
- lnet_res_unlock(cpt);
- return 0;
-
-out_unlock:
- lnet_res_unlock(cpt);
-out_free:
- kfree(md);
-
- return rc;
-}
-EXPORT_SYMBOL(LNetMDBind);
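-
-/*
- * Usage sketch (names and the threshold are assumptions): bind a
- * free-floating MD around an outgoing buffer before an LNetPut();
- * active MDs must not set LNET_MD_OP_GET/PUT.
- */
-#if 0
-static int example_bind(struct lnet_handle_eq eqh, void *buf,
-			unsigned int len, struct lnet_handle_md *mdh)
-{
-	struct lnet_md umd;
-
-	memset(&umd, 0, sizeof(umd));
-	umd.start = buf;
-	umd.length = len;
-	umd.threshold = 2;	/* expect one SEND and one ACK event */
-	umd.eq_handle = eqh;
-
-	return LNetMDBind(umd, LNET_UNLINK, mdh);
-}
-#endif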
-
-/**
- * Unlink the memory descriptor from any ME it may be linked to and release
- * the internal resources associated with it. As a result, active messages
- * associated with the MD may get aborted.
- *
- * This function does not free the memory region associated with the MD;
- * i.e., the memory the user allocated for this MD. If the ME associated with
- * this MD is not NULL and was created with auto unlink enabled, the ME is
- * unlinked as well (see LNetMEAttach()).
- *
- * Explicitly unlinking an MD via this function behaves the same as automatic
- * unlinking, except that no LNET_EVENT_UNLINK event is generated in the
- * automatic case.
- *
- * An unlinked event can be reported in two ways:
- * - If there are no pending operations on the MD, it's unlinked immediately
- * and an LNET_EVENT_UNLINK event is logged before this function returns.
- * - Otherwise, the MD is only marked for deletion when this function
- * returns, and the unlinked event will be piggybacked on the event of
- * the completion of the last operation by setting the unlinked field of
- * the event. No dedicated LNET_EVENT_UNLINK event is generated.
- *
- * Note that in both cases the unlinked field of the event is always set; no
- * more event will happen on the MD after such an event is logged.
- *
- * \param mdh A handle for the MD to be unlinked.
- *
- * \retval 0 On success.
- * \retval -ENOENT If \a mdh does not point to a valid MD object.
- */
-int
-LNetMDUnlink(struct lnet_handle_md mdh)
-{
- struct lnet_event ev;
- struct lnet_libmd *md;
- int cpt;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- cpt = lnet_cpt_of_cookie(mdh.cookie);
- lnet_res_lock(cpt);
-
- md = lnet_handle2md(&mdh);
- if (!md) {
- lnet_res_unlock(cpt);
- return -ENOENT;
- }
-
- md->md_flags |= LNET_MD_FLAG_ABORTED;
- /*
- * If the MD is busy, lnet_md_unlink just marks it for deletion, and
- * when the LND is done, the completion event flags that the MD was
- * unlinked. Otherwise, we enqueue an event now...
- */
- if (md->md_eq && !md->md_refcount) {
- lnet_build_unlink_event(md, &ev);
- lnet_eq_enqueue_event(md->md_eq, &ev);
- }
-
- lnet_md_unlink(md);
-
- lnet_res_unlock(cpt);
- return 0;
-}
-EXPORT_SYMBOL(LNetMDUnlink);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-me.c b/drivers/staging/lustre/lnet/lnet/lib-me.c
deleted file mode 100644
index 672e37bdd045..000000000000
--- a/drivers/staging/lustre/lnet/lnet/lib-me.c
+++ /dev/null
@@ -1,274 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-me.c
- *
- * Match Entry management routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/**
- * Create and attach a match entry to the match list of \a portal. The new
- * ME is empty, i.e. not associated with a memory descriptor. LNetMDAttach()
- * can be used to attach a MD to an empty ME.
- *
- * \param portal The portal table index where the ME should be attached.
- * \param match_id Specifies the match criteria for the process ID of
- * the requester. The constants LNET_PID_ANY and LNET_NID_ANY can be
- * used to wildcard either of the identifiers in the lnet_process_id
- * structure.
- * \param match_bits,ignore_bits Specify the match criteria to apply
- * to the match bits in the incoming request. The ignore bits are used
- * to mask out insignificant bits in the incoming match bits. The resulting
- * bits are then compared to the ME's match bits to determine if the
- * incoming request meets the match criteria.
- * \param unlink Indicates whether the ME should be unlinked when the memory
- * descriptor associated with it is unlinked (Note that the check for
- * unlinking a ME only occurs when the memory descriptor is unlinked).
- * Valid values are LNET_RETAIN and LNET_UNLINK.
- * \param pos Indicates whether the new ME should be prepended or
- * appended to the match list. Allowed constants: LNET_INS_BEFORE,
- * LNET_INS_AFTER.
- * \param handle On successful returns, a handle to the newly created ME
- * object is saved here. This handle can be used later in LNetMEInsert(),
- * LNetMEUnlink(), or LNetMDAttach() functions.
- *
- * \retval 0 On success.
- * \retval -EINVAL If \a portal is invalid.
- * \retval -ENOMEM If new ME object cannot be allocated.
- */
-int
-LNetMEAttach(unsigned int portal,
- struct lnet_process_id match_id,
- __u64 match_bits, __u64 ignore_bits,
- enum lnet_unlink unlink, enum lnet_ins_pos pos,
- struct lnet_handle_me *handle)
-{
- struct lnet_match_table *mtable;
- struct lnet_me *me;
- struct list_head *head;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if ((int)portal >= the_lnet.ln_nportals)
- return -EINVAL;
-
- mtable = lnet_mt_of_attach(portal, match_id,
- match_bits, ignore_bits, pos);
- if (!mtable) /* can't match portal type */
- return -EPERM;
-
- me = kzalloc(sizeof(*me), GFP_NOFS);
- if (!me)
- return -ENOMEM;
-
- lnet_res_lock(mtable->mt_cpt);
-
- me->me_portal = portal;
- me->me_match_id = match_id;
- me->me_match_bits = match_bits;
- me->me_ignore_bits = ignore_bits;
- me->me_unlink = unlink;
- me->me_md = NULL;
-
- lnet_res_lh_initialize(the_lnet.ln_me_containers[mtable->mt_cpt],
- &me->me_lh);
- if (ignore_bits)
- head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE];
- else
- head = lnet_mt_match_head(mtable, match_id, match_bits);
-
- me->me_pos = head - &mtable->mt_mhash[0];
- if (pos == LNET_INS_AFTER || pos == LNET_INS_LOCAL)
- list_add_tail(&me->me_list, head);
- else
- list_add(&me->me_list, head);
-
- lnet_me2handle(handle, me);
-
- lnet_res_unlock(mtable->mt_cpt);
- return 0;
-}
-EXPORT_SYMBOL(LNetMEAttach);
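-
-/*
- * Worked example (added annotation, not in the original): with
- * match_bits 0xCAFE0000 and ignore_bits 0x0000FFFF, an incoming
- * request matches iff (request_bits & ~0x0000FFFF) == 0xCAFE0000,
- * i.e. the low 16 bits of the request's match bits are don't-cares.
- */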
-
-/**
- * Create a match entry and insert it before or after the ME pointed to by
- * \a current_meh. The new ME is empty, i.e. not associated with a memory
- * descriptor. LNetMDAttach() can be used to attach a MD to an empty ME.
- *
- * This function is identical to LNetMEAttach() except for the position
- * where the new ME is inserted.
- *
- * \param current_meh A handle for a ME. The new ME will be inserted
- * immediately before or immediately after this ME.
- * \param match_id,match_bits,ignore_bits,unlink,pos,handle See the discussion
- * for LNetMEAttach().
- *
- * \retval 0 On success.
- * \retval -ENOMEM If new ME object cannot be allocated.
- * \retval -ENOENT If \a current_meh does not point to a valid match entry.
- */
-int
-LNetMEInsert(struct lnet_handle_me current_meh,
- struct lnet_process_id match_id,
- __u64 match_bits, __u64 ignore_bits,
- enum lnet_unlink unlink, enum lnet_ins_pos pos,
- struct lnet_handle_me *handle)
-{
- struct lnet_me *current_me;
- struct lnet_me *new_me;
- struct lnet_portal *ptl;
- int cpt;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (pos == LNET_INS_LOCAL)
- return -EPERM;
-
- new_me = kzalloc(sizeof(*new_me), GFP_NOFS);
- if (!new_me)
- return -ENOMEM;
-
- cpt = lnet_cpt_of_cookie(current_meh.cookie);
-
- lnet_res_lock(cpt);
-
- current_me = lnet_handle2me(&current_meh);
- if (!current_me) {
- kfree(new_me);
-
- lnet_res_unlock(cpt);
- return -ENOENT;
- }
-
- LASSERT(current_me->me_portal < the_lnet.ln_nportals);
-
- ptl = the_lnet.ln_portals[current_me->me_portal];
- if (lnet_ptl_is_unique(ptl)) {
-		/* insertion makes no sense on a unique portal */
- kfree(new_me);
- lnet_res_unlock(cpt);
- return -EPERM;
- }
-
- new_me->me_pos = current_me->me_pos;
- new_me->me_portal = current_me->me_portal;
- new_me->me_match_id = match_id;
- new_me->me_match_bits = match_bits;
- new_me->me_ignore_bits = ignore_bits;
- new_me->me_unlink = unlink;
- new_me->me_md = NULL;
-
- lnet_res_lh_initialize(the_lnet.ln_me_containers[cpt], &new_me->me_lh);
-
- if (pos == LNET_INS_AFTER)
- list_add(&new_me->me_list, &current_me->me_list);
- else
- list_add_tail(&new_me->me_list, &current_me->me_list);
-
- lnet_me2handle(handle, new_me);
-
- lnet_res_unlock(cpt);
-
- return 0;
-}
-EXPORT_SYMBOL(LNetMEInsert);
-
-/**
- * Unlink a match entry from its match list.
- *
- * This operation also releases any resources associated with the ME. If a
- * memory descriptor is attached to the ME, then it will be unlinked as well
- * and an unlink event will be generated. It is an error to use the ME handle
- * after calling LNetMEUnlink().
- *
- * \param meh A handle for the ME to be unlinked.
- *
- * \retval 0 On success.
- * \retval -ENOENT If \a meh does not point to a valid ME.
- * \see LNetMDUnlink() for the discussion on delivering unlink event.
- */
-int
-LNetMEUnlink(struct lnet_handle_me meh)
-{
- struct lnet_me *me;
- struct lnet_libmd *md;
- struct lnet_event ev;
- int cpt;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- cpt = lnet_cpt_of_cookie(meh.cookie);
- lnet_res_lock(cpt);
-
- me = lnet_handle2me(&meh);
- if (!me) {
- lnet_res_unlock(cpt);
- return -ENOENT;
- }
-
- md = me->me_md;
- if (md) {
- md->md_flags |= LNET_MD_FLAG_ABORTED;
- if (md->md_eq && !md->md_refcount) {
- lnet_build_unlink_event(md, &ev);
- lnet_eq_enqueue_event(md->md_eq, &ev);
- }
- }
-
- lnet_me_unlink(me);
-
- lnet_res_unlock(cpt);
- return 0;
-}
-EXPORT_SYMBOL(LNetMEUnlink);
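A correspondingly small teardown sketch; per the doc comment above, one LNetMEUnlink() call releases the ME and any attached MD, generating an unlink event if the MD has an EQ. The CDEBUG use is illustrative:

	/*
	 * Hypothetical teardown sketch: a stale handle yields -ENOENT,
	 * otherwise the ME and any attached MD are released together.
	 */
	static void example_me_teardown(struct lnet_handle_me meh)
	{
		int rc = LNetMEUnlink(meh);

		if (rc == -ENOENT)
			CDEBUG(D_NET, "ME handle already invalid\n");
	}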
-
-/* call with lnet_res_lock please */
-void
-lnet_me_unlink(struct lnet_me *me)
-{
- list_del(&me->me_list);
-
- if (me->me_md) {
- struct lnet_libmd *md = me->me_md;
-
- /* detach MD from portal of this ME */
- lnet_ptl_detach_md(me, md);
- lnet_md_unlink(md);
- }
-
- lnet_res_lh_invalidate(&me->me_lh);
- kfree(me);
-}
diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c
deleted file mode 100644
index f8eaf8ff8d8d..000000000000
--- a/drivers/staging/lustre/lnet/lnet/lib-move.c
+++ /dev/null
@@ -1,2386 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-move.c
- *
- * Data movement routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <linux/nsproxy.h>
-#include <net/net_namespace.h>
-
-static int local_nid_dist_zero = 1;
-module_param(local_nid_dist_zero, int, 0444);
-MODULE_PARM_DESC(local_nid_dist_zero, "Reserved");
-
-int
-lnet_fail_nid(lnet_nid_t nid, unsigned int threshold)
-{
- struct lnet_test_peer *tp;
- struct lnet_test_peer *temp;
- struct list_head *el;
- struct list_head *next;
- struct list_head cull;
-
- /* NB: use lnet_net_lock(0) to serialize operations on test peers */
- if (threshold) {
- /* Adding a new entry */
- tp = kzalloc(sizeof(*tp), GFP_NOFS);
- if (!tp)
- return -ENOMEM;
-
- tp->tp_nid = nid;
- tp->tp_threshold = threshold;
-
- lnet_net_lock(0);
- list_add_tail(&tp->tp_list, &the_lnet.ln_test_peers);
- lnet_net_unlock(0);
- return 0;
- }
-
- /* removing entries */
- INIT_LIST_HEAD(&cull);
-
- lnet_net_lock(0);
-
- list_for_each_safe(el, next, &the_lnet.ln_test_peers) {
- tp = list_entry(el, struct lnet_test_peer, tp_list);
-
- if (!tp->tp_threshold || /* needs culling anyway */
- nid == LNET_NID_ANY || /* removing all entries */
- tp->tp_nid == nid) { /* matched this one */
- list_del(&tp->tp_list);
- list_add(&tp->tp_list, &cull);
- }
- }
-
- lnet_net_unlock(0);
-
- list_for_each_entry_safe(tp, temp, &cull, tp_list) {
- list_del(&tp->tp_list);
- kfree(tp);
- }
- return 0;
-}
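lnet_fail_nid() maintains the fault-injection list that fail_peer() below consults. A hedged sketch of how a test harness might drive it; the NID and counts are invented:

	/*
	 * Hypothetical test-harness sketch: drop the next three
	 * messages involving bad_nid, then clear all injected faults.
	 */
	static void example_fault_injection(lnet_nid_t bad_nid)
	{
		lnet_fail_nid(bad_nid, 3);	/* add entry, threshold 3 */

		/* ... exercise the code under test ... */

		lnet_fail_nid(LNET_NID_ANY, 0);	/* threshold 0: cull all */
	}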
-
-static int
-fail_peer(lnet_nid_t nid, int outgoing)
-{
- struct lnet_test_peer *tp;
- struct lnet_test_peer *temp;
- struct list_head *el;
- struct list_head *next;
- struct list_head cull;
- int fail = 0;
-
- INIT_LIST_HEAD(&cull);
-
- /* NB: use lnet_net_lock(0) to serialize operations on test peers */
- lnet_net_lock(0);
-
- list_for_each_safe(el, next, &the_lnet.ln_test_peers) {
- tp = list_entry(el, struct lnet_test_peer, tp_list);
-
- if (!tp->tp_threshold) {
- /* zombie entry */
- if (outgoing) {
- /*
- * only cull zombies on outgoing tests,
- * since we may be at interrupt priority on
- * incoming messages.
- */
- list_del(&tp->tp_list);
- list_add(&tp->tp_list, &cull);
- }
- continue;
- }
-
- if (tp->tp_nid == LNET_NID_ANY || /* fail every peer */
- nid == tp->tp_nid) { /* fail this peer */
- fail = 1;
-
- if (tp->tp_threshold != LNET_MD_THRESH_INF) {
- tp->tp_threshold--;
- if (outgoing &&
- !tp->tp_threshold) {
- /* see above */
- list_del(&tp->tp_list);
- list_add(&tp->tp_list, &cull);
- }
- }
- break;
- }
- }
-
- lnet_net_unlock(0);
-
- list_for_each_entry_safe(tp, temp, &cull, tp_list) {
- list_del(&tp->tp_list);
-
- kfree(tp);
- }
-
- return fail;
-}
-
-unsigned int
-lnet_iov_nob(unsigned int niov, struct kvec *iov)
-{
- unsigned int nob = 0;
-
- LASSERT(!niov || iov);
- while (niov-- > 0)
- nob += (iov++)->iov_len;
-
- return nob;
-}
-EXPORT_SYMBOL(lnet_iov_nob);
-
-void
-lnet_copy_iov2iter(struct iov_iter *to,
- unsigned int nsiov, const struct kvec *siov,
- unsigned int soffset, unsigned int nob)
-{
-	/* NB 'siov' is READ-ONLY; the destination is the iov_iter 'to' */
- const char *s;
- size_t left;
-
- if (!nob)
- return;
-
- /* skip complete frags before 'soffset' */
- LASSERT(nsiov > 0);
- while (soffset >= siov->iov_len) {
- soffset -= siov->iov_len;
- siov++;
- nsiov--;
- LASSERT(nsiov > 0);
- }
-
- s = (char *)siov->iov_base + soffset;
- left = siov->iov_len - soffset;
- do {
- size_t n, copy = left;
-
- LASSERT(nsiov > 0);
-
- if (copy > nob)
- copy = nob;
- n = copy_to_iter(s, copy, to);
- if (n != copy)
- return;
- nob -= n;
-
- siov++;
- s = (char *)siov->iov_base;
- left = siov->iov_len;
- nsiov--;
- } while (nob > 0);
-}
-EXPORT_SYMBOL(lnet_copy_iov2iter);
-
-void
-lnet_copy_kiov2iter(struct iov_iter *to,
- unsigned int nsiov, const struct bio_vec *siov,
- unsigned int soffset, unsigned int nob)
-{
- if (!nob)
- return;
-
- LASSERT(!in_interrupt());
-
- LASSERT(nsiov > 0);
- while (soffset >= siov->bv_len) {
- soffset -= siov->bv_len;
- siov++;
- nsiov--;
- LASSERT(nsiov > 0);
- }
-
- do {
- size_t copy = siov->bv_len - soffset, n;
-
- LASSERT(nsiov > 0);
-
- if (copy > nob)
- copy = nob;
- n = copy_page_to_iter(siov->bv_page,
- siov->bv_offset + soffset,
- copy, to);
- if (n != copy)
- return;
- nob -= n;
- siov++;
- nsiov--;
- soffset = 0;
- } while (nob > 0);
-}
-EXPORT_SYMBOL(lnet_copy_kiov2iter);
-
-int
-lnet_extract_iov(int dst_niov, struct kvec *dst,
- int src_niov, const struct kvec *src,
- unsigned int offset, unsigned int len)
-{
- /*
- * Initialise 'dst' to the subset of 'src' starting at 'offset',
- * for exactly 'len' bytes, and return the number of entries.
- * NB not destructive to 'src'
- */
- unsigned int frag_len;
- unsigned int niov;
-
- if (!len) /* no data => */
- return 0; /* no frags */
-
- LASSERT(src_niov > 0);
- while (offset >= src->iov_len) { /* skip initial frags */
- offset -= src->iov_len;
- src_niov--;
- src++;
- LASSERT(src_niov > 0);
- }
-
- niov = 1;
- for (;;) {
- LASSERT(src_niov > 0);
- LASSERT((int)niov <= dst_niov);
-
- frag_len = src->iov_len - offset;
- dst->iov_base = ((char *)src->iov_base) + offset;
-
- if (len <= frag_len) {
- dst->iov_len = len;
- return niov;
- }
-
- dst->iov_len = frag_len;
-
- len -= frag_len;
- dst++;
- src++;
- niov++;
- src_niov--;
- offset = 0;
- }
-}
-EXPORT_SYMBOL(lnet_extract_iov);
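To make the offset arithmetic concrete, a hypothetical worked example (buffer sizes invented): extracting 6000 bytes at offset 1000 from two 4096-byte frags spans both, so two dst entries come back:

	/*
	 * Hypothetical worked example for lnet_extract_iov().
	 */
	static void example_extract(char *buf_a, char *buf_b)
	{
		struct kvec src[2] = {
			{ .iov_base = buf_a, .iov_len = 4096 },
			{ .iov_base = buf_b, .iov_len = 4096 },
		};
		struct kvec dst[2];
		int n;

		n = lnet_extract_iov(2, dst, 2, src, 1000, 6000);
		/*
		 * n == 2: dst[0] covers buf_a + 1000 for 3096 bytes,
		 * dst[1] covers buf_b for the remaining 2904 bytes.
		 */
	}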
-
-unsigned int
-lnet_kiov_nob(unsigned int niov, struct bio_vec *kiov)
-{
- unsigned int nob = 0;
-
- LASSERT(!niov || kiov);
- while (niov-- > 0)
- nob += (kiov++)->bv_len;
-
- return nob;
-}
-EXPORT_SYMBOL(lnet_kiov_nob);
-
-int
-lnet_extract_kiov(int dst_niov, struct bio_vec *dst,
- int src_niov, const struct bio_vec *src,
- unsigned int offset, unsigned int len)
-{
- /*
- * Initialise 'dst' to the subset of 'src' starting at 'offset',
- * for exactly 'len' bytes, and return the number of entries.
- * NB not destructive to 'src'
- */
- unsigned int frag_len;
- unsigned int niov;
-
- if (!len) /* no data => */
- return 0; /* no frags */
-
- LASSERT(src_niov > 0);
- while (offset >= src->bv_len) { /* skip initial frags */
- offset -= src->bv_len;
- src_niov--;
- src++;
- LASSERT(src_niov > 0);
- }
-
- niov = 1;
- for (;;) {
- LASSERT(src_niov > 0);
- LASSERT((int)niov <= dst_niov);
-
- frag_len = src->bv_len - offset;
- dst->bv_page = src->bv_page;
- dst->bv_offset = src->bv_offset + offset;
-
- if (len <= frag_len) {
- dst->bv_len = len;
- LASSERT(dst->bv_offset + dst->bv_len
- <= PAGE_SIZE);
- return niov;
- }
-
- dst->bv_len = frag_len;
- LASSERT(dst->bv_offset + dst->bv_len <= PAGE_SIZE);
-
- len -= frag_len;
- dst++;
- src++;
- niov++;
- src_niov--;
- offset = 0;
- }
-}
-EXPORT_SYMBOL(lnet_extract_kiov);
-
-void
-lnet_ni_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
- int delayed, unsigned int offset, unsigned int mlen,
- unsigned int rlen)
-{
- unsigned int niov = 0;
- struct kvec *iov = NULL;
- struct bio_vec *kiov = NULL;
- struct iov_iter to;
- int rc;
-
- LASSERT(!in_interrupt());
- LASSERT(!mlen || msg);
-
- if (msg) {
- LASSERT(msg->msg_receiving);
- LASSERT(!msg->msg_sending);
- LASSERT(rlen == msg->msg_len);
- LASSERT(mlen <= msg->msg_len);
- LASSERT(msg->msg_offset == offset);
- LASSERT(msg->msg_wanted == mlen);
-
- msg->msg_receiving = 0;
-
- if (mlen) {
- niov = msg->msg_niov;
- iov = msg->msg_iov;
- kiov = msg->msg_kiov;
-
- LASSERT(niov > 0);
- LASSERT(!iov != !kiov);
- }
- }
-
- if (iov) {
- iov_iter_kvec(&to, ITER_KVEC | READ, iov, niov, mlen + offset);
- iov_iter_advance(&to, offset);
- } else {
- iov_iter_bvec(&to, ITER_BVEC | READ, kiov, niov, mlen + offset);
- iov_iter_advance(&to, offset);
- }
- rc = ni->ni_lnd->lnd_recv(ni, private, msg, delayed, &to, rlen);
- if (rc < 0)
- lnet_finalize(ni, msg, rc);
-}
-
-static void
-lnet_setpayloadbuffer(struct lnet_msg *msg)
-{
- struct lnet_libmd *md = msg->msg_md;
-
- LASSERT(msg->msg_len > 0);
- LASSERT(!msg->msg_routing);
- LASSERT(md);
- LASSERT(!msg->msg_niov);
- LASSERT(!msg->msg_iov);
- LASSERT(!msg->msg_kiov);
-
- msg->msg_niov = md->md_niov;
- if (md->md_options & LNET_MD_KIOV)
- msg->msg_kiov = md->md_iov.kiov;
- else
- msg->msg_iov = md->md_iov.iov;
-}
-
-void
-lnet_prep_send(struct lnet_msg *msg, int type, struct lnet_process_id target,
- unsigned int offset, unsigned int len)
-{
- msg->msg_type = type;
- msg->msg_target = target;
- msg->msg_len = len;
- msg->msg_offset = offset;
-
- if (len)
- lnet_setpayloadbuffer(msg);
-
- memset(&msg->msg_hdr, 0, sizeof(msg->msg_hdr));
- msg->msg_hdr.type = cpu_to_le32(type);
- msg->msg_hdr.dest_nid = cpu_to_le64(target.nid);
- msg->msg_hdr.dest_pid = cpu_to_le32(target.pid);
- /* src_nid will be set later */
- msg->msg_hdr.src_pid = cpu_to_le32(the_lnet.ln_pid);
- msg->msg_hdr.payload_length = cpu_to_le32(len);
-}
-
-static void
-lnet_ni_send(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- void *priv = msg->msg_private;
- int rc;
-
- LASSERT(!in_interrupt());
- LASSERT(LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND ||
- (msg->msg_txcredit && msg->msg_peertxcredit));
-
- rc = ni->ni_lnd->lnd_send(ni, priv, msg);
- if (rc < 0)
- lnet_finalize(ni, msg, rc);
-}
-
-static int
-lnet_ni_eager_recv(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- int rc;
-
- LASSERT(!msg->msg_sending);
- LASSERT(msg->msg_receiving);
- LASSERT(!msg->msg_rx_ready_delay);
- LASSERT(ni->ni_lnd->lnd_eager_recv);
-
- msg->msg_rx_ready_delay = 1;
- rc = ni->ni_lnd->lnd_eager_recv(ni, msg->msg_private, msg,
- &msg->msg_private);
- if (rc) {
- CERROR("recv from %s / send to %s aborted: eager_recv failed %d\n",
- libcfs_nid2str(msg->msg_rxpeer->lp_nid),
- libcfs_id2str(msg->msg_target), rc);
- LASSERT(rc < 0); /* required by my callers */
- }
-
- return rc;
-}
-
-/* NB: caller shall hold a ref on 'lp' since I drop lnet_net_lock */
-static void
-lnet_ni_query_locked(struct lnet_ni *ni, struct lnet_peer *lp)
-{
- unsigned long last_alive = 0;
-
- LASSERT(lnet_peer_aliveness_enabled(lp));
- LASSERT(ni->ni_lnd->lnd_query);
-
- lnet_net_unlock(lp->lp_cpt);
- ni->ni_lnd->lnd_query(ni, lp->lp_nid, &last_alive);
- lnet_net_lock(lp->lp_cpt);
-
- lp->lp_last_query = jiffies;
-
- if (last_alive) /* NI has updated timestamp */
- lp->lp_last_alive = last_alive;
-}
-
-/* NB: always called with lnet_net_lock held */
-static inline int
-lnet_peer_is_alive(struct lnet_peer *lp, unsigned long now)
-{
- int alive;
- unsigned long deadline;
-
- LASSERT(lnet_peer_aliveness_enabled(lp));
-
- /* Trust lnet_notify() if it has more recent aliveness news, but
- * ignore the initial assumed death (see lnet_peers_start_down()).
- */
- if (!lp->lp_alive && lp->lp_alive_count > 0 &&
- time_after_eq(lp->lp_timestamp, lp->lp_last_alive))
- return 0;
-
- deadline = lp->lp_last_alive + lp->lp_ni->ni_peertimeout * HZ;
- alive = time_after(deadline, now);
-
-	/* Update stale lp_alive, except for routers assumed to be dead
-	 * initially: the router checker updates their aliveness, and
-	 * lp_last_alive at peer creation is only an assumption.
-	 */
- if (alive && !lp->lp_alive &&
- !(lnet_isrouter(lp) && !lp->lp_alive_count))
- lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
-
- return alive;
-}
-
-/*
- * NB: returns 1 when alive, 0 when dead, negative when error;
- * may drop the lnet_net_lock
- */
-static int
-lnet_peer_alive_locked(struct lnet_peer *lp)
-{
- unsigned long now = jiffies;
-
- if (!lnet_peer_aliveness_enabled(lp))
- return -ENODEV;
-
- if (lnet_peer_is_alive(lp, now))
- return 1;
-
- /*
- * Peer appears dead, but we should avoid frequent NI queries (at
- * most once per lnet_queryinterval seconds).
- */
- if (lp->lp_last_query) {
- static const int lnet_queryinterval = 1;
-
- unsigned long next_query =
- lp->lp_last_query + lnet_queryinterval * HZ;
-
- if (time_before(now, next_query)) {
- if (lp->lp_alive)
- CWARN("Unexpected aliveness of peer %s: %d < %d (%d/%d)\n",
- libcfs_nid2str(lp->lp_nid),
- (int)now, (int)next_query,
- lnet_queryinterval,
- lp->lp_ni->ni_peertimeout);
- return 0;
- }
- }
-
- /* query NI for latest aliveness news */
- lnet_ni_query_locked(lp->lp_ni, lp);
-
- if (lnet_peer_is_alive(lp, now))
- return 1;
-
- lnet_notify_locked(lp, 0, 0, lp->lp_last_alive);
- return 0;
-}
-
-/**
- * \param msg The message to be sent.
- * \param do_send True if lnet_ni_send() should be called in this function.
- * lnet_send() is going to lnet_net_unlock immediately after this, so
- * it sets do_send FALSE and I don't do the unlock/send/lock bit.
- *
- * \retval LNET_CREDIT_OK If \a msg sent or OK to send.
- * \retval LNET_CREDIT_WAIT If \a msg blocked for credit.
- * \retval -EHOSTUNREACH If the next hop of the message appears dead.
- * \retval -ECANCELED If the MD of the message has been unlinked.
- */
-static int
-lnet_post_send_locked(struct lnet_msg *msg, int do_send)
-{
- struct lnet_peer *lp = msg->msg_txpeer;
- struct lnet_ni *ni = lp->lp_ni;
- int cpt = msg->msg_tx_cpt;
- struct lnet_tx_queue *tq = ni->ni_tx_queues[cpt];
-
- /* non-lnet_send() callers have checked before */
- LASSERT(!do_send || msg->msg_tx_delayed);
- LASSERT(!msg->msg_receiving);
- LASSERT(msg->msg_tx_committed);
-
- /* NB 'lp' is always the next hop */
- if (!(msg->msg_target.pid & LNET_PID_USERFLAG) &&
- !lnet_peer_alive_locked(lp)) {
- the_lnet.ln_counters[cpt]->drop_count++;
- the_lnet.ln_counters[cpt]->drop_length += msg->msg_len;
- lnet_net_unlock(cpt);
-
- CNETERR("Dropping message for %s: peer not alive\n",
- libcfs_id2str(msg->msg_target));
- if (do_send)
- lnet_finalize(ni, msg, -EHOSTUNREACH);
-
- lnet_net_lock(cpt);
- return -EHOSTUNREACH;
- }
-
- if (msg->msg_md &&
- (msg->msg_md->md_flags & LNET_MD_FLAG_ABORTED)) {
- lnet_net_unlock(cpt);
-
- CNETERR("Aborting message for %s: LNetM[DE]Unlink() already called on the MD/ME.\n",
- libcfs_id2str(msg->msg_target));
- if (do_send)
- lnet_finalize(ni, msg, -ECANCELED);
-
- lnet_net_lock(cpt);
- return -ECANCELED;
- }
-
- if (!msg->msg_peertxcredit) {
- LASSERT((lp->lp_txcredits < 0) ==
- !list_empty(&lp->lp_txq));
-
- msg->msg_peertxcredit = 1;
- lp->lp_txqnob += msg->msg_len + sizeof(struct lnet_hdr);
- lp->lp_txcredits--;
-
- if (lp->lp_txcredits < lp->lp_mintxcredits)
- lp->lp_mintxcredits = lp->lp_txcredits;
-
- if (lp->lp_txcredits < 0) {
- msg->msg_tx_delayed = 1;
- list_add_tail(&msg->msg_list, &lp->lp_txq);
- return LNET_CREDIT_WAIT;
- }
- }
-
- if (!msg->msg_txcredit) {
- LASSERT((tq->tq_credits < 0) ==
- !list_empty(&tq->tq_delayed));
-
- msg->msg_txcredit = 1;
- tq->tq_credits--;
-
- if (tq->tq_credits < tq->tq_credits_min)
- tq->tq_credits_min = tq->tq_credits;
-
- if (tq->tq_credits < 0) {
- msg->msg_tx_delayed = 1;
- list_add_tail(&msg->msg_list, &tq->tq_delayed);
- return LNET_CREDIT_WAIT;
- }
- }
-
- if (do_send) {
- lnet_net_unlock(cpt);
- lnet_ni_send(ni, msg);
- lnet_net_lock(cpt);
- }
- return LNET_CREDIT_OK;
-}
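A hedged sketch of the caller-side pattern for lnet_post_send_locked(), mirroring the tail of lnet_send() further below; it assumes the message was already committed with lnet_msg_commit() and its tx peer set, as lnet_send() arranges:

	/*
	 * Hypothetical caller sketch: post with do_send == 0 under
	 * lnet_net_lock; hand the message to the LND only on
	 * LNET_CREDIT_OK. On LNET_CREDIT_WAIT the message is queued
	 * and will be sent when credits are returned.
	 */
	static int example_post_and_send(struct lnet_ni *ni,
					 struct lnet_msg *msg, int cpt)
	{
		int rc;

		lnet_net_lock(cpt);
		rc = lnet_post_send_locked(msg, 0);
		lnet_net_unlock(cpt);

		if (rc < 0)
			return rc;	/* -EHOSTUNREACH or -ECANCELED */
		if (rc == LNET_CREDIT_OK)
			lnet_ni_send(ni, msg);
		return 0;
	}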
-
-static struct lnet_rtrbufpool *
-lnet_msg2bufpool(struct lnet_msg *msg)
-{
- struct lnet_rtrbufpool *rbp;
- int cpt;
-
- LASSERT(msg->msg_rx_committed);
-
- cpt = msg->msg_rx_cpt;
- rbp = &the_lnet.ln_rtrpools[cpt][0];
-
- LASSERT(msg->msg_len <= LNET_MTU);
- while (msg->msg_len > (unsigned int)rbp->rbp_npages * PAGE_SIZE) {
- rbp++;
- LASSERT(rbp < &the_lnet.ln_rtrpools[cpt][LNET_NRBPOOLS]);
- }
-
- return rbp;
-}
-
-static int
-lnet_post_routed_recv_locked(struct lnet_msg *msg, int do_recv)
-{
- /*
- * lnet_parse is going to lnet_net_unlock immediately after this, so it
-	 * sets do_recv FALSE and I don't do the unlock/recv/lock bit.
- * I return LNET_CREDIT_WAIT if msg blocked and LNET_CREDIT_OK if
- * received or OK to receive
- */
- struct lnet_peer *lp = msg->msg_rxpeer;
- struct lnet_rtrbufpool *rbp;
- struct lnet_rtrbuf *rb;
-
- LASSERT(!msg->msg_iov);
- LASSERT(!msg->msg_kiov);
- LASSERT(!msg->msg_niov);
- LASSERT(msg->msg_routing);
- LASSERT(msg->msg_receiving);
- LASSERT(!msg->msg_sending);
-
- /* non-lnet_parse callers only receive delayed messages */
- LASSERT(!do_recv || msg->msg_rx_delayed);
-
- if (!msg->msg_peerrtrcredit) {
- LASSERT((lp->lp_rtrcredits < 0) ==
- !list_empty(&lp->lp_rtrq));
-
- msg->msg_peerrtrcredit = 1;
- lp->lp_rtrcredits--;
- if (lp->lp_rtrcredits < lp->lp_minrtrcredits)
- lp->lp_minrtrcredits = lp->lp_rtrcredits;
-
- if (lp->lp_rtrcredits < 0) {
- /* must have checked eager_recv before here */
- LASSERT(msg->msg_rx_ready_delay);
- msg->msg_rx_delayed = 1;
- list_add_tail(&msg->msg_list, &lp->lp_rtrq);
- return LNET_CREDIT_WAIT;
- }
- }
-
- rbp = lnet_msg2bufpool(msg);
-
- if (!msg->msg_rtrcredit) {
- msg->msg_rtrcredit = 1;
- rbp->rbp_credits--;
- if (rbp->rbp_credits < rbp->rbp_mincredits)
- rbp->rbp_mincredits = rbp->rbp_credits;
-
- if (rbp->rbp_credits < 0) {
- /* must have checked eager_recv before here */
- LASSERT(msg->msg_rx_ready_delay);
- msg->msg_rx_delayed = 1;
- list_add_tail(&msg->msg_list, &rbp->rbp_msgs);
- return LNET_CREDIT_WAIT;
- }
- }
-
- LASSERT(!list_empty(&rbp->rbp_bufs));
- rb = list_entry(rbp->rbp_bufs.next, struct lnet_rtrbuf, rb_list);
- list_del(&rb->rb_list);
-
- msg->msg_niov = rbp->rbp_npages;
- msg->msg_kiov = &rb->rb_kiov[0];
-
- if (do_recv) {
- int cpt = msg->msg_rx_cpt;
-
- lnet_net_unlock(cpt);
- lnet_ni_recv(lp->lp_ni, msg->msg_private, msg, 1,
- 0, msg->msg_len, msg->msg_len);
- lnet_net_lock(cpt);
- }
- return LNET_CREDIT_OK;
-}
-
-void
-lnet_return_tx_credits_locked(struct lnet_msg *msg)
-{
- struct lnet_peer *txpeer = msg->msg_txpeer;
- struct lnet_msg *msg2;
-
- if (msg->msg_txcredit) {
- struct lnet_ni *ni = txpeer->lp_ni;
- struct lnet_tx_queue *tq = ni->ni_tx_queues[msg->msg_tx_cpt];
-
- /* give back NI txcredits */
- msg->msg_txcredit = 0;
-
- LASSERT((tq->tq_credits < 0) ==
- !list_empty(&tq->tq_delayed));
-
- tq->tq_credits++;
- if (tq->tq_credits <= 0) {
- msg2 = list_entry(tq->tq_delayed.next,
- struct lnet_msg, msg_list);
- list_del(&msg2->msg_list);
-
- LASSERT(msg2->msg_txpeer->lp_ni == ni);
- LASSERT(msg2->msg_tx_delayed);
-
- (void)lnet_post_send_locked(msg2, 1);
- }
- }
-
- if (msg->msg_peertxcredit) {
- /* give back peer txcredits */
- msg->msg_peertxcredit = 0;
-
- LASSERT((txpeer->lp_txcredits < 0) ==
- !list_empty(&txpeer->lp_txq));
-
- txpeer->lp_txqnob -= msg->msg_len + sizeof(struct lnet_hdr);
- LASSERT(txpeer->lp_txqnob >= 0);
-
- txpeer->lp_txcredits++;
- if (txpeer->lp_txcredits <= 0) {
- msg2 = list_entry(txpeer->lp_txq.next,
- struct lnet_msg, msg_list);
- list_del(&msg2->msg_list);
-
- LASSERT(msg2->msg_txpeer == txpeer);
- LASSERT(msg2->msg_tx_delayed);
-
- (void)lnet_post_send_locked(msg2, 1);
- }
- }
-
- if (txpeer) {
- msg->msg_txpeer = NULL;
- lnet_peer_decref_locked(txpeer);
- }
-}
-
-void
-lnet_schedule_blocked_locked(struct lnet_rtrbufpool *rbp)
-{
- struct lnet_msg *msg;
-
- if (list_empty(&rbp->rbp_msgs))
- return;
- msg = list_entry(rbp->rbp_msgs.next,
- struct lnet_msg, msg_list);
- list_del(&msg->msg_list);
-
- (void)lnet_post_routed_recv_locked(msg, 1);
-}
-
-void
-lnet_drop_routed_msgs_locked(struct list_head *list, int cpt)
-{
- struct list_head drop;
- struct lnet_msg *msg;
- struct lnet_msg *tmp;
-
- INIT_LIST_HEAD(&drop);
-
- list_splice_init(list, &drop);
-
- lnet_net_unlock(cpt);
-
- list_for_each_entry_safe(msg, tmp, &drop, msg_list) {
- lnet_ni_recv(msg->msg_rxpeer->lp_ni, msg->msg_private, NULL,
- 0, 0, 0, msg->msg_hdr.payload_length);
- list_del_init(&msg->msg_list);
- lnet_finalize(NULL, msg, -ECANCELED);
- }
-
- lnet_net_lock(cpt);
-}
-
-void
-lnet_return_rx_credits_locked(struct lnet_msg *msg)
-{
- struct lnet_peer *rxpeer = msg->msg_rxpeer;
- struct lnet_msg *msg2;
-
- if (msg->msg_rtrcredit) {
- /* give back global router credits */
- struct lnet_rtrbuf *rb;
- struct lnet_rtrbufpool *rbp;
-
- /*
- * NB If a msg ever blocks for a buffer in rbp_msgs, it stays
- * there until it gets one allocated, or aborts the wait
- * itself
- */
- LASSERT(msg->msg_kiov);
-
- rb = container_of(msg->msg_kiov, struct lnet_rtrbuf, rb_kiov[0]);
- rbp = rb->rb_pool;
-
- msg->msg_kiov = NULL;
- msg->msg_rtrcredit = 0;
-
- LASSERT(rbp == lnet_msg2bufpool(msg));
-
- LASSERT((rbp->rbp_credits > 0) ==
- !list_empty(&rbp->rbp_bufs));
-
- /*
- * If routing is now turned off, we just drop this buffer and
- * don't bother trying to return credits.
- */
- if (!the_lnet.ln_routing) {
- lnet_destroy_rtrbuf(rb, rbp->rbp_npages);
- goto routing_off;
- }
-
- /*
- * It is possible that a user has lowered the desired number of
- * buffers in this pool. Make sure we never put back
- * more buffers than the stated number.
- */
- if (unlikely(rbp->rbp_credits >= rbp->rbp_req_nbuffers)) {
- /* Discard this buffer so we don't have too many. */
- lnet_destroy_rtrbuf(rb, rbp->rbp_npages);
- rbp->rbp_nbuffers--;
- } else {
- list_add(&rb->rb_list, &rbp->rbp_bufs);
- rbp->rbp_credits++;
- if (rbp->rbp_credits <= 0)
- lnet_schedule_blocked_locked(rbp);
- }
- }
-
-routing_off:
- if (msg->msg_peerrtrcredit) {
- /* give back peer router credits */
- msg->msg_peerrtrcredit = 0;
-
- LASSERT((rxpeer->lp_rtrcredits < 0) ==
- !list_empty(&rxpeer->lp_rtrq));
-
- rxpeer->lp_rtrcredits++;
- /*
- * drop all messages which are queued to be routed on that
- * peer.
- */
- if (!the_lnet.ln_routing) {
- lnet_drop_routed_msgs_locked(&rxpeer->lp_rtrq,
- msg->msg_rx_cpt);
- } else if (rxpeer->lp_rtrcredits <= 0) {
- msg2 = list_entry(rxpeer->lp_rtrq.next,
- struct lnet_msg, msg_list);
- list_del(&msg2->msg_list);
-
- (void)lnet_post_routed_recv_locked(msg2, 1);
- }
- }
- if (rxpeer) {
- msg->msg_rxpeer = NULL;
- lnet_peer_decref_locked(rxpeer);
- }
-}
-
-static int
-lnet_compare_routes(struct lnet_route *r1, struct lnet_route *r2)
-{
- struct lnet_peer *p1 = r1->lr_gateway;
- struct lnet_peer *p2 = r2->lr_gateway;
- int r1_hops = (r1->lr_hops == LNET_UNDEFINED_HOPS) ? 1 : r1->lr_hops;
- int r2_hops = (r2->lr_hops == LNET_UNDEFINED_HOPS) ? 1 : r2->lr_hops;
-
- if (r1->lr_priority < r2->lr_priority)
- return 1;
-
- if (r1->lr_priority > r2->lr_priority)
- return -ERANGE;
-
- if (r1_hops < r2_hops)
- return 1;
-
- if (r1_hops > r2_hops)
- return -ERANGE;
-
- if (p1->lp_txqnob < p2->lp_txqnob)
- return 1;
-
- if (p1->lp_txqnob > p2->lp_txqnob)
- return -ERANGE;
-
- if (p1->lp_txcredits > p2->lp_txcredits)
- return 1;
-
- if (p1->lp_txcredits < p2->lp_txcredits)
- return -ERANGE;
-
- if (r1->lr_seq - r2->lr_seq <= 0)
- return 1;
-
- return -ERANGE;
-}
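To make the preference order explicit, a small worked example with invented values:

	/*
	 * Hypothetical example for lnet_compare_routes():
	 *   r1: priority 0, 2 hops, idle gateway
	 *   r2: priority 0, 1 hop,  gateway with 64 KiB queued
	 * Priorities tie and r2 has fewer hops, so the result is
	 * -ERANGE (prefer r2); queue depth, credits and lr_seq are
	 * only consulted on earlier ties.
	 */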
-
-static struct lnet_peer *
-lnet_find_route_locked(struct lnet_ni *ni, lnet_nid_t target,
- lnet_nid_t rtr_nid)
-{
- struct lnet_remotenet *rnet;
- struct lnet_route *route;
- struct lnet_route *best_route;
- struct lnet_route *last_route;
- struct lnet_peer *lp_best;
- struct lnet_peer *lp;
- int rc;
-
- /*
- * If @rtr_nid is not LNET_NID_ANY, return the gateway with
- * rtr_nid nid, otherwise find the best gateway I can use
- */
- rnet = lnet_find_net_locked(LNET_NIDNET(target));
- if (!rnet)
- return NULL;
-
- lp_best = NULL;
- best_route = NULL;
- last_route = NULL;
- list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
- lp = route->lr_gateway;
-
- if (!lnet_is_route_alive(route))
- continue;
-
- if (ni && lp->lp_ni != ni)
- continue;
-
- if (lp->lp_nid == rtr_nid) /* it's pre-determined router */
- return lp;
-
- if (!lp_best) {
- best_route = route;
- last_route = route;
- lp_best = lp;
- continue;
- }
-
-		/* no protection on the fields below, but it's harmless */
- if (last_route->lr_seq - route->lr_seq < 0)
- last_route = route;
-
- rc = lnet_compare_routes(route, best_route);
- if (rc < 0)
- continue;
-
- best_route = route;
- lp_best = lp;
- }
-
-	/*
-	 * Set the sequence number on the best router to the latest
-	 * sequence + 1 so we can round-robin all routers; it's racy and
-	 * inaccurate, but harmless and functional.
-	 */
- if (best_route)
- best_route->lr_seq = last_route->lr_seq + 1;
- return lp_best;
-}
-
-int
-lnet_send(lnet_nid_t src_nid, struct lnet_msg *msg, lnet_nid_t rtr_nid)
-{
- lnet_nid_t dst_nid = msg->msg_target.nid;
- struct lnet_ni *src_ni;
- struct lnet_ni *local_ni;
- struct lnet_peer *lp;
- int cpt;
- int cpt2;
- int rc;
-
- /*
- * NB: rtr_nid is set to LNET_NID_ANY for all current use-cases,
- * but we might want to use pre-determined router for ACK/REPLY
- * in the future
- */
- /* NB: ni == interface pre-determined (ACK/REPLY) */
- LASSERT(!msg->msg_txpeer);
- LASSERT(!msg->msg_sending);
- LASSERT(!msg->msg_target_is_router);
- LASSERT(!msg->msg_receiving);
-
- msg->msg_sending = 1;
-
- LASSERT(!msg->msg_tx_committed);
- cpt = lnet_cpt_of_nid(rtr_nid == LNET_NID_ANY ? dst_nid : rtr_nid);
- again:
- lnet_net_lock(cpt);
-
- if (the_lnet.ln_shutdown) {
- lnet_net_unlock(cpt);
- return -ESHUTDOWN;
- }
-
- if (src_nid == LNET_NID_ANY) {
- src_ni = NULL;
- } else {
- src_ni = lnet_nid2ni_locked(src_nid, cpt);
- if (!src_ni) {
- lnet_net_unlock(cpt);
- LCONSOLE_WARN("Can't send to %s: src %s is not a local nid\n",
- libcfs_nid2str(dst_nid),
- libcfs_nid2str(src_nid));
- return -EINVAL;
- }
- LASSERT(!msg->msg_routing);
- }
-
- /* Is this for someone on a local network? */
- local_ni = lnet_net2ni_locked(LNET_NIDNET(dst_nid), cpt);
-
- if (local_ni) {
- if (!src_ni) {
- src_ni = local_ni;
- src_nid = src_ni->ni_nid;
- } else if (src_ni == local_ni) {
- lnet_ni_decref_locked(local_ni, cpt);
- } else {
- lnet_ni_decref_locked(local_ni, cpt);
- lnet_ni_decref_locked(src_ni, cpt);
- lnet_net_unlock(cpt);
- LCONSOLE_WARN("No route to %s via from %s\n",
- libcfs_nid2str(dst_nid),
- libcfs_nid2str(src_nid));
- return -EINVAL;
- }
-
- LASSERT(src_nid != LNET_NID_ANY);
- lnet_msg_commit(msg, cpt);
-
- if (!msg->msg_routing)
- msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
-
- if (src_ni == the_lnet.ln_loni) {
- /* No send credit hassles with LOLND */
- lnet_net_unlock(cpt);
- lnet_ni_send(src_ni, msg);
-
- lnet_net_lock(cpt);
- lnet_ni_decref_locked(src_ni, cpt);
- lnet_net_unlock(cpt);
- return 0;
- }
-
- rc = lnet_nid2peer_locked(&lp, dst_nid, cpt);
- /* lp has ref on src_ni; lose mine */
- lnet_ni_decref_locked(src_ni, cpt);
- if (rc) {
- lnet_net_unlock(cpt);
- LCONSOLE_WARN("Error %d finding peer %s\n", rc,
- libcfs_nid2str(dst_nid));
- /* ENOMEM or shutting down */
- return rc;
- }
- LASSERT(lp->lp_ni == src_ni);
- } else {
- /* sending to a remote network */
- lp = lnet_find_route_locked(src_ni, dst_nid, rtr_nid);
- if (!lp) {
- if (src_ni)
- lnet_ni_decref_locked(src_ni, cpt);
- lnet_net_unlock(cpt);
-
- LCONSOLE_WARN("No route to %s via %s (all routers down)\n",
- libcfs_id2str(msg->msg_target),
- libcfs_nid2str(src_nid));
- return -EHOSTUNREACH;
- }
-
-		/*
-		 * rtr_nid is LNET_NID_ANY or the NID of a pre-determined
-		 * router. It's possible that rtr_nid isn't LNET_NID_ANY
-		 * yet lp isn't the pre-determined router; this can happen
-		 * if the routing table changed while we dropped the lock.
-		 */
- if (rtr_nid != lp->lp_nid) {
- cpt2 = lnet_cpt_of_nid_locked(lp->lp_nid);
- if (cpt2 != cpt) {
- if (src_ni)
- lnet_ni_decref_locked(src_ni, cpt);
- lnet_net_unlock(cpt);
-
- rtr_nid = lp->lp_nid;
- cpt = cpt2;
- goto again;
- }
- }
-
- CDEBUG(D_NET, "Best route to %s via %s for %s %d\n",
- libcfs_nid2str(dst_nid), libcfs_nid2str(lp->lp_nid),
- lnet_msgtyp2str(msg->msg_type), msg->msg_len);
-
- if (!src_ni) {
- src_ni = lp->lp_ni;
- src_nid = src_ni->ni_nid;
- } else {
- LASSERT(src_ni == lp->lp_ni);
- lnet_ni_decref_locked(src_ni, cpt);
- }
-
- lnet_peer_addref_locked(lp);
-
- LASSERT(src_nid != LNET_NID_ANY);
- lnet_msg_commit(msg, cpt);
-
- if (!msg->msg_routing) {
- /* I'm the source and now I know which NI to send on */
- msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
- }
-
- msg->msg_target_is_router = 1;
- msg->msg_target.nid = lp->lp_nid;
- msg->msg_target.pid = LNET_PID_LUSTRE;
- }
-
- /* 'lp' is our best choice of peer */
-
- LASSERT(!msg->msg_peertxcredit);
- LASSERT(!msg->msg_txcredit);
- LASSERT(!msg->msg_txpeer);
-
- msg->msg_txpeer = lp; /* msg takes my ref on lp */
-
- rc = lnet_post_send_locked(msg, 0);
- lnet_net_unlock(cpt);
-
- if (rc < 0)
- return rc;
-
- if (rc == LNET_CREDIT_OK)
- lnet_ni_send(src_ni, msg);
-
- return 0; /* rc == LNET_CREDIT_OK or LNET_CREDIT_WAIT */
-}
-
-void
-lnet_drop_message(struct lnet_ni *ni, int cpt, void *private, unsigned int nob)
-{
- lnet_net_lock(cpt);
- the_lnet.ln_counters[cpt]->drop_count++;
- the_lnet.ln_counters[cpt]->drop_length += nob;
- lnet_net_unlock(cpt);
-
- lnet_ni_recv(ni, private, NULL, 0, 0, 0, nob);
-}
-
-static void
-lnet_recv_put(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- struct lnet_hdr *hdr = &msg->msg_hdr;
-
- if (msg->msg_wanted)
- lnet_setpayloadbuffer(msg);
-
- lnet_build_msg_event(msg, LNET_EVENT_PUT);
-
- /*
- * Must I ACK? If so I'll grab the ack_wmd out of the header and put
- * it back into the ACK during lnet_finalize()
- */
- msg->msg_ack = !lnet_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
- !(msg->msg_md->md_options & LNET_MD_ACK_DISABLE);
-
- lnet_ni_recv(ni, msg->msg_private, msg, msg->msg_rx_delayed,
- msg->msg_offset, msg->msg_wanted, hdr->payload_length);
-}
-
-static int
-lnet_parse_put(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- struct lnet_hdr *hdr = &msg->msg_hdr;
- struct lnet_match_info info;
- bool ready_delay;
- int rc;
-
- /* Convert put fields to host byte order */
- le64_to_cpus(&hdr->msg.put.match_bits);
- le32_to_cpus(&hdr->msg.put.ptl_index);
- le32_to_cpus(&hdr->msg.put.offset);
-
- info.mi_id.nid = hdr->src_nid;
- info.mi_id.pid = hdr->src_pid;
- info.mi_opc = LNET_MD_OP_PUT;
- info.mi_portal = hdr->msg.put.ptl_index;
- info.mi_rlength = hdr->payload_length;
- info.mi_roffset = hdr->msg.put.offset;
- info.mi_mbits = hdr->msg.put.match_bits;
-
- msg->msg_rx_ready_delay = !ni->ni_lnd->lnd_eager_recv;
- ready_delay = msg->msg_rx_ready_delay;
-
- again:
- rc = lnet_ptl_match_md(&info, msg);
- switch (rc) {
- default:
- LBUG();
-
- case LNET_MATCHMD_OK:
- lnet_recv_put(ni, msg);
- return 0;
-
- case LNET_MATCHMD_NONE:
-		/*
-		 * no eager_recv, or it has already been called; the
-		 * message should have been attached to the delayed list
-		 */
- if (ready_delay)
- return 0;
-
- rc = lnet_ni_eager_recv(ni, msg);
- if (!rc) {
- ready_delay = true;
- goto again;
- }
- /* fall through */
-
- case LNET_MATCHMD_DROP:
- CNETERR("Dropping PUT from %s portal %d match %llu offset %d length %d: %d\n",
- libcfs_id2str(info.mi_id), info.mi_portal,
- info.mi_mbits, info.mi_roffset, info.mi_rlength, rc);
-
- return -ENOENT; /* -ve: OK but no match */
- }
-}
-
-static int
-lnet_parse_get(struct lnet_ni *ni, struct lnet_msg *msg, int rdma_get)
-{
- struct lnet_match_info info;
- struct lnet_hdr *hdr = &msg->msg_hdr;
- struct lnet_handle_wire reply_wmd;
- int rc;
-
- /* Convert get fields to host byte order */
- le64_to_cpus(&hdr->msg.get.match_bits);
- le32_to_cpus(&hdr->msg.get.ptl_index);
- le32_to_cpus(&hdr->msg.get.sink_length);
- le32_to_cpus(&hdr->msg.get.src_offset);
-
- info.mi_id.nid = hdr->src_nid;
- info.mi_id.pid = hdr->src_pid;
- info.mi_opc = LNET_MD_OP_GET;
- info.mi_portal = hdr->msg.get.ptl_index;
- info.mi_rlength = hdr->msg.get.sink_length;
- info.mi_roffset = hdr->msg.get.src_offset;
- info.mi_mbits = hdr->msg.get.match_bits;
-
- rc = lnet_ptl_match_md(&info, msg);
- if (rc == LNET_MATCHMD_DROP) {
- CNETERR("Dropping GET from %s portal %d match %llu offset %d length %d\n",
- libcfs_id2str(info.mi_id), info.mi_portal,
- info.mi_mbits, info.mi_roffset, info.mi_rlength);
- return -ENOENT; /* -ve: OK but no match */
- }
-
- LASSERT(rc == LNET_MATCHMD_OK);
-
- lnet_build_msg_event(msg, LNET_EVENT_GET);
-
- reply_wmd = hdr->msg.get.return_wmd;
-
- lnet_prep_send(msg, LNET_MSG_REPLY, info.mi_id,
- msg->msg_offset, msg->msg_wanted);
-
- msg->msg_hdr.msg.reply.dst_wmd = reply_wmd;
-
- if (rdma_get) {
- /* The LND completes the REPLY from her recv procedure */
- lnet_ni_recv(ni, msg->msg_private, msg, 0,
- msg->msg_offset, msg->msg_len, msg->msg_len);
- return 0;
- }
-
- lnet_ni_recv(ni, msg->msg_private, NULL, 0, 0, 0, 0);
- msg->msg_receiving = 0;
-
- rc = lnet_send(ni->ni_nid, msg, LNET_NID_ANY);
- if (rc < 0) {
- /* didn't get as far as lnet_ni_send() */
- CERROR("%s: Unable to send REPLY for GET from %s: %d\n",
- libcfs_nid2str(ni->ni_nid),
- libcfs_id2str(info.mi_id), rc);
-
- lnet_finalize(ni, msg, rc);
- }
-
- return 0;
-}
-
-static int
-lnet_parse_reply(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- void *private = msg->msg_private;
- struct lnet_hdr *hdr = &msg->msg_hdr;
- struct lnet_process_id src = {0};
- struct lnet_libmd *md;
- int rlength;
- int mlength;
- int cpt;
-
- cpt = lnet_cpt_of_cookie(hdr->msg.reply.dst_wmd.wh_object_cookie);
- lnet_res_lock(cpt);
-
- src.nid = hdr->src_nid;
- src.pid = hdr->src_pid;
-
- /* NB handles only looked up by creator (no flips) */
- md = lnet_wire_handle2md(&hdr->msg.reply.dst_wmd);
- if (!md || !md->md_threshold || md->md_me) {
- CNETERR("%s: Dropping REPLY from %s for %s MD %#llx.%#llx\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- !md ? "invalid" : "inactive",
- hdr->msg.reply.dst_wmd.wh_interface_cookie,
- hdr->msg.reply.dst_wmd.wh_object_cookie);
- if (md && md->md_me)
- CERROR("REPLY MD also attached to portal %d\n",
- md->md_me->me_portal);
-
- lnet_res_unlock(cpt);
- return -ENOENT; /* -ve: OK but no match */
- }
-
- LASSERT(!md->md_offset);
-
- rlength = hdr->payload_length;
- mlength = min_t(uint, rlength, md->md_length);
-
- if (mlength < rlength &&
- !(md->md_options & LNET_MD_TRUNCATE)) {
- CNETERR("%s: Dropping REPLY from %s length %d for MD %#llx would overflow (%d)\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- rlength, hdr->msg.reply.dst_wmd.wh_object_cookie,
- mlength);
- lnet_res_unlock(cpt);
- return -ENOENT; /* -ve: OK but no match */
- }
-
- CDEBUG(D_NET, "%s: Reply from %s of length %d/%d into md %#llx\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- mlength, rlength, hdr->msg.reply.dst_wmd.wh_object_cookie);
-
- lnet_msg_attach_md(msg, md, 0, mlength);
-
- if (mlength)
- lnet_setpayloadbuffer(msg);
-
- lnet_res_unlock(cpt);
-
- lnet_build_msg_event(msg, LNET_EVENT_REPLY);
-
- lnet_ni_recv(ni, private, msg, 0, 0, mlength, rlength);
- return 0;
-}
-
-static int
-lnet_parse_ack(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- struct lnet_hdr *hdr = &msg->msg_hdr;
- struct lnet_process_id src = {0};
- struct lnet_libmd *md;
- int cpt;
-
- src.nid = hdr->src_nid;
- src.pid = hdr->src_pid;
-
- /* Convert ack fields to host byte order */
- le64_to_cpus(&hdr->msg.ack.match_bits);
- le32_to_cpus(&hdr->msg.ack.mlength);
-
- cpt = lnet_cpt_of_cookie(hdr->msg.ack.dst_wmd.wh_object_cookie);
- lnet_res_lock(cpt);
-
- /* NB handles only looked up by creator (no flips) */
- md = lnet_wire_handle2md(&hdr->msg.ack.dst_wmd);
- if (!md || !md->md_threshold || md->md_me) {
- /* Don't moan; this is expected */
- CDEBUG(D_NET,
- "%s: Dropping ACK from %s to %s MD %#llx.%#llx\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- !md ? "invalid" : "inactive",
- hdr->msg.ack.dst_wmd.wh_interface_cookie,
- hdr->msg.ack.dst_wmd.wh_object_cookie);
- if (md && md->md_me)
- CERROR("Source MD also attached to portal %d\n",
- md->md_me->me_portal);
-
- lnet_res_unlock(cpt);
- return -ENOENT; /* -ve! */
- }
-
- CDEBUG(D_NET, "%s: ACK from %s into md %#llx\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- hdr->msg.ack.dst_wmd.wh_object_cookie);
-
- lnet_msg_attach_md(msg, md, 0, 0);
-
- lnet_res_unlock(cpt);
-
- lnet_build_msg_event(msg, LNET_EVENT_ACK);
-
- lnet_ni_recv(ni, msg->msg_private, msg, 0, 0, 0, msg->msg_len);
- return 0;
-}
-
-/**
- * \retval LNET_CREDIT_OK If \a msg is forwarded
- * \retval LNET_CREDIT_WAIT If \a msg is blocked for want of a buffer
- * \retval -ve error code
- */
-int
-lnet_parse_forward_locked(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- int rc = 0;
-
- if (!the_lnet.ln_routing)
- return -ECANCELED;
-
- if (msg->msg_rxpeer->lp_rtrcredits <= 0 ||
- lnet_msg2bufpool(msg)->rbp_credits <= 0) {
- if (!ni->ni_lnd->lnd_eager_recv) {
- msg->msg_rx_ready_delay = 1;
- } else {
- lnet_net_unlock(msg->msg_rx_cpt);
- rc = lnet_ni_eager_recv(ni, msg);
- lnet_net_lock(msg->msg_rx_cpt);
- }
- }
-
- if (!rc)
- rc = lnet_post_routed_recv_locked(msg, 0);
- return rc;
-}
-
-int
-lnet_parse_local(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- int rc;
-
- switch (msg->msg_type) {
- case LNET_MSG_ACK:
- rc = lnet_parse_ack(ni, msg);
- break;
- case LNET_MSG_PUT:
- rc = lnet_parse_put(ni, msg);
- break;
- case LNET_MSG_GET:
- rc = lnet_parse_get(ni, msg, msg->msg_rdma_get);
- break;
- case LNET_MSG_REPLY:
- rc = lnet_parse_reply(ni, msg);
- break;
- default: /* prevent an unused label if !kernel */
- LASSERT(0);
- return -EPROTO;
- }
-
- LASSERT(!rc || rc == -ENOENT);
- return rc;
-}
-
-char *
-lnet_msgtyp2str(int type)
-{
- switch (type) {
- case LNET_MSG_ACK:
- return "ACK";
- case LNET_MSG_PUT:
- return "PUT";
- case LNET_MSG_GET:
- return "GET";
- case LNET_MSG_REPLY:
- return "REPLY";
- case LNET_MSG_HELLO:
- return "HELLO";
- default:
- return "<UNKNOWN>";
- }
-}
-
-void
-lnet_print_hdr(struct lnet_hdr *hdr)
-{
- struct lnet_process_id src = {0};
- struct lnet_process_id dst = {0};
- char *type_str = lnet_msgtyp2str(hdr->type);
-
- src.nid = hdr->src_nid;
- src.pid = hdr->src_pid;
-
- dst.nid = hdr->dest_nid;
- dst.pid = hdr->dest_pid;
-
- CWARN("P3 Header at %p of type %s\n", hdr, type_str);
- CWARN(" From %s\n", libcfs_id2str(src));
- CWARN(" To %s\n", libcfs_id2str(dst));
-
- switch (hdr->type) {
- default:
- break;
-
- case LNET_MSG_PUT:
- CWARN(" Ptl index %d, ack md %#llx.%#llx, match bits %llu\n",
- hdr->msg.put.ptl_index,
- hdr->msg.put.ack_wmd.wh_interface_cookie,
- hdr->msg.put.ack_wmd.wh_object_cookie,
- hdr->msg.put.match_bits);
- CWARN(" Length %d, offset %d, hdr data %#llx\n",
- hdr->payload_length, hdr->msg.put.offset,
- hdr->msg.put.hdr_data);
- break;
-
- case LNET_MSG_GET:
- CWARN(" Ptl index %d, return md %#llx.%#llx, match bits %llu\n",
- hdr->msg.get.ptl_index,
- hdr->msg.get.return_wmd.wh_interface_cookie,
- hdr->msg.get.return_wmd.wh_object_cookie,
- hdr->msg.get.match_bits);
- CWARN(" Length %d, src offset %d\n",
- hdr->msg.get.sink_length,
- hdr->msg.get.src_offset);
- break;
-
- case LNET_MSG_ACK:
- CWARN(" dst md %#llx.%#llx, manipulated length %d\n",
- hdr->msg.ack.dst_wmd.wh_interface_cookie,
- hdr->msg.ack.dst_wmd.wh_object_cookie,
- hdr->msg.ack.mlength);
- break;
-
- case LNET_MSG_REPLY:
- CWARN(" dst md %#llx.%#llx, length %d\n",
- hdr->msg.reply.dst_wmd.wh_interface_cookie,
- hdr->msg.reply.dst_wmd.wh_object_cookie,
- hdr->payload_length);
- }
-}
-
-int
-lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, lnet_nid_t from_nid,
- void *private, int rdma_req)
-{
- int rc = 0;
- int cpt;
- int for_me;
- struct lnet_msg *msg;
- lnet_pid_t dest_pid;
- lnet_nid_t dest_nid;
- lnet_nid_t src_nid;
- __u32 payload_length;
- __u32 type;
-
- LASSERT(!in_interrupt());
-
- type = le32_to_cpu(hdr->type);
- src_nid = le64_to_cpu(hdr->src_nid);
- dest_nid = le64_to_cpu(hdr->dest_nid);
- dest_pid = le32_to_cpu(hdr->dest_pid);
- payload_length = le32_to_cpu(hdr->payload_length);
-
- for_me = (ni->ni_nid == dest_nid);
- cpt = lnet_cpt_of_nid(from_nid);
-
- switch (type) {
- case LNET_MSG_ACK:
- case LNET_MSG_GET:
- if (payload_length > 0) {
- CERROR("%s, src %s: bad %s payload %d (0 expected)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type), payload_length);
- return -EPROTO;
- }
- break;
-
- case LNET_MSG_PUT:
- case LNET_MSG_REPLY:
- if (payload_length >
- (__u32)(for_me ? LNET_MAX_PAYLOAD : LNET_MTU)) {
- CERROR("%s, src %s: bad %s payload %d (%d max expected)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type),
- payload_length,
- for_me ? LNET_MAX_PAYLOAD : LNET_MTU);
- return -EPROTO;
- }
- break;
-
- default:
- CERROR("%s, src %s: Bad message type 0x%x\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid), type);
- return -EPROTO;
- }
-
- if (the_lnet.ln_routing &&
- ni->ni_last_alive != ktime_get_real_seconds()) {
-		/* NB: so far this is the only place to set NI status to "up" */
- lnet_ni_lock(ni);
- ni->ni_last_alive = ktime_get_real_seconds();
- if (ni->ni_status &&
- ni->ni_status->ns_status == LNET_NI_STATUS_DOWN)
- ni->ni_status->ns_status = LNET_NI_STATUS_UP;
- lnet_ni_unlock(ni);
- }
-
- /*
- * Regard a bad destination NID as a protocol error. Senders should
- * know what they're doing; if they don't they're misconfigured, buggy
- * or malicious so we chop them off at the knees :)
- */
- if (!for_me) {
- if (LNET_NIDNET(dest_nid) == LNET_NIDNET(ni->ni_nid)) {
- /* should have gone direct */
- CERROR("%s, src %s: Bad dest nid %s (should have been sent direct)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid));
- return -EPROTO;
- }
-
- if (lnet_islocalnid(dest_nid)) {
- /*
- * dest is another local NI; sender should have used
- * this node's NID on its own network
- */
- CERROR("%s, src %s: Bad dest nid %s (it's my nid but on a different network)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid));
- return -EPROTO;
- }
-
- if (rdma_req && type == LNET_MSG_GET) {
- CERROR("%s, src %s: Bad optimized GET for %s (final destination must be me)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid));
- return -EPROTO;
- }
-
- if (!the_lnet.ln_routing) {
- CERROR("%s, src %s: Dropping message for %s (routing not enabled)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid));
- goto drop;
- }
- }
-
- /*
- * Message looks OK; we're not going to return an error, so we MUST
- * call back lnd_recv() come what may...
- */
- if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */
- fail_peer(src_nid, 0)) { /* shall we now? */
- CERROR("%s, src %s: Dropping %s to simulate failure\n",
- libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type));
- goto drop;
- }
-
- if (!list_empty(&the_lnet.ln_drop_rules) &&
- lnet_drop_rule_match(hdr)) {
- CDEBUG(D_NET, "%s, src %s, dst %s: Dropping %s to simulate silent message loss\n",
- libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid), lnet_msgtyp2str(type));
- goto drop;
- }
-
- msg = kzalloc(sizeof(*msg), GFP_NOFS);
- if (!msg) {
- CERROR("%s, src %s: Dropping %s (out of memory)\n",
- libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type));
- goto drop;
- }
-
- /* msg zeroed by kzalloc()
- * i.e. flags all clear, pointers NULL etc
- */
- msg->msg_type = type;
- msg->msg_private = private;
- msg->msg_receiving = 1;
- msg->msg_rdma_get = rdma_req;
- msg->msg_wanted = payload_length;
- msg->msg_len = payload_length;
- msg->msg_offset = 0;
- msg->msg_hdr = *hdr;
- /* for building message event */
- msg->msg_from = from_nid;
- if (!for_me) {
- msg->msg_target.pid = dest_pid;
- msg->msg_target.nid = dest_nid;
- msg->msg_routing = 1;
-
- } else {
- /* convert common msg->hdr fields to host byteorder */
- msg->msg_hdr.type = type;
- msg->msg_hdr.src_nid = src_nid;
- le32_to_cpus(&msg->msg_hdr.src_pid);
- msg->msg_hdr.dest_nid = dest_nid;
- msg->msg_hdr.dest_pid = dest_pid;
- msg->msg_hdr.payload_length = payload_length;
- }
-
- lnet_net_lock(cpt);
- rc = lnet_nid2peer_locked(&msg->msg_rxpeer, from_nid, cpt);
- if (rc) {
- lnet_net_unlock(cpt);
- CERROR("%s, src %s: Dropping %s (error %d looking up sender)\n",
- libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type), rc);
- kfree(msg);
- if (rc == -ESHUTDOWN)
- /* We are shutting down. Don't do anything more */
- return 0;
- goto drop;
- }
-
- if (lnet_isrouter(msg->msg_rxpeer)) {
- lnet_peer_set_alive(msg->msg_rxpeer);
- if (avoid_asym_router_failure &&
- LNET_NIDNET(src_nid) != LNET_NIDNET(from_nid)) {
-			/* received a remote message from a router; update
-			 * the remote NI status on this router.
-			 * NB: multi-hop routed messages will be ignored.
-			 */
- lnet_router_ni_update_locked(msg->msg_rxpeer,
- LNET_NIDNET(src_nid));
- }
- }
-
- lnet_msg_commit(msg, cpt);
-
- /* message delay simulation */
- if (unlikely(!list_empty(&the_lnet.ln_delay_rules) &&
- lnet_delay_rule_match_locked(hdr, msg))) {
- lnet_net_unlock(cpt);
- return 0;
- }
-
- if (!for_me) {
- rc = lnet_parse_forward_locked(ni, msg);
- lnet_net_unlock(cpt);
-
- if (rc < 0)
- goto free_drop;
-
- if (rc == LNET_CREDIT_OK) {
- lnet_ni_recv(ni, msg->msg_private, msg, 0,
- 0, payload_length, payload_length);
- }
- return 0;
- }
-
- lnet_net_unlock(cpt);
-
- rc = lnet_parse_local(ni, msg);
- if (rc)
- goto free_drop;
- return 0;
-
- free_drop:
- LASSERT(!msg->msg_md);
- lnet_finalize(ni, msg, rc);
-
- drop:
- lnet_drop_message(ni, cpt, private, payload_length);
- return 0;
-}
-EXPORT_SYMBOL(lnet_parse);
-
-void
-lnet_drop_delayed_msg_list(struct list_head *head, char *reason)
-{
- while (!list_empty(head)) {
- struct lnet_process_id id = {0};
- struct lnet_msg *msg;
-
- msg = list_entry(head->next, struct lnet_msg, msg_list);
- list_del(&msg->msg_list);
-
- id.nid = msg->msg_hdr.src_nid;
- id.pid = msg->msg_hdr.src_pid;
-
- LASSERT(!msg->msg_md);
- LASSERT(msg->msg_rx_delayed);
- LASSERT(msg->msg_rxpeer);
- LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
-
- CWARN("Dropping delayed PUT from %s portal %d match %llu offset %d length %d: %s\n",
- libcfs_id2str(id),
- msg->msg_hdr.msg.put.ptl_index,
- msg->msg_hdr.msg.put.match_bits,
- msg->msg_hdr.msg.put.offset,
- msg->msg_hdr.payload_length, reason);
-
- /*
- * NB I can't drop msg's ref on msg_rxpeer until after I've
- * called lnet_drop_message(), so I just hang onto msg as well
- * until that's done
- */
- lnet_drop_message(msg->msg_rxpeer->lp_ni,
- msg->msg_rxpeer->lp_cpt,
- msg->msg_private, msg->msg_len);
-		/*
-		 * NB: the message will not generate an event because it has
-		 * no attached MD, but we should still give an error code so
-		 * lnet_msg_decommit() can skip counter operations and other
-		 * checks.
-		 */
- lnet_finalize(msg->msg_rxpeer->lp_ni, msg, -ENOENT);
- }
-}
-
-void
-lnet_recv_delayed_msg_list(struct list_head *head)
-{
- while (!list_empty(head)) {
- struct lnet_msg *msg;
- struct lnet_process_id id;
-
- msg = list_entry(head->next, struct lnet_msg, msg_list);
- list_del(&msg->msg_list);
-
- /*
- * md won't disappear under me, since each msg
- * holds a ref on it
- */
- id.nid = msg->msg_hdr.src_nid;
- id.pid = msg->msg_hdr.src_pid;
-
- LASSERT(msg->msg_rx_delayed);
- LASSERT(msg->msg_md);
- LASSERT(msg->msg_rxpeer);
- LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
-
- CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n",
- libcfs_id2str(id), msg->msg_hdr.msg.put.ptl_index,
- msg->msg_hdr.msg.put.match_bits,
- msg->msg_hdr.msg.put.offset,
- msg->msg_hdr.payload_length);
-
- lnet_recv_put(msg->msg_rxpeer->lp_ni, msg);
- }
-}
-
-/**
- * Initiate an asynchronous PUT operation.
- *
- * There are several events associated with a PUT: completion of the send on
- * the initiator node (LNET_EVENT_SEND), and when the send completes
- * successfully, the receipt of an acknowledgment (LNET_EVENT_ACK) indicating
- * that the operation was accepted by the target. The event LNET_EVENT_PUT is
- * used at the target node to indicate the completion of incoming data
- * delivery.
- *
- * The local events will be logged in the EQ associated with the MD pointed to
- * by \a mdh handle. Using a MD without an associated EQ results in these
- * events being discarded. In this case, the caller must have another
- * mechanism (e.g., a higher level protocol) for determining when it is safe
- * to modify the memory region associated with the MD.
- *
- * Note that LNet does not guarantee the order of LNET_EVENT_SEND and
- * LNET_EVENT_ACK, though intuitively ACK should happen after SEND.
- *
- * \param self Indicates the NID of a local interface through which to send
- * the PUT request. Use LNET_NID_ANY to let LNet choose one by itself.
- * \param mdh A handle for the MD that describes the memory to be sent. The MD
- * must be "free floating" (See LNetMDBind()).
- * \param ack Controls whether an acknowledgment is requested.
- * Acknowledgments are only sent when they are requested by the initiating
- * process and the target MD enables them.
- * \param target A process identifier for the target process.
- * \param portal The index in the \a target's portal table.
- * \param match_bits The match bits to use for MD selection at the target
- * process.
- * \param offset The offset into the target MD (only used when the target
- * MD has the LNET_MD_MANAGE_REMOTE option set).
- * \param hdr_data 64 bits of user data that can be included in the message
- * header. This data is written to an event queue entry at the target if an
- * EQ is present on the matching MD.
- *
- * \retval 0 Success, and only in this case events will be generated
- * and logged to EQ (if it exists).
- * \retval -EIO Simulated failure.
- * \retval -ENOMEM Memory allocation failure.
- * \retval -ENOENT Invalid MD object.
- *
- * \see lnet_event::hdr_data and lnet_event_kind.
- */
-int
-LNetPut(lnet_nid_t self, struct lnet_handle_md mdh, enum lnet_ack_req ack,
- struct lnet_process_id target, unsigned int portal,
- __u64 match_bits, unsigned int offset,
- __u64 hdr_data)
-{
- struct lnet_msg *msg;
- struct lnet_libmd *md;
- int cpt;
- int rc;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */
- fail_peer(target.nid, 1)) { /* shall we now? */
- CERROR("Dropping PUT to %s: simulated failure\n",
- libcfs_id2str(target));
- return -EIO;
- }
-
- msg = kzalloc(sizeof(*msg), GFP_NOFS);
- if (!msg) {
- CERROR("Dropping PUT to %s: ENOMEM on struct lnet_msg\n",
- libcfs_id2str(target));
- return -ENOMEM;
- }
- msg->msg_vmflush = !!(current->flags & PF_MEMALLOC);
-
- cpt = lnet_cpt_of_cookie(mdh.cookie);
- lnet_res_lock(cpt);
-
- md = lnet_handle2md(&mdh);
- if (!md || !md->md_threshold || md->md_me) {
- CERROR("Dropping PUT (%llu:%d:%s): MD (%d) invalid\n",
- match_bits, portal, libcfs_id2str(target),
- !md ? -1 : md->md_threshold);
- if (md && md->md_me)
- CERROR("Source MD also attached to portal %d\n",
- md->md_me->me_portal);
- lnet_res_unlock(cpt);
-
- kfree(msg);
- return -ENOENT;
- }
-
- CDEBUG(D_NET, "%s -> %s\n", __func__, libcfs_id2str(target));
-
- lnet_msg_attach_md(msg, md, 0, 0);
-
- lnet_prep_send(msg, LNET_MSG_PUT, target, 0, md->md_length);
-
- msg->msg_hdr.msg.put.match_bits = cpu_to_le64(match_bits);
- msg->msg_hdr.msg.put.ptl_index = cpu_to_le32(portal);
- msg->msg_hdr.msg.put.offset = cpu_to_le32(offset);
- msg->msg_hdr.msg.put.hdr_data = hdr_data;
-
- /* NB handles only looked up by creator (no flips) */
- if (ack == LNET_ACK_REQ) {
- msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie =
- the_lnet.ln_interface_cookie;
- msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie =
- md->md_lh.lh_cookie;
- } else {
- msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie =
- LNET_WIRE_HANDLE_COOKIE_NONE;
- msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie =
- LNET_WIRE_HANDLE_COOKIE_NONE;
- }
-
- lnet_res_unlock(cpt);
-
- lnet_build_msg_event(msg, LNET_EVENT_SEND);
-
- rc = lnet_send(self, msg, LNET_NID_ANY);
- if (rc) {
- CNETERR("Error sending PUT to %s: %d\n",
- libcfs_id2str(target), rc);
- lnet_finalize(NULL, msg, rc);
- }
-
- /* completion will be signalled by an event */
- return 0;
-}
-EXPORT_SYMBOL(LNetPut);
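A minimal, hypothetical initiator-side sketch of the flow the doc comment describes, assuming an EQ has already been created (e.g. with LNetEQAlloc()); the portal, match bits and threshold are placeholders:

	/*
	 * Hypothetical sketch: bind a free-floating MD over 'buf' and
	 * PUT it with an ACK requested (threshold 2 = SEND + ACK).
	 */
	static int example_put(void *buf, unsigned int len,
			       struct lnet_handle_eq eqh,
			       struct lnet_process_id target)
	{
		struct lnet_md umd = {
			.start = buf,
			.length = len,
			.threshold = 2,
			.options = 0,
			.eq_handle = eqh,
		};
		struct lnet_handle_md mdh;
		int rc;

		rc = LNetMDBind(umd, LNET_UNLINK, &mdh);
		if (rc)
			return rc;

		return LNetPut(LNET_NID_ANY, mdh, LNET_ACK_REQ, target,
			       4 /* portal */, 0x17ULL /* match bits */,
			       0 /* offset */, 0 /* hdr_data */);
	}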
-
-struct lnet_msg *
-lnet_create_reply_msg(struct lnet_ni *ni, struct lnet_msg *getmsg)
-{
- /*
- * The LND can DMA direct to the GET md (i.e. no REPLY msg). This
- * returns a msg for the LND to pass to lnet_finalize() when the sink
- * data has been received.
- *
- * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when
- * lnet_finalize() is called on it, so the LND must call this first
- */
- struct lnet_msg *msg = kzalloc(sizeof(*msg), GFP_NOFS);
- struct lnet_libmd *getmd = getmsg->msg_md;
- struct lnet_process_id peer_id = getmsg->msg_target;
- int cpt;
-
- LASSERT(!getmsg->msg_target_is_router);
- LASSERT(!getmsg->msg_routing);
-
- if (!msg) {
- CERROR("%s: Dropping REPLY from %s: can't allocate msg\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id));
- goto drop;
- }
-
- cpt = lnet_cpt_of_cookie(getmd->md_lh.lh_cookie);
- lnet_res_lock(cpt);
-
- LASSERT(getmd->md_refcount > 0);
-
- if (!getmd->md_threshold) {
- CERROR("%s: Dropping REPLY from %s for inactive MD %p\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id),
- getmd);
- lnet_res_unlock(cpt);
- goto drop;
- }
-
- LASSERT(!getmd->md_offset);
-
- CDEBUG(D_NET, "%s: Reply from %s md %p\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), getmd);
-
- /* setup information for lnet_build_msg_event */
- msg->msg_from = peer_id.nid;
- msg->msg_type = LNET_MSG_GET; /* flag this msg as an "optimized" GET */
- msg->msg_hdr.src_nid = peer_id.nid;
- msg->msg_hdr.payload_length = getmd->md_length;
- msg->msg_receiving = 1; /* required by lnet_msg_attach_md */
-
- lnet_msg_attach_md(msg, getmd, getmd->md_offset, getmd->md_length);
- lnet_res_unlock(cpt);
-
- cpt = lnet_cpt_of_nid(peer_id.nid);
-
- lnet_net_lock(cpt);
- lnet_msg_commit(msg, cpt);
- lnet_net_unlock(cpt);
-
- lnet_build_msg_event(msg, LNET_EVENT_REPLY);
-
- return msg;
-
- drop:
- cpt = lnet_cpt_of_nid(peer_id.nid);
-
- lnet_net_lock(cpt);
- the_lnet.ln_counters[cpt]->drop_count++;
- the_lnet.ln_counters[cpt]->drop_length += getmd->md_length;
- lnet_net_unlock(cpt);
-
- kfree(msg);
-
- return NULL;
-}
-EXPORT_SYMBOL(lnet_create_reply_msg);
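A hedged sketch of the LND-side pattern the comment above describes: create the REPLY message before finalizing the original GET, then use lnet_set_reply_msg_len() (defined next) once the RDMA of the sink data completes. The length value is illustrative:

	/*
	 * Hypothetical LND-side sketch for an optimized GET (DMA direct
	 * to the GET MD, no REPLY message on the wire).
	 */
	static int example_lnd_optimized_get(struct lnet_ni *ni,
					     struct lnet_msg *getmsg)
	{
		struct lnet_msg *reply;

		/* must run before lnet_finalize() frees getmsg */
		reply = lnet_create_reply_msg(ni, getmsg);
		if (!reply)
			return -ENOMEM;

		/* ... post the RDMA read of the sink data here ... */

		/* on RDMA completion: record actual length, finish */
		lnet_set_reply_msg_len(ni, reply, 4096);
		lnet_finalize(ni, reply, 0);
		return 0;
	}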
-
-void
-lnet_set_reply_msg_len(struct lnet_ni *ni, struct lnet_msg *reply,
- unsigned int len)
-{
- /*
- * Set the REPLY length, now the RDMA that elides the REPLY message has
- * completed and I know it.
- */
- LASSERT(reply);
- LASSERT(reply->msg_type == LNET_MSG_GET);
- LASSERT(reply->msg_ev.type == LNET_EVENT_REPLY);
-
- /*
- * NB I trusted my peer to RDMA. If she tells me she's written beyond
- * the end of my buffer, I might as well be dead.
- */
- LASSERT(len <= reply->msg_ev.mlength);
-
- reply->msg_ev.mlength = len;
-}
-EXPORT_SYMBOL(lnet_set_reply_msg_len);
-
-/**
- * Initiate an asynchronous GET operation.
- *
- * On the initiator node, an LNET_EVENT_SEND is logged when the GET request
- * is sent, and an LNET_EVENT_REPLY is logged when the data returned from
- * the target node in the REPLY has been written to local MD.
- *
- * On the target node, an LNET_EVENT_GET is logged when the GET request
- * arrives and is accepted into a MD.
- *
- * \param self,target,portal,match_bits,offset See the discussion in LNetPut().
- * \param mdh A handle for the MD that describes the memory into which the
- * requested data will be received. The MD must be "free floating"
- * (See LNetMDBind()).
- *
- * \retval 0 Success, and only in this case events will be generated
- * and logged to EQ (if it exists) of the MD.
- * \retval -EIO Simulated failure.
- * \retval -ENOMEM Memory allocation failure.
- * \retval -ENOENT Invalid MD object.
- */
-int
-LNetGet(lnet_nid_t self, struct lnet_handle_md mdh,
- struct lnet_process_id target, unsigned int portal,
- __u64 match_bits, unsigned int offset)
-{
- struct lnet_msg *msg;
- struct lnet_libmd *md;
- int cpt;
- int rc;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */
- fail_peer(target.nid, 1)) { /* shall we now? */
- CERROR("Dropping GET to %s: simulated failure\n",
- libcfs_id2str(target));
- return -EIO;
- }
-
- msg = kzalloc(sizeof(*msg), GFP_NOFS);
- if (!msg) {
- CERROR("Dropping GET to %s: ENOMEM on struct lnet_msg\n",
- libcfs_id2str(target));
- return -ENOMEM;
- }
-
- cpt = lnet_cpt_of_cookie(mdh.cookie);
- lnet_res_lock(cpt);
-
- md = lnet_handle2md(&mdh);
- if (!md || !md->md_threshold || md->md_me) {
- CERROR("Dropping GET (%llu:%d:%s): MD (%d) invalid\n",
- match_bits, portal, libcfs_id2str(target),
- !md ? -1 : md->md_threshold);
- if (md && md->md_me)
- CERROR("REPLY MD also attached to portal %d\n",
- md->md_me->me_portal);
-
- lnet_res_unlock(cpt);
-
- kfree(msg);
- return -ENOENT;
- }
-
- CDEBUG(D_NET, "%s -> %s\n", __func__, libcfs_id2str(target));
-
- lnet_msg_attach_md(msg, md, 0, 0);
-
- lnet_prep_send(msg, LNET_MSG_GET, target, 0, 0);
-
- msg->msg_hdr.msg.get.match_bits = cpu_to_le64(match_bits);
- msg->msg_hdr.msg.get.ptl_index = cpu_to_le32(portal);
- msg->msg_hdr.msg.get.src_offset = cpu_to_le32(offset);
- msg->msg_hdr.msg.get.sink_length = cpu_to_le32(md->md_length);
-
- /* NB handles only looked up by creator (no flips) */
- msg->msg_hdr.msg.get.return_wmd.wh_interface_cookie =
- the_lnet.ln_interface_cookie;
- msg->msg_hdr.msg.get.return_wmd.wh_object_cookie =
- md->md_lh.lh_cookie;
-
- lnet_res_unlock(cpt);
-
- lnet_build_msg_event(msg, LNET_EVENT_SEND);
-
- rc = lnet_send(self, msg, LNET_NID_ANY);
- if (rc < 0) {
- CNETERR("Error sending GET to %s: %d\n",
- libcfs_id2str(target), rc);
- lnet_finalize(NULL, msg, rc);
- }
-
- /* completion will be signalled by an event */
- return 0;
-}
-EXPORT_SYMBOL(LNetGet);
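
A hedged illustration of the calling convention documented above (editor's sketch, not part of the original file): a caller binds a free-floating MD with LNetMDBind() and then fires the GET. The example_get wrapper, the buffer size, portal number, match bits and EQ handle are all arbitrary.

static int example_get(struct lnet_handle_eq eqh,
		       struct lnet_process_id target)
{
	static char buf[4096];		/* hypothetical sink buffer */
	struct lnet_md umd = {
		.start     = buf,
		.length    = sizeof(buf),
		.threshold = 2,		/* one SEND + one REPLY event */
		.options   = 0,
		.eq_handle = eqh,
	};
	struct lnet_handle_md mdh;
	int rc;

	rc = LNetMDBind(umd, LNET_UNLINK, &mdh);	/* "free floating" MD */
	if (rc)
		return rc;

	/* portal 7, match bits 0x1234 and offset 0 are arbitrary here */
	return LNetGet(LNET_NID_ANY, mdh, target, 7, 0x1234, 0);
}
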
-
-/**
- * Calculate distance to node at \a dstnid.
- *
- * \param dstnid Target NID.
- * \param srcnidp If not NULL, NID of the local interface to reach \a dstnid
- * is saved here.
- * \param orderp If not NULL, order of the route to reach \a dstnid is saved
- * here.
- *
- * \retval 0 If \a dstnid belongs to a local interface and the module option
- * local_nid_dist_zero is set (the default).
- * \retval >0 Distance to the target NID, i.e. the number of hops plus one.
- * \retval -EHOSTUNREACH If \a dstnid is not reachable.
- */
-int
-LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
-{
- struct list_head *e;
- struct lnet_ni *ni;
- struct lnet_remotenet *rnet;
- __u32 dstnet = LNET_NIDNET(dstnid);
- int hops;
- int cpt;
- __u32 order = 2;
- struct list_head *rn_list;
-
- /*
- * if !local_nid_dist_zero, I don't return a distance of 0 ever
- * (when lustre sees a distance of 0, it substitutes 0@lo), so I
- * keep order 0 free for 0@lo and order 1 free for a local NID
- * match
- */
- LASSERT(the_lnet.ln_refcount > 0);
-
- cpt = lnet_net_lock_current();
-
- list_for_each(e, &the_lnet.ln_nis) {
- ni = list_entry(e, struct lnet_ni, ni_list);
-
- if (ni->ni_nid == dstnid) {
- if (srcnidp)
- *srcnidp = dstnid;
- if (orderp) {
- if (LNET_NETTYP(LNET_NIDNET(dstnid)) == LOLND)
- *orderp = 0;
- else
- *orderp = 1;
- }
- lnet_net_unlock(cpt);
-
- return local_nid_dist_zero ? 0 : 1;
- }
-
- if (LNET_NIDNET(ni->ni_nid) == dstnet) {
- /*
-			 * Check if the NI was originally created in the
-			 * current net namespace.
-			 * If not, assign an order above 0xffff0000 to
-			 * deprioritize this NI.
- */
- if (!net_eq(ni->ni_net_ns, current->nsproxy->net_ns))
- order += 0xffff0000;
-
- if (srcnidp)
- *srcnidp = ni->ni_nid;
- if (orderp)
- *orderp = order;
- lnet_net_unlock(cpt);
- return 1;
- }
-
- order++;
- }
-
- rn_list = lnet_net2rnethash(dstnet);
- list_for_each(e, rn_list) {
- rnet = list_entry(e, struct lnet_remotenet, lrn_list);
-
- if (rnet->lrn_net == dstnet) {
- struct lnet_route *route;
- struct lnet_route *shortest = NULL;
- __u32 shortest_hops = LNET_UNDEFINED_HOPS;
- __u32 route_hops;
-
- LASSERT(!list_empty(&rnet->lrn_routes));
-
- list_for_each_entry(route, &rnet->lrn_routes,
- lr_list) {
- route_hops = route->lr_hops;
- if (route_hops == LNET_UNDEFINED_HOPS)
- route_hops = 1;
- if (!shortest ||
- route_hops < shortest_hops) {
- shortest = route;
- shortest_hops = route_hops;
- }
- }
-
- LASSERT(shortest);
- hops = shortest_hops;
- if (srcnidp)
- *srcnidp = shortest->lr_gateway->lp_ni->ni_nid;
- if (orderp)
- *orderp = order;
- lnet_net_unlock(cpt);
- return hops + 1;
- }
- order++;
- }
-
- lnet_net_unlock(cpt);
- return -EHOSTUNREACH;
-}
-EXPORT_SYMBOL(LNetDist);
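
A hedged sketch of how a caller might act on the return value documented above; the example_dist wrapper and dstnid parameter are mine, while LNetDist(), CERROR, CDEBUG and libcfs_nid2str() come from this tree.

static void example_dist(lnet_nid_t dstnid)
{
	lnet_nid_t src;
	__u32 order;
	int dist = LNetDist(dstnid, &src, &order);

	if (dist < 0)
		CERROR("%s unreachable: %d\n", libcfs_nid2str(dstnid), dist);
	else if (dist == 0)
		CDEBUG(D_NET, "local NID, 0@lo will be substituted\n");
	else
		CDEBUG(D_NET, "distance %d: 1 means a local interface, more means %d routed hop(s)\n",
		       dist, dist - 1);
}
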
diff --git a/drivers/staging/lustre/lnet/lnet/lib-msg.c b/drivers/staging/lustre/lnet/lnet/lib-msg.c
deleted file mode 100644
index 0091273c04b9..000000000000
--- a/drivers/staging/lustre/lnet/lnet/lib-msg.c
+++ /dev/null
@@ -1,625 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-msg.c
- *
- * Message decoding, parsing and finalizing routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-void
-lnet_build_unlink_event(struct lnet_libmd *md, struct lnet_event *ev)
-{
- memset(ev, 0, sizeof(*ev));
-
- ev->status = 0;
- ev->unlinked = 1;
- ev->type = LNET_EVENT_UNLINK;
- lnet_md_deconstruct(md, &ev->md);
- lnet_md2handle(&ev->md_handle, md);
-}
-
-/*
- * No lock needed; must be called after lnet_commit_md
- */
-void
-lnet_build_msg_event(struct lnet_msg *msg, enum lnet_event_kind ev_type)
-{
- struct lnet_hdr *hdr = &msg->msg_hdr;
- struct lnet_event *ev = &msg->msg_ev;
-
- LASSERT(!msg->msg_routing);
-
- ev->type = ev_type;
-
- if (ev_type == LNET_EVENT_SEND) {
- /* event for active message */
- ev->target.nid = le64_to_cpu(hdr->dest_nid);
- ev->target.pid = le32_to_cpu(hdr->dest_pid);
- ev->initiator.nid = LNET_NID_ANY;
- ev->initiator.pid = the_lnet.ln_pid;
- ev->sender = LNET_NID_ANY;
- } else {
- /* event for passive message */
- ev->target.pid = hdr->dest_pid;
- ev->target.nid = hdr->dest_nid;
- ev->initiator.pid = hdr->src_pid;
- ev->initiator.nid = hdr->src_nid;
- ev->rlength = hdr->payload_length;
- ev->sender = msg->msg_from;
- ev->mlength = msg->msg_wanted;
- ev->offset = msg->msg_offset;
- }
-
- switch (ev_type) {
- default:
- LBUG();
-
- case LNET_EVENT_PUT: /* passive PUT */
- ev->pt_index = hdr->msg.put.ptl_index;
- ev->match_bits = hdr->msg.put.match_bits;
- ev->hdr_data = hdr->msg.put.hdr_data;
- return;
-
- case LNET_EVENT_GET: /* passive GET */
- ev->pt_index = hdr->msg.get.ptl_index;
- ev->match_bits = hdr->msg.get.match_bits;
- ev->hdr_data = 0;
- return;
-
- case LNET_EVENT_ACK: /* ACK */
- ev->match_bits = hdr->msg.ack.match_bits;
- ev->mlength = hdr->msg.ack.mlength;
- return;
-
- case LNET_EVENT_REPLY: /* REPLY */
- return;
-
- case LNET_EVENT_SEND: /* active message */
- if (msg->msg_type == LNET_MSG_PUT) {
- ev->pt_index = le32_to_cpu(hdr->msg.put.ptl_index);
- ev->match_bits = le64_to_cpu(hdr->msg.put.match_bits);
- ev->offset = le32_to_cpu(hdr->msg.put.offset);
- ev->mlength =
- ev->rlength = le32_to_cpu(hdr->payload_length);
- ev->hdr_data = le64_to_cpu(hdr->msg.put.hdr_data);
-
- } else {
- LASSERT(msg->msg_type == LNET_MSG_GET);
- ev->pt_index = le32_to_cpu(hdr->msg.get.ptl_index);
- ev->match_bits = le64_to_cpu(hdr->msg.get.match_bits);
- ev->mlength =
- ev->rlength = le32_to_cpu(hdr->msg.get.sink_length);
- ev->offset = le32_to_cpu(hdr->msg.get.src_offset);
- ev->hdr_data = 0;
- }
- return;
- }
-}
-
-void
-lnet_msg_commit(struct lnet_msg *msg, int cpt)
-{
- struct lnet_msg_container *container = the_lnet.ln_msg_containers[cpt];
- struct lnet_counters *counters = the_lnet.ln_counters[cpt];
-
- /* routed message can be committed for both receiving and sending */
- LASSERT(!msg->msg_tx_committed);
-
- if (msg->msg_sending) {
- LASSERT(!msg->msg_receiving);
-
- msg->msg_tx_cpt = cpt;
- msg->msg_tx_committed = 1;
- if (msg->msg_rx_committed) { /* routed message REPLY */
- LASSERT(msg->msg_onactivelist);
- return;
- }
- } else {
- LASSERT(!msg->msg_sending);
- msg->msg_rx_cpt = cpt;
- msg->msg_rx_committed = 1;
- }
-
- LASSERT(!msg->msg_onactivelist);
- msg->msg_onactivelist = 1;
- list_add(&msg->msg_activelist, &container->msc_active);
-
- counters->msgs_alloc++;
- if (counters->msgs_alloc > counters->msgs_max)
- counters->msgs_max = counters->msgs_alloc;
-}
-
-static void
-lnet_msg_decommit_tx(struct lnet_msg *msg, int status)
-{
- struct lnet_counters *counters;
- struct lnet_event *ev = &msg->msg_ev;
-
- LASSERT(msg->msg_tx_committed);
- if (status)
- goto out;
-
- counters = the_lnet.ln_counters[msg->msg_tx_cpt];
- switch (ev->type) {
- default: /* routed message */
- LASSERT(msg->msg_routing);
- LASSERT(msg->msg_rx_committed);
- LASSERT(!ev->type);
-
- counters->route_length += msg->msg_len;
- counters->route_count++;
- goto out;
-
- case LNET_EVENT_PUT:
- /* should have been decommitted */
- LASSERT(!msg->msg_rx_committed);
- /* overwritten while sending ACK */
- LASSERT(msg->msg_type == LNET_MSG_ACK);
- msg->msg_type = LNET_MSG_PUT; /* fix type */
- break;
-
- case LNET_EVENT_SEND:
- LASSERT(!msg->msg_rx_committed);
- if (msg->msg_type == LNET_MSG_PUT)
- counters->send_length += msg->msg_len;
- break;
-
- case LNET_EVENT_GET:
- LASSERT(msg->msg_rx_committed);
- /*
-		 * overwritten while sending the reply; we should never
-		 * get here for an optimized GET
- */
- LASSERT(msg->msg_type == LNET_MSG_REPLY);
- msg->msg_type = LNET_MSG_GET; /* fix type */
- break;
- }
-
- counters->send_count++;
- out:
- lnet_return_tx_credits_locked(msg);
- msg->msg_tx_committed = 0;
-}
-
-static void
-lnet_msg_decommit_rx(struct lnet_msg *msg, int status)
-{
- struct lnet_counters *counters;
- struct lnet_event *ev = &msg->msg_ev;
-
- LASSERT(!msg->msg_tx_committed); /* decommitted or never committed */
- LASSERT(msg->msg_rx_committed);
-
- if (status)
- goto out;
-
- counters = the_lnet.ln_counters[msg->msg_rx_cpt];
- switch (ev->type) {
- default:
- LASSERT(!ev->type);
- LASSERT(msg->msg_routing);
- goto out;
-
- case LNET_EVENT_ACK:
- LASSERT(msg->msg_type == LNET_MSG_ACK);
- break;
-
- case LNET_EVENT_GET:
- /*
-		 * type is "REPLY" if it's an optimized GET on the passive
-		 * side, because an optimized GET is never committed for
-		 * sending, so the message type wouldn't be changed back to "GET" by
- * lnet_msg_decommit_tx(), see details in lnet_parse_get()
- */
- LASSERT(msg->msg_type == LNET_MSG_REPLY ||
- msg->msg_type == LNET_MSG_GET);
- counters->send_length += msg->msg_wanted;
- break;
-
- case LNET_EVENT_PUT:
- LASSERT(msg->msg_type == LNET_MSG_PUT);
- break;
-
- case LNET_EVENT_REPLY:
- /*
-		 * type is "GET" if it's an optimized GET on the active side,
- * see details in lnet_create_reply_msg()
- */
- LASSERT(msg->msg_type == LNET_MSG_GET ||
- msg->msg_type == LNET_MSG_REPLY);
- break;
- }
-
- counters->recv_count++;
- if (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_REPLY)
- counters->recv_length += msg->msg_wanted;
-
- out:
- lnet_return_rx_credits_locked(msg);
- msg->msg_rx_committed = 0;
-}
-
-void
-lnet_msg_decommit(struct lnet_msg *msg, int cpt, int status)
-{
- int cpt2 = cpt;
-
- LASSERT(msg->msg_tx_committed || msg->msg_rx_committed);
- LASSERT(msg->msg_onactivelist);
-
- if (msg->msg_tx_committed) { /* always decommit for sending first */
- LASSERT(cpt == msg->msg_tx_cpt);
- lnet_msg_decommit_tx(msg, status);
- }
-
- if (msg->msg_rx_committed) {
- /* forwarding msg committed for both receiving and sending */
- if (cpt != msg->msg_rx_cpt) {
- lnet_net_unlock(cpt);
- cpt2 = msg->msg_rx_cpt;
- lnet_net_lock(cpt2);
- }
- lnet_msg_decommit_rx(msg, status);
- }
-
- list_del(&msg->msg_activelist);
- msg->msg_onactivelist = 0;
-
- the_lnet.ln_counters[cpt2]->msgs_alloc--;
-
- if (cpt2 != cpt) {
- lnet_net_unlock(cpt2);
- lnet_net_lock(cpt);
- }
-}
-
-void
-lnet_msg_attach_md(struct lnet_msg *msg, struct lnet_libmd *md,
- unsigned int offset, unsigned int mlen)
-{
-	/* NB: @offset and @mlen are only useful for receiving */
- /*
-	 * Here we attach the MD to the lnet_msg, mark it busy and
-	 * decrement its threshold. Come what may, the lnet_msg "owns"
- * the MD until a call to lnet_msg_detach_md or lnet_finalize()
- * signals completion.
- */
- LASSERT(!msg->msg_routing);
-
- msg->msg_md = md;
- if (msg->msg_receiving) { /* committed for receiving */
- msg->msg_offset = offset;
- msg->msg_wanted = mlen;
- }
-
- md->md_refcount++;
- if (md->md_threshold != LNET_MD_THRESH_INF) {
- LASSERT(md->md_threshold > 0);
- md->md_threshold--;
- }
-
- /* build umd in event */
- lnet_md2handle(&msg->msg_ev.md_handle, md);
- lnet_md_deconstruct(md, &msg->msg_ev.md);
-}
-
-void
-lnet_msg_detach_md(struct lnet_msg *msg, int status)
-{
- struct lnet_libmd *md = msg->msg_md;
- int unlink;
-
- /* Now it's safe to drop my caller's ref */
- md->md_refcount--;
- LASSERT(md->md_refcount >= 0);
-
- unlink = lnet_md_unlinkable(md);
- if (md->md_eq) {
- msg->msg_ev.status = status;
- msg->msg_ev.unlinked = unlink;
- lnet_eq_enqueue_event(md->md_eq, &msg->msg_ev);
- }
-
- if (unlink)
- lnet_md_unlink(md);
-
- msg->msg_md = NULL;
-}
-
-static int
-lnet_complete_msg_locked(struct lnet_msg *msg, int cpt)
-{
- struct lnet_handle_wire ack_wmd;
- int rc;
- int status = msg->msg_ev.status;
-
- LASSERT(msg->msg_onactivelist);
-
- if (!status && msg->msg_ack) {
- /* Only send an ACK if the PUT completed successfully */
-
- lnet_msg_decommit(msg, cpt, 0);
-
- msg->msg_ack = 0;
- lnet_net_unlock(cpt);
-
- LASSERT(msg->msg_ev.type == LNET_EVENT_PUT);
- LASSERT(!msg->msg_routing);
-
- ack_wmd = msg->msg_hdr.msg.put.ack_wmd;
-
- lnet_prep_send(msg, LNET_MSG_ACK, msg->msg_ev.initiator, 0, 0);
-
- msg->msg_hdr.msg.ack.dst_wmd = ack_wmd;
- msg->msg_hdr.msg.ack.match_bits = msg->msg_ev.match_bits;
- msg->msg_hdr.msg.ack.mlength = cpu_to_le32(msg->msg_ev.mlength);
-
- /*
- * NB: we probably want to use NID of msg::msg_from as 3rd
- * parameter (router NID) if it's routed message
- */
- rc = lnet_send(msg->msg_ev.target.nid, msg, LNET_NID_ANY);
-
- lnet_net_lock(cpt);
- /*
-		 * NB: the message is committed for sending; on success we
-		 * must return because the LND will finalize it later.
-		 *
-		 * Also, a message can be committed for sending and still
-		 * fail before delivery to the LND (e.g. ENOMEM). In that
-		 * case we can't fall through either, because the CPT for
-		 * sending can differ from the CPT for receiving, so we must
-		 * return to lnet_finalize() to make sure we lock the
-		 * correct partition.
- */
- return rc;
-
- } else if (!status && /* OK so far */
- (msg->msg_routing && !msg->msg_sending)) {
- /* not forwarded */
- LASSERT(!msg->msg_receiving); /* called back recv already */
- lnet_net_unlock(cpt);
-
- rc = lnet_send(LNET_NID_ANY, msg, LNET_NID_ANY);
-
- lnet_net_lock(cpt);
- /*
-		 * NB: the message is committed for sending; on success we
-		 * must return because the LND will finalize it later.
-		 *
-		 * Also, a message can be committed for sending and still
-		 * fail before delivery to the LND (e.g. ENOMEM). In that
-		 * case we can't fall through either:
-		 * - the rule is that a message must decommit for sending
-		 *   first if it's committed for both sending and receiving;
-		 * - the CPT for sending can differ from the CPT for
-		 *   receiving, so we must return to lnet_finalize() to make
-		 *   sure we lock the correct partition.
- */
- return rc;
- }
-
- lnet_msg_decommit(msg, cpt, status);
- kfree(msg);
- return 0;
-}
-
-void
-lnet_finalize(struct lnet_ni *ni, struct lnet_msg *msg, int status)
-{
- struct lnet_msg_container *container;
- int my_slot;
- int cpt;
- int rc;
- int i;
-
- LASSERT(!in_interrupt());
-
- if (!msg)
- return;
-
- msg->msg_ev.status = status;
-
- if (msg->msg_md) {
- cpt = lnet_cpt_of_cookie(msg->msg_md->md_lh.lh_cookie);
-
- lnet_res_lock(cpt);
- lnet_msg_detach_md(msg, status);
- lnet_res_unlock(cpt);
- }
-
- again:
- rc = 0;
- if (!msg->msg_tx_committed && !msg->msg_rx_committed) {
- /* not committed to network yet */
- LASSERT(!msg->msg_onactivelist);
- kfree(msg);
- return;
- }
-
- /*
-	 * NB: a routed message can be committed for both receiving and
-	 * sending; finalize in LIFO order to keep the counters correct
-	 * (finalize sending first, then receiving).
- */
- cpt = msg->msg_tx_committed ? msg->msg_tx_cpt : msg->msg_rx_cpt;
- lnet_net_lock(cpt);
-
- container = the_lnet.ln_msg_containers[cpt];
- list_add_tail(&msg->msg_list, &container->msc_finalizing);
-
- /*
- * Recursion breaker. Don't complete the message here if I am (or
- * enough other threads are) already completing messages
- */
- my_slot = -1;
- for (i = 0; i < container->msc_nfinalizers; i++) {
- if (container->msc_finalizers[i] == current)
- break;
-
- if (my_slot < 0 && !container->msc_finalizers[i])
- my_slot = i;
- }
-
- if (i < container->msc_nfinalizers || my_slot < 0) {
- lnet_net_unlock(cpt);
- return;
- }
-
- container->msc_finalizers[my_slot] = current;
-
- while (!list_empty(&container->msc_finalizing)) {
- msg = list_entry(container->msc_finalizing.next,
- struct lnet_msg, msg_list);
-
- list_del(&msg->msg_list);
-
- /*
- * NB drops and regains the lnet lock if it actually does
- * anything, so my finalizing friends can chomp along too
- */
- rc = lnet_complete_msg_locked(msg, cpt);
- if (rc)
- break;
- }
-
- if (unlikely(!list_empty(&the_lnet.ln_delay_rules))) {
- lnet_net_unlock(cpt);
- lnet_delay_rule_check();
- lnet_net_lock(cpt);
- }
-
- container->msc_finalizers[my_slot] = NULL;
- lnet_net_unlock(cpt);
-
- if (rc)
- goto again;
-}
-EXPORT_SYMBOL(lnet_finalize);
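
The recursion breaker above generalizes well: a thread claims a finalizer slot before draining the shared queue, and a thread that is already draining (or that finds every slot busy) merely enqueues and returns. A standalone model of the slot-claiming step, assuming the caller already holds the lock protecting the slot array; claim_finalizer_slot is my name, not one from this tree.

/* Returns the claimed slot index, or -1 if this thread must not drain:
 * either it is already draining (recursion) or every slot is busy. */
static int claim_finalizer_slot(struct task_struct **slots, int nslots)
{
	int i, my_slot = -1;

	for (i = 0; i < nslots; i++) {
		if (slots[i] == current)
			return -1;		/* recursion: already draining */
		if (my_slot < 0 && !slots[i])
			my_slot = i;		/* remember first free slot */
	}
	if (my_slot >= 0)
		slots[my_slot] = current;	/* claim it */
	return my_slot;
}
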
-
-void
-lnet_msg_container_cleanup(struct lnet_msg_container *container)
-{
- int count = 0;
-
- if (!container->msc_init)
- return;
-
- while (!list_empty(&container->msc_active)) {
- struct lnet_msg *msg;
-
- msg = list_entry(container->msc_active.next,
- struct lnet_msg, msg_activelist);
- LASSERT(msg->msg_onactivelist);
- msg->msg_onactivelist = 0;
- list_del(&msg->msg_activelist);
- kfree(msg);
- count++;
- }
-
- if (count > 0)
- CERROR("%d active msg on exit\n", count);
-
- kvfree(container->msc_finalizers);
- container->msc_finalizers = NULL;
- container->msc_init = 0;
-}
-
-int
-lnet_msg_container_setup(struct lnet_msg_container *container, int cpt)
-{
- container->msc_init = 1;
-
- INIT_LIST_HEAD(&container->msc_active);
- INIT_LIST_HEAD(&container->msc_finalizing);
-
- /* number of CPUs */
- container->msc_nfinalizers = cfs_cpt_weight(lnet_cpt_table(), cpt);
-
- container->msc_finalizers = kvzalloc_cpt(container->msc_nfinalizers *
- sizeof(*container->msc_finalizers),
- GFP_KERNEL, cpt);
-
- if (!container->msc_finalizers) {
- CERROR("Failed to allocate message finalizers\n");
- lnet_msg_container_cleanup(container);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-void
-lnet_msg_containers_destroy(void)
-{
- struct lnet_msg_container *container;
- int i;
-
- if (!the_lnet.ln_msg_containers)
- return;
-
- cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers)
- lnet_msg_container_cleanup(container);
-
- cfs_percpt_free(the_lnet.ln_msg_containers);
- the_lnet.ln_msg_containers = NULL;
-}
-
-int
-lnet_msg_containers_create(void)
-{
- struct lnet_msg_container *container;
- int rc;
- int i;
-
- the_lnet.ln_msg_containers = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(*container));
-
- if (!the_lnet.ln_msg_containers) {
- CERROR("Failed to allocate cpu-partition data for network\n");
- return -ENOMEM;
- }
-
- cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers) {
- rc = lnet_msg_container_setup(container, i);
- if (rc) {
- lnet_msg_containers_destroy();
- return rc;
- }
- }
-
- return 0;
-}
diff --git a/drivers/staging/lustre/lnet/lnet/lib-ptl.c b/drivers/staging/lustre/lnet/lnet/lib-ptl.c
deleted file mode 100644
index fc47379c5938..000000000000
--- a/drivers/staging/lustre/lnet/lnet/lib-ptl.c
+++ /dev/null
@@ -1,987 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-ptl.c
- *
- * portal & match routines
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/* NB: add /proc interfaces in upcoming patches */
-int portal_rotor = LNET_PTL_ROTOR_HASH_RT;
-module_param(portal_rotor, int, 0644);
-MODULE_PARM_DESC(portal_rotor, "redirect PUTs to different cpu-partitions");
-
-static int
-lnet_ptl_match_type(unsigned int index, struct lnet_process_id match_id,
- __u64 mbits, __u64 ignore_bits)
-{
- struct lnet_portal *ptl = the_lnet.ln_portals[index];
- int unique;
-
- unique = !ignore_bits &&
- match_id.nid != LNET_NID_ANY &&
- match_id.pid != LNET_PID_ANY;
-
- LASSERT(!lnet_ptl_is_unique(ptl) || !lnet_ptl_is_wildcard(ptl));
-
- /* prefer to check w/o any lock */
- if (likely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl)))
- goto match;
-
- /* unset, new portal */
- lnet_ptl_lock(ptl);
- /* check again with lock */
- if (unlikely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl))) {
- lnet_ptl_unlock(ptl);
- goto match;
- }
-
- /* still not set */
- if (unique)
- lnet_ptl_setopt(ptl, LNET_PTL_MATCH_UNIQUE);
- else
- lnet_ptl_setopt(ptl, LNET_PTL_MATCH_WILDCARD);
-
- lnet_ptl_unlock(ptl);
-
- return 1;
-
- match:
- if ((lnet_ptl_is_unique(ptl) && !unique) ||
- (lnet_ptl_is_wildcard(ptl) && unique))
- return 0;
- return 1;
-}
-
-static void
-lnet_ptl_enable_mt(struct lnet_portal *ptl, int cpt)
-{
- struct lnet_match_table *mtable = ptl->ptl_mtables[cpt];
- int i;
-
- /* with hold of both lnet_res_lock(cpt) and lnet_ptl_lock */
- LASSERT(lnet_ptl_is_wildcard(ptl));
-
- mtable->mt_enabled = 1;
-
- ptl->ptl_mt_maps[ptl->ptl_mt_nmaps] = cpt;
- for (i = ptl->ptl_mt_nmaps - 1; i >= 0; i--) {
- LASSERT(ptl->ptl_mt_maps[i] != cpt);
- if (ptl->ptl_mt_maps[i] < cpt)
- break;
-
- /* swap to order */
- ptl->ptl_mt_maps[i + 1] = ptl->ptl_mt_maps[i];
- ptl->ptl_mt_maps[i] = cpt;
- }
-
- ptl->ptl_mt_nmaps++;
-}
-
-static void
-lnet_ptl_disable_mt(struct lnet_portal *ptl, int cpt)
-{
- struct lnet_match_table *mtable = ptl->ptl_mtables[cpt];
- int i;
-
- /* with hold of both lnet_res_lock(cpt) and lnet_ptl_lock */
- LASSERT(lnet_ptl_is_wildcard(ptl));
-
- if (LNET_CPT_NUMBER == 1)
- return; /* never disable the only match-table */
-
- mtable->mt_enabled = 0;
-
- LASSERT(ptl->ptl_mt_nmaps > 0 &&
- ptl->ptl_mt_nmaps <= LNET_CPT_NUMBER);
-
- /* remove it from mt_maps */
- ptl->ptl_mt_nmaps--;
- for (i = 0; i < ptl->ptl_mt_nmaps; i++) {
- if (ptl->ptl_mt_maps[i] >= cpt) /* overwrite it */
- ptl->ptl_mt_maps[i] = ptl->ptl_mt_maps[i + 1];
- }
-}
-
-static int
-lnet_try_match_md(struct lnet_libmd *md,
- struct lnet_match_info *info, struct lnet_msg *msg)
-{
- /*
- * ALWAYS called holding the lnet_res_lock, and can't lnet_res_unlock;
- * lnet_match_blocked_msg() relies on this to avoid races
- */
- unsigned int offset;
- unsigned int mlength;
- struct lnet_me *me = md->md_me;
-
- /* MD exhausted */
- if (lnet_md_exhausted(md))
- return LNET_MATCHMD_NONE | LNET_MATCHMD_EXHAUSTED;
-
- /* mismatched MD op */
- if (!(md->md_options & info->mi_opc))
- return LNET_MATCHMD_NONE;
-
- /* mismatched ME nid/pid? */
- if (me->me_match_id.nid != LNET_NID_ANY &&
- me->me_match_id.nid != info->mi_id.nid)
- return LNET_MATCHMD_NONE;
-
- if (me->me_match_id.pid != LNET_PID_ANY &&
- me->me_match_id.pid != info->mi_id.pid)
- return LNET_MATCHMD_NONE;
-
- /* mismatched ME matchbits? */
- if ((me->me_match_bits ^ info->mi_mbits) & ~me->me_ignore_bits)
- return LNET_MATCHMD_NONE;
-
- /* Hurrah! This _is_ a match; check it out... */
-
- if (!(md->md_options & LNET_MD_MANAGE_REMOTE))
- offset = md->md_offset;
- else
- offset = info->mi_roffset;
-
- if (md->md_options & LNET_MD_MAX_SIZE) {
- mlength = md->md_max_size;
- LASSERT(md->md_offset + mlength <= md->md_length);
- } else {
- mlength = md->md_length - offset;
- }
-
- if (info->mi_rlength <= mlength) { /* fits in allowed space */
- mlength = info->mi_rlength;
- } else if (!(md->md_options & LNET_MD_TRUNCATE)) {
- /* this packet _really_ is too big */
- CERROR("Matching packet from %s, match %llu length %d too big: %d left, %d allowed\n",
- libcfs_id2str(info->mi_id), info->mi_mbits,
- info->mi_rlength, md->md_length - offset, mlength);
-
- return LNET_MATCHMD_DROP;
- }
-
- /* Commit to this ME/MD */
- CDEBUG(D_NET, "Incoming %s index %x from %s of length %d/%d into md %#llx [%d] + %d\n",
- (info->mi_opc == LNET_MD_OP_PUT) ? "put" : "get",
- info->mi_portal, libcfs_id2str(info->mi_id), mlength,
- info->mi_rlength, md->md_lh.lh_cookie, md->md_niov, offset);
-
- lnet_msg_attach_md(msg, md, offset, mlength);
- md->md_offset = offset + mlength;
-
- if (!lnet_md_exhausted(md))
- return LNET_MATCHMD_OK;
-
- /*
- * Auto-unlink NOW, so the ME gets unlinked if required.
- * We bumped md->md_refcount above so the MD just gets flagged
- * for unlink when it is finalized.
- */
- if (md->md_flags & LNET_MD_FLAG_AUTO_UNLINK)
- lnet_md_unlink(md);
-
- return LNET_MATCHMD_OK | LNET_MATCHMD_EXHAUSTED;
-}
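
A worked example of the matchbits predicate used above may help: an ME matches when every bit outside me_ignore_bits agrees. A hedged, standalone restatement in userspace C with arbitrary constants:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the test in lnet_try_match_md(): a mismatch exists iff the
 * two values differ in any bit NOT covered by ignore_bits. */
static int me_matches(uint64_t match_bits, uint64_t ignore_bits,
		      uint64_t mbits)
{
	return !((match_bits ^ mbits) & ~ignore_bits);
}

int main(void)
{
	/* ignoring the low byte, 0xff42 matches 0xff00 but 0x1234 does not */
	printf("%d %d\n",
	       me_matches(0xff00, 0x00ff, 0xff42),	/* prints 1 */
	       me_matches(0xff00, 0x00ff, 0x1234));	/* prints 0 */
	return 0;
}
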
-
-static struct lnet_match_table *
-lnet_match2mt(struct lnet_portal *ptl, struct lnet_process_id id, __u64 mbits)
-{
- if (LNET_CPT_NUMBER == 1)
- return ptl->ptl_mtables[0]; /* the only one */
-
- /* if it's a unique portal, return match-table hashed by NID */
- return lnet_ptl_is_unique(ptl) ?
- ptl->ptl_mtables[lnet_cpt_of_nid(id.nid)] : NULL;
-}
-
-struct lnet_match_table *
-lnet_mt_of_attach(unsigned int index, struct lnet_process_id id,
- __u64 mbits, __u64 ignore_bits, enum lnet_ins_pos pos)
-{
- struct lnet_portal *ptl;
- struct lnet_match_table *mtable;
-
- /* NB: called w/o lock */
- LASSERT(index < the_lnet.ln_nportals);
-
- if (!lnet_ptl_match_type(index, id, mbits, ignore_bits))
- return NULL;
-
- ptl = the_lnet.ln_portals[index];
-
- mtable = lnet_match2mt(ptl, id, mbits);
- if (mtable) /* unique portal or only one match-table */
- return mtable;
-
- /* it's a wildcard portal */
- switch (pos) {
- default:
- return NULL;
- case LNET_INS_BEFORE:
- case LNET_INS_AFTER:
- /*
-		 * posted by a non-affinity thread; always hash to a specific
-		 * match-table to avoid buffer stealing, which is heavy
- */
- return ptl->ptl_mtables[ptl->ptl_index % LNET_CPT_NUMBER];
- case LNET_INS_LOCAL:
- /* posted by cpu-affinity thread */
- return ptl->ptl_mtables[lnet_cpt_current()];
- }
-}
-
-static struct lnet_match_table *
-lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg)
-{
- struct lnet_match_table *mtable;
- struct lnet_portal *ptl;
- unsigned int nmaps;
- unsigned int rotor;
- unsigned int cpt;
- bool routed;
-
- /* NB: called w/o lock */
- LASSERT(info->mi_portal < the_lnet.ln_nportals);
- ptl = the_lnet.ln_portals[info->mi_portal];
-
- LASSERT(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl));
-
- mtable = lnet_match2mt(ptl, info->mi_id, info->mi_mbits);
- if (mtable)
- return mtable;
-
- /* it's a wildcard portal */
- routed = LNET_NIDNET(msg->msg_hdr.src_nid) !=
- LNET_NIDNET(msg->msg_hdr.dest_nid);
-
- if (portal_rotor == LNET_PTL_ROTOR_OFF ||
- (portal_rotor != LNET_PTL_ROTOR_ON && !routed)) {
- cpt = lnet_cpt_current();
- if (ptl->ptl_mtables[cpt]->mt_enabled)
- return ptl->ptl_mtables[cpt];
- }
-
- rotor = ptl->ptl_rotor++; /* get round-robin factor */
- if (portal_rotor == LNET_PTL_ROTOR_HASH_RT && routed)
- cpt = lnet_cpt_of_nid(msg->msg_hdr.src_nid);
- else
- cpt = rotor % LNET_CPT_NUMBER;
-
- if (!ptl->ptl_mtables[cpt]->mt_enabled) {
- /* is there any active entry for this portal? */
- nmaps = ptl->ptl_mt_nmaps;
- /* map to an active mtable to avoid heavy "stealing" */
- if (nmaps) {
- /*
-			 * NB: ptl_mt_maps may be changing under us since we
-			 * don't hold lnet_ptl_lock, but that shouldn't hurt
-			 * anything
- */
- cpt = ptl->ptl_mt_maps[rotor % nmaps];
- }
- }
-
- return ptl->ptl_mtables[cpt];
-}
-
-static int
-lnet_mt_test_exhausted(struct lnet_match_table *mtable, int pos)
-{
- __u64 *bmap;
- int i;
-
- if (!lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]))
- return 0;
-
- if (pos < 0) { /* check all bits */
- for (i = 0; i < LNET_MT_EXHAUSTED_BMAP; i++) {
- if (mtable->mt_exhausted[i] != (__u64)(-1))
- return 0;
- }
- return 1;
- }
-
- LASSERT(pos <= LNET_MT_HASH_IGNORE);
- /* mtable::mt_mhash[pos] is marked as exhausted or not */
- bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64];
- pos &= (1 << LNET_MT_BITS_U64) - 1;
-
- return (*bmap & BIT(pos));
-}
-
-static void
-lnet_mt_set_exhausted(struct lnet_match_table *mtable, int pos, int exhausted)
-{
- __u64 *bmap;
-
- LASSERT(lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]));
- LASSERT(pos <= LNET_MT_HASH_IGNORE);
-
- /* set mtable::mt_mhash[pos] as exhausted/non-exhausted */
- bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64];
- pos &= (1 << LNET_MT_BITS_U64) - 1;
-
- if (!exhausted)
- *bmap &= ~(1ULL << pos);
- else
- *bmap |= 1ULL << pos;
-}
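
The two helpers above address a single bit in a flat array of __u64 words: the word index is pos >> LNET_MT_BITS_U64 and the bit index is the low bits of pos (LNET_MT_BITS_U64 is presumably 6, i.e. 64 bits per word; treat that value as an assumption). A self-contained sketch of the same addressing:

#include <stdint.h>

#define BITS_U64 6	/* assumed value of LNET_MT_BITS_U64: 2^6 bits/word */

static void bmap_set(uint64_t *bmap, int pos, int set)
{
	uint64_t *word = &bmap[pos >> BITS_U64];
	int bit = pos & ((1 << BITS_U64) - 1);

	if (set)
		*word |= 1ULL << bit;		/* mark exhausted */
	else
		*word &= ~(1ULL << bit);	/* mark non-exhausted */
}
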
-
-struct list_head *
-lnet_mt_match_head(struct lnet_match_table *mtable,
- struct lnet_process_id id, __u64 mbits)
-{
- struct lnet_portal *ptl = the_lnet.ln_portals[mtable->mt_portal];
- unsigned long hash = mbits;
-
- if (!lnet_ptl_is_wildcard(ptl)) {
- hash += id.nid + id.pid;
-
- LASSERT(lnet_ptl_is_unique(ptl));
- hash = hash_long(hash, LNET_MT_HASH_BITS);
- }
- return &mtable->mt_mhash[hash & LNET_MT_HASH_MASK];
-}
-
-int
-lnet_mt_match_md(struct lnet_match_table *mtable,
- struct lnet_match_info *info, struct lnet_msg *msg)
-{
- struct list_head *head;
- struct lnet_me *me;
- struct lnet_me *tmp;
- int exhausted = 0;
- int rc;
-
- /* any ME with ignore bits? */
- if (!list_empty(&mtable->mt_mhash[LNET_MT_HASH_IGNORE]))
- head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE];
- else
- head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits);
- again:
- /* NB: only wildcard portal needs to return LNET_MATCHMD_EXHAUSTED */
- if (lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]))
- exhausted = LNET_MATCHMD_EXHAUSTED;
-
- list_for_each_entry_safe(me, tmp, head, me_list) {
- /* ME attached but MD not attached yet */
- if (!me->me_md)
- continue;
-
- LASSERT(me == me->me_md->md_me);
-
- rc = lnet_try_match_md(me->me_md, info, msg);
- if (!(rc & LNET_MATCHMD_EXHAUSTED))
- exhausted = 0; /* mlist is not empty */
-
- if (rc & LNET_MATCHMD_FINISH) {
- /*
- * don't return EXHAUSTED bit because we don't know
- * whether the mlist is empty or not
- */
- return rc & ~LNET_MATCHMD_EXHAUSTED;
- }
- }
-
- if (exhausted == LNET_MATCHMD_EXHAUSTED) { /* @head is exhausted */
- lnet_mt_set_exhausted(mtable, head - mtable->mt_mhash, 1);
- if (!lnet_mt_test_exhausted(mtable, -1))
- exhausted = 0;
- }
-
- if (!exhausted && head == &mtable->mt_mhash[LNET_MT_HASH_IGNORE]) {
- head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits);
- goto again; /* re-check MEs w/o ignore-bits */
- }
-
- if (info->mi_opc == LNET_MD_OP_GET ||
- !lnet_ptl_is_lazy(the_lnet.ln_portals[info->mi_portal]))
- return exhausted | LNET_MATCHMD_DROP;
-
- return exhausted | LNET_MATCHMD_NONE;
-}
-
-static int
-lnet_ptl_match_early(struct lnet_portal *ptl, struct lnet_msg *msg)
-{
- int rc;
-
- /*
-	 * the message arrived before any buffer was posted on this
-	 * portal; simply delay or drop it
- */
- if (likely(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)))
- return 0;
-
- lnet_ptl_lock(ptl);
- /* check it again with hold of lock */
- if (lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)) {
- lnet_ptl_unlock(ptl);
- return 0;
- }
-
- if (lnet_ptl_is_lazy(ptl)) {
- if (msg->msg_rx_ready_delay) {
- msg->msg_rx_delayed = 1;
- list_add_tail(&msg->msg_list,
- &ptl->ptl_msg_delayed);
- }
- rc = LNET_MATCHMD_NONE;
- } else {
- rc = LNET_MATCHMD_DROP;
- }
-
- lnet_ptl_unlock(ptl);
- return rc;
-}
-
-static int
-lnet_ptl_match_delay(struct lnet_portal *ptl,
- struct lnet_match_info *info, struct lnet_msg *msg)
-{
- int first = ptl->ptl_mt_maps[0]; /* read w/o lock */
- int rc = 0;
- int i;
-
- /**
- * Steal buffer from other CPTs, and delay msg if nothing to
- * steal. This function is more expensive than a regular
-	 * match, but we don't expect it to happen often. The return
- * code contains one of LNET_MATCHMD_OK, LNET_MATCHMD_DROP, or
- * LNET_MATCHMD_NONE.
- */
- LASSERT(lnet_ptl_is_wildcard(ptl));
-
- for (i = 0; i < LNET_CPT_NUMBER; i++) {
- struct lnet_match_table *mtable;
- int cpt;
-
- cpt = (first + i) % LNET_CPT_NUMBER;
- mtable = ptl->ptl_mtables[cpt];
- if (i && i != LNET_CPT_NUMBER - 1 && !mtable->mt_enabled)
- continue;
-
- lnet_res_lock(cpt);
- lnet_ptl_lock(ptl);
-
- if (!i) {
- /* The first try, add to stealing list. */
- list_add_tail(&msg->msg_list,
- &ptl->ptl_msg_stealing);
- }
-
- if (!list_empty(&msg->msg_list)) {
- /* On stealing list. */
- rc = lnet_mt_match_md(mtable, info, msg);
-
- if ((rc & LNET_MATCHMD_EXHAUSTED) &&
- mtable->mt_enabled)
- lnet_ptl_disable_mt(ptl, cpt);
-
- if (rc & LNET_MATCHMD_FINISH) {
- /* Match found, remove from stealing list. */
- list_del_init(&msg->msg_list);
- } else if (i == LNET_CPT_NUMBER - 1 || /* (1) */
- !ptl->ptl_mt_nmaps || /* (2) */
- (ptl->ptl_mt_nmaps == 1 && /* (3) */
- ptl->ptl_mt_maps[0] == cpt)) {
- /**
- * No match found, and this is either
- * (1) the last cpt to check, or
- * (2) there is no active cpt, or
- * (3) this is the only active cpt.
- * There is nothing to steal: delay or
- * drop the message.
- */
- list_del_init(&msg->msg_list);
-
- if (lnet_ptl_is_lazy(ptl)) {
- msg->msg_rx_delayed = 1;
- list_add_tail(&msg->msg_list,
- &ptl->ptl_msg_delayed);
- rc = LNET_MATCHMD_NONE;
- } else {
- rc = LNET_MATCHMD_DROP;
- }
- } else {
- /* Do another iteration. */
- rc = 0;
- }
- } else {
- /**
- * No longer on stealing list: another thread
- * matched the message in lnet_ptl_attach_md().
- * We are now expected to handle the message.
- */
- rc = !msg->msg_md ?
- LNET_MATCHMD_DROP : LNET_MATCHMD_OK;
- }
-
- lnet_ptl_unlock(ptl);
- lnet_res_unlock(cpt);
-
- /**
- * Note that test (1) above ensures that we always
- * exit the loop through this break statement.
- *
- * LNET_MATCHMD_NONE means msg was added to the
- * delayed queue, and we may no longer reference it
- * after lnet_ptl_unlock() and lnet_res_unlock().
- */
- if (rc & (LNET_MATCHMD_FINISH | LNET_MATCHMD_NONE))
- break;
- }
-
- return rc;
-}
-
-int
-lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg)
-{
- struct lnet_match_table *mtable;
- struct lnet_portal *ptl;
- int rc;
-
- CDEBUG(D_NET, "Request from %s of length %d into portal %d MB=%#llx\n",
- libcfs_id2str(info->mi_id), info->mi_rlength, info->mi_portal,
- info->mi_mbits);
-
- if (info->mi_portal >= the_lnet.ln_nportals) {
- CERROR("Invalid portal %d not in [0-%d]\n",
- info->mi_portal, the_lnet.ln_nportals);
- return LNET_MATCHMD_DROP;
- }
-
- ptl = the_lnet.ln_portals[info->mi_portal];
- rc = lnet_ptl_match_early(ptl, msg);
- if (rc) /* matched or delayed early message */
- return rc;
-
- mtable = lnet_mt_of_match(info, msg);
- lnet_res_lock(mtable->mt_cpt);
-
- if (the_lnet.ln_shutdown) {
- rc = LNET_MATCHMD_DROP;
- goto out1;
- }
-
- rc = lnet_mt_match_md(mtable, info, msg);
- if ((rc & LNET_MATCHMD_EXHAUSTED) && mtable->mt_enabled) {
- lnet_ptl_lock(ptl);
- lnet_ptl_disable_mt(ptl, mtable->mt_cpt);
- lnet_ptl_unlock(ptl);
- }
-
- if (rc & LNET_MATCHMD_FINISH) /* matched or dropping */
- goto out1;
-
- if (!msg->msg_rx_ready_delay)
- goto out1;
-
- LASSERT(lnet_ptl_is_lazy(ptl));
- LASSERT(!msg->msg_rx_delayed);
-
-	/* NB: we don't expect "delay" to happen often */
- if (lnet_ptl_is_unique(ptl) || LNET_CPT_NUMBER == 1) {
- lnet_ptl_lock(ptl);
-
- msg->msg_rx_delayed = 1;
- list_add_tail(&msg->msg_list, &ptl->ptl_msg_delayed);
-
- lnet_ptl_unlock(ptl);
- lnet_res_unlock(mtable->mt_cpt);
- rc = LNET_MATCHMD_NONE;
- } else {
- lnet_res_unlock(mtable->mt_cpt);
- rc = lnet_ptl_match_delay(ptl, info, msg);
- }
-
- /* LNET_MATCHMD_NONE means msg was added to the delay queue */
- if (rc & LNET_MATCHMD_NONE) {
- CDEBUG(D_NET,
- "Delaying %s from %s ptl %d MB %#llx off %d len %d\n",
- info->mi_opc == LNET_MD_OP_PUT ? "PUT" : "GET",
- libcfs_id2str(info->mi_id), info->mi_portal,
- info->mi_mbits, info->mi_roffset, info->mi_rlength);
- }
- goto out0;
- out1:
- lnet_res_unlock(mtable->mt_cpt);
- out0:
- /* EXHAUSTED bit is only meaningful for internal functions */
- return rc & ~LNET_MATCHMD_EXHAUSTED;
-}
-
-void
-lnet_ptl_detach_md(struct lnet_me *me, struct lnet_libmd *md)
-{
- LASSERT(me->me_md == md && md->md_me == me);
-
- me->me_md = NULL;
- md->md_me = NULL;
-}
-
-/* called with lnet_res_lock held */
-void
-lnet_ptl_attach_md(struct lnet_me *me, struct lnet_libmd *md,
- struct list_head *matches, struct list_head *drops)
-{
- struct lnet_portal *ptl = the_lnet.ln_portals[me->me_portal];
- struct lnet_match_table *mtable;
- struct list_head *head;
- struct lnet_msg *tmp;
- struct lnet_msg *msg;
- int exhausted = 0;
- int cpt;
-
- LASSERT(!md->md_refcount); /* a brand new MD */
-
- me->me_md = md;
- md->md_me = me;
-
- cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie);
- mtable = ptl->ptl_mtables[cpt];
-
- if (list_empty(&ptl->ptl_msg_stealing) &&
- list_empty(&ptl->ptl_msg_delayed) &&
- !lnet_mt_test_exhausted(mtable, me->me_pos))
- return;
-
- lnet_ptl_lock(ptl);
- head = &ptl->ptl_msg_stealing;
- again:
- list_for_each_entry_safe(msg, tmp, head, msg_list) {
- struct lnet_match_info info;
- struct lnet_hdr *hdr;
- int rc;
-
- LASSERT(msg->msg_rx_delayed || head == &ptl->ptl_msg_stealing);
-
- hdr = &msg->msg_hdr;
- info.mi_id.nid = hdr->src_nid;
- info.mi_id.pid = hdr->src_pid;
- info.mi_opc = LNET_MD_OP_PUT;
- info.mi_portal = hdr->msg.put.ptl_index;
- info.mi_rlength = hdr->payload_length;
- info.mi_roffset = hdr->msg.put.offset;
- info.mi_mbits = hdr->msg.put.match_bits;
-
- rc = lnet_try_match_md(md, &info, msg);
-
- exhausted = (rc & LNET_MATCHMD_EXHAUSTED);
- if (rc & LNET_MATCHMD_NONE) {
- if (exhausted)
- break;
- continue;
- }
-
- /* Hurrah! This _is_ a match */
- LASSERT(rc & LNET_MATCHMD_FINISH);
- list_del_init(&msg->msg_list);
-
- if (head == &ptl->ptl_msg_stealing) {
- if (exhausted)
- break;
- /* stealing thread will handle the message */
- continue;
- }
-
- if (rc & LNET_MATCHMD_OK) {
- list_add_tail(&msg->msg_list, matches);
-
- CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n",
- libcfs_id2str(info.mi_id),
- info.mi_portal, info.mi_mbits,
- info.mi_roffset, info.mi_rlength);
- } else {
- list_add_tail(&msg->msg_list, drops);
- }
-
- if (exhausted)
- break;
- }
-
- if (!exhausted && head == &ptl->ptl_msg_stealing) {
- head = &ptl->ptl_msg_delayed;
- goto again;
- }
-
- if (lnet_ptl_is_wildcard(ptl) && !exhausted) {
- lnet_mt_set_exhausted(mtable, me->me_pos, 0);
- if (!mtable->mt_enabled)
- lnet_ptl_enable_mt(ptl, cpt);
- }
-
- lnet_ptl_unlock(ptl);
-}
-
-static void
-lnet_ptl_cleanup(struct lnet_portal *ptl)
-{
- struct lnet_match_table *mtable;
- int i;
-
- if (!ptl->ptl_mtables) /* uninitialized portal */
- return;
-
- LASSERT(list_empty(&ptl->ptl_msg_delayed));
- LASSERT(list_empty(&ptl->ptl_msg_stealing));
- cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) {
- struct list_head *mhash;
- struct lnet_me *me;
- int j;
-
- if (!mtable->mt_mhash) /* uninitialized match-table */
- continue;
-
- mhash = mtable->mt_mhash;
- /* cleanup ME */
- for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++) {
- while (!list_empty(&mhash[j])) {
- me = list_entry(mhash[j].next,
- struct lnet_me, me_list);
- CERROR("Active ME %p on exit\n", me);
- list_del(&me->me_list);
- kfree(me);
- }
- }
- /* the extra entry is for MEs with ignore bits */
- kvfree(mhash);
- }
-
- cfs_percpt_free(ptl->ptl_mtables);
- ptl->ptl_mtables = NULL;
-}
-
-static int
-lnet_ptl_setup(struct lnet_portal *ptl, int index)
-{
- struct lnet_match_table *mtable;
- struct list_head *mhash;
- int i;
- int j;
-
- ptl->ptl_mtables = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(struct lnet_match_table));
- if (!ptl->ptl_mtables) {
- CERROR("Failed to create match table for portal %d\n", index);
- return -ENOMEM;
- }
-
- ptl->ptl_index = index;
- INIT_LIST_HEAD(&ptl->ptl_msg_delayed);
- INIT_LIST_HEAD(&ptl->ptl_msg_stealing);
- spin_lock_init(&ptl->ptl_lock);
- cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) {
- /* the extra entry is for MEs with ignore bits */
- mhash = kvzalloc_cpt(sizeof(*mhash) * (LNET_MT_HASH_SIZE + 1),
- GFP_KERNEL, i);
- if (!mhash) {
- CERROR("Failed to create match hash for portal %d\n",
- index);
- goto failed;
- }
-
- memset(&mtable->mt_exhausted[0], -1,
- sizeof(mtable->mt_exhausted[0]) *
- LNET_MT_EXHAUSTED_BMAP);
- mtable->mt_mhash = mhash;
- for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++)
- INIT_LIST_HEAD(&mhash[j]);
-
- mtable->mt_portal = index;
- mtable->mt_cpt = i;
- }
-
- return 0;
- failed:
- lnet_ptl_cleanup(ptl);
- return -ENOMEM;
-}
-
-void
-lnet_portals_destroy(void)
-{
- int i;
-
- if (!the_lnet.ln_portals)
- return;
-
- for (i = 0; i < the_lnet.ln_nportals; i++)
- lnet_ptl_cleanup(the_lnet.ln_portals[i]);
-
- cfs_array_free(the_lnet.ln_portals);
- the_lnet.ln_portals = NULL;
- the_lnet.ln_nportals = 0;
-}
-
-int
-lnet_portals_create(void)
-{
- int size;
- int i;
-
- size = offsetof(struct lnet_portal, ptl_mt_maps[LNET_CPT_NUMBER]);
-
- the_lnet.ln_portals = cfs_array_alloc(MAX_PORTALS, size);
- if (!the_lnet.ln_portals) {
- CERROR("Failed to allocate portals table\n");
- return -ENOMEM;
- }
- the_lnet.ln_nportals = MAX_PORTALS;
-
- for (i = 0; i < the_lnet.ln_nportals; i++) {
- if (lnet_ptl_setup(the_lnet.ln_portals[i], i)) {
- lnet_portals_destroy();
- return -ENOMEM;
- }
- }
-
- return 0;
-}
-
-/**
- * Turn on the lazy portal attribute. Use with caution!
- *
- * This portal attribute only affects incoming PUT requests to the portal,
- * and is off by default. By default, if there's no matching MD for an
- * incoming PUT request, it is simply dropped. With the lazy attribute on,
- * such requests are queued indefinitely until either a matching MD is
- * posted to the portal or the lazy attribute is turned off.
- *
- * While this prevents dropped requests, it should be regarded as the last
- * line of defense: users must keep a close watch on the active buffers of a
- * lazy portal and, once the count runs low, post more buffers as soon as
- * possible, because delayed requests usually have detrimental effects on the
- * underlying network connections. A few delayed requests often suffice to
- * bring an underlying connection to a complete halt, due to flow control
- * mechanisms.
- *
- * There's also a DOS attack risk. If users don't post match-all MDs on a
- * lazy portal, a malicious peer can easily stop a service by sending some
- * PUT requests with match bits that won't match any MD. A routed server is
- * especially vulnerable since the connections to its neighbor routers are
- * shared among all clients.
- *
- * \param portal Index of the portal to enable the lazy attribute on.
- *
- * \retval 0 On success.
- * \retval -EINVAL If \a portal is not a valid index.
- */
-int
-LNetSetLazyPortal(int portal)
-{
- struct lnet_portal *ptl;
-
- if (portal < 0 || portal >= the_lnet.ln_nportals)
- return -EINVAL;
-
- CDEBUG(D_NET, "Setting portal %d lazy\n", portal);
- ptl = the_lnet.ln_portals[portal];
-
- lnet_res_lock(LNET_LOCK_EX);
- lnet_ptl_lock(ptl);
-
- lnet_ptl_setopt(ptl, LNET_PTL_LAZY);
-
- lnet_ptl_unlock(ptl);
- lnet_res_unlock(LNET_LOCK_EX);
-
- return 0;
-}
-EXPORT_SYMBOL(LNetSetLazyPortal);
-
-int
-lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason)
-{
- struct lnet_portal *ptl;
- LIST_HEAD(zombies);
-
- if (portal < 0 || portal >= the_lnet.ln_nportals)
- return -EINVAL;
-
- ptl = the_lnet.ln_portals[portal];
-
- lnet_res_lock(LNET_LOCK_EX);
- lnet_ptl_lock(ptl);
-
- if (!lnet_ptl_is_lazy(ptl)) {
- lnet_ptl_unlock(ptl);
- lnet_res_unlock(LNET_LOCK_EX);
- return 0;
- }
-
- if (ni) {
- struct lnet_msg *msg, *tmp;
-
- /* grab all messages which are on the NI passed in */
- list_for_each_entry_safe(msg, tmp, &ptl->ptl_msg_delayed,
- msg_list) {
- if (msg->msg_rxpeer->lp_ni == ni)
- list_move(&msg->msg_list, &zombies);
- }
- } else {
- if (the_lnet.ln_shutdown)
- CWARN("Active lazy portal %d on exit\n", portal);
- else
- CDEBUG(D_NET, "clearing portal %d lazy\n", portal);
-
- /* grab all the blocked messages atomically */
- list_splice_init(&ptl->ptl_msg_delayed, &zombies);
-
- lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY);
- }
-
- lnet_ptl_unlock(ptl);
- lnet_res_unlock(LNET_LOCK_EX);
-
- lnet_drop_delayed_msg_list(&zombies, reason);
-
- return 0;
-}
-
-/**
- * Turn off the lazy portal attribute. Delayed requests on the portal,
- * if any, will be all dropped when this function returns.
- *
- * \param portal Index of the portal to disable the lazy attribute on.
- *
- * \retval 0 On success.
- * \retval -EINVAL If \a portal is not a valid index.
- */
-int
-LNetClearLazyPortal(int portal)
-{
- return lnet_clear_lazy_portal(NULL, portal,
- "Clearing lazy portal attr");
-}
-EXPORT_SYMBOL(LNetClearLazyPortal);
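
A hedged sketch of the usage pattern these two calls and the warning above imply: turn the lazy attribute on only while match-all buffers are kept posted, and clear it (dropping anything still delayed) when the service winds down. The example_lazy_service wrapper and portal index 30 are arbitrary.

static int example_lazy_service(void)
{
	int rc = LNetSetLazyPortal(30);	/* queue unmatched PUTs */

	if (rc)
		return rc;
	/* ... keep plenty of match-all MDs posted on portal 30 ... */
	return LNetClearLazyPortal(30);	/* drops anything still delayed */
}
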
diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c
deleted file mode 100644
index 9b61260155f2..000000000000
--- a/drivers/staging/lustre/lnet/lnet/lib-socket.c
+++ /dev/null
@@ -1,585 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- */
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/if.h>
-#include <linux/in.h>
-#include <linux/net.h>
-#include <linux/file.h>
-#include <linux/pagemap.h>
-/* For sys_open & sys_close */
-#include <linux/syscalls.h>
-#include <net/sock.h>
-
-#include <linux/lnet/lib-lnet.h>
-
-static int
-kernel_sock_unlocked_ioctl(struct file *filp, int cmd, unsigned long arg)
-{
- mm_segment_t oldfs = get_fs();
- int err;
-
- set_fs(KERNEL_DS);
- err = filp->f_op->unlocked_ioctl(filp, cmd, arg);
- set_fs(oldfs);
-
- return err;
-}
-
-static int
-lnet_sock_ioctl(int cmd, unsigned long arg)
-{
- struct file *sock_filp;
- struct socket *sock;
- int rc;
-
- rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
- if (rc) {
- CERROR("Can't create socket: %d\n", rc);
- return rc;
- }
-
- sock_filp = sock_alloc_file(sock, 0, NULL);
- if (IS_ERR(sock_filp))
- return PTR_ERR(sock_filp);
-
- rc = kernel_sock_unlocked_ioctl(sock_filp, cmd, arg);
-
- fput(sock_filp);
- return rc;
-}
-
-int
-lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask)
-{
- struct ifreq ifr;
- int nob;
- int rc;
- __be32 val;
-
- nob = strnlen(name, IFNAMSIZ);
- if (nob == IFNAMSIZ) {
- CERROR("Interface name %s too long\n", name);
- return -EINVAL;
- }
-
- BUILD_BUG_ON(sizeof(ifr.ifr_name) < IFNAMSIZ);
-
- if (strlen(name) > sizeof(ifr.ifr_name) - 1)
- return -E2BIG;
- strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
-
- rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr);
- if (rc) {
- CERROR("Can't get flags for interface %s\n", name);
- return rc;
- }
-
- if (!(ifr.ifr_flags & IFF_UP)) {
- CDEBUG(D_NET, "Interface %s down\n", name);
- *up = 0;
- *ip = *mask = 0;
- return 0;
- }
- *up = 1;
-
- if (strlen(name) > sizeof(ifr.ifr_name) - 1)
- return -E2BIG;
- strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
-
- ifr.ifr_addr.sa_family = AF_INET;
- rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr);
- if (rc) {
- CERROR("Can't get IP address for interface %s\n", name);
- return rc;
- }
-
- val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
- *ip = ntohl(val);
-
- if (strlen(name) > sizeof(ifr.ifr_name) - 1)
- return -E2BIG;
- strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
-
- ifr.ifr_addr.sa_family = AF_INET;
- rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr);
- if (rc) {
- CERROR("Can't get netmask for interface %s\n", name);
- return rc;
- }
-
- val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr;
- *mask = ntohl(val);
-
- return 0;
-}
-EXPORT_SYMBOL(lnet_ipif_query);
-
-int
-lnet_ipif_enumerate(char ***namesp)
-{
- /* Allocate and fill in 'names', returning # interfaces/error */
- char **names;
- int toobig;
- int nalloc;
- int nfound;
- struct ifreq *ifr;
- struct ifconf ifc;
- int rc;
- int nob;
- int i;
-
- nalloc = 16; /* first guess at max interfaces */
- toobig = 0;
- for (;;) {
- if (nalloc * sizeof(*ifr) > PAGE_SIZE) {
- toobig = 1;
- nalloc = PAGE_SIZE / sizeof(*ifr);
- CWARN("Too many interfaces: only enumerating first %d\n",
- nalloc);
- }
-
- ifr = kzalloc(nalloc * sizeof(*ifr), GFP_KERNEL);
- if (!ifr) {
- CERROR("ENOMEM enumerating up to %d interfaces\n",
- nalloc);
- rc = -ENOMEM;
- goto out0;
- }
-
- ifc.ifc_buf = (char *)ifr;
- ifc.ifc_len = nalloc * sizeof(*ifr);
-
- rc = lnet_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc);
- if (rc < 0) {
- CERROR("Error %d enumerating interfaces\n", rc);
- goto out1;
- }
-
- LASSERT(!rc);
-
- nfound = ifc.ifc_len / sizeof(*ifr);
- LASSERT(nfound <= nalloc);
-
- if (nfound < nalloc || toobig)
- break;
-
- kfree(ifr);
- nalloc *= 2;
- }
-
- if (!nfound)
- goto out1;
-
- names = kzalloc(nfound * sizeof(*names), GFP_KERNEL);
- if (!names) {
- rc = -ENOMEM;
- goto out1;
- }
-
- for (i = 0; i < nfound; i++) {
- nob = strnlen(ifr[i].ifr_name, IFNAMSIZ);
- if (nob == IFNAMSIZ) {
- /* no space for terminating NULL */
- CERROR("interface name %.*s too long (%d max)\n",
- nob, ifr[i].ifr_name, IFNAMSIZ);
- rc = -ENAMETOOLONG;
- goto out2;
- }
-
- names[i] = kmalloc(IFNAMSIZ, GFP_KERNEL);
- if (!names[i]) {
- rc = -ENOMEM;
- goto out2;
- }
-
- memcpy(names[i], ifr[i].ifr_name, nob);
- names[i][nob] = 0;
- }
-
- *namesp = names;
- rc = nfound;
-
-out2:
- if (rc < 0)
- lnet_ipif_free_enumeration(names, nfound);
-out1:
- kfree(ifr);
-out0:
- return rc;
-}
-EXPORT_SYMBOL(lnet_ipif_enumerate);
-
-void
-lnet_ipif_free_enumeration(char **names, int n)
-{
- int i;
-
- LASSERT(n > 0);
-
- for (i = 0; i < n && names[i]; i++)
- kfree(names[i]);
-
- kfree(names);
-}
-EXPORT_SYMBOL(lnet_ipif_free_enumeration);
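
A sketch of how the three interface helpers above might be driven together (editor's illustration; error handling is trimmed, the example_ifaces wrapper is mine, and the addresses come back in host byte order, hence %pI4h):

static void example_ifaces(void)
{
	char **names;
	int i, n = lnet_ipif_enumerate(&names);

	for (i = 0; i < n; i++) {
		int up;
		__u32 ip, mask;

		if (!lnet_ipif_query(names[i], &up, &ip, &mask) && up)
			CDEBUG(D_NET, "%s: %pI4h/%pI4h\n",
			       names[i], &ip, &mask);
	}
	if (n > 0)
		lnet_ipif_free_enumeration(names, n);
}
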
-
-int
-lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
-{
- int rc;
- long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
- unsigned long then;
- struct timeval tv;
- struct kvec iov = { .iov_base = buffer, .iov_len = nob };
- struct msghdr msg = {NULL,};
-
- LASSERT(nob > 0);
- /*
- * Caller may pass a zero timeout if she thinks the socket buffer is
- * empty enough to take the whole message immediately
- */
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, nob);
- for (;;) {
- msg.msg_flags = !timeout ? MSG_DONTWAIT : 0;
- if (timeout) {
- /* Set send timeout to remaining time */
- jiffies_to_timeval(jiffies_left, &tv);
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO,
- (char *)&tv, sizeof(tv));
- if (rc) {
- CERROR("Can't set socket send timeout %ld.%06d: %d\n",
- (long)tv.tv_sec, (int)tv.tv_usec, rc);
- return rc;
- }
- }
-
- then = jiffies;
- rc = kernel_sendmsg(sock, &msg, &iov, 1, nob);
- jiffies_left -= jiffies - then;
-
- if (rc < 0)
- return rc;
-
- if (!rc) {
- CERROR("Unexpected zero rc\n");
- return -ECONNABORTED;
- }
-
- if (!msg_data_left(&msg))
- break;
-
- if (jiffies_left <= 0)
- return -EAGAIN;
- }
- return 0;
-}
-EXPORT_SYMBOL(lnet_sock_write);
-
-int
-lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
-{
- int rc;
- long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
- unsigned long then;
- struct timeval tv;
- struct kvec iov = {
- .iov_base = buffer,
- .iov_len = nob
- };
- struct msghdr msg = {
- .msg_flags = 0
- };
-
- LASSERT(nob > 0);
- LASSERT(jiffies_left > 0);
-
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, nob);
-
- for (;;) {
- /* Set receive timeout to remaining time */
- jiffies_to_timeval(jiffies_left, &tv);
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
- (char *)&tv, sizeof(tv));
- if (rc) {
- CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
- (long)tv.tv_sec, (int)tv.tv_usec, rc);
- return rc;
- }
-
- then = jiffies;
- rc = sock_recvmsg(sock, &msg, 0);
- jiffies_left -= jiffies - then;
-
- if (rc < 0)
- return rc;
-
- if (!rc)
- return -ECONNRESET;
-
- if (!msg_data_left(&msg))
- return 0;
-
- if (jiffies_left <= 0)
- return -ETIMEDOUT;
- }
-}
-EXPORT_SYMBOL(lnet_sock_read);
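
A minimal sketch of the calling pattern for the two helpers above: whole-buffer, deadline-bounded I/O where the timeout is in seconds and a return of 0 means the entire buffer moved. The example_handshake wrapper and the hello structure are hypothetical.

static int example_handshake(struct socket *sock)
{
	struct { __u32 magic; __u32 version; } hello = { 0x1234abcd, 1 };
	int rc;

	rc = lnet_sock_write(sock, &hello, sizeof(hello), 5 /* seconds */);
	if (!rc)
		rc = lnet_sock_read(sock, &hello, sizeof(hello), 5);
	return rc;	/* 0 on success; -EAGAIN/-ETIMEDOUT on deadline */
}
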
-
-static int
-lnet_sock_create(struct socket **sockp, int *fatal, __u32 local_ip,
- int local_port)
-{
- struct sockaddr_in locaddr;
- struct socket *sock;
- int rc;
- int option;
-
- /* All errors are fatal except bind failure if the port is in use */
- *fatal = 1;
-
- rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
- *sockp = sock;
- if (rc) {
- CERROR("Can't create socket: %d\n", rc);
- return rc;
- }
-
- option = 1;
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
- (char *)&option, sizeof(option));
- if (rc) {
- CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
- goto failed;
- }
-
- if (local_ip || local_port) {
- memset(&locaddr, 0, sizeof(locaddr));
- locaddr.sin_family = AF_INET;
- locaddr.sin_port = htons(local_port);
- if (!local_ip)
- locaddr.sin_addr.s_addr = htonl(INADDR_ANY);
- else
- locaddr.sin_addr.s_addr = htonl(local_ip);
-
- rc = kernel_bind(sock, (struct sockaddr *)&locaddr,
- sizeof(locaddr));
- if (rc == -EADDRINUSE) {
- CDEBUG(D_NET, "Port %d already in use\n", local_port);
- *fatal = 0;
- goto failed;
- }
- if (rc) {
- CERROR("Error trying to bind to port %d: %d\n",
- local_port, rc);
- goto failed;
- }
- }
- return 0;
-
-failed:
- sock_release(sock);
- return rc;
-}
-
-int
-lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize)
-{
- int option;
- int rc;
-
- if (txbufsize) {
- option = txbufsize;
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
- (char *)&option, sizeof(option));
- if (rc) {
- CERROR("Can't set send buffer %d: %d\n",
- option, rc);
- return rc;
- }
- }
-
- if (rxbufsize) {
- option = rxbufsize;
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
- (char *)&option, sizeof(option));
- if (rc) {
- CERROR("Can't set receive buffer %d: %d\n",
- option, rc);
- return rc;
- }
- }
- return 0;
-}
-EXPORT_SYMBOL(lnet_sock_setbuf);
-
-int
-lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
-{
- struct sockaddr_in sin;
- int rc;
-
- if (remote)
- rc = kernel_getpeername(sock, (struct sockaddr *)&sin);
- else
- rc = kernel_getsockname(sock, (struct sockaddr *)&sin);
- if (rc < 0) {
- CERROR("Error %d getting sock %s IP/port\n",
- rc, remote ? "peer" : "local");
- return rc;
- }
-
- if (ip)
- *ip = ntohl(sin.sin_addr.s_addr);
-
- if (port)
- *port = ntohs(sin.sin_port);
-
- return 0;
-}
-EXPORT_SYMBOL(lnet_sock_getaddr);
-
-int
-lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize)
-{
- if (txbufsize)
- *txbufsize = sock->sk->sk_sndbuf;
-
- if (rxbufsize)
- *rxbufsize = sock->sk->sk_rcvbuf;
-
- return 0;
-}
-EXPORT_SYMBOL(lnet_sock_getbuf);
-
-int
-lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port,
- int backlog)
-{
- int fatal;
- int rc;
-
- rc = lnet_sock_create(sockp, &fatal, local_ip, local_port);
- if (rc) {
- if (!fatal)
- CERROR("Can't create socket: port %d already in use\n",
- local_port);
- return rc;
- }
-
- rc = kernel_listen(*sockp, backlog);
- if (!rc)
- return 0;
-
- CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
- sock_release(*sockp);
- return rc;
-}
-
-int
-lnet_sock_accept(struct socket **newsockp, struct socket *sock)
-{
- wait_queue_entry_t wait;
- struct socket *newsock;
- int rc;
-
- /*
- * XXX this should add a ref to sock->ops->owner, if
- * TCP could be a module
- */
- rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock);
- if (rc) {
- CERROR("Can't allocate socket\n");
- return rc;
- }
-
- newsock->ops = sock->ops;
-
- rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
- if (rc == -EAGAIN) {
- /* Nothing ready, so wait for activity */
- init_waitqueue_entry(&wait, current);
- add_wait_queue(sk_sleep(sock->sk), &wait);
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
- remove_wait_queue(sk_sleep(sock->sk), &wait);
- rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
- }
-
- if (rc)
- goto failed;
-
- *newsockp = newsock;
- return 0;
-
-failed:
- sock_release(newsock);
- return rc;
-}
-
-int
-lnet_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip,
- int local_port, __u32 peer_ip, int peer_port)
-{
- struct sockaddr_in srvaddr;
- int rc;
-
- rc = lnet_sock_create(sockp, fatal, local_ip, local_port);
- if (rc)
- return rc;
-
- memset(&srvaddr, 0, sizeof(srvaddr));
- srvaddr.sin_family = AF_INET;
- srvaddr.sin_port = htons(peer_port);
- srvaddr.sin_addr.s_addr = htonl(peer_ip);
-
- rc = kernel_connect(*sockp, (struct sockaddr *)&srvaddr,
- sizeof(srvaddr), 0);
- if (!rc)
- return 0;
-
- /*
- * EADDRNOTAVAIL probably means we're already connected to the same
- * peer/port on the same local port on a differently typed
- * connection. Let our caller retry with a different local
- * port...
- */
- *fatal = !(rc == -EADDRNOTAVAIL);
-
- CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET,
- "Error %d connecting %pI4h/%d -> %pI4h/%d\n", rc,
- &local_ip, local_port, &peer_ip, peer_port);
-
- sock_release(*sockp);
- return rc;
-}
diff --git a/drivers/staging/lustre/lnet/lnet/lo.c b/drivers/staging/lustre/lnet/lnet/lo.c
deleted file mode 100644
index 7456b989e451..000000000000
--- a/drivers/staging/lustre/lnet/lnet/lo.c
+++ /dev/null
@@ -1,105 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-static int
-lolnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
-{
- LASSERT(!lntmsg->msg_routing);
- LASSERT(!lntmsg->msg_target_is_router);
-
- return lnet_parse(ni, &lntmsg->msg_hdr, ni->ni_nid, lntmsg, 0);
-}
-
-static int
-lolnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
- int delayed, struct iov_iter *to, unsigned int rlen)
-{
- struct lnet_msg *sendmsg = private;
-
- if (lntmsg) { /* not discarding */
- if (sendmsg->msg_iov)
- lnet_copy_iov2iter(to,
- sendmsg->msg_niov,
- sendmsg->msg_iov,
- sendmsg->msg_offset,
- iov_iter_count(to));
- else
- lnet_copy_kiov2iter(to,
- sendmsg->msg_niov,
- sendmsg->msg_kiov,
- sendmsg->msg_offset,
- iov_iter_count(to));
-
- lnet_finalize(ni, lntmsg, 0);
- }
-
- lnet_finalize(ni, sendmsg, 0);
- return 0;
-}
-
-static int lolnd_instanced;
-
-static void
-lolnd_shutdown(struct lnet_ni *ni)
-{
- CDEBUG(D_NET, "shutdown\n");
- LASSERT(lolnd_instanced);
-
- lolnd_instanced = 0;
-}
-
-static int
-lolnd_startup(struct lnet_ni *ni)
-{
- LASSERT(ni->ni_lnd == &the_lolnd);
- LASSERT(!lolnd_instanced);
- lolnd_instanced = 1;
-
- return 0;
-}
-
-struct lnet_lnd the_lolnd = {
- /* .lnd_list = */ {&the_lolnd.lnd_list, &the_lolnd.lnd_list},
- /* .lnd_refcount = */ 0,
- /* .lnd_type = */ LOLND,
- /* .lnd_startup = */ lolnd_startup,
- /* .lnd_shutdown = */ lolnd_shutdown,
- /* .lnd_ctl = */ NULL,
- /* .lnd_send = */ lolnd_send,
- /* .lnd_recv = */ lolnd_recv,
- /* .lnd_eager_recv = */ NULL,
- /* .lnd_notify = */ NULL,
- /* .lnd_accept = */ NULL
-};
diff --git a/drivers/staging/lustre/lnet/lnet/module.c b/drivers/staging/lustre/lnet/lnet/module.c
deleted file mode 100644
index 9d06664f0c17..000000000000
--- a/drivers/staging/lustre/lnet/lnet/module.c
+++ /dev/null
@@ -1,239 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-
-static int config_on_load;
-module_param(config_on_load, int, 0444);
-MODULE_PARM_DESC(config_on_load, "configure network at module load");
-
-static struct mutex lnet_config_mutex;
-
-static int
-lnet_configure(void *arg)
-{
- /* 'arg' is only there so this function can be passed to kthread_run() */
- int rc = 0;
-
- mutex_lock(&lnet_config_mutex);
-
- if (!the_lnet.ln_niinit_self) {
- rc = try_module_get(THIS_MODULE);
-
- if (rc != 1)
- goto out;
-
- rc = LNetNIInit(LNET_PID_LUSTRE);
- if (rc >= 0) {
- the_lnet.ln_niinit_self = 1;
- rc = 0;
- } else {
- module_put(THIS_MODULE);
- }
- }
-
-out:
- mutex_unlock(&lnet_config_mutex);
- return rc;
-}
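
try_module_get(THIS_MODULE) above pins the lnet module in memory for as long as the self-initialised network stays up; lnet_unconfigure() below drops that reference so the module can be unloaded. The self-pinning idiom in isolation (a generic sketch with hypothetical service functions):

/* Generic sketch: keep this module loaded while a long-lived
 * internal service is running.
 */
static int service_up(void)
{
        if (!try_module_get(THIS_MODULE))
                return -ENODEV;         /* module is already unloading */
        /* ... bring the service up; call module_put() on failure ... */
        return 0;
}

static void service_down(void)
{
        /* ... tear the service down ... */
        module_put(THIS_MODULE);        /* unloading may now proceed */
}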
-
-static int
-lnet_unconfigure(void)
-{
- int refcount;
-
- mutex_lock(&lnet_config_mutex);
-
- if (the_lnet.ln_niinit_self) {
- the_lnet.ln_niinit_self = 0;
- LNetNIFini();
- module_put(THIS_MODULE);
- }
-
- mutex_lock(&the_lnet.ln_api_mutex);
- refcount = the_lnet.ln_refcount;
- mutex_unlock(&the_lnet.ln_api_mutex);
-
- mutex_unlock(&lnet_config_mutex);
- return !refcount ? 0 : -EBUSY;
-}
-
-static int
-lnet_dyn_configure(struct libcfs_ioctl_hdr *hdr)
-{
- struct lnet_ioctl_config_data *conf =
- (struct lnet_ioctl_config_data *)hdr;
- int rc;
-
- if (conf->cfg_hdr.ioc_len < sizeof(*conf))
- return -EINVAL;
-
- mutex_lock(&lnet_config_mutex);
- if (!the_lnet.ln_niinit_self) {
- rc = -EINVAL;
- goto out_unlock;
- }
- rc = lnet_dyn_add_ni(LNET_PID_LUSTRE, conf);
-out_unlock:
- mutex_unlock(&lnet_config_mutex);
-
- return rc;
-}
-
-static int
-lnet_dyn_unconfigure(struct libcfs_ioctl_hdr *hdr)
-{
- struct lnet_ioctl_config_data *conf =
- (struct lnet_ioctl_config_data *)hdr;
- int rc;
-
- if (conf->cfg_hdr.ioc_len < sizeof(*conf))
- return -EINVAL;
-
- mutex_lock(&lnet_config_mutex);
- if (!the_lnet.ln_niinit_self) {
- rc = -EINVAL;
- goto out_unlock;
- }
- rc = lnet_dyn_del_ni(conf->cfg_net);
-out_unlock:
- mutex_unlock(&lnet_config_mutex);
-
- return rc;
-}
-
-static int
-lnet_ioctl(struct notifier_block *nb,
- unsigned long cmd, void *vdata)
-{
- int rc;
- struct libcfs_ioctl_hdr *hdr = vdata;
-
- switch (cmd) {
- case IOC_LIBCFS_CONFIGURE: {
- struct libcfs_ioctl_data *data =
- (struct libcfs_ioctl_data *)hdr;
-
- if (data->ioc_hdr.ioc_len < sizeof(*data)) {
- rc = -EINVAL;
- } else {
- the_lnet.ln_nis_from_mod_params = data->ioc_flags;
- rc = lnet_configure(NULL);
- }
- break;
- }
-
- case IOC_LIBCFS_UNCONFIGURE:
- rc = lnet_unconfigure();
- break;
-
- case IOC_LIBCFS_ADD_NET:
- rc = lnet_dyn_configure(hdr);
- break;
-
- case IOC_LIBCFS_DEL_NET:
- rc = lnet_dyn_unconfigure(hdr);
- break;
-
- default:
- /*
- * Passing LNET_PID_ANY only gives me a ref if the net is up
- * already; I'll need it to ensure the net can't go down while
- * I'm called into it
- */
- rc = LNetNIInit(LNET_PID_ANY);
- if (rc >= 0) {
- rc = LNetCtl(cmd, hdr);
- LNetNIFini();
- }
- break;
- }
- return notifier_from_ioctl_errno(rc);
-}
-
-static struct notifier_block lnet_ioctl_handler = {
- .notifier_call = lnet_ioctl,
-};
-
-static int __init lnet_init(void)
-{
- int rc;
-
- mutex_init(&lnet_config_mutex);
-
- rc = libcfs_setup();
- if (rc)
- return rc;
-
- rc = lnet_lib_init();
- if (rc) {
- CERROR("lnet_lib_init: error %d\n", rc);
- return rc;
- }
-
- rc = blocking_notifier_chain_register(&libcfs_ioctl_list,
- &lnet_ioctl_handler);
- LASSERT(!rc);
-
- if (config_on_load) {
- /*
- * Have to schedule a separate thread to avoid deadlocking
- * in modload
- */
- (void)kthread_run(lnet_configure, NULL, "lnet_initd");
- }
-
- return 0;
-}
-
-static void __exit lnet_exit(void)
-{
- int rc;
-
- rc = blocking_notifier_chain_unregister(&libcfs_ioctl_list,
- &lnet_ioctl_handler);
- LASSERT(!rc);
-
- lnet_lib_exit();
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Networking layer");
-MODULE_VERSION(LNET_VERSION);
-MODULE_LICENSE("GPL");
-
-module_init(lnet_init);
-module_exit(lnet_exit);
diff --git a/drivers/staging/lustre/lnet/lnet/net_fault.c b/drivers/staging/lustre/lnet/lnet/net_fault.c
deleted file mode 100644
index 0066394b0bb0..000000000000
--- a/drivers/staging/lustre/lnet/lnet/net_fault.c
+++ /dev/null
@@ -1,1023 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2014, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- *
- * lnet/lnet/net_fault.c
- *
- * Lustre network fault simulation
- *
- * Author: liang.zhen@intel.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnetctl.h>
-
-#define LNET_MSG_MASK (LNET_PUT_BIT | LNET_ACK_BIT | \
- LNET_GET_BIT | LNET_REPLY_BIT)
-
-struct lnet_drop_rule {
- /** link chain on the_lnet.ln_drop_rules */
- struct list_head dr_link;
- /** attributes of this rule */
- struct lnet_fault_attr dr_attr;
- /** lock to protect \a dr_drop_at and \a dr_stat */
- spinlock_t dr_lock;
- /**
- * the message sequence number to drop: a message is dropped when
- * dr_stat.fs_count == dr_drop_at
- */
- unsigned long dr_drop_at;
- /**
- * jiffies at which to drop the next message; mutually exclusive with dr_drop_at
- */
- unsigned long dr_drop_time;
- /** baseline to calculate dr_drop_time */
- unsigned long dr_time_base;
- /** statistic of dropped messages */
- struct lnet_fault_stat dr_stat;
-};
-
-static bool
-lnet_fault_nid_match(lnet_nid_t nid, lnet_nid_t msg_nid)
-{
- if (nid == msg_nid || nid == LNET_NID_ANY)
- return true;
-
- if (LNET_NIDNET(nid) != LNET_NIDNET(msg_nid))
- return false;
-
- /* 255.255.255.255@net is wildcard for all addresses in a network */
- return LNET_NIDADDR(nid) == LNET_NIDADDR(LNET_NID_ANY);
-}
-
-static bool
-lnet_fault_attr_match(struct lnet_fault_attr *attr, lnet_nid_t src,
- lnet_nid_t dst, unsigned int type, unsigned int portal)
-{
- if (!lnet_fault_nid_match(attr->fa_src, src) ||
- !lnet_fault_nid_match(attr->fa_dst, dst))
- return false;
-
- if (!(attr->fa_msg_mask & (1 << type)))
- return false;
-
- /**
- * NB: ACK and REPLY have no portal, but they should have been
- * rejected by message mask
- */
- if (attr->fa_ptl_mask && /* has portal filter */
- !(attr->fa_ptl_mask & (1ULL << portal)))
- return false;
-
- return true;
-}
-
-static int
-lnet_fault_attr_validate(struct lnet_fault_attr *attr)
-{
- if (!attr->fa_msg_mask)
- attr->fa_msg_mask = LNET_MSG_MASK; /* all message types */
-
- if (!attr->fa_ptl_mask) /* no portal filter */
- return 0;
-
- /* NB: only PUT and GET can be filtered if portal filter has been set */
- attr->fa_msg_mask &= LNET_GET_BIT | LNET_PUT_BIT;
- if (!attr->fa_msg_mask) {
- CDEBUG(D_NET, "can't find valid message type bits %x\n",
- attr->fa_msg_mask);
- return -EINVAL;
- }
- return 0;
-}
-
-static void
-lnet_fault_stat_inc(struct lnet_fault_stat *stat, unsigned int type)
-{
- /* NB: fs_counter is NOT updated by this function */
- switch (type) {
- case LNET_MSG_PUT:
- stat->fs_put++;
- return;
- case LNET_MSG_ACK:
- stat->fs_ack++;
- return;
- case LNET_MSG_GET:
- stat->fs_get++;
- return;
- case LNET_MSG_REPLY:
- stat->fs_reply++;
- return;
- }
-}
-
-/**
- * LNet message drop simulation
- */
-
-/**
- * Add a new drop rule to LNet
- * There is no check for duplicated drop rule, all rules will be checked for
- * incoming message.
- */
-static int
-lnet_drop_rule_add(struct lnet_fault_attr *attr)
-{
- struct lnet_drop_rule *rule;
-
- if (attr->u.drop.da_rate && attr->u.drop.da_interval) {
- CDEBUG(D_NET, "please provide either drop rate or drop interval, but not both at the same time %d/%d\n",
- attr->u.drop.da_rate, attr->u.drop.da_interval);
- return -EINVAL;
- }
-
- if (lnet_fault_attr_validate(attr))
- return -EINVAL;
-
- rule = kzalloc(sizeof(*rule), GFP_NOFS);
- if (!rule)
- return -ENOMEM;
-
- spin_lock_init(&rule->dr_lock);
-
- rule->dr_attr = *attr;
- if (attr->u.drop.da_interval) {
- rule->dr_time_base = jiffies + attr->u.drop.da_interval * HZ;
- rule->dr_drop_time = jiffies +
- prandom_u32_max(attr->u.drop.da_interval) * HZ;
- } else {
- rule->dr_drop_at = prandom_u32_max(attr->u.drop.da_rate);
- }
-
- lnet_net_lock(LNET_LOCK_EX);
- list_add(&rule->dr_link, &the_lnet.ln_drop_rules);
- lnet_net_unlock(LNET_LOCK_EX);
-
- CDEBUG(D_NET, "Added drop rule: src %s, dst %s, rate %d, interval %d\n",
- libcfs_nid2str(attr->fa_src), libcfs_nid2str(attr->fa_dst),
- attr->u.drop.da_rate, attr->u.drop.da_interval);
- return 0;
-}
-
-/**
- * Remove matched drop rules from LNet; all rules that match \a src and
- * \a dst will be removed.
- * If \a src is zero, all rules with \a dst as destination will be removed.
- * If \a dst is zero, all rules with \a src as source will be removed.
- * If both are zero, all rules will be removed.
- */
-static int
-lnet_drop_rule_del(lnet_nid_t src, lnet_nid_t dst)
-{
- struct lnet_drop_rule *rule;
- struct lnet_drop_rule *tmp;
- struct list_head zombies;
- int n = 0;
-
- INIT_LIST_HEAD(&zombies);
-
- lnet_net_lock(LNET_LOCK_EX);
- list_for_each_entry_safe(rule, tmp, &the_lnet.ln_drop_rules, dr_link) {
- if (rule->dr_attr.fa_src != src && src)
- continue;
-
- if (rule->dr_attr.fa_dst != dst && dst)
- continue;
-
- list_move(&rule->dr_link, &zombies);
- }
- lnet_net_unlock(LNET_LOCK_EX);
-
- list_for_each_entry_safe(rule, tmp, &zombies, dr_link) {
- CDEBUG(D_NET, "Remove drop rule: src %s->dst: %s (1/%d, %d)\n",
- libcfs_nid2str(rule->dr_attr.fa_src),
- libcfs_nid2str(rule->dr_attr.fa_dst),
- rule->dr_attr.u.drop.da_rate,
- rule->dr_attr.u.drop.da_interval);
-
- list_del(&rule->dr_link);
- kfree(rule);
- n++;
- }
-
- return n;
-}
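
Rule deletion uses the classic unlink-then-free idiom: matched rules are moved to a private zombie list under the LNET_LOCK_EX write lock and freed only after the lock is dropped. The same pattern in isolation (a generic sketch with hypothetical types):

struct zitem {
        struct list_head link;
};

/* Generic sketch: unlink under the lock, free outside of it. */
static int purge_matching(struct list_head *live, spinlock_t *lock,
                          bool (*match)(struct zitem *))
{
        struct zitem *it, *tmp;
        struct list_head zombies;
        int n = 0;

        INIT_LIST_HEAD(&zombies);

        spin_lock(lock);
        list_for_each_entry_safe(it, tmp, live, link) {
                if (match(it))
                        list_move(&it->link, &zombies);
        }
        spin_unlock(lock);

        list_for_each_entry_safe(it, tmp, &zombies, link) {
                list_del(&it->link);
                kfree(it);              /* safe: lock no longer held */
                n++;
        }
        return n;
}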
-
-/**
- * List drop rule at position of \a pos
- */
-static int
-lnet_drop_rule_list(int pos, struct lnet_fault_attr *attr,
- struct lnet_fault_stat *stat)
-{
- struct lnet_drop_rule *rule;
- int cpt;
- int i = 0;
- int rc = -ENOENT;
-
- cpt = lnet_net_lock_current();
- list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
- if (i++ < pos)
- continue;
-
- spin_lock(&rule->dr_lock);
- *attr = rule->dr_attr;
- *stat = rule->dr_stat;
- spin_unlock(&rule->dr_lock);
- rc = 0;
- break;
- }
-
- lnet_net_unlock(cpt);
- return rc;
-}
-
-/**
- * reset counters for all drop rules
- */
-static void
-lnet_drop_rule_reset(void)
-{
- struct lnet_drop_rule *rule;
- int cpt;
-
- cpt = lnet_net_lock_current();
-
- list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
- struct lnet_fault_attr *attr = &rule->dr_attr;
-
- spin_lock(&rule->dr_lock);
-
- memset(&rule->dr_stat, 0, sizeof(rule->dr_stat));
- if (attr->u.drop.da_rate) {
- rule->dr_drop_at = prandom_u32_max(attr->u.drop.da_rate);
- } else {
- rule->dr_drop_time = jiffies +
- prandom_u32_max(attr->u.drop.da_interval) * HZ;
- rule->dr_time_base = jiffies + attr->u.drop.da_interval * HZ;
- }
- spin_unlock(&rule->dr_lock);
- }
-
- lnet_net_unlock(cpt);
-}
-
-/**
- * check source/destination NID, portal, message type and drop rate,
- * and decide whether this message should be dropped
- */
-static bool
-drop_rule_match(struct lnet_drop_rule *rule, lnet_nid_t src,
- lnet_nid_t dst, unsigned int type, unsigned int portal)
-{
- struct lnet_fault_attr *attr = &rule->dr_attr;
- bool drop;
-
- if (!lnet_fault_attr_match(attr, src, dst, type, portal))
- return false;
-
- /* match this rule, check drop rate now */
- spin_lock(&rule->dr_lock);
- if (rule->dr_drop_time) { /* time based drop */
- unsigned long now = jiffies;
-
- rule->dr_stat.fs_count++;
- drop = time_after_eq(now, rule->dr_drop_time);
- if (drop) {
- if (time_after(now, rule->dr_time_base))
- rule->dr_time_base = now;
-
- rule->dr_drop_time = rule->dr_time_base +
- prandom_u32_max(attr->u.drop.da_interval) * HZ;
- rule->dr_time_base += attr->u.drop.da_interval * HZ;
-
- CDEBUG(D_NET, "Drop Rule %s->%s: next drop : %lu\n",
- libcfs_nid2str(attr->fa_src),
- libcfs_nid2str(attr->fa_dst),
- rule->dr_drop_time);
- }
-
- } else { /* rate based drop */
- __u64 count;
-
- drop = rule->dr_stat.fs_count++ == rule->dr_drop_at;
-
- /* NB: do_div() divides its first argument in place; use a copy
- * so the running fs_count statistic is not corrupted
- */
- count = rule->dr_stat.fs_count;
- if (!do_div(count, attr->u.drop.da_rate)) {
- rule->dr_drop_at = rule->dr_stat.fs_count +
- prandom_u32_max(attr->u.drop.da_rate);
- CDEBUG(D_NET, "Drop Rule %s->%s: next drop: %lu\n",
- libcfs_nid2str(attr->fa_src),
- libcfs_nid2str(attr->fa_dst), rule->dr_drop_at);
- }
- }
-
- if (drop) { /* drop this message, update counters */
- lnet_fault_stat_inc(&rule->dr_stat, type);
- rule->dr_stat.u.drop.ds_dropped++;
- }
-
- spin_unlock(&rule->dr_lock);
- return drop;
-}
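
A rule thus works in one of two modes: time-based (drop traffic for a random slice of every da_interval-second window) or rate-based (drop exactly one message per window of da_rate messages, at a uniformly random offset inside the window). The rate-based schedule in isolation (a simplified sketch; a 32-bit counter lets plain modulo replace do_div()):

/* Sketch of the rate-based schedule: exactly one drop per window of
 * 'rate' messages, at a uniformly random offset inside each window.
 */
static bool should_drop(u32 *count, u32 *drop_at, u32 rate)
{
        bool drop = ((*count)++ == *drop_at);

        if (!(*count % rate))   /* window finished: pick next victim */
                *drop_at = *count + prandom_u32_max(rate);
        return drop;
}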
-
-/**
- * Check if a message from \a src to \a dst matches any existing drop rule
- */
-bool
-lnet_drop_rule_match(struct lnet_hdr *hdr)
-{
- struct lnet_drop_rule *rule;
- lnet_nid_t src = le64_to_cpu(hdr->src_nid);
- lnet_nid_t dst = le64_to_cpu(hdr->dest_nid);
- unsigned int typ = le32_to_cpu(hdr->type);
- unsigned int ptl = -1;
- bool drop = false;
- int cpt;
-
- /**
- * NB: if Portal is specified, then only PUT and GET will be
- * filtered by drop rule
- */
- if (typ == LNET_MSG_PUT)
- ptl = le32_to_cpu(hdr->msg.put.ptl_index);
- else if (typ == LNET_MSG_GET)
- ptl = le32_to_cpu(hdr->msg.get.ptl_index);
-
- cpt = lnet_net_lock_current();
- list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
- drop = drop_rule_match(rule, src, dst, typ, ptl);
- if (drop)
- break;
- }
-
- lnet_net_unlock(cpt);
- return drop;
-}
-
-/**
- * LNet Delay Simulation
- */
-/** timestamp (second) to send delayed message */
-#define msg_delay_send msg_ev.hdr_data
-
-struct lnet_delay_rule {
- /** link chain on the_lnet.ln_delay_rules */
- struct list_head dl_link;
- /** link chain on delay_dd.dd_sched_rules */
- struct list_head dl_sched_link;
- /** attributes of this rule */
- struct lnet_fault_attr dl_attr;
- /** lock to protect \a below members */
- spinlock_t dl_lock;
- /** refcount of delay rule */
- atomic_t dl_refcount;
- /**
- * the message sequence number to delay: a message is delayed when
- * dl_stat.fs_count == dl_delay_at
- */
- unsigned long dl_delay_at;
- /**
- * jiffies at which to delay the next message; mutually exclusive with dl_delay_at
- */
- unsigned long dl_delay_time;
- /** baseline to calculate dl_delay_time */
- unsigned long dl_time_base;
- /** jiffies to send the next delayed message */
- unsigned long dl_msg_send;
- /** delayed message list */
- struct list_head dl_msg_list;
- /** statistic of delayed messages */
- struct lnet_fault_stat dl_stat;
- /** timer to wakeup delay_daemon */
- struct timer_list dl_timer;
-};
-
-struct delay_daemon_data {
- /** serialise rule add/remove */
- struct mutex dd_mutex;
- /** protect rules on \a dd_sched_rules */
- spinlock_t dd_lock;
- /** scheduled delay rules (by timer) */
- struct list_head dd_sched_rules;
- /** daemon thread sleeps here */
- wait_queue_head_t dd_waitq;
- /** controller (lctl command) waits here */
- wait_queue_head_t dd_ctl_waitq;
- /** daemon is running */
- unsigned int dd_running;
- /** daemon stopped */
- unsigned int dd_stopped;
-};
-
-static struct delay_daemon_data delay_dd;
-
-static unsigned long
-round_timeout(unsigned long timeout)
-{
- return (unsigned int)rounddown(timeout, HZ) + HZ;
-}
-
-static void
-delay_rule_decref(struct lnet_delay_rule *rule)
-{
- if (atomic_dec_and_test(&rule->dl_refcount)) {
- LASSERT(list_empty(&rule->dl_sched_link));
- LASSERT(list_empty(&rule->dl_msg_list));
- LASSERT(list_empty(&rule->dl_link));
-
- kfree(rule);
- }
-}
-
-/**
- * check source/destination NID, portal, message type and delay rate,
- * and decide whether this message should be delayed
- */
-static bool
-delay_rule_match(struct lnet_delay_rule *rule, lnet_nid_t src,
- lnet_nid_t dst, unsigned int type, unsigned int portal,
- struct lnet_msg *msg)
-{
- struct lnet_fault_attr *attr = &rule->dl_attr;
- bool delay;
-
- if (!lnet_fault_attr_match(attr, src, dst, type, portal))
- return false;
-
- /* match this rule, check delay rate now */
- spin_lock(&rule->dl_lock);
- if (rule->dl_delay_time) { /* time based delay */
- unsigned long now = jiffies;
-
- rule->dl_stat.fs_count++;
- delay = time_after_eq(now, rule->dl_delay_time);
- if (delay) {
- if (time_after(now, rule->dl_time_base))
- rule->dl_time_base = now;
-
- rule->dl_delay_time = rule->dl_time_base +
- prandom_u32_max(attr->u.delay.la_interval) * HZ;
- rule->dl_time_base += attr->u.delay.la_interval * HZ;
-
- CDEBUG(D_NET, "Delay Rule %s->%s: next delay : %lu\n",
- libcfs_nid2str(attr->fa_src),
- libcfs_nid2str(attr->fa_dst),
- rule->dl_delay_time);
- }
-
- } else { /* rate based delay */
- __u64 count;
-
- delay = rule->dl_stat.fs_count++ == rule->dl_delay_at;
-
- /* NB: do_div() divides its first argument in place; use a copy
- * so the running fs_count statistic is not corrupted
- */
- count = rule->dl_stat.fs_count;
- /* generate the next random rate sequence */
- if (!do_div(count, attr->u.delay.la_rate)) {
- rule->dl_delay_at = rule->dl_stat.fs_count +
- prandom_u32_max(attr->u.delay.la_rate);
- CDEBUG(D_NET, "Delay Rule %s->%s: next delay: %lu\n",
- libcfs_nid2str(attr->fa_src),
- libcfs_nid2str(attr->fa_dst), rule->dl_delay_at);
- }
- }
-
- if (!delay) {
- spin_unlock(&rule->dl_lock);
- return false;
- }
-
- /* delay this message, update counters */
- lnet_fault_stat_inc(&rule->dl_stat, type);
- rule->dl_stat.u.delay.ls_delayed++;
-
- list_add_tail(&msg->msg_list, &rule->dl_msg_list);
- msg->msg_delay_send = round_timeout(
- jiffies + attr->u.delay.la_latency * HZ);
- if (rule->dl_msg_send == -1) {
- rule->dl_msg_send = msg->msg_delay_send;
- mod_timer(&rule->dl_timer, rule->dl_msg_send);
- }
-
- spin_unlock(&rule->dl_lock);
- return true;
-}
-
-/**
- * Check if \a msg matches any delay rule; if so, receipt of this
- * message will be delayed.
- */
-bool
-lnet_delay_rule_match_locked(struct lnet_hdr *hdr, struct lnet_msg *msg)
-{
- struct lnet_delay_rule *rule;
- lnet_nid_t src = le64_to_cpu(hdr->src_nid);
- lnet_nid_t dst = le64_to_cpu(hdr->dest_nid);
- unsigned int typ = le32_to_cpu(hdr->type);
- unsigned int ptl = -1;
-
- /* NB: called with hold of lnet_net_lock */
-
- /**
- * NB: if Portal is specified, then only PUT and GET will be
- * filtered by delay rule
- */
- if (typ == LNET_MSG_PUT)
- ptl = le32_to_cpu(hdr->msg.put.ptl_index);
- else if (typ == LNET_MSG_GET)
- ptl = le32_to_cpu(hdr->msg.get.ptl_index);
-
- list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) {
- if (delay_rule_match(rule, src, dst, typ, ptl, msg))
- return true;
- }
-
- return false;
-}
-
-/** check out delayed messages for send */
-static void
-delayed_msg_check(struct lnet_delay_rule *rule, bool all,
- struct list_head *msg_list)
-{
- struct lnet_msg *msg;
- struct lnet_msg *tmp;
- unsigned long now = jiffies;
-
- if (!all && rule->dl_msg_send > now)
- return;
-
- spin_lock(&rule->dl_lock);
- list_for_each_entry_safe(msg, tmp, &rule->dl_msg_list, msg_list) {
- if (!all && msg->msg_delay_send > now)
- break;
-
- msg->msg_delay_send = 0;
- list_move_tail(&msg->msg_list, msg_list);
- }
-
- if (list_empty(&rule->dl_msg_list)) {
- del_timer(&rule->dl_timer);
- rule->dl_msg_send = -1;
-
- } else if (!list_empty(msg_list)) {
- /*
- * dequeued some timed-out messages; update the timer for the
- * next delayed message on this rule
- */
- msg = list_entry(rule->dl_msg_list.next,
- struct lnet_msg, msg_list);
- rule->dl_msg_send = msg->msg_delay_send;
- mod_timer(&rule->dl_timer, rule->dl_msg_send);
- }
- spin_unlock(&rule->dl_lock);
-}
-
-static void
-delayed_msg_process(struct list_head *msg_list, bool drop)
-{
- struct lnet_msg *msg;
-
- while (!list_empty(msg_list)) {
- struct lnet_ni *ni;
- int cpt;
- int rc;
-
- msg = list_entry(msg_list->next, struct lnet_msg, msg_list);
- LASSERT(msg->msg_rxpeer);
-
- ni = msg->msg_rxpeer->lp_ni;
- cpt = msg->msg_rx_cpt;
-
- list_del_init(&msg->msg_list);
- if (drop) {
- rc = -ECANCELED;
-
- } else if (!msg->msg_routing) {
- rc = lnet_parse_local(ni, msg);
- if (!rc)
- continue;
-
- } else {
- lnet_net_lock(cpt);
- rc = lnet_parse_forward_locked(ni, msg);
- lnet_net_unlock(cpt);
-
- switch (rc) {
- case LNET_CREDIT_OK:
- lnet_ni_recv(ni, msg->msg_private, msg, 0,
- 0, msg->msg_len, msg->msg_len);
- /* fall through */
- case LNET_CREDIT_WAIT:
- continue;
- default: /* failures */
- break;
- }
- }
-
- lnet_drop_message(ni, cpt, msg->msg_private, msg->msg_len);
- lnet_finalize(ni, msg, rc);
- }
-}
-
-/**
- * Process delayed messages for scheduled rules
- * This function can be called either by the delay rule daemon or by lnet_finalize
- */
-void
-lnet_delay_rule_check(void)
-{
- struct lnet_delay_rule *rule;
- struct list_head msgs;
-
- INIT_LIST_HEAD(&msgs);
- while (1) {
- if (list_empty(&delay_dd.dd_sched_rules))
- break;
-
- spin_lock_bh(&delay_dd.dd_lock);
- if (list_empty(&delay_dd.dd_sched_rules)) {
- spin_unlock_bh(&delay_dd.dd_lock);
- break;
- }
-
- rule = list_entry(delay_dd.dd_sched_rules.next,
- struct lnet_delay_rule, dl_sched_link);
- list_del_init(&rule->dl_sched_link);
- spin_unlock_bh(&delay_dd.dd_lock);
-
- delayed_msg_check(rule, false, &msgs);
- delay_rule_decref(rule); /* -1 for delay_dd.dd_sched_rules */
- }
-
- if (!list_empty(&msgs))
- delayed_msg_process(&msgs, false);
-}
-
-/** daemon thread to handle delayed messages */
-static int
-lnet_delay_rule_daemon(void *arg)
-{
- delay_dd.dd_running = 1;
- wake_up(&delay_dd.dd_ctl_waitq);
-
- while (delay_dd.dd_running) {
- wait_event_interruptible(delay_dd.dd_waitq,
- !delay_dd.dd_running ||
- !list_empty(&delay_dd.dd_sched_rules));
- lnet_delay_rule_check();
- }
-
- /* in case more rules have been enqueued after my last check */
- lnet_delay_rule_check();
- delay_dd.dd_stopped = 1;
- wake_up(&delay_dd.dd_ctl_waitq);
-
- return 0;
-}
-
-static void
-delay_timer_cb(struct timer_list *t)
-{
- struct lnet_delay_rule *rule = from_timer(rule, t, dl_timer);
-
- spin_lock_bh(&delay_dd.dd_lock);
- if (list_empty(&rule->dl_sched_link) && delay_dd.dd_running) {
- atomic_inc(&rule->dl_refcount);
- list_add_tail(&rule->dl_sched_link, &delay_dd.dd_sched_rules);
- wake_up(&delay_dd.dd_waitq);
- }
- spin_unlock_bh(&delay_dd.dd_lock);
-}
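
Because the timer callback runs in softirq context it does no real work itself: it queues the rule on delay_dd.dd_sched_rules and wakes the daemon, which drains the queue in process context. The producer side of that idiom in isolation (a generic sketch with hypothetical names):

/* Generic sketch: a timer handler defers real work to a kthread. */
static LIST_HEAD(sched_list);
static DEFINE_SPINLOCK(sched_lock);             /* softirq-safe via _bh */
static DECLARE_WAIT_QUEUE_HEAD(daemon_waitq);

struct work_item {
        struct list_head sched_link;
        struct timer_list timer;
};

static void work_timer_cb(struct timer_list *t)
{
        struct work_item *w = from_timer(w, t, timer);

        spin_lock_bh(&sched_lock);
        if (list_empty(&w->sched_link)) {       /* not already queued */
                list_add_tail(&w->sched_link, &sched_list);
                wake_up(&daemon_waitq);         /* kick process context */
        }
        spin_unlock_bh(&sched_lock);
}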
-
-/**
- * Add a new delay rule to LNet
- * There is no check for duplicate delay rules; every rule is checked
- * against each incoming message.
- */
-int
-lnet_delay_rule_add(struct lnet_fault_attr *attr)
-{
- struct lnet_delay_rule *rule;
- int rc = 0;
-
- if (attr->u.delay.la_rate && attr->u.delay.la_interval) {
- CDEBUG(D_NET, "please provide either delay rate or delay interval, but not both at the same time %d/%d\n",
- attr->u.delay.la_rate, attr->u.delay.la_interval);
- return -EINVAL;
- }
-
- if (!attr->u.delay.la_latency) {
- CDEBUG(D_NET, "delay latency cannot be zero\n");
- return -EINVAL;
- }
-
- if (lnet_fault_attr_validate(attr))
- return -EINVAL;
-
- rule = kzalloc(sizeof(*rule), GFP_NOFS);
- if (!rule)
- return -ENOMEM;
-
- mutex_lock(&delay_dd.dd_mutex);
- if (!delay_dd.dd_running) {
- struct task_struct *task;
-
- /**
- * NB: although LND threads will process delayed messages
- * in lnet_finalize, there is no guarantee that they will
- * be woken up if no other message needs to be handled.
- * There is only one daemon thread; performance is not a
- * concern for this simulation module.
- */
- task = kthread_run(lnet_delay_rule_daemon, NULL, "lnet_dd");
- if (IS_ERR(task)) {
- rc = PTR_ERR(task);
- goto failed;
- }
- wait_event(delay_dd.dd_ctl_waitq, delay_dd.dd_running);
- }
-
- timer_setup(&rule->dl_timer, delay_timer_cb, 0);
-
- spin_lock_init(&rule->dl_lock);
- INIT_LIST_HEAD(&rule->dl_msg_list);
- INIT_LIST_HEAD(&rule->dl_sched_link);
-
- rule->dl_attr = *attr;
- if (attr->u.delay.la_interval) {
- rule->dl_time_base = jiffies + attr->u.delay.la_interval * HZ;
- rule->dl_delay_time = jiffies +
- prandom_u32_max(attr->u.delay.la_interval) * HZ;
- } else {
- rule->dl_delay_at = prandom_u32_max(attr->u.delay.la_rate);
- }
-
- rule->dl_msg_send = -1;
-
- lnet_net_lock(LNET_LOCK_EX);
- atomic_set(&rule->dl_refcount, 1);
- list_add(&rule->dl_link, &the_lnet.ln_delay_rules);
- lnet_net_unlock(LNET_LOCK_EX);
-
- CDEBUG(D_NET, "Added delay rule: src %s, dst %s, rate %d\n",
- libcfs_nid2str(attr->fa_src), libcfs_nid2str(attr->fa_dst),
- attr->u.delay.la_rate);
-
- mutex_unlock(&delay_dd.dd_mutex);
- return 0;
-failed:
- mutex_unlock(&delay_dd.dd_mutex);
- kfree(rule);
- return rc;
-}
-
-/**
- * Remove matched delay rules from LNet. If \a shutdown is true, or both
- * \a src and \a dst are zero, all rules will be removed; otherwise only
- * matched rules will be removed.
- * If \a src is zero, all rules with \a dst as destination will be removed.
- * If \a dst is zero, all rules with \a src as source will be removed.
- *
- * When a delay rule is removed, all delayed messages of this rule will be
- * processed immediately.
- */
-int
-lnet_delay_rule_del(lnet_nid_t src, lnet_nid_t dst, bool shutdown)
-{
- struct lnet_delay_rule *rule;
- struct lnet_delay_rule *tmp;
- struct list_head rule_list;
- struct list_head msg_list;
- int n = 0;
- bool cleanup;
-
- INIT_LIST_HEAD(&rule_list);
- INIT_LIST_HEAD(&msg_list);
-
- if (shutdown) {
- src = 0;
- dst = 0;
- }
-
- mutex_lock(&delay_dd.dd_mutex);
- lnet_net_lock(LNET_LOCK_EX);
-
- list_for_each_entry_safe(rule, tmp, &the_lnet.ln_delay_rules, dl_link) {
- if (rule->dl_attr.fa_src != src && src)
- continue;
-
- if (rule->dl_attr.fa_dst != dst && dst)
- continue;
-
- CDEBUG(D_NET, "Remove delay rule: src %s->dst: %s (1/%d, %d)\n",
- libcfs_nid2str(rule->dl_attr.fa_src),
- libcfs_nid2str(rule->dl_attr.fa_dst),
- rule->dl_attr.u.delay.la_rate,
- rule->dl_attr.u.delay.la_interval);
- /* refcount is taken over by rule_list */
- list_move(&rule->dl_link, &rule_list);
- }
-
- /* check if we need to shutdown delay_daemon */
- cleanup = list_empty(&the_lnet.ln_delay_rules) &&
- !list_empty(&rule_list);
- lnet_net_unlock(LNET_LOCK_EX);
-
- list_for_each_entry_safe(rule, tmp, &rule_list, dl_link) {
- list_del_init(&rule->dl_link);
-
- del_timer_sync(&rule->dl_timer);
- delayed_msg_check(rule, true, &msg_list);
- delay_rule_decref(rule); /* -1 for the_lnet.ln_delay_rules */
- n++;
- }
-
- if (cleanup) { /* no more delay rule, shutdown delay_daemon */
- LASSERT(delay_dd.dd_running);
- delay_dd.dd_running = 0;
- wake_up(&delay_dd.dd_waitq);
-
- while (!delay_dd.dd_stopped)
- wait_event(delay_dd.dd_ctl_waitq, delay_dd.dd_stopped);
- }
- mutex_unlock(&delay_dd.dd_mutex);
-
- if (!list_empty(&msg_list))
- delayed_msg_process(&msg_list, shutdown);
-
- return n;
-}
-
-/**
- * List Delay Rule at position of \a pos
- */
-int
-lnet_delay_rule_list(int pos, struct lnet_fault_attr *attr,
- struct lnet_fault_stat *stat)
-{
- struct lnet_delay_rule *rule;
- int cpt;
- int i = 0;
- int rc = -ENOENT;
-
- cpt = lnet_net_lock_current();
- list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) {
- if (i++ < pos)
- continue;
-
- spin_lock(&rule->dl_lock);
- *attr = rule->dl_attr;
- *stat = rule->dl_stat;
- spin_unlock(&rule->dl_lock);
- rc = 0;
- break;
- }
-
- lnet_net_unlock(cpt);
- return rc;
-}
-
-/**
- * reset counters for all Delay Rules
- */
-void
-lnet_delay_rule_reset(void)
-{
- struct lnet_delay_rule *rule;
- int cpt;
-
- cpt = lnet_net_lock_current();
-
- list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) {
- struct lnet_fault_attr *attr = &rule->dl_attr;
-
- spin_lock(&rule->dl_lock);
-
- memset(&rule->dl_stat, 0, sizeof(rule->dl_stat));
- if (attr->u.delay.la_rate) {
- rule->dl_delay_at = prandom_u32_max(attr->u.delay.la_rate);
- } else {
- rule->dl_delay_time =
- jiffies + prandom_u32_max(
- attr->u.delay.la_interval) * HZ;
- rule->dl_time_base = jiffies + attr->u.delay.la_interval * HZ;
- }
- spin_unlock(&rule->dl_lock);
- }
-
- lnet_net_unlock(cpt);
-}
-
-int
-lnet_fault_ctl(int opc, struct libcfs_ioctl_data *data)
-{
- struct lnet_fault_attr *attr;
- struct lnet_fault_stat *stat;
-
- attr = (struct lnet_fault_attr *)data->ioc_inlbuf1;
-
- switch (opc) {
- default:
- return -EINVAL;
-
- case LNET_CTL_DROP_ADD:
- if (!attr)
- return -EINVAL;
-
- return lnet_drop_rule_add(attr);
-
- case LNET_CTL_DROP_DEL:
- if (!attr)
- return -EINVAL;
-
- data->ioc_count = lnet_drop_rule_del(attr->fa_src,
- attr->fa_dst);
- return 0;
-
- case LNET_CTL_DROP_RESET:
- lnet_drop_rule_reset();
- return 0;
-
- case LNET_CTL_DROP_LIST:
- stat = (struct lnet_fault_stat *)data->ioc_inlbuf2;
- if (!attr || !stat)
- return -EINVAL;
-
- return lnet_drop_rule_list(data->ioc_count, attr, stat);
-
- case LNET_CTL_DELAY_ADD:
- if (!attr)
- return -EINVAL;
-
- return lnet_delay_rule_add(attr);
-
- case LNET_CTL_DELAY_DEL:
- if (!attr)
- return -EINVAL;
-
- data->ioc_count = lnet_delay_rule_del(attr->fa_src,
- attr->fa_dst, false);
- return 0;
-
- case LNET_CTL_DELAY_RESET:
- lnet_delay_rule_reset();
- return 0;
-
- case LNET_CTL_DELAY_LIST:
- stat = (struct lnet_fault_stat *)data->ioc_inlbuf2;
- if (!attr || !stat)
- return -EINVAL;
-
- return lnet_delay_rule_list(data->ioc_count, attr, stat);
- }
-}
-
-int
-lnet_fault_init(void)
-{
- BUILD_BUG_ON(LNET_PUT_BIT != 1 << LNET_MSG_PUT);
- BUILD_BUG_ON(LNET_ACK_BIT != 1 << LNET_MSG_ACK);
- BUILD_BUG_ON(LNET_GET_BIT != 1 << LNET_MSG_GET);
- BUILD_BUG_ON(LNET_REPLY_BIT != 1 << LNET_MSG_REPLY);
-
- mutex_init(&delay_dd.dd_mutex);
- spin_lock_init(&delay_dd.dd_lock);
- init_waitqueue_head(&delay_dd.dd_waitq);
- init_waitqueue_head(&delay_dd.dd_ctl_waitq);
- INIT_LIST_HEAD(&delay_dd.dd_sched_rules);
-
- return 0;
-}
-
-void
-lnet_fault_fini(void)
-{
- lnet_drop_rule_del(0, 0);
- lnet_delay_rule_del(0, 0, true);
-
- LASSERT(list_empty(&the_lnet.ln_drop_rules));
- LASSERT(list_empty(&the_lnet.ln_delay_rules));
- LASSERT(list_empty(&delay_dd.dd_sched_rules));
-}
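
Everything in this file is driven through lnet_fault_ctl() via the libcfs ioctl multiplexer; normally lctl fills in these structures from user space. A kernel-side sketch of installing a 1-in-100 drop rule (illustrative values, hypothetical function):

/* Sketch: build a drop-rule attribute and hand it to lnet_fault_ctl(). */
static int demo_add_drop_rule(struct libcfs_ioctl_data *data)
{
        static struct lnet_fault_attr attr;

        attr.fa_src = LNET_NID_ANY;     /* match any source NID */
        attr.fa_dst = LNET_NID_ANY;     /* match any destination NID */
        attr.u.drop.da_rate = 100;      /* drop 1 of every 100 messages */
        attr.u.drop.da_interval = 0;    /* rate-based, not time-based */

        data->ioc_inlbuf1 = (char *)&attr;
        return lnet_fault_ctl(LNET_CTL_DROP_ADD, data);
}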
diff --git a/drivers/staging/lustre/lnet/lnet/nidstrings.c b/drivers/staging/lustre/lnet/lnet/nidstrings.c
deleted file mode 100644
index 0f6c3fa16c65..000000000000
--- a/drivers/staging/lustre/lnet/lnet/nidstrings.c
+++ /dev/null
@@ -1,1261 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/nidstrings.c
- *
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/libcfs/libcfs.h>
-#include <linux/libcfs/libcfs_string.h>
-#include <uapi/linux/lnet/nidstr.h>
-
-/* max value for numeric network address */
-#define MAX_NUMERIC_VALUE 0xffffffff
-
-#define IPSTRING_LENGTH 16
-
-/* CAVEAT VENDITOR! Keep the canonical string representation of nets/nids
- * consistent in all conversion functions. Some code fragments are copied
- * around for the sake of clarity...
- */
-
- /* CAVEAT EMPTOR! Racy temporary buffer allocation!
- * Choose the number of nidstrings to support the MAXIMUM expected number of
- * concurrent users. If there are more, the returned string will be volatile.
- * NB this number must allow for a process to be descheduled for a timeslice
- * between getting its string and using it.
- */
-
-static char libcfs_nidstrings[LNET_NIDSTR_COUNT][LNET_NIDSTR_SIZE];
-static int libcfs_nidstring_idx;
-
-static DEFINE_SPINLOCK(libcfs_nidstring_lock);
-
-static struct netstrfns *libcfs_namenum2netstrfns(const char *name);
-
-char *
-libcfs_next_nidstring(void)
-{
- char *str;
- unsigned long flags;
-
- spin_lock_irqsave(&libcfs_nidstring_lock, flags);
-
- str = libcfs_nidstrings[libcfs_nidstring_idx++];
- if (libcfs_nidstring_idx == ARRAY_SIZE(libcfs_nidstrings))
- libcfs_nidstring_idx = 0;
-
- spin_unlock_irqrestore(&libcfs_nidstring_lock, flags);
- return str;
-}
-EXPORT_SYMBOL(libcfs_next_nidstring);
-
-/**
- * Nid range list syntax.
- * \verbatim
- *
- * <nidlist> :== <nidrange> [ ' ' <nidrange> ]
- * <nidrange> :== <addrrange> '@' <net>
- * <addrrange> :== '*' |
- * <ipaddr_range> |
- * <cfs_expr_list>
- * <ipaddr_range> :== <cfs_expr_list>.<cfs_expr_list>.<cfs_expr_list>.
- * <cfs_expr_list>
- * <cfs_expr_list> :== <number> |
- * <expr_list>
- * <expr_list> :== '[' <range_expr> [ ',' <range_expr>] ']'
- * <range_expr> :== <number> |
- * <number> '-' <number> |
- * <number> '-' <number> '/' <number>
- * <net> :== <netname> | <netname><number>
- * <netname> :== "lo" | "tcp" | "o2ib" | "cib" | "openib" | "iib" |
- * "vib" | "ra" | "elan" | "mx" | "ptl"
- * \endverbatim
- */
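
As a worked example, the string "10.0.[2-3].*@tcp *@lo" is a <nidlist> of two <nidrange>s: every address in 10.0.2.x and 10.0.3.x on network tcp, plus everything on the loopback network. A usage sketch of the entry points defined below (illustrative only, error handling trimmed):

/* Sketch: parse a nidlist string, test a NID against it, then free
 * the compiled list.
 */
static bool nid_is_listed(lnet_nid_t nid)
{
        char buf[] = "10.0.[2-3].*@tcp *@lo";
        struct list_head nidlist;
        bool match = false;

        if (cfs_parse_nidlist(buf, strlen(buf), &nidlist)) {
                match = cfs_match_nid(nid, &nidlist);
                cfs_free_nidlist(&nidlist);
        }
        return match;
}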
-
-/**
- * Structure to represent \<nidrange\> token of the syntax.
- *
- * One of this is created for each \<net\> parsed.
- */
-struct nidrange {
- /**
- * Link to the list of these structures, which is built during
- * nid range list parsing.
- */
- struct list_head nr_link;
- /**
- * List head for addrrange::ar_link.
- */
- struct list_head nr_addrranges;
- /**
- * Flag indicating that *@<net> was found.
- */
- int nr_all;
- /**
- * Pointer to corresponding element of libcfs_netstrfns.
- */
- struct netstrfns *nr_netstrfns;
- /**
- * Number of network. E.g. 5 if \<net\> is "elan5".
- */
- int nr_netnum;
-};
-
-/**
- * Structure to represent \<addrrange\> token of the syntax.
- */
-struct addrrange {
- /**
- * Link to nidrange::nr_addrranges.
- */
- struct list_head ar_link;
- /**
- * List head for cfs_expr_list::el_list.
- */
- struct list_head ar_numaddr_ranges;
-};
-
-/**
- * Parses \<addrrange\> token on the syntax.
- *
- * Allocates struct addrrange and links to \a nidrange via
- * (nidrange::nr_addrranges)
- *
- * \retval 0 if \a src parses to '*' | \<ipaddr_range\> | \<cfs_expr_list\>
- * \retval -errno otherwise
- */
-static int
-parse_addrange(const struct cfs_lstr *src, struct nidrange *nidrange)
-{
- struct addrrange *addrrange;
-
- if (src->ls_len == 1 && src->ls_str[0] == '*') {
- nidrange->nr_all = 1;
- return 0;
- }
-
- addrrange = kzalloc(sizeof(struct addrrange), GFP_NOFS);
- if (!addrrange)
- return -ENOMEM;
- list_add_tail(&addrrange->ar_link, &nidrange->nr_addrranges);
- INIT_LIST_HEAD(&addrrange->ar_numaddr_ranges);
-
- return nidrange->nr_netstrfns->nf_parse_addrlist(src->ls_str,
- src->ls_len,
- &addrrange->ar_numaddr_ranges);
-}
-
-/**
- * Finds or creates struct nidrange.
- *
- * Checks if \a src is a valid network name, looks for corresponding
- * nidrange on the list of nidranges (\a nidlist), and creates a new struct
- * nidrange if it is not found.
- *
- * \retval pointer to struct nidrange matching network specified via \a src
- * \retval NULL if \a src does not match any network
- */
-static struct nidrange *
-add_nidrange(const struct cfs_lstr *src,
- struct list_head *nidlist)
-{
- struct netstrfns *nf;
- struct nidrange *nr;
- int endlen;
- unsigned int netnum;
-
- if (src->ls_len >= LNET_NIDSTR_SIZE)
- return NULL;
-
- nf = libcfs_namenum2netstrfns(src->ls_str);
- if (!nf)
- return NULL;
- endlen = src->ls_len - strlen(nf->nf_name);
- if (!endlen)
- /* network name only, e.g. "elan" or "tcp" */
- netnum = 0;
- else {
- /*
- * e.g. "elan25" or "tcp23", refuse to parse if
- * network name is not appended with decimal or
- * hexadecimal number
- */
- if (!cfs_str2num_check(src->ls_str + strlen(nf->nf_name),
- endlen, &netnum, 0, MAX_NUMERIC_VALUE))
- return NULL;
- }
-
- list_for_each_entry(nr, nidlist, nr_link) {
- if (nr->nr_netstrfns != nf)
- continue;
- if (nr->nr_netnum != netnum)
- continue;
- return nr;
- }
-
- nr = kzalloc(sizeof(struct nidrange), GFP_NOFS);
- if (!nr)
- return NULL;
- list_add_tail(&nr->nr_link, nidlist);
- INIT_LIST_HEAD(&nr->nr_addrranges);
- nr->nr_netstrfns = nf;
- nr->nr_all = 0;
- nr->nr_netnum = netnum;
-
- return nr;
-}
-
-/**
- * Parses \<nidrange\> token of the syntax.
- *
- * \retval 1 if \a src parses to \<addrrange\> '@' \<net\>
- * \retval 0 otherwise
- */
-static int
-parse_nidrange(struct cfs_lstr *src, struct list_head *nidlist)
-{
- struct cfs_lstr addrrange;
- struct cfs_lstr net;
- struct nidrange *nr;
-
- if (!cfs_gettok(src, '@', &addrrange))
- goto failed;
-
- if (!cfs_gettok(src, '@', &net) || src->ls_str)
- goto failed;
-
- nr = add_nidrange(&net, nidlist);
- if (!nr)
- goto failed;
-
- if (parse_addrange(&addrrange, nr))
- goto failed;
-
- return 1;
-failed:
- return 0;
-}
-
-/**
- * Frees addrrange structures of \a list.
- *
- * For each struct addrrange structure found on \a list it frees
- * cfs_expr_list list attached to it and frees the addrrange itself.
- *
- * \retval none
- */
-static void
-free_addrranges(struct list_head *list)
-{
- while (!list_empty(list)) {
- struct addrrange *ar;
-
- ar = list_entry(list->next, struct addrrange, ar_link);
-
- cfs_expr_list_free_list(&ar->ar_numaddr_ranges);
- list_del(&ar->ar_link);
- kfree(ar);
- }
-}
-
-/**
- * Frees nidrange structures of \a list.
- *
- * For each struct nidrange structure found on \a list it frees
- * addrrange list attached to it and frees the nidrange itself.
- *
- * \retval none
- */
-void
-cfs_free_nidlist(struct list_head *list)
-{
- struct list_head *pos, *next;
- struct nidrange *nr;
-
- list_for_each_safe(pos, next, list) {
- nr = list_entry(pos, struct nidrange, nr_link);
- free_addrranges(&nr->nr_addrranges);
- list_del(pos);
- kfree(nr);
- }
-}
-EXPORT_SYMBOL(cfs_free_nidlist);
-
-/**
- * Parses nid range list.
- *
- * Parses with rigorous syntax and overflow checking \a str into
- * \<nidrange\> [ ' ' \<nidrange\> ], compiles \a str into set of
- * structures and links that structure to \a nidlist. The resulting
- * list can be used to match a NID against the set of NIDs defined by \a
- * str.
- * \see cfs_match_nid
- *
- * \retval 1 on success
- * \retval 0 otherwise
- */
-int
-cfs_parse_nidlist(char *str, int len, struct list_head *nidlist)
-{
- struct cfs_lstr src;
- struct cfs_lstr res;
- int rc;
-
- src.ls_str = str;
- src.ls_len = len;
- INIT_LIST_HEAD(nidlist);
- while (src.ls_str) {
- rc = cfs_gettok(&src, ' ', &res);
- if (!rc) {
- cfs_free_nidlist(nidlist);
- return 0;
- }
- rc = parse_nidrange(&res, nidlist);
- if (!rc) {
- cfs_free_nidlist(nidlist);
- return 0;
- }
- }
- return 1;
-}
-EXPORT_SYMBOL(cfs_parse_nidlist);
-
-/**
- * Matches a nid (\a nid) against the compiled list of nidranges (\a nidlist).
- *
- * \see cfs_parse_nidlist()
- *
- * \retval 1 on match
- * \retval 0 otherwise
- */
-int cfs_match_nid(lnet_nid_t nid, struct list_head *nidlist)
-{
- struct nidrange *nr;
- struct addrrange *ar;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- if (nr->nr_netstrfns->nf_type != LNET_NETTYP(LNET_NIDNET(nid)))
- continue;
- if (nr->nr_netnum != LNET_NETNUM(LNET_NIDNET(nid)))
- continue;
- if (nr->nr_all)
- return 1;
- list_for_each_entry(ar, &nr->nr_addrranges, ar_link)
- if (nr->nr_netstrfns->nf_match_addr(LNET_NIDADDR(nid),
- &ar->ar_numaddr_ranges))
- return 1;
- }
- return 0;
-}
-EXPORT_SYMBOL(cfs_match_nid);
-
-/**
- * Print the network part of the nidrange \a nr into the specified \a buffer.
- *
- * \retval number of characters written
- */
-static int
-cfs_print_network(char *buffer, int count, struct nidrange *nr)
-{
- struct netstrfns *nf = nr->nr_netstrfns;
-
- if (!nr->nr_netnum)
- return scnprintf(buffer, count, "@%s", nf->nf_name);
- else
- return scnprintf(buffer, count, "@%s%u",
- nf->nf_name, nr->nr_netnum);
-}
-
-/**
- * Print a list of addrrange (\a addrranges) into the specified \a buffer.
- * At max \a count characters can be printed into \a buffer.
- *
- * \retval number of characters written
- */
-static int
-cfs_print_addrranges(char *buffer, int count, struct list_head *addrranges,
- struct nidrange *nr)
-{
- int i = 0;
- struct addrrange *ar;
- struct netstrfns *nf = nr->nr_netstrfns;
-
- list_for_each_entry(ar, addrranges, ar_link) {
- if (i)
- i += scnprintf(buffer + i, count - i, " ");
- i += nf->nf_print_addrlist(buffer + i, count - i,
- &ar->ar_numaddr_ranges);
- i += cfs_print_network(buffer + i, count - i, nr);
- }
- return i;
-}
-
-/**
- * Print a list of nidranges (\a nidlist) into the specified \a buffer.
- * At max \a count characters can be printed into \a buffer.
- * Nidranges are separated by a space character.
- *
- * \retval number of characters written
- */
-int cfs_print_nidlist(char *buffer, int count, struct list_head *nidlist)
-{
- int i = 0;
- struct nidrange *nr;
-
- if (count <= 0)
- return 0;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- if (i)
- i += scnprintf(buffer + i, count - i, " ");
-
- if (nr->nr_all) {
- LASSERT(list_empty(&nr->nr_addrranges));
- i += scnprintf(buffer + i, count - i, "*");
- i += cfs_print_network(buffer + i, count - i, nr);
- } else {
- i += cfs_print_addrranges(buffer + i, count - i,
- &nr->nr_addrranges, nr);
- }
- }
- return i;
-}
-EXPORT_SYMBOL(cfs_print_nidlist);
-
-/**
- * Determines minimum and maximum addresses for a single
- * IP address range
- *
- * \param ar
- * \param min_nid
- * \param max_nid
- */
-static void cfs_ip_ar_min_max(struct addrrange *ar, __u32 *min_nid,
- __u32 *max_nid)
-{
- struct cfs_expr_list *el;
- struct cfs_range_expr *re;
- __u32 tmp_ip_addr = 0;
- unsigned int min_ip[4] = {0};
- unsigned int max_ip[4] = {0};
- int re_count = 0;
-
- list_for_each_entry(el, &ar->ar_numaddr_ranges, el_link) {
- list_for_each_entry(re, &el->el_exprs, re_link) {
- min_ip[re_count] = re->re_lo;
- max_ip[re_count] = re->re_hi;
- re_count++;
- }
- }
-
- tmp_ip_addr = ((min_ip[0] << 24) | (min_ip[1] << 16) |
- (min_ip[2] << 8) | min_ip[3]);
-
- if (min_nid)
- *min_nid = tmp_ip_addr;
-
- tmp_ip_addr = ((max_ip[0] << 24) | (max_ip[1] << 16) |
- (max_ip[2] << 8) | max_ip[3]);
-
- if (max_nid)
- *max_nid = tmp_ip_addr;
-}
-
-/**
- * Determines minimum and maximum addresses for a single
- * numeric address range
- *
- * \param ar
- * \param min_nid
- * \param max_nid
- */
-static void cfs_num_ar_min_max(struct addrrange *ar, __u32 *min_nid,
- __u32 *max_nid)
-{
- struct cfs_expr_list *el;
- struct cfs_range_expr *re;
- unsigned int min_addr = 0;
- unsigned int max_addr = 0;
-
- list_for_each_entry(el, &ar->ar_numaddr_ranges, el_link) {
- list_for_each_entry(re, &el->el_exprs, re_link) {
- if (re->re_lo < min_addr || !min_addr)
- min_addr = re->re_lo;
- if (re->re_hi > max_addr)
- max_addr = re->re_hi;
- }
- }
-
- if (min_nid)
- *min_nid = min_addr;
- if (max_nid)
- *max_nid = max_addr;
-}
-
-/**
- * Determines whether an expression list in a nidrange contains exactly
- * one contiguous address range. Calls the correct netstrfns for the LND
- *
- * \param *nidlist
- *
- * \retval true if contiguous
- * \retval false if not contiguous
- */
-bool cfs_nidrange_is_contiguous(struct list_head *nidlist)
-{
- struct nidrange *nr;
- struct netstrfns *nf = NULL;
- char *lndname = NULL;
- int netnum = -1;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- nf = nr->nr_netstrfns;
- if (!lndname)
- lndname = nf->nf_name;
- if (netnum == -1)
- netnum = nr->nr_netnum;
-
- if (strcmp(lndname, nf->nf_name) ||
- netnum != nr->nr_netnum)
- return false;
- }
-
- if (!nf)
- return false;
-
- if (!nf->nf_is_contiguous(nidlist))
- return false;
-
- return true;
-}
-EXPORT_SYMBOL(cfs_nidrange_is_contiguous);
-
-/**
- * Determines whether an expression list in a num nidrange contains exactly
- * one contiguous address range.
- *
- * \param *nidlist
- *
- * \retval true if contiguous
- * \retval false if not contiguous
- */
-static bool cfs_num_is_contiguous(struct list_head *nidlist)
-{
- struct nidrange *nr;
- struct addrrange *ar;
- struct cfs_expr_list *el;
- struct cfs_range_expr *re;
- int last_hi = 0;
- __u32 last_end_nid = 0;
- __u32 current_start_nid = 0;
- __u32 current_end_nid = 0;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
- cfs_num_ar_min_max(ar, &current_start_nid,
- &current_end_nid);
- if (last_end_nid &&
- (current_start_nid - last_end_nid != 1))
- return false;
- last_end_nid = current_end_nid;
- list_for_each_entry(el, &ar->ar_numaddr_ranges,
- el_link) {
- list_for_each_entry(re, &el->el_exprs,
- re_link) {
- if (re->re_stride > 1)
- return false;
- else if (last_hi &&
- re->re_hi - last_hi != 1)
- return false;
- last_hi = re->re_hi;
- }
- }
- }
- }
-
- return true;
-}
-
-/**
- * Determines whether an expression list in an ip nidrange contains exactly
- * one contiguous address range.
- *
- * \param *nidlist
- *
- * \retval true if contiguous
- * \retval false if not contiguous
- */
-static bool cfs_ip_is_contiguous(struct list_head *nidlist)
-{
- struct nidrange *nr;
- struct addrrange *ar;
- struct cfs_expr_list *el;
- struct cfs_range_expr *re;
- int expr_count;
- int last_hi = 255;
- int last_diff = 0;
- __u32 last_end_nid = 0;
- __u32 current_start_nid = 0;
- __u32 current_end_nid = 0;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
- last_hi = 255;
- last_diff = 0;
- cfs_ip_ar_min_max(ar, &current_start_nid,
- &current_end_nid);
- if (last_end_nid &&
- (current_start_nid - last_end_nid != 1))
- return false;
- last_end_nid = current_end_nid;
- list_for_each_entry(el, &ar->ar_numaddr_ranges,
- el_link) {
- expr_count = 0;
- list_for_each_entry(re, &el->el_exprs,
- re_link) {
- expr_count++;
- if (re->re_stride > 1 ||
- (last_diff > 0 && last_hi != 255) ||
- (last_diff > 0 && last_hi == 255 &&
- re->re_lo > 0))
- return false;
- last_hi = re->re_hi;
- last_diff = re->re_hi - re->re_lo;
- }
- }
- }
- }
-
- return true;
-}
-
-/**
- * Takes a linked list of nidrange expressions, determines the minimum
- * and maximum nid and formats them into the supplied NID strings
- *
- * \param *nidlist
- * \param *min_nid
- * \param *max_nid
- */
-void cfs_nidrange_find_min_max(struct list_head *nidlist, char *min_nid,
- char *max_nid, size_t nidstr_length)
-{
- struct nidrange *nr;
- struct netstrfns *nf = NULL;
- int netnum = -1;
- __u32 min_addr;
- __u32 max_addr;
- char *lndname = NULL;
- char min_addr_str[IPSTRING_LENGTH];
- char max_addr_str[IPSTRING_LENGTH];
-
- list_for_each_entry(nr, nidlist, nr_link) {
- nf = nr->nr_netstrfns;
- lndname = nf->nf_name;
- if (netnum == -1)
- netnum = nr->nr_netnum;
-
- nf->nf_min_max(nidlist, &min_addr, &max_addr);
- }
- nf->nf_addr2str(min_addr, min_addr_str, sizeof(min_addr_str));
- nf->nf_addr2str(max_addr, max_addr_str, sizeof(max_addr_str));
-
- snprintf(min_nid, nidstr_length, "%s@%s%d", min_addr_str, lndname,
- netnum);
- snprintf(max_nid, nidstr_length, "%s@%s%d", max_addr_str, lndname,
- netnum);
-}
-EXPORT_SYMBOL(cfs_nidrange_find_min_max);
-
-/**
- * Determines the min and max NID values for num LNDs
- *
- * \param *nidlist
- * \param *min_nid
- * \param *max_nid
- */
-static void cfs_num_min_max(struct list_head *nidlist, __u32 *min_nid,
- __u32 *max_nid)
-{
- struct nidrange *nr;
- struct addrrange *ar;
- unsigned int tmp_min_addr = 0;
- unsigned int tmp_max_addr = 0;
- unsigned int min_addr = 0;
- unsigned int max_addr = 0;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
- cfs_num_ar_min_max(ar, &tmp_min_addr,
- &tmp_max_addr);
- if (tmp_min_addr < min_addr || !min_addr)
- min_addr = tmp_min_addr;
- if (tmp_max_addr > max_addr)
- max_addr = tmp_max_addr;
- }
- }
- *max_nid = max_addr;
- *min_nid = min_addr;
-}
-
-/**
- * Takes an nidlist and determines the minimum and maximum
- * ip addresses.
- *
- * \param *nidlist
- * \param *min_nid
- * \param *max_nid
- */
-static void cfs_ip_min_max(struct list_head *nidlist, __u32 *min_nid,
- __u32 *max_nid)
-{
- struct nidrange *nr;
- struct addrrange *ar;
- __u32 tmp_min_ip_addr = 0;
- __u32 tmp_max_ip_addr = 0;
- __u32 min_ip_addr = 0;
- __u32 max_ip_addr = 0;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
- cfs_ip_ar_min_max(ar, &tmp_min_ip_addr,
- &tmp_max_ip_addr);
- if (tmp_min_ip_addr < min_ip_addr || !min_ip_addr)
- min_ip_addr = tmp_min_ip_addr;
- if (tmp_max_ip_addr > max_ip_addr)
- max_ip_addr = tmp_max_ip_addr;
- }
- }
-
- if (min_nid)
- *min_nid = min_ip_addr;
- if (max_nid)
- *max_nid = max_ip_addr;
-}
-
-static int
-libcfs_lo_str2addr(const char *str, int nob, __u32 *addr)
-{
- *addr = 0;
- return 1;
-}
-
-static void
-libcfs_ip_addr2str(__u32 addr, char *str, size_t size)
-{
- snprintf(str, size, "%u.%u.%u.%u",
- (addr >> 24) & 0xff, (addr >> 16) & 0xff,
- (addr >> 8) & 0xff, addr & 0xff);
-}
-
-/*
- * CAVEAT EMPTOR XscanfX
- * I use "%n" at the end of a sscanf format to detect trailing junk. However
- * sscanf may return immediately if it sees the terminating '\0' in a string,
- * so I initialise the %n variable to the expected length. If sscanf sets it,
- * fine; if it doesn't, then the scan ended at the end of the string, which is
- * fine too :)
- */
-static int
-libcfs_ip_str2addr(const char *str, int nob, __u32 *addr)
-{
- unsigned int a;
- unsigned int b;
- unsigned int c;
- unsigned int d;
- int n = nob; /* XscanfX */
-
- /* numeric IP? */
- if (sscanf(str, "%u.%u.%u.%u%n", &a, &b, &c, &d, &n) >= 4 &&
- n == nob &&
- !(a & ~0xff) && !(b & ~0xff) &&
- !(c & ~0xff) && !(d & ~0xff)) {
- *addr = ((a << 24) | (b << 16) | (c << 8) | d);
- return 1;
- }
-
- return 0;
-}
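
The "%n" idiom described in the comment above is easy to demonstrate in isolation. The following is a minimal userspace sketch (an illustration, not part of the removed driver) showing how pre-initialising the %n variable to the expected length distinguishes a clean parse from one with trailing junk:

	#include <stdio.h>
	#include <string.h>

	/* Returns 1 if str is exactly a dotted quad with no trailing junk. */
	static int parse_ip(const char *str)
	{
		unsigned int a, b, c, d;
		int nob = (int)strlen(str);
		int n = nob;	/* pre-set in case sscanf stops at the '\0' */

		return sscanf(str, "%u.%u.%u.%u%n", &a, &b, &c, &d, &n) >= 4 &&
		       n == nob &&
		       a <= 255 && b <= 255 && c <= 255 && d <= 255;
	}

	int main(void)
	{
		/* prints "1 0": the second string leaves n != nob */
		printf("%d %d\n", parse_ip("10.0.0.1"), parse_ip("10.0.0.1junk"));
		return 0;
	}
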
-
-/* Used by lnet/config.c so it can't be static */
-int
-cfs_ip_addr_parse(char *str, int len, struct list_head *list)
-{
- struct cfs_expr_list *el;
- struct cfs_lstr src;
- int rc;
- int i;
-
- src.ls_str = str;
- src.ls_len = len;
- i = 0;
-
- while (src.ls_str) {
- struct cfs_lstr res;
-
- if (!cfs_gettok(&src, '.', &res)) {
- rc = -EINVAL;
- goto out;
- }
-
- rc = cfs_expr_list_parse(res.ls_str, res.ls_len, 0, 255, &el);
- if (rc)
- goto out;
-
- list_add_tail(&el->el_link, list);
- i++;
- }
-
- if (i == 4)
- return 0;
-
- rc = -EINVAL;
-out:
- cfs_expr_list_free_list(list);
-
- return rc;
-}
-
-static int
-libcfs_ip_addr_range_print(char *buffer, int count, struct list_head *list)
-{
- int i = 0, j = 0;
- struct cfs_expr_list *el;
-
- list_for_each_entry(el, list, el_link) {
- LASSERT(j++ < 4);
- if (i)
- i += scnprintf(buffer + i, count - i, ".");
- i += cfs_expr_list_print(buffer + i, count - i, el);
- }
- return i;
-}
-
-/**
- * Matches address (\a addr) against address set encoded in \a list.
- *
- * \retval 1 if \a addr matches
- * \retval 0 otherwise
- */
-int
-cfs_ip_addr_match(__u32 addr, struct list_head *list)
-{
- struct cfs_expr_list *el;
- int i = 0;
-
- list_for_each_entry_reverse(el, list, el_link) {
- if (!cfs_expr_list_match(addr & 0xff, el))
- return 0;
- addr >>= 8;
- i++;
- }
-
- return i == 4;
-}
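
The reverse walk in cfs_ip_addr_match() works because the expression list stores the most-significant octet first while the address is consumed least-significant octet first. A standalone sketch of the same shift-and-mask matching, with plain lo/hi ranges standing in for cfs_expr_list (hypothetical types, for illustration only):

	#include <stdio.h>

	struct octet_range { unsigned char lo, hi; };

	/* Match addr against four ranges given most-significant octet first,
	 * consuming the address least-significant octet first, as the list
	 * walk above does in reverse. */
	static int ip_match(unsigned int addr, const struct octet_range r[4])
	{
		int i;

		for (i = 3; i >= 0; i--) {	/* last range first */
			unsigned char octet = addr & 0xff;

			if (octet < r[i].lo || octet > r[i].hi)
				return 0;
			addr >>= 8;
		}
		return 1;
	}

	int main(void)
	{
		/* 192.168.[2-10].* */
		struct octet_range r[4] = {
			{192, 192}, {168, 168}, {2, 10}, {0, 255}
		};
		unsigned int ip = (192u << 24) | (168u << 16) | (5u << 8) | 7u;

		printf("%d\n", ip_match(ip, r));	/* 1 */
		return 0;
	}
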
-
-static void
-libcfs_decnum_addr2str(__u32 addr, char *str, size_t size)
-{
- snprintf(str, size, "%u", addr);
-}
-
-static int
-libcfs_num_str2addr(const char *str, int nob, __u32 *addr)
-{
- int n;
-
- n = nob;
- if (sscanf(str, "0x%x%n", addr, &n) >= 1 && n == nob)
- return 1;
-
- n = nob;
- if (sscanf(str, "0X%x%n", addr, &n) >= 1 && n == nob)
- return 1;
-
- n = nob;
- if (sscanf(str, "%u%n", addr, &n) >= 1 && n == nob)
- return 1;
-
- return 0;
-}
-
-/**
- * nf_parse_addrlist method for networks using numeric addresses.
- *
- * Examples of such networks are gm and elan.
- *
- * \retval 0 if \a str was parsed to a numeric address
- * \retval negative errno otherwise
- */
-static int
-libcfs_num_parse(char *str, int len, struct list_head *list)
-{
- struct cfs_expr_list *el;
- int rc;
-
- rc = cfs_expr_list_parse(str, len, 0, MAX_NUMERIC_VALUE, &el);
- if (!rc)
- list_add_tail(&el->el_link, list);
-
- return rc;
-}
-
-static int
-libcfs_num_addr_range_print(char *buffer, int count, struct list_head *list)
-{
- int i = 0, j = 0;
- struct cfs_expr_list *el;
-
- list_for_each_entry(el, list, el_link) {
- LASSERT(j++ < 1);
- i += cfs_expr_list_print(buffer + i, count - i, el);
- }
- return i;
-}
-
-/*
- * nf_match_addr method for networks using numeric addresses
- *
- * \retval 1 on match
- * \retval 0 otherwise
- */
-static int
-libcfs_num_match(__u32 addr, struct list_head *numaddr)
-{
- struct cfs_expr_list *el;
-
- LASSERT(!list_empty(numaddr));
- el = list_entry(numaddr->next, struct cfs_expr_list, el_link);
-
- return cfs_expr_list_match(addr, el);
-}
-
-static struct netstrfns libcfs_netstrfns[] = {
- { .nf_type = LOLND,
- .nf_name = "lo",
- .nf_modname = "klolnd",
- .nf_addr2str = libcfs_decnum_addr2str,
- .nf_str2addr = libcfs_lo_str2addr,
- .nf_parse_addrlist = libcfs_num_parse,
- .nf_print_addrlist = libcfs_num_addr_range_print,
- .nf_match_addr = libcfs_num_match,
- .nf_is_contiguous = cfs_num_is_contiguous,
- .nf_min_max = cfs_num_min_max },
- { .nf_type = SOCKLND,
- .nf_name = "tcp",
- .nf_modname = "ksocklnd",
- .nf_addr2str = libcfs_ip_addr2str,
- .nf_str2addr = libcfs_ip_str2addr,
- .nf_parse_addrlist = cfs_ip_addr_parse,
- .nf_print_addrlist = libcfs_ip_addr_range_print,
- .nf_match_addr = cfs_ip_addr_match,
- .nf_is_contiguous = cfs_ip_is_contiguous,
- .nf_min_max = cfs_ip_min_max },
- { .nf_type = O2IBLND,
- .nf_name = "o2ib",
- .nf_modname = "ko2iblnd",
- .nf_addr2str = libcfs_ip_addr2str,
- .nf_str2addr = libcfs_ip_str2addr,
- .nf_parse_addrlist = cfs_ip_addr_parse,
- .nf_print_addrlist = libcfs_ip_addr_range_print,
- .nf_match_addr = cfs_ip_addr_match,
- .nf_is_contiguous = cfs_ip_is_contiguous,
- .nf_min_max = cfs_ip_min_max },
- { .nf_type = GNILND,
- .nf_name = "gni",
- .nf_modname = "kgnilnd",
- .nf_addr2str = libcfs_decnum_addr2str,
- .nf_str2addr = libcfs_num_str2addr,
- .nf_parse_addrlist = libcfs_num_parse,
- .nf_print_addrlist = libcfs_num_addr_range_print,
- .nf_match_addr = libcfs_num_match,
- .nf_is_contiguous = cfs_num_is_contiguous,
- .nf_min_max = cfs_num_min_max },
- { .nf_type = GNIIPLND,
- .nf_name = "gip",
- .nf_modname = "kgnilnd",
- .nf_addr2str = libcfs_ip_addr2str,
- .nf_str2addr = libcfs_ip_str2addr,
- .nf_parse_addrlist = cfs_ip_addr_parse,
- .nf_print_addrlist = libcfs_ip_addr_range_print,
- .nf_match_addr = cfs_ip_addr_match,
- .nf_is_contiguous = cfs_ip_is_contiguous,
- .nf_min_max = cfs_ip_min_max },
-};
-
-static const size_t libcfs_nnetstrfns = ARRAY_SIZE(libcfs_netstrfns);
-
-static struct netstrfns *
-libcfs_lnd2netstrfns(__u32 lnd)
-{
- int i;
-
- for (i = 0; i < libcfs_nnetstrfns; i++)
- if (lnd == libcfs_netstrfns[i].nf_type)
- return &libcfs_netstrfns[i];
-
- return NULL;
-}
-
-static struct netstrfns *
-libcfs_namenum2netstrfns(const char *name)
-{
- struct netstrfns *nf;
- int i;
-
- for (i = 0; i < libcfs_nnetstrfns; i++) {
- nf = &libcfs_netstrfns[i];
- if (!strncmp(name, nf->nf_name, strlen(nf->nf_name)))
- return nf;
- }
- return NULL;
-}
-
-static struct netstrfns *
-libcfs_name2netstrfns(const char *name)
-{
- int i;
-
- for (i = 0; i < libcfs_nnetstrfns; i++)
- if (!strcmp(libcfs_netstrfns[i].nf_name, name))
- return &libcfs_netstrfns[i];
-
- return NULL;
-}
-
-int
-libcfs_isknown_lnd(__u32 lnd)
-{
- return !!libcfs_lnd2netstrfns(lnd);
-}
-EXPORT_SYMBOL(libcfs_isknown_lnd);
-
-char *
-libcfs_lnd2modname(__u32 lnd)
-{
- struct netstrfns *nf = libcfs_lnd2netstrfns(lnd);
-
- return nf ? nf->nf_modname : NULL;
-}
-EXPORT_SYMBOL(libcfs_lnd2modname);
-
-int
-libcfs_str2lnd(const char *str)
-{
- struct netstrfns *nf = libcfs_name2netstrfns(str);
-
- if (nf)
- return nf->nf_type;
-
- return -ENXIO;
-}
-EXPORT_SYMBOL(libcfs_str2lnd);
-
-char *
-libcfs_lnd2str_r(__u32 lnd, char *buf, size_t buf_size)
-{
- struct netstrfns *nf;
-
- nf = libcfs_lnd2netstrfns(lnd);
- if (!nf)
- snprintf(buf, buf_size, "?%u?", lnd);
- else
- snprintf(buf, buf_size, "%s", nf->nf_name);
-
- return buf;
-}
-EXPORT_SYMBOL(libcfs_lnd2str_r);
-
-char *
-libcfs_net2str_r(__u32 net, char *buf, size_t buf_size)
-{
- __u32 nnum = LNET_NETNUM(net);
- __u32 lnd = LNET_NETTYP(net);
- struct netstrfns *nf;
-
- nf = libcfs_lnd2netstrfns(lnd);
- if (!nf)
- snprintf(buf, buf_size, "<%u:%u>", lnd, nnum);
- else if (!nnum)
- snprintf(buf, buf_size, "%s", nf->nf_name);
- else
- snprintf(buf, buf_size, "%s%u", nf->nf_name, nnum);
-
- return buf;
-}
-EXPORT_SYMBOL(libcfs_net2str_r);
-
-char *
-libcfs_nid2str_r(lnet_nid_t nid, char *buf, size_t buf_size)
-{
- __u32 addr = LNET_NIDADDR(nid);
- __u32 net = LNET_NIDNET(nid);
- __u32 nnum = LNET_NETNUM(net);
- __u32 lnd = LNET_NETTYP(net);
- struct netstrfns *nf;
-
- if (nid == LNET_NID_ANY) {
- strncpy(buf, "<?>", buf_size);
- buf[buf_size - 1] = '\0';
- return buf;
- }
-
- nf = libcfs_lnd2netstrfns(lnd);
- if (!nf) {
- snprintf(buf, buf_size, "%x@<%u:%u>", addr, lnd, nnum);
- } else {
- size_t addr_len;
-
- nf->nf_addr2str(addr, buf, buf_size);
- addr_len = strlen(buf);
- if (!nnum)
- snprintf(buf + addr_len, buf_size - addr_len, "@%s",
- nf->nf_name);
- else
- snprintf(buf + addr_len, buf_size - addr_len, "@%s%u",
- nf->nf_name, nnum);
- }
-
- return buf;
-}
-EXPORT_SYMBOL(libcfs_nid2str_r);
-
-static struct netstrfns *
-libcfs_str2net_internal(const char *str, __u32 *net)
-{
- struct netstrfns *nf = NULL;
- int nob;
- unsigned int netnum;
- int i;
-
- for (i = 0; i < libcfs_nnetstrfns; i++) {
- nf = &libcfs_netstrfns[i];
- if (!strncmp(str, nf->nf_name, strlen(nf->nf_name)))
- break;
- }
-
- if (i == libcfs_nnetstrfns)
- return NULL;
-
- nob = strlen(nf->nf_name);
-
- if (strlen(str) == (unsigned int)nob) {
- netnum = 0;
- } else {
- if (nf->nf_type == LOLND) /* net number not allowed */
- return NULL;
-
- str += nob;
- i = strlen(str);
- if (sscanf(str, "%u%n", &netnum, &i) < 1 ||
- i != (int)strlen(str))
- return NULL;
- }
-
- *net = LNET_MKNET(nf->nf_type, netnum);
- return nf;
-}
-
-__u32
-libcfs_str2net(const char *str)
-{
- __u32 net;
-
- if (libcfs_str2net_internal(str, &net))
- return net;
-
- return LNET_NIDNET(LNET_NID_ANY);
-}
-EXPORT_SYMBOL(libcfs_str2net);
-
-lnet_nid_t
-libcfs_str2nid(const char *str)
-{
- const char *sep = strchr(str, '@');
- struct netstrfns *nf;
- __u32 net;
- __u32 addr;
-
- if (sep) {
- nf = libcfs_str2net_internal(sep + 1, &net);
- if (!nf)
- return LNET_NID_ANY;
- } else {
- sep = str + strlen(str);
- net = LNET_MKNET(SOCKLND, 0);
- nf = libcfs_lnd2netstrfns(SOCKLND);
- LASSERT(nf);
- }
-
- if (!nf->nf_str2addr(str, (int)(sep - str), &addr))
- return LNET_NID_ANY;
-
- return LNET_MKNID(net, addr);
-}
-EXPORT_SYMBOL(libcfs_str2nid);
-
-char *
-libcfs_id2str(struct lnet_process_id id)
-{
- char *str = libcfs_next_nidstring();
-
- if (id.pid == LNET_PID_ANY) {
- snprintf(str, LNET_NIDSTR_SIZE,
- "LNET_PID_ANY-%s", libcfs_nid2str(id.nid));
- return str;
- }
-
- snprintf(str, LNET_NIDSTR_SIZE, "%s%u-%s",
- id.pid & LNET_PID_USERFLAG ? "U" : "",
- id.pid & ~LNET_PID_USERFLAG, libcfs_nid2str(id.nid));
- return str;
-}
-EXPORT_SYMBOL(libcfs_id2str);
-
-int
-libcfs_str2anynid(lnet_nid_t *nidp, const char *str)
-{
- if (!strcmp(str, "*")) {
- *nidp = LNET_NID_ANY;
- return 1;
- }
-
- *nidp = libcfs_str2nid(str);
- return *nidp != LNET_NID_ANY;
-}
-EXPORT_SYMBOL(libcfs_str2anynid);
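
Taken together, these helpers round-trip NIDs through a packed layout: a NID is a 64-bit value whose high 32 bits identify the network (LND type in the top 16 bits, network number in the bottom 16) and whose low 32 bits carry the LND-specific address. A hedged standalone sketch of that packing, mirroring what LNET_MKNID/LNET_MKNET/LNET_NIDNET/LNET_NIDADDR are expected to do rather than copying the real macros:

	#include <stdio.h>
	#include <stdint.h>

	/* Assumed layout: nid = (net << 32) | addr, net = (type << 16) | num. */
	#define MKNET(type, num)  ((((uint32_t)(type)) << 16) | (num))
	#define MKNID(net, addr)  ((((uint64_t)(net)) << 32) | (addr))
	#define NIDNET(nid)       ((uint32_t)((nid) >> 32))
	#define NIDADDR(nid)      ((uint32_t)((nid) & 0xffffffffu))

	int main(void)
	{
		uint32_t addr = (10u << 24) | 1u;		/* 10.0.0.1 */
		uint64_t nid = MKNID(MKNET(2 /* e.g. SOCKLND */, 0), addr);

		printf("net %#x addr %u.%u.%u.%u\n", (unsigned)NIDNET(nid),
		       (unsigned)((NIDADDR(nid) >> 24) & 0xff),
		       (unsigned)((NIDADDR(nid) >> 16) & 0xff),
		       (unsigned)((NIDADDR(nid) >> 8) & 0xff),
		       (unsigned)(NIDADDR(nid) & 0xff));
		return 0;
	}
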
diff --git a/drivers/staging/lustre/lnet/lnet/peer.c b/drivers/staging/lustre/lnet/lnet/peer.c
deleted file mode 100644
index 58294149f7b2..000000000000
--- a/drivers/staging/lustre/lnet/lnet/peer.c
+++ /dev/null
@@ -1,456 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/peer.c
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-
-int
-lnet_peer_tables_create(void)
-{
- struct lnet_peer_table *ptable;
- struct list_head *hash;
- int i;
- int j;
-
- the_lnet.ln_peer_tables = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(*ptable));
- if (!the_lnet.ln_peer_tables) {
- CERROR("Failed to allocate cpu-partition peer tables\n");
- return -ENOMEM;
- }
-
- cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
- INIT_LIST_HEAD(&ptable->pt_deathrow);
-
- hash = kvmalloc_cpt(LNET_PEER_HASH_SIZE * sizeof(*hash),
- GFP_KERNEL, i);
- if (!hash) {
- CERROR("Failed to create peer hash table\n");
- lnet_peer_tables_destroy();
- return -ENOMEM;
- }
-
- for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
- INIT_LIST_HEAD(&hash[j]);
- ptable->pt_hash = hash; /* sign of initialization */
- }
-
- return 0;
-}
-
-void
-lnet_peer_tables_destroy(void)
-{
- struct lnet_peer_table *ptable;
- struct list_head *hash;
- int i;
- int j;
-
- if (!the_lnet.ln_peer_tables)
- return;
-
- cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
- hash = ptable->pt_hash;
- if (!hash) /* not initialized */
- break;
-
- LASSERT(list_empty(&ptable->pt_deathrow));
-
- ptable->pt_hash = NULL;
- for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
- LASSERT(list_empty(&hash[j]));
-
- kvfree(hash);
- }
-
- cfs_percpt_free(the_lnet.ln_peer_tables);
- the_lnet.ln_peer_tables = NULL;
-}
-
-static void
-lnet_peer_table_cleanup_locked(struct lnet_ni *ni,
- struct lnet_peer_table *ptable)
-{
- int i;
- struct lnet_peer *lp;
- struct lnet_peer *tmp;
-
- for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
- list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
- lp_hashlist) {
- if (ni && ni != lp->lp_ni)
- continue;
- list_del_init(&lp->lp_hashlist);
- /* Lose hash table's ref */
- ptable->pt_zombies++;
- lnet_peer_decref_locked(lp);
- }
- }
-}
-
-static void
-lnet_peer_table_deathrow_wait_locked(struct lnet_peer_table *ptable,
- int cpt_locked)
-{
- int i;
-
- for (i = 3; ptable->pt_zombies; i++) {
- lnet_net_unlock(cpt_locked);
-
- if (is_power_of_2(i)) {
- CDEBUG(D_WARNING,
- "Waiting for %d zombies on peer table\n",
- ptable->pt_zombies);
- }
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ >> 1);
- lnet_net_lock(cpt_locked);
- }
-}
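
The is_power_of_2(i) guard above is a common log-throttling idiom: warnings fire on iterations 4, 8, 16, ... so the log grows logarithmically with the length of the wait (lnet_prune_rc_data() later uses the equivalent `(i & (-i)) == i` test). A minimal userspace sketch of the same back-off, with hypothetical names:

	#include <stdio.h>

	static int is_pow2(unsigned int v)
	{
		return v && !(v & (v - 1));
	}

	int main(void)
	{
		unsigned int i;

		/* Starting at 3 skips the noisy first couple of iterations. */
		for (i = 3; i < 40; i++)
			if (is_pow2(i))
				printf("still waiting (iteration %u)\n", i);
		return 0;	/* prints at 4, 8, 16, 32 */
	}
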
-
-static void
-lnet_peer_table_del_rtrs_locked(struct lnet_ni *ni,
- struct lnet_peer_table *ptable,
- int cpt_locked)
-{
- struct lnet_peer *lp;
- struct lnet_peer *tmp;
- lnet_nid_t lp_nid;
- int i;
-
- for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
- list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
- lp_hashlist) {
- if (ni != lp->lp_ni)
- continue;
-
- if (!lp->lp_rtr_refcount)
- continue;
-
- lp_nid = lp->lp_nid;
-
- lnet_net_unlock(cpt_locked);
- lnet_del_route(LNET_NIDNET(LNET_NID_ANY), lp_nid);
- lnet_net_lock(cpt_locked);
- }
- }
-}
-
-void
-lnet_peer_tables_cleanup(struct lnet_ni *ni)
-{
- struct lnet_peer_table *ptable;
- struct list_head deathrow;
- struct lnet_peer *lp;
- struct lnet_peer *temp;
- int i;
-
- INIT_LIST_HEAD(&deathrow);
-
- LASSERT(the_lnet.ln_shutdown || ni);
- /*
- * If just deleting the peers for a NI, get rid of any routes these
- * peers are gateways for.
- */
- cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
- lnet_net_lock(i);
- lnet_peer_table_del_rtrs_locked(ni, ptable, i);
- lnet_net_unlock(i);
- }
-
- /*
- * Start the process of moving the applicable peers to
- * deathrow.
- */
- cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
- lnet_net_lock(i);
- lnet_peer_table_cleanup_locked(ni, ptable);
- lnet_net_unlock(i);
- }
-
- /* Cleanup all entries on deathrow. */
- cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
- lnet_net_lock(i);
- lnet_peer_table_deathrow_wait_locked(ptable, i);
- list_splice_init(&ptable->pt_deathrow, &deathrow);
- lnet_net_unlock(i);
- }
-
- list_for_each_entry_safe(lp, temp, &deathrow, lp_hashlist) {
- list_del(&lp->lp_hashlist);
- kfree(lp);
- }
-}
-
-void
-lnet_destroy_peer_locked(struct lnet_peer *lp)
-{
- struct lnet_peer_table *ptable;
-
- LASSERT(!lp->lp_refcount);
- LASSERT(!lp->lp_rtr_refcount);
- LASSERT(list_empty(&lp->lp_txq));
- LASSERT(list_empty(&lp->lp_hashlist));
- LASSERT(!lp->lp_txqnob);
-
- ptable = the_lnet.ln_peer_tables[lp->lp_cpt];
- LASSERT(ptable->pt_number > 0);
- ptable->pt_number--;
-
- lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt);
- lp->lp_ni = NULL;
-
- list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
- LASSERT(ptable->pt_zombies > 0);
- ptable->pt_zombies--;
-}
-
-struct lnet_peer *
-lnet_find_peer_locked(struct lnet_peer_table *ptable, lnet_nid_t nid)
-{
- struct list_head *peers;
- struct lnet_peer *lp;
-
- LASSERT(!the_lnet.ln_shutdown);
-
- peers = &ptable->pt_hash[lnet_nid2peerhash(nid)];
- list_for_each_entry(lp, peers, lp_hashlist) {
- if (lp->lp_nid == nid) {
- lnet_peer_addref_locked(lp);
- return lp;
- }
- }
-
- return NULL;
-}
-
-int
-lnet_nid2peer_locked(struct lnet_peer **lpp, lnet_nid_t nid, int cpt)
-{
- struct lnet_peer_table *ptable;
- struct lnet_peer *lp = NULL;
- struct lnet_peer *lp2;
- int cpt2;
- int rc = 0;
-
- *lpp = NULL;
- if (the_lnet.ln_shutdown) /* it's shutting down */
- return -ESHUTDOWN;
-
- /* cpt can be LNET_LOCK_EX if it's called from router functions */
- cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid);
-
- ptable = the_lnet.ln_peer_tables[cpt2];
- lp = lnet_find_peer_locked(ptable, nid);
- if (lp) {
- *lpp = lp;
- return 0;
- }
-
- if (!list_empty(&ptable->pt_deathrow)) {
- lp = list_entry(ptable->pt_deathrow.next,
- struct lnet_peer, lp_hashlist);
- list_del(&lp->lp_hashlist);
- }
-
- /*
-	 * take an extra refcount in case another thread has shut down LNet
-	 * and destroyed the locks and peer table before I finish the allocation
- */
- ptable->pt_number++;
- lnet_net_unlock(cpt);
-
- if (lp)
- memset(lp, 0, sizeof(*lp));
- else
- lp = kzalloc_cpt(sizeof(*lp), GFP_NOFS, cpt2);
-
- if (!lp) {
- rc = -ENOMEM;
- lnet_net_lock(cpt);
- goto out;
- }
-
- INIT_LIST_HEAD(&lp->lp_txq);
- INIT_LIST_HEAD(&lp->lp_rtrq);
- INIT_LIST_HEAD(&lp->lp_routes);
-
- lp->lp_notify = 0;
- lp->lp_notifylnd = 0;
- lp->lp_notifying = 0;
- lp->lp_alive_count = 0;
- lp->lp_timestamp = 0;
- lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */
- lp->lp_last_alive = jiffies; /* assumes alive */
- lp->lp_last_query = 0; /* haven't asked NI yet */
- lp->lp_ping_timestamp = 0;
- lp->lp_ping_feats = LNET_PING_FEAT_INVAL;
- lp->lp_nid = nid;
- lp->lp_cpt = cpt2;
- lp->lp_refcount = 2; /* 1 for caller; 1 for hash */
- lp->lp_rtr_refcount = 0;
-
- lnet_net_lock(cpt);
-
- if (the_lnet.ln_shutdown) {
- rc = -ESHUTDOWN;
- goto out;
- }
-
- lp2 = lnet_find_peer_locked(ptable, nid);
- if (lp2) {
- *lpp = lp2;
- goto out;
- }
-
- lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2);
- if (!lp->lp_ni) {
- rc = -EHOSTUNREACH;
- goto out;
- }
-
- lp->lp_txcredits = lp->lp_ni->ni_peertxcredits;
- lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits;
- lp->lp_rtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
- lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
-
- list_add_tail(&lp->lp_hashlist,
- &ptable->pt_hash[lnet_nid2peerhash(nid)]);
- ptable->pt_version++;
- *lpp = lp;
-
- return 0;
-out:
- if (lp)
- list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
- ptable->pt_number--;
- return rc;
-}
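
lnet_nid2peer_locked() illustrates a pattern that recurs throughout this code: drop the lock to allocate, then re-take it and re-check both the shutdown flag and whether a racing thread already inserted an equivalent object. A hedged pthread sketch of the same shape (illustrative only, not the LNet API; error handling elided):

	#include <pthread.h>
	#include <stdlib.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static void *cached;	/* stands in for the hash-table lookup */

	/* Look up an object, allocating on a miss; the lock is dropped
	 * around the allocation, so the lookup must be repeated after. */
	static void *lookup_or_create(void)
	{
		void *obj, *obj2;

		pthread_mutex_lock(&lock);
		if (cached) {			/* fast path: already there */
			obj = cached;
			pthread_mutex_unlock(&lock);
			return obj;
		}
		pthread_mutex_unlock(&lock);	/* don't allocate under the lock */

		obj = malloc(64);

		pthread_mutex_lock(&lock);
		obj2 = cached;			/* re-check: did someone race us? */
		if (obj2) {
			pthread_mutex_unlock(&lock);
			free(obj);		/* lose the race gracefully */
			return obj2;
		}
		cached = obj;
		pthread_mutex_unlock(&lock);
		return obj;
	}

	int main(void)
	{
		void *a = lookup_or_create();
		void *b = lookup_or_create();	/* hits the fast path */

		return a == b ? 0 : 1;
	}
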
-
-void
-lnet_debug_peer(lnet_nid_t nid)
-{
- char *aliveness = "NA";
- struct lnet_peer *lp;
- int rc;
- int cpt;
-
- cpt = lnet_cpt_of_nid(nid);
- lnet_net_lock(cpt);
-
- rc = lnet_nid2peer_locked(&lp, nid, cpt);
- if (rc) {
- lnet_net_unlock(cpt);
- CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid));
- return;
- }
-
- if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp))
- aliveness = lp->lp_alive ? "up" : "down";
-
- CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
- libcfs_nid2str(lp->lp_nid), lp->lp_refcount,
- aliveness, lp->lp_ni->ni_peertxcredits,
- lp->lp_rtrcredits, lp->lp_minrtrcredits,
- lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob);
-
- lnet_peer_decref_locked(lp);
-
- lnet_net_unlock(cpt);
-}
-
-int
-lnet_get_peer_info(__u32 peer_index, __u64 *nid,
- char aliveness[LNET_MAX_STR_LEN],
- __u32 *cpt_iter, __u32 *refcount,
- __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits,
- __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credits,
- __u32 *peer_tx_qnob)
-{
- struct lnet_peer_table *peer_table;
- struct lnet_peer *lp;
- bool found = false;
- int lncpt, j;
-
- /* get the number of CPTs */
- lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
-
- /*
- * if the cpt number to be examined is >= the number of cpts in
-	 * the system then indicate that there are no more cpts to examine
- */
- if (*cpt_iter >= lncpt)
- return -ENOENT;
-
- /* get the current table */
- peer_table = the_lnet.ln_peer_tables[*cpt_iter];
- /* if the ptable is NULL then there are no more cpts to examine */
- if (!peer_table)
- return -ENOENT;
-
- lnet_net_lock(*cpt_iter);
-
- for (j = 0; j < LNET_PEER_HASH_SIZE && !found; j++) {
- struct list_head *peers = &peer_table->pt_hash[j];
-
- list_for_each_entry(lp, peers, lp_hashlist) {
- if (peer_index-- > 0)
- continue;
-
- snprintf(aliveness, LNET_MAX_STR_LEN, "NA");
- if (lnet_isrouter(lp) ||
- lnet_peer_aliveness_enabled(lp))
- snprintf(aliveness, LNET_MAX_STR_LEN,
- lp->lp_alive ? "up" : "down");
-
- *nid = lp->lp_nid;
- *refcount = lp->lp_refcount;
- *ni_peer_tx_credits = lp->lp_ni->ni_peertxcredits;
- *peer_tx_credits = lp->lp_txcredits;
- *peer_rtr_credits = lp->lp_rtrcredits;
- *peer_min_rtr_credits = lp->lp_mintxcredits;
- *peer_tx_qnob = lp->lp_txqnob;
-
- found = true;
- }
- }
- lnet_net_unlock(*cpt_iter);
-
- *cpt_iter = lncpt;
-
- return found ? 0 : -ENOENT;
-}
diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c
deleted file mode 100644
index 6267d5e4bbd6..000000000000
--- a/drivers/staging/lustre/lnet/lnet/router.c
+++ /dev/null
@@ -1,1799 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- *
- * This file is part of Portals
- * http://sourceforge.net/projects/sandiaportals/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/completion.h>
-#include <linux/lnet/lib-lnet.h>
-
-#define LNET_NRB_TINY_MIN 512 /* min value for each CPT */
-#define LNET_NRB_TINY (LNET_NRB_TINY_MIN * 4)
-#define LNET_NRB_SMALL_MIN 4096 /* min value for each CPT */
-#define LNET_NRB_SMALL (LNET_NRB_SMALL_MIN * 4)
-#define LNET_NRB_SMALL_PAGES 1
-#define LNET_NRB_LARGE_MIN 256 /* min value for each CPT */
-#define LNET_NRB_LARGE (LNET_NRB_LARGE_MIN * 4)
-#define LNET_NRB_LARGE_PAGES ((LNET_MTU + PAGE_SIZE - 1) >> \
- PAGE_SHIFT)
-
-static char *forwarding = "";
-module_param(forwarding, charp, 0444);
-MODULE_PARM_DESC(forwarding, "Explicitly enable/disable forwarding between networks");
-
-static int tiny_router_buffers;
-module_param(tiny_router_buffers, int, 0444);
-MODULE_PARM_DESC(tiny_router_buffers, "# of 0 payload messages to buffer in the router");
-static int small_router_buffers;
-module_param(small_router_buffers, int, 0444);
-MODULE_PARM_DESC(small_router_buffers, "# of small (1 page) messages to buffer in the router");
-static int large_router_buffers;
-module_param(large_router_buffers, int, 0444);
-MODULE_PARM_DESC(large_router_buffers, "# of large messages to buffer in the router");
-static int peer_buffer_credits;
-module_param(peer_buffer_credits, int, 0444);
-MODULE_PARM_DESC(peer_buffer_credits, "# router buffer credits per peer");
-
-static int auto_down = 1;
-module_param(auto_down, int, 0444);
-MODULE_PARM_DESC(auto_down, "Automatically mark peers down on comms error");
-
-int
-lnet_peer_buffer_credits(struct lnet_ni *ni)
-{
- /* NI option overrides LNet default */
- if (ni->ni_peerrtrcredits > 0)
- return ni->ni_peerrtrcredits;
- if (peer_buffer_credits > 0)
- return peer_buffer_credits;
-
- /*
- * As an approximation, allow this peer the same number of router
- * buffers as it is allowed outstanding sends
- */
- return ni->ni_peertxcredits;
-}
-
-/* forward ref's */
-static int lnet_router_checker(void *);
-
-static int check_routers_before_use;
-module_param(check_routers_before_use, int, 0444);
-MODULE_PARM_DESC(check_routers_before_use, "Assume routers are down and ping them before use");
-
-int avoid_asym_router_failure = 1;
-module_param(avoid_asym_router_failure, int, 0644);
-MODULE_PARM_DESC(avoid_asym_router_failure, "Avoid asymmetrical router failures (0 to disable)");
-
-static int dead_router_check_interval = 60;
-module_param(dead_router_check_interval, int, 0644);
-MODULE_PARM_DESC(dead_router_check_interval, "Seconds between dead router health checks (<= 0 to disable)");
-
-static int live_router_check_interval = 60;
-module_param(live_router_check_interval, int, 0644);
-MODULE_PARM_DESC(live_router_check_interval, "Seconds between live router health checks (<= 0 to disable)");
-
-static int router_ping_timeout = 50;
-module_param(router_ping_timeout, int, 0644);
-MODULE_PARM_DESC(router_ping_timeout, "Seconds to wait for the reply to a router health query");
-
-int
-lnet_peers_start_down(void)
-{
- return check_routers_before_use;
-}
-
-void
-lnet_notify_locked(struct lnet_peer *lp, int notifylnd, int alive,
- unsigned long when)
-{
- if (time_before(when, lp->lp_timestamp)) { /* out of date information */
- CDEBUG(D_NET, "Out of date\n");
- return;
- }
-
- lp->lp_timestamp = when; /* update timestamp */
- lp->lp_ping_deadline = 0; /* disable ping timeout */
-
- if (lp->lp_alive_count && /* got old news */
- (!lp->lp_alive) == (!alive)) { /* new date for old news */
- CDEBUG(D_NET, "Old news\n");
- return;
- }
-
- /* Flag that notification is outstanding */
-
- lp->lp_alive_count++;
- lp->lp_alive = !(!alive); /* 1 bit! */
- lp->lp_notify = 1;
- lp->lp_notifylnd |= notifylnd;
- if (lp->lp_alive)
- lp->lp_ping_feats = LNET_PING_FEAT_INVAL; /* reset */
-
- CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(lp->lp_nid), alive);
-}
-
-static void
-lnet_ni_notify_locked(struct lnet_ni *ni, struct lnet_peer *lp)
-{
- int alive;
- int notifylnd;
-
- /*
- * Notify only in 1 thread at any time to ensure ordered notification.
- * NB individual events can be missed; the only guarantee is that you
- * always get the most recent news
- */
- if (lp->lp_notifying || !ni)
- return;
-
- lp->lp_notifying = 1;
-
- while (lp->lp_notify) {
- alive = lp->lp_alive;
- notifylnd = lp->lp_notifylnd;
-
- lp->lp_notifylnd = 0;
- lp->lp_notify = 0;
-
- if (notifylnd && ni->ni_lnd->lnd_notify) {
- lnet_net_unlock(lp->lp_cpt);
-
- /*
- * A new notification could happen now; I'll handle it
- * when control returns to me
- */
- ni->ni_lnd->lnd_notify(ni, lp->lp_nid, alive);
-
- lnet_net_lock(lp->lp_cpt);
- }
- }
-
- lp->lp_notifying = 0;
-}
-
-static void
-lnet_rtr_addref_locked(struct lnet_peer *lp)
-{
- LASSERT(lp->lp_refcount > 0);
- LASSERT(lp->lp_rtr_refcount >= 0);
-
- /* lnet_net_lock must be exclusively locked */
- lp->lp_rtr_refcount++;
- if (lp->lp_rtr_refcount == 1) {
- struct list_head *pos;
-
- /* a simple insertion sort */
- list_for_each_prev(pos, &the_lnet.ln_routers) {
- struct lnet_peer *rtr;
-
- rtr = list_entry(pos, struct lnet_peer, lp_rtr_list);
- if (rtr->lp_nid < lp->lp_nid)
- break;
- }
-
- list_add(&lp->lp_rtr_list, pos);
- /* addref for the_lnet.ln_routers */
- lnet_peer_addref_locked(lp);
- the_lnet.ln_routers_version++;
- }
-}
-
-static void
-lnet_rtr_decref_locked(struct lnet_peer *lp)
-{
- LASSERT(lp->lp_refcount > 0);
- LASSERT(lp->lp_rtr_refcount > 0);
-
- /* lnet_net_lock must be exclusively locked */
- lp->lp_rtr_refcount--;
- if (!lp->lp_rtr_refcount) {
- LASSERT(list_empty(&lp->lp_routes));
-
- if (lp->lp_rcd) {
- list_add(&lp->lp_rcd->rcd_list,
- &the_lnet.ln_rcd_deathrow);
- lp->lp_rcd = NULL;
- }
-
- list_del(&lp->lp_rtr_list);
- /* decref for the_lnet.ln_routers */
- lnet_peer_decref_locked(lp);
- the_lnet.ln_routers_version++;
- }
-}
-
-struct lnet_remotenet *
-lnet_find_net_locked(__u32 net)
-{
- struct lnet_remotenet *rnet;
- struct list_head *rn_list;
-
- LASSERT(!the_lnet.ln_shutdown);
-
- rn_list = lnet_net2rnethash(net);
- list_for_each_entry(rnet, rn_list, lrn_list) {
- if (rnet->lrn_net == net)
- return rnet;
- }
- return NULL;
-}
-
-static void lnet_shuffle_seed(void)
-{
- static int seeded;
- struct lnet_ni *ni;
-
- if (seeded)
- return;
-
- /*
-	 * Nodes with small feet have little entropy;
-	 * the NID for this node gives the most entropy in the low bits
- */
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
- __u32 lnd_type, seed;
-
- lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
- if (lnd_type != LOLND) {
- seed = (LNET_NIDADDR(ni->ni_nid) | lnd_type);
- add_device_randomness(&seed, sizeof(seed));
- }
- }
-
- seeded = 1;
-}
-
-/* NB expects LNET_LOCK held */
-static void
-lnet_add_route_to_rnet(struct lnet_remotenet *rnet, struct lnet_route *route)
-{
- unsigned int len = 0;
- unsigned int offset = 0;
- struct list_head *e;
-
- lnet_shuffle_seed();
-
- list_for_each(e, &rnet->lrn_routes) {
- len++;
- }
-
- /* len+1 positions to add a new entry */
- offset = prandom_u32_max(len + 1);
- list_for_each(e, &rnet->lrn_routes) {
- if (!offset)
- break;
- offset--;
- }
- list_add(&route->lr_list, e);
- list_add(&route->lr_gwlist, &route->lr_gateway->lp_routes);
-
- the_lnet.ln_remote_nets_version++;
- lnet_rtr_addref_locked(route->lr_gateway);
-}
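
The offset logic above implements uniform insertion: with len existing routes there are len + 1 valid positions, and prandom_u32_max(len + 1) picks one uniformly so equivalent routes end up shuffled rather than always appended. A userspace sketch of the same walk, with rand() standing in for prandom_u32_max:

	#include <stdio.h>
	#include <stdlib.h>
	#include <time.h>

	int main(void)
	{
		const char *routes[] = { "gw-a", "gw-b", "gw-c" };
		int len = 3, offset, i;

		srand((unsigned)time(NULL));
		/* len + 1 valid positions for the new entry, chosen uniformly
		 * (modulo bias ignored for illustration) */
		offset = rand() % (len + 1);

		for (i = 0; i <= len; i++) {
			if (i == offset)
				printf("[new] ");
			if (i < len)
				printf("%s ", routes[i]);
		}
		printf("\n");
		return 0;
	}
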
-
-int
-lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
- unsigned int priority)
-{
- struct list_head *e;
- struct lnet_remotenet *rnet;
- struct lnet_remotenet *rnet2;
- struct lnet_route *route;
- struct lnet_ni *ni;
- int add_route;
- int rc;
-
- CDEBUG(D_NET, "Add route: net %s hops %d priority %u gw %s\n",
- libcfs_net2str(net), hops, priority, libcfs_nid2str(gateway));
-
- if (gateway == LNET_NID_ANY ||
- LNET_NETTYP(LNET_NIDNET(gateway)) == LOLND ||
- net == LNET_NIDNET(LNET_NID_ANY) ||
- LNET_NETTYP(net) == LOLND ||
- LNET_NIDNET(gateway) == net ||
- (hops != LNET_UNDEFINED_HOPS && (hops < 1 || hops > 255)))
- return -EINVAL;
-
- if (lnet_islocalnet(net)) /* it's a local network */
- return -EEXIST;
-
- /* Assume net, route, all new */
- route = kzalloc(sizeof(*route), GFP_NOFS);
- rnet = kzalloc(sizeof(*rnet), GFP_NOFS);
- if (!route || !rnet) {
- CERROR("Out of memory creating route %s %d %s\n",
- libcfs_net2str(net), hops, libcfs_nid2str(gateway));
- kfree(route);
- kfree(rnet);
- return -ENOMEM;
- }
-
- INIT_LIST_HEAD(&rnet->lrn_routes);
- rnet->lrn_net = net;
- route->lr_hops = hops;
- route->lr_net = net;
- route->lr_priority = priority;
-
- lnet_net_lock(LNET_LOCK_EX);
-
- rc = lnet_nid2peer_locked(&route->lr_gateway, gateway, LNET_LOCK_EX);
- if (rc) {
- lnet_net_unlock(LNET_LOCK_EX);
-
- kfree(route);
- kfree(rnet);
-
- if (rc == -EHOSTUNREACH) /* gateway is not on a local net */
- return rc; /* ignore the route entry */
- CERROR("Error %d creating route %s %d %s\n", rc,
- libcfs_net2str(net), hops,
- libcfs_nid2str(gateway));
- return rc;
- }
-
- LASSERT(!the_lnet.ln_shutdown);
-
- rnet2 = lnet_find_net_locked(net);
- if (!rnet2) {
- /* new network */
- list_add_tail(&rnet->lrn_list, lnet_net2rnethash(net));
- rnet2 = rnet;
- }
-
-	/* Search for a duplicate route (adding a duplicate is a NOOP) */
- add_route = 1;
- list_for_each(e, &rnet2->lrn_routes) {
- struct lnet_route *route2;
-
- route2 = list_entry(e, struct lnet_route, lr_list);
- if (route2->lr_gateway == route->lr_gateway) {
- add_route = 0;
- break;
- }
-
- /* our lookups must be true */
- LASSERT(route2->lr_gateway->lp_nid != gateway);
- }
-
- if (add_route) {
- lnet_peer_addref_locked(route->lr_gateway); /* +1 for notify */
- lnet_add_route_to_rnet(rnet2, route);
-
- ni = route->lr_gateway->lp_ni;
- lnet_net_unlock(LNET_LOCK_EX);
-
- /* XXX Assume alive */
- if (ni->ni_lnd->lnd_notify)
- ni->ni_lnd->lnd_notify(ni, gateway, 1);
-
- lnet_net_lock(LNET_LOCK_EX);
- }
-
- /* -1 for notify or !add_route */
- lnet_peer_decref_locked(route->lr_gateway);
- lnet_net_unlock(LNET_LOCK_EX);
- rc = 0;
-
- if (!add_route) {
- rc = -EEXIST;
- kfree(route);
- }
-
- if (rnet != rnet2)
- kfree(rnet);
-
- /* indicate to startup the router checker if configured */
- wake_up(&the_lnet.ln_rc_waitq);
-
- return rc;
-}
-
-int
-lnet_check_routes(void)
-{
- struct lnet_remotenet *rnet;
- struct lnet_route *route;
- struct lnet_route *route2;
- struct list_head *e1;
- struct list_head *e2;
- int cpt;
- struct list_head *rn_list;
- int i;
-
- cpt = lnet_net_lock_current();
-
- for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
- rn_list = &the_lnet.ln_remote_nets_hash[i];
- list_for_each(e1, rn_list) {
- rnet = list_entry(e1, struct lnet_remotenet, lrn_list);
-
- route2 = NULL;
- list_for_each(e2, &rnet->lrn_routes) {
- lnet_nid_t nid1;
- lnet_nid_t nid2;
- int net;
-
- route = list_entry(e2, struct lnet_route, lr_list);
-
- if (!route2) {
- route2 = route;
- continue;
- }
-
- if (route->lr_gateway->lp_ni ==
- route2->lr_gateway->lp_ni)
- continue;
-
- nid1 = route->lr_gateway->lp_nid;
- nid2 = route2->lr_gateway->lp_nid;
- net = rnet->lrn_net;
-
- lnet_net_unlock(cpt);
-
- CERROR("Routes to %s via %s and %s not supported\n",
- libcfs_net2str(net),
- libcfs_nid2str(nid1),
- libcfs_nid2str(nid2));
- return -EINVAL;
- }
- }
- }
-
- lnet_net_unlock(cpt);
- return 0;
-}
-
-int
-lnet_del_route(__u32 net, lnet_nid_t gw_nid)
-{
- struct lnet_peer *gateway;
- struct lnet_remotenet *rnet;
- struct lnet_route *route;
- struct list_head *e1;
- struct list_head *e2;
- int rc = -ENOENT;
- struct list_head *rn_list;
- int idx = 0;
-
- CDEBUG(D_NET, "Del route: net %s : gw %s\n",
- libcfs_net2str(net), libcfs_nid2str(gw_nid));
-
- /*
- * NB Caller may specify either all routes via the given gateway
-	 * or a specific route entry (actual NIDs)
- */
- lnet_net_lock(LNET_LOCK_EX);
- if (net == LNET_NIDNET(LNET_NID_ANY))
- rn_list = &the_lnet.ln_remote_nets_hash[0];
- else
- rn_list = lnet_net2rnethash(net);
-
- again:
- list_for_each(e1, rn_list) {
- rnet = list_entry(e1, struct lnet_remotenet, lrn_list);
-
- if (!(net == LNET_NIDNET(LNET_NID_ANY) ||
- net == rnet->lrn_net))
- continue;
-
- list_for_each(e2, &rnet->lrn_routes) {
- route = list_entry(e2, struct lnet_route, lr_list);
-
- gateway = route->lr_gateway;
- if (!(gw_nid == LNET_NID_ANY ||
- gw_nid == gateway->lp_nid))
- continue;
-
- list_del(&route->lr_list);
- list_del(&route->lr_gwlist);
- the_lnet.ln_remote_nets_version++;
-
- if (list_empty(&rnet->lrn_routes))
- list_del(&rnet->lrn_list);
- else
- rnet = NULL;
-
- lnet_rtr_decref_locked(gateway);
- lnet_peer_decref_locked(gateway);
-
- lnet_net_unlock(LNET_LOCK_EX);
-
- kfree(route);
- kfree(rnet);
-
- rc = 0;
- lnet_net_lock(LNET_LOCK_EX);
- goto again;
- }
- }
-
- if (net == LNET_NIDNET(LNET_NID_ANY) &&
- ++idx < LNET_REMOTE_NETS_HASH_SIZE) {
- rn_list = &the_lnet.ln_remote_nets_hash[idx];
- goto again;
- }
- lnet_net_unlock(LNET_LOCK_EX);
-
- return rc;
-}
-
-void
-lnet_destroy_routes(void)
-{
- lnet_del_route(LNET_NIDNET(LNET_NID_ANY), LNET_NID_ANY);
-}
-
-int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg)
-{
- int i, rc = -ENOENT, j;
-
- if (!the_lnet.ln_rtrpools)
- return rc;
-
- for (i = 0; i < LNET_NRBPOOLS; i++) {
- struct lnet_rtrbufpool *rbp;
-
- lnet_net_lock(LNET_LOCK_EX);
- cfs_percpt_for_each(rbp, j, the_lnet.ln_rtrpools) {
- if (i++ != idx)
- continue;
-
- pool_cfg->pl_pools[i].pl_npages = rbp[i].rbp_npages;
- pool_cfg->pl_pools[i].pl_nbuffers = rbp[i].rbp_nbuffers;
- pool_cfg->pl_pools[i].pl_credits = rbp[i].rbp_credits;
- pool_cfg->pl_pools[i].pl_mincredits = rbp[i].rbp_mincredits;
- rc = 0;
- break;
- }
- lnet_net_unlock(LNET_LOCK_EX);
- }
-
- lnet_net_lock(LNET_LOCK_EX);
- pool_cfg->pl_routing = the_lnet.ln_routing;
- lnet_net_unlock(LNET_LOCK_EX);
-
- return rc;
-}
-
-int
-lnet_get_route(int idx, __u32 *net, __u32 *hops,
- lnet_nid_t *gateway, __u32 *alive, __u32 *priority)
-{
- struct list_head *e1;
- struct list_head *e2;
- struct lnet_remotenet *rnet;
- struct lnet_route *route;
- int cpt;
- int i;
- struct list_head *rn_list;
-
- cpt = lnet_net_lock_current();
-
- for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
- rn_list = &the_lnet.ln_remote_nets_hash[i];
- list_for_each(e1, rn_list) {
- rnet = list_entry(e1, struct lnet_remotenet, lrn_list);
-
- list_for_each(e2, &rnet->lrn_routes) {
- route = list_entry(e2, struct lnet_route,
- lr_list);
-
- if (!idx--) {
- *net = rnet->lrn_net;
- *hops = route->lr_hops;
- *priority = route->lr_priority;
- *gateway = route->lr_gateway->lp_nid;
- *alive = lnet_is_route_alive(route);
- lnet_net_unlock(cpt);
- return 0;
- }
- }
- }
- }
-
- lnet_net_unlock(cpt);
- return -ENOENT;
-}
-
-void
-lnet_swap_pinginfo(struct lnet_ping_info *info)
-{
- int i;
- struct lnet_ni_status *stat;
-
- __swab32s(&info->pi_magic);
- __swab32s(&info->pi_features);
- __swab32s(&info->pi_pid);
- __swab32s(&info->pi_nnis);
- for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) {
- stat = &info->pi_ni[i];
- __swab64s(&stat->ns_nid);
- __swab32s(&stat->ns_status);
- }
-}
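
lnet_swap_pinginfo() byte-swaps every field because ping replies arrive in the sender's endianness; the caller below detects that case by checking whether the magic reads back byte-reversed. A standalone sketch of the detect-and-swap idea, with a hypothetical magic value:

	#include <stdio.h>
	#include <stdint.h>

	#define PING_MAGIC 0x70696e67u		/* hypothetical magic */

	static uint32_t swab32(uint32_t v)
	{
		return ((v & 0x000000ffu) << 24) | ((v & 0x0000ff00u) << 8) |
		       ((v & 0x00ff0000u) >> 8)  | ((v & 0xff000000u) >> 24);
	}

	int main(void)
	{
		/* as if received from an opposite-endian peer */
		uint32_t wire = swab32(PING_MAGIC);

		if (wire == swab32(PING_MAGIC)) {  /* magic looks byte-reversed */
			wire = swab32(wire);	   /* so swap everything back */
			printf("swapped: %#x\n", (unsigned)wire);
		}
		return 0;
	}
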
-
-/**
- * Parse router-checker pinginfo and record the number of down NIs for
- * remote networks on that router.
- */
-static void
-lnet_parse_rc_info(struct lnet_rc_data *rcd)
-{
- struct lnet_ping_info *info = rcd->rcd_pinginfo;
- struct lnet_peer *gw = rcd->rcd_gateway;
- struct lnet_route *rte;
-
- if (!gw->lp_alive)
- return;
-
- if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC))
- lnet_swap_pinginfo(info);
-
- /* NB always racing with network! */
- if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
- CDEBUG(D_NET, "%s: Unexpected magic %08x\n",
- libcfs_nid2str(gw->lp_nid), info->pi_magic);
- gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
- return;
- }
-
- gw->lp_ping_feats = info->pi_features;
- if (!(gw->lp_ping_feats & LNET_PING_FEAT_MASK)) {
- CDEBUG(D_NET, "%s: Unexpected features 0x%x\n",
- libcfs_nid2str(gw->lp_nid), gw->lp_ping_feats);
- return; /* nothing I can understand */
- }
-
- if (!(gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS))
- return; /* can't carry NI status info */
-
- list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) {
- int down = 0;
- int up = 0;
- int i;
-
- if (gw->lp_ping_feats & LNET_PING_FEAT_RTE_DISABLED) {
- rte->lr_downis = 1;
- continue;
- }
-
- for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) {
- struct lnet_ni_status *stat = &info->pi_ni[i];
- lnet_nid_t nid = stat->ns_nid;
-
- if (nid == LNET_NID_ANY) {
- CDEBUG(D_NET, "%s: unexpected LNET_NID_ANY\n",
- libcfs_nid2str(gw->lp_nid));
- gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
- return;
- }
-
- if (LNET_NETTYP(LNET_NIDNET(nid)) == LOLND)
- continue;
-
- if (stat->ns_status == LNET_NI_STATUS_DOWN) {
- down++;
- continue;
- }
-
- if (stat->ns_status == LNET_NI_STATUS_UP) {
- if (LNET_NIDNET(nid) == rte->lr_net) {
- up = 1;
- break;
- }
- continue;
- }
-
- CDEBUG(D_NET, "%s: Unexpected status 0x%x\n",
- libcfs_nid2str(gw->lp_nid), stat->ns_status);
- gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
- return;
- }
-
- if (up) { /* ignore downed NIs if NI for dest network is up */
- rte->lr_downis = 0;
- continue;
- }
-		/*
-		 * if @down is zero and this route is single-hop, it means
-		 * we could not find an NI for the target network
-		 */
- if (!down && rte->lr_hops == 1)
- down = 1;
-
- rte->lr_downis = down;
- }
-}
-
-static void
-lnet_router_checker_event(struct lnet_event *event)
-{
- struct lnet_rc_data *rcd = event->md.user_ptr;
- struct lnet_peer *lp;
-
- LASSERT(rcd);
-
- if (event->unlinked) {
- LNetInvalidateMDHandle(&rcd->rcd_mdh);
- return;
- }
-
- LASSERT(event->type == LNET_EVENT_SEND ||
- event->type == LNET_EVENT_REPLY);
-
- lp = rcd->rcd_gateway;
- LASSERT(lp);
-
- /*
- * NB: it's called with holding lnet_res_lock, we have a few
- * places need to hold both locks at the same time, please take
- * care of lock ordering
- */
- lnet_net_lock(lp->lp_cpt);
- if (!lnet_isrouter(lp) || lp->lp_rcd != rcd) {
- /* ignore if no longer a router or rcd is replaced */
- goto out;
- }
-
- if (event->type == LNET_EVENT_SEND) {
- lp->lp_ping_notsent = 0;
- if (!event->status)
- goto out;
- }
-
- /* LNET_EVENT_REPLY */
- /*
- * A successful REPLY means the router is up. If _any_ comms
- * to the router fail I assume it's down (this will happen if
- * we ping alive routers to try to detect router death before
- * apps get burned).
- */
- lnet_notify_locked(lp, 1, !event->status, jiffies);
-
- /*
- * The router checker will wake up very shortly and do the
- * actual notification.
- * XXX If 'lp' stops being a router before then, it will still
- * have the notification pending!!!
- */
- if (avoid_asym_router_failure && !event->status)
- lnet_parse_rc_info(rcd);
-
- out:
- lnet_net_unlock(lp->lp_cpt);
-}
-
-static void
-lnet_wait_known_routerstate(void)
-{
- struct lnet_peer *rtr;
- struct list_head *entry;
- int all_known;
-
- LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
-
- for (;;) {
- int cpt = lnet_net_lock_current();
-
- all_known = 1;
- list_for_each(entry, &the_lnet.ln_routers) {
- rtr = list_entry(entry, struct lnet_peer, lp_rtr_list);
-
- if (!rtr->lp_alive_count) {
- all_known = 0;
- break;
- }
- }
-
- lnet_net_unlock(cpt);
-
- if (all_known)
- return;
-
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- }
-}
-
-void
-lnet_router_ni_update_locked(struct lnet_peer *gw, __u32 net)
-{
- struct lnet_route *rte;
-
- if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS)) {
- list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) {
- if (rte->lr_net == net) {
- rte->lr_downis = 0;
- break;
- }
- }
- }
-}
-
-static void
-lnet_update_ni_status_locked(void)
-{
- struct lnet_ni *ni;
- time64_t now;
- int timeout;
-
- LASSERT(the_lnet.ln_routing);
-
- timeout = router_ping_timeout +
- max(live_router_check_interval, dead_router_check_interval);
-
- now = ktime_get_real_seconds();
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
- if (ni->ni_lnd->lnd_type == LOLND)
- continue;
-
- if (now < ni->ni_last_alive + timeout)
- continue;
-
- lnet_ni_lock(ni);
- /* re-check with lock */
- if (now < ni->ni_last_alive + timeout) {
- lnet_ni_unlock(ni);
- continue;
- }
-
- LASSERT(ni->ni_status);
-
- if (ni->ni_status->ns_status != LNET_NI_STATUS_DOWN) {
- CDEBUG(D_NET, "NI(%s:%d) status changed to down\n",
- libcfs_nid2str(ni->ni_nid), timeout);
- /*
- * NB: so far, this is the only place to set
- * NI status to "down"
- */
- ni->ni_status->ns_status = LNET_NI_STATUS_DOWN;
- }
- lnet_ni_unlock(ni);
- }
-}
-
-static void
-lnet_destroy_rc_data(struct lnet_rc_data *rcd)
-{
- LASSERT(list_empty(&rcd->rcd_list));
- /* detached from network */
- LASSERT(LNetMDHandleIsInvalid(rcd->rcd_mdh));
-
- if (rcd->rcd_gateway) {
- int cpt = rcd->rcd_gateway->lp_cpt;
-
- lnet_net_lock(cpt);
- lnet_peer_decref_locked(rcd->rcd_gateway);
- lnet_net_unlock(cpt);
- }
-
- kfree(rcd->rcd_pinginfo);
-
- kfree(rcd);
-}
-
-static struct lnet_rc_data *
-lnet_create_rc_data_locked(struct lnet_peer *gateway)
-{
- struct lnet_rc_data *rcd = NULL;
- struct lnet_ping_info *pi;
- struct lnet_md md;
- int rc;
- int i;
-
- lnet_net_unlock(gateway->lp_cpt);
-
- rcd = kzalloc(sizeof(*rcd), GFP_NOFS);
- if (!rcd)
- goto out;
-
- LNetInvalidateMDHandle(&rcd->rcd_mdh);
- INIT_LIST_HEAD(&rcd->rcd_list);
-
- pi = kzalloc(LNET_PINGINFO_SIZE, GFP_NOFS);
- if (!pi)
- goto out;
-
- for (i = 0; i < LNET_MAX_RTR_NIS; i++) {
- pi->pi_ni[i].ns_nid = LNET_NID_ANY;
- pi->pi_ni[i].ns_status = LNET_NI_STATUS_INVALID;
- }
- rcd->rcd_pinginfo = pi;
-
- md.start = pi;
- md.user_ptr = rcd;
- md.length = LNET_PINGINFO_SIZE;
- md.threshold = LNET_MD_THRESH_INF;
- md.options = LNET_MD_TRUNCATE;
- md.eq_handle = the_lnet.ln_rc_eqh;
-
- LASSERT(!LNetEQHandleIsInvalid(the_lnet.ln_rc_eqh));
- rc = LNetMDBind(md, LNET_UNLINK, &rcd->rcd_mdh);
- if (rc < 0) {
- CERROR("Can't bind MD: %d\n", rc);
- goto out;
- }
- LASSERT(!rc);
-
- lnet_net_lock(gateway->lp_cpt);
- /* router table changed or someone has created rcd for this gateway */
- if (!lnet_isrouter(gateway) || gateway->lp_rcd) {
- lnet_net_unlock(gateway->lp_cpt);
- goto out;
- }
-
- lnet_peer_addref_locked(gateway);
- rcd->rcd_gateway = gateway;
- gateway->lp_rcd = rcd;
- gateway->lp_ping_notsent = 0;
-
- return rcd;
-
- out:
- if (rcd) {
- if (!LNetMDHandleIsInvalid(rcd->rcd_mdh)) {
- rc = LNetMDUnlink(rcd->rcd_mdh);
- LASSERT(!rc);
- }
- lnet_destroy_rc_data(rcd);
- }
-
- lnet_net_lock(gateway->lp_cpt);
- return gateway->lp_rcd;
-}
-
-static int
-lnet_router_check_interval(struct lnet_peer *rtr)
-{
- int secs;
-
- secs = rtr->lp_alive ? live_router_check_interval :
- dead_router_check_interval;
- if (secs < 0)
- secs = 0;
-
- return secs;
-}
-
-static void
-lnet_ping_router_locked(struct lnet_peer *rtr)
-{
- struct lnet_rc_data *rcd = NULL;
- unsigned long now = jiffies;
- int secs;
-
- lnet_peer_addref_locked(rtr);
-
- if (rtr->lp_ping_deadline && /* ping timed out? */
- time_after(now, rtr->lp_ping_deadline))
- lnet_notify_locked(rtr, 1, 0, now);
-
- /* Run any outstanding notifications */
- lnet_ni_notify_locked(rtr->lp_ni, rtr);
-
- if (!lnet_isrouter(rtr) ||
- the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
- /* router table changed or router checker is shutting down */
- lnet_peer_decref_locked(rtr);
- return;
- }
-
- rcd = rtr->lp_rcd ?
- rtr->lp_rcd : lnet_create_rc_data_locked(rtr);
-
- if (!rcd)
- return;
-
- secs = lnet_router_check_interval(rtr);
-
- CDEBUG(D_NET,
- "rtr %s %d: deadline %lu ping_notsent %d alive %d alive_count %d lp_ping_timestamp %lu\n",
- libcfs_nid2str(rtr->lp_nid), secs,
- rtr->lp_ping_deadline, rtr->lp_ping_notsent,
- rtr->lp_alive, rtr->lp_alive_count, rtr->lp_ping_timestamp);
-
- if (secs && !rtr->lp_ping_notsent &&
- time_after(now, rtr->lp_ping_timestamp + secs * HZ)) {
- int rc;
- struct lnet_process_id id;
- struct lnet_handle_md mdh;
-
- id.nid = rtr->lp_nid;
- id.pid = LNET_PID_LUSTRE;
- CDEBUG(D_NET, "Check: %s\n", libcfs_id2str(id));
-
- rtr->lp_ping_notsent = 1;
- rtr->lp_ping_timestamp = now;
-
- mdh = rcd->rcd_mdh;
-
- if (!rtr->lp_ping_deadline) {
- rtr->lp_ping_deadline =
- jiffies + router_ping_timeout * HZ;
- }
-
- lnet_net_unlock(rtr->lp_cpt);
-
- rc = LNetGet(LNET_NID_ANY, mdh, id, LNET_RESERVED_PORTAL,
- LNET_PROTO_PING_MATCHBITS, 0);
-
- lnet_net_lock(rtr->lp_cpt);
- if (rc)
- rtr->lp_ping_notsent = 0; /* no event pending */
- }
-
- lnet_peer_decref_locked(rtr);
-}
-
-int
-lnet_router_checker_start(void)
-{
- struct task_struct *task;
- int rc;
- int eqsz = 0;
-
- LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
-
- if (check_routers_before_use &&
- dead_router_check_interval <= 0) {
- LCONSOLE_ERROR_MSG(0x10a, "'dead_router_check_interval' must be set if 'check_routers_before_use' is set\n");
- return -EINVAL;
- }
-
- init_completion(&the_lnet.ln_rc_signal);
-
- rc = LNetEQAlloc(0, lnet_router_checker_event, &the_lnet.ln_rc_eqh);
- if (rc) {
- CERROR("Can't allocate EQ(%d): %d\n", eqsz, rc);
- return -ENOMEM;
- }
-
- the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING;
- task = kthread_run(lnet_router_checker, NULL, "router_checker");
- if (IS_ERR(task)) {
- rc = PTR_ERR(task);
- CERROR("Can't start router checker thread: %d\n", rc);
- /* block until event callback signals exit */
- wait_for_completion(&the_lnet.ln_rc_signal);
- rc = LNetEQFree(the_lnet.ln_rc_eqh);
- LASSERT(!rc);
- the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
- return -ENOMEM;
- }
-
- if (check_routers_before_use) {
- /*
- * Note that a helpful side-effect of pinging all known routers
- * at startup is that it makes them drop stale connections they
- * may have to a previous instance of me.
- */
- lnet_wait_known_routerstate();
- }
-
- return 0;
-}
-
-void
-lnet_router_checker_stop(void)
-{
- int rc;
-
- if (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN)
- return;
-
- LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
- the_lnet.ln_rc_state = LNET_RC_STATE_STOPPING;
- /* wakeup the RC thread if it's sleeping */
- wake_up(&the_lnet.ln_rc_waitq);
-
- /* block until event callback signals exit */
- wait_for_completion(&the_lnet.ln_rc_signal);
- LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
-
- rc = LNetEQFree(the_lnet.ln_rc_eqh);
- LASSERT(!rc);
-}
-
-static void
-lnet_prune_rc_data(int wait_unlink)
-{
- struct lnet_rc_data *rcd;
- struct lnet_rc_data *tmp;
- struct lnet_peer *lp;
- struct list_head head;
- int i = 2;
-
- if (likely(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING &&
- list_empty(&the_lnet.ln_rcd_deathrow) &&
- list_empty(&the_lnet.ln_rcd_zombie)))
- return;
-
- INIT_LIST_HEAD(&head);
-
- lnet_net_lock(LNET_LOCK_EX);
-
- if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
- /* router checker is stopping, prune all */
- list_for_each_entry(lp, &the_lnet.ln_routers,
- lp_rtr_list) {
- if (!lp->lp_rcd)
- continue;
-
- LASSERT(list_empty(&lp->lp_rcd->rcd_list));
- list_add(&lp->lp_rcd->rcd_list,
- &the_lnet.ln_rcd_deathrow);
- lp->lp_rcd = NULL;
- }
- }
-
- /* unlink all RCDs on deathrow list */
- list_splice_init(&the_lnet.ln_rcd_deathrow, &head);
-
- if (!list_empty(&head)) {
- lnet_net_unlock(LNET_LOCK_EX);
-
- list_for_each_entry(rcd, &head, rcd_list)
- LNetMDUnlink(rcd->rcd_mdh);
-
- lnet_net_lock(LNET_LOCK_EX);
- }
-
- list_splice_init(&head, &the_lnet.ln_rcd_zombie);
-
- /* release all zombie RCDs */
- while (!list_empty(&the_lnet.ln_rcd_zombie)) {
- list_for_each_entry_safe(rcd, tmp, &the_lnet.ln_rcd_zombie,
- rcd_list) {
- if (LNetMDHandleIsInvalid(rcd->rcd_mdh))
- list_move(&rcd->rcd_list, &head);
- }
-
- wait_unlink = wait_unlink &&
- !list_empty(&the_lnet.ln_rcd_zombie);
-
- lnet_net_unlock(LNET_LOCK_EX);
-
- while (!list_empty(&head)) {
- rcd = list_entry(head.next,
- struct lnet_rc_data, rcd_list);
- list_del_init(&rcd->rcd_list);
- lnet_destroy_rc_data(rcd);
- }
-
- if (!wait_unlink)
- return;
-
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
- "Waiting for rc buffers to unlink\n");
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ / 4);
-
- lnet_net_lock(LNET_LOCK_EX);
- }
-
- lnet_net_unlock(LNET_LOCK_EX);
-}
-
-/*
- * This function is called to check if the RC should block indefinitely.
- * It's called from lnet_router_checker() as well as being passed to
- * wait_event_interruptible() to avoid the lost wake_up problem.
- *
-	 * When it's called from wait_event_interruptible() it is necessary to
-	 * also not sleep if the rc state is not running, to avoid a deadlock
-	 * when the system is shutting down.
- */
-static inline bool
-lnet_router_checker_active(void)
-{
- if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING)
- return true;
-
- /*
- * Router Checker thread needs to run when routing is enabled in
- * order to call lnet_update_ni_status_locked()
- */
- if (the_lnet.ln_routing)
- return true;
-
- return !list_empty(&the_lnet.ln_routers) &&
- (live_router_check_interval > 0 ||
- dead_router_check_interval > 0);
-}
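
The point of this predicate is that it is evaluated both standalone and inside wait_event_interruptible(), which re-checks it under the wait-queue machinery so a wake-up racing with the check is never lost. A hedged pthread sketch of the same discipline (illustrative, not the kernel API):

	#include <pthread.h>
	#include <stdbool.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
	static bool work_pending;
	static bool stopping;

	/* Evaluated under the lock before and after every sleep, so a
	 * signal racing with the check cannot be lost. */
	static bool should_run(void)
	{
		return work_pending || stopping;
	}

	static void wait_for_work(void)
	{
		pthread_mutex_lock(&lock);
		while (!should_run())
			pthread_cond_wait(&cond, &lock);
		pthread_mutex_unlock(&lock);
	}

	int main(void)
	{
		pthread_mutex_lock(&lock);
		work_pending = true;	/* pretend a route was added */
		pthread_cond_signal(&cond);
		pthread_mutex_unlock(&lock);

		wait_for_work();	/* returns at once: predicate holds */
		return 0;
	}
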
-
-static int
-lnet_router_checker(void *arg)
-{
- struct lnet_peer *rtr;
- struct list_head *entry;
-
- while (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING) {
- __u64 version;
- int cpt;
- int cpt2;
-
- cpt = lnet_net_lock_current();
-rescan:
- version = the_lnet.ln_routers_version;
-
- list_for_each(entry, &the_lnet.ln_routers) {
- rtr = list_entry(entry, struct lnet_peer, lp_rtr_list);
-
- cpt2 = lnet_cpt_of_nid_locked(rtr->lp_nid);
- if (cpt != cpt2) {
- lnet_net_unlock(cpt);
- cpt = cpt2;
- lnet_net_lock(cpt);
- /* the routers list has changed */
- if (version != the_lnet.ln_routers_version)
- goto rescan;
- }
-
- lnet_ping_router_locked(rtr);
-
- /* NB dropped lock */
- if (version != the_lnet.ln_routers_version) {
- /* the routers list has changed */
- goto rescan;
- }
- }
-
- if (the_lnet.ln_routing)
- lnet_update_ni_status_locked();
-
- lnet_net_unlock(cpt);
-
- lnet_prune_rc_data(0); /* don't wait for UNLINK */
-
- /*
-		 * Calling schedule_timeout() here always adds 1 to the load
-		 * average, because the kernel counts # active tasks as
-		 * nr_running + nr_uninterruptible.
- */
- /*
-		 * If there are any routes then wake up every second. If
- * there are no routes then sleep indefinitely until woken
- * up by a user adding a route
- */
- if (!lnet_router_checker_active())
- wait_event_interruptible(the_lnet.ln_rc_waitq,
- lnet_router_checker_active());
- else
- wait_event_interruptible_timeout(the_lnet.ln_rc_waitq,
- false,
- HZ);
- }
-
- lnet_prune_rc_data(1); /* wait for UNLINK */
-
- the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
- complete(&the_lnet.ln_rc_signal);
- /* The unlink event callback will signal final completion */
- return 0;
-}
-
-void
-lnet_destroy_rtrbuf(struct lnet_rtrbuf *rb, int npages)
-{
- while (--npages >= 0)
- __free_page(rb->rb_kiov[npages].bv_page);
-
- kfree(rb);
-}
-
-static struct lnet_rtrbuf *
-lnet_new_rtrbuf(struct lnet_rtrbufpool *rbp, int cpt)
-{
- int npages = rbp->rbp_npages;
- int sz = offsetof(struct lnet_rtrbuf, rb_kiov[npages]);
- struct page *page;
- struct lnet_rtrbuf *rb;
- int i;
-
- rb = kzalloc_cpt(sz, GFP_NOFS, cpt);
- if (!rb)
- return NULL;
-
- rb->rb_pool = rbp;
-
- for (i = 0; i < npages; i++) {
- page = alloc_pages_node(
- cfs_cpt_spread_node(lnet_cpt_table(), cpt),
- GFP_KERNEL | __GFP_ZERO, 0);
- if (!page) {
- while (--i >= 0)
- __free_page(rb->rb_kiov[i].bv_page);
-
- kfree(rb);
- return NULL;
- }
-
- rb->rb_kiov[i].bv_len = PAGE_SIZE;
- rb->rb_kiov[i].bv_offset = 0;
- rb->rb_kiov[i].bv_page = page;
- }
-
- return rb;
-}
-
-static void
-lnet_rtrpool_free_bufs(struct lnet_rtrbufpool *rbp, int cpt)
-{
- int npages = rbp->rbp_npages;
- struct list_head tmp;
- struct lnet_rtrbuf *rb;
- struct lnet_rtrbuf *temp;
-
- if (!rbp->rbp_nbuffers) /* not initialized or already freed */
- return;
-
- INIT_LIST_HEAD(&tmp);
-
- lnet_net_lock(cpt);
- lnet_drop_routed_msgs_locked(&rbp->rbp_msgs, cpt);
- list_splice_init(&rbp->rbp_bufs, &tmp);
- rbp->rbp_req_nbuffers = 0;
- rbp->rbp_nbuffers = 0;
- rbp->rbp_credits = 0;
- rbp->rbp_mincredits = 0;
- lnet_net_unlock(cpt);
-
- /* Free buffers on the free list. */
- list_for_each_entry_safe(rb, temp, &tmp, rb_list) {
- list_del(&rb->rb_list);
- lnet_destroy_rtrbuf(rb, npages);
- }
-}
-
-static int
-lnet_rtrpool_adjust_bufs(struct lnet_rtrbufpool *rbp, int nbufs, int cpt)
-{
- struct list_head rb_list;
- struct lnet_rtrbuf *rb;
- int num_rb;
- int num_buffers = 0;
- int old_req_nbufs;
- int npages = rbp->rbp_npages;
-
- lnet_net_lock(cpt);
- /*
-	 * If we are called for fewer buffers than are already in the pool, we
- * just lower the req_nbuffers number and excess buffers will be
- * thrown away as they are returned to the free list. Credits
- * then get adjusted as well.
- * If we already have enough buffers allocated to serve the
- * increase requested, then we can treat that the same way as we
- * do the decrease.
- */
- num_rb = nbufs - rbp->rbp_nbuffers;
- if (nbufs <= rbp->rbp_req_nbuffers || num_rb <= 0) {
- rbp->rbp_req_nbuffers = nbufs;
- lnet_net_unlock(cpt);
- return 0;
- }
- /*
- * store the older value of rbp_req_nbuffers and then set it to
- * the new request to prevent lnet_return_rx_credits_locked() from
- * freeing buffers that we need to keep around
- */
- old_req_nbufs = rbp->rbp_req_nbuffers;
- rbp->rbp_req_nbuffers = nbufs;
- lnet_net_unlock(cpt);
-
- INIT_LIST_HEAD(&rb_list);
-
- /*
- * allocate the buffers on a local list first. If all buffers are
- * allocated successfully then join this list to the rbp buffer
- * list. If not then free all allocated buffers.
- */
- while (num_rb-- > 0) {
- rb = lnet_new_rtrbuf(rbp, cpt);
- if (!rb) {
- CERROR("Failed to allocate %d route bufs of %d pages\n",
- nbufs, npages);
-
- lnet_net_lock(cpt);
- rbp->rbp_req_nbuffers = old_req_nbufs;
- lnet_net_unlock(cpt);
-
- goto failed;
- }
-
- list_add(&rb->rb_list, &rb_list);
- num_buffers++;
- }
-
- lnet_net_lock(cpt);
-
- list_splice_tail(&rb_list, &rbp->rbp_bufs);
- rbp->rbp_nbuffers += num_buffers;
- rbp->rbp_credits += num_buffers;
- rbp->rbp_mincredits = rbp->rbp_credits;
- /*
- * We need to schedule blocked msg using the newly
- * added buffers.
- */
- while (!list_empty(&rbp->rbp_bufs) &&
- !list_empty(&rbp->rbp_msgs))
- lnet_schedule_blocked_locked(rbp);
-
- lnet_net_unlock(cpt);
-
- return 0;
-
-failed:
- while (!list_empty(&rb_list)) {
- rb = list_entry(rb_list.next, struct lnet_rtrbuf, rb_list);
- list_del(&rb->rb_list);
- lnet_destroy_rtrbuf(rb, npages);
- }
-
- return -ENOMEM;
-}
-
-static void
-lnet_rtrpool_init(struct lnet_rtrbufpool *rbp, int npages)
-{
- INIT_LIST_HEAD(&rbp->rbp_msgs);
- INIT_LIST_HEAD(&rbp->rbp_bufs);
-
- rbp->rbp_npages = npages;
- rbp->rbp_credits = 0;
- rbp->rbp_mincredits = 0;
-}
-
-void
-lnet_rtrpools_free(int keep_pools)
-{
- struct lnet_rtrbufpool *rtrp;
- int i;
-
- if (!the_lnet.ln_rtrpools) /* uninitialized or freed */
- return;
-
- cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
- lnet_rtrpool_free_bufs(&rtrp[LNET_TINY_BUF_IDX], i);
- lnet_rtrpool_free_bufs(&rtrp[LNET_SMALL_BUF_IDX], i);
- lnet_rtrpool_free_bufs(&rtrp[LNET_LARGE_BUF_IDX], i);
- }
-
- if (!keep_pools) {
- cfs_percpt_free(the_lnet.ln_rtrpools);
- the_lnet.ln_rtrpools = NULL;
- }
-}
-
-static int
-lnet_nrb_tiny_calculate(void)
-{
- int nrbs = LNET_NRB_TINY;
-
- if (tiny_router_buffers < 0) {
- LCONSOLE_ERROR_MSG(0x10c,
- "tiny_router_buffers=%d invalid when routing enabled\n",
- tiny_router_buffers);
- return -EINVAL;
- }
-
- if (tiny_router_buffers > 0)
- nrbs = tiny_router_buffers;
-
- nrbs /= LNET_CPT_NUMBER;
- return max(nrbs, LNET_NRB_TINY_MIN);
-}
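-
-/*
- * A quick arithmetic sketch of the split above (illustrative values):
- * tiny_router_buffers=2048 on a node with LNET_CPT_NUMBER == 4 gives
- * 2048 / 4 = 512 buffers per CPT pool; leaving the parameter at 0
- * falls back to LNET_NRB_TINY (again split across CPTs), and the
- * result is always clamped to at least LNET_NRB_TINY_MIN per CPT.
- */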
-
-static int
-lnet_nrb_small_calculate(void)
-{
- int nrbs = LNET_NRB_SMALL;
-
- if (small_router_buffers < 0) {
- LCONSOLE_ERROR_MSG(0x10c,
- "small_router_buffers=%d invalid when routing enabled\n",
- small_router_buffers);
- return -EINVAL;
- }
-
- if (small_router_buffers > 0)
- nrbs = small_router_buffers;
-
- nrbs /= LNET_CPT_NUMBER;
- return max(nrbs, LNET_NRB_SMALL_MIN);
-}
-
-static int
-lnet_nrb_large_calculate(void)
-{
- int nrbs = LNET_NRB_LARGE;
-
- if (large_router_buffers < 0) {
- LCONSOLE_ERROR_MSG(0x10c,
- "large_router_buffers=%d invalid when routing enabled\n",
- large_router_buffers);
- return -EINVAL;
- }
-
- if (large_router_buffers > 0)
- nrbs = large_router_buffers;
-
- nrbs /= LNET_CPT_NUMBER;
- return max(nrbs, LNET_NRB_LARGE_MIN);
-}
-
-int
-lnet_rtrpools_alloc(int im_a_router)
-{
- struct lnet_rtrbufpool *rtrp;
- int nrb_tiny;
- int nrb_small;
- int nrb_large;
- int rc;
- int i;
-
- if (!strcmp(forwarding, "")) {
- /* not set either way */
- if (!im_a_router)
- return 0;
- } else if (!strcmp(forwarding, "disabled")) {
- /* explicitly disabled */
- return 0;
- } else if (!strcmp(forwarding, "enabled")) {
- /* explicitly enabled */
- } else {
- LCONSOLE_ERROR_MSG(0x10b, "'forwarding' not set to either 'enabled' or 'disabled'\n");
- return -EINVAL;
- }
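-
- /*
- * e.g. loading LNet with the module option forwarding="enabled"
- * (an illustrative invocation) forces the router pools on even
- * when this node was not brought up as a router, while
- * forwarding="disabled" skips pool allocation entirely.
- */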
-
- nrb_tiny = lnet_nrb_tiny_calculate();
- if (nrb_tiny < 0)
- return -EINVAL;
-
- nrb_small = lnet_nrb_small_calculate();
- if (nrb_small < 0)
- return -EINVAL;
-
- nrb_large = lnet_nrb_large_calculate();
- if (nrb_large < 0)
- return -EINVAL;
-
- the_lnet.ln_rtrpools = cfs_percpt_alloc(lnet_cpt_table(),
- LNET_NRBPOOLS *
- sizeof(struct lnet_rtrbufpool));
- if (!the_lnet.ln_rtrpools) {
- LCONSOLE_ERROR_MSG(0x10c,
- "Failed to initialize router buffe pool\n");
- return -ENOMEM;
- }
-
- cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
- lnet_rtrpool_init(&rtrp[LNET_TINY_BUF_IDX], 0);
- rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_TINY_BUF_IDX],
- nrb_tiny, i);
- if (rc)
- goto failed;
-
- lnet_rtrpool_init(&rtrp[LNET_SMALL_BUF_IDX],
- LNET_NRB_SMALL_PAGES);
- rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_SMALL_BUF_IDX],
- nrb_small, i);
- if (rc)
- goto failed;
-
- lnet_rtrpool_init(&rtrp[LNET_LARGE_BUF_IDX],
- LNET_NRB_LARGE_PAGES);
- rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_LARGE_BUF_IDX],
- nrb_large, i);
- if (rc)
- goto failed;
- }
-
- lnet_net_lock(LNET_LOCK_EX);
- the_lnet.ln_routing = 1;
- lnet_net_unlock(LNET_LOCK_EX);
-
- return 0;
-
- failed:
- lnet_rtrpools_free(0);
- return rc;
-}
-
-static int
-lnet_rtrpools_adjust_helper(int tiny, int small, int large)
-{
- int nrb = 0;
- int rc = 0;
- int i;
- struct lnet_rtrbufpool *rtrp;
-
- /*
- * If the provided values for each buffer pool are different than the
- * configured values, we need to take action.
- */
- if (tiny >= 0) {
- tiny_router_buffers = tiny;
- nrb = lnet_nrb_tiny_calculate();
- cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
- rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_TINY_BUF_IDX],
- nrb, i);
- if (rc)
- return rc;
- }
- }
- if (small >= 0) {
- small_router_buffers = small;
- nrb = lnet_nrb_small_calculate();
- cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
- rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_SMALL_BUF_IDX],
- nrb, i);
- if (rc)
- return rc;
- }
- }
- if (large >= 0) {
- large_router_buffers = large;
- nrb = lnet_nrb_large_calculate();
- cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
- rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_LARGE_BUF_IDX],
- nrb, i);
- if (rc)
- return rc;
- }
- }
-
- return 0;
-}
-
-int
-lnet_rtrpools_adjust(int tiny, int small, int large)
-{
- /*
- * This function doesn't revert the changes if adding new buffers
- * fails; it's up to the userspace caller to revert the
- * changes.
- */
- if (!the_lnet.ln_routing)
- return 0;
-
- return lnet_rtrpools_adjust_helper(tiny, small, large);
-}
-
-int
-lnet_rtrpools_enable(void)
-{
- int rc = 0;
-
- if (the_lnet.ln_routing)
- return 0;
-
- if (!the_lnet.ln_rtrpools)
- /*
- * If routing is turned off, and we have never
- * initialized the pools before, just call the
- * standard buffer pool allocation routine as
- * if we are just configuring this for the first
- * time.
- */
- rc = lnet_rtrpools_alloc(1);
- else
- rc = lnet_rtrpools_adjust_helper(0, 0, 0);
- if (rc)
- return rc;
-
- lnet_net_lock(LNET_LOCK_EX);
- the_lnet.ln_routing = 1;
-
- the_lnet.ln_ping_info->pi_features &= ~LNET_PING_FEAT_RTE_DISABLED;
- lnet_net_unlock(LNET_LOCK_EX);
-
- return rc;
-}
-
-void
-lnet_rtrpools_disable(void)
-{
- if (!the_lnet.ln_routing)
- return;
-
- lnet_net_lock(LNET_LOCK_EX);
- the_lnet.ln_routing = 0;
- the_lnet.ln_ping_info->pi_features |= LNET_PING_FEAT_RTE_DISABLED;
-
- tiny_router_buffers = 0;
- small_router_buffers = 0;
- large_router_buffers = 0;
- lnet_net_unlock(LNET_LOCK_EX);
- lnet_rtrpools_free(1);
-}
-
-int
-lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, int alive, unsigned long when)
-{
- struct lnet_peer *lp = NULL;
- unsigned long now = jiffies;
- int cpt = lnet_cpt_of_nid(nid);
-
- LASSERT(!in_interrupt());
-
- CDEBUG(D_NET, "%s notifying %s: %s\n",
- !ni ? "userspace" : libcfs_nid2str(ni->ni_nid),
- libcfs_nid2str(nid),
- alive ? "up" : "down");
-
- if (ni &&
- LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) {
- CWARN("Ignoring notification of %s %s by %s (different net)\n",
- libcfs_nid2str(nid), alive ? "birth" : "death",
- libcfs_nid2str(ni->ni_nid));
- return -EINVAL;
- }
-
- /* can't do predictions... */
- if (time_after(when, now)) {
- CWARN("Ignoring prediction from %s of %s %s %ld seconds in the future\n",
- !ni ? "userspace" : libcfs_nid2str(ni->ni_nid),
- libcfs_nid2str(nid), alive ? "up" : "down",
- (when - now) / HZ);
- return -EINVAL;
- }
-
- if (ni && !alive && /* LND telling me she's down */
- !auto_down) { /* auto-down disabled */
- CDEBUG(D_NET, "Auto-down disabled\n");
- return 0;
- }
-
- lnet_net_lock(cpt);
-
- if (the_lnet.ln_shutdown) {
- lnet_net_unlock(cpt);
- return -ESHUTDOWN;
- }
-
- lp = lnet_find_peer_locked(the_lnet.ln_peer_tables[cpt], nid);
- if (!lp) {
- /* nid not found */
- lnet_net_unlock(cpt);
- CDEBUG(D_NET, "%s not found\n", libcfs_nid2str(nid));
- return 0;
- }
-
- /*
- * We can't fully trust the LND to report an exact peer last_alive
- * when it notifies us about a dead peer. For example, ksocklnd can
- * call us with when == _time_when_the_node_was_booted_ if
- * no connections were successfully established.
- */
- if (ni && !alive && when < lp->lp_last_alive)
- when = lp->lp_last_alive;
-
- lnet_notify_locked(lp, !ni, alive, when);
-
- if (ni)
- lnet_ni_notify_locked(ni, lp);
-
- lnet_peer_decref_locked(lp);
-
- lnet_net_unlock(cpt);
- return 0;
-}
-EXPORT_SYMBOL(lnet_notify);
diff --git a/drivers/staging/lustre/lnet/lnet/router_proc.c b/drivers/staging/lustre/lnet/lnet/router_proc.c
deleted file mode 100644
index ae4b7f5953a0..000000000000
--- a/drivers/staging/lustre/lnet/lnet/router_proc.c
+++ /dev/null
@@ -1,907 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- *
- * This file is part of Portals
- * http://sourceforge.net/projects/sandiaportals/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/*
- * This is really lnet_proc.c. You might need to update sanity test 215
- * if any file format is changed.
- */
-
-#define LNET_LOFFT_BITS (sizeof(loff_t) * 8)
-/*
- * NB: the max allowed LNET_CPT_BITS is 8 on 64-bit systems and 2 on 32-bit systems
- */
-#define LNET_PROC_CPT_BITS (LNET_CPT_BITS + 1)
-/* change version, 16 bits or 8 bits */
-#define LNET_PROC_VER_BITS max_t(size_t, min_t(size_t, LNET_LOFFT_BITS, 64) / 4, 8)
-
-#define LNET_PROC_HASH_BITS LNET_PEER_HASH_BITS
-/*
- * bits for peer hash offset
- * NB: we don't use the highest bit of *ppos because it's signed
- */
-#define LNET_PROC_HOFF_BITS (LNET_LOFFT_BITS - \
- LNET_PROC_CPT_BITS - \
- LNET_PROC_VER_BITS - \
- LNET_PROC_HASH_BITS - 1)
-/* bits for hash index + position */
-#define LNET_PROC_HPOS_BITS (LNET_PROC_HASH_BITS + LNET_PROC_HOFF_BITS)
-/* bits for peer hash table + hash version */
-#define LNET_PROC_VPOS_BITS (LNET_PROC_HPOS_BITS + LNET_PROC_VER_BITS)
-
-#define LNET_PROC_CPT_MASK ((1ULL << LNET_PROC_CPT_BITS) - 1)
-#define LNET_PROC_VER_MASK ((1ULL << LNET_PROC_VER_BITS) - 1)
-#define LNET_PROC_HASH_MASK ((1ULL << LNET_PROC_HASH_BITS) - 1)
-#define LNET_PROC_HOFF_MASK ((1ULL << LNET_PROC_HOFF_BITS) - 1)
-
-#define LNET_PROC_CPT_GET(pos) \
- (int)(((pos) >> LNET_PROC_VPOS_BITS) & LNET_PROC_CPT_MASK)
-
-#define LNET_PROC_VER_GET(pos) \
- (int)(((pos) >> LNET_PROC_HPOS_BITS) & LNET_PROC_VER_MASK)
-
-#define LNET_PROC_HASH_GET(pos) \
- (int)(((pos) >> LNET_PROC_HOFF_BITS) & LNET_PROC_HASH_MASK)
-
-#define LNET_PROC_HOFF_GET(pos) \
- (int)((pos) & LNET_PROC_HOFF_MASK)
-
-#define LNET_PROC_POS_MAKE(cpt, ver, hash, off) \
- (((((loff_t)(cpt)) & LNET_PROC_CPT_MASK) << LNET_PROC_VPOS_BITS) | \
- ((((loff_t)(ver)) & LNET_PROC_VER_MASK) << LNET_PROC_HPOS_BITS) | \
- ((((loff_t)(hash)) & LNET_PROC_HASH_MASK) << LNET_PROC_HOFF_BITS) | \
- ((off) & LNET_PROC_HOFF_MASK))
-
-#define LNET_PROC_VERSION(v) ((unsigned int)((v) & LNET_PROC_VER_MASK))
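-
-/*
- * Round-trip sketch of the cursor encoding above (illustrative values,
- * chosen to fit the masks): given
- *
- *   loff_t pos = LNET_PROC_POS_MAKE(1, 7, 3, 42);
- *
- * the accessors recover each field: LNET_PROC_CPT_GET(pos) == 1,
- * LNET_PROC_VER_GET(pos) == 7, LNET_PROC_HASH_GET(pos) == 3 and
- * LNET_PROC_HOFF_GET(pos) == 42, i.e. *ppos is a packed iteration
- * cursor, not a byte offset.
- */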
-
-static int __proc_lnet_stats(void *data, int write,
- loff_t pos, void __user *buffer, int nob)
-{
- int rc;
- struct lnet_counters *ctrs;
- int len;
- char *tmpstr;
- const int tmpsiz = 256; /* 7 %u and 4 %llu */
-
- if (write) {
- lnet_counters_reset();
- return 0;
- }
-
- /* read */
-
- ctrs = kzalloc(sizeof(*ctrs), GFP_NOFS);
- if (!ctrs)
- return -ENOMEM;
-
- tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
- if (!tmpstr) {
- kfree(ctrs);
- return -ENOMEM;
- }
-
- lnet_counters_get(ctrs);
-
- len = snprintf(tmpstr, tmpsiz,
- "%u %u %u %u %u %u %u %llu %llu %llu %llu",
- ctrs->msgs_alloc, ctrs->msgs_max,
- ctrs->errors,
- ctrs->send_count, ctrs->recv_count,
- ctrs->route_count, ctrs->drop_count,
- ctrs->send_length, ctrs->recv_length,
- ctrs->route_length, ctrs->drop_length);
-
- if (pos >= min_t(int, len, strlen(tmpstr)))
- rc = 0;
- else
- rc = cfs_trace_copyout_string(buffer, nob,
- tmpstr + pos, "\n");
-
- kfree(tmpstr);
- kfree(ctrs);
- return rc;
-}
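-
-/*
- * Example of the resulting single-line format (illustrative counter
- * values):
- *
- *   0 2 0 828 842 0 0 394791 388236 0 0
- *
- * i.e. msgs_alloc msgs_max errors send_count recv_count route_count
- * drop_count send_length recv_length route_length drop_length;
- * writing anything to the file resets all counters.
- */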
-
-static int proc_lnet_stats(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
- __proc_lnet_stats);
-}
-
-static int proc_lnet_routes(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- const int tmpsiz = 256;
- char *tmpstr;
- char *s;
- int rc = 0;
- int len;
- int ver;
- int off;
-
- BUILD_BUG_ON(sizeof(loff_t) < 4);
-
- off = LNET_PROC_HOFF_GET(*ppos);
- ver = LNET_PROC_VER_GET(*ppos);
-
- LASSERT(!write);
-
- if (!*lenp)
- return 0;
-
- tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
- if (!tmpstr)
- return -ENOMEM;
-
- s = tmpstr; /* points to current position in tmpstr[] */
-
- if (!*ppos) {
- s += snprintf(s, tmpstr + tmpsiz - s, "Routing %s\n",
- the_lnet.ln_routing ? "enabled" : "disabled");
- LASSERT(tmpstr + tmpsiz - s > 0);
-
- s += snprintf(s, tmpstr + tmpsiz - s, "%-8s %4s %8s %7s %s\n",
- "net", "hops", "priority", "state", "router");
- LASSERT(tmpstr + tmpsiz - s > 0);
-
- lnet_net_lock(0);
- ver = (unsigned int)the_lnet.ln_remote_nets_version;
- lnet_net_unlock(0);
- *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
- } else {
- struct list_head *n;
- struct list_head *r;
- struct lnet_route *route = NULL;
- struct lnet_remotenet *rnet = NULL;
- int skip = off - 1;
- struct list_head *rn_list;
- int i;
-
- lnet_net_lock(0);
-
- if (ver != LNET_PROC_VERSION(the_lnet.ln_remote_nets_version)) {
- lnet_net_unlock(0);
- kfree(tmpstr);
- return -ESTALE;
- }
-
- for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE && !route; i++) {
- rn_list = &the_lnet.ln_remote_nets_hash[i];
-
- n = rn_list->next;
-
- while (n != rn_list && !route) {
- rnet = list_entry(n, struct lnet_remotenet,
- lrn_list);
-
- r = rnet->lrn_routes.next;
-
- while (r != &rnet->lrn_routes) {
- struct lnet_route *re;
-
- re = list_entry(r, struct lnet_route,
- lr_list);
- if (!skip) {
- route = re;
- break;
- }
-
- skip--;
- r = r->next;
- }
-
- n = n->next;
- }
- }
-
- if (route) {
- __u32 net = rnet->lrn_net;
- __u32 hops = route->lr_hops;
- unsigned int priority = route->lr_priority;
- lnet_nid_t nid = route->lr_gateway->lp_nid;
- int alive = lnet_is_route_alive(route);
-
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-8s %4u %8u %7s %s\n",
- libcfs_net2str(net), hops,
- priority,
- alive ? "up" : "down",
- libcfs_nid2str(nid));
- LASSERT(tmpstr + tmpsiz - s > 0);
- }
-
- lnet_net_unlock(0);
- }
-
- len = s - tmpstr; /* how many bytes were written */
-
- if (len > *lenp) { /* linux-supplied buffer is too small */
- rc = -EINVAL;
- } else if (len > 0) { /* wrote something */
- if (copy_to_user(buffer, tmpstr, len)) {
- rc = -EFAULT;
- } else {
- off += 1;
- *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
- }
- }
-
- kfree(tmpstr);
-
- if (!rc)
- *lenp = len;
-
- return rc;
-}
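-
-/*
- * Reader protocol sketch for the handler above (and for the routers
- * and peers handlers below): each read() emits one entry and advances
- * the packed cursor in *ppos; the table version captured on the first
- * read is re-checked on every later read, so a concurrent change makes
- * the next read fail with -ESTALE and the reader is expected to
- * restart from offset 0.
- */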
-
-static int proc_lnet_routers(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- int rc = 0;
- char *tmpstr;
- char *s;
- const int tmpsiz = 256;
- int len;
- int ver;
- int off;
-
- off = LNET_PROC_HOFF_GET(*ppos);
- ver = LNET_PROC_VER_GET(*ppos);
-
- LASSERT(!write);
-
- if (!*lenp)
- return 0;
-
- tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
- if (!tmpstr)
- return -ENOMEM;
-
- s = tmpstr; /* points to current position in tmpstr[] */
-
- if (!*ppos) {
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-4s %7s %9s %6s %12s %9s %8s %7s %s\n",
- "ref", "rtr_ref", "alive_cnt", "state",
- "last_ping", "ping_sent", "deadline",
- "down_ni", "router");
- LASSERT(tmpstr + tmpsiz - s > 0);
-
- lnet_net_lock(0);
- ver = (unsigned int)the_lnet.ln_routers_version;
- lnet_net_unlock(0);
- *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
- } else {
- struct list_head *r;
- struct lnet_peer *peer = NULL;
- int skip = off - 1;
-
- lnet_net_lock(0);
-
- if (ver != LNET_PROC_VERSION(the_lnet.ln_routers_version)) {
- lnet_net_unlock(0);
-
- kfree(tmpstr);
- return -ESTALE;
- }
-
- r = the_lnet.ln_routers.next;
-
- while (r != &the_lnet.ln_routers) {
- struct lnet_peer *lp;
-
- lp = list_entry(r, struct lnet_peer, lp_rtr_list);
- if (!skip) {
- peer = lp;
- break;
- }
-
- skip--;
- r = r->next;
- }
-
- if (peer) {
- lnet_nid_t nid = peer->lp_nid;
- unsigned long now = jiffies;
- unsigned long deadline = peer->lp_ping_deadline;
- int nrefs = peer->lp_refcount;
- int nrtrrefs = peer->lp_rtr_refcount;
- int alive_cnt = peer->lp_alive_count;
- int alive = peer->lp_alive;
- int pingsent = !peer->lp_ping_notsent;
- int last_ping = (now - peer->lp_ping_timestamp) / HZ;
- int down_ni = 0;
- struct lnet_route *rtr;
-
- if ((peer->lp_ping_feats &
- LNET_PING_FEAT_NI_STATUS)) {
- list_for_each_entry(rtr, &peer->lp_routes,
- lr_gwlist) {
- /*
- * the number of down NIs on any route
- * should equal the number of down NIs
- * on the gateway
- */
- if (rtr->lr_downis) {
- down_ni = rtr->lr_downis;
- break;
- }
- }
- }
-
- if (!deadline)
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-4d %7d %9d %6s %12d %9d %8s %7d %s\n",
- nrefs, nrtrrefs, alive_cnt,
- alive ? "up" : "down", last_ping,
- pingsent, "NA", down_ni,
- libcfs_nid2str(nid));
- else
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-4d %7d %9d %6s %12d %9d %8lu %7d %s\n",
- nrefs, nrtrrefs, alive_cnt,
- alive ? "up" : "down", last_ping,
- pingsent,
- (deadline - now) / HZ,
- down_ni, libcfs_nid2str(nid));
- LASSERT(tmpstr + tmpsiz - s > 0);
- }
-
- lnet_net_unlock(0);
- }
-
- len = s - tmpstr; /* how many bytes were written */
-
- if (len > *lenp) { /* linux-supplied buffer is too small */
- rc = -EINVAL;
- } else if (len > 0) { /* wrote something */
- if (copy_to_user(buffer, tmpstr, len)) {
- rc = -EFAULT;
- } else {
- off += 1;
- *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
- }
- }
-
- kfree(tmpstr);
-
- if (!rc)
- *lenp = len;
-
- return rc;
-}
-
-static int proc_lnet_peers(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- const int tmpsiz = 256;
- struct lnet_peer_table *ptable;
- char *tmpstr;
- char *s;
- int cpt = LNET_PROC_CPT_GET(*ppos);
- int ver = LNET_PROC_VER_GET(*ppos);
- int hash = LNET_PROC_HASH_GET(*ppos);
- int hoff = LNET_PROC_HOFF_GET(*ppos);
- int rc = 0;
- int len;
-
- BUILD_BUG_ON(LNET_PROC_HASH_BITS < LNET_PEER_HASH_BITS);
- LASSERT(!write);
-
- if (!*lenp)
- return 0;
-
- if (cpt >= LNET_CPT_NUMBER) {
- *lenp = 0;
- return 0;
- }
-
- tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
- if (!tmpstr)
- return -ENOMEM;
-
- s = tmpstr; /* points to current position in tmpstr[] */
-
- if (!*ppos) {
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-24s %4s %5s %5s %5s %5s %5s %5s %5s %s\n",
- "nid", "refs", "state", "last", "max",
- "rtr", "min", "tx", "min", "queue");
- LASSERT(tmpstr + tmpsiz - s > 0);
-
- hoff++;
- } else {
- struct lnet_peer *peer;
- struct list_head *p;
- int skip;
- again:
- p = NULL;
- peer = NULL;
- skip = hoff - 1;
-
- lnet_net_lock(cpt);
- ptable = the_lnet.ln_peer_tables[cpt];
- if (hoff == 1)
- ver = LNET_PROC_VERSION(ptable->pt_version);
-
- if (ver != LNET_PROC_VERSION(ptable->pt_version)) {
- lnet_net_unlock(cpt);
- kfree(tmpstr);
- return -ESTALE;
- }
-
- while (hash < LNET_PEER_HASH_SIZE) {
- if (!p)
- p = ptable->pt_hash[hash].next;
-
- while (p != &ptable->pt_hash[hash]) {
- struct lnet_peer *lp;
-
- lp = list_entry(p, struct lnet_peer,
- lp_hashlist);
- if (!skip) {
- peer = lp;
-
- /*
- * minor optimization: start from idx+1
- * on next iteration if we've just
- * drained lp_hashlist
- */
- if (lp->lp_hashlist.next ==
- &ptable->pt_hash[hash]) {
- hoff = 1;
- hash++;
- } else {
- hoff++;
- }
-
- break;
- }
-
- skip--;
- p = lp->lp_hashlist.next;
- }
-
- if (peer)
- break;
-
- p = NULL;
- hoff = 1;
- hash++;
- }
-
- if (peer) {
- lnet_nid_t nid = peer->lp_nid;
- int nrefs = peer->lp_refcount;
- int lastalive = -1;
- char *aliveness = "NA";
- int maxcr = peer->lp_ni->ni_peertxcredits;
- int txcr = peer->lp_txcredits;
- int mintxcr = peer->lp_mintxcredits;
- int rtrcr = peer->lp_rtrcredits;
- int minrtrcr = peer->lp_minrtrcredits;
- int txqnob = peer->lp_txqnob;
-
- if (lnet_isrouter(peer) ||
- lnet_peer_aliveness_enabled(peer))
- aliveness = peer->lp_alive ? "up" : "down";
-
- if (lnet_peer_aliveness_enabled(peer)) {
- unsigned long now = jiffies;
- long delta;
-
- delta = now - peer->lp_last_alive;
- lastalive = (delta) / HZ;
-
- /* No need to mess up the peers output with
- * arbitrarily large integers - it suffices to
- * know that lastalive is more than 10000s old
- */
- if (lastalive >= 10000)
- lastalive = 9999;
- }
-
- lnet_net_unlock(cpt);
-
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-24s %4d %5s %5d %5d %5d %5d %5d %5d %d\n",
- libcfs_nid2str(nid), nrefs, aliveness,
- lastalive, maxcr, rtrcr, minrtrcr, txcr,
- mintxcr, txqnob);
- LASSERT(tmpstr + tmpsiz - s > 0);
-
- } else { /* peer is NULL */
- lnet_net_unlock(cpt);
- }
-
- if (hash == LNET_PEER_HASH_SIZE) {
- cpt++;
- hash = 0;
- hoff = 1;
- if (!peer && cpt < LNET_CPT_NUMBER)
- goto again;
- }
- }
-
- len = s - tmpstr; /* how many bytes were written */
-
- if (len > *lenp) { /* linux-supplied buffer is too small */
- rc = -EINVAL;
- } else if (len > 0) { /* wrote something */
- if (copy_to_user(buffer, tmpstr, len))
- rc = -EFAULT;
- else
- *ppos = LNET_PROC_POS_MAKE(cpt, ver, hash, hoff);
- }
-
- kfree(tmpstr);
-
- if (!rc)
- *lenp = len;
-
- return rc;
-}
-
-static int __proc_lnet_buffers(void *data, int write,
- loff_t pos, void __user *buffer, int nob)
-{
- char *s;
- char *tmpstr;
- int tmpsiz;
- int idx;
- int len;
- int rc;
- int i;
-
- LASSERT(!write);
-
- /* one 64-byte line of 4 %d per pool, plus a header, per CPT */
- tmpsiz = 64 * (LNET_NRBPOOLS + 1) * LNET_CPT_NUMBER;
- tmpstr = kvmalloc(tmpsiz, GFP_KERNEL);
- if (!tmpstr)
- return -ENOMEM;
-
- s = tmpstr; /* points to current position in tmpstr[] */
-
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%5s %5s %7s %7s\n",
- "pages", "count", "credits", "min");
- LASSERT(tmpstr + tmpsiz - s > 0);
-
- if (!the_lnet.ln_rtrpools)
- goto out; /* I'm not a router */
-
- for (idx = 0; idx < LNET_NRBPOOLS; idx++) {
- struct lnet_rtrbufpool *rbp;
-
- lnet_net_lock(LNET_LOCK_EX);
- cfs_percpt_for_each(rbp, i, the_lnet.ln_rtrpools) {
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%5d %5d %7d %7d\n",
- rbp[idx].rbp_npages,
- rbp[idx].rbp_nbuffers,
- rbp[idx].rbp_credits,
- rbp[idx].rbp_mincredits);
- LASSERT(tmpstr + tmpsiz - s > 0);
- }
- lnet_net_unlock(LNET_LOCK_EX);
- }
-
- out:
- len = s - tmpstr;
-
- if (pos >= min_t(int, len, strlen(tmpstr)))
- rc = 0;
- else
- rc = cfs_trace_copyout_string(buffer, nob,
- tmpstr + pos, NULL);
-
- kvfree(tmpstr);
- return rc;
-}
-
-static int proc_lnet_buffers(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
- __proc_lnet_buffers);
-}
-
-static int proc_lnet_nis(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- int tmpsiz = 128 * LNET_CPT_NUMBER;
- int rc = 0;
- char *tmpstr;
- char *s;
- int len;
-
- LASSERT(!write);
-
- if (!*lenp)
- return 0;
-
- tmpstr = kvmalloc(tmpsiz, GFP_KERNEL);
- if (!tmpstr)
- return -ENOMEM;
-
- s = tmpstr; /* points to current position in tmpstr[] */
-
- if (!*ppos) {
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-24s %6s %5s %4s %4s %4s %5s %5s %5s\n",
- "nid", "status", "alive", "refs", "peer",
- "rtr", "max", "tx", "min");
- LASSERT(tmpstr + tmpsiz - s > 0);
- } else {
- struct list_head *n;
- struct lnet_ni *ni = NULL;
- int skip = *ppos - 1;
-
- lnet_net_lock(0);
-
- n = the_lnet.ln_nis.next;
-
- while (n != &the_lnet.ln_nis) {
- struct lnet_ni *a_ni;
-
- a_ni = list_entry(n, struct lnet_ni, ni_list);
- if (!skip) {
- ni = a_ni;
- break;
- }
-
- skip--;
- n = n->next;
- }
-
- if (ni) {
- struct lnet_tx_queue *tq;
- char *stat;
- time64_t now = ktime_get_real_seconds();
- int last_alive = -1;
- int i;
- int j;
-
- if (the_lnet.ln_routing)
- last_alive = now - ni->ni_last_alive;
-
- /* @lo forever alive */
- if (ni->ni_lnd->lnd_type == LOLND)
- last_alive = 0;
-
- lnet_ni_lock(ni);
- LASSERT(ni->ni_status);
- stat = (ni->ni_status->ns_status ==
- LNET_NI_STATUS_UP) ? "up" : "down";
- lnet_ni_unlock(ni);
-
- /*
- * we actually output credits information for the
- * TX queue of each CPU partition
- */
- cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
- for (j = 0; ni->ni_cpts &&
- j < ni->ni_ncpts; j++) {
- if (i == ni->ni_cpts[j])
- break;
- }
-
- if (j == ni->ni_ncpts)
- continue;
-
- if (i)
- lnet_net_lock(i);
-
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-24s %6s %5d %4d %4d %4d %5d %5d %5d\n",
- libcfs_nid2str(ni->ni_nid), stat,
- last_alive, *ni->ni_refs[i],
- ni->ni_peertxcredits,
- ni->ni_peerrtrcredits,
- tq->tq_credits_max,
- tq->tq_credits,
- tq->tq_credits_min);
- if (i)
- lnet_net_unlock(i);
- }
- LASSERT(tmpstr + tmpsiz - s > 0);
- }
-
- lnet_net_unlock(0);
- }
-
- len = s - tmpstr; /* how many bytes were written */
-
- if (len > *lenp) { /* linux-supplied buffer is too small */
- rc = -EINVAL;
- } else if (len > 0) { /* wrote something */
- if (copy_to_user(buffer, tmpstr, len))
- rc = -EFAULT;
- else
- *ppos += 1;
- }
-
- kvfree(tmpstr);
-
- if (!rc)
- *lenp = len;
-
- return rc;
-}
-
-struct lnet_portal_rotors {
- int pr_value;
- const char *pr_name;
- const char *pr_desc;
-};
-
-static struct lnet_portal_rotors portal_rotors[] = {
- {
- .pr_value = LNET_PTL_ROTOR_OFF,
- .pr_name = "OFF",
- .pr_desc = "Turn off message rotor for wildcard portals"
- },
- {
- .pr_value = LNET_PTL_ROTOR_ON,
- .pr_name = "ON",
- .pr_desc = "round-robin dispatch all PUT messages for wildcard portals"
- },
- {
- .pr_value = LNET_PTL_ROTOR_RR_RT,
- .pr_name = "RR_RT",
- .pr_desc = "round-robin dispatch routed PUT message for wildcard portals"
- },
- {
- .pr_value = LNET_PTL_ROTOR_HASH_RT,
- .pr_name = "HASH_RT",
- .pr_desc = "dispatch routed PUT message by hashing source NID for wildcard portals"
- },
- {
- .pr_value = -1,
- .pr_name = NULL,
- .pr_desc = NULL
- },
-};
-
-static int __proc_lnet_portal_rotor(void *data, int write,
- loff_t pos, void __user *buffer, int nob)
-{
- const int buf_len = 128;
- char *buf;
- char *tmp;
- int rc;
- int i;
-
- buf = kmalloc(buf_len, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- if (!write) {
- lnet_res_lock(0);
-
- for (i = 0; portal_rotors[i].pr_value >= 0; i++) {
- if (portal_rotors[i].pr_value == portal_rotor)
- break;
- }
-
- LASSERT(portal_rotors[i].pr_value == portal_rotor);
- lnet_res_unlock(0);
-
- rc = snprintf(buf, buf_len,
- "{\n\tportals: all\n"
- "\trotor: %s\n\tdescription: %s\n}",
- portal_rotors[i].pr_name,
- portal_rotors[i].pr_desc);
-
- if (pos >= min_t(int, rc, buf_len)) {
- rc = 0;
- } else {
- rc = cfs_trace_copyout_string(buffer, nob,
- buf + pos, "\n");
- }
- goto out;
- }
-
- rc = cfs_trace_copyin_string(buf, buf_len, buffer, nob);
- if (rc < 0)
- goto out;
-
- tmp = strim(buf);
-
- rc = -EINVAL;
- lnet_res_lock(0);
- for (i = 0; portal_rotors[i].pr_name; i++) {
- if (!strncasecmp(portal_rotors[i].pr_name, tmp,
- strlen(portal_rotors[i].pr_name))) {
- portal_rotor = portal_rotors[i].pr_value;
- rc = 0;
- break;
- }
- }
- lnet_res_unlock(0);
-out:
- kfree(buf);
- return rc;
-}
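-
-/*
- * Usage sketch (illustrative; the exact path depends on where libcfs
- * mounts the table): reading portal_rotor dumps the current rotor name
- * and description, while writing one of the names from portal_rotors[],
- * e.g.
- *
- *   echo RR_RT > .../lnet/portal_rotor
- *
- * switches the policy; the match is case-insensitive.
- */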
-
-static int proc_lnet_portal_rotor(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
-{
- return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
- __proc_lnet_portal_rotor);
-}
-
-static struct ctl_table lnet_table[] = {
- /*
- * NB No .strategy entries have been provided since sysctl(8) prefers
- * to go via /proc for portability.
- */
- {
- .procname = "stats",
- .mode = 0644,
- .proc_handler = &proc_lnet_stats,
- },
- {
- .procname = "routes",
- .mode = 0444,
- .proc_handler = &proc_lnet_routes,
- },
- {
- .procname = "routers",
- .mode = 0444,
- .proc_handler = &proc_lnet_routers,
- },
- {
- .procname = "peers",
- .mode = 0444,
- .proc_handler = &proc_lnet_peers,
- },
- {
- .procname = "buffers",
- .mode = 0444,
- .proc_handler = &proc_lnet_buffers,
- },
- {
- .procname = "nis",
- .mode = 0444,
- .proc_handler = &proc_lnet_nis,
- },
- {
- .procname = "portal_rotor",
- .mode = 0644,
- .proc_handler = &proc_lnet_portal_rotor,
- },
- {
- }
-};
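-
-/*
- * The table above surfaces one file per entry via the
- * lustre_insert_debugfs() call below: "routes", "routers", "peers",
- * "buffers" and "nis" read-only, with "stats" and "portal_rotor"
- * writable as well; the mount point depends on the libcfs debugfs
- * setup.
- */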
-
-void lnet_router_debugfs_init(void)
-{
- lustre_insert_debugfs(lnet_table);
-}
-
-void lnet_router_debugfs_fini(void)
-{
-}
diff --git a/drivers/staging/lustre/lnet/selftest/Makefile b/drivers/staging/lustre/lnet/selftest/Makefile
deleted file mode 100644
index 3ccc8966b566..000000000000
--- a/drivers/staging/lustre/lnet/selftest/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET_SELFTEST) := lnet_selftest.o
-
-lnet_selftest-y := console.o conrpc.o conctl.o framework.o timer.o rpc.o \
- module.o ping_test.o brw_test.o
diff --git a/drivers/staging/lustre/lnet/selftest/brw_test.c b/drivers/staging/lustre/lnet/selftest/brw_test.c
deleted file mode 100644
index f1ee219bc8f3..000000000000
--- a/drivers/staging/lustre/lnet/selftest/brw_test.c
+++ /dev/null
@@ -1,526 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/brw_test.c
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- */
-
-#include "selftest.h"
-
-static int brw_srv_workitems = SFW_TEST_WI_MAX;
-module_param(brw_srv_workitems, int, 0644);
-MODULE_PARM_DESC(brw_srv_workitems, "# BRW server workitems");
-
-static int brw_inject_errors;
-module_param(brw_inject_errors, int, 0644);
-MODULE_PARM_DESC(brw_inject_errors, "# data errors to inject randomly, zero by default");
-
-#define BRW_POISON 0xbeefbeefbeefbeefULL
-#define BRW_MAGIC 0xeeb0eeb1eeb2eeb3ULL
-#define BRW_MSIZE sizeof(u64)
-
-static void
-brw_client_fini(struct sfw_test_instance *tsi)
-{
- struct srpc_bulk *bulk;
- struct sfw_test_unit *tsu;
-
- LASSERT(tsi->tsi_is_client);
-
- list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
- bulk = tsu->tsu_private;
- if (!bulk)
- continue;
-
- srpc_free_bulk(bulk);
- tsu->tsu_private = NULL;
- }
-}
-
-static int
-brw_client_init(struct sfw_test_instance *tsi)
-{
- struct sfw_session *sn = tsi->tsi_batch->bat_session;
- int flags;
- int off;
- int npg;
- int len;
- int opc;
- struct srpc_bulk *bulk;
- struct sfw_test_unit *tsu;
-
- LASSERT(sn);
- LASSERT(tsi->tsi_is_client);
-
- if (!(sn->sn_features & LST_FEAT_BULK_LEN)) {
- struct test_bulk_req *breq = &tsi->tsi_u.bulk_v0;
-
- opc = breq->blk_opc;
- flags = breq->blk_flags;
- npg = breq->blk_npg;
- /*
- * NB: this is not going to work for variable page size,
- * but we have to keep it for compatibility
- */
- len = npg * PAGE_SIZE;
- off = 0;
- } else {
- struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1;
-
- /*
- * We should never reach this step with an unknown feature,
- * because make_session will reject unknown features
- */
- LASSERT(!(sn->sn_features & ~LST_FEATS_MASK));
-
- opc = breq->blk_opc;
- flags = breq->blk_flags;
- len = breq->blk_len;
- off = breq->blk_offset & ~PAGE_MASK;
- npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- }
-
- if (off % BRW_MSIZE)
- return -EINVAL;
-
- if (npg > LNET_MAX_IOV || npg <= 0)
- return -EINVAL;
-
- if (opc != LST_BRW_READ && opc != LST_BRW_WRITE)
- return -EINVAL;
-
- if (flags != LST_BRW_CHECK_NONE &&
- flags != LST_BRW_CHECK_FULL && flags != LST_BRW_CHECK_SIMPLE)
- return -EINVAL;
-
- list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
- bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid),
- off, npg, len, opc == LST_BRW_READ);
- if (!bulk) {
- brw_client_fini(tsi);
- return -ENOMEM;
- }
-
- tsu->tsu_private = bulk;
- }
-
- return 0;
-}
-
-static int brw_inject_one_error(void)
-{
- struct timespec64 ts;
-
- if (brw_inject_errors <= 0)
- return 0;
-
- ktime_get_ts64(&ts);
-
- if (!((ts.tv_nsec / NSEC_PER_USEC) & 1))
- return 0;
-
- return brw_inject_errors--;
-}
-
-static void
-brw_fill_page(struct page *pg, int off, int len, int pattern, __u64 magic)
-{
- char *addr = page_address(pg) + off;
- int i;
-
- LASSERT(addr);
- LASSERT(!(off % BRW_MSIZE) && !(len % BRW_MSIZE));
-
- if (pattern == LST_BRW_CHECK_NONE)
- return;
-
- if (magic == BRW_MAGIC)
- magic += brw_inject_one_error();
-
- if (pattern == LST_BRW_CHECK_SIMPLE) {
- memcpy(addr, &magic, BRW_MSIZE);
- if (len > BRW_MSIZE) {
- addr += PAGE_SIZE - BRW_MSIZE;
- memcpy(addr, &magic, BRW_MSIZE);
- }
- return;
- }
-
- if (pattern == LST_BRW_CHECK_FULL) {
- for (i = 0; i < len; i += BRW_MSIZE)
- memcpy(addr + i, &magic, BRW_MSIZE);
- return;
- }
-
- LBUG();
-}
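-
-/*
- * Page layout sketch for the two verification modes (illustrative):
- *
- * LST_BRW_CHECK_SIMPLE: one BRW_MSIZE-sized magic word at the start of
- * the region and, when len > BRW_MSIZE, a second one PAGE_SIZE -
- * BRW_MSIZE bytes further on (the last word of the page when off is 0).
- *
- * LST_BRW_CHECK_FULL: the magic word repeated every BRW_MSIZE bytes
- * across the whole len bytes.
- *
- * brw_check_page() below verifies the same positions.
- */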
-
-static int
-brw_check_page(struct page *pg, int off, int len, int pattern, __u64 magic)
-{
- char *addr = page_address(pg) + off;
- __u64 data = 0; /* make compiler happy */
- int i;
-
- LASSERT(addr);
- LASSERT(!(off % BRW_MSIZE) && !(len % BRW_MSIZE));
-
- if (pattern == LST_BRW_CHECK_NONE)
- return 0;
-
- if (pattern == LST_BRW_CHECK_SIMPLE) {
- data = *((__u64 *)addr);
- if (data != magic)
- goto bad_data;
-
- if (len > BRW_MSIZE) {
- addr += PAGE_SIZE - BRW_MSIZE;
- data = *((__u64 *)addr);
- if (data != magic)
- goto bad_data;
- }
- return 0;
- }
-
- if (pattern == LST_BRW_CHECK_FULL) {
- for (i = 0; i < len; i += BRW_MSIZE) {
- data = *(u64 *)(addr + i);
- if (data != magic)
- goto bad_data;
- }
- return 0;
- }
-
- LBUG();
-
-bad_data:
- CERROR("Bad data in page %p: %#llx, %#llx expected\n",
- pg, data, magic);
- return 1;
-}
-
-static void
-brw_fill_bulk(struct srpc_bulk *bk, int pattern, __u64 magic)
-{
- int i;
- struct page *pg;
-
- for (i = 0; i < bk->bk_niov; i++) {
- int off, len;
-
- pg = bk->bk_iovs[i].bv_page;
- off = bk->bk_iovs[i].bv_offset;
- len = bk->bk_iovs[i].bv_len;
- brw_fill_page(pg, off, len, pattern, magic);
- }
-}
-
-static int
-brw_check_bulk(struct srpc_bulk *bk, int pattern, __u64 magic)
-{
- int i;
- struct page *pg;
-
- for (i = 0; i < bk->bk_niov; i++) {
- int off, len;
-
- pg = bk->bk_iovs[i].bv_page;
- off = bk->bk_iovs[i].bv_offset;
- len = bk->bk_iovs[i].bv_len;
- if (brw_check_page(pg, off, len, pattern, magic)) {
- CERROR("Bulk page %p (%d/%d) is corrupted!\n",
- pg, i, bk->bk_niov);
- return 1;
- }
- }
-
- return 0;
-}
-
-static int
-brw_client_prep_rpc(struct sfw_test_unit *tsu, struct lnet_process_id dest,
- struct srpc_client_rpc **rpcpp)
-{
- struct srpc_bulk *bulk = tsu->tsu_private;
- struct sfw_test_instance *tsi = tsu->tsu_instance;
- struct sfw_session *sn = tsi->tsi_batch->bat_session;
- struct srpc_client_rpc *rpc;
- struct srpc_brw_reqst *req;
- int flags;
- int npg;
- int len;
- int opc;
- int rc;
-
- LASSERT(sn);
- LASSERT(bulk);
-
- if (!(sn->sn_features & LST_FEAT_BULK_LEN)) {
- struct test_bulk_req *breq = &tsi->tsi_u.bulk_v0;
-
- opc = breq->blk_opc;
- flags = breq->blk_flags;
- npg = breq->blk_npg;
- len = npg * PAGE_SIZE;
- } else {
- struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1;
- int off;
-
- /*
- * We should never reach this step with an unknown feature,
- * because make_session will reject unknown features
- */
- LASSERT(!(sn->sn_features & ~LST_FEATS_MASK));
-
- opc = breq->blk_opc;
- flags = breq->blk_flags;
- len = breq->blk_len;
- off = breq->blk_offset;
- npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- }
-
- rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, npg, len, &rpc);
- if (rc)
- return rc;
-
- memcpy(&rpc->crpc_bulk, bulk, offsetof(struct srpc_bulk, bk_iovs[npg]));
- if (opc == LST_BRW_WRITE)
- brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_MAGIC);
- else
- brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_POISON);
-
- req = &rpc->crpc_reqstmsg.msg_body.brw_reqst;
- req->brw_flags = flags;
- req->brw_rw = opc;
- req->brw_len = len;
-
- *rpcpp = rpc;
- return 0;
-}
-
-static void
-brw_client_done_rpc(struct sfw_test_unit *tsu, struct srpc_client_rpc *rpc)
-{
- __u64 magic = BRW_MAGIC;
- struct sfw_test_instance *tsi = tsu->tsu_instance;
- struct sfw_session *sn = tsi->tsi_batch->bat_session;
- struct srpc_msg *msg = &rpc->crpc_replymsg;
- struct srpc_brw_reply *reply = &msg->msg_body.brw_reply;
- struct srpc_brw_reqst *reqst = &rpc->crpc_reqstmsg.msg_body.brw_reqst;
-
- LASSERT(sn);
-
- if (rpc->crpc_status) {
- CERROR("BRW RPC to %s failed with %d\n",
- libcfs_id2str(rpc->crpc_dest), rpc->crpc_status);
- if (!tsi->tsi_stopping) /* rpc could have been aborted */
- atomic_inc(&sn->sn_brw_errors);
- return;
- }
-
- if (msg->msg_magic != SRPC_MSG_MAGIC) {
- __swab64s(&magic);
- __swab32s(&reply->brw_status);
- }
-
- CDEBUG(reply->brw_status ? D_WARNING : D_NET,
- "BRW RPC to %s finished with brw_status: %d\n",
- libcfs_id2str(rpc->crpc_dest), reply->brw_status);
-
- if (reply->brw_status) {
- atomic_inc(&sn->sn_brw_errors);
- rpc->crpc_status = -(int)reply->brw_status;
- return;
- }
-
- if (reqst->brw_rw == LST_BRW_WRITE)
- return;
-
- if (brw_check_bulk(&rpc->crpc_bulk, reqst->brw_flags, magic)) {
- CERROR("Bulk data from %s is corrupted!\n",
- libcfs_id2str(rpc->crpc_dest));
- atomic_inc(&sn->sn_brw_errors);
- rpc->crpc_status = -EBADMSG;
- }
-}
-
-static void
-brw_server_rpc_done(struct srpc_server_rpc *rpc)
-{
- struct srpc_bulk *blk = rpc->srpc_bulk;
-
- if (!blk)
- return;
-
- if (rpc->srpc_status)
- CERROR("Bulk transfer %s %s has failed: %d\n",
- blk->bk_sink ? "from" : "to",
- libcfs_id2str(rpc->srpc_peer), rpc->srpc_status);
- else
- CDEBUG(D_NET, "Transferred %d pages bulk data %s %s\n",
- blk->bk_niov, blk->bk_sink ? "from" : "to",
- libcfs_id2str(rpc->srpc_peer));
-
- sfw_free_pages(rpc);
-}
-
-static int
-brw_bulk_ready(struct srpc_server_rpc *rpc, int status)
-{
- __u64 magic = BRW_MAGIC;
- struct srpc_brw_reply *reply = &rpc->srpc_replymsg.msg_body.brw_reply;
- struct srpc_brw_reqst *reqst;
- struct srpc_msg *reqstmsg;
-
- LASSERT(rpc->srpc_bulk);
- LASSERT(rpc->srpc_reqstbuf);
-
- reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
- reqst = &reqstmsg->msg_body.brw_reqst;
-
- if (status) {
- CERROR("BRW bulk %s failed for RPC from %s: %d\n",
- reqst->brw_rw == LST_BRW_READ ? "READ" : "WRITE",
- libcfs_id2str(rpc->srpc_peer), status);
- return -EIO;
- }
-
- if (reqst->brw_rw == LST_BRW_READ)
- return 0;
-
- if (reqstmsg->msg_magic != SRPC_MSG_MAGIC)
- __swab64s(&magic);
-
- if (brw_check_bulk(rpc->srpc_bulk, reqst->brw_flags, magic)) {
- CERROR("Bulk data from %s is corrupted!\n",
- libcfs_id2str(rpc->srpc_peer));
- reply->brw_status = EBADMSG;
- }
-
- return 0;
-}
-
-static int
-brw_server_handle(struct srpc_server_rpc *rpc)
-{
- struct srpc_service *sv = rpc->srpc_scd->scd_svc;
- struct srpc_msg *replymsg = &rpc->srpc_replymsg;
- struct srpc_msg *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
- struct srpc_brw_reply *reply = &replymsg->msg_body.brw_reply;
- struct srpc_brw_reqst *reqst = &reqstmsg->msg_body.brw_reqst;
- int npg;
- int rc;
-
- LASSERT(sv->sv_id == SRPC_SERVICE_BRW);
-
- if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) {
- LASSERT(reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
- __swab32s(&reqst->brw_rw);
- __swab32s(&reqst->brw_len);
- __swab32s(&reqst->brw_flags);
- __swab64s(&reqst->brw_rpyid);
- __swab64s(&reqst->brw_bulkid);
- }
- LASSERT(reqstmsg->msg_type == (__u32)srpc_service2request(sv->sv_id));
-
- reply->brw_status = 0;
- rpc->srpc_done = brw_server_rpc_done;
-
- if ((reqst->brw_rw != LST_BRW_READ && reqst->brw_rw != LST_BRW_WRITE) ||
- (reqst->brw_flags != LST_BRW_CHECK_NONE &&
- reqst->brw_flags != LST_BRW_CHECK_FULL &&
- reqst->brw_flags != LST_BRW_CHECK_SIMPLE)) {
- reply->brw_status = EINVAL;
- return 0;
- }
-
- if (reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) {
- replymsg->msg_ses_feats = LST_FEATS_MASK;
- reply->brw_status = EPROTO;
- return 0;
- }
-
- if (!(reqstmsg->msg_ses_feats & LST_FEAT_BULK_LEN)) {
- /* compat with old version */
- if (reqst->brw_len & ~PAGE_MASK) {
- reply->brw_status = EINVAL;
- return 0;
- }
- npg = reqst->brw_len >> PAGE_SHIFT;
-
- } else {
- npg = (reqst->brw_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- }
-
- replymsg->msg_ses_feats = reqstmsg->msg_ses_feats;
-
- if (!reqst->brw_len || npg > LNET_MAX_IOV) {
- reply->brw_status = EINVAL;
- return 0;
- }
-
- rc = sfw_alloc_pages(rpc, rpc->srpc_scd->scd_cpt, npg,
- reqst->brw_len,
- reqst->brw_rw == LST_BRW_WRITE);
- if (rc)
- return rc;
-
- if (reqst->brw_rw == LST_BRW_READ)
- brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_MAGIC);
- else
- brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_POISON);
-
- return 0;
-}
-
-struct sfw_test_client_ops brw_test_client;
-
-void brw_init_test_client(void)
-{
- brw_test_client.tso_init = brw_client_init;
- brw_test_client.tso_fini = brw_client_fini;
- brw_test_client.tso_prep_rpc = brw_client_prep_rpc;
- brw_test_client.tso_done_rpc = brw_client_done_rpc;
-}
-
-struct srpc_service brw_test_service;
-
-void brw_init_test_service(void)
-{
- brw_test_service.sv_id = SRPC_SERVICE_BRW;
- brw_test_service.sv_name = "brw_test";
- brw_test_service.sv_handler = brw_server_handle;
- brw_test_service.sv_bulk_ready = brw_bulk_ready;
- brw_test_service.sv_wi_total = brw_srv_workitems;
-}
diff --git a/drivers/staging/lustre/lnet/selftest/conctl.c b/drivers/staging/lustre/lnet/selftest/conctl.c
deleted file mode 100644
index 906d82d90c0c..000000000000
--- a/drivers/staging/lustre/lnet/selftest/conctl.c
+++ /dev/null
@@ -1,801 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/conctl.c
- *
- * ioctl handling in the kernel
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- */
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnetst.h>
-#include "console.h"
-
-static int
-lst_session_new_ioctl(struct lstio_session_new_args *args)
-{
- char name[LST_NAME_SIZE + 1];
- int rc;
-
- if (!args->lstio_ses_idp || /* address for output sid */
- !args->lstio_ses_key || /* no key is specified */
- !args->lstio_ses_namep || /* session name */
- args->lstio_ses_nmlen <= 0 ||
- args->lstio_ses_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (copy_from_user(name, args->lstio_ses_namep,
- args->lstio_ses_nmlen)) {
- return -EFAULT;
- }
-
- name[args->lstio_ses_nmlen] = 0;
-
- rc = lstcon_session_new(name,
- args->lstio_ses_key,
- args->lstio_ses_feats,
- args->lstio_ses_timeout,
- args->lstio_ses_force,
- args->lstio_ses_idp);
-
- return rc;
-}
-
-static int
-lst_session_end_ioctl(struct lstio_session_end_args *args)
-{
- if (args->lstio_ses_key != console_session.ses_key)
- return -EACCES;
-
- return lstcon_session_end();
-}
-
-static int
-lst_session_info_ioctl(struct lstio_session_info_args *args)
-{
- /* no checking of key */
-
- if (!args->lstio_ses_idp || /* address for output sid */
- !args->lstio_ses_keyp || /* address for output key */
- !args->lstio_ses_featp || /* address for output features */
- !args->lstio_ses_ndinfo || /* address for output ndinfo */
- !args->lstio_ses_namep || /* address for output name */
- args->lstio_ses_nmlen <= 0 ||
- args->lstio_ses_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- return lstcon_session_info(args->lstio_ses_idp,
- args->lstio_ses_keyp,
- args->lstio_ses_featp,
- args->lstio_ses_ndinfo,
- args->lstio_ses_namep,
- args->lstio_ses_nmlen);
-}
-
-static int
-lst_debug_ioctl(struct lstio_debug_args *args)
-{
- char name[LST_NAME_SIZE + 1];
- int client = 1;
- int rc;
-
- if (args->lstio_dbg_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_dbg_resultp)
- return -EINVAL;
-
- if (args->lstio_dbg_namep && /* name of batch/group */
- (args->lstio_dbg_nmlen <= 0 ||
- args->lstio_dbg_nmlen > LST_NAME_SIZE))
- return -EINVAL;
-
- if (args->lstio_dbg_namep) {
- if (copy_from_user(name, args->lstio_dbg_namep,
- args->lstio_dbg_nmlen))
- return -EFAULT;
-
- name[args->lstio_dbg_nmlen] = 0;
- }
-
- rc = -EINVAL;
-
- switch (args->lstio_dbg_type) {
- case LST_OPC_SESSION:
- rc = lstcon_session_debug(args->lstio_dbg_timeout,
- args->lstio_dbg_resultp);
- break;
-
- case LST_OPC_BATCHSRV:
- client = 0;
- /* fall through */
- case LST_OPC_BATCHCLI:
- if (!args->lstio_dbg_namep)
- goto out;
-
- rc = lstcon_batch_debug(args->lstio_dbg_timeout,
- name, client, args->lstio_dbg_resultp);
- break;
-
- case LST_OPC_GROUP:
- if (!args->lstio_dbg_namep)
- goto out;
-
- rc = lstcon_group_debug(args->lstio_dbg_timeout,
- name, args->lstio_dbg_resultp);
- break;
-
- case LST_OPC_NODES:
- if (args->lstio_dbg_count <= 0 ||
- !args->lstio_dbg_idsp)
- goto out;
-
- rc = lstcon_nodes_debug(args->lstio_dbg_timeout,
- args->lstio_dbg_count,
- args->lstio_dbg_idsp,
- args->lstio_dbg_resultp);
- break;
-
- default:
- break;
- }
-
-out:
- return rc;
-}
-
-static int
-lst_group_add_ioctl(struct lstio_group_add_args *args)
-{
- char name[LST_NAME_SIZE + 1];
- int rc;
-
- if (args->lstio_grp_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_grp_namep ||
- args->lstio_grp_nmlen <= 0 ||
- args->lstio_grp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (copy_from_user(name, args->lstio_grp_namep,
- args->lstio_grp_nmlen))
- return -EFAULT;
-
- name[args->lstio_grp_nmlen] = 0;
-
- rc = lstcon_group_add(name);
-
- return rc;
-}
-
-static int
-lst_group_del_ioctl(struct lstio_group_del_args *args)
-{
- int rc;
- char name[LST_NAME_SIZE + 1];
-
- if (args->lstio_grp_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_grp_namep ||
- args->lstio_grp_nmlen <= 0 ||
- args->lstio_grp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (copy_from_user(name, args->lstio_grp_namep,
- args->lstio_grp_nmlen))
- return -EFAULT;
-
- name[args->lstio_grp_nmlen] = 0;
-
- rc = lstcon_group_del(name);
-
- return rc;
-}
-
-static int
-lst_group_update_ioctl(struct lstio_group_update_args *args)
-{
- int rc;
- char name[LST_NAME_SIZE + 1];
-
- if (args->lstio_grp_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_grp_resultp ||
- !args->lstio_grp_namep ||
- args->lstio_grp_nmlen <= 0 ||
- args->lstio_grp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (copy_from_user(name, args->lstio_grp_namep,
- args->lstio_grp_nmlen))
- return -EFAULT;
-
- name[args->lstio_grp_nmlen] = 0;
-
- switch (args->lstio_grp_opc) {
- case LST_GROUP_CLEAN:
- rc = lstcon_group_clean(name, args->lstio_grp_args);
- break;
-
- case LST_GROUP_REFRESH:
- rc = lstcon_group_refresh(name, args->lstio_grp_resultp);
- break;
-
- case LST_GROUP_RMND:
- if (args->lstio_grp_count <= 0 ||
- !args->lstio_grp_idsp) {
- rc = -EINVAL;
- break;
- }
- rc = lstcon_nodes_remove(name, args->lstio_grp_count,
- args->lstio_grp_idsp,
- args->lstio_grp_resultp);
- break;
-
- default:
- rc = -EINVAL;
- break;
- }
-
- return rc;
-}
-
-static int
-lst_nodes_add_ioctl(struct lstio_group_nodes_args *args)
-{
- unsigned int feats;
- int rc;
- char name[LST_NAME_SIZE + 1];
-
- if (args->lstio_grp_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_grp_idsp || /* array of ids */
- args->lstio_grp_count <= 0 ||
- !args->lstio_grp_resultp ||
- !args->lstio_grp_featp ||
- !args->lstio_grp_namep ||
- args->lstio_grp_nmlen <= 0 ||
- args->lstio_grp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (copy_from_user(name, args->lstio_grp_namep,
- args->lstio_grp_nmlen))
- return -EFAULT;
-
- name[args->lstio_grp_nmlen] = 0;
-
- rc = lstcon_nodes_add(name, args->lstio_grp_count,
- args->lstio_grp_idsp, &feats,
- args->lstio_grp_resultp);
-
- if (!rc &&
- copy_to_user(args->lstio_grp_featp, &feats, sizeof(feats))) {
- return -EINVAL;
- }
-
- return rc;
-}
-
-static int
-lst_group_list_ioctl(struct lstio_group_list_args *args)
-{
- if (args->lstio_grp_key != console_session.ses_key)
- return -EACCES;
-
- if (args->lstio_grp_idx < 0 ||
- !args->lstio_grp_namep ||
- args->lstio_grp_nmlen <= 0 ||
- args->lstio_grp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- return lstcon_group_list(args->lstio_grp_idx,
- args->lstio_grp_nmlen,
- args->lstio_grp_namep);
-}
-
-static int
-lst_group_info_ioctl(struct lstio_group_info_args *args)
-{
- char name[LST_NAME_SIZE + 1];
- int ndent;
- int index;
- int rc;
-
- if (args->lstio_grp_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_grp_namep ||
- args->lstio_grp_nmlen <= 0 ||
- args->lstio_grp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (!args->lstio_grp_entp && /* output: group entry */
- !args->lstio_grp_dentsp) /* output: node entry */
- return -EINVAL;
-
- if (args->lstio_grp_dentsp) { /* have node entry */
- if (!args->lstio_grp_idxp || /* node index */
- !args->lstio_grp_ndentp) /* # of node entry */
- return -EINVAL;
-
- if (copy_from_user(&ndent, args->lstio_grp_ndentp,
- sizeof(ndent)) ||
- copy_from_user(&index, args->lstio_grp_idxp,
- sizeof(index)))
- return -EFAULT;
-
- if (ndent <= 0 || index < 0)
- return -EINVAL;
- }
-
- if (copy_from_user(name, args->lstio_grp_namep,
- args->lstio_grp_nmlen))
- return -EFAULT;
-
- name[args->lstio_grp_nmlen] = 0;
-
- rc = lstcon_group_info(name, args->lstio_grp_entp,
- &index, &ndent, args->lstio_grp_dentsp);
-
- if (rc)
- return rc;
-
- if (args->lstio_grp_dentsp &&
- (copy_to_user(args->lstio_grp_idxp, &index, sizeof(index)) ||
- copy_to_user(args->lstio_grp_ndentp, &ndent, sizeof(ndent))))
- return -EFAULT;
-
- return 0;
-}
-
-static int
-lst_batch_add_ioctl(struct lstio_batch_add_args *args)
-{
- int rc;
- char name[LST_NAME_SIZE + 1];
-
- if (args->lstio_bat_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_bat_namep ||
- args->lstio_bat_nmlen <= 0 ||
- args->lstio_bat_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (copy_from_user(name, args->lstio_bat_namep,
- args->lstio_bat_nmlen))
- return -EFAULT;
-
- name[args->lstio_bat_nmlen] = 0;
-
- rc = lstcon_batch_add(name);
-
- return rc;
-}
-
-static int
-lst_batch_run_ioctl(struct lstio_batch_run_args *args)
-{
- int rc;
- char name[LST_NAME_SIZE + 1];
-
- if (args->lstio_bat_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_bat_namep ||
- args->lstio_bat_nmlen <= 0 ||
- args->lstio_bat_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (copy_from_user(name, args->lstio_bat_namep,
- args->lstio_bat_nmlen))
- return -EFAULT;
-
- name[args->lstio_bat_nmlen] = 0;
-
- rc = lstcon_batch_run(name, args->lstio_bat_timeout,
- args->lstio_bat_resultp);
-
- return rc;
-}
-
-static int
-lst_batch_stop_ioctl(struct lstio_batch_stop_args *args)
-{
- int rc;
- char name[LST_NAME_SIZE + 1];
-
- if (args->lstio_bat_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_bat_resultp ||
- !args->lstio_bat_namep ||
- args->lstio_bat_nmlen <= 0 ||
- args->lstio_bat_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (copy_from_user(name, args->lstio_bat_namep,
- args->lstio_bat_nmlen))
- return -EFAULT;
-
- name[args->lstio_bat_nmlen] = 0;
-
- rc = lstcon_batch_stop(name, args->lstio_bat_force,
- args->lstio_bat_resultp);
-
- return rc;
-}
-
-static int
-lst_batch_query_ioctl(struct lstio_batch_query_args *args)
-{
- char name[LST_NAME_SIZE + 1];
- int rc;
-
- if (args->lstio_bat_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_bat_resultp ||
- !args->lstio_bat_namep ||
- args->lstio_bat_nmlen <= 0 ||
- args->lstio_bat_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (args->lstio_bat_testidx < 0)
- return -EINVAL;
-
- if (copy_from_user(name, args->lstio_bat_namep,
- args->lstio_bat_nmlen))
- return -EFAULT;
-
- name[args->lstio_bat_nmlen] = 0;
-
- rc = lstcon_test_batch_query(name,
- args->lstio_bat_testidx,
- args->lstio_bat_client,
- args->lstio_bat_timeout,
- args->lstio_bat_resultp);
-
- return rc;
-}
-
-static int
-lst_batch_list_ioctl(struct lstio_batch_list_args *args)
-{
- if (args->lstio_bat_key != console_session.ses_key)
- return -EACCES;
-
- if (args->lstio_bat_idx < 0 ||
- !args->lstio_bat_namep ||
- args->lstio_bat_nmlen <= 0 ||
- args->lstio_bat_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- return lstcon_batch_list(args->lstio_bat_idx,
- args->lstio_bat_nmlen,
- args->lstio_bat_namep);
-}
-
-static int
-lst_batch_info_ioctl(struct lstio_batch_info_args *args)
-{
- char name[LST_NAME_SIZE + 1];
- int rc;
- int index;
- int ndent;
-
- if (args->lstio_bat_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_bat_namep || /* batch name */
- args->lstio_bat_nmlen <= 0 ||
- args->lstio_bat_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (!args->lstio_bat_entp && /* output: batch entry */
- !args->lstio_bat_dentsp) /* output: node entry */
- return -EINVAL;
-
- if (args->lstio_bat_dentsp) { /* have node entry */
- if (!args->lstio_bat_idxp || /* node index */
- !args->lstio_bat_ndentp) /* # of node entry */
- return -EINVAL;
-
- if (copy_from_user(&index, args->lstio_bat_idxp,
- sizeof(index)) ||
- copy_from_user(&ndent, args->lstio_bat_ndentp,
- sizeof(ndent)))
- return -EFAULT;
-
- if (ndent <= 0 || index < 0)
- return -EINVAL;
- }
-
- if (copy_from_user(name, args->lstio_bat_namep,
- args->lstio_bat_nmlen))
- return -EFAULT;
-
- name[args->lstio_bat_nmlen] = 0;
-
- rc = lstcon_batch_info(name, args->lstio_bat_entp,
- args->lstio_bat_server, args->lstio_bat_testidx,
- &index, &ndent, args->lstio_bat_dentsp);
-
- if (rc)
- return rc;
-
- if (args->lstio_bat_dentsp &&
- (copy_to_user(args->lstio_bat_idxp, &index, sizeof(index)) ||
- copy_to_user(args->lstio_bat_ndentp, &ndent, sizeof(ndent))))
- rc = -EFAULT;
-
- return rc;
-}
-
-static int
-lst_stat_query_ioctl(struct lstio_stat_args *args)
-{
- int rc;
- char name[LST_NAME_SIZE + 1];
-
- /* TODO: not finished */
- if (args->lstio_sta_key != console_session.ses_key)
- return -EACCES;
-
- if (!args->lstio_sta_resultp)
- return -EINVAL;
-
- if (args->lstio_sta_idsp) {
- if (args->lstio_sta_count <= 0)
- return -EINVAL;
-
- rc = lstcon_nodes_stat(args->lstio_sta_count,
- args->lstio_sta_idsp,
- args->lstio_sta_timeout,
- args->lstio_sta_resultp);
- } else if (args->lstio_sta_namep) {
- if (args->lstio_sta_nmlen <= 0 ||
- args->lstio_sta_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- rc = copy_from_user(name, args->lstio_sta_namep,
- args->lstio_sta_nmlen);
- if (!rc)
- rc = lstcon_group_stat(name, args->lstio_sta_timeout,
- args->lstio_sta_resultp);
- else
- rc = -EFAULT;
- } else {
- rc = -EINVAL;
- }
-
- return rc;
-}
-
-static int lst_test_add_ioctl(struct lstio_test_args *args)
-{
- char batch_name[LST_NAME_SIZE + 1];
- char src_name[LST_NAME_SIZE + 1];
- char dst_name[LST_NAME_SIZE + 1];
- void *param = NULL;
- int ret = 0;
- int rc = -ENOMEM;
-
- if (!args->lstio_tes_resultp ||
- !args->lstio_tes_retp ||
- !args->lstio_tes_bat_name || /* no specified batch */
- args->lstio_tes_bat_nmlen <= 0 ||
- args->lstio_tes_bat_nmlen > LST_NAME_SIZE ||
- !args->lstio_tes_sgrp_name || /* no source group */
- args->lstio_tes_sgrp_nmlen <= 0 ||
- args->lstio_tes_sgrp_nmlen > LST_NAME_SIZE ||
- !args->lstio_tes_dgrp_name || /* no target group */
- args->lstio_tes_dgrp_nmlen <= 0 ||
- args->lstio_tes_dgrp_nmlen > LST_NAME_SIZE)
- return -EINVAL;
-
- if (!args->lstio_tes_loop || /* negative is infinite */
- args->lstio_tes_concur <= 0 ||
- args->lstio_tes_dist <= 0 ||
- args->lstio_tes_span <= 0)
- return -EINVAL;
-
- /* have parameter, check if parameter length is valid */
- if (args->lstio_tes_param &&
- (args->lstio_tes_param_len <= 0 ||
- args->lstio_tes_param_len >
- PAGE_SIZE - sizeof(struct lstcon_test)))
- return -EINVAL;
-
- /* Enforce zero parameter length if there's no parameter */
- if (!args->lstio_tes_param && args->lstio_tes_param_len)
- return -EINVAL;
-
- if (args->lstio_tes_param) {
- param = memdup_user(args->lstio_tes_param,
- args->lstio_tes_param_len);
- if (IS_ERR(param))
- return PTR_ERR(param);
- }
-
- rc = -EFAULT;
- if (copy_from_user(batch_name, args->lstio_tes_bat_name,
- args->lstio_tes_bat_nmlen) ||
- copy_from_user(src_name, args->lstio_tes_sgrp_name,
- args->lstio_tes_sgrp_nmlen) ||
- copy_from_user(dst_name, args->lstio_tes_dgrp_name,
- args->lstio_tes_dgrp_nmlen))
- goto out;
-
- rc = lstcon_test_add(batch_name, args->lstio_tes_type,
- args->lstio_tes_loop, args->lstio_tes_concur,
- args->lstio_tes_dist, args->lstio_tes_span,
- src_name, dst_name, param,
- args->lstio_tes_param_len,
- &ret, args->lstio_tes_resultp);
-
- if (!rc && ret)
- rc = (copy_to_user(args->lstio_tes_retp, &ret,
- sizeof(ret))) ? -EFAULT : 0;
-out:
- kfree(param);
-
- return rc;
-}
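-
-/*
- * Note: memdup_user() above condenses the kmalloc + copy_from_user +
- * error-unwind sequence into a single call.  It is roughly equivalent
- * to the following (sketch only -- the real helper lives in mm/util.c
- * and guards a few more corner cases):
- *
- *	void *p = kmalloc(len, GFP_USER);
- *
- *	if (!p)
- *		return ERR_PTR(-ENOMEM);
- *	if (copy_from_user(p, src, len)) {
- *		kfree(p);
- *		return ERR_PTR(-EFAULT);
- *	}
- *	return p;
- */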
-
-int
-lstcon_ioctl_entry(struct notifier_block *nb,
- unsigned long cmd, void *vdata)
-{
- struct libcfs_ioctl_hdr *hdr = vdata;
- char *buf = NULL;
- struct libcfs_ioctl_data *data;
- int opc;
- int rc = -EINVAL;
-
- if (cmd != IOC_LIBCFS_LNETST)
- goto err;
-
- data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr);
-
- opc = data->ioc_u32[0];
-
- if (data->ioc_plen1 > PAGE_SIZE)
- goto err;
-
- buf = kmalloc(data->ioc_plen1, GFP_KERNEL);
- rc = -ENOMEM;
- if (!buf)
- goto err;
-
- /* copy in parameter */
- rc = -EFAULT;
- if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1))
- goto err;
-
- mutex_lock(&console_session.ses_mutex);
-
- console_session.ses_laststamp = ktime_get_real_seconds();
-
- if (console_session.ses_shutdown) {
- rc = -ESHUTDOWN;
- goto out;
- }
-
- if (console_session.ses_expired)
- lstcon_session_end();
-
- if (opc != LSTIO_SESSION_NEW &&
- console_session.ses_state == LST_SESSION_NONE) {
- CDEBUG(D_NET, "LST no active session\n");
- rc = -ESRCH;
- goto out;
- }
-
- memset(&console_session.ses_trans_stat, 0, sizeof(struct lstcon_trans_stat));
-
- switch (opc) {
- case LSTIO_SESSION_NEW:
- rc = lst_session_new_ioctl((struct lstio_session_new_args *)buf);
- break;
- case LSTIO_SESSION_END:
- rc = lst_session_end_ioctl((struct lstio_session_end_args *)buf);
- break;
- case LSTIO_SESSION_INFO:
- rc = lst_session_info_ioctl((struct lstio_session_info_args *)buf);
- break;
- case LSTIO_DEBUG:
- rc = lst_debug_ioctl((struct lstio_debug_args *)buf);
- break;
- case LSTIO_GROUP_ADD:
- rc = lst_group_add_ioctl((struct lstio_group_add_args *)buf);
- break;
- case LSTIO_GROUP_DEL:
- rc = lst_group_del_ioctl((struct lstio_group_del_args *)buf);
- break;
- case LSTIO_GROUP_UPDATE:
- rc = lst_group_update_ioctl((struct lstio_group_update_args *)buf);
- break;
- case LSTIO_NODES_ADD:
- rc = lst_nodes_add_ioctl((struct lstio_group_nodes_args *)buf);
- break;
- case LSTIO_GROUP_LIST:
- rc = lst_group_list_ioctl((struct lstio_group_list_args *)buf);
- break;
- case LSTIO_GROUP_INFO:
- rc = lst_group_info_ioctl((struct lstio_group_info_args *)buf);
- break;
- case LSTIO_BATCH_ADD:
- rc = lst_batch_add_ioctl((struct lstio_batch_add_args *)buf);
- break;
- case LSTIO_BATCH_START:
- rc = lst_batch_run_ioctl((struct lstio_batch_run_args *)buf);
- break;
- case LSTIO_BATCH_STOP:
- rc = lst_batch_stop_ioctl((struct lstio_batch_stop_args *)buf);
- break;
- case LSTIO_BATCH_QUERY:
- rc = lst_batch_query_ioctl((struct lstio_batch_query_args *)buf);
- break;
- case LSTIO_BATCH_LIST:
- rc = lst_batch_list_ioctl((struct lstio_batch_list_args *)buf);
- break;
- case LSTIO_BATCH_INFO:
- rc = lst_batch_info_ioctl((struct lstio_batch_info_args *)buf);
- break;
- case LSTIO_TEST_ADD:
- rc = lst_test_add_ioctl((struct lstio_test_args *)buf);
- break;
- case LSTIO_STAT_QUERY:
- rc = lst_stat_query_ioctl((struct lstio_stat_args *)buf);
- break;
- default:
- rc = -EINVAL;
- goto out;
- }
-
- if (copy_to_user(data->ioc_pbuf2, &console_session.ses_trans_stat,
- sizeof(struct lstcon_trans_stat)))
- rc = -EFAULT;
-out:
- mutex_unlock(&console_session.ses_mutex);
-err:
- kfree(buf);
-
- return notifier_from_ioctl_errno(rc);
-}
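-
-/*
- * For reference: from user space this entry point is reached through an
- * IOC_LIBCFS_LNETST ioctl on the LNet character device.  A minimal
- * sketch of the caller side -- the function name is hypothetical, and
- * the libcfs ioctl header packing (version/length fields) is elided;
- * the real lst utility wraps all of this:
- */
-static int lst_ioctl_sketch(int lnet_fd, int opc, void *args,
-			    size_t args_len, struct lstcon_trans_stat *stat)
-{
-	struct libcfs_ioctl_data data = { 0 };
-
-	data.ioc_u32[0] = opc;			/* e.g. LSTIO_SESSION_NEW */
-	data.ioc_pbuf1 = args;			/* opcode-specific args */
-	data.ioc_plen1 = args_len;		/* must be <= PAGE_SIZE */
-	data.ioc_pbuf2 = (char *)stat;		/* trans stats written back */
-	data.ioc_plen2 = sizeof(*stat);
-
-	return ioctl(lnet_fd, IOC_LIBCFS_LNETST, &data);
-}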
diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c
deleted file mode 100644
index 0dabade3d091..000000000000
--- a/drivers/staging/lustre/lnet/selftest/conrpc.c
+++ /dev/null
@@ -1,1396 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/conrpc.c
- *
- * Console framework RPCs
- *
- * Author: Liang Zhen <liang@whamcloud.com>
- */
-
-#include <linux/lnet/lib-lnet.h>
-#include "timer.h"
-#include "conrpc.h"
-#include "console.h"
-
-void lstcon_rpc_stat_reply(struct lstcon_rpc_trans *, struct srpc_msg *,
- struct lstcon_node *, struct lstcon_trans_stat *);
-
-static void
-lstcon_rpc_done(struct srpc_client_rpc *rpc)
-{
- struct lstcon_rpc *crpc = (struct lstcon_rpc *)rpc->crpc_priv;
-
- LASSERT(crpc && rpc == crpc->crp_rpc);
- LASSERT(crpc->crp_posted && !crpc->crp_finished);
-
- spin_lock(&rpc->crpc_lock);
-
- if (!crpc->crp_trans) {
- /*
- * Orphan RPC is not in any transaction,
- * I'm just a poor body and nobody loves me
- */
- spin_unlock(&rpc->crpc_lock);
-
- /* release it */
- lstcon_rpc_put(crpc);
- return;
- }
-
- /* not an orphan RPC */
- crpc->crp_finished = 1;
-
- if (!crpc->crp_stamp) {
- /* not aborted */
- LASSERT(!crpc->crp_status);
-
- crpc->crp_stamp = jiffies;
- crpc->crp_status = rpc->crpc_status;
- }
-
-	/* wake up the transaction thread if this is the last RPC in the transaction */
- if (atomic_dec_and_test(&crpc->crp_trans->tas_remaining))
- wake_up(&crpc->crp_trans->tas_waitq);
-
- spin_unlock(&rpc->crpc_lock);
-}
-
-static int
-lstcon_rpc_init(struct lstcon_node *nd, int service, unsigned int feats,
- int bulk_npg, int bulk_len, int embedded,
- struct lstcon_rpc *crpc)
-{
- crpc->crp_rpc = sfw_create_rpc(nd->nd_id, service,
- feats, bulk_npg, bulk_len,
- lstcon_rpc_done, (void *)crpc);
- if (!crpc->crp_rpc)
- return -ENOMEM;
-
- crpc->crp_trans = NULL;
- crpc->crp_node = nd;
- crpc->crp_posted = 0;
- crpc->crp_finished = 0;
- crpc->crp_unpacked = 0;
- crpc->crp_status = 0;
- crpc->crp_stamp = 0;
- crpc->crp_embedded = embedded;
- INIT_LIST_HEAD(&crpc->crp_link);
-
- atomic_inc(&console_session.ses_rpc_counter);
-
- return 0;
-}
-
-static int
-lstcon_rpc_prep(struct lstcon_node *nd, int service, unsigned int feats,
- int bulk_npg, int bulk_len, struct lstcon_rpc **crpcpp)
-{
- struct lstcon_rpc *crpc = NULL;
- int rc;
-
- spin_lock(&console_session.ses_rpc_lock);
-
- crpc = list_first_entry_or_null(&console_session.ses_rpc_freelist,
- struct lstcon_rpc, crp_link);
- if (crpc)
- list_del_init(&crpc->crp_link);
-
- spin_unlock(&console_session.ses_rpc_lock);
-
- if (!crpc) {
- crpc = kzalloc(sizeof(*crpc), GFP_NOFS);
- if (!crpc)
- return -ENOMEM;
- }
-
- rc = lstcon_rpc_init(nd, service, feats, bulk_npg, bulk_len, 0, crpc);
- if (!rc) {
- *crpcpp = crpc;
- return 0;
- }
-
- kfree(crpc);
-
- return rc;
-}
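-
-/*
- * The free list consulted above is filled by lstcon_rpc_put(): done
- * (non-embedded) RPCs are parked on ses_rpc_freelist instead of being
- * freed, so the next lstcon_rpc_prep() can recycle them and only falls
- * back to kzalloc() when the list is empty.  The list is drained for
- * good in lstcon_rpc_cleanup_wait().
- */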
-
-void
-lstcon_rpc_put(struct lstcon_rpc *crpc)
-{
- struct srpc_bulk *bulk = &crpc->crp_rpc->crpc_bulk;
- int i;
-
- LASSERT(list_empty(&crpc->crp_link));
-
- for (i = 0; i < bulk->bk_niov; i++) {
- if (!bulk->bk_iovs[i].bv_page)
- continue;
-
- __free_page(bulk->bk_iovs[i].bv_page);
- }
-
- srpc_client_rpc_decref(crpc->crp_rpc);
-
- if (crpc->crp_embedded) {
- /* embedded RPC, don't recycle it */
- memset(crpc, 0, sizeof(*crpc));
- crpc->crp_embedded = 1;
-
- } else {
- spin_lock(&console_session.ses_rpc_lock);
-
- list_add(&crpc->crp_link,
- &console_session.ses_rpc_freelist);
-
- spin_unlock(&console_session.ses_rpc_lock);
- }
-
- /* RPC is not alive now */
- atomic_dec(&console_session.ses_rpc_counter);
-}
-
-static void
-lstcon_rpc_post(struct lstcon_rpc *crpc)
-{
- struct lstcon_rpc_trans *trans = crpc->crp_trans;
-
- LASSERT(trans);
-
- atomic_inc(&trans->tas_remaining);
- crpc->crp_posted = 1;
-
- sfw_post_rpc(crpc->crp_rpc);
-}
-
-static char *
-lstcon_rpc_trans_name(int transop)
-{
- if (transop == LST_TRANS_SESNEW)
- return "SESNEW";
-
- if (transop == LST_TRANS_SESEND)
- return "SESEND";
-
- if (transop == LST_TRANS_SESQRY)
- return "SESQRY";
-
- if (transop == LST_TRANS_SESPING)
- return "SESPING";
-
- if (transop == LST_TRANS_TSBCLIADD)
- return "TSBCLIADD";
-
- if (transop == LST_TRANS_TSBSRVADD)
- return "TSBSRVADD";
-
- if (transop == LST_TRANS_TSBRUN)
- return "TSBRUN";
-
- if (transop == LST_TRANS_TSBSTOP)
- return "TSBSTOP";
-
- if (transop == LST_TRANS_TSBCLIQRY)
- return "TSBCLIQRY";
-
- if (transop == LST_TRANS_TSBSRVQRY)
- return "TSBSRVQRY";
-
- if (transop == LST_TRANS_STATQRY)
- return "STATQRY";
-
- return "Unknown";
-}
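-
-/*
- * The if-chain above could equally be a table lookup; a sketch with
- * hypothetical names (trans_names/trans_name_lookup are illustrative,
- * not part of this file):
- */
-static const struct { int op; const char *name; } trans_names[] = {
-	{ LST_TRANS_SESNEW,	"SESNEW" },
-	{ LST_TRANS_SESEND,	"SESEND" },
-	{ LST_TRANS_SESQRY,	"SESQRY" },
-	{ LST_TRANS_SESPING,	"SESPING" },
-	{ LST_TRANS_TSBCLIADD,	"TSBCLIADD" },
-	{ LST_TRANS_TSBSRVADD,	"TSBSRVADD" },
-	{ LST_TRANS_TSBRUN,	"TSBRUN" },
-	{ LST_TRANS_TSBSTOP,	"TSBSTOP" },
-	{ LST_TRANS_TSBCLIQRY,	"TSBCLIQRY" },
-	{ LST_TRANS_TSBSRVQRY,	"TSBSRVQRY" },
-	{ LST_TRANS_STATQRY,	"STATQRY" },
-};
-
-static const char *
-trans_name_lookup(int transop)
-{
-	size_t i;
-
-	for (i = 0; i < ARRAY_SIZE(trans_names); i++)
-		if (trans_names[i].op == transop)
-			return trans_names[i].name;
-	return "Unknown";
-}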
-
-int
-lstcon_rpc_trans_prep(struct list_head *translist, int transop,
- struct lstcon_rpc_trans **transpp)
-{
- struct lstcon_rpc_trans *trans;
-
- if (translist) {
- list_for_each_entry(trans, translist, tas_link) {
-			/*
-			 * Can't enqueue two private transactions on
-			 * the same object
-			 */
- if ((trans->tas_opc & transop) == LST_TRANS_PRIVATE)
- return -EPERM;
- }
- }
-
- /* create a trans group */
- trans = kzalloc(sizeof(*trans), GFP_NOFS);
- if (!trans)
- return -ENOMEM;
-
- trans->tas_opc = transop;
-
- if (!translist)
- INIT_LIST_HEAD(&trans->tas_olink);
- else
- list_add_tail(&trans->tas_olink, translist);
-
- list_add_tail(&trans->tas_link, &console_session.ses_trans_list);
-
- INIT_LIST_HEAD(&trans->tas_rpcs_list);
- atomic_set(&trans->tas_remaining, 0);
- init_waitqueue_head(&trans->tas_waitq);
-
- spin_lock(&console_session.ses_rpc_lock);
- trans->tas_features = console_session.ses_features;
- spin_unlock(&console_session.ses_rpc_lock);
-
- *transpp = trans;
- return 0;
-}
-
-void
-lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans, struct lstcon_rpc *crpc)
-{
- list_add_tail(&crpc->crp_link, &trans->tas_rpcs_list);
- crpc->crp_trans = trans;
-}
-
-void
-lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error)
-{
- struct srpc_client_rpc *rpc;
- struct lstcon_rpc *crpc;
- struct lstcon_node *nd;
-
- list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
- rpc = crpc->crp_rpc;
-
- spin_lock(&rpc->crpc_lock);
-
- if (!crpc->crp_posted || /* not posted */
- crpc->crp_stamp) { /* rpc done or aborted already */
- if (!crpc->crp_stamp) {
- crpc->crp_stamp = jiffies;
- crpc->crp_status = -EINTR;
- }
- spin_unlock(&rpc->crpc_lock);
- continue;
- }
-
- crpc->crp_stamp = jiffies;
- crpc->crp_status = error;
-
- spin_unlock(&rpc->crpc_lock);
-
- sfw_abort_rpc(rpc);
-
- if (error != -ETIMEDOUT)
- continue;
-
- nd = crpc->crp_node;
- if (time_after(nd->nd_stamp, crpc->crp_stamp))
- continue;
-
- nd->nd_stamp = crpc->crp_stamp;
- nd->nd_state = LST_NODE_DOWN;
- }
-}
-
-static int
-lstcon_rpc_trans_check(struct lstcon_rpc_trans *trans)
-{
- if (console_session.ses_shutdown &&
- !list_empty(&trans->tas_olink)) /* Not an end session RPC */
- return 1;
-
- return !atomic_read(&trans->tas_remaining) ? 1 : 0;
-}
-
-int
-lstcon_rpc_trans_postwait(struct lstcon_rpc_trans *trans, int timeout)
-{
- struct lstcon_rpc *crpc;
- int rc;
-
- if (list_empty(&trans->tas_rpcs_list))
- return 0;
-
- if (timeout < LST_TRANS_MIN_TIMEOUT)
- timeout = LST_TRANS_MIN_TIMEOUT;
-
- CDEBUG(D_NET, "Transaction %s started\n",
- lstcon_rpc_trans_name(trans->tas_opc));
-
- /* post all requests */
- list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
- LASSERT(!crpc->crp_posted);
-
- lstcon_rpc_post(crpc);
- }
-
- mutex_unlock(&console_session.ses_mutex);
-
- rc = wait_event_interruptible_timeout(trans->tas_waitq,
- lstcon_rpc_trans_check(trans),
- timeout * HZ);
- rc = (rc > 0) ? 0 : ((rc < 0) ? -EINTR : -ETIMEDOUT);
-
- mutex_lock(&console_session.ses_mutex);
-
- if (console_session.ses_shutdown)
- rc = -ESHUTDOWN;
-
- if (rc || atomic_read(&trans->tas_remaining)) {
- /* treat short timeout as canceled */
- if (rc == -ETIMEDOUT && timeout < LST_TRANS_MIN_TIMEOUT * 2)
- rc = -EINTR;
-
- lstcon_rpc_trans_abort(trans, rc);
- }
-
- CDEBUG(D_NET, "Transaction %s stopped: %d\n",
- lstcon_rpc_trans_name(trans->tas_opc), rc);
-
- lstcon_rpc_trans_stat(trans, lstcon_trans_stat());
-
- return rc;
-}
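-
-/*
- * Note: wait_event_interruptible_timeout() returns a positive value
- * (jiffies remaining) when the condition became true, 0 on timeout,
- * and -ERESTARTSYS when a signal arrived.  The mapping in
- * lstcon_rpc_trans_postwait() above folds that into the console's
- * convention:
- *
- *	rc > 0  -> 0		(transaction completed)
- *	rc == 0 -> -ETIMEDOUT	(timed out)
- *	rc < 0  -> -EINTR	(interrupted by a signal)
- */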
-
-static int
-lstcon_rpc_get_reply(struct lstcon_rpc *crpc, struct srpc_msg **msgpp)
-{
- struct lstcon_node *nd = crpc->crp_node;
- struct srpc_client_rpc *rpc = crpc->crp_rpc;
- struct srpc_generic_reply *rep;
-
- LASSERT(nd && rpc);
- LASSERT(crpc->crp_stamp);
-
- if (crpc->crp_status) {
- *msgpp = NULL;
- return crpc->crp_status;
- }
-
- *msgpp = &rpc->crpc_replymsg;
- if (!crpc->crp_unpacked) {
- sfw_unpack_message(*msgpp);
- crpc->crp_unpacked = 1;
- }
-
- if (time_after(nd->nd_stamp, crpc->crp_stamp))
- return 0;
-
- nd->nd_stamp = crpc->crp_stamp;
- rep = &(*msgpp)->msg_body.reply;
-
- if (rep->sid.ses_nid == LNET_NID_ANY)
- nd->nd_state = LST_NODE_UNKNOWN;
- else if (lstcon_session_match(rep->sid))
- nd->nd_state = LST_NODE_ACTIVE;
- else
- nd->nd_state = LST_NODE_BUSY;
-
- return 0;
-}
-
-void
-lstcon_rpc_trans_stat(struct lstcon_rpc_trans *trans, struct lstcon_trans_stat *stat)
-{
- struct lstcon_rpc *crpc;
- struct srpc_msg *rep;
- int error;
-
- LASSERT(stat);
-
- memset(stat, 0, sizeof(*stat));
-
- list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
- lstcon_rpc_stat_total(stat, 1);
-
- LASSERT(crpc->crp_stamp);
-
- error = lstcon_rpc_get_reply(crpc, &rep);
- if (error) {
- lstcon_rpc_stat_failure(stat, 1);
- if (!stat->trs_rpc_errno)
- stat->trs_rpc_errno = -error;
-
- continue;
- }
-
- lstcon_rpc_stat_success(stat, 1);
-
- lstcon_rpc_stat_reply(trans, rep, crpc->crp_node, stat);
- }
-
- if (trans->tas_opc == LST_TRANS_SESNEW && !stat->trs_fwk_errno) {
- stat->trs_fwk_errno =
- lstcon_session_feats_check(trans->tas_features);
- }
-
- CDEBUG(D_NET, "transaction %s : success %d, failure %d, total %d, RPC error(%d), Framework error(%d)\n",
- lstcon_rpc_trans_name(trans->tas_opc),
- lstcon_rpc_stat_success(stat, 0),
- lstcon_rpc_stat_failure(stat, 0),
- lstcon_rpc_stat_total(stat, 0),
- stat->trs_rpc_errno, stat->trs_fwk_errno);
-}
-
-int
-lstcon_rpc_trans_interpreter(struct lstcon_rpc_trans *trans,
- struct list_head __user *head_up,
- lstcon_rpc_readent_func_t readent)
-{
- struct list_head tmp;
- struct list_head __user *next;
- struct lstcon_rpc_ent *ent;
- struct srpc_generic_reply *rep;
- struct lstcon_rpc *crpc;
- struct srpc_msg *msg;
- struct lstcon_node *nd;
- long dur;
- struct timeval tv;
- int error;
-
- LASSERT(head_up);
-
- next = head_up;
-
- list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
- if (copy_from_user(&tmp, next,
- sizeof(struct list_head)))
- return -EFAULT;
-
- next = tmp.next;
- if (next == head_up)
- return 0;
-
- ent = list_entry(next, struct lstcon_rpc_ent, rpe_link);
-
- LASSERT(crpc->crp_stamp);
-
- error = lstcon_rpc_get_reply(crpc, &msg);
-
- nd = crpc->crp_node;
-
- dur = (long)(crpc->crp_stamp -
- (unsigned long)console_session.ses_id.ses_stamp);
- jiffies_to_timeval(dur, &tv);
-
- if (copy_to_user(&ent->rpe_peer, &nd->nd_id,
- sizeof(struct lnet_process_id)) ||
- copy_to_user(&ent->rpe_stamp, &tv, sizeof(tv)) ||
- copy_to_user(&ent->rpe_state, &nd->nd_state,
- sizeof(nd->nd_state)) ||
- copy_to_user(&ent->rpe_rpc_errno, &error,
- sizeof(error)))
- return -EFAULT;
-
- if (error)
- continue;
-
- /* RPC is done */
- rep = (struct srpc_generic_reply *)&msg->msg_body.reply;
-
- if (copy_to_user(&ent->rpe_sid, &rep->sid, sizeof(rep->sid)) ||
- copy_to_user(&ent->rpe_fwk_errno, &rep->status,
- sizeof(rep->status)))
- return -EFAULT;
-
- if (!readent)
- continue;
-
- error = readent(trans->tas_opc, msg, ent);
- if (error)
- return error;
- }
-
- return 0;
-}
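-
-/*
- * Note on the traversal above: the ->next pointers live entirely in
- * user memory, so each struct list_head is copied in before it is
- * followed, and the containing lstcon_rpc_ent is recovered with
- * list_entry(), i.e. container_of():
- *
- *	ent = (struct lstcon_rpc_ent *)
- *	      ((char *)next - offsetof(struct lstcon_rpc_ent, rpe_link));
- */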
-
-void
-lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans)
-{
- struct srpc_client_rpc *rpc;
- struct lstcon_rpc *crpc;
- struct lstcon_rpc *tmp;
- int count = 0;
-
- list_for_each_entry_safe(crpc, tmp, &trans->tas_rpcs_list, crp_link) {
- rpc = crpc->crp_rpc;
-
- spin_lock(&rpc->crpc_lock);
-
- /* free it if not posted or finished already */
- if (!crpc->crp_posted || crpc->crp_finished) {
- spin_unlock(&rpc->crpc_lock);
-
- list_del_init(&crpc->crp_link);
- lstcon_rpc_put(crpc);
-
- continue;
- }
-
-		/*
-		 * RPCs may not have been called back yet (even after
-		 * LNetMDUnlink) because of the huge timeout on an
-		 * inaccessible network; don't make the user wait for
-		 * them, just abandon them, they will be recycled in
-		 * the callback
-		 */
- LASSERT(crpc->crp_status);
-
- crpc->crp_node = NULL;
- crpc->crp_trans = NULL;
- list_del_init(&crpc->crp_link);
- count++;
-
- spin_unlock(&rpc->crpc_lock);
-
- atomic_dec(&trans->tas_remaining);
- }
-
- LASSERT(!atomic_read(&trans->tas_remaining));
-
- list_del(&trans->tas_link);
- if (!list_empty(&trans->tas_olink))
- list_del(&trans->tas_olink);
-
- CDEBUG(D_NET, "Transaction %s destroyed with %d pending RPCs\n",
- lstcon_rpc_trans_name(trans->tas_opc), count);
-
- kfree(trans);
-}
-
-int
-lstcon_sesrpc_prep(struct lstcon_node *nd, int transop,
- unsigned int feats, struct lstcon_rpc **crpc)
-{
- struct srpc_mksn_reqst *msrq;
- struct srpc_rmsn_reqst *rsrq;
- int rc;
-
- switch (transop) {
- case LST_TRANS_SESNEW:
- rc = lstcon_rpc_prep(nd, SRPC_SERVICE_MAKE_SESSION,
- feats, 0, 0, crpc);
- if (rc)
- return rc;
-
- msrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.mksn_reqst;
- msrq->mksn_sid = console_session.ses_id;
- msrq->mksn_force = console_session.ses_force;
- strlcpy(msrq->mksn_name, console_session.ses_name,
- sizeof(msrq->mksn_name));
- break;
-
- case LST_TRANS_SESEND:
- rc = lstcon_rpc_prep(nd, SRPC_SERVICE_REMOVE_SESSION,
- feats, 0, 0, crpc);
- if (rc)
- return rc;
-
- rsrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.rmsn_reqst;
- rsrq->rmsn_sid = console_session.ses_id;
- break;
-
- default:
- LBUG();
- }
-
- return 0;
-}
-
-int
-lstcon_dbgrpc_prep(struct lstcon_node *nd, unsigned int feats,
- struct lstcon_rpc **crpc)
-{
- struct srpc_debug_reqst *drq;
- int rc;
-
- rc = lstcon_rpc_prep(nd, SRPC_SERVICE_DEBUG, feats, 0, 0, crpc);
- if (rc)
- return rc;
-
- drq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst;
-
- drq->dbg_sid = console_session.ses_id;
- drq->dbg_flags = 0;
-
- return rc;
-}
-
-int
-lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned int feats,
- struct lstcon_tsb_hdr *tsb, struct lstcon_rpc **crpc)
-{
- struct lstcon_batch *batch;
- struct srpc_batch_reqst *brq;
- int rc;
-
- rc = lstcon_rpc_prep(nd, SRPC_SERVICE_BATCH, feats, 0, 0, crpc);
- if (rc)
- return rc;
-
- brq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.bat_reqst;
-
- brq->bar_sid = console_session.ses_id;
- brq->bar_bid = tsb->tsb_id;
- brq->bar_testidx = tsb->tsb_index;
- brq->bar_opc = transop == LST_TRANS_TSBRUN ? SRPC_BATCH_OPC_RUN :
- (transop == LST_TRANS_TSBSTOP ? SRPC_BATCH_OPC_STOP :
- SRPC_BATCH_OPC_QUERY);
-
- if (transop != LST_TRANS_TSBRUN &&
- transop != LST_TRANS_TSBSTOP)
- return 0;
-
- LASSERT(!tsb->tsb_index);
-
- batch = (struct lstcon_batch *)tsb;
- brq->bar_arg = batch->bat_arg;
-
- return 0;
-}
-
-int
-lstcon_statrpc_prep(struct lstcon_node *nd, unsigned int feats,
- struct lstcon_rpc **crpc)
-{
- struct srpc_stat_reqst *srq;
- int rc;
-
- rc = lstcon_rpc_prep(nd, SRPC_SERVICE_QUERY_STAT, feats, 0, 0, crpc);
- if (rc)
- return rc;
-
- srq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.stat_reqst;
-
- srq->str_sid = console_session.ses_id;
- srq->str_type = 0; /* XXX remove it */
-
- return 0;
-}
-
-static struct lnet_process_id_packed *
-lstcon_next_id(int idx, int nkiov, struct bio_vec *kiov)
-{
- struct lnet_process_id_packed *pid;
- int i;
-
- i = idx / SFW_ID_PER_PAGE;
-
- LASSERT(i < nkiov);
-
- pid = (struct lnet_process_id_packed *)page_address(kiov[i].bv_page);
-
- return &pid[idx % SFW_ID_PER_PAGE];
-}
-
-static int
-lstcon_dstnodes_prep(struct lstcon_group *grp, int idx,
- int dist, int span, int nkiov, struct bio_vec *kiov)
-{
- struct lnet_process_id_packed *pid;
- struct lstcon_ndlink *ndl;
- struct lstcon_node *nd;
- int start;
- int end;
- int i = 0;
-
- LASSERT(dist >= 1);
- LASSERT(span >= 1);
- LASSERT(grp->grp_nnode >= 1);
-
- if (span > grp->grp_nnode)
- return -EINVAL;
-
- start = ((idx / dist) * span) % grp->grp_nnode;
- end = ((idx / dist) * span + span - 1) % grp->grp_nnode;
-
- list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) {
- nd = ndl->ndl_node;
- if (i < start) {
- i++;
- continue;
- }
-
- if (i > (end >= start ? end : grp->grp_nnode))
- break;
-
- pid = lstcon_next_id((i - start), nkiov, kiov);
- pid->nid = nd->nd_id.nid;
- pid->pid = nd->nd_id.pid;
- i++;
- }
-
- if (start <= end) /* done */
- return 0;
-
- list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) {
- if (i > grp->grp_nnode + end)
- break;
-
- nd = ndl->ndl_node;
- pid = lstcon_next_id((i - start), nkiov, kiov);
- pid->nid = nd->nd_id.nid;
- pid->pid = nd->nd_id.pid;
- i++;
- }
-
- return 0;
-}
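-
-/*
- * Worked example for the start/end arithmetic in lstcon_dstnodes_prep():
- * with grp_nnode = 5, dist = 1, span = 3, client indexes map onto
- * server slices as
- *
- *	idx 0 -> start 0, end 2	(servers 0,1,2)
- *	idx 1 -> start 3, end 0	(wraps: servers 3,4,0)
- *	idx 2 -> start 1, end 3	(servers 1,2,3)
- *
- * The second loop above handles the wrapped case, where start > end.
- */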
-
-static int
-lstcon_pingrpc_prep(struct lst_test_ping_param *param, struct srpc_test_reqst *req)
-{
- struct test_ping_req *prq = &req->tsr_u.ping;
-
- prq->png_size = param->png_size;
- prq->png_flags = param->png_flags;
- /* TODO dest */
- return 0;
-}
-
-static int
-lstcon_bulkrpc_v0_prep(struct lst_test_bulk_param *param,
- struct srpc_test_reqst *req)
-{
- struct test_bulk_req *brq = &req->tsr_u.bulk_v0;
-
- brq->blk_opc = param->blk_opc;
- brq->blk_npg = DIV_ROUND_UP(param->blk_size, PAGE_SIZE);
- brq->blk_flags = param->blk_flags;
-
- return 0;
-}
-
-static int
-lstcon_bulkrpc_v1_prep(struct lst_test_bulk_param *param, bool is_client,
- struct srpc_test_reqst *req)
-{
- struct test_bulk_req_v1 *brq = &req->tsr_u.bulk_v1;
-
- brq->blk_opc = param->blk_opc;
- brq->blk_flags = param->blk_flags;
- brq->blk_len = param->blk_size;
- brq->blk_offset = is_client ? param->blk_cli_off : param->blk_srv_off;
-
- return 0;
-}
-
-int
-lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned int feats,
- struct lstcon_test *test, struct lstcon_rpc **crpc)
-{
- struct lstcon_group *sgrp = test->tes_src_grp;
- struct lstcon_group *dgrp = test->tes_dst_grp;
- struct srpc_test_reqst *trq;
- struct srpc_bulk *bulk;
- int i;
- int npg = 0;
- int nob = 0;
- int rc = 0;
-
- if (transop == LST_TRANS_TSBCLIADD) {
- npg = sfw_id_pages(test->tes_span);
- nob = !(feats & LST_FEAT_BULK_LEN) ?
- npg * PAGE_SIZE :
- sizeof(struct lnet_process_id_packed) * test->tes_span;
- }
-
- rc = lstcon_rpc_prep(nd, SRPC_SERVICE_TEST, feats, npg, nob, crpc);
- if (rc)
- return rc;
-
- trq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.tes_reqst;
-
- if (transop == LST_TRANS_TSBSRVADD) {
- int ndist = DIV_ROUND_UP(sgrp->grp_nnode, test->tes_dist);
- int nspan = DIV_ROUND_UP(dgrp->grp_nnode, test->tes_span);
- int nmax = DIV_ROUND_UP(ndist, nspan);
-
- trq->tsr_ndest = 0;
- trq->tsr_loop = nmax * test->tes_dist * test->tes_concur;
- } else {
- bulk = &(*crpc)->crp_rpc->crpc_bulk;
-
- for (i = 0; i < npg; i++) {
- int len;
-
- LASSERT(nob > 0);
-
- len = !(feats & LST_FEAT_BULK_LEN) ?
- PAGE_SIZE :
- min_t(int, nob, PAGE_SIZE);
- nob -= len;
-
- bulk->bk_iovs[i].bv_offset = 0;
- bulk->bk_iovs[i].bv_len = len;
- bulk->bk_iovs[i].bv_page = alloc_page(GFP_KERNEL);
-
- if (!bulk->bk_iovs[i].bv_page) {
- lstcon_rpc_put(*crpc);
- return -ENOMEM;
- }
- }
-
- bulk->bk_sink = 0;
-
- LASSERT(transop == LST_TRANS_TSBCLIADD);
-
- rc = lstcon_dstnodes_prep(test->tes_dst_grp,
- test->tes_cliidx++,
- test->tes_dist,
- test->tes_span,
- npg, &bulk->bk_iovs[0]);
- if (rc) {
- lstcon_rpc_put(*crpc);
- return rc;
- }
-
- trq->tsr_ndest = test->tes_span;
- trq->tsr_loop = test->tes_loop;
- }
-
- trq->tsr_sid = console_session.ses_id;
- trq->tsr_bid = test->tes_hdr.tsb_id;
- trq->tsr_concur = test->tes_concur;
- trq->tsr_is_client = (transop == LST_TRANS_TSBCLIADD) ? 1 : 0;
- trq->tsr_stop_onerr = !!test->tes_stop_onerr;
-
- switch (test->tes_type) {
- case LST_TEST_PING:
- trq->tsr_service = SRPC_SERVICE_PING;
- rc = lstcon_pingrpc_prep((struct lst_test_ping_param *)
- &test->tes_param[0], trq);
- break;
-
- case LST_TEST_BULK:
- trq->tsr_service = SRPC_SERVICE_BRW;
- if (!(feats & LST_FEAT_BULK_LEN)) {
- rc = lstcon_bulkrpc_v0_prep((struct lst_test_bulk_param *)
- &test->tes_param[0], trq);
- } else {
- rc = lstcon_bulkrpc_v1_prep((struct lst_test_bulk_param *)
- &test->tes_param[0],
- trq->tsr_is_client, trq);
- }
-
- break;
- default:
- LBUG();
- break;
- }
-
- return rc;
-}
-
-static int
-lstcon_sesnew_stat_reply(struct lstcon_rpc_trans *trans,
- struct lstcon_node *nd, struct srpc_msg *reply)
-{
- struct srpc_mksn_reply *mksn_rep = &reply->msg_body.mksn_reply;
- int status = mksn_rep->mksn_status;
-
- if (!status &&
- (reply->msg_ses_feats & ~LST_FEATS_MASK)) {
- mksn_rep->mksn_status = EPROTO;
- status = EPROTO;
- }
-
- if (status == EPROTO) {
- CNETERR("session protocol error from %s: %u\n",
- libcfs_nid2str(nd->nd_id.nid),
- reply->msg_ses_feats);
- }
-
- if (status)
- return status;
-
- if (!trans->tas_feats_updated) {
- spin_lock(&console_session.ses_rpc_lock);
- if (!trans->tas_feats_updated) { /* recheck with lock */
- trans->tas_feats_updated = 1;
- trans->tas_features = reply->msg_ses_feats;
- }
- spin_unlock(&console_session.ses_rpc_lock);
- }
-
- if (reply->msg_ses_feats != trans->tas_features) {
-		CNETERR("Framework features %x from %s differ from the features on this transaction: %x\n",
- reply->msg_ses_feats, libcfs_nid2str(nd->nd_id.nid),
- trans->tas_features);
- mksn_rep->mksn_status = EPROTO;
- status = EPROTO;
- }
-
- if (!status) {
- /* session timeout on remote node */
- nd->nd_timeout = mksn_rep->mksn_timeout;
- }
-
- return status;
-}
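-
-/*
- * Feature negotiation note: LST_FEATS_MASK is the set of feature bits
- * this console understands, so a reply advertising any bit outside it
- * (msg_ses_feats & ~LST_FEATS_MASK) comes from an incompatible peer
- * and is rejected with EPROTO.  For instance, if LST_FEATS_MASK were
- * 0x1 (bulk-length only), a reply of 0x3 would fail while 0x1 passes.
- */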
-
-void
-lstcon_rpc_stat_reply(struct lstcon_rpc_trans *trans, struct srpc_msg *msg,
- struct lstcon_node *nd, struct lstcon_trans_stat *stat)
-{
- struct srpc_rmsn_reply *rmsn_rep;
- struct srpc_debug_reply *dbg_rep;
- struct srpc_batch_reply *bat_rep;
- struct srpc_test_reply *test_rep;
- struct srpc_stat_reply *stat_rep;
- int rc = 0;
-
- switch (trans->tas_opc) {
- case LST_TRANS_SESNEW:
- rc = lstcon_sesnew_stat_reply(trans, nd, msg);
- if (!rc) {
- lstcon_sesop_stat_success(stat, 1);
- return;
- }
-
- lstcon_sesop_stat_failure(stat, 1);
- break;
-
- case LST_TRANS_SESEND:
- rmsn_rep = &msg->msg_body.rmsn_reply;
- /* ESRCH is not an error for end session */
- if (!rmsn_rep->rmsn_status ||
- rmsn_rep->rmsn_status == ESRCH) {
- lstcon_sesop_stat_success(stat, 1);
- return;
- }
-
- lstcon_sesop_stat_failure(stat, 1);
- rc = rmsn_rep->rmsn_status;
- break;
-
- case LST_TRANS_SESQRY:
- case LST_TRANS_SESPING:
- dbg_rep = &msg->msg_body.dbg_reply;
-
- if (dbg_rep->dbg_status == ESRCH) {
- lstcon_sesqry_stat_unknown(stat, 1);
- return;
- }
-
- if (lstcon_session_match(dbg_rep->dbg_sid))
- lstcon_sesqry_stat_active(stat, 1);
- else
- lstcon_sesqry_stat_busy(stat, 1);
- return;
-
- case LST_TRANS_TSBRUN:
- case LST_TRANS_TSBSTOP:
- bat_rep = &msg->msg_body.bat_reply;
-
- if (!bat_rep->bar_status) {
- lstcon_tsbop_stat_success(stat, 1);
- return;
- }
-
- if (bat_rep->bar_status == EPERM &&
- trans->tas_opc == LST_TRANS_TSBSTOP) {
- lstcon_tsbop_stat_success(stat, 1);
- return;
- }
-
- lstcon_tsbop_stat_failure(stat, 1);
- rc = bat_rep->bar_status;
- break;
-
- case LST_TRANS_TSBCLIQRY:
- case LST_TRANS_TSBSRVQRY:
- bat_rep = &msg->msg_body.bat_reply;
-
- if (bat_rep->bar_active)
- lstcon_tsbqry_stat_run(stat, 1);
- else
- lstcon_tsbqry_stat_idle(stat, 1);
-
- if (!bat_rep->bar_status)
- return;
-
- lstcon_tsbqry_stat_failure(stat, 1);
- rc = bat_rep->bar_status;
- break;
-
- case LST_TRANS_TSBCLIADD:
- case LST_TRANS_TSBSRVADD:
- test_rep = &msg->msg_body.tes_reply;
-
- if (!test_rep->tsr_status) {
- lstcon_tsbop_stat_success(stat, 1);
- return;
- }
-
- lstcon_tsbop_stat_failure(stat, 1);
- rc = test_rep->tsr_status;
- break;
-
- case LST_TRANS_STATQRY:
- stat_rep = &msg->msg_body.stat_reply;
-
- if (!stat_rep->str_status) {
- lstcon_statqry_stat_success(stat, 1);
- return;
- }
-
- lstcon_statqry_stat_failure(stat, 1);
- rc = stat_rep->str_status;
- break;
-
- default:
- LBUG();
- }
-
- if (!stat->trs_fwk_errno)
- stat->trs_fwk_errno = rc;
-}
-
-int
-lstcon_rpc_trans_ndlist(struct list_head *ndlist,
- struct list_head *translist, int transop,
- void *arg, lstcon_rpc_cond_func_t condition,
- struct lstcon_rpc_trans **transpp)
-{
- struct lstcon_rpc_trans *trans;
- struct lstcon_ndlink *ndl;
- struct lstcon_node *nd;
- struct lstcon_rpc *rpc;
- unsigned int feats;
- int rc;
-
-	/* Create session RPCs for a list of nodes */
-
- rc = lstcon_rpc_trans_prep(translist, transop, &trans);
- if (rc) {
- CERROR("Can't create transaction %d: %d\n", transop, rc);
- return rc;
- }
-
- feats = trans->tas_features;
- list_for_each_entry(ndl, ndlist, ndl_link) {
- rc = !condition ? 1 :
- condition(transop, ndl->ndl_node, arg);
-
- if (!rc)
- continue;
-
- if (rc < 0) {
- CDEBUG(D_NET, "Condition error while creating RPC for transaction %d: %d\n",
- transop, rc);
- break;
- }
-
- nd = ndl->ndl_node;
-
- switch (transop) {
- case LST_TRANS_SESNEW:
- case LST_TRANS_SESEND:
- rc = lstcon_sesrpc_prep(nd, transop, feats, &rpc);
- break;
- case LST_TRANS_SESQRY:
- case LST_TRANS_SESPING:
- rc = lstcon_dbgrpc_prep(nd, feats, &rpc);
- break;
- case LST_TRANS_TSBCLIADD:
- case LST_TRANS_TSBSRVADD:
- rc = lstcon_testrpc_prep(nd, transop, feats,
- (struct lstcon_test *)arg,
- &rpc);
- break;
- case LST_TRANS_TSBRUN:
- case LST_TRANS_TSBSTOP:
- case LST_TRANS_TSBCLIQRY:
- case LST_TRANS_TSBSRVQRY:
- rc = lstcon_batrpc_prep(nd, transop, feats,
- (struct lstcon_tsb_hdr *)arg,
- &rpc);
- break;
- case LST_TRANS_STATQRY:
- rc = lstcon_statrpc_prep(nd, feats, &rpc);
- break;
- default:
- rc = -EINVAL;
- break;
- }
-
- if (rc) {
- CERROR("Failed to create RPC for transaction %s: %d\n",
- lstcon_rpc_trans_name(transop), rc);
- break;
- }
-
- lstcon_rpc_trans_addreq(trans, rpc);
- }
-
- if (!rc) {
- *transpp = trans;
- return 0;
- }
-
- lstcon_rpc_trans_destroy(trans);
-
- return rc;
-}
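-
-/*
- * Typical caller pipeline for the helpers above (sketch; the function
- * name is hypothetical and error handling is pared down -- compare
- * lstcon_group_nodes_remove() in console.c for a real instance):
- */
-static int
-trans_pipeline_sketch(struct list_head *ndlist, struct list_head *translist,
-		      int transop, void *arg, lstcon_rpc_cond_func_t cond,
-		      struct list_head __user *result_up)
-{
-	struct lstcon_rpc_trans *trans;
-	int rc;
-
-	/* build one RPC per node that passes the condition callback */
-	rc = lstcon_rpc_trans_ndlist(ndlist, translist, transop,
-				     arg, cond, &trans);
-	if (rc)
-		return rc;
-
-	/* post all RPCs and wait for replies (or timeout) */
-	lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
-	/* copy per-node results back to user space */
-	rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
-	lstcon_rpc_trans_destroy(trans);
-	return rc;
-}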
-
-static void
-lstcon_rpc_pinger(void *arg)
-{
- struct stt_timer *ptimer = (struct stt_timer *)arg;
- struct lstcon_rpc_trans *trans;
- struct lstcon_rpc *crpc;
- struct srpc_msg *rep;
- struct srpc_debug_reqst *drq;
- struct lstcon_ndlink *ndl;
- struct lstcon_node *nd;
- int intv;
- int count = 0;
- int rc;
-
-	/*
-	 * The RPC pinger is a special case of transaction; it is
-	 * invoked by a timer every LST_PING_INTERVAL (8) seconds.
-	 */
- mutex_lock(&console_session.ses_mutex);
-
- if (console_session.ses_shutdown || console_session.ses_expired) {
- mutex_unlock(&console_session.ses_mutex);
- return;
- }
-
- if (!console_session.ses_expired &&
- ktime_get_real_seconds() - console_session.ses_laststamp >
- (time64_t)console_session.ses_timeout)
- console_session.ses_expired = 1;
-
- trans = console_session.ses_ping;
-
- LASSERT(trans);
-
- list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link) {
- nd = ndl->ndl_node;
-
- if (console_session.ses_expired) {
- /* idle console, end session on all nodes */
- if (nd->nd_state != LST_NODE_ACTIVE)
- continue;
-
- rc = lstcon_sesrpc_prep(nd, LST_TRANS_SESEND,
- trans->tas_features, &crpc);
- if (rc) {
- CERROR("Out of memory\n");
- break;
- }
-
- lstcon_rpc_trans_addreq(trans, crpc);
- lstcon_rpc_post(crpc);
-
- continue;
- }
-
- crpc = &nd->nd_ping;
-
- if (crpc->crp_rpc) {
- LASSERT(crpc->crp_trans == trans);
- LASSERT(!list_empty(&crpc->crp_link));
-
- spin_lock(&crpc->crp_rpc->crpc_lock);
-
- LASSERT(crpc->crp_posted);
-
- if (!crpc->crp_finished) {
- /* in flight */
- spin_unlock(&crpc->crp_rpc->crpc_lock);
- continue;
- }
-
- spin_unlock(&crpc->crp_rpc->crpc_lock);
-
- lstcon_rpc_get_reply(crpc, &rep);
-
- list_del_init(&crpc->crp_link);
-
- lstcon_rpc_put(crpc);
- }
-
- if (nd->nd_state != LST_NODE_ACTIVE)
- continue;
-
- intv = (jiffies - nd->nd_stamp) / msecs_to_jiffies(MSEC_PER_SEC);
- if (intv < nd->nd_timeout / 2)
- continue;
-
- rc = lstcon_rpc_init(nd, SRPC_SERVICE_DEBUG,
- trans->tas_features, 0, 0, 1, crpc);
- if (rc) {
- CERROR("Out of memory\n");
- break;
- }
-
- drq = &crpc->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst;
-
- drq->dbg_sid = console_session.ses_id;
- drq->dbg_flags = 0;
-
- lstcon_rpc_trans_addreq(trans, crpc);
- lstcon_rpc_post(crpc);
-
- count++;
- }
-
- if (console_session.ses_expired) {
- mutex_unlock(&console_session.ses_mutex);
- return;
- }
-
- CDEBUG(D_NET, "Ping %d nodes in session\n", count);
-
- ptimer->stt_expires = ktime_get_real_seconds() + LST_PING_INTERVAL;
- stt_add_timer(ptimer);
-
- mutex_unlock(&console_session.ses_mutex);
-}
-
-int
-lstcon_rpc_pinger_start(void)
-{
- struct stt_timer *ptimer;
- int rc;
-
- LASSERT(list_empty(&console_session.ses_rpc_freelist));
- LASSERT(!atomic_read(&console_session.ses_rpc_counter));
-
- rc = lstcon_rpc_trans_prep(NULL, LST_TRANS_SESPING,
- &console_session.ses_ping);
- if (rc) {
- CERROR("Failed to create console pinger\n");
- return rc;
- }
-
- ptimer = &console_session.ses_ping_timer;
- ptimer->stt_expires = ktime_get_real_seconds() + LST_PING_INTERVAL;
-
- stt_add_timer(ptimer);
-
- return 0;
-}
-
-void
-lstcon_rpc_pinger_stop(void)
-{
- LASSERT(console_session.ses_shutdown);
-
- stt_del_timer(&console_session.ses_ping_timer);
-
- lstcon_rpc_trans_abort(console_session.ses_ping, -ESHUTDOWN);
- lstcon_rpc_trans_stat(console_session.ses_ping, lstcon_trans_stat());
- lstcon_rpc_trans_destroy(console_session.ses_ping);
-
- memset(lstcon_trans_stat(), 0, sizeof(struct lstcon_trans_stat));
-
- console_session.ses_ping = NULL;
-}
-
-void
-lstcon_rpc_cleanup_wait(void)
-{
- struct lstcon_rpc_trans *trans;
- struct lstcon_rpc *crpc;
- struct lstcon_rpc *temp;
- struct list_head *pacer;
- struct list_head zlist;
-
-	/* Called with the global mutex held */
-
- LASSERT(console_session.ses_shutdown);
-
- while (!list_empty(&console_session.ses_trans_list)) {
- list_for_each(pacer, &console_session.ses_trans_list) {
- trans = list_entry(pacer, struct lstcon_rpc_trans,
- tas_link);
-
- CDEBUG(D_NET, "Session closed, wakeup transaction %s\n",
- lstcon_rpc_trans_name(trans->tas_opc));
-
- wake_up(&trans->tas_waitq);
- }
-
- mutex_unlock(&console_session.ses_mutex);
-
- CWARN("Session is shutting down, waiting for termination of transactions\n");
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
-
- mutex_lock(&console_session.ses_mutex);
- }
-
- spin_lock(&console_session.ses_rpc_lock);
-
- lst_wait_until(!atomic_read(&console_session.ses_rpc_counter),
- console_session.ses_rpc_lock,
-		       "Network is not accessible or target is down, waiting for %d console RPCs to be recycled\n",
- atomic_read(&console_session.ses_rpc_counter));
-
- list_add(&zlist, &console_session.ses_rpc_freelist);
- list_del_init(&console_session.ses_rpc_freelist);
-
- spin_unlock(&console_session.ses_rpc_lock);
-
- list_for_each_entry_safe(crpc, temp, &zlist, crp_link) {
- list_del(&crpc->crp_link);
- kfree(crpc);
- }
-}
-
-int
-lstcon_rpc_module_init(void)
-{
- INIT_LIST_HEAD(&console_session.ses_ping_timer.stt_list);
- console_session.ses_ping_timer.stt_func = lstcon_rpc_pinger;
- console_session.ses_ping_timer.stt_data = &console_session.ses_ping_timer;
-
- console_session.ses_ping = NULL;
-
- spin_lock_init(&console_session.ses_rpc_lock);
- atomic_set(&console_session.ses_rpc_counter, 0);
- INIT_LIST_HEAD(&console_session.ses_rpc_freelist);
-
- return 0;
-}
-
-void
-lstcon_rpc_module_fini(void)
-{
- LASSERT(list_empty(&console_session.ses_rpc_freelist));
- LASSERT(!atomic_read(&console_session.ses_rpc_counter));
-}
diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.h b/drivers/staging/lustre/lnet/selftest/conrpc.h
deleted file mode 100644
index ce2f92d04838..000000000000
--- a/drivers/staging/lustre/lnet/selftest/conrpc.h
+++ /dev/null
@@ -1,142 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/conrpc.h
- *
- * Console RPC
- *
- * Author: Liang Zhen <liang@whamcloud.com>
- */
-
-#ifndef __LST_CONRPC_H__
-#define __LST_CONRPC_H__
-
-#include <linux/lnet/lib-types.h>
-#include <uapi/linux/lnet/lnetst.h>
-#include "rpc.h"
-#include "selftest.h"
-
-/* Console rpc and rpc transaction */
-#define LST_TRANS_TIMEOUT 30
-#define LST_TRANS_MIN_TIMEOUT 3
-
-#define LST_VALIDATE_TIMEOUT(t) min(max(t, LST_TRANS_MIN_TIMEOUT), LST_TRANS_TIMEOUT)
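-
-/*
- * LST_VALIDATE_TIMEOUT() clamps a user-supplied timeout into the
- * [LST_TRANS_MIN_TIMEOUT, LST_TRANS_TIMEOUT] range; e.g. 1 -> 3,
- * 10 -> 10, 100 -> 30.
- */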
-
-#define LST_PING_INTERVAL 8
-
-struct lstcon_rpc_trans;
-struct lstcon_tsb_hdr;
-struct lstcon_test;
-struct lstcon_node;
-
-struct lstcon_rpc {
- struct list_head crp_link; /* chain on rpc transaction */
- struct srpc_client_rpc *crp_rpc; /* client rpc */
- struct lstcon_node *crp_node; /* destination node */
- struct lstcon_rpc_trans *crp_trans; /* conrpc transaction */
-
- unsigned int crp_posted:1; /* rpc is posted */
- unsigned int crp_finished:1; /* rpc is finished */
- unsigned int crp_unpacked:1; /* reply is unpacked */
-	/** RPC is embedded in another structure and can't be freed separately */
- unsigned int crp_embedded:1;
- int crp_status; /* console rpc errors */
- unsigned long crp_stamp; /* replied time stamp */
-};
-
-struct lstcon_rpc_trans {
- struct list_head tas_olink; /* link chain on owner list */
- struct list_head tas_link; /* link chain on global list */
- int tas_opc; /* operation code of transaction */
-	unsigned int tas_feats_updated; /* features mask is up to date */
- unsigned int tas_features; /* test features mask */
- wait_queue_head_t tas_waitq; /* wait queue head */
- atomic_t tas_remaining; /* # of un-scheduled rpcs */
- struct list_head tas_rpcs_list; /* queued requests */
-};
-
-#define LST_TRANS_PRIVATE 0x1000
-
-#define LST_TRANS_SESNEW (LST_TRANS_PRIVATE | 0x01)
-#define LST_TRANS_SESEND (LST_TRANS_PRIVATE | 0x02)
-#define LST_TRANS_SESQRY 0x03
-#define LST_TRANS_SESPING 0x04
-
-#define LST_TRANS_TSBCLIADD (LST_TRANS_PRIVATE | 0x11)
-#define LST_TRANS_TSBSRVADD (LST_TRANS_PRIVATE | 0x12)
-#define LST_TRANS_TSBRUN (LST_TRANS_PRIVATE | 0x13)
-#define LST_TRANS_TSBSTOP (LST_TRANS_PRIVATE | 0x14)
-#define LST_TRANS_TSBCLIQRY 0x15
-#define LST_TRANS_TSBSRVQRY 0x16
-
-#define LST_TRANS_STATQRY 0x21
-
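-/*
- * LST_TRANS_PRIVATE marks operations that must not be queued twice on
- * the same object; the check in lstcon_rpc_trans_prep(),
- *
- *	(trans->tas_opc & transop) == LST_TRANS_PRIVATE
- *
- * is true only when both opcodes carry the private bit: e.g.
- * LST_TRANS_SESNEW (0x1001) & LST_TRANS_SESEND (0x1002) == 0x1000,
- * whereas any pairing with LST_TRANS_SESQRY (0x03) clears the bit.
- */
-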
-typedef int (*lstcon_rpc_cond_func_t)(int, struct lstcon_node *, void *);
-typedef int (*lstcon_rpc_readent_func_t)(int, struct srpc_msg *,
- struct lstcon_rpc_ent __user *);
-
-int lstcon_sesrpc_prep(struct lstcon_node *nd, int transop,
- unsigned int version, struct lstcon_rpc **crpc);
-int lstcon_dbgrpc_prep(struct lstcon_node *nd,
- unsigned int version, struct lstcon_rpc **crpc);
-int lstcon_batrpc_prep(struct lstcon_node *nd, int transop,
- unsigned int version, struct lstcon_tsb_hdr *tsb,
- struct lstcon_rpc **crpc);
-int lstcon_testrpc_prep(struct lstcon_node *nd, int transop,
- unsigned int version, struct lstcon_test *test,
- struct lstcon_rpc **crpc);
-int lstcon_statrpc_prep(struct lstcon_node *nd, unsigned int version,
- struct lstcon_rpc **crpc);
-void lstcon_rpc_put(struct lstcon_rpc *crpc);
-int lstcon_rpc_trans_prep(struct list_head *translist,
- int transop, struct lstcon_rpc_trans **transpp);
-int lstcon_rpc_trans_ndlist(struct list_head *ndlist,
- struct list_head *translist, int transop,
- void *arg, lstcon_rpc_cond_func_t condition,
- struct lstcon_rpc_trans **transpp);
-void lstcon_rpc_trans_stat(struct lstcon_rpc_trans *trans,
- struct lstcon_trans_stat *stat);
-int lstcon_rpc_trans_interpreter(struct lstcon_rpc_trans *trans,
- struct list_head __user *head_up,
- lstcon_rpc_readent_func_t readent);
-void lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error);
-void lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans);
-void lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans,
- struct lstcon_rpc *req);
-int lstcon_rpc_trans_postwait(struct lstcon_rpc_trans *trans, int timeout);
-int lstcon_rpc_pinger_start(void);
-void lstcon_rpc_pinger_stop(void);
-void lstcon_rpc_cleanup_wait(void);
-int lstcon_rpc_module_init(void);
-void lstcon_rpc_module_fini(void);
-
-#endif
diff --git a/drivers/staging/lustre/lnet/selftest/console.c b/drivers/staging/lustre/lnet/selftest/console.c
deleted file mode 100644
index 3c1c1b5997e0..000000000000
--- a/drivers/staging/lustre/lnet/selftest/console.c
+++ /dev/null
@@ -1,2104 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/console.c
- *
- * Infrastructure of LST console
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- */
-
-#include <linux/lnet/lib-lnet.h>
-#include "console.h"
-#include "conrpc.h"
-
-#define LST_NODE_STATE_COUNTER(nd, p) \
-do { \
- if ((nd)->nd_state == LST_NODE_ACTIVE) \
- (p)->nle_nactive++; \
- else if ((nd)->nd_state == LST_NODE_BUSY) \
- (p)->nle_nbusy++; \
- else if ((nd)->nd_state == LST_NODE_DOWN) \
- (p)->nle_ndown++; \
- else \
- (p)->nle_nunknown++; \
- (p)->nle_nnode++; \
-} while (0)
-
-struct lstcon_session console_session;
-
-static void
-lstcon_node_get(struct lstcon_node *nd)
-{
- LASSERT(nd->nd_ref >= 1);
-
- nd->nd_ref++;
-}
-
-static int
-lstcon_node_find(struct lnet_process_id id, struct lstcon_node **ndpp,
- int create)
-{
- struct lstcon_ndlink *ndl;
- unsigned int idx = LNET_NIDADDR(id.nid) % LST_GLOBAL_HASHSIZE;
-
- LASSERT(id.nid != LNET_NID_ANY);
-
- list_for_each_entry(ndl, &console_session.ses_ndl_hash[idx],
- ndl_hlink) {
- if (ndl->ndl_node->nd_id.nid != id.nid ||
- ndl->ndl_node->nd_id.pid != id.pid)
- continue;
-
- lstcon_node_get(ndl->ndl_node);
- *ndpp = ndl->ndl_node;
- return 0;
- }
-
- if (!create)
- return -ENOENT;
-
- *ndpp = kzalloc(sizeof(**ndpp) + sizeof(*ndl), GFP_KERNEL);
- if (!*ndpp)
- return -ENOMEM;
-
- ndl = (struct lstcon_ndlink *)(*ndpp + 1);
-
- ndl->ndl_node = *ndpp;
-
- ndl->ndl_node->nd_ref = 1;
- ndl->ndl_node->nd_id = id;
- ndl->ndl_node->nd_stamp = jiffies;
- ndl->ndl_node->nd_state = LST_NODE_UNKNOWN;
- ndl->ndl_node->nd_timeout = 0;
- memset(&ndl->ndl_node->nd_ping, 0, sizeof(struct lstcon_rpc));
-
-	/*
-	 * Queued in the global hash & list; neither takes a refcount,
-	 * so the node is released as soon as the caller drops its
-	 * refcount.
-	 */
- list_add_tail(&ndl->ndl_hlink, &console_session.ses_ndl_hash[idx]);
- list_add_tail(&ndl->ndl_link, &console_session.ses_ndl_list);
-
- return 0;
-}
-
-static void
-lstcon_node_put(struct lstcon_node *nd)
-{
- struct lstcon_ndlink *ndl;
-
- LASSERT(nd->nd_ref > 0);
-
- if (--nd->nd_ref > 0)
- return;
-
- ndl = (struct lstcon_ndlink *)(nd + 1);
-
- LASSERT(!list_empty(&ndl->ndl_link));
- LASSERT(!list_empty(&ndl->ndl_hlink));
-
- /* remove from session */
- list_del(&ndl->ndl_link);
- list_del(&ndl->ndl_hlink);
-
- kfree(nd);
-}
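-
-/*
- * The pointer arithmetic above works because lstcon_node_find()
- * co-allocates the node and its session ndlink in one kzalloc() and
- * places the ndlink directly behind the node:
- *
- *	*ndpp = kzalloc(sizeof(**ndpp) + sizeof(*ndl), GFP_KERNEL);
- *	ndl = (struct lstcon_ndlink *)(*ndpp + 1);
- *
- * so freeing the node releases both; neither can be freed separately.
- */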
-
-static int
-lstcon_ndlink_find(struct list_head *hash, struct lnet_process_id id,
- struct lstcon_ndlink **ndlpp, int create)
-{
- unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE;
- struct lstcon_ndlink *ndl;
- struct lstcon_node *nd;
- int rc;
-
- if (id.nid == LNET_NID_ANY)
- return -EINVAL;
-
- /* search in hash */
- list_for_each_entry(ndl, &hash[idx], ndl_hlink) {
- if (ndl->ndl_node->nd_id.nid != id.nid ||
- ndl->ndl_node->nd_id.pid != id.pid)
- continue;
-
- *ndlpp = ndl;
- return 0;
- }
-
- if (!create)
- return -ENOENT;
-
- /* find or create in session hash */
- rc = lstcon_node_find(id, &nd, (create == 1) ? 1 : 0);
- if (rc)
- return rc;
-
- ndl = kzalloc(sizeof(struct lstcon_ndlink), GFP_NOFS);
- if (!ndl) {
- lstcon_node_put(nd);
- return -ENOMEM;
- }
-
- *ndlpp = ndl;
-
- ndl->ndl_node = nd;
- INIT_LIST_HEAD(&ndl->ndl_link);
- list_add_tail(&ndl->ndl_hlink, &hash[idx]);
-
- return 0;
-}
-
-static void
-lstcon_ndlink_release(struct lstcon_ndlink *ndl)
-{
- LASSERT(list_empty(&ndl->ndl_link));
- LASSERT(!list_empty(&ndl->ndl_hlink));
-
- list_del(&ndl->ndl_hlink); /* delete from hash */
- lstcon_node_put(ndl->ndl_node);
-
- kfree(ndl);
-}
-
-static int
-lstcon_group_alloc(char *name, struct lstcon_group **grpp)
-{
- struct lstcon_group *grp;
- int i;
-
- grp = kmalloc(offsetof(struct lstcon_group,
- grp_ndl_hash[LST_NODE_HASHSIZE]),
- GFP_KERNEL);
- if (!grp)
- return -ENOMEM;
-
- grp->grp_ref = 1;
- if (name) {
- if (strlen(name) > sizeof(grp->grp_name) - 1) {
- kfree(grp);
- return -E2BIG;
- }
- strncpy(grp->grp_name, name, sizeof(grp->grp_name));
- }
-
- INIT_LIST_HEAD(&grp->grp_link);
- INIT_LIST_HEAD(&grp->grp_ndl_list);
- INIT_LIST_HEAD(&grp->grp_trans_list);
-
- for (i = 0; i < LST_NODE_HASHSIZE; i++)
- INIT_LIST_HEAD(&grp->grp_ndl_hash[i]);
-
- *grpp = grp;
-
- return 0;
-}
-
-static void
-lstcon_group_addref(struct lstcon_group *grp)
-{
- grp->grp_ref++;
-}
-
-static void lstcon_group_ndlink_release(struct lstcon_group *,
- struct lstcon_ndlink *);
-
-static void
-lstcon_group_drain(struct lstcon_group *grp, int keep)
-{
- struct lstcon_ndlink *ndl;
- struct lstcon_ndlink *tmp;
-
- list_for_each_entry_safe(ndl, tmp, &grp->grp_ndl_list, ndl_link) {
- if (!(ndl->ndl_node->nd_state & keep))
- lstcon_group_ndlink_release(grp, ndl);
- }
-}
-
-static void
-lstcon_group_decref(struct lstcon_group *grp)
-{
- int i;
-
- if (--grp->grp_ref > 0)
- return;
-
- if (!list_empty(&grp->grp_link))
- list_del(&grp->grp_link);
-
- lstcon_group_drain(grp, 0);
-
- for (i = 0; i < LST_NODE_HASHSIZE; i++)
- LASSERT(list_empty(&grp->grp_ndl_hash[i]));
-
- kfree(grp);
-}
-
-static int
-lstcon_group_find(const char *name, struct lstcon_group **grpp)
-{
- struct lstcon_group *grp;
-
- list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) {
- if (strncmp(grp->grp_name, name, LST_NAME_SIZE))
- continue;
-
- lstcon_group_addref(grp); /* +1 ref for caller */
- *grpp = grp;
- return 0;
- }
-
- return -ENOENT;
-}
-
-static int
-lstcon_group_ndlink_find(struct lstcon_group *grp, struct lnet_process_id id,
- struct lstcon_ndlink **ndlpp, int create)
-{
- int rc;
-
- rc = lstcon_ndlink_find(&grp->grp_ndl_hash[0], id, ndlpp, create);
- if (rc)
- return rc;
-
- if (!list_empty(&(*ndlpp)->ndl_link))
- return 0;
-
- list_add_tail(&(*ndlpp)->ndl_link, &grp->grp_ndl_list);
- grp->grp_nnode++;
-
- return 0;
-}
-
-static void
-lstcon_group_ndlink_release(struct lstcon_group *grp, struct lstcon_ndlink *ndl)
-{
- list_del_init(&ndl->ndl_link);
- lstcon_ndlink_release(ndl);
- grp->grp_nnode--;
-}
-
-static void
-lstcon_group_ndlink_move(struct lstcon_group *old,
- struct lstcon_group *new, struct lstcon_ndlink *ndl)
-{
- unsigned int idx = LNET_NIDADDR(ndl->ndl_node->nd_id.nid) %
- LST_NODE_HASHSIZE;
-
- list_del(&ndl->ndl_hlink);
- list_del(&ndl->ndl_link);
- old->grp_nnode--;
-
- list_add_tail(&ndl->ndl_hlink, &new->grp_ndl_hash[idx]);
- list_add_tail(&ndl->ndl_link, &new->grp_ndl_list);
- new->grp_nnode++;
-}
-
-static void
-lstcon_group_move(struct lstcon_group *old, struct lstcon_group *new)
-{
- struct lstcon_ndlink *ndl;
-
- while (!list_empty(&old->grp_ndl_list)) {
- ndl = list_entry(old->grp_ndl_list.next,
- struct lstcon_ndlink, ndl_link);
- lstcon_group_ndlink_move(old, new, ndl);
- }
-}
-
-static int
-lstcon_sesrpc_condition(int transop, struct lstcon_node *nd, void *arg)
-{
- struct lstcon_group *grp = (struct lstcon_group *)arg;
-
- switch (transop) {
- case LST_TRANS_SESNEW:
- if (nd->nd_state == LST_NODE_ACTIVE)
- return 0;
- break;
-
- case LST_TRANS_SESEND:
- if (nd->nd_state != LST_NODE_ACTIVE)
- return 0;
-
- if (grp && nd->nd_ref > 1)
- return 0;
- break;
-
- case LST_TRANS_SESQRY:
- break;
-
- default:
- LBUG();
- }
-
- return 1;
-}
-
-static int
-lstcon_sesrpc_readent(int transop, struct srpc_msg *msg,
- struct lstcon_rpc_ent __user *ent_up)
-{
- struct srpc_debug_reply *rep;
-
- switch (transop) {
- case LST_TRANS_SESNEW:
- case LST_TRANS_SESEND:
- return 0;
-
- case LST_TRANS_SESQRY:
- rep = &msg->msg_body.dbg_reply;
-
- if (copy_to_user(&ent_up->rpe_priv[0],
- &rep->dbg_timeout, sizeof(int)) ||
- copy_to_user(&ent_up->rpe_payload[0],
- &rep->dbg_name, LST_NAME_SIZE))
- return -EFAULT;
-
- return 0;
-
- default:
- LBUG();
- }
-
- return 0;
-}
-
-static int
-lstcon_group_nodes_add(struct lstcon_group *grp,
- int count, struct lnet_process_id __user *ids_up,
- unsigned int *featp,
- struct list_head __user *result_up)
-{
- struct lstcon_rpc_trans *trans;
- struct lstcon_ndlink *ndl;
- struct lstcon_group *tmp;
- struct lnet_process_id id;
- int i;
- int rc;
-
- rc = lstcon_group_alloc(NULL, &tmp);
- if (rc) {
- CERROR("Out of memory\n");
- return -ENOMEM;
- }
-
- for (i = 0 ; i < count; i++) {
- if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
- rc = -EFAULT;
- break;
- }
-
- /* skip if it's in this group already */
- rc = lstcon_group_ndlink_find(grp, id, &ndl, 0);
- if (!rc)
- continue;
-
- /* add to tmp group */
- rc = lstcon_group_ndlink_find(tmp, id, &ndl, 1);
- if (rc) {
- CERROR("Can't create ndlink, out of memory\n");
- break;
- }
- }
-
- if (rc) {
- lstcon_group_decref(tmp);
- return rc;
- }
-
- rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list,
- &tmp->grp_trans_list, LST_TRANS_SESNEW,
- tmp, lstcon_sesrpc_condition, &trans);
- if (rc) {
- CERROR("Can't create transaction: %d\n", rc);
- lstcon_group_decref(tmp);
- return rc;
- }
-
- /* post all RPCs */
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up,
- lstcon_sesrpc_readent);
- *featp = trans->tas_features;
-
-	/* destroy all RPCs */
- lstcon_rpc_trans_destroy(trans);
-
- lstcon_group_move(tmp, grp);
- lstcon_group_decref(tmp);
-
- return rc;
-}
-
-static int
-lstcon_group_nodes_remove(struct lstcon_group *grp,
- int count, struct lnet_process_id __user *ids_up,
- struct list_head __user *result_up)
-{
- struct lstcon_rpc_trans *trans;
- struct lstcon_ndlink *ndl;
- struct lstcon_group *tmp;
- struct lnet_process_id id;
- int rc;
- int i;
-
- /* End session and remove node from the group */
-
- rc = lstcon_group_alloc(NULL, &tmp);
- if (rc) {
- CERROR("Out of memory\n");
- return -ENOMEM;
- }
-
- for (i = 0; i < count; i++) {
- if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
- rc = -EFAULT;
- goto error;
- }
-
- /* move node to tmp group */
- if (!lstcon_group_ndlink_find(grp, id, &ndl, 0))
- lstcon_group_ndlink_move(grp, tmp, ndl);
- }
-
- rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list,
- &tmp->grp_trans_list, LST_TRANS_SESEND,
- tmp, lstcon_sesrpc_condition, &trans);
- if (rc) {
- CERROR("Can't create transaction: %d\n", rc);
- goto error;
- }
-
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
- lstcon_rpc_trans_destroy(trans);
-	/* release nodes anyway, because we can't roll back their status */
- lstcon_group_decref(tmp);
-
- return rc;
-error:
- lstcon_group_move(tmp, grp);
- lstcon_group_decref(tmp);
-
- return rc;
-}
-
-int
-lstcon_group_add(char *name)
-{
- struct lstcon_group *grp;
- int rc;
-
- rc = lstcon_group_find(name, &grp) ? 0 : -EEXIST;
- if (rc) {
-		/* found a group with the same name */
- lstcon_group_decref(grp);
- return rc;
- }
-
- rc = lstcon_group_alloc(name, &grp);
- if (rc) {
- CERROR("Can't allocate descriptor for group %s\n", name);
- return -ENOMEM;
- }
-
- list_add_tail(&grp->grp_link, &console_session.ses_grp_list);
-
- return rc;
-}
-
-int
-lstcon_nodes_add(char *name, int count, struct lnet_process_id __user *ids_up,
- unsigned int *featp, struct list_head __user *result_up)
-{
- struct lstcon_group *grp;
- int rc;
-
- LASSERT(count > 0);
- LASSERT(ids_up);
-
- rc = lstcon_group_find(name, &grp);
- if (rc) {
- CDEBUG(D_NET, "Can't find group %s\n", name);
- return rc;
- }
-
- if (grp->grp_ref > 2) {
-		/* referenced by other threads or tests */
- CDEBUG(D_NET, "Group %s is busy\n", name);
- lstcon_group_decref(grp);
-
- return -EBUSY;
- }
-
- rc = lstcon_group_nodes_add(grp, count, ids_up, featp, result_up);
-
- lstcon_group_decref(grp);
-
- return rc;
-}
-
-int
-lstcon_group_del(char *name)
-{
- struct lstcon_rpc_trans *trans;
- struct lstcon_group *grp;
- int rc;
-
- rc = lstcon_group_find(name, &grp);
- if (rc) {
- CDEBUG(D_NET, "Can't find group: %s\n", name);
- return rc;
- }
-
- if (grp->grp_ref > 2) {
-		/* referenced by other threads or tests */
- CDEBUG(D_NET, "Group %s is busy\n", name);
- lstcon_group_decref(grp);
- return -EBUSY;
- }
-
- rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
- &grp->grp_trans_list, LST_TRANS_SESEND,
- grp, lstcon_sesrpc_condition, &trans);
- if (rc) {
- CERROR("Can't create transaction: %d\n", rc);
- lstcon_group_decref(grp);
- return rc;
- }
-
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- lstcon_rpc_trans_destroy(trans);
-
- lstcon_group_decref(grp);
-	/*
-	 * Drop the session's ref: the session is destroyed and its
-	 * status can't be rolled back, so destroy the group anyway.
-	 */
- lstcon_group_decref(grp);
-
- return rc;
-}
-
-int
-lstcon_group_clean(char *name, int args)
-{
- struct lstcon_group *grp = NULL;
- int rc;
-
- rc = lstcon_group_find(name, &grp);
- if (rc) {
- CDEBUG(D_NET, "Can't find group %s\n", name);
- return rc;
- }
-
- if (grp->grp_ref > 2) {
- /* referred by test */
- CDEBUG(D_NET, "Group %s is busy\n", name);
- lstcon_group_decref(grp);
- return -EBUSY;
- }
-
- args = (LST_NODE_ACTIVE | LST_NODE_BUSY |
- LST_NODE_DOWN | LST_NODE_UNKNOWN) & ~args;
-
- lstcon_group_drain(grp, args);
-
- lstcon_group_decref(grp);
- /* release empty group */
- if (list_empty(&grp->grp_ndl_list))
- lstcon_group_decref(grp);
-
- return 0;
-}
-
-int
-lstcon_nodes_remove(char *name, int count,
- struct lnet_process_id __user *ids_up,
- struct list_head __user *result_up)
-{
- struct lstcon_group *grp = NULL;
- int rc;
-
- rc = lstcon_group_find(name, &grp);
- if (rc) {
- CDEBUG(D_NET, "Can't find group: %s\n", name);
- return rc;
- }
-
- if (grp->grp_ref > 2) {
- /* referred by test */
- CDEBUG(D_NET, "Group %s is busy\n", name);
- lstcon_group_decref(grp);
- return -EBUSY;
- }
-
- rc = lstcon_group_nodes_remove(grp, count, ids_up, result_up);
-
- lstcon_group_decref(grp);
- /* release empty group */
- if (list_empty(&grp->grp_ndl_list))
- lstcon_group_decref(grp);
-
- return rc;
-}
-
-int
-lstcon_group_refresh(char *name, struct list_head __user *result_up)
-{
- struct lstcon_rpc_trans *trans;
- struct lstcon_group *grp;
- int rc;
-
- rc = lstcon_group_find(name, &grp);
- if (rc) {
- CDEBUG(D_NET, "Can't find group: %s\n", name);
- return rc;
- }
-
- if (grp->grp_ref > 2) {
- /* referred by test */
- CDEBUG(D_NET, "Group %s is busy\n", name);
- lstcon_group_decref(grp);
- return -EBUSY;
- }
-
-	/* re-invite all inactive nodes in the group */
- rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
- &grp->grp_trans_list, LST_TRANS_SESNEW,
- grp, lstcon_sesrpc_condition, &trans);
- if (rc) {
- /* local error, return */
- CDEBUG(D_NET, "Can't create transaction: %d\n", rc);
- lstcon_group_decref(grp);
- return rc;
- }
-
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
- lstcon_rpc_trans_destroy(trans);
- /* -ref for me */
- lstcon_group_decref(grp);
-
- return rc;
-}
-
-int
-lstcon_group_list(int index, int len, char __user *name_up)
-{
- struct lstcon_group *grp;
-
- LASSERT(index >= 0);
- LASSERT(name_up);
-
- list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) {
- if (!index--) {
- return copy_to_user(name_up, grp->grp_name, len) ?
- -EFAULT : 0;
- }
- }
-
- return -ENOENT;
-}
-
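-/*
- * Copy node entries (id and state) from @head to the userspace array
- * @dents_up, skipping the first *index_p entries and copying at most
- * *count_p of them; both counters are updated to record how far the
- * scan got.
- */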
-static int
-lstcon_nodes_getent(struct list_head *head, int *index_p,
- int *count_p, struct lstcon_node_ent __user *dents_up)
-{
- struct lstcon_ndlink *ndl;
- struct lstcon_node *nd;
- int count = 0;
- int index = 0;
-
- LASSERT(index_p && count_p);
- LASSERT(dents_up);
- LASSERT(*index_p >= 0);
- LASSERT(*count_p > 0);
-
- list_for_each_entry(ndl, head, ndl_link) {
- if (index++ < *index_p)
- continue;
-
- if (count >= *count_p)
- break;
-
- nd = ndl->ndl_node;
- if (copy_to_user(&dents_up[count].nde_id,
- &nd->nd_id, sizeof(nd->nd_id)) ||
- copy_to_user(&dents_up[count].nde_state,
- &nd->nd_state, sizeof(nd->nd_state)))
- return -EFAULT;
-
- count++;
- }
-
- if (index <= *index_p)
- return -ENOENT;
-
- *count_p = count;
- *index_p = index;
-
- return 0;
-}
-
-int
-lstcon_group_info(char *name, struct lstcon_ndlist_ent __user *gents_p,
- int *index_p, int *count_p,
- struct lstcon_node_ent __user *dents_up)
-{
- struct lstcon_ndlist_ent *gentp;
- struct lstcon_group *grp;
- struct lstcon_ndlink *ndl;
- int rc;
-
- rc = lstcon_group_find(name, &grp);
- if (rc) {
- CDEBUG(D_NET, "Can't find group %s\n", name);
- return rc;
- }
-
- if (dents_up) {
- /* verbose query */
- rc = lstcon_nodes_getent(&grp->grp_ndl_list,
- index_p, count_p, dents_up);
- lstcon_group_decref(grp);
-
- return rc;
- }
-
- /* non-verbose query */
- gentp = kzalloc(sizeof(struct lstcon_ndlist_ent), GFP_NOFS);
- if (!gentp) {
- CERROR("Can't allocate ndlist_ent\n");
- lstcon_group_decref(grp);
-
- return -ENOMEM;
- }
-
- list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link)
- LST_NODE_STATE_COUNTER(ndl->ndl_node, gentp);
-
- rc = copy_to_user(gents_p, gentp,
- sizeof(struct lstcon_ndlist_ent)) ? -EFAULT : 0;
-
- kfree(gentp);
-
- lstcon_group_decref(grp);
-
- return rc;
-}
-
-static int
-lstcon_batch_find(const char *name, struct lstcon_batch **batpp)
-{
- struct lstcon_batch *bat;
-
- list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) {
- if (!strncmp(bat->bat_name, name, LST_NAME_SIZE)) {
- *batpp = bat;
- return 0;
- }
- }
-
- return -ENOENT;
-}
-
-int
-lstcon_batch_add(char *name)
-{
- struct lstcon_batch *bat;
- int i;
- int rc;
-
- rc = !lstcon_batch_find(name, &bat) ? -EEXIST : 0;
- if (rc) {
- CDEBUG(D_NET, "Batch %s already exists\n", name);
- return rc;
- }
-
- bat = kzalloc(sizeof(struct lstcon_batch), GFP_NOFS);
- if (!bat) {
- CERROR("Can't allocate descriptor for batch %s\n", name);
- return -ENOMEM;
- }
-
- bat->bat_cli_hash = kmalloc(sizeof(struct list_head) * LST_NODE_HASHSIZE,
- GFP_KERNEL);
- if (!bat->bat_cli_hash) {
- CERROR("Can't allocate hash for batch %s\n", name);
- kfree(bat);
-
- return -ENOMEM;
- }
-
- bat->bat_srv_hash = kmalloc(sizeof(struct list_head) * LST_NODE_HASHSIZE,
- GFP_KERNEL);
- if (!bat->bat_srv_hash) {
- CERROR("Can't allocate hash for batch %s\n", name);
- kfree(bat->bat_cli_hash);
- kfree(bat);
-
- return -ENOMEM;
- }
-
- if (strlen(name) > sizeof(bat->bat_name) - 1) {
- kfree(bat->bat_srv_hash);
- kfree(bat->bat_cli_hash);
- kfree(bat);
- return -E2BIG;
- }
- strncpy(bat->bat_name, name, sizeof(bat->bat_name));
- bat->bat_hdr.tsb_index = 0;
- bat->bat_hdr.tsb_id.bat_id = ++console_session.ses_id_cookie;
-
- bat->bat_ntest = 0;
- bat->bat_state = LST_BATCH_IDLE;
-
- INIT_LIST_HEAD(&bat->bat_cli_list);
- INIT_LIST_HEAD(&bat->bat_srv_list);
- INIT_LIST_HEAD(&bat->bat_test_list);
- INIT_LIST_HEAD(&bat->bat_trans_list);
-
- for (i = 0; i < LST_NODE_HASHSIZE; i++) {
- INIT_LIST_HEAD(&bat->bat_cli_hash[i]);
- INIT_LIST_HEAD(&bat->bat_srv_hash[i]);
- }
-
- list_add_tail(&bat->bat_link, &console_session.ses_bat_list);
-
- return rc;
-}
-
-int
-lstcon_batch_list(int index, int len, char __user *name_up)
-{
- struct lstcon_batch *bat;
-
- LASSERT(name_up);
- LASSERT(index >= 0);
-
- list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) {
- if (!index--) {
- return copy_to_user(name_up, bat->bat_name, len) ?
- -EFAULT : 0;
- }
- }
-
- return -ENOENT;
-}
-
-int
-lstcon_batch_info(char *name, struct lstcon_test_batch_ent __user *ent_up,
- int server, int testidx, int *index_p, int *ndent_p,
- struct lstcon_node_ent __user *dents_up)
-{
- struct lstcon_test_batch_ent *entp;
- struct list_head *clilst;
- struct list_head *srvlst;
- struct lstcon_test *test = NULL;
- struct lstcon_batch *bat;
- struct lstcon_ndlink *ndl;
- int rc;
-
- rc = lstcon_batch_find(name, &bat);
- if (rc) {
- CDEBUG(D_NET, "Can't find batch %s\n", name);
- return -ENOENT;
- }
-
- if (testidx > 0) {
-		/* query a test; test indexes start from 1 */
- list_for_each_entry(test, &bat->bat_test_list, tes_link) {
- if (testidx-- == 1)
- break;
- }
-
- if (testidx > 0) {
- CDEBUG(D_NET, "Can't find specified test in batch\n");
- return -ENOENT;
- }
- }
-
- clilst = !test ? &bat->bat_cli_list :
- &test->tes_src_grp->grp_ndl_list;
- srvlst = !test ? &bat->bat_srv_list :
- &test->tes_dst_grp->grp_ndl_list;
-
- if (dents_up) {
- rc = lstcon_nodes_getent((server ? srvlst : clilst),
- index_p, ndent_p, dents_up);
- return rc;
- }
-
- /* non-verbose query */
- entp = kzalloc(sizeof(struct lstcon_test_batch_ent), GFP_NOFS);
- if (!entp)
- return -ENOMEM;
-
- if (!test) {
- entp->u.tbe_batch.bae_ntest = bat->bat_ntest;
- entp->u.tbe_batch.bae_state = bat->bat_state;
- } else {
- entp->u.tbe_test.tse_type = test->tes_type;
- entp->u.tbe_test.tse_loop = test->tes_loop;
- entp->u.tbe_test.tse_concur = test->tes_concur;
- }
-
- list_for_each_entry(ndl, clilst, ndl_link)
- LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_cli_nle);
-
- list_for_each_entry(ndl, srvlst, ndl_link)
- LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_srv_nle);
-
- rc = copy_to_user(ent_up, entp,
- sizeof(struct lstcon_test_batch_ent)) ? -EFAULT : 0;
-
- kfree(entp);
-
- return rc;
-}
-
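-/*
- * Per-node condition for batch transactions: return 1 to include @nd
- * in the transaction, 0 to skip it, or a negative errno on error.
- */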
-static int
-lstcon_batrpc_condition(int transop, struct lstcon_node *nd, void *arg)
-{
- switch (transop) {
- case LST_TRANS_TSBRUN:
- if (nd->nd_state != LST_NODE_ACTIVE)
- return -ENETDOWN;
- break;
-
- case LST_TRANS_TSBSTOP:
- if (nd->nd_state != LST_NODE_ACTIVE)
- return 0;
- break;
-
- case LST_TRANS_TSBCLIQRY:
- case LST_TRANS_TSBSRVQRY:
- break;
- }
-
- return 1;
-}
-
-static int
-lstcon_batch_op(struct lstcon_batch *bat, int transop,
- struct list_head __user *result_up)
-{
- struct lstcon_rpc_trans *trans;
- int rc;
-
- rc = lstcon_rpc_trans_ndlist(&bat->bat_cli_list,
- &bat->bat_trans_list, transop,
- bat, lstcon_batrpc_condition, &trans);
- if (rc) {
- CERROR("Can't create transaction: %d\n", rc);
- return rc;
- }
-
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
- lstcon_rpc_trans_destroy(trans);
-
- return rc;
-}
-
-int
-lstcon_batch_run(char *name, int timeout, struct list_head __user *result_up)
-{
- struct lstcon_batch *bat;
- int rc;
-
- if (lstcon_batch_find(name, &bat)) {
- CDEBUG(D_NET, "Can't find batch %s\n", name);
- return -ENOENT;
- }
-
- bat->bat_arg = timeout;
-
- rc = lstcon_batch_op(bat, LST_TRANS_TSBRUN, result_up);
-
-	/* mark the batch as running if it started on any node */
- if (lstcon_tsbop_stat_success(lstcon_trans_stat(), 0))
- bat->bat_state = LST_BATCH_RUNNING;
-
- return rc;
-}
-
-int
-lstcon_batch_stop(char *name, int force, struct list_head __user *result_up)
-{
- struct lstcon_batch *bat;
- int rc;
-
- if (lstcon_batch_find(name, &bat)) {
- CDEBUG(D_NET, "Can't find batch %s\n", name);
- return -ENOENT;
- }
-
- bat->bat_arg = force;
-
- rc = lstcon_batch_op(bat, LST_TRANS_TSBSTOP, result_up);
-
- /* mark batch as stopped if all RPCs finished */
- if (!lstcon_tsbop_stat_failure(lstcon_trans_stat(), 0))
- bat->bat_state = LST_BATCH_IDLE;
-
- return rc;
-}
-
-static void
-lstcon_batch_destroy(struct lstcon_batch *bat)
-{
- struct lstcon_ndlink *ndl;
- struct lstcon_test *test;
- int i;
-
- list_del(&bat->bat_link);
-
- while (!list_empty(&bat->bat_test_list)) {
- test = list_entry(bat->bat_test_list.next,
- struct lstcon_test, tes_link);
- LASSERT(list_empty(&test->tes_trans_list));
-
- list_del(&test->tes_link);
-
- lstcon_group_decref(test->tes_src_grp);
- lstcon_group_decref(test->tes_dst_grp);
-
- kfree(test);
- }
-
- LASSERT(list_empty(&bat->bat_trans_list));
-
- while (!list_empty(&bat->bat_cli_list)) {
- ndl = list_entry(bat->bat_cli_list.next,
- struct lstcon_ndlink, ndl_link);
- list_del_init(&ndl->ndl_link);
-
- lstcon_ndlink_release(ndl);
- }
-
- while (!list_empty(&bat->bat_srv_list)) {
- ndl = list_entry(bat->bat_srv_list.next,
- struct lstcon_ndlink, ndl_link);
- list_del_init(&ndl->ndl_link);
-
- lstcon_ndlink_release(ndl);
- }
-
- for (i = 0; i < LST_NODE_HASHSIZE; i++) {
- LASSERT(list_empty(&bat->bat_cli_hash[i]));
- LASSERT(list_empty(&bat->bat_srv_hash[i]));
- }
-
- kfree(bat->bat_cli_hash);
- kfree(bat->bat_srv_hash);
- kfree(bat);
-}
-
-static int
-lstcon_testrpc_condition(int transop, struct lstcon_node *nd, void *arg)
-{
- struct lstcon_test *test;
- struct lstcon_batch *batch;
- struct lstcon_ndlink *ndl;
- struct list_head *hash;
- struct list_head *head;
-
- test = (struct lstcon_test *)arg;
- LASSERT(test);
-
- batch = test->tes_batch;
- LASSERT(batch);
-
- if (test->tes_oneside &&
- transop == LST_TRANS_TSBSRVADD)
- return 0;
-
- if (nd->nd_state != LST_NODE_ACTIVE)
- return -ENETDOWN;
-
- if (transop == LST_TRANS_TSBCLIADD) {
- hash = batch->bat_cli_hash;
- head = &batch->bat_cli_list;
-
- } else {
- LASSERT(transop == LST_TRANS_TSBSRVADD);
-
- hash = batch->bat_srv_hash;
- head = &batch->bat_srv_list;
- }
-
- LASSERT(nd->nd_id.nid != LNET_NID_ANY);
-
- if (lstcon_ndlink_find(hash, nd->nd_id, &ndl, 1))
- return -ENOMEM;
-
- if (list_empty(&ndl->ndl_link))
- list_add_tail(&ndl->ndl_link, head);
-
- return 1;
-}
-
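-/*
- * Set up the test on both groups: servers (destination group) first,
- * then clients (source group); bail out if the server pass reports
- * any RPC or framework error.
- */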
-static int
-lstcon_test_nodes_add(struct lstcon_test *test,
- struct list_head __user *result_up)
-{
- struct lstcon_rpc_trans *trans;
- struct lstcon_group *grp;
- int transop;
- int rc;
-
- LASSERT(test->tes_src_grp);
- LASSERT(test->tes_dst_grp);
-
- transop = LST_TRANS_TSBSRVADD;
- grp = test->tes_dst_grp;
-again:
- rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
- &test->tes_trans_list, transop,
- test, lstcon_testrpc_condition, &trans);
- if (rc) {
- CERROR("Can't create transaction: %d\n", rc);
- return rc;
- }
-
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- if (lstcon_trans_stat()->trs_rpc_errno ||
- lstcon_trans_stat()->trs_fwk_errno) {
- lstcon_rpc_trans_interpreter(trans, result_up, NULL);
-
- lstcon_rpc_trans_destroy(trans);
- /* return if any error */
- CDEBUG(D_NET, "Failed to add test %s, RPC error %d, framework error %d\n",
- transop == LST_TRANS_TSBCLIADD ? "client" : "server",
- lstcon_trans_stat()->trs_rpc_errno,
- lstcon_trans_stat()->trs_fwk_errno);
-
- return rc;
- }
-
- lstcon_rpc_trans_destroy(trans);
-
- if (transop == LST_TRANS_TSBCLIADD)
- return rc;
-
- transop = LST_TRANS_TSBCLIADD;
- grp = test->tes_src_grp;
- test->tes_cliidx = 0;
-
-	/* now send the requests to the test clients */
- goto again;
-}
-
-static int
-lstcon_verify_batch(const char *name, struct lstcon_batch **batch)
-{
- int rc;
-
- rc = lstcon_batch_find(name, batch);
- if (rc) {
- CDEBUG(D_NET, "Can't find batch %s\n", name);
- return rc;
- }
-
- if ((*batch)->bat_state != LST_BATCH_IDLE) {
- CDEBUG(D_NET, "Can't change running batch %s\n", name);
- return -EINVAL;
- }
-
- return 0;
-}
-
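-/*
- * A group can take part in a test only if it contains at least one
- * ACTIVE node.
- */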
-static int
-lstcon_verify_group(const char *name, struct lstcon_group **grp)
-{
- int rc;
- struct lstcon_ndlink *ndl;
-
- rc = lstcon_group_find(name, grp);
- if (rc) {
- CDEBUG(D_NET, "can't find group %s\n", name);
- return rc;
- }
-
- list_for_each_entry(ndl, &(*grp)->grp_ndl_list, ndl_link) {
- if (ndl->ndl_node->nd_state == LST_NODE_ACTIVE)
- return 0;
- }
-
- CDEBUG(D_NET, "Group %s has no ACTIVE nodes\n", name);
-
- return -EINVAL;
-}
-
-int
-lstcon_test_add(char *batch_name, int type, int loop,
- int concur, int dist, int span,
- char *src_name, char *dst_name,
- void *param, int paramlen, int *retp,
- struct list_head __user *result_up)
-{
- struct lstcon_test *test = NULL;
- int rc;
- struct lstcon_group *src_grp = NULL;
- struct lstcon_group *dst_grp = NULL;
- struct lstcon_batch *batch = NULL;
-
- /*
- * verify that a batch of the given name exists, and the groups
- * that will be part of the batch exist and have at least one
- * active node
- */
- rc = lstcon_verify_batch(batch_name, &batch);
- if (rc)
- goto out;
-
- rc = lstcon_verify_group(src_name, &src_grp);
- if (rc)
- goto out;
-
- rc = lstcon_verify_group(dst_name, &dst_grp);
- if (rc)
- goto out;
-
- if (dst_grp->grp_userland)
- *retp = 1;
-
- test = kzalloc(offsetof(struct lstcon_test, tes_param[paramlen]),
- GFP_KERNEL);
- if (!test) {
- CERROR("Can't allocate test descriptor\n");
- rc = -ENOMEM;
-
- goto out;
- }
-
- test->tes_hdr.tsb_id = batch->bat_hdr.tsb_id;
- test->tes_batch = batch;
- test->tes_type = type;
- test->tes_oneside = 0; /* TODO */
- test->tes_loop = loop;
- test->tes_concur = concur;
- test->tes_stop_onerr = 1; /* TODO */
- test->tes_span = span;
- test->tes_dist = dist;
- test->tes_cliidx = 0; /* just used for creating RPC */
- test->tes_src_grp = src_grp;
- test->tes_dst_grp = dst_grp;
- INIT_LIST_HEAD(&test->tes_trans_list);
-
- if (param) {
- test->tes_paramlen = paramlen;
- memcpy(&test->tes_param[0], param, paramlen);
- }
-
- rc = lstcon_test_nodes_add(test, result_up);
-
- if (rc)
- goto out;
-
- if (lstcon_trans_stat()->trs_rpc_errno ||
- lstcon_trans_stat()->trs_fwk_errno)
- CDEBUG(D_NET, "Failed to add test %d to batch %s\n", type,
- batch_name);
-
- /* add to test list anyway, so user can check what's going on */
- list_add_tail(&test->tes_link, &batch->bat_test_list);
-
- batch->bat_ntest++;
- test->tes_hdr.tsb_index = batch->bat_ntest;
-
-	/* keep references on the groups so nobody can change them */
- return rc;
-out:
- kfree(test);
-
- if (dst_grp)
- lstcon_group_decref(dst_grp);
-
- if (src_grp)
- lstcon_group_decref(src_grp);
-
- return rc;
-}
-
-static int
-lstcon_test_find(struct lstcon_batch *batch, int idx,
- struct lstcon_test **testpp)
-{
- struct lstcon_test *test;
-
- list_for_each_entry(test, &batch->bat_test_list, tes_link) {
- if (idx == test->tes_hdr.tsb_index) {
- *testpp = test;
- return 0;
- }
- }
-
- return -ENOENT;
-}
-
-static int
-lstcon_tsbrpc_readent(int transop, struct srpc_msg *msg,
- struct lstcon_rpc_ent __user *ent_up)
-{
- struct srpc_batch_reply *rep = &msg->msg_body.bat_reply;
-
- LASSERT(transop == LST_TRANS_TSBCLIQRY ||
- transop == LST_TRANS_TSBSRVQRY);
-
- /* positive errno, framework error code */
- if (copy_to_user(&ent_up->rpe_priv[0], &rep->bar_active,
- sizeof(rep->bar_active)))
- return -EFAULT;
-
- return 0;
-}
-
-int
-lstcon_test_batch_query(char *name, int testidx, int client,
- int timeout, struct list_head __user *result_up)
-{
- struct lstcon_rpc_trans *trans;
- struct list_head *translist;
- struct list_head *ndlist;
- struct lstcon_tsb_hdr *hdr;
- struct lstcon_batch *batch;
- struct lstcon_test *test = NULL;
- int transop;
- int rc;
-
- rc = lstcon_batch_find(name, &batch);
- if (rc) {
- CDEBUG(D_NET, "Can't find batch: %s\n", name);
- return rc;
- }
-
- if (!testidx) {
- translist = &batch->bat_trans_list;
- ndlist = &batch->bat_cli_list;
- hdr = &batch->bat_hdr;
- } else {
- /* query specified test only */
- rc = lstcon_test_find(batch, testidx, &test);
- if (rc) {
- CDEBUG(D_NET, "Can't find test: %d\n", testidx);
- return rc;
- }
-
- translist = &test->tes_trans_list;
- ndlist = &test->tes_src_grp->grp_ndl_list;
- hdr = &test->tes_hdr;
- }
-
- transop = client ? LST_TRANS_TSBCLIQRY : LST_TRANS_TSBSRVQRY;
-
- rc = lstcon_rpc_trans_ndlist(ndlist, translist, transop, hdr,
- lstcon_batrpc_condition, &trans);
- if (rc) {
- CERROR("Can't create transaction: %d\n", rc);
- return rc;
- }
-
- lstcon_rpc_trans_postwait(trans, timeout);
-
- /* query a batch, not a test */
- if (!testidx &&
- !lstcon_rpc_stat_failure(lstcon_trans_stat(), 0) &&
- !lstcon_tsbqry_stat_run(lstcon_trans_stat(), 0)) {
- /* all RPCs finished, and no active test */
- batch->bat_state = LST_BATCH_IDLE;
- }
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up,
- lstcon_tsbrpc_readent);
- lstcon_rpc_trans_destroy(trans);
-
- return rc;
-}
-
-static int
-lstcon_statrpc_readent(int transop, struct srpc_msg *msg,
- struct lstcon_rpc_ent __user *ent_up)
-{
- struct srpc_stat_reply *rep = &msg->msg_body.stat_reply;
- struct sfw_counters __user *sfwk_stat;
- struct srpc_counters __user *srpc_stat;
- struct lnet_counters __user *lnet_stat;
-
- if (rep->str_status)
- return 0;
-
- sfwk_stat = (struct sfw_counters __user *)&ent_up->rpe_payload[0];
- srpc_stat = (struct srpc_counters __user *)(sfwk_stat + 1);
- lnet_stat = (struct lnet_counters __user *)(srpc_stat + 1);
-
- if (copy_to_user(sfwk_stat, &rep->str_fw, sizeof(*sfwk_stat)) ||
- copy_to_user(srpc_stat, &rep->str_rpc, sizeof(*srpc_stat)) ||
- copy_to_user(lnet_stat, &rep->str_lnet, sizeof(*lnet_stat)))
- return -EFAULT;
-
- return 0;
-}
-
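-/*
- * Collect framework, RPC and LNet counters from every node on @ndlist
- * with a single STATQRY transaction.
- */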
-static int
-lstcon_ndlist_stat(struct list_head *ndlist,
- int timeout, struct list_head __user *result_up)
-{
- struct list_head head;
- struct lstcon_rpc_trans *trans;
- int rc;
-
- INIT_LIST_HEAD(&head);
-
- rc = lstcon_rpc_trans_ndlist(ndlist, &head,
- LST_TRANS_STATQRY, NULL, NULL, &trans);
- if (rc) {
- CERROR("Can't create transaction: %d\n", rc);
- return rc;
- }
-
- lstcon_rpc_trans_postwait(trans, LST_VALIDATE_TIMEOUT(timeout));
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up,
- lstcon_statrpc_readent);
- lstcon_rpc_trans_destroy(trans);
-
- return rc;
-}
-
-int
-lstcon_group_stat(char *grp_name, int timeout,
- struct list_head __user *result_up)
-{
- struct lstcon_group *grp;
- int rc;
-
- rc = lstcon_group_find(grp_name, &grp);
- if (rc) {
- CDEBUG(D_NET, "Can't find group %s\n", grp_name);
- return rc;
- }
-
- rc = lstcon_ndlist_stat(&grp->grp_ndl_list, timeout, result_up);
-
- lstcon_group_decref(grp);
-
- return rc;
-}
-
-int
-lstcon_nodes_stat(int count, struct lnet_process_id __user *ids_up,
- int timeout, struct list_head __user *result_up)
-{
- struct lstcon_ndlink *ndl;
- struct lstcon_group *tmp;
- struct lnet_process_id id;
- int i;
- int rc;
-
- rc = lstcon_group_alloc(NULL, &tmp);
- if (rc) {
- CERROR("Out of memory\n");
- return -ENOMEM;
- }
-
-	for (i = 0; i < count; i++) {
- if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
- rc = -EFAULT;
- break;
- }
-
- /* add to tmp group */
- rc = lstcon_group_ndlink_find(tmp, id, &ndl, 2);
- if (rc) {
- CDEBUG((rc == -ENOMEM) ? D_ERROR : D_NET,
- "Failed to find or create %s: %d\n",
- libcfs_id2str(id), rc);
- break;
- }
- }
-
- if (rc) {
- lstcon_group_decref(tmp);
- return rc;
- }
-
- rc = lstcon_ndlist_stat(&tmp->grp_ndl_list, timeout, result_up);
-
- lstcon_group_decref(tmp);
-
- return rc;
-}
-
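-/*
- * Send a session query (SESQRY) to every node on @ndlist and copy the
- * per-node results back to userspace.
- */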
-static int
-lstcon_debug_ndlist(struct list_head *ndlist,
- struct list_head *translist,
- int timeout, struct list_head __user *result_up)
-{
- struct lstcon_rpc_trans *trans;
- int rc;
-
- rc = lstcon_rpc_trans_ndlist(ndlist, translist, LST_TRANS_SESQRY,
- NULL, lstcon_sesrpc_condition, &trans);
- if (rc) {
- CERROR("Can't create transaction: %d\n", rc);
- return rc;
- }
-
- lstcon_rpc_trans_postwait(trans, LST_VALIDATE_TIMEOUT(timeout));
-
- rc = lstcon_rpc_trans_interpreter(trans, result_up,
- lstcon_sesrpc_readent);
- lstcon_rpc_trans_destroy(trans);
-
- return rc;
-}
-
-int
-lstcon_session_debug(int timeout, struct list_head __user *result_up)
-{
- return lstcon_debug_ndlist(&console_session.ses_ndl_list,
- NULL, timeout, result_up);
-}
-
-int
-lstcon_batch_debug(int timeout, char *name,
- int client, struct list_head __user *result_up)
-{
- struct lstcon_batch *bat;
- int rc;
-
- rc = lstcon_batch_find(name, &bat);
- if (rc)
- return -ENOENT;
-
- rc = lstcon_debug_ndlist(client ? &bat->bat_cli_list :
- &bat->bat_srv_list,
- NULL, timeout, result_up);
-
- return rc;
-}
-
-int
-lstcon_group_debug(int timeout, char *name,
- struct list_head __user *result_up)
-{
- struct lstcon_group *grp;
- int rc;
-
- rc = lstcon_group_find(name, &grp);
- if (rc)
- return -ENOENT;
-
- rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL,
- timeout, result_up);
- lstcon_group_decref(grp);
-
- return rc;
-}
-
-int
-lstcon_nodes_debug(int timeout, int count,
- struct lnet_process_id __user *ids_up,
- struct list_head __user *result_up)
-{
- struct lnet_process_id id;
- struct lstcon_ndlink *ndl;
- struct lstcon_group *grp;
- int i;
- int rc;
-
- rc = lstcon_group_alloc(NULL, &grp);
- if (rc) {
- CDEBUG(D_NET, "Out of memory\n");
- return rc;
- }
-
- for (i = 0; i < count; i++) {
- if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
- rc = -EFAULT;
- break;
- }
-
- /* node is added to tmp group */
- rc = lstcon_group_ndlink_find(grp, id, &ndl, 1);
- if (rc) {
- CERROR("Can't create node link\n");
- break;
- }
- }
-
- if (rc) {
- lstcon_group_decref(grp);
- return rc;
- }
-
- rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL,
- timeout, result_up);
-
- lstcon_group_decref(grp);
-
- return rc;
-}
-
-int
-lstcon_session_match(struct lst_sid sid)
-{
- return (console_session.ses_id.ses_nid == sid.ses_nid &&
- console_session.ses_id.ses_stamp == sid.ses_stamp) ? 1 : 0;
-}
-
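-/* build a session id from the local NID and the current jiffies stamp */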
-static void
-lstcon_new_session_id(struct lst_sid *sid)
-{
- struct lnet_process_id id;
-
- LASSERT(console_session.ses_state == LST_SESSION_NONE);
-
- LNetGetId(1, &id);
- sid->ses_nid = id.nid;
- sid->ses_stamp = jiffies;
-}
-
-int
-lstcon_session_new(char *name, int key, unsigned int feats,
- int timeout, int force, struct lst_sid __user *sid_up)
-{
- int rc = 0;
- int i;
-
- if (console_session.ses_state != LST_SESSION_NONE) {
- /* session exists */
- if (!force) {
- CNETERR("Session %s already exists\n",
- console_session.ses_name);
- return -EEXIST;
- }
-
- rc = lstcon_session_end();
-
-	/* lstcon_session_end() only returns local errors */
- if (rc)
- return rc;
- }
-
- if (feats & ~LST_FEATS_MASK) {
- CNETERR("Unknown session features %x\n",
- (feats & ~LST_FEATS_MASK));
- return -EINVAL;
- }
-
- for (i = 0; i < LST_GLOBAL_HASHSIZE; i++)
- LASSERT(list_empty(&console_session.ses_ndl_hash[i]));
-
- lstcon_new_session_id(&console_session.ses_id);
-
- console_session.ses_key = key;
- console_session.ses_state = LST_SESSION_ACTIVE;
- console_session.ses_force = !!force;
- console_session.ses_features = feats;
- console_session.ses_feats_updated = 0;
- console_session.ses_timeout = (timeout <= 0) ?
- LST_CONSOLE_TIMEOUT : timeout;
-
- if (strlen(name) > sizeof(console_session.ses_name) - 1)
- return -E2BIG;
- strlcpy(console_session.ses_name, name,
- sizeof(console_session.ses_name));
-
- rc = lstcon_batch_add(LST_DEFAULT_BATCH);
- if (rc)
- return rc;
-
- rc = lstcon_rpc_pinger_start();
- if (rc) {
- struct lstcon_batch *bat = NULL;
-
- lstcon_batch_find(LST_DEFAULT_BATCH, &bat);
- lstcon_batch_destroy(bat);
-
- return rc;
- }
-
- if (!copy_to_user(sid_up, &console_session.ses_id,
- sizeof(struct lst_sid)))
- return rc;
-
- lstcon_session_end();
-
- return -EFAULT;
-}
-
-int
-lstcon_session_info(struct lst_sid __user *sid_up, int __user *key_up,
- unsigned __user *featp,
- struct lstcon_ndlist_ent __user *ndinfo_up,
- char __user *name_up, int len)
-{
- struct lstcon_ndlist_ent *entp;
- struct lstcon_ndlink *ndl;
- int rc = 0;
-
- if (console_session.ses_state != LST_SESSION_ACTIVE)
- return -ESRCH;
-
- entp = kzalloc(sizeof(*entp), GFP_NOFS);
- if (!entp)
- return -ENOMEM;
-
- list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link)
- LST_NODE_STATE_COUNTER(ndl->ndl_node, entp);
-
- if (copy_to_user(sid_up, &console_session.ses_id,
- sizeof(*sid_up)) ||
- copy_to_user(key_up, &console_session.ses_key,
- sizeof(*key_up)) ||
- copy_to_user(featp, &console_session.ses_features,
- sizeof(*featp)) ||
- copy_to_user(ndinfo_up, entp, sizeof(*entp)) ||
- copy_to_user(name_up, console_session.ses_name, len))
- rc = -EFAULT;
-
- kfree(entp);
-
- return rc;
-}
-
-int
-lstcon_session_end(void)
-{
- struct lstcon_rpc_trans *trans;
- struct lstcon_group *grp;
- struct lstcon_batch *bat;
- int rc = 0;
-
- LASSERT(console_session.ses_state == LST_SESSION_ACTIVE);
-
- rc = lstcon_rpc_trans_ndlist(&console_session.ses_ndl_list,
- NULL, LST_TRANS_SESEND, NULL,
- lstcon_sesrpc_condition, &trans);
- if (rc) {
- CERROR("Can't create transaction: %d\n", rc);
- return rc;
- }
-
- console_session.ses_shutdown = 1;
-
- lstcon_rpc_pinger_stop();
-
- lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
-
- lstcon_rpc_trans_destroy(trans);
-	/* the user can do nothing even if an RPC failed, so carry on */
-
-	/* wait for orphan RPCs to die */
- lstcon_rpc_cleanup_wait();
-
- console_session.ses_id = LST_INVALID_SID;
- console_session.ses_state = LST_SESSION_NONE;
- console_session.ses_key = 0;
- console_session.ses_force = 0;
- console_session.ses_feats_updated = 0;
-
- /* destroy all batches */
- while (!list_empty(&console_session.ses_bat_list)) {
- bat = list_entry(console_session.ses_bat_list.next,
- struct lstcon_batch, bat_link);
-
- lstcon_batch_destroy(bat);
- }
-
- /* destroy all groups */
- while (!list_empty(&console_session.ses_grp_list)) {
- grp = list_entry(console_session.ses_grp_list.next,
- struct lstcon_group, grp_link);
- LASSERT(grp->grp_ref == 1);
-
- lstcon_group_decref(grp);
- }
-
- /* all nodes should be released */
- LASSERT(list_empty(&console_session.ses_ndl_list));
-
- console_session.ses_shutdown = 0;
- console_session.ses_expired = 0;
-
- return rc;
-}
-
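-/*
- * The first remote node to report its feature mask fixes the session
- * features; every later node must match them exactly or the check
- * fails with -EPROTO.
- */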
-int
-lstcon_session_feats_check(unsigned int feats)
-{
- int rc = 0;
-
- if (feats & ~LST_FEATS_MASK) {
- CERROR("Can't support these features: %x\n",
- (feats & ~LST_FEATS_MASK));
- return -EPROTO;
- }
-
- spin_lock(&console_session.ses_rpc_lock);
-
- if (!console_session.ses_feats_updated) {
- console_session.ses_feats_updated = 1;
- console_session.ses_features = feats;
- }
-
- if (console_session.ses_features != feats)
- rc = -EPROTO;
-
- spin_unlock(&console_session.ses_rpc_lock);
-
- if (rc) {
- CERROR("remote features %x do not match with session features %x of console\n",
- feats, console_session.ses_features);
- }
-
- return rc;
-}
-
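-/*
- * Handle a "join session" RPC from a test node: validate the session
- * and feature mask, find or create the requested group, and register
- * the node in it as ACTIVE.
- */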
-static int
-lstcon_acceptor_handle(struct srpc_server_rpc *rpc)
-{
- struct srpc_msg *rep = &rpc->srpc_replymsg;
- struct srpc_msg *req = &rpc->srpc_reqstbuf->buf_msg;
- struct srpc_join_reqst *jreq = &req->msg_body.join_reqst;
- struct srpc_join_reply *jrep = &rep->msg_body.join_reply;
- struct lstcon_group *grp = NULL;
- struct lstcon_ndlink *ndl;
- int rc = 0;
-
- sfw_unpack_message(req);
-
- mutex_lock(&console_session.ses_mutex);
-
- jrep->join_sid = console_session.ses_id;
-
- if (console_session.ses_id.ses_nid == LNET_NID_ANY) {
- jrep->join_status = ESRCH;
- goto out;
- }
-
- if (lstcon_session_feats_check(req->msg_ses_feats)) {
- jrep->join_status = EPROTO;
- goto out;
- }
-
- if (jreq->join_sid.ses_nid != LNET_NID_ANY &&
- !lstcon_session_match(jreq->join_sid)) {
- jrep->join_status = EBUSY;
- goto out;
- }
-
- if (lstcon_group_find(jreq->join_group, &grp)) {
- rc = lstcon_group_alloc(jreq->join_group, &grp);
- if (rc) {
- CERROR("Out of memory\n");
- goto out;
- }
-
- list_add_tail(&grp->grp_link,
- &console_session.ses_grp_list);
- lstcon_group_addref(grp);
- }
-
- if (grp->grp_ref > 2) {
-		/* Group is in use */
- jrep->join_status = EBUSY;
- goto out;
- }
-
- rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 0);
- if (!rc) {
- jrep->join_status = EEXIST;
- goto out;
- }
-
- rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 1);
- if (rc) {
- CERROR("Out of memory\n");
- goto out;
- }
-
- ndl->ndl_node->nd_state = LST_NODE_ACTIVE;
- ndl->ndl_node->nd_timeout = console_session.ses_timeout;
-
- if (!grp->grp_userland)
- grp->grp_userland = 1;
-
- strlcpy(jrep->join_session, console_session.ses_name,
- sizeof(jrep->join_session));
- jrep->join_timeout = console_session.ses_timeout;
- jrep->join_status = 0;
-
-out:
- rep->msg_ses_feats = console_session.ses_features;
- if (grp)
- lstcon_group_decref(grp);
-
- mutex_unlock(&console_session.ses_mutex);
-
- return rc;
-}
-
-static struct srpc_service lstcon_acceptor_service;
-
-static void lstcon_init_acceptor_service(void)
-{
- /* initialize selftest console acceptor service table */
- lstcon_acceptor_service.sv_name = "join session";
- lstcon_acceptor_service.sv_handler = lstcon_acceptor_handle;
- lstcon_acceptor_service.sv_id = SRPC_SERVICE_JOIN;
- lstcon_acceptor_service.sv_wi_total = SFW_FRWK_WI_MAX;
-}
-
-static struct notifier_block lstcon_ioctl_handler = {
- .notifier_call = lstcon_ioctl_entry,
-};
-
-/* initialize console */
-int
-lstcon_console_init(void)
-{
- int i;
- int rc;
-
- memset(&console_session, 0, sizeof(struct lstcon_session));
-
- console_session.ses_id = LST_INVALID_SID;
- console_session.ses_state = LST_SESSION_NONE;
- console_session.ses_timeout = 0;
- console_session.ses_force = 0;
- console_session.ses_expired = 0;
- console_session.ses_feats_updated = 0;
- console_session.ses_features = LST_FEATS_MASK;
- console_session.ses_laststamp = ktime_get_real_seconds();
-
- mutex_init(&console_session.ses_mutex);
-
- INIT_LIST_HEAD(&console_session.ses_ndl_list);
- INIT_LIST_HEAD(&console_session.ses_grp_list);
- INIT_LIST_HEAD(&console_session.ses_bat_list);
- INIT_LIST_HEAD(&console_session.ses_trans_list);
-
- console_session.ses_ndl_hash =
- kmalloc(sizeof(struct list_head) * LST_GLOBAL_HASHSIZE, GFP_KERNEL);
- if (!console_session.ses_ndl_hash)
- return -ENOMEM;
-
- for (i = 0; i < LST_GLOBAL_HASHSIZE; i++)
- INIT_LIST_HEAD(&console_session.ses_ndl_hash[i]);
-
- /* initialize acceptor service table */
- lstcon_init_acceptor_service();
-
- rc = srpc_add_service(&lstcon_acceptor_service);
- LASSERT(rc != -EBUSY);
- if (rc) {
- kfree(console_session.ses_ndl_hash);
- return rc;
- }
-
- rc = srpc_service_add_buffers(&lstcon_acceptor_service,
- lstcon_acceptor_service.sv_wi_total);
- if (rc) {
- rc = -ENOMEM;
- goto out;
- }
-
- rc = blocking_notifier_chain_register(&libcfs_ioctl_list,
- &lstcon_ioctl_handler);
-
- if (!rc) {
- lstcon_rpc_module_init();
- return 0;
- }
-
-out:
- srpc_shutdown_service(&lstcon_acceptor_service);
- srpc_remove_service(&lstcon_acceptor_service);
-
- kfree(console_session.ses_ndl_hash);
-
- srpc_wait_service_shutdown(&lstcon_acceptor_service);
-
- return rc;
-}
-
-int
-lstcon_console_fini(void)
-{
- int i;
-
- blocking_notifier_chain_unregister(&libcfs_ioctl_list,
- &lstcon_ioctl_handler);
-
- mutex_lock(&console_session.ses_mutex);
-
- srpc_shutdown_service(&lstcon_acceptor_service);
- srpc_remove_service(&lstcon_acceptor_service);
-
- if (console_session.ses_state != LST_SESSION_NONE)
- lstcon_session_end();
-
- lstcon_rpc_module_fini();
-
- mutex_unlock(&console_session.ses_mutex);
-
- LASSERT(list_empty(&console_session.ses_ndl_list));
- LASSERT(list_empty(&console_session.ses_grp_list));
- LASSERT(list_empty(&console_session.ses_bat_list));
- LASSERT(list_empty(&console_session.ses_trans_list));
-
-	for (i = 0; i < LST_GLOBAL_HASHSIZE; i++)
- LASSERT(list_empty(&console_session.ses_ndl_hash[i]));
-
- kfree(console_session.ses_ndl_hash);
-
- srpc_wait_service_shutdown(&lstcon_acceptor_service);
-
- return 0;
-}
diff --git a/drivers/staging/lustre/lnet/selftest/console.h b/drivers/staging/lustre/lnet/selftest/console.h
deleted file mode 100644
index 2826205e36a1..000000000000
--- a/drivers/staging/lustre/lnet/selftest/console.h
+++ /dev/null
@@ -1,244 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/console.h
- *
- * kernel structure for LST console
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- */
-
-#ifndef __LST_CONSOLE_H__
-#define __LST_CONSOLE_H__
-
-#include <linux/lnet/lib-types.h>
-#include <uapi/linux/lnet/lnetst.h>
-#include "selftest.h"
-#include "conrpc.h"
-
-/* node descriptor */
-struct lstcon_node {
- struct lnet_process_id nd_id; /* id of the node */
- int nd_ref; /* reference count */
- int nd_state; /* state of the node */
- int nd_timeout; /* session timeout */
- unsigned long nd_stamp; /* timestamp of last replied RPC */
- struct lstcon_rpc nd_ping; /* ping rpc */
-};
-
-/* node link descriptor */
-struct lstcon_ndlink {
- struct list_head ndl_link; /* chain on list */
- struct list_head ndl_hlink; /* chain on hash */
- struct lstcon_node *ndl_node; /* pointer to node */
-};
-
-/* group descriptor (an alias for a set of nodes) */
-struct lstcon_group {
- struct list_head grp_link; /* chain on global group list
- */
- int grp_ref; /* reference count */
- int grp_userland; /* has userland nodes */
- int grp_nnode; /* # of nodes */
- char grp_name[LST_NAME_SIZE]; /* group name */
-
- struct list_head grp_trans_list; /* transaction list */
- struct list_head grp_ndl_list; /* nodes list */
- struct list_head grp_ndl_hash[0]; /* hash table for nodes */
-};
-
-#define LST_BATCH_IDLE 0xB0 /* idle batch */
-#define LST_BATCH_RUNNING 0xB1 /* running batch */
-
-struct lstcon_tsb_hdr {
- struct lst_bid tsb_id; /* batch ID */
- int tsb_index; /* test index */
-};
-
-/* (tests) batch descriptor */
-struct lstcon_batch {
- struct lstcon_tsb_hdr bat_hdr; /* test_batch header */
- struct list_head bat_link; /* chain on session's batches list */
-	int bat_ntest; /* # of tests */
- int bat_state; /* state of the batch */
- int bat_arg; /* parameter for run|stop, timeout
- * for run, force for stop
- */
- char bat_name[LST_NAME_SIZE];/* name of batch */
-
- struct list_head bat_test_list; /* list head of tests (struct lstcon_test)
- */
-	struct list_head bat_trans_list; /* list head of transactions */
- struct list_head bat_cli_list; /* list head of client nodes
- * (struct lstcon_node)
- */
- struct list_head *bat_cli_hash; /* hash table of client nodes */
- struct list_head bat_srv_list; /* list head of server nodes */
- struct list_head *bat_srv_hash; /* hash table of server nodes */
-};
-
-/* a single test descriptor */
-struct lstcon_test {
- struct lstcon_tsb_hdr tes_hdr; /* test batch header */
- struct list_head tes_link; /* chain on batch's tests list */
- struct lstcon_batch *tes_batch; /* pointer to batch */
-
- int tes_type; /* type of the test, i.e: bulk, ping */
- int tes_stop_onerr; /* stop on error */
- int tes_oneside; /* one-sided test */
- int tes_concur; /* concurrency */
- int tes_loop; /* loop count */
- int tes_dist; /* nodes distribution of target group */
- int tes_span; /* nodes span of target group */
- int tes_cliidx; /* client index, used for RPC creating */
-
- struct list_head tes_trans_list; /* transaction list */
-	struct lstcon_group *tes_src_grp; /* group running the test */
- struct lstcon_group *tes_dst_grp; /* target group */
-
- int tes_paramlen; /* test parameter length */
- char tes_param[0]; /* test parameter */
-};
-
-#define LST_GLOBAL_HASHSIZE 503 /* global nodes hash table size */
-#define LST_NODE_HASHSIZE 239 /* node hash table (for batch or group) */
-
-#define LST_SESSION_NONE 0x0 /* no session */
-#define LST_SESSION_ACTIVE 0x1 /* working session */
-
-#define LST_CONSOLE_TIMEOUT 300 /* default console timeout */
-
-struct lstcon_session {
- struct mutex ses_mutex; /* only 1 thread in session */
- struct lst_sid ses_id; /* global session id */
- int ses_key; /* local session key */
- int ses_state; /* state of session */
- int ses_timeout; /* timeout in seconds */
- time64_t ses_laststamp; /* last operation stamp (seconds)
- */
- unsigned int ses_features; /* tests features of the session
- */
- unsigned int ses_feats_updated:1; /* features are synced with
- * remote test nodes
- */
- unsigned int ses_force:1; /* force creating */
- unsigned int ses_shutdown:1; /* session is shutting down */
-	unsigned int ses_expired:1; /* console has timed out */
- __u64 ses_id_cookie; /* batch id cookie */
- char ses_name[LST_NAME_SIZE];/* session name */
- struct lstcon_rpc_trans *ses_ping; /* session pinger */
- struct stt_timer ses_ping_timer; /* timer for pinger */
- struct lstcon_trans_stat ses_trans_stat; /* transaction stats */
-
-	struct list_head ses_trans_list; /* global list of transactions */
- struct list_head ses_grp_list; /* global list of groups */
- struct list_head ses_bat_list; /* global list of batches */
- struct list_head ses_ndl_list; /* global list of nodes */
- struct list_head *ses_ndl_hash; /* hash table of nodes */
-
- spinlock_t ses_rpc_lock; /* serialize */
- atomic_t ses_rpc_counter; /* # of initialized RPCs */
- struct list_head ses_rpc_freelist; /* idle console rpc */
-}; /* session descriptor */
-
-extern struct lstcon_session console_session;
-
-static inline struct lstcon_trans_stat *
-lstcon_trans_stat(void)
-{
- return &console_session.ses_trans_stat;
-}
-
-static inline struct list_head *
-lstcon_id2hash(struct lnet_process_id id, struct list_head *hash)
-{
- unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE;
-
- return &hash[idx];
-}
-
-int lstcon_ioctl_entry(struct notifier_block *nb,
- unsigned long cmd, void *vdata);
-int lstcon_console_init(void);
-int lstcon_console_fini(void);
-int lstcon_session_match(struct lst_sid sid);
-int lstcon_session_new(char *name, int key, unsigned int version,
- int timeout, int flags, struct lst_sid __user *sid_up);
-int lstcon_session_info(struct lst_sid __user *sid_up, int __user *key,
- unsigned __user *verp, struct lstcon_ndlist_ent __user *entp,
- char __user *name_up, int len);
-int lstcon_session_end(void);
-int lstcon_session_debug(int timeout, struct list_head __user *result_up);
-int lstcon_session_feats_check(unsigned int feats);
-int lstcon_batch_debug(int timeout, char *name,
- int client, struct list_head __user *result_up);
-int lstcon_group_debug(int timeout, char *name,
- struct list_head __user *result_up);
-int lstcon_nodes_debug(int timeout, int nnd,
- struct lnet_process_id __user *nds_up,
- struct list_head __user *result_up);
-int lstcon_group_add(char *name);
-int lstcon_group_del(char *name);
-int lstcon_group_clean(char *name, int args);
-int lstcon_group_refresh(char *name, struct list_head __user *result_up);
-int lstcon_nodes_add(char *name, int nnd, struct lnet_process_id __user *nds_up,
- unsigned int *featp, struct list_head __user *result_up);
-int lstcon_nodes_remove(char *name, int nnd,
- struct lnet_process_id __user *nds_up,
- struct list_head __user *result_up);
-int lstcon_group_info(char *name, struct lstcon_ndlist_ent __user *gent_up,
- int *index_p, int *ndent_p,
- struct lstcon_node_ent __user *ndents_up);
-int lstcon_group_list(int idx, int len, char __user *name_up);
-int lstcon_batch_add(char *name);
-int lstcon_batch_run(char *name, int timeout,
- struct list_head __user *result_up);
-int lstcon_batch_stop(char *name, int force,
- struct list_head __user *result_up);
-int lstcon_test_batch_query(char *name, int testidx,
- int client, int timeout,
- struct list_head __user *result_up);
-int lstcon_batch_del(char *name);
-int lstcon_batch_list(int idx, int namelen, char __user *name_up);
-int lstcon_batch_info(char *name, struct lstcon_test_batch_ent __user *ent_up,
- int server, int testidx, int *index_p,
- int *ndent_p, struct lstcon_node_ent __user *dents_up);
-int lstcon_group_stat(char *grp_name, int timeout,
- struct list_head __user *result_up);
-int lstcon_nodes_stat(int count, struct lnet_process_id __user *ids_up,
- int timeout, struct list_head __user *result_up);
-int lstcon_test_add(char *batch_name, int type, int loop,
- int concur, int dist, int span,
- char *src_name, char *dst_name,
- void *param, int paramlen, int *retp,
- struct list_head __user *result_up);
-#endif
diff --git a/drivers/staging/lustre/lnet/selftest/framework.c b/drivers/staging/lustre/lnet/selftest/framework.c
deleted file mode 100644
index 741af10560ad..000000000000
--- a/drivers/staging/lustre/lnet/selftest/framework.c
+++ /dev/null
@@ -1,1786 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/framework.c
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-
-struct lst_sid LST_INVALID_SID = {LNET_NID_ANY, -1};
-
-static int session_timeout = 100;
-module_param(session_timeout, int, 0444);
-MODULE_PARM_DESC(session_timeout, "test session timeout in seconds (100 by default, 0 == never)");
-
-static int rpc_timeout = 64;
-module_param(rpc_timeout, int, 0644);
-MODULE_PARM_DESC(rpc_timeout, "rpc timeout in seconds (64 by default, 0 == never)");
-
-#define sfw_unpack_id(id) \
-do { \
- __swab64s(&(id).nid); \
- __swab32s(&(id).pid); \
-} while (0)
-
-#define sfw_unpack_sid(sid) \
-do { \
- __swab64s(&(sid).ses_nid); \
- __swab64s(&(sid).ses_stamp); \
-} while (0)
-
-#define sfw_unpack_fw_counters(fc) \
-do { \
- __swab32s(&(fc).running_ms); \
- __swab32s(&(fc).active_batches); \
- __swab32s(&(fc).zombie_sessions); \
- __swab32s(&(fc).brw_errors); \
- __swab32s(&(fc).ping_errors); \
-} while (0)
-
-#define sfw_unpack_rpc_counters(rc) \
-do { \
- __swab32s(&(rc).errors); \
- __swab32s(&(rc).rpcs_sent); \
- __swab32s(&(rc).rpcs_rcvd); \
- __swab32s(&(rc).rpcs_dropped); \
- __swab32s(&(rc).rpcs_expired); \
- __swab64s(&(rc).bulk_get); \
- __swab64s(&(rc).bulk_put); \
-} while (0)
-
-#define sfw_unpack_lnet_counters(lc) \
-do { \
- __swab32s(&(lc).errors); \
- __swab32s(&(lc).msgs_max); \
- __swab32s(&(lc).msgs_alloc); \
- __swab32s(&(lc).send_count); \
- __swab32s(&(lc).recv_count); \
- __swab32s(&(lc).drop_count); \
- __swab32s(&(lc).route_count); \
- __swab64s(&(lc).send_length); \
- __swab64s(&(lc).recv_length); \
- __swab64s(&(lc).drop_length); \
- __swab64s(&(lc).route_length); \
-} while (0)
-
-#define sfw_test_active(t) (atomic_read(&(t)->tsi_nactive))
-#define sfw_batch_active(b) (atomic_read(&(b)->bat_nactive))
-
-static struct smoketest_framework {
- struct list_head fw_zombie_rpcs; /* RPCs to be recycled */
- struct list_head fw_zombie_sessions; /* stopping sessions */
- struct list_head fw_tests; /* registered test cases */
- atomic_t fw_nzombies; /* # zombie sessions */
- spinlock_t fw_lock; /* serialise */
- struct sfw_session *fw_session; /* _the_ session */
- int fw_shuttingdown; /* shutdown in progress */
- struct srpc_server_rpc *fw_active_srpc;/* running RPC */
-} sfw_data;
-
-/* forward refs */
-int sfw_stop_batch(struct sfw_batch *tsb, int force);
-void sfw_destroy_session(struct sfw_session *sn);
-
-static inline struct sfw_test_case *
-sfw_find_test_case(int id)
-{
- struct sfw_test_case *tsc;
-
- LASSERT(id <= SRPC_SERVICE_MAX_ID);
- LASSERT(id > SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
- list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) {
- if (tsc->tsc_srv_service->sv_id == id)
- return tsc;
- }
-
- return NULL;
-}
-
-static int
-sfw_register_test(struct srpc_service *service,
- struct sfw_test_client_ops *cliops)
-{
- struct sfw_test_case *tsc;
-
- if (sfw_find_test_case(service->sv_id)) {
- CERROR("Failed to register test %s (%d)\n",
- service->sv_name, service->sv_id);
- return -EEXIST;
- }
-
- tsc = kzalloc(sizeof(struct sfw_test_case), GFP_NOFS);
- if (!tsc)
- return -ENOMEM;
-
- tsc->tsc_cli_ops = cliops;
- tsc->tsc_srv_service = service;
-
- list_add_tail(&tsc->tsc_list, &sfw_data.fw_tests);
- return 0;
-}
-
-static void
-sfw_add_session_timer(void)
-{
- struct sfw_session *sn = sfw_data.fw_session;
- struct stt_timer *timer = &sn->sn_timer;
-
- LASSERT(!sfw_data.fw_shuttingdown);
-
- if (!sn || !sn->sn_timeout)
- return;
-
- LASSERT(!sn->sn_timer_active);
-
- sn->sn_timer_active = 1;
- timer->stt_expires = ktime_get_real_seconds() + sn->sn_timeout;
- stt_add_timer(timer);
-}
-
-static int
-sfw_del_session_timer(void)
-{
- struct sfw_session *sn = sfw_data.fw_session;
-
- if (!sn || !sn->sn_timer_active)
- return 0;
-
- LASSERT(sn->sn_timeout);
-
- if (stt_del_timer(&sn->sn_timer)) { /* timer defused */
- sn->sn_timer_active = 0;
- return 0;
- }
-
- return -EBUSY; /* racing with sfw_session_expired() */
-}
-
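-/*
- * Called with sfw_data.fw_lock held; the lock is dropped and re-taken
- * internally while test services are aborted and the session is
- * destroyed.
- */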
-static void
-sfw_deactivate_session(void)
-__must_hold(&sfw_data.fw_lock)
-{
- struct sfw_session *sn = sfw_data.fw_session;
- int nactive = 0;
- struct sfw_batch *tsb;
- struct sfw_test_case *tsc;
-
- if (!sn)
- return;
-
- LASSERT(!sn->sn_timer_active);
-
- sfw_data.fw_session = NULL;
- atomic_inc(&sfw_data.fw_nzombies);
- list_add(&sn->sn_list, &sfw_data.fw_zombie_sessions);
-
- spin_unlock(&sfw_data.fw_lock);
-
- list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) {
- srpc_abort_service(tsc->tsc_srv_service);
- }
-
- spin_lock(&sfw_data.fw_lock);
-
- list_for_each_entry(tsb, &sn->sn_batches, bat_list) {
- if (sfw_batch_active(tsb)) {
- nactive++;
- sfw_stop_batch(tsb, 1);
- }
- }
-
- if (nactive)
- return; /* wait for active batches to stop */
-
- list_del_init(&sn->sn_list);
- spin_unlock(&sfw_data.fw_lock);
-
- sfw_destroy_session(sn);
-
- spin_lock(&sfw_data.fw_lock);
-}
-
-static void
-sfw_session_expired(void *data)
-{
- struct sfw_session *sn = data;
-
- spin_lock(&sfw_data.fw_lock);
-
- LASSERT(sn->sn_timer_active);
- LASSERT(sn == sfw_data.fw_session);
-
- CWARN("Session expired! sid: %s-%llu, name: %s\n",
- libcfs_nid2str(sn->sn_id.ses_nid),
- sn->sn_id.ses_stamp, &sn->sn_name[0]);
-
- sn->sn_timer_active = 0;
- sfw_deactivate_session();
-
- spin_unlock(&sfw_data.fw_lock);
-}
-
-static inline void
-sfw_init_session(struct sfw_session *sn, struct lst_sid sid,
- unsigned int features, const char *name)
-{
- struct stt_timer *timer = &sn->sn_timer;
-
- memset(sn, 0, sizeof(struct sfw_session));
- INIT_LIST_HEAD(&sn->sn_list);
- INIT_LIST_HEAD(&sn->sn_batches);
- atomic_set(&sn->sn_refcount, 1); /* +1 for caller */
- atomic_set(&sn->sn_brw_errors, 0);
- atomic_set(&sn->sn_ping_errors, 0);
- strlcpy(&sn->sn_name[0], name, sizeof(sn->sn_name));
-
- sn->sn_timer_active = 0;
- sn->sn_id = sid;
- sn->sn_features = features;
- sn->sn_timeout = session_timeout;
- sn->sn_started = jiffies;
-
- timer->stt_data = sn;
- timer->stt_func = sfw_session_expired;
- INIT_LIST_HEAD(&timer->stt_list);
-}
-
-/* completion handler for incoming framework RPCs */
-static void
-sfw_server_rpc_done(struct srpc_server_rpc *rpc)
-{
- struct srpc_service *sv = rpc->srpc_scd->scd_svc;
- int status = rpc->srpc_status;
-
- CDEBUG(D_NET, "Incoming framework RPC done: service %s, peer %s, status %s:%d\n",
- sv->sv_name, libcfs_id2str(rpc->srpc_peer),
- swi_state2str(rpc->srpc_wi.swi_state),
- status);
-
- if (rpc->srpc_bulk)
- sfw_free_pages(rpc);
-}
-
-static void
-sfw_client_rpc_fini(struct srpc_client_rpc *rpc)
-{
- LASSERT(!rpc->crpc_bulk.bk_niov);
- LASSERT(list_empty(&rpc->crpc_list));
- LASSERT(!atomic_read(&rpc->crpc_refcount));
-
- CDEBUG(D_NET, "Outgoing framework RPC done: service %d, peer %s, status %s:%d:%d\n",
- rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
- swi_state2str(rpc->crpc_wi.swi_state),
- rpc->crpc_aborted, rpc->crpc_status);
-
- spin_lock(&sfw_data.fw_lock);
-
- /* my callers must finish all RPCs before shutting me down */
- LASSERT(!sfw_data.fw_shuttingdown);
- list_add(&rpc->crpc_list, &sfw_data.fw_zombie_rpcs);
-
- spin_unlock(&sfw_data.fw_lock);
-}
-
-static struct sfw_batch *
-sfw_find_batch(struct lst_bid bid)
-{
- struct sfw_session *sn = sfw_data.fw_session;
- struct sfw_batch *bat;
-
- LASSERT(sn);
-
- list_for_each_entry(bat, &sn->sn_batches, bat_list) {
- if (bat->bat_id.bat_id == bid.bat_id)
- return bat;
- }
-
- return NULL;
-}
-
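-/* find the batch for @bid, creating a new one if it doesn't exist yet */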
-static struct sfw_batch *
-sfw_bid2batch(struct lst_bid bid)
-{
- struct sfw_session *sn = sfw_data.fw_session;
- struct sfw_batch *bat;
-
- LASSERT(sn);
-
- bat = sfw_find_batch(bid);
- if (bat)
- return bat;
-
- bat = kzalloc(sizeof(struct sfw_batch), GFP_NOFS);
- if (!bat)
- return NULL;
-
- bat->bat_error = 0;
- bat->bat_session = sn;
- bat->bat_id = bid;
- atomic_set(&bat->bat_nactive, 0);
- INIT_LIST_HEAD(&bat->bat_tests);
-
- list_add_tail(&bat->bat_list, &sn->sn_batches);
- return bat;
-}
-
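-/* fill a stat reply with framework, RPC and LNet counters for the session */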
-static int
-sfw_get_stats(struct srpc_stat_reqst *request, struct srpc_stat_reply *reply)
-{
- struct sfw_session *sn = sfw_data.fw_session;
- struct sfw_counters *cnt = &reply->str_fw;
- struct sfw_batch *bat;
-
- reply->str_sid = !sn ? LST_INVALID_SID : sn->sn_id;
-
- if (request->str_sid.ses_nid == LNET_NID_ANY) {
- reply->str_status = EINVAL;
- return 0;
- }
-
- if (!sn || !sfw_sid_equal(request->str_sid, sn->sn_id)) {
- reply->str_status = ESRCH;
- return 0;
- }
-
- lnet_counters_get(&reply->str_lnet);
- srpc_get_counters(&reply->str_rpc);
-
-	/*
-	 * Send the number of msecs since the session was started;
-	 * with only 32 bits to send, this wraps after ~49 days.
-	 */
- cnt->running_ms = jiffies_to_msecs(jiffies - sn->sn_started);
- cnt->brw_errors = atomic_read(&sn->sn_brw_errors);
- cnt->ping_errors = atomic_read(&sn->sn_ping_errors);
- cnt->zombie_sessions = atomic_read(&sfw_data.fw_nzombies);
-
- cnt->active_batches = 0;
- list_for_each_entry(bat, &sn->sn_batches, bat_list) {
- if (atomic_read(&bat->bat_nactive) > 0)
- cnt->active_batches++;
- }
-
- reply->str_status = 0;
- return 0;
-}
-
-int
-sfw_make_session(struct srpc_mksn_reqst *request, struct srpc_mksn_reply *reply)
-{
- struct sfw_session *sn = sfw_data.fw_session;
- struct srpc_msg *msg = container_of(request, struct srpc_msg,
- msg_body.mksn_reqst);
- int cplen = 0;
-
- if (request->mksn_sid.ses_nid == LNET_NID_ANY) {
- reply->mksn_sid = !sn ? LST_INVALID_SID : sn->sn_id;
- reply->mksn_status = EINVAL;
- return 0;
- }
-
- if (sn) {
- reply->mksn_status = 0;
- reply->mksn_sid = sn->sn_id;
- reply->mksn_timeout = sn->sn_timeout;
-
- if (sfw_sid_equal(request->mksn_sid, sn->sn_id)) {
- atomic_inc(&sn->sn_refcount);
- return 0;
- }
-
- if (!request->mksn_force) {
- reply->mksn_status = EBUSY;
- cplen = strlcpy(&reply->mksn_name[0], &sn->sn_name[0],
- sizeof(reply->mksn_name));
- if (cplen >= sizeof(reply->mksn_name))
- return -E2BIG;
- return 0;
- }
- }
-
-	/*
-	 * Reject the request if it requires unknown features.
-	 * NB: old versions always accept all features because they are not
-	 * aware of srpc_msg::msg_ses_feats; it's a defect, but it's also
-	 * harmless because they return a zero feature mask to the console,
-	 * and it's the console's responsibility to make sure all nodes in
-	 * a session have the same feature mask.
-	 */
- if (msg->msg_ses_feats & ~LST_FEATS_MASK) {
- reply->mksn_status = EPROTO;
- return 0;
- }
-
-	/* brand new session, or one created by force */
- sn = kzalloc(sizeof(struct sfw_session), GFP_NOFS);
- if (!sn) {
- CERROR("dropping RPC mksn under memory pressure\n");
- return -ENOMEM;
- }
-
- sfw_init_session(sn, request->mksn_sid,
- msg->msg_ses_feats, &request->mksn_name[0]);
-
- spin_lock(&sfw_data.fw_lock);
-
- sfw_deactivate_session();
- LASSERT(!sfw_data.fw_session);
- sfw_data.fw_session = sn;
-
- spin_unlock(&sfw_data.fw_lock);
-
- reply->mksn_status = 0;
- reply->mksn_sid = sn->sn_id;
- reply->mksn_timeout = sn->sn_timeout;
- return 0;
-}
-
-static int
-sfw_remove_session(struct srpc_rmsn_reqst *request,
- struct srpc_rmsn_reply *reply)
-{
- struct sfw_session *sn = sfw_data.fw_session;
-
- reply->rmsn_sid = !sn ? LST_INVALID_SID : sn->sn_id;
-
- if (request->rmsn_sid.ses_nid == LNET_NID_ANY) {
- reply->rmsn_status = EINVAL;
- return 0;
- }
-
- if (!sn || !sfw_sid_equal(request->rmsn_sid, sn->sn_id)) {
- reply->rmsn_status = !sn ? ESRCH : EBUSY;
- return 0;
- }
-
- if (!atomic_dec_and_test(&sn->sn_refcount)) {
- reply->rmsn_status = 0;
- return 0;
- }
-
- spin_lock(&sfw_data.fw_lock);
- sfw_deactivate_session();
- spin_unlock(&sfw_data.fw_lock);
-
- reply->rmsn_status = 0;
- reply->rmsn_sid = LST_INVALID_SID;
- LASSERT(!sfw_data.fw_session);
- return 0;
-}
-
-static int
-sfw_debug_session(struct srpc_debug_reqst *request,
- struct srpc_debug_reply *reply)
-{
- struct sfw_session *sn = sfw_data.fw_session;
-
- if (!sn) {
- reply->dbg_status = ESRCH;
- reply->dbg_sid = LST_INVALID_SID;
- return 0;
- }
-
- reply->dbg_status = 0;
- reply->dbg_sid = sn->sn_id;
- reply->dbg_timeout = sn->sn_timeout;
- if (strlcpy(reply->dbg_name, &sn->sn_name[0], sizeof(reply->dbg_name))
- >= sizeof(reply->dbg_name))
- return -E2BIG;
-
- return 0;
-}
-
-static void
-sfw_test_rpc_fini(struct srpc_client_rpc *rpc)
-{
- struct sfw_test_unit *tsu = rpc->crpc_priv;
- struct sfw_test_instance *tsi = tsu->tsu_instance;
-
- /* Called with hold of tsi->tsi_lock */
- LASSERT(list_empty(&rpc->crpc_list));
- list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs);
-}
-
-static inline int
-sfw_test_buffers(struct sfw_test_instance *tsi)
-{
- struct sfw_test_case *tsc;
- struct srpc_service *svc;
- int nbuf;
-
- LASSERT(tsi);
- tsc = sfw_find_test_case(tsi->tsi_service);
- LASSERT(tsc);
- svc = tsc->tsc_srv_service;
- LASSERT(svc);
-
- nbuf = min(svc->sv_wi_total, tsi->tsi_loop) / svc->sv_ncpts;
- return max(SFW_TEST_WI_MIN, nbuf + SFW_TEST_WI_EXTRA);
-}
-
-static int
-sfw_load_test(struct sfw_test_instance *tsi)
-{
- struct sfw_test_case *tsc;
- struct srpc_service *svc;
- int nbuf;
- int rc;
-
- LASSERT(tsi);
- tsc = sfw_find_test_case(tsi->tsi_service);
- nbuf = sfw_test_buffers(tsi);
- LASSERT(tsc);
- svc = tsc->tsc_srv_service;
-
- if (tsi->tsi_is_client) {
- tsi->tsi_ops = tsc->tsc_cli_ops;
- return 0;
- }
-
- rc = srpc_service_add_buffers(svc, nbuf);
- if (rc) {
- CWARN("Failed to reserve enough buffers: service %s, %d needed: %d\n",
- svc->sv_name, nbuf, rc);
-		/*
-		 * NB: this error handler is not strictly correct, because
-		 * it may release more buffers than were actually allocated,
-		 * but it doesn't matter: the request portal is a lazy
-		 * portal and will grow buffers if necessary.
-		 */
- srpc_service_remove_buffers(svc, nbuf);
- return -ENOMEM;
- }
-
- CDEBUG(D_NET, "Reserved %d buffers for test %s\n",
- nbuf * (srpc_serv_is_framework(svc) ?
- 2 : cfs_cpt_number(cfs_cpt_tab)), svc->sv_name);
- return 0;
-}
-
-static void
-sfw_unload_test(struct sfw_test_instance *tsi)
-{
- struct sfw_test_case *tsc;
-
- LASSERT(tsi);
- tsc = sfw_find_test_case(tsi->tsi_service);
- LASSERT(tsc);
-
- if (tsi->tsi_is_client)
- return;
-
-	/*
-	 * Shrink the buffers: because the request portal is a lazy portal
-	 * that can grow buffers at runtime, we may leave some buffers
-	 * behind, but never mind...
-	 */
- srpc_service_remove_buffers(tsc->tsc_srv_service,
- sfw_test_buffers(tsi));
-}
-
-static void
-sfw_destroy_test_instance(struct sfw_test_instance *tsi)
-{
- struct srpc_client_rpc *rpc;
- struct sfw_test_unit *tsu;
-
- if (!tsi->tsi_is_client)
- goto clean;
-
- tsi->tsi_ops->tso_fini(tsi);
-
- LASSERT(!tsi->tsi_stopping);
- LASSERT(list_empty(&tsi->tsi_active_rpcs));
- LASSERT(!sfw_test_active(tsi));
-
- while (!list_empty(&tsi->tsi_units)) {
- tsu = list_entry(tsi->tsi_units.next,
- struct sfw_test_unit, tsu_list);
- list_del(&tsu->tsu_list);
- kfree(tsu);
- }
-
- while (!list_empty(&tsi->tsi_free_rpcs)) {
- rpc = list_entry(tsi->tsi_free_rpcs.next,
- struct srpc_client_rpc, crpc_list);
- list_del(&rpc->crpc_list);
- kfree(rpc);
- }
-
-clean:
- sfw_unload_test(tsi);
- kfree(tsi);
-}
-
-static void
-sfw_destroy_batch(struct sfw_batch *tsb)
-{
- struct sfw_test_instance *tsi;
-
- LASSERT(!sfw_batch_active(tsb));
- LASSERT(list_empty(&tsb->bat_list));
-
- while (!list_empty(&tsb->bat_tests)) {
- tsi = list_entry(tsb->bat_tests.next,
- struct sfw_test_instance, tsi_list);
- list_del_init(&tsi->tsi_list);
- sfw_destroy_test_instance(tsi);
- }
-
- kfree(tsb);
-}
-
-void
-sfw_destroy_session(struct sfw_session *sn)
-{
- struct sfw_batch *batch;
-
- LASSERT(list_empty(&sn->sn_list));
- LASSERT(sn != sfw_data.fw_session);
-
- while (!list_empty(&sn->sn_batches)) {
- batch = list_entry(sn->sn_batches.next,
- struct sfw_batch, bat_list);
- list_del_init(&batch->bat_list);
- sfw_destroy_batch(batch);
- }
-
- kfree(sn);
- atomic_dec(&sfw_data.fw_nzombies);
-}
-
-static void
-sfw_unpack_addtest_req(struct srpc_msg *msg)
-{
- struct srpc_test_reqst *req = &msg->msg_body.tes_reqst;
-
- LASSERT(msg->msg_type == SRPC_MSG_TEST_REQST);
- LASSERT(req->tsr_is_client);
-
- if (msg->msg_magic == SRPC_MSG_MAGIC)
- return; /* no flipping needed */
-
- LASSERT(msg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
- if (req->tsr_service == SRPC_SERVICE_BRW) {
- if (!(msg->msg_ses_feats & LST_FEAT_BULK_LEN)) {
- struct test_bulk_req *bulk = &req->tsr_u.bulk_v0;
-
- __swab32s(&bulk->blk_opc);
- __swab32s(&bulk->blk_npg);
- __swab32s(&bulk->blk_flags);
-
- } else {
- struct test_bulk_req_v1 *bulk = &req->tsr_u.bulk_v1;
-
- __swab16s(&bulk->blk_opc);
- __swab16s(&bulk->blk_flags);
- __swab32s(&bulk->blk_offset);
- __swab32s(&bulk->blk_len);
- }
-
- return;
- }
-
- if (req->tsr_service == SRPC_SERVICE_PING) {
- struct test_ping_req *ping = &req->tsr_u.ping;
-
- __swab32s(&ping->png_size);
- __swab32s(&ping->png_flags);
- return;
- }
-
- LBUG();
-}
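-
-/*
- * Note (editor's summary of the code above): sfw_unpack_addtest_req() only
- * understands the BRW and PING test services; any other service ID is a
- * framework bug, hence the LBUG(). For BRW, the LST_FEAT_BULK_LEN session
- * feature selects between the v0 (page-count based) and v1 (byte-length
- * based) bulk request formats.
- */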
-
-static int
-sfw_add_test_instance(struct sfw_batch *tsb, struct srpc_server_rpc *rpc)
-{
- struct srpc_msg *msg = &rpc->srpc_reqstbuf->buf_msg;
- struct srpc_test_reqst *req = &msg->msg_body.tes_reqst;
- struct srpc_bulk *bk = rpc->srpc_bulk;
- int ndest = req->tsr_ndest;
- struct sfw_test_unit *tsu;
- struct sfw_test_instance *tsi;
- int i;
- int rc;
-
- tsi = kzalloc(sizeof(*tsi), GFP_NOFS);
- if (!tsi) {
- CERROR("Can't allocate test instance for batch: %llu\n",
- tsb->bat_id.bat_id);
- return -ENOMEM;
- }
-
- spin_lock_init(&tsi->tsi_lock);
- atomic_set(&tsi->tsi_nactive, 0);
- INIT_LIST_HEAD(&tsi->tsi_units);
- INIT_LIST_HEAD(&tsi->tsi_free_rpcs);
- INIT_LIST_HEAD(&tsi->tsi_active_rpcs);
-
- tsi->tsi_stopping = 0;
- tsi->tsi_batch = tsb;
- tsi->tsi_loop = req->tsr_loop;
- tsi->tsi_concur = req->tsr_concur;
- tsi->tsi_service = req->tsr_service;
- tsi->tsi_is_client = !!(req->tsr_is_client);
- tsi->tsi_stoptsu_onerr = !!(req->tsr_stop_onerr);
-
- rc = sfw_load_test(tsi);
- if (rc) {
- kfree(tsi);
- return rc;
- }
-
- LASSERT(!sfw_batch_active(tsb));
-
- if (!tsi->tsi_is_client) {
- /* it's test server, just add it to tsb */
- list_add_tail(&tsi->tsi_list, &tsb->bat_tests);
- return 0;
- }
-
- LASSERT(bk);
- LASSERT(bk->bk_niov * SFW_ID_PER_PAGE >= (unsigned int)ndest);
- LASSERT((unsigned int)bk->bk_len >=
- sizeof(struct lnet_process_id_packed) * ndest);
-
- sfw_unpack_addtest_req(msg);
- memcpy(&tsi->tsi_u, &req->tsr_u, sizeof(tsi->tsi_u));
-
- for (i = 0; i < ndest; i++) {
- struct lnet_process_id_packed *dests;
- struct lnet_process_id_packed id;
- int j;
-
- dests = page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].bv_page);
- LASSERT(dests); /* these pages are always within kernel virtual memory */
- id = dests[i % SFW_ID_PER_PAGE];
- if (msg->msg_magic != SRPC_MSG_MAGIC)
- sfw_unpack_id(id);
-
- for (j = 0; j < tsi->tsi_concur; j++) {
- tsu = kzalloc(sizeof(struct sfw_test_unit), GFP_NOFS);
- if (!tsu) {
- rc = -ENOMEM;
- CERROR("Can't allocate tsu for %d\n",
- tsi->tsi_service);
- goto error;
- }
-
- tsu->tsu_dest.nid = id.nid;
- tsu->tsu_dest.pid = id.pid;
- tsu->tsu_instance = tsi;
- tsu->tsu_private = NULL;
- list_add_tail(&tsu->tsu_list, &tsi->tsi_units);
- }
- }
-
- rc = tsi->tsi_ops->tso_init(tsi);
- if (!rc) {
- list_add_tail(&tsi->tsi_list, &tsb->bat_tests);
- return 0;
- }
-
-error:
- LASSERT(rc);
- sfw_destroy_test_instance(tsi);
- return rc;
-}
-
-static void
-sfw_test_unit_done(struct sfw_test_unit *tsu)
-{
- struct sfw_test_instance *tsi = tsu->tsu_instance;
- struct sfw_batch *tsb = tsi->tsi_batch;
- struct sfw_session *sn = tsb->bat_session;
-
- LASSERT(sfw_test_active(tsi));
-
- if (!atomic_dec_and_test(&tsi->tsi_nactive))
- return;
-
- /* the test instance is done */
- spin_lock(&tsi->tsi_lock);
-
- tsi->tsi_stopping = 0;
-
- spin_unlock(&tsi->tsi_lock);
-
- spin_lock(&sfw_data.fw_lock);
-
- if (!atomic_dec_and_test(&tsb->bat_nactive) || /* tsb still active */
- sn == sfw_data.fw_session) { /* sn also active */
- spin_unlock(&sfw_data.fw_lock);
- return;
- }
-
- LASSERT(!list_empty(&sn->sn_list)); /* I'm a zombie! */
-
- list_for_each_entry(tsb, &sn->sn_batches, bat_list) {
- if (sfw_batch_active(tsb)) {
- spin_unlock(&sfw_data.fw_lock);
- return;
- }
- }
-
- list_del_init(&sn->sn_list);
- spin_unlock(&sfw_data.fw_lock);
-
- sfw_destroy_session(sn);
-}
-
-static void
-sfw_test_rpc_done(struct srpc_client_rpc *rpc)
-{
- struct sfw_test_unit *tsu = rpc->crpc_priv;
- struct sfw_test_instance *tsi = tsu->tsu_instance;
- int done = 0;
-
- tsi->tsi_ops->tso_done_rpc(tsu, rpc);
-
- spin_lock(&tsi->tsi_lock);
-
- LASSERT(sfw_test_active(tsi));
- LASSERT(!list_empty(&rpc->crpc_list));
-
- list_del_init(&rpc->crpc_list);
-
- /* batch is stopping, the loop is done, or an error occurred */
- if (tsi->tsi_stopping || !tsu->tsu_loop ||
- (rpc->crpc_status && tsi->tsi_stoptsu_onerr))
- done = 1;
-
- /* dec ref for poster */
- srpc_client_rpc_decref(rpc);
-
- spin_unlock(&tsi->tsi_lock);
-
- if (!done) {
- swi_schedule_workitem(&tsu->tsu_worker);
- return;
- }
-
- sfw_test_unit_done(tsu);
-}
-
-int
-sfw_create_test_rpc(struct sfw_test_unit *tsu, struct lnet_process_id peer,
- unsigned int features, int nblk, int blklen,
- struct srpc_client_rpc **rpcpp)
-{
- struct srpc_client_rpc *rpc = NULL;
- struct sfw_test_instance *tsi = tsu->tsu_instance;
-
- spin_lock(&tsi->tsi_lock);
-
- LASSERT(sfw_test_active(tsi));
- /* reuse an RPC from the free list if possible */
- rpc = list_first_entry_or_null(&tsi->tsi_free_rpcs,
- struct srpc_client_rpc, crpc_list);
- if (rpc) {
- LASSERT(nblk == rpc->crpc_bulk.bk_niov);
- list_del_init(&rpc->crpc_list);
- }
-
- spin_unlock(&tsi->tsi_lock);
-
- if (!rpc) {
- rpc = srpc_create_client_rpc(peer, tsi->tsi_service, nblk,
- blklen, sfw_test_rpc_done,
- sfw_test_rpc_fini, tsu);
- } else {
- srpc_init_client_rpc(rpc, peer, tsi->tsi_service, nblk,
- blklen, sfw_test_rpc_done,
- sfw_test_rpc_fini, tsu);
- }
-
- if (!rpc) {
- CERROR("Can't create rpc for test %d\n", tsi->tsi_service);
- return -ENOMEM;
- }
-
- rpc->crpc_reqstmsg.msg_ses_feats = features;
- *rpcpp = rpc;
-
- return 0;
-}
-
-static void
-sfw_run_test(struct swi_workitem *wi)
-{
- struct sfw_test_unit *tsu = container_of(wi, struct sfw_test_unit, tsu_worker);
- struct sfw_test_instance *tsi = tsu->tsu_instance;
- struct srpc_client_rpc *rpc = NULL;
-
- if (tsi->tsi_ops->tso_prep_rpc(tsu, tsu->tsu_dest, &rpc)) {
- LASSERT(!rpc);
- goto test_done;
- }
-
- LASSERT(rpc);
-
- spin_lock(&tsi->tsi_lock);
-
- if (tsi->tsi_stopping) {
- list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs);
- spin_unlock(&tsi->tsi_lock);
- goto test_done;
- }
-
- if (tsu->tsu_loop > 0)
- tsu->tsu_loop--;
-
- list_add_tail(&rpc->crpc_list, &tsi->tsi_active_rpcs);
- spin_unlock(&tsi->tsi_lock);
-
- spin_lock(&rpc->crpc_lock);
- rpc->crpc_timeout = rpc_timeout;
- srpc_post_rpc(rpc);
- spin_unlock(&rpc->crpc_lock);
- return;
-
-test_done:
- /*
- * No one can schedule me now since:
- * - the previous RPC, if any, has completed, and
- * - no new RPC has been initiated, and
- * - my batch is still active; no one can run it again now.
- * Cancel pending schedules and prevent future schedule attempts:
- */
- sfw_test_unit_done(tsu);
-}
-
-static int
-sfw_run_batch(struct sfw_batch *tsb)
-{
- struct swi_workitem *wi;
- struct sfw_test_unit *tsu;
- struct sfw_test_instance *tsi;
-
- if (sfw_batch_active(tsb)) {
- CDEBUG(D_NET, "Batch already active: %llu (%d)\n",
- tsb->bat_id.bat_id, atomic_read(&tsb->bat_nactive));
- return 0;
- }
-
- list_for_each_entry(tsi, &tsb->bat_tests, tsi_list) {
- if (!tsi->tsi_is_client) /* skip server instances */
- continue;
-
- LASSERT(!tsi->tsi_stopping);
- LASSERT(!sfw_test_active(tsi));
-
- atomic_inc(&tsb->bat_nactive);
-
- list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
- atomic_inc(&tsi->tsi_nactive);
- tsu->tsu_loop = tsi->tsi_loop;
- wi = &tsu->tsu_worker;
- swi_init_workitem(wi, sfw_run_test,
- lst_test_wq[lnet_cpt_of_nid(tsu->tsu_dest.nid)]);
- swi_schedule_workitem(wi);
- }
- }
-
- return 0;
-}
-
-int
-sfw_stop_batch(struct sfw_batch *tsb, int force)
-{
- struct sfw_test_instance *tsi;
- struct srpc_client_rpc *rpc;
-
- if (!sfw_batch_active(tsb)) {
- CDEBUG(D_NET, "Batch %llu inactive\n", tsb->bat_id.bat_id);
- return 0;
- }
-
- list_for_each_entry(tsi, &tsb->bat_tests, tsi_list) {
- spin_lock(&tsi->tsi_lock);
-
- if (!tsi->tsi_is_client ||
- !sfw_test_active(tsi) || tsi->tsi_stopping) {
- spin_unlock(&tsi->tsi_lock);
- continue;
- }
-
- tsi->tsi_stopping = 1;
-
- if (!force) {
- spin_unlock(&tsi->tsi_lock);
- continue;
- }
-
- /* abort launched rpcs in the test */
- list_for_each_entry(rpc, &tsi->tsi_active_rpcs, crpc_list) {
- spin_lock(&rpc->crpc_lock);
-
- srpc_abort_rpc(rpc, -EINTR);
-
- spin_unlock(&rpc->crpc_lock);
- }
-
- spin_unlock(&tsi->tsi_lock);
- }
-
- return 0;
-}
-
-static int
-sfw_query_batch(struct sfw_batch *tsb, int testidx,
- struct srpc_batch_reply *reply)
-{
- struct sfw_test_instance *tsi;
-
- if (testidx < 0)
- return -EINVAL;
-
- if (!testidx) {
- reply->bar_active = atomic_read(&tsb->bat_nactive);
- return 0;
- }
-
- list_for_each_entry(tsi, &tsb->bat_tests, tsi_list) {
- if (testidx-- > 1)
- continue;
-
- reply->bar_active = atomic_read(&tsi->tsi_nactive);
- return 0;
- }
-
- return -ENOENT;
-}
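-
-/*
- * Note (editor's summary of the code above): in sfw_query_batch(),
- * testidx == 0 reports the number of active test instances in the whole
- * batch, while testidx == N (1-based) reports the number of active units
- * in the N-th test instance of that batch.
- */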
-
-void
-sfw_free_pages(struct srpc_server_rpc *rpc)
-{
- srpc_free_bulk(rpc->srpc_bulk);
- rpc->srpc_bulk = NULL;
-}
-
-int
-sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
- int sink)
-{
- LASSERT(!rpc->srpc_bulk);
- LASSERT(npages > 0 && npages <= LNET_MAX_IOV);
-
- rpc->srpc_bulk = srpc_alloc_bulk(cpt, 0, npages, len, sink);
- if (!rpc->srpc_bulk)
- return -ENOMEM;
-
- return 0;
-}
-
-static int
-sfw_add_test(struct srpc_server_rpc *rpc)
-{
- struct sfw_session *sn = sfw_data.fw_session;
- struct srpc_test_reply *reply = &rpc->srpc_replymsg.msg_body.tes_reply;
- struct srpc_test_reqst *request;
- int rc;
- struct sfw_batch *bat;
-
- request = &rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst;
- reply->tsr_sid = !sn ? LST_INVALID_SID : sn->sn_id;
-
- if (!request->tsr_loop ||
- !request->tsr_concur ||
- request->tsr_sid.ses_nid == LNET_NID_ANY ||
- request->tsr_ndest > SFW_MAX_NDESTS ||
- (request->tsr_is_client && !request->tsr_ndest) ||
- request->tsr_concur > SFW_MAX_CONCUR ||
- request->tsr_service > SRPC_SERVICE_MAX_ID ||
- request->tsr_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID) {
- reply->tsr_status = EINVAL;
- return 0;
- }
-
- if (!sn || !sfw_sid_equal(request->tsr_sid, sn->sn_id) ||
- !sfw_find_test_case(request->tsr_service)) {
- reply->tsr_status = ENOENT;
- return 0;
- }
-
- bat = sfw_bid2batch(request->tsr_bid);
- if (!bat) {
- CERROR("dropping RPC %s from %s under memory pressure\n",
- rpc->srpc_scd->scd_svc->sv_name,
- libcfs_id2str(rpc->srpc_peer));
- return -ENOMEM;
- }
-
- if (sfw_batch_active(bat)) {
- reply->tsr_status = EBUSY;
- return 0;
- }
-
- if (request->tsr_is_client && !rpc->srpc_bulk) {
- /* rpc will be resumed later in sfw_bulk_ready */
- int npg = sfw_id_pages(request->tsr_ndest);
- int len;
-
- if (!(sn->sn_features & LST_FEAT_BULK_LEN)) {
- len = npg * PAGE_SIZE;
-
- } else {
- len = sizeof(struct lnet_process_id_packed) *
- request->tsr_ndest;
- }
-
- return sfw_alloc_pages(rpc, CFS_CPT_ANY, npg, len, 1);
- }
-
- rc = sfw_add_test_instance(bat, rpc);
- CDEBUG(!rc ? D_NET : D_WARNING,
- "%s test: sv %d %s, loop %d, concur %d, ndest %d\n",
- !rc ? "Added" : "Failed to add", request->tsr_service,
- request->tsr_is_client ? "client" : "server",
- request->tsr_loop, request->tsr_concur, request->tsr_ndest);
-
- reply->tsr_status = (rc < 0) ? -rc : rc;
- return 0;
-}
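-
-/*
- * Note (editor's summary of the code above): sfw_add_test() runs in two
- * phases for client tests. On the first pass rpc->srpc_bulk is NULL, so it
- * only allocates pages for the destination ID list and returns; once LNet
- * has filled the bulk buffer, sfw_bulk_ready() calls sfw_add_test() again
- * to actually create the test instance.
- */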
-
-static int
-sfw_control_batch(struct srpc_batch_reqst *request,
- struct srpc_batch_reply *reply)
-{
- struct sfw_session *sn = sfw_data.fw_session;
- int rc = 0;
- struct sfw_batch *bat;
-
- reply->bar_sid = !sn ? LST_INVALID_SID : sn->sn_id;
-
- if (!sn || !sfw_sid_equal(request->bar_sid, sn->sn_id)) {
- reply->bar_status = ESRCH;
- return 0;
- }
-
- bat = sfw_find_batch(request->bar_bid);
- if (!bat) {
- reply->bar_status = ENOENT;
- return 0;
- }
-
- switch (request->bar_opc) {
- case SRPC_BATCH_OPC_RUN:
- rc = sfw_run_batch(bat);
- break;
-
- case SRPC_BATCH_OPC_STOP:
- rc = sfw_stop_batch(bat, request->bar_arg);
- break;
-
- case SRPC_BATCH_OPC_QUERY:
- rc = sfw_query_batch(bat, request->bar_testidx, reply);
- break;
-
- default:
- return -EINVAL; /* drop it */
- }
-
- reply->bar_status = (rc < 0) ? -rc : rc;
- return 0;
-}
-
-static int
-sfw_handle_server_rpc(struct srpc_server_rpc *rpc)
-{
- struct srpc_service *sv = rpc->srpc_scd->scd_svc;
- struct srpc_msg *reply = &rpc->srpc_replymsg;
- struct srpc_msg *request = &rpc->srpc_reqstbuf->buf_msg;
- unsigned int features = LST_FEATS_MASK;
- int rc = 0;
-
- LASSERT(!sfw_data.fw_active_srpc);
- LASSERT(sv->sv_id <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
- spin_lock(&sfw_data.fw_lock);
-
- if (sfw_data.fw_shuttingdown) {
- spin_unlock(&sfw_data.fw_lock);
- return -ESHUTDOWN;
- }
-
- /* Remove timer to avoid racing with it or expiring active session */
- if (sfw_del_session_timer()) {
- CERROR("dropping RPC %s from %s: racing with expiry timer\n",
- sv->sv_name, libcfs_id2str(rpc->srpc_peer));
- spin_unlock(&sfw_data.fw_lock);
- return -EAGAIN;
- }
-
- sfw_data.fw_active_srpc = rpc;
- spin_unlock(&sfw_data.fw_lock);
-
- sfw_unpack_message(request);
- LASSERT(request->msg_type == srpc_service2request(sv->sv_id));
-
- /* rpc module should have checked this */
- LASSERT(request->msg_version == SRPC_MSG_VERSION);
-
- if (sv->sv_id != SRPC_SERVICE_MAKE_SESSION &&
- sv->sv_id != SRPC_SERVICE_DEBUG) {
- struct sfw_session *sn = sfw_data.fw_session;
-
- if (sn &&
- sn->sn_features != request->msg_ses_feats) {
- CNETERR("Features of framework RPC don't match features of current session: %x/%x\n",
- request->msg_ses_feats, sn->sn_features);
- reply->msg_body.reply.status = EPROTO;
- reply->msg_body.reply.sid = sn->sn_id;
- goto out;
- }
-
- } else if (request->msg_ses_feats & ~LST_FEATS_MASK) {
- /*
- * NB: at this point, an old version will ignore the features
- * and create a new session anyway, so the console should be
- * able to handle this
- */
- reply->msg_body.reply.status = EPROTO;
- goto out;
- }
-
- switch (sv->sv_id) {
- default:
- LBUG();
- case SRPC_SERVICE_TEST:
- rc = sfw_add_test(rpc);
- break;
-
- case SRPC_SERVICE_BATCH:
- rc = sfw_control_batch(&request->msg_body.bat_reqst,
- &reply->msg_body.bat_reply);
- break;
-
- case SRPC_SERVICE_QUERY_STAT:
- rc = sfw_get_stats(&request->msg_body.stat_reqst,
- &reply->msg_body.stat_reply);
- break;
-
- case SRPC_SERVICE_DEBUG:
- rc = sfw_debug_session(&request->msg_body.dbg_reqst,
- &reply->msg_body.dbg_reply);
- break;
-
- case SRPC_SERVICE_MAKE_SESSION:
- rc = sfw_make_session(&request->msg_body.mksn_reqst,
- &reply->msg_body.mksn_reply);
- break;
-
- case SRPC_SERVICE_REMOVE_SESSION:
- rc = sfw_remove_session(&request->msg_body.rmsn_reqst,
- &reply->msg_body.rmsn_reply);
- break;
- }
-
- if (sfw_data.fw_session)
- features = sfw_data.fw_session->sn_features;
- out:
- reply->msg_ses_feats = features;
- rpc->srpc_done = sfw_server_rpc_done;
- spin_lock(&sfw_data.fw_lock);
-
- if (!sfw_data.fw_shuttingdown)
- sfw_add_session_timer();
-
- sfw_data.fw_active_srpc = NULL;
- spin_unlock(&sfw_data.fw_lock);
- return rc;
-}
-
-static int
-sfw_bulk_ready(struct srpc_server_rpc *rpc, int status)
-{
- struct srpc_service *sv = rpc->srpc_scd->scd_svc;
- int rc;
-
- LASSERT(rpc->srpc_bulk);
- LASSERT(sv->sv_id == SRPC_SERVICE_TEST);
- LASSERT(!sfw_data.fw_active_srpc);
- LASSERT(rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst.tsr_is_client);
-
- spin_lock(&sfw_data.fw_lock);
-
- if (status) {
- CERROR("Bulk transfer failed for RPC: service %s, peer %s, status %d\n",
- sv->sv_name, libcfs_id2str(rpc->srpc_peer), status);
- spin_unlock(&sfw_data.fw_lock);
- return -EIO;
- }
-
- if (sfw_data.fw_shuttingdown) {
- spin_unlock(&sfw_data.fw_lock);
- return -ESHUTDOWN;
- }
-
- if (sfw_del_session_timer()) {
- CERROR("dropping RPC %s from %s: racing with expiry timer\n",
- sv->sv_name, libcfs_id2str(rpc->srpc_peer));
- spin_unlock(&sfw_data.fw_lock);
- return -EAGAIN;
- }
-
- sfw_data.fw_active_srpc = rpc;
- spin_unlock(&sfw_data.fw_lock);
-
- rc = sfw_add_test(rpc);
-
- spin_lock(&sfw_data.fw_lock);
-
- if (!sfw_data.fw_shuttingdown)
- sfw_add_session_timer();
-
- sfw_data.fw_active_srpc = NULL;
- spin_unlock(&sfw_data.fw_lock);
- return rc;
-}
-
-struct srpc_client_rpc *
-sfw_create_rpc(struct lnet_process_id peer, int service,
- unsigned int features, int nbulkiov, int bulklen,
- void (*done)(struct srpc_client_rpc *), void *priv)
-{
- struct srpc_client_rpc *rpc = NULL;
-
- spin_lock(&sfw_data.fw_lock);
-
- LASSERT(!sfw_data.fw_shuttingdown);
- LASSERT(service <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
- if (!nbulkiov && !list_empty(&sfw_data.fw_zombie_rpcs)) {
- rpc = list_entry(sfw_data.fw_zombie_rpcs.next,
- struct srpc_client_rpc, crpc_list);
- list_del(&rpc->crpc_list);
-
- srpc_init_client_rpc(rpc, peer, service, 0, 0,
- done, sfw_client_rpc_fini, priv);
- }
-
- spin_unlock(&sfw_data.fw_lock);
-
- if (!rpc) {
- rpc = srpc_create_client_rpc(peer, service,
- nbulkiov, bulklen, done,
- nbulkiov ? NULL :
- sfw_client_rpc_fini,
- priv);
- }
-
- if (rpc) /* "session" is a framework-level concept */
- rpc->crpc_reqstmsg.msg_ses_feats = features;
-
- return rpc;
-}
-
-void
-sfw_unpack_message(struct srpc_msg *msg)
-{
- if (msg->msg_magic == SRPC_MSG_MAGIC)
- return; /* no flipping needed */
-
- /* the srpc module should guarantee no corrupt message reaches us */
- LASSERT(msg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
- if (msg->msg_type == SRPC_MSG_STAT_REQST) {
- struct srpc_stat_reqst *req = &msg->msg_body.stat_reqst;
-
- __swab32s(&req->str_type);
- __swab64s(&req->str_rpyid);
- sfw_unpack_sid(req->str_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_STAT_REPLY) {
- struct srpc_stat_reply *rep = &msg->msg_body.stat_reply;
-
- __swab32s(&rep->str_status);
- sfw_unpack_sid(rep->str_sid);
- sfw_unpack_fw_counters(rep->str_fw);
- sfw_unpack_rpc_counters(rep->str_rpc);
- sfw_unpack_lnet_counters(rep->str_lnet);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_MKSN_REQST) {
- struct srpc_mksn_reqst *req = &msg->msg_body.mksn_reqst;
-
- __swab64s(&req->mksn_rpyid);
- __swab32s(&req->mksn_force);
- sfw_unpack_sid(req->mksn_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_MKSN_REPLY) {
- struct srpc_mksn_reply *rep = &msg->msg_body.mksn_reply;
-
- __swab32s(&rep->mksn_status);
- __swab32s(&rep->mksn_timeout);
- sfw_unpack_sid(rep->mksn_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_RMSN_REQST) {
- struct srpc_rmsn_reqst *req = &msg->msg_body.rmsn_reqst;
-
- __swab64s(&req->rmsn_rpyid);
- sfw_unpack_sid(req->rmsn_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_RMSN_REPLY) {
- struct srpc_rmsn_reply *rep = &msg->msg_body.rmsn_reply;
-
- __swab32s(&rep->rmsn_status);
- sfw_unpack_sid(rep->rmsn_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_DEBUG_REQST) {
- struct srpc_debug_reqst *req = &msg->msg_body.dbg_reqst;
-
- __swab64s(&req->dbg_rpyid);
- __swab32s(&req->dbg_flags);
- sfw_unpack_sid(req->dbg_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_DEBUG_REPLY) {
- struct srpc_debug_reply *rep = &msg->msg_body.dbg_reply;
-
- __swab32s(&rep->dbg_nbatch);
- __swab32s(&rep->dbg_timeout);
- sfw_unpack_sid(rep->dbg_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_BATCH_REQST) {
- struct srpc_batch_reqst *req = &msg->msg_body.bat_reqst;
-
- __swab32s(&req->bar_opc);
- __swab64s(&req->bar_rpyid);
- __swab32s(&req->bar_testidx);
- __swab32s(&req->bar_arg);
- sfw_unpack_sid(req->bar_sid);
- __swab64s(&req->bar_bid.bat_id);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_BATCH_REPLY) {
- struct srpc_batch_reply *rep = &msg->msg_body.bat_reply;
-
- __swab32s(&rep->bar_status);
- sfw_unpack_sid(rep->bar_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_TEST_REQST) {
- struct srpc_test_reqst *req = &msg->msg_body.tes_reqst;
-
- __swab64s(&req->tsr_rpyid);
- __swab64s(&req->tsr_bulkid);
- __swab32s(&req->tsr_loop);
- __swab32s(&req->tsr_ndest);
- __swab32s(&req->tsr_concur);
- __swab32s(&req->tsr_service);
- sfw_unpack_sid(req->tsr_sid);
- __swab64s(&req->tsr_bid.bat_id);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_TEST_REPLY) {
- struct srpc_test_reply *rep = &msg->msg_body.tes_reply;
-
- __swab32s(&rep->tsr_status);
- sfw_unpack_sid(rep->tsr_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_JOIN_REQST) {
- struct srpc_join_reqst *req = &msg->msg_body.join_reqst;
-
- __swab64s(&req->join_rpyid);
- sfw_unpack_sid(req->join_sid);
- return;
- }
-
- if (msg->msg_type == SRPC_MSG_JOIN_REPLY) {
- struct srpc_join_reply *rep = &msg->msg_body.join_reply;
-
- __swab32s(&rep->join_status);
- __swab32s(&rep->join_timeout);
- sfw_unpack_sid(rep->join_sid);
- return;
- }
-
- LBUG();
-}
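-
-/*
- * Note (editor's summary of the code above): endianness is detected from
- * the magic. A peer of the same endianness sends msg_magic == SRPC_MSG_MAGIC;
- * a peer of opposite endianness produces __swab32(SRPC_MSG_MAGIC), in which
- * case every multi-byte field of the message body is byte-swapped in place.
- */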
-
-void
-sfw_abort_rpc(struct srpc_client_rpc *rpc)
-{
- LASSERT(atomic_read(&rpc->crpc_refcount) > 0);
- LASSERT(rpc->crpc_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
-
- spin_lock(&rpc->crpc_lock);
- srpc_abort_rpc(rpc, -EINTR);
- spin_unlock(&rpc->crpc_lock);
-}
-
-void
-sfw_post_rpc(struct srpc_client_rpc *rpc)
-{
- spin_lock(&rpc->crpc_lock);
-
- LASSERT(!rpc->crpc_closed);
- LASSERT(!rpc->crpc_aborted);
- LASSERT(list_empty(&rpc->crpc_list));
- LASSERT(!sfw_data.fw_shuttingdown);
-
- rpc->crpc_timeout = rpc_timeout;
- srpc_post_rpc(rpc);
-
- spin_unlock(&rpc->crpc_lock);
-}
-
-static struct srpc_service sfw_services[] = {
- {
- /* sv_id */ SRPC_SERVICE_DEBUG,
- /* sv_name */ "debug",
- 0
- },
- {
- /* sv_id */ SRPC_SERVICE_QUERY_STAT,
- /* sv_name */ "query stats",
- 0
- },
- {
- /* sv_id */ SRPC_SERVICE_MAKE_SESSION,
- /* sv_name */ "make session",
- 0
- },
- {
- /* sv_id */ SRPC_SERVICE_REMOVE_SESSION,
- /* sv_name */ "remove session",
- 0
- },
- {
- /* sv_id */ SRPC_SERVICE_BATCH,
- /* sv_name */ "batch service",
- 0
- },
- {
- /* sv_id */ SRPC_SERVICE_TEST,
- /* sv_name */ "test service",
- 0
- },
- {
- /* sv_id */ 0,
- /* sv_name */ NULL,
- 0
- }
-};
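-
-/*
- * Note (editor's summary): the array above is terminated by a sentinel
- * entry with a NULL sv_name; sfw_startup() and sfw_shutdown() iterate
- * over it until they hit that sentinel.
- */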
-
-int
-sfw_startup(void)
-{
- int i;
- int rc;
- int error;
- struct srpc_service *sv;
- struct sfw_test_case *tsc;
-
- if (session_timeout < 0) {
- CERROR("Session timeout must be non-negative: %d\n",
- session_timeout);
- return -EINVAL;
- }
-
- if (rpc_timeout < 0) {
- CERROR("RPC timeout must be non-negative: %d\n",
- rpc_timeout);
- return -EINVAL;
- }
-
- if (!session_timeout)
- CWARN("Zero session_timeout specified - test sessions never expire.\n");
-
- if (!rpc_timeout)
- CWARN("Zero rpc_timeout specified - test RPC never expire.\n");
-
- memset(&sfw_data, 0, sizeof(struct smoketest_framework));
-
- sfw_data.fw_session = NULL;
- sfw_data.fw_active_srpc = NULL;
- spin_lock_init(&sfw_data.fw_lock);
- atomic_set(&sfw_data.fw_nzombies, 0);
- INIT_LIST_HEAD(&sfw_data.fw_tests);
- INIT_LIST_HEAD(&sfw_data.fw_zombie_rpcs);
- INIT_LIST_HEAD(&sfw_data.fw_zombie_sessions);
-
- brw_init_test_client();
- brw_init_test_service();
- rc = sfw_register_test(&brw_test_service, &brw_test_client);
- LASSERT(!rc);
-
- ping_init_test_client();
- ping_init_test_service();
- rc = sfw_register_test(&ping_test_service, &ping_test_client);
- LASSERT(!rc);
-
- error = 0;
- list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) {
- sv = tsc->tsc_srv_service;
-
- rc = srpc_add_service(sv);
- LASSERT(rc != -EBUSY);
- if (rc) {
- CWARN("Failed to add %s service: %d\n",
- sv->sv_name, rc);
- error = rc;
- }
- }
-
- for (i = 0; ; i++) {
- sv = &sfw_services[i];
- if (!sv->sv_name)
- break;
-
- sv->sv_bulk_ready = NULL;
- sv->sv_handler = sfw_handle_server_rpc;
- sv->sv_wi_total = SFW_FRWK_WI_MAX;
- if (sv->sv_id == SRPC_SERVICE_TEST)
- sv->sv_bulk_ready = sfw_bulk_ready;
-
- rc = srpc_add_service(sv);
- LASSERT(rc != -EBUSY);
- if (rc) {
- CWARN("Failed to add %s service: %d\n",
- sv->sv_name, rc);
- error = rc;
- }
-
- /* about to call sfw_shutdown(), no need to add buffers */
- if (error)
- continue;
-
- rc = srpc_service_add_buffers(sv, sv->sv_wi_total);
- if (rc) {
- CWARN("Failed to reserve enough buffers: service %s, %d needed: %d\n",
- sv->sv_name, sv->sv_wi_total, rc);
- error = -ENOMEM;
- }
- }
-
- if (error)
- sfw_shutdown();
- return error;
-}
-
-void
-sfw_shutdown(void)
-{
- struct srpc_service *sv;
- struct sfw_test_case *tsc;
- int i;
-
- spin_lock(&sfw_data.fw_lock);
-
- sfw_data.fw_shuttingdown = 1;
- lst_wait_until(!sfw_data.fw_active_srpc, sfw_data.fw_lock,
- "waiting for active RPC to finish.\n");
-
- if (sfw_del_session_timer())
- lst_wait_until(!sfw_data.fw_session, sfw_data.fw_lock,
- "waiting for session timer to explode.\n");
-
- sfw_deactivate_session();
- lst_wait_until(!atomic_read(&sfw_data.fw_nzombies),
- sfw_data.fw_lock,
- "waiting for %d zombie sessions to die.\n",
- atomic_read(&sfw_data.fw_nzombies));
-
- spin_unlock(&sfw_data.fw_lock);
-
- for (i = 0; ; i++) {
- sv = &sfw_services[i];
- if (!sv->sv_name)
- break;
-
- srpc_shutdown_service(sv);
- srpc_remove_service(sv);
- }
-
- list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) {
- sv = tsc->tsc_srv_service;
- srpc_shutdown_service(sv);
- srpc_remove_service(sv);
- }
-
- while (!list_empty(&sfw_data.fw_zombie_rpcs)) {
- struct srpc_client_rpc *rpc;
-
- rpc = list_entry(sfw_data.fw_zombie_rpcs.next,
- struct srpc_client_rpc, crpc_list);
- list_del(&rpc->crpc_list);
-
- kfree(rpc);
- }
-
- for (i = 0; ; i++) {
- sv = &sfw_services[i];
- if (!sv->sv_name)
- break;
-
- srpc_wait_service_shutdown(sv);
- }
-
- while (!list_empty(&sfw_data.fw_tests)) {
- tsc = list_entry(sfw_data.fw_tests.next,
- struct sfw_test_case, tsc_list);
-
- srpc_wait_service_shutdown(tsc->tsc_srv_service);
-
- list_del(&tsc->tsc_list);
- kfree(tsc);
- }
-}
diff --git a/drivers/staging/lustre/lnet/selftest/module.c b/drivers/staging/lustre/lnet/selftest/module.c
deleted file mode 100644
index 9ba65320f748..000000000000
--- a/drivers/staging/lustre/lnet/selftest/module.c
+++ /dev/null
@@ -1,169 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-#include "console.h"
-
-enum {
- LST_INIT_NONE = 0,
- LST_INIT_WI_SERIAL,
- LST_INIT_WI_TEST,
- LST_INIT_RPC,
- LST_INIT_FW,
- LST_INIT_CONSOLE
-};
-
-static int lst_init_step = LST_INIT_NONE;
-
-struct workqueue_struct *lst_serial_wq;
-struct workqueue_struct **lst_test_wq;
-
-static void
-lnet_selftest_exit(void)
-{
- int i;
-
- switch (lst_init_step) {
- case LST_INIT_CONSOLE:
- lstcon_console_fini();
- /* fall through */
- case LST_INIT_FW:
- sfw_shutdown();
- /* fall through */
- case LST_INIT_RPC:
- srpc_shutdown();
- /* fall through */
- case LST_INIT_WI_TEST:
- for (i = 0;
- i < cfs_cpt_number(lnet_cpt_table()); i++) {
- if (!lst_test_wq[i])
- continue;
- destroy_workqueue(lst_test_wq[i]);
- }
- kvfree(lst_test_wq);
- lst_test_wq = NULL;
- /* fall through */
- case LST_INIT_WI_SERIAL:
- destroy_workqueue(lst_serial_wq);
- lst_serial_wq = NULL;
- /* fall through */
- case LST_INIT_NONE:
- break;
- default:
- LBUG();
- }
-}
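-
-/*
- * Note (editor's summary of the code above): lst_init_step records the
- * last initialization stage that completed; the switch starts at that
- * stage and falls through downward, undoing each step in reverse order
- * of initialization.
- */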
-
-static int
-lnet_selftest_init(void)
-{
- int nscheds;
- int rc;
- int i;
-
- rc = libcfs_setup();
- if (rc)
- return rc;
-
- lst_serial_wq = alloc_ordered_workqueue("lst_s", 0);
- if (!lst_serial_wq) {
- CERROR("Failed to create serial WI scheduler for LST\n");
- return -ENOMEM;
- }
- lst_init_step = LST_INIT_WI_SERIAL;
-
- nscheds = cfs_cpt_number(lnet_cpt_table());
- lst_test_wq = kvmalloc_array(nscheds, sizeof(lst_test_wq[0]),
- GFP_KERNEL | __GFP_ZERO);
- if (!lst_test_wq) {
- rc = -ENOMEM;
- goto error;
- }
-
- lst_init_step = LST_INIT_WI_TEST;
- for (i = 0; i < nscheds; i++) {
- int nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
- struct workqueue_attrs attrs = {0};
- cpumask_var_t *mask = cfs_cpt_cpumask(lnet_cpt_table(), i);
-
- /* reserve at least one CPU for LND */
- nthrs = max(nthrs - 1, 1);
- lst_test_wq[i] = alloc_workqueue("lst_t", WQ_UNBOUND, nthrs);
- if (!lst_test_wq[i]) {
- CWARN("Failed to create CPU partition affinity WI scheduler %d for LST\n",
- i);
- rc = -ENOMEM;
- goto error;
- }
-
- if (mask && alloc_cpumask_var(&attrs.cpumask, GFP_KERNEL)) {
- cpumask_copy(attrs.cpumask, *mask);
- apply_workqueue_attrs(lst_test_wq[i], &attrs);
- free_cpumask_var(attrs.cpumask);
- }
- }
-
- rc = srpc_startup();
- if (rc) {
- CERROR("LST can't startup rpc\n");
- goto error;
- }
- lst_init_step = LST_INIT_RPC;
-
- rc = sfw_startup();
- if (rc) {
- CERROR("LST can't startup framework\n");
- goto error;
- }
- lst_init_step = LST_INIT_FW;
-
- rc = lstcon_console_init();
- if (rc) {
- CERROR("LST can't startup console\n");
- goto error;
- }
- lst_init_step = LST_INIT_CONSOLE;
- return 0;
-error:
- lnet_selftest_exit();
- return rc;
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("LNet Selftest");
-MODULE_VERSION("2.7.0");
-MODULE_LICENSE("GPL");
-
-module_init(lnet_selftest_init);
-module_exit(lnet_selftest_exit);
diff --git a/drivers/staging/lustre/lnet/selftest/ping_test.c b/drivers/staging/lustre/lnet/selftest/ping_test.c
deleted file mode 100644
index f54bd630dbf8..000000000000
--- a/drivers/staging/lustre/lnet/selftest/ping_test.c
+++ /dev/null
@@ -1,228 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/ping_test.c
- *
- * Ping test client & server
- *
- * Author: Liang Zhen <liangzhen@clusterfs.com>
- */
-
-#include "selftest.h"
-
-#define LST_PING_TEST_MAGIC 0xbabeface
-
-static int ping_srv_workitems = SFW_TEST_WI_MAX;
-module_param(ping_srv_workitems, int, 0644);
-MODULE_PARM_DESC(ping_srv_workitems, "# PING server workitems");
-
-struct lst_ping_data {
- spinlock_t pnd_lock; /* serialize */
- int pnd_counter; /* sequence counter */
-};
-
-static struct lst_ping_data lst_ping_data;
-
-static int
-ping_client_init(struct sfw_test_instance *tsi)
-{
- struct sfw_session *sn = tsi->tsi_batch->bat_session;
-
- LASSERT(tsi->tsi_is_client);
- LASSERT(sn && !(sn->sn_features & ~LST_FEATS_MASK));
-
- spin_lock_init(&lst_ping_data.pnd_lock);
- lst_ping_data.pnd_counter = 0;
-
- return 0;
-}
-
-static void
-ping_client_fini(struct sfw_test_instance *tsi)
-{
- struct sfw_session *sn = tsi->tsi_batch->bat_session;
- int errors;
-
- LASSERT(sn);
- LASSERT(tsi->tsi_is_client);
-
- errors = atomic_read(&sn->sn_ping_errors);
- if (errors)
- CWARN("%d pings have failed.\n", errors);
- else
- CDEBUG(D_NET, "Ping test finished OK.\n");
-}
-
-static int
-ping_client_prep_rpc(struct sfw_test_unit *tsu, struct lnet_process_id dest,
- struct srpc_client_rpc **rpc)
-{
- struct srpc_ping_reqst *req;
- struct sfw_test_instance *tsi = tsu->tsu_instance;
- struct sfw_session *sn = tsi->tsi_batch->bat_session;
- struct timespec64 ts;
- int rc;
-
- LASSERT(sn);
- LASSERT(!(sn->sn_features & ~LST_FEATS_MASK));
-
- rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, 0, 0, rpc);
- if (rc)
- return rc;
-
- req = &(*rpc)->crpc_reqstmsg.msg_body.ping_reqst;
-
- req->pnr_magic = LST_PING_TEST_MAGIC;
-
- spin_lock(&lst_ping_data.pnd_lock);
- req->pnr_seq = lst_ping_data.pnd_counter++;
- spin_unlock(&lst_ping_data.pnd_lock);
-
- ktime_get_real_ts64(&ts);
- req->pnr_time_sec = ts.tv_sec;
- req->pnr_time_usec = ts.tv_nsec / NSEC_PER_USEC;
-
- return rc;
-}
-
-static void
-ping_client_done_rpc(struct sfw_test_unit *tsu, struct srpc_client_rpc *rpc)
-{
- struct sfw_test_instance *tsi = tsu->tsu_instance;
- struct sfw_session *sn = tsi->tsi_batch->bat_session;
- struct srpc_ping_reqst *reqst = &rpc->crpc_reqstmsg.msg_body.ping_reqst;
- struct srpc_ping_reply *reply = &rpc->crpc_replymsg.msg_body.ping_reply;
- struct timespec64 ts;
-
- LASSERT(sn);
-
- if (rpc->crpc_status) {
- if (!tsi->tsi_stopping) /* rpc could have been aborted */
- atomic_inc(&sn->sn_ping_errors);
- CERROR("Unable to ping %s (%d): %d\n",
- libcfs_id2str(rpc->crpc_dest),
- reqst->pnr_seq, rpc->crpc_status);
- return;
- }
-
- if (rpc->crpc_replymsg.msg_magic != SRPC_MSG_MAGIC) {
- __swab32s(&reply->pnr_seq);
- __swab32s(&reply->pnr_magic);
- __swab32s(&reply->pnr_status);
- }
-
- if (reply->pnr_magic != LST_PING_TEST_MAGIC) {
- rpc->crpc_status = -EBADMSG;
- atomic_inc(&sn->sn_ping_errors);
- CERROR("Bad magic %u from %s, %u expected.\n",
- reply->pnr_magic, libcfs_id2str(rpc->crpc_dest),
- LST_PING_TEST_MAGIC);
- return;
- }
-
- if (reply->pnr_seq != reqst->pnr_seq) {
- rpc->crpc_status = -EBADMSG;
- atomic_inc(&sn->sn_ping_errors);
- CERROR("Bad seq %u from %s, %u expected.\n",
- reply->pnr_seq, libcfs_id2str(rpc->crpc_dest),
- reqst->pnr_seq);
- return;
- }
-
- ktime_get_real_ts64(&ts);
- CDEBUG(D_NET, "%d reply in %u usec\n", reply->pnr_seq,
- (unsigned int)((ts.tv_sec - reqst->pnr_time_sec) * 1000000 +
- (ts.tv_nsec / NSEC_PER_USEC - reqst->pnr_time_usec)));
-}
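-
-/*
- * Illustrative RTT arithmetic for the CDEBUG above (editor's note,
- * example values assumed): if the request was stamped at 100 s + 200000 us
- * and the reply lands at 100 s + 250000 us, the elapsed time is
- * (100 - 100) * 1000000 + (250000 - 200000) = 50000 usec.
- */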
-
-static int
-ping_server_handle(struct srpc_server_rpc *rpc)
-{
- struct srpc_service *sv = rpc->srpc_scd->scd_svc;
- struct srpc_msg *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
- struct srpc_msg *replymsg = &rpc->srpc_replymsg;
- struct srpc_ping_reqst *req = &reqstmsg->msg_body.ping_reqst;
- struct srpc_ping_reply *rep = &rpc->srpc_replymsg.msg_body.ping_reply;
-
- LASSERT(sv->sv_id == SRPC_SERVICE_PING);
-
- if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) {
- LASSERT(reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC));
-
- __swab32s(&req->pnr_seq);
- __swab32s(&req->pnr_magic);
- __swab64s(&req->pnr_time_sec);
- __swab64s(&req->pnr_time_usec);
- }
- LASSERT(reqstmsg->msg_type == srpc_service2request(sv->sv_id));
-
- if (req->pnr_magic != LST_PING_TEST_MAGIC) {
- CERROR("Unexpected magic %08x from %s\n",
- req->pnr_magic, libcfs_id2str(rpc->srpc_peer));
- return -EINVAL;
- }
-
- rep->pnr_seq = req->pnr_seq;
- rep->pnr_magic = LST_PING_TEST_MAGIC;
-
- if (reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) {
- replymsg->msg_ses_feats = LST_FEATS_MASK;
- rep->pnr_status = EPROTO;
- return 0;
- }
-
- replymsg->msg_ses_feats = reqstmsg->msg_ses_feats;
-
- CDEBUG(D_NET, "Get ping %d from %s\n",
- req->pnr_seq, libcfs_id2str(rpc->srpc_peer));
- return 0;
-}
-
-struct sfw_test_client_ops ping_test_client;
-
-void ping_init_test_client(void)
-{
- ping_test_client.tso_init = ping_client_init;
- ping_test_client.tso_fini = ping_client_fini;
- ping_test_client.tso_prep_rpc = ping_client_prep_rpc;
- ping_test_client.tso_done_rpc = ping_client_done_rpc;
-}
-
-struct srpc_service ping_test_service;
-
-void ping_init_test_service(void)
-{
- ping_test_service.sv_id = SRPC_SERVICE_PING;
- ping_test_service.sv_name = "ping_test";
- ping_test_service.sv_handler = ping_server_handle;
- ping_test_service.sv_wi_total = ping_srv_workitems;
-}
diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c
deleted file mode 100644
index 9613b0a77007..000000000000
--- a/drivers/staging/lustre/lnet/selftest/rpc.c
+++ /dev/null
@@ -1,1682 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/rpc.c
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- *
- * 2012-05-13: Liang Zhen <liang@whamcloud.com>
- * - percpt data for service to improve smp performance
- * - code cleanup
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-
-enum srpc_state {
- SRPC_STATE_NONE,
- SRPC_STATE_NI_INIT,
- SRPC_STATE_EQ_INIT,
- SRPC_STATE_RUNNING,
- SRPC_STATE_STOPPING,
-};
-
-static struct smoketest_rpc {
- spinlock_t rpc_glock; /* global lock */
- struct srpc_service *rpc_services[SRPC_SERVICE_MAX_ID + 1];
- struct lnet_handle_eq rpc_lnet_eq; /* _the_ LNet event queue */
- enum srpc_state rpc_state;
- struct srpc_counters rpc_counters;
- __u64 rpc_matchbits; /* matchbits counter */
-} srpc_data;
-
-static inline int
-srpc_serv_portal(int svc_id)
-{
- return svc_id < SRPC_FRAMEWORK_SERVICE_MAX_ID ?
- SRPC_FRAMEWORK_REQUEST_PORTAL : SRPC_REQUEST_PORTAL;
-}
-
-/* forward refs */
-void srpc_handle_rpc(struct swi_workitem *wi);
-
-void srpc_get_counters(struct srpc_counters *cnt)
-{
- spin_lock(&srpc_data.rpc_glock);
- *cnt = srpc_data.rpc_counters;
- spin_unlock(&srpc_data.rpc_glock);
-}
-
-void srpc_set_counters(const struct srpc_counters *cnt)
-{
- spin_lock(&srpc_data.rpc_glock);
- srpc_data.rpc_counters = *cnt;
- spin_unlock(&srpc_data.rpc_glock);
-}
-
-static int
-srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int off,
- int nob)
-{
- LASSERT(off < PAGE_SIZE);
- LASSERT(nob > 0 && nob <= PAGE_SIZE);
-
- bk->bk_iovs[i].bv_offset = off;
- bk->bk_iovs[i].bv_page = pg;
- bk->bk_iovs[i].bv_len = nob;
- return nob;
-}
-
-void
-srpc_free_bulk(struct srpc_bulk *bk)
-{
- int i;
- struct page *pg;
-
- LASSERT(bk);
-
- for (i = 0; i < bk->bk_niov; i++) {
- pg = bk->bk_iovs[i].bv_page;
- if (!pg)
- break;
-
- __free_page(pg);
- }
-
- kfree(bk);
-}
-
-struct srpc_bulk *
-srpc_alloc_bulk(int cpt, unsigned int bulk_off, unsigned int bulk_npg,
- unsigned int bulk_len, int sink)
-{
- struct srpc_bulk *bk;
- int i;
-
- LASSERT(bulk_npg > 0 && bulk_npg <= LNET_MAX_IOV);
-
- bk = kzalloc_cpt(offsetof(struct srpc_bulk, bk_iovs[bulk_npg]),
- GFP_KERNEL, cpt);
- if (!bk) {
- CERROR("Can't allocate descriptor for %d pages\n", bulk_npg);
- return NULL;
- }
-
- memset(bk, 0, offsetof(struct srpc_bulk, bk_iovs[bulk_npg]));
- bk->bk_sink = sink;
- bk->bk_len = bulk_len;
- bk->bk_niov = bulk_npg;
-
- for (i = 0; i < bulk_npg; i++) {
- struct page *pg;
- int nob;
-
- pg = alloc_pages_node(cfs_cpt_spread_node(lnet_cpt_table(), cpt),
- GFP_KERNEL, 0);
- if (!pg) {
- CERROR("Can't allocate page %d of %d\n", i, bulk_npg);
- srpc_free_bulk(bk);
- return NULL;
- }
-
- nob = min_t(unsigned int, bulk_off + bulk_len, PAGE_SIZE) -
- bulk_off;
- srpc_add_bulk_page(bk, pg, i, bulk_off, nob);
- bulk_len -= nob;
- bulk_off = 0;
- }
-
- return bk;
-}
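-
-/*
- * Illustrative page fill for srpc_alloc_bulk() above (editor's note,
- * example values assumed): with PAGE_SIZE = 4096, bulk_npg = 2,
- * bulk_off = 1024 and bulk_len = 6144, page 0 gets
- * nob = min(1024 + 6144, 4096) - 1024 = 3072 bytes at offset 1024;
- * bulk_off then resets to 0 and page 1 gets the remaining 3072 bytes
- * from offset 0.
- */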
-
-static inline __u64
-srpc_next_id(void)
-{
- __u64 id;
-
- spin_lock(&srpc_data.rpc_glock);
- id = srpc_data.rpc_matchbits++;
- spin_unlock(&srpc_data.rpc_glock);
- return id;
-}
-
-static void
-srpc_init_server_rpc(struct srpc_server_rpc *rpc,
- struct srpc_service_cd *scd,
- struct srpc_buffer *buffer)
-{
- memset(rpc, 0, sizeof(*rpc));
- swi_init_workitem(&rpc->srpc_wi, srpc_handle_rpc,
- srpc_serv_is_framework(scd->scd_svc) ?
- lst_serial_wq : lst_test_wq[scd->scd_cpt]);
-
- rpc->srpc_ev.ev_fired = 1; /* no event expected now */
-
- rpc->srpc_scd = scd;
- rpc->srpc_reqstbuf = buffer;
- rpc->srpc_peer = buffer->buf_peer;
- rpc->srpc_self = buffer->buf_self;
- LNetInvalidateMDHandle(&rpc->srpc_replymdh);
-}
-
-static void
-srpc_service_fini(struct srpc_service *svc)
-{
- struct srpc_service_cd *scd;
- struct srpc_server_rpc *rpc;
- struct srpc_buffer *buf;
- struct list_head *q;
- int i;
-
- if (!svc->sv_cpt_data)
- return;
-
- cfs_percpt_for_each(scd, i, svc->sv_cpt_data) {
- while (1) {
- if (!list_empty(&scd->scd_buf_posted))
- q = &scd->scd_buf_posted;
- else if (!list_empty(&scd->scd_buf_blocked))
- q = &scd->scd_buf_blocked;
- else
- break;
-
- while (!list_empty(q)) {
- buf = list_entry(q->next, struct srpc_buffer,
- buf_list);
- list_del(&buf->buf_list);
- kfree(buf);
- }
- }
-
- LASSERT(list_empty(&scd->scd_rpc_active));
-
- while (!list_empty(&scd->scd_rpc_free)) {
- rpc = list_entry(scd->scd_rpc_free.next,
- struct srpc_server_rpc,
- srpc_list);
- list_del(&rpc->srpc_list);
- kfree(rpc);
- }
- }
-
- cfs_percpt_free(svc->sv_cpt_data);
- svc->sv_cpt_data = NULL;
-}
-
-static int
-srpc_service_nrpcs(struct srpc_service *svc)
-{
- int nrpcs = svc->sv_wi_total / svc->sv_ncpts;
-
- return srpc_serv_is_framework(svc) ?
- max(nrpcs, SFW_FRWK_WI_MIN) : max(nrpcs, SFW_TEST_WI_MIN);
-}
-
-void srpc_add_buffer(struct swi_workitem *wi);
-
-static int
-srpc_service_init(struct srpc_service *svc)
-{
- struct srpc_service_cd *scd;
- struct srpc_server_rpc *rpc;
- int nrpcs;
- int i;
- int j;
-
- svc->sv_shuttingdown = 0;
-
- svc->sv_cpt_data = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(**svc->sv_cpt_data));
- if (!svc->sv_cpt_data)
- return -ENOMEM;
-
- svc->sv_ncpts = srpc_serv_is_framework(svc) ?
- 1 : cfs_cpt_number(lnet_cpt_table());
- nrpcs = srpc_service_nrpcs(svc);
-
- cfs_percpt_for_each(scd, i, svc->sv_cpt_data) {
- scd->scd_cpt = i;
- scd->scd_svc = svc;
- spin_lock_init(&scd->scd_lock);
- INIT_LIST_HEAD(&scd->scd_rpc_free);
- INIT_LIST_HEAD(&scd->scd_rpc_active);
- INIT_LIST_HEAD(&scd->scd_buf_posted);
- INIT_LIST_HEAD(&scd->scd_buf_blocked);
-
- scd->scd_ev.ev_data = scd;
- scd->scd_ev.ev_type = SRPC_REQUEST_RCVD;
-
- /*
- * NB: don't use lst_serial_wq for adding buffers;
- * see details in srpc_service_add_buffers()
- */
- swi_init_workitem(&scd->scd_buf_wi,
- srpc_add_buffer, lst_test_wq[i]);
-
- if (i && srpc_serv_is_framework(svc)) {
- /*
- * NB: a framework service only needs srpc_service_cd for
- * one partition, but we allocate for all of them to keep
- * the implementation simple; this wastes a little memory,
- * but nobody should care
- */
- continue;
- }
-
- for (j = 0; j < nrpcs; j++) {
- rpc = kzalloc_cpt(sizeof(*rpc), GFP_NOFS, i);
- if (!rpc) {
- srpc_service_fini(svc);
- return -ENOMEM;
- }
- list_add(&rpc->srpc_list, &scd->scd_rpc_free);
- }
- }
-
- return 0;
-}
-
-int
-srpc_add_service(struct srpc_service *sv)
-{
- int id = sv->sv_id;
-
- LASSERT(0 <= id && id <= SRPC_SERVICE_MAX_ID);
-
- if (srpc_service_init(sv))
- return -ENOMEM;
-
- spin_lock(&srpc_data.rpc_glock);
-
- LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING);
-
- if (srpc_data.rpc_services[id]) {
- spin_unlock(&srpc_data.rpc_glock);
- goto failed;
- }
-
- srpc_data.rpc_services[id] = sv;
- spin_unlock(&srpc_data.rpc_glock);
-
- CDEBUG(D_NET, "Adding service: id %d, name %s\n", id, sv->sv_name);
- return 0;
-
- failed:
- srpc_service_fini(sv);
- return -EBUSY;
-}
-
-int
-srpc_remove_service(struct srpc_service *sv)
-{
- int id = sv->sv_id;
-
- spin_lock(&srpc_data.rpc_glock);
-
- if (srpc_data.rpc_services[id] != sv) {
- spin_unlock(&srpc_data.rpc_glock);
- return -ENOENT;
- }
-
- srpc_data.rpc_services[id] = NULL;
- spin_unlock(&srpc_data.rpc_glock);
- return 0;
-}
-
-static int
-srpc_post_passive_rdma(int portal, int local, __u64 matchbits, void *buf,
- int len, int options, struct lnet_process_id peer,
- struct lnet_handle_md *mdh, struct srpc_event *ev)
-{
- int rc;
- struct lnet_md md;
- struct lnet_handle_me meh;
-
- rc = LNetMEAttach(portal, peer, matchbits, 0, LNET_UNLINK,
- local ? LNET_INS_LOCAL : LNET_INS_AFTER, &meh);
- if (rc) {
- CERROR("LNetMEAttach failed: %d\n", rc);
- LASSERT(rc == -ENOMEM);
- return -ENOMEM;
- }
-
- md.threshold = 1;
- md.user_ptr = ev;
- md.start = buf;
- md.length = len;
- md.options = options;
- md.eq_handle = srpc_data.rpc_lnet_eq;
-
- rc = LNetMDAttach(meh, md, LNET_UNLINK, mdh);
- if (rc) {
- CERROR("LNetMDAttach failed: %d\n", rc);
- LASSERT(rc == -ENOMEM);
-
- rc = LNetMEUnlink(meh);
- LASSERT(!rc);
- return -ENOMEM;
- }
-
- CDEBUG(D_NET, "Posted passive RDMA: peer %s, portal %d, matchbits %#llx\n",
- libcfs_id2str(peer), portal, matchbits);
- return 0;
-}
-
-static int
-srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len,
- int options, struct lnet_process_id peer,
- lnet_nid_t self, struct lnet_handle_md *mdh,
- struct srpc_event *ev)
-{
- int rc;
- struct lnet_md md;
-
- md.user_ptr = ev;
- md.start = buf;
- md.length = len;
- md.eq_handle = srpc_data.rpc_lnet_eq;
- md.threshold = options & LNET_MD_OP_GET ? 2 : 1;
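- /*
- * NB: an LNetGet() fires two events on this MD (SEND and REPLY),
- * hence the threshold of 2 above, while an LNetPut() with
- * LNET_NOACK_REQ fires only one (SEND)
- */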
- md.options = options & ~(LNET_MD_OP_PUT | LNET_MD_OP_GET);
-
- rc = LNetMDBind(md, LNET_UNLINK, mdh);
- if (rc) {
- CERROR("LNetMDBind failed: %d\n", rc);
- LASSERT(rc == -ENOMEM);
- return -ENOMEM;
- }
-
- /*
- * This is kind of an abuse of the LNET_MD_OP_{PUT,GET} options:
- * they're only meaningful for MDs attached to an ME (i.e. passive
- * buffers).
- */
- if (options & LNET_MD_OP_PUT) {
- rc = LNetPut(self, *mdh, LNET_NOACK_REQ, peer,
- portal, matchbits, 0, 0);
- } else {
- LASSERT(options & LNET_MD_OP_GET);
-
- rc = LNetGet(self, *mdh, peer, portal, matchbits, 0);
- }
-
- if (rc) {
- CERROR("LNet%s(%s, %d, %lld) failed: %d\n",
- options & LNET_MD_OP_PUT ? "Put" : "Get",
- libcfs_id2str(peer), portal, matchbits, rc);
-
- /*
- * The forthcoming unlink event will complete this operation
- * with failure, so fall through and return success here.
- */
- rc = LNetMDUnlink(*mdh);
- LASSERT(!rc);
- } else {
- CDEBUG(D_NET, "Posted active RDMA: peer %s, portal %u, matchbits %#llx\n",
- libcfs_id2str(peer), portal, matchbits);
- }
- return 0;
-}
-
-static int
-srpc_post_passive_rqtbuf(int service, int local, void *buf, int len,
- struct lnet_handle_md *mdh, struct srpc_event *ev)
-{
- struct lnet_process_id any = { 0 };
-
- any.nid = LNET_NID_ANY;
- any.pid = LNET_PID_ANY;
-
- return srpc_post_passive_rdma(srpc_serv_portal(service),
- local, service, buf, len,
- LNET_MD_OP_PUT, any, mdh, ev);
-}
-
-static int
-srpc_service_post_buffer(struct srpc_service_cd *scd, struct srpc_buffer *buf)
-__must_hold(&scd->scd_lock)
-{
- struct srpc_service *sv = scd->scd_svc;
- struct srpc_msg *msg = &buf->buf_msg;
- int rc;
-
- LNetInvalidateMDHandle(&buf->buf_mdh);
- list_add(&buf->buf_list, &scd->scd_buf_posted);
- scd->scd_buf_nposted++;
- spin_unlock(&scd->scd_lock);
-
- rc = srpc_post_passive_rqtbuf(sv->sv_id,
- !srpc_serv_is_framework(sv),
- msg, sizeof(*msg), &buf->buf_mdh,
- &scd->scd_ev);
-
- /*
- * At this point, an RPC (new or delayed) may have arrived in
- * msg and its event handler may already have been called, so we
- * must add buf to scd_buf_posted _before_ dropping scd_lock
- */
- spin_lock(&scd->scd_lock);
-
- if (!rc) {
- if (!sv->sv_shuttingdown)
- return 0;
-
- spin_unlock(&scd->scd_lock);
- /*
- * srpc_shutdown_service might have tried to unlink me
- * when my buf_mdh was still invalid
- */
- LNetMDUnlink(buf->buf_mdh);
- spin_lock(&scd->scd_lock);
- return 0;
- }
-
- scd->scd_buf_nposted--;
- if (sv->sv_shuttingdown)
- return rc; /* don't allow to change scd_buf_posted */
-
- list_del(&buf->buf_list);
- spin_unlock(&scd->scd_lock);
-
- kfree(buf);
-
- spin_lock(&scd->scd_lock);
- return rc;
-}
-
-void
-srpc_add_buffer(struct swi_workitem *wi)
-{
- struct srpc_service_cd *scd = container_of(wi, struct srpc_service_cd, scd_buf_wi);
- struct srpc_buffer *buf;
- int rc = 0;
-
- /*
- * This is called by workitem scheduler threads; these threads
- * should have had their CPT affinity set, so buffers will be
- * posted on the CPT-local list of the portal
- */
- spin_lock(&scd->scd_lock);
-
- while (scd->scd_buf_adjust > 0 &&
- !scd->scd_svc->sv_shuttingdown) {
- scd->scd_buf_adjust--; /* consume it */
- scd->scd_buf_posting++;
-
- spin_unlock(&scd->scd_lock);
-
- buf = kzalloc(sizeof(*buf), GFP_NOFS);
- if (!buf) {
- CERROR("Failed to add new buf to service: %s\n",
- scd->scd_svc->sv_name);
- spin_lock(&scd->scd_lock);
- rc = -ENOMEM;
- break;
- }
-
- spin_lock(&scd->scd_lock);
- if (scd->scd_svc->sv_shuttingdown) {
- spin_unlock(&scd->scd_lock);
- kfree(buf);
-
- spin_lock(&scd->scd_lock);
- rc = -ESHUTDOWN;
- break;
- }
-
- rc = srpc_service_post_buffer(scd, buf);
- if (rc)
- break; /* buf has been freed inside */
-
- LASSERT(scd->scd_buf_posting > 0);
- scd->scd_buf_posting--;
- scd->scd_buf_total++;
- scd->scd_buf_low = max(2, scd->scd_buf_total / 4);
- }
-
- if (rc) {
- scd->scd_buf_err_stamp = ktime_get_real_seconds();
- scd->scd_buf_err = rc;
-
- LASSERT(scd->scd_buf_posting > 0);
- scd->scd_buf_posting--;
- }
-
- spin_unlock(&scd->scd_lock);
-}
-
-int
-srpc_service_add_buffers(struct srpc_service *sv, int nbuffer)
-{
- struct srpc_service_cd *scd;
- int rc = 0;
- int i;
-
- LASSERTF(nbuffer > 0, "nbuffer must be positive: %d\n", nbuffer);
-
- cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
- spin_lock(&scd->scd_lock);
-
- scd->scd_buf_err = 0;
- scd->scd_buf_err_stamp = 0;
- scd->scd_buf_posting = 0;
- scd->scd_buf_adjust = nbuffer;
- /* start to post buffers */
- swi_schedule_workitem(&scd->scd_buf_wi);
- spin_unlock(&scd->scd_lock);
-
- /* framework service only post buffer for one partition */
- if (srpc_serv_is_framework(sv))
- break;
- }
-
- cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
- spin_lock(&scd->scd_lock);
- /*
- * NB: srpc_service_add_buffers() can be called inside the
- * thread context of lst_serial_wq, and we don't normally
- * allow sleeping inside the thread context of a WI scheduler
- * because it blocks the current scheduler thread from doing
- * anything else; even worse, it could deadlock if it's
- * waiting on a result from another WI of the same scheduler.
- * However, it's safe here because scd_buf_wi is scheduled
- * by a thread in a different WI scheduler (lst_test_wq),
- * so there is no risk of deadlock, though this could block
- * all WIs pending on lst_serial_wq for a moment, which is
- * not good but not fatal.
- */
- lst_wait_until(scd->scd_buf_err ||
- (!scd->scd_buf_adjust &&
- !scd->scd_buf_posting),
- scd->scd_lock, "waiting for adding buffer\n");
-
- if (scd->scd_buf_err && !rc)
- rc = scd->scd_buf_err;
-
- spin_unlock(&scd->scd_lock);
- }
-
- return rc;
-}
-
-void
-srpc_service_remove_buffers(struct srpc_service *sv, int nbuffer)
-{
- struct srpc_service_cd *scd;
- int num;
- int i;
-
- LASSERT(!sv->sv_shuttingdown);
-
- cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
- spin_lock(&scd->scd_lock);
-
- num = scd->scd_buf_total + scd->scd_buf_posting;
- scd->scd_buf_adjust -= min(nbuffer, num);
-
- spin_unlock(&scd->scd_lock);
- }
-}
-
-/* returns 1 if sv has finished, otherwise 0 */
-int
-srpc_finish_service(struct srpc_service *sv)
-{
- struct srpc_service_cd *scd;
- struct srpc_server_rpc *rpc;
- int i;
-
- LASSERT(sv->sv_shuttingdown); /* srpc_shutdown_service called */
-
- cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
- swi_cancel_workitem(&scd->scd_buf_wi);
-
- spin_lock(&scd->scd_lock);
-
- if (scd->scd_buf_nposted > 0) {
- CDEBUG(D_NET, "waiting for %d posted buffers to unlink\n",
- scd->scd_buf_nposted);
- spin_unlock(&scd->scd_lock);
- return 0;
- }
-
- if (list_empty(&scd->scd_rpc_active)) {
- spin_unlock(&scd->scd_lock);
- continue;
- }
-
- rpc = list_entry(scd->scd_rpc_active.next,
- struct srpc_server_rpc, srpc_list);
- CNETERR("Active RPC %p on shutdown: sv %s, peer %s, wi %s, ev fired %d type %d status %d lnet %d\n",
- rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer),
- swi_state2str(rpc->srpc_wi.swi_state),
- rpc->srpc_ev.ev_fired, rpc->srpc_ev.ev_type,
- rpc->srpc_ev.ev_status, rpc->srpc_ev.ev_lnet);
- spin_unlock(&scd->scd_lock);
- return 0;
- }
-
- /* no lock needed from now on */
- srpc_service_fini(sv);
- return 1;
-}
-
-/* called with scd->scd_lock held */
-static void
-srpc_service_recycle_buffer(struct srpc_service_cd *scd,
- struct srpc_buffer *buf)
-__must_hold(&scd->scd_lock)
-{
- if (!scd->scd_svc->sv_shuttingdown && scd->scd_buf_adjust >= 0) {
- if (srpc_service_post_buffer(scd, buf)) {
- CWARN("Failed to post %s buffer\n",
- scd->scd_svc->sv_name);
- }
- return;
- }
-
- /* service is shutting down, or we want to recycle some buffers */
- scd->scd_buf_total--;
-
- if (scd->scd_buf_adjust < 0) {
- scd->scd_buf_adjust++;
- if (scd->scd_buf_adjust < 0 &&
- !scd->scd_buf_total && !scd->scd_buf_posting) {
- CDEBUG(D_INFO,
- "Try to recycle %d buffers but nothing left\n",
- scd->scd_buf_adjust);
- scd->scd_buf_adjust = 0;
- }
- }
-
- spin_unlock(&scd->scd_lock);
- kfree(buf);
- spin_lock(&scd->scd_lock);
-}
-
-void
-srpc_abort_service(struct srpc_service *sv)
-{
- struct srpc_service_cd *scd;
- struct srpc_server_rpc *rpc;
- int i;
-
- CDEBUG(D_NET, "Aborting service: id %d, name %s\n",
- sv->sv_id, sv->sv_name);
-
- cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
- spin_lock(&scd->scd_lock);
-
- /*
- * schedule in-flight RPCs to notice the abort; NB: this races
- * with incoming RPCs, and a complete fix would make test
- * RPCs carry the session ID in their headers
- */
- list_for_each_entry(rpc, &scd->scd_rpc_active, srpc_list) {
- rpc->srpc_aborted = 1;
- swi_schedule_workitem(&rpc->srpc_wi);
- }
-
- spin_unlock(&scd->scd_lock);
- }
-}
-
-void
-srpc_shutdown_service(struct srpc_service *sv)
-{
- struct srpc_service_cd *scd;
- struct srpc_server_rpc *rpc;
- struct srpc_buffer *buf;
- int i;
-
- CDEBUG(D_NET, "Shutting down service: id %d, name %s\n",
- sv->sv_id, sv->sv_name);
-
- cfs_percpt_for_each(scd, i, sv->sv_cpt_data)
- spin_lock(&scd->scd_lock);
-
- sv->sv_shuttingdown = 1; /* i.e. no new active RPC */
-
- cfs_percpt_for_each(scd, i, sv->sv_cpt_data)
- spin_unlock(&scd->scd_lock);
-
- cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
- spin_lock(&scd->scd_lock);
-
- /* schedule in-flight RPCs to notice the shutdown */
- list_for_each_entry(rpc, &scd->scd_rpc_active, srpc_list)
- swi_schedule_workitem(&rpc->srpc_wi);
-
- spin_unlock(&scd->scd_lock);
-
- /*
- * OK to traverse scd_buf_posted without lock, since no one
- * touches scd_buf_posted now
- */
- list_for_each_entry(buf, &scd->scd_buf_posted, buf_list)
- LNetMDUnlink(buf->buf_mdh);
- }
-}
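-
-/*
- * Note (editor's summary of the code above): srpc_shutdown_service()
- * takes every per-CPT lock before setting sv_shuttingdown and releases
- * them all afterwards, so the flag flips atomically with respect to all
- * service partitions.
- */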
-
-static int
-srpc_send_request(struct srpc_client_rpc *rpc)
-{
- struct srpc_event *ev = &rpc->crpc_reqstev;
- int rc;
-
- ev->ev_fired = 0;
- ev->ev_data = rpc;
- ev->ev_type = SRPC_REQUEST_SENT;
-
- rc = srpc_post_active_rdma(srpc_serv_portal(rpc->crpc_service),
- rpc->crpc_service, &rpc->crpc_reqstmsg,
- sizeof(struct srpc_msg), LNET_MD_OP_PUT,
- rpc->crpc_dest, LNET_NID_ANY,
- &rpc->crpc_reqstmdh, ev);
- if (rc) {
- LASSERT(rc == -ENOMEM);
- ev->ev_fired = 1; /* no more event expected */
- }
- return rc;
-}
-
-static int
-srpc_prepare_reply(struct srpc_client_rpc *rpc)
-{
- struct srpc_event *ev = &rpc->crpc_replyev;
- __u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.rpyid;
- int rc;
-
- ev->ev_fired = 0;
- ev->ev_data = rpc;
- ev->ev_type = SRPC_REPLY_RCVD;
-
- *id = srpc_next_id();
-
- rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id,
- &rpc->crpc_replymsg,
- sizeof(struct srpc_msg),
- LNET_MD_OP_PUT, rpc->crpc_dest,
- &rpc->crpc_replymdh, ev);
- if (rc) {
- LASSERT(rc == -ENOMEM);
- ev->ev_fired = 1; /* no more event expected */
- }
- return rc;
-}
-
-static int
-srpc_prepare_bulk(struct srpc_client_rpc *rpc)
-{
- struct srpc_bulk *bk = &rpc->crpc_bulk;
- struct srpc_event *ev = &rpc->crpc_bulkev;
- __u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.bulkid;
- int rc;
- int opt;
-
- LASSERT(bk->bk_niov <= LNET_MAX_IOV);
-
- if (!bk->bk_niov)
- return 0; /* nothing to do */
-
- opt = bk->bk_sink ? LNET_MD_OP_PUT : LNET_MD_OP_GET;
- opt |= LNET_MD_KIOV;
-
- ev->ev_fired = 0;
- ev->ev_data = rpc;
- ev->ev_type = SRPC_BULK_REQ_RCVD;
-
- *id = srpc_next_id();
-
- rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id,
- &bk->bk_iovs[0], bk->bk_niov, opt,
- rpc->crpc_dest, &bk->bk_mdh, ev);
- if (rc) {
- LASSERT(rc == -ENOMEM);
- ev->ev_fired = 1; /* no more event expected */
- }
- return rc;
-}
-
-static int
-srpc_do_bulk(struct srpc_server_rpc *rpc)
-{
- struct srpc_event *ev = &rpc->srpc_ev;
- struct srpc_bulk *bk = rpc->srpc_bulk;
- __u64 id = rpc->srpc_reqstbuf->buf_msg.msg_body.reqst.bulkid;
- int rc;
- int opt;
-
- LASSERT(bk);
-
- opt = bk->bk_sink ? LNET_MD_OP_GET : LNET_MD_OP_PUT;
- opt |= LNET_MD_KIOV;
-
- ev->ev_fired = 0;
- ev->ev_data = rpc;
- ev->ev_type = bk->bk_sink ? SRPC_BULK_GET_RPLD : SRPC_BULK_PUT_SENT;
-
- rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, id,
- &bk->bk_iovs[0], bk->bk_niov, opt,
- rpc->srpc_peer, rpc->srpc_self,
- &bk->bk_mdh, ev);
- if (rc)
- ev->ev_fired = 1; /* no more event expected */
- return rc;
-}
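Note how srpc_prepare_bulk() (client side, passive) and srpc_do_bulk() (server side, active) derive their MD options from each descriptor's own bk_sink flag, i.e. from whether that side receives the data. The pairing, factored out as a sketch using the real LNET_MD_* flags (the helper names are invented):

        /* passive side: advertise what the peer may do to the buffer */
        static unsigned int passive_bulk_opt(int i_am_sink)
        {
                return (i_am_sink ? LNET_MD_OP_PUT : LNET_MD_OP_GET) | LNET_MD_KIOV;
        }

        /* active side: pick the verb this side will issue */
        static unsigned int active_bulk_opt(int i_am_sink)
        {
                return (i_am_sink ? LNET_MD_OP_GET : LNET_MD_OP_PUT) | LNET_MD_KIOV;
        }

For a single transfer the two sides hold opposite bk_sink values, so an active PUT always lands in a passive PUT sink and an active GET always draws from a passive GET source.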
-
-/* only called from srpc_handle_rpc */
-static void
-srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status)
-{
- struct srpc_service_cd *scd = rpc->srpc_scd;
- struct srpc_service *sv = scd->scd_svc;
- struct srpc_buffer *buffer;
-
- LASSERT(status || rpc->srpc_wi.swi_state == SWI_STATE_DONE);
-
- rpc->srpc_status = status;
-
- CDEBUG_LIMIT(!status ? D_NET : D_NETERROR,
- "Server RPC %p done: service %s, peer %s, status %s:%d\n",
- rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer),
- swi_state2str(rpc->srpc_wi.swi_state), status);
-
- if (status) {
- spin_lock(&srpc_data.rpc_glock);
- srpc_data.rpc_counters.rpcs_dropped++;
- spin_unlock(&srpc_data.rpc_glock);
- }
-
- if (rpc->srpc_done)
- (*rpc->srpc_done) (rpc);
- LASSERT(!rpc->srpc_bulk);
-
- spin_lock(&scd->scd_lock);
-
- if (rpc->srpc_reqstbuf) {
- /*
- * NB: srpc_service_recycle_buffer may drop scd_lock, but
- * sv can't go away because scd_rpc_active is not empty
- */
- srpc_service_recycle_buffer(scd, rpc->srpc_reqstbuf);
- rpc->srpc_reqstbuf = NULL;
- }
-
- list_del(&rpc->srpc_list); /* from scd->scd_rpc_active */
-
- /*
- * No one can schedule me now since:
- * - I'm not on scd_rpc_active.
- * - all LNet events have been fired.
- */
- LASSERT(rpc->srpc_ev.ev_fired);
-
- if (!sv->sv_shuttingdown && !list_empty(&scd->scd_buf_blocked)) {
- buffer = list_entry(scd->scd_buf_blocked.next,
- struct srpc_buffer, buf_list);
- list_del(&buffer->buf_list);
-
- srpc_init_server_rpc(rpc, scd, buffer);
- list_add_tail(&rpc->srpc_list, &scd->scd_rpc_active);
- swi_schedule_workitem(&rpc->srpc_wi);
- } else {
- list_add(&rpc->srpc_list, &scd->scd_rpc_free);
- }
-
- spin_unlock(&scd->scd_lock);
-}
-
-/* handles an incoming RPC */
-void
-srpc_handle_rpc(struct swi_workitem *wi)
-{
- struct srpc_server_rpc *rpc = container_of(wi, struct srpc_server_rpc, srpc_wi);
- struct srpc_service_cd *scd = rpc->srpc_scd;
- struct srpc_service *sv = scd->scd_svc;
- struct srpc_event *ev = &rpc->srpc_ev;
- int rc = 0;
-
- LASSERT(wi == &rpc->srpc_wi);
-
- spin_lock(&scd->scd_lock);
-
- if (sv->sv_shuttingdown || rpc->srpc_aborted) {
- spin_unlock(&scd->scd_lock);
-
- if (rpc->srpc_bulk)
- LNetMDUnlink(rpc->srpc_bulk->bk_mdh);
- LNetMDUnlink(rpc->srpc_replymdh);
-
- if (ev->ev_fired) { /* no more event, OK to finish */
- srpc_server_rpc_done(rpc, -ESHUTDOWN);
- }
- return;
- }
-
- spin_unlock(&scd->scd_lock);
-
- switch (wi->swi_state) {
- default:
- LBUG();
- case SWI_STATE_NEWBORN: {
- struct srpc_msg *msg;
- struct srpc_generic_reply *reply;
-
- msg = &rpc->srpc_reqstbuf->buf_msg;
- reply = &rpc->srpc_replymsg.msg_body.reply;
-
- if (!msg->msg_magic) {
- /* moaned already in srpc_lnet_ev_handler */
- srpc_server_rpc_done(rpc, -EBADMSG);
- return;
- }
-
- srpc_unpack_msg_hdr(msg);
- if (msg->msg_version != SRPC_MSG_VERSION) {
- CWARN("Version mismatch: %u, %u expected, from %s\n",
- msg->msg_version, SRPC_MSG_VERSION,
- libcfs_id2str(rpc->srpc_peer));
- reply->status = EPROTO;
- /* fall through and send reply */
- } else {
- reply->status = 0;
- rc = (*sv->sv_handler)(rpc);
- LASSERT(!reply->status || !rpc->srpc_bulk);
- if (rc) {
- srpc_server_rpc_done(rpc, rc);
- return;
- }
- }
-
- wi->swi_state = SWI_STATE_BULK_STARTED;
-
- if (rpc->srpc_bulk) {
- rc = srpc_do_bulk(rpc);
- if (!rc)
- return; /* wait for bulk */
-
- LASSERT(ev->ev_fired);
- ev->ev_status = rc;
- }
- }
- /* fall through */
- case SWI_STATE_BULK_STARTED:
- LASSERT(!rpc->srpc_bulk || ev->ev_fired);
-
- if (rpc->srpc_bulk) {
- rc = ev->ev_status;
-
- if (sv->sv_bulk_ready)
- rc = (*sv->sv_bulk_ready) (rpc, rc);
-
- if (rc) {
- srpc_server_rpc_done(rpc, rc);
- return;
- }
- }
-
- wi->swi_state = SWI_STATE_REPLY_SUBMITTED;
- rc = srpc_send_reply(rpc);
- if (!rc)
- return; /* wait for reply */
- srpc_server_rpc_done(rpc, rc);
- return;
-
- case SWI_STATE_REPLY_SUBMITTED:
- if (!ev->ev_fired) {
- CERROR("RPC %p: bulk %p, service %d\n",
- rpc, rpc->srpc_bulk, sv->sv_id);
- CERROR("Event: status %d, type %d, lnet %d\n",
- ev->ev_status, ev->ev_type, ev->ev_lnet);
- LASSERT(ev->ev_fired);
- }
-
- wi->swi_state = SWI_STATE_DONE;
- srpc_server_rpc_done(rpc, ev->ev_status);
- return;
- }
-}
-
-static void
-srpc_client_rpc_expired(void *data)
-{
- struct srpc_client_rpc *rpc = data;
-
- CWARN("Client RPC expired: service %d, peer %s, timeout %d.\n",
- rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
- rpc->crpc_timeout);
-
- spin_lock(&rpc->crpc_lock);
-
- rpc->crpc_timeout = 0;
- srpc_abort_rpc(rpc, -ETIMEDOUT);
-
- spin_unlock(&rpc->crpc_lock);
-
- spin_lock(&srpc_data.rpc_glock);
- srpc_data.rpc_counters.rpcs_expired++;
- spin_unlock(&srpc_data.rpc_glock);
-}
-
-static void
-srpc_add_client_rpc_timer(struct srpc_client_rpc *rpc)
-{
- struct stt_timer *timer = &rpc->crpc_timer;
-
- if (!rpc->crpc_timeout)
- return;
-
- INIT_LIST_HEAD(&timer->stt_list);
- timer->stt_data = rpc;
- timer->stt_func = srpc_client_rpc_expired;
- timer->stt_expires = ktime_get_real_seconds() + rpc->crpc_timeout;
- stt_add_timer(timer);
-}
-
-/*
- * Called with rpc->crpc_lock held.
- *
- * Upon exit the RPC expiry timer is not queued and the handler is not
- * running on any CPU.
- */
-static void
-srpc_del_client_rpc_timer(struct srpc_client_rpc *rpc)
-{
- /* timer not planted or already exploded */
- if (!rpc->crpc_timeout)
- return;
-
- /* timer successfully defused */
- if (stt_del_timer(&rpc->crpc_timer))
- return;
-
- /* timer detonated, wait for it to explode */
- while (rpc->crpc_timeout) {
- spin_unlock(&rpc->crpc_lock);
-
- schedule();
-
- spin_lock(&rpc->crpc_lock);
- }
-}
-
-static void
-srpc_client_rpc_done(struct srpc_client_rpc *rpc, int status)
-{
- struct swi_workitem *wi = &rpc->crpc_wi;
-
- LASSERT(status || wi->swi_state == SWI_STATE_DONE);
-
- spin_lock(&rpc->crpc_lock);
-
- rpc->crpc_closed = 1;
- if (!rpc->crpc_status)
- rpc->crpc_status = status;
-
- srpc_del_client_rpc_timer(rpc);
-
- CDEBUG_LIMIT(!status ? D_NET : D_NETERROR,
- "Client RPC done: service %d, peer %s, status %s:%d:%d\n",
- rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
- swi_state2str(wi->swi_state), rpc->crpc_aborted, status);
-
- /*
- * No one can schedule me now since:
- * - RPC timer has been defused.
- * - all LNet events have been fired.
- * - crpc_closed has been set, preventing srpc_abort_rpc from
- * scheduling me.
- */
- LASSERT(!srpc_event_pending(rpc));
-
- spin_unlock(&rpc->crpc_lock);
-
- (*rpc->crpc_done)(rpc);
-}
-
-/* sends an outgoing RPC */
-void
-srpc_send_rpc(struct swi_workitem *wi)
-{
- int rc = 0;
- struct srpc_client_rpc *rpc;
- struct srpc_msg *reply;
- int do_bulk;
-
- LASSERT(wi);
-
- rpc = container_of(wi, struct srpc_client_rpc, crpc_wi);
-
- LASSERT(rpc);
- LASSERT(wi == &rpc->crpc_wi);
-
- reply = &rpc->crpc_replymsg;
- do_bulk = rpc->crpc_bulk.bk_niov > 0;
-
- spin_lock(&rpc->crpc_lock);
-
- if (rpc->crpc_aborted) {
- spin_unlock(&rpc->crpc_lock);
- goto abort;
- }
-
- spin_unlock(&rpc->crpc_lock);
-
- switch (wi->swi_state) {
- default:
- LBUG();
- case SWI_STATE_NEWBORN:
- LASSERT(!srpc_event_pending(rpc));
-
- rc = srpc_prepare_reply(rpc);
- if (rc) {
- srpc_client_rpc_done(rpc, rc);
- return;
- }
-
- rc = srpc_prepare_bulk(rpc);
- if (rc)
- break;
-
- wi->swi_state = SWI_STATE_REQUEST_SUBMITTED;
- rc = srpc_send_request(rpc);
- break;
-
- case SWI_STATE_REQUEST_SUBMITTED:
- /*
- * CAVEAT EMPTOR: rqtev, rpyev, and bulkev may come in any
- * order; however, they're processed in a strict order:
- * rqt, rpy, and bulk.
- */
- if (!rpc->crpc_reqstev.ev_fired)
- break;
-
- rc = rpc->crpc_reqstev.ev_status;
- if (rc)
- break;
-
- wi->swi_state = SWI_STATE_REQUEST_SENT;
- /* perhaps more events */
- /* fall through */
- case SWI_STATE_REQUEST_SENT: {
- enum srpc_msg_type type = srpc_service2reply(rpc->crpc_service);
-
- if (!rpc->crpc_replyev.ev_fired)
- break;
-
- rc = rpc->crpc_replyev.ev_status;
- if (rc)
- break;
-
- srpc_unpack_msg_hdr(reply);
- if (reply->msg_type != type ||
- (reply->msg_magic != SRPC_MSG_MAGIC &&
- reply->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
- CWARN("Bad message from %s: type %u (%d expected), magic %u (%d expected).\n",
- libcfs_id2str(rpc->crpc_dest),
- reply->msg_type, type,
- reply->msg_magic, SRPC_MSG_MAGIC);
- rc = -EBADMSG;
- break;
- }
-
- if (do_bulk && reply->msg_body.reply.status) {
- CWARN("Remote error %d at %s, unlink bulk buffer in case peer didn't initiate bulk transfer\n",
- reply->msg_body.reply.status,
- libcfs_id2str(rpc->crpc_dest));
- LNetMDUnlink(rpc->crpc_bulk.bk_mdh);
- }
-
- wi->swi_state = SWI_STATE_REPLY_RECEIVED;
- }
- /* fall through */
- case SWI_STATE_REPLY_RECEIVED:
- if (do_bulk && !rpc->crpc_bulkev.ev_fired)
- break;
-
- rc = do_bulk ? rpc->crpc_bulkev.ev_status : 0;
-
- /*
- * Bulk buffer was unlinked due to remote error. Clear error
- * since reply buffer still contains valid data.
- * NB rpc->crpc_done shouldn't look into bulk data in case of
- * remote error.
- */
- if (do_bulk && rpc->crpc_bulkev.ev_lnet == LNET_EVENT_UNLINK &&
- !rpc->crpc_status && reply->msg_body.reply.status)
- rc = 0;
-
- wi->swi_state = SWI_STATE_DONE;
- srpc_client_rpc_done(rpc, rc);
- return;
- }
-
- if (rc) {
- spin_lock(&rpc->crpc_lock);
- srpc_abort_rpc(rpc, rc);
- spin_unlock(&rpc->crpc_lock);
- }
-
-abort:
- if (rpc->crpc_aborted) {
- LNetMDUnlink(rpc->crpc_reqstmdh);
- LNetMDUnlink(rpc->crpc_replymdh);
- LNetMDUnlink(rpc->crpc_bulk.bk_mdh);
-
- if (!srpc_event_pending(rpc)) {
- srpc_client_rpc_done(rpc, -EINTR);
- return;
- }
- }
-}
-
-struct srpc_client_rpc *
-srpc_create_client_rpc(struct lnet_process_id peer, int service,
- int nbulkiov, int bulklen,
- void (*rpc_done)(struct srpc_client_rpc *),
- void (*rpc_fini)(struct srpc_client_rpc *), void *priv)
-{
- struct srpc_client_rpc *rpc;
-
- rpc = kzalloc(offsetof(struct srpc_client_rpc,
- crpc_bulk.bk_iovs[nbulkiov]), GFP_KERNEL);
- if (!rpc)
- return NULL;
-
- srpc_init_client_rpc(rpc, peer, service, nbulkiov,
- bulklen, rpc_done, rpc_fini, priv);
- return rpc;
-}
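A hedged usage sketch of this constructor (the function and callback names are invented; real callers, e.g. the framework code, also fill in the request body and set a timeout before posting):

        static void my_rpc_done(struct srpc_client_rpc *rpc)
        {
                CDEBUG(D_NET, "RPC to %s done: %d\n",
                       libcfs_id2str(rpc->crpc_dest), rpc->crpc_status);
                srpc_client_rpc_decref(rpc);    /* surrender creator's ref */
        }

        static int my_send_ping(struct lnet_process_id peer)
        {
                struct srpc_client_rpc *rpc;

                rpc = srpc_create_client_rpc(peer, SRPC_SERVICE_PING, 0, 0,
                                             my_rpc_done, NULL, NULL);
                if (!rpc)
                        return -ENOMEM;

                spin_lock(&rpc->crpc_lock);
                srpc_post_rpc(rpc);             /* must hold crpc_lock */
                spin_unlock(&rpc->crpc_lock);
                return 0;
        }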
-
-/* called with rpc->crpc_lock held */
-void
-srpc_abort_rpc(struct srpc_client_rpc *rpc, int why)
-{
- LASSERT(why);
-
- if (rpc->crpc_aborted || /* already aborted */
- rpc->crpc_closed) /* callback imminent */
- return;
-
- CDEBUG(D_NET, "Aborting RPC: service %d, peer %s, state %s, why %d\n",
- rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
- swi_state2str(rpc->crpc_wi.swi_state), why);
-
- rpc->crpc_aborted = 1;
- rpc->crpc_status = why;
- swi_schedule_workitem(&rpc->crpc_wi);
-}
-
-/* called with rpc->crpc_lock held */
-void
-srpc_post_rpc(struct srpc_client_rpc *rpc)
-{
- LASSERT(!rpc->crpc_aborted);
- LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING);
-
- CDEBUG(D_NET, "Posting RPC: peer %s, service %d, timeout %d\n",
- libcfs_id2str(rpc->crpc_dest), rpc->crpc_service,
- rpc->crpc_timeout);
-
- srpc_add_client_rpc_timer(rpc);
- swi_schedule_workitem(&rpc->crpc_wi);
-}
-
-int
-srpc_send_reply(struct srpc_server_rpc *rpc)
-{
- struct srpc_event *ev = &rpc->srpc_ev;
- struct srpc_msg *msg = &rpc->srpc_replymsg;
- struct srpc_buffer *buffer = rpc->srpc_reqstbuf;
- struct srpc_service_cd *scd = rpc->srpc_scd;
- struct srpc_service *sv = scd->scd_svc;
- __u64 rpyid;
- int rc;
-
- LASSERT(buffer);
- rpyid = buffer->buf_msg.msg_body.reqst.rpyid;
-
- spin_lock(&scd->scd_lock);
-
- if (!sv->sv_shuttingdown && !srpc_serv_is_framework(sv)) {
- /*
- * Repost buffer before replying since test client
- * might send me another RPC once it gets the reply
- */
- if (srpc_service_post_buffer(scd, buffer))
- CWARN("Failed to repost %s buffer\n", sv->sv_name);
- rpc->srpc_reqstbuf = NULL;
- }
-
- spin_unlock(&scd->scd_lock);
-
- ev->ev_fired = 0;
- ev->ev_data = rpc;
- ev->ev_type = SRPC_REPLY_SENT;
-
- msg->msg_magic = SRPC_MSG_MAGIC;
- msg->msg_version = SRPC_MSG_VERSION;
- msg->msg_type = srpc_service2reply(sv->sv_id);
-
- rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, rpyid, msg,
- sizeof(*msg), LNET_MD_OP_PUT,
- rpc->srpc_peer, rpc->srpc_self,
- &rpc->srpc_replymdh, ev);
- if (rc)
- ev->ev_fired = 1; /* no more event expected */
- return rc;
-}
-
-/* when in the kernel, always called with LNET_LOCK() held and in thread context */
-static void
-srpc_lnet_ev_handler(struct lnet_event *ev)
-{
- struct srpc_service_cd *scd;
- struct srpc_event *rpcev = ev->md.user_ptr;
- struct srpc_client_rpc *crpc;
- struct srpc_server_rpc *srpc;
- struct srpc_buffer *buffer;
- struct srpc_service *sv;
- struct srpc_msg *msg;
- enum srpc_msg_type type;
-
- LASSERT(!in_interrupt());
-
- if (ev->status) {
- __u32 errors;
-
- spin_lock(&srpc_data.rpc_glock);
- if (ev->status != -ECANCELED) /* cancellation is not an error */
- srpc_data.rpc_counters.errors++;
- errors = srpc_data.rpc_counters.errors;
- spin_unlock(&srpc_data.rpc_glock);
-
- CNETERR("LNet event status %d type %d, RPC errors %u\n",
- ev->status, ev->type, errors);
- }
-
- rpcev->ev_lnet = ev->type;
-
- switch (rpcev->ev_type) {
- default:
- CERROR("Unknown event: status %d, type %d, lnet %d\n",
- rpcev->ev_status, rpcev->ev_type, rpcev->ev_lnet);
- LBUG();
- case SRPC_REQUEST_SENT:
- if (!ev->status && ev->type != LNET_EVENT_UNLINK) {
- spin_lock(&srpc_data.rpc_glock);
- srpc_data.rpc_counters.rpcs_sent++;
- spin_unlock(&srpc_data.rpc_glock);
- }
- /* fall through */
- case SRPC_REPLY_RCVD:
- case SRPC_BULK_REQ_RCVD:
- crpc = rpcev->ev_data;
-
- if (rpcev != &crpc->crpc_reqstev &&
- rpcev != &crpc->crpc_replyev &&
- rpcev != &crpc->crpc_bulkev) {
- CERROR("rpcev %p, crpc %p, reqstev %p, replyev %p, bulkev %p\n",
- rpcev, crpc, &crpc->crpc_reqstev,
- &crpc->crpc_replyev, &crpc->crpc_bulkev);
- CERROR("Bad event: status %d, type %d, lnet %d\n",
- rpcev->ev_status, rpcev->ev_type, rpcev->ev_lnet);
- LBUG();
- }
-
- spin_lock(&crpc->crpc_lock);
-
- LASSERT(!rpcev->ev_fired);
- rpcev->ev_fired = 1;
- rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
- -EINTR : ev->status;
- swi_schedule_workitem(&crpc->crpc_wi);
-
- spin_unlock(&crpc->crpc_lock);
- break;
-
- case SRPC_REQUEST_RCVD:
- scd = rpcev->ev_data;
- sv = scd->scd_svc;
-
- LASSERT(rpcev == &scd->scd_ev);
-
- spin_lock(&scd->scd_lock);
-
- LASSERT(ev->unlinked);
- LASSERT(ev->type == LNET_EVENT_PUT ||
- ev->type == LNET_EVENT_UNLINK);
- LASSERT(ev->type != LNET_EVENT_UNLINK ||
- sv->sv_shuttingdown);
-
- buffer = container_of(ev->md.start, struct srpc_buffer, buf_msg);
- buffer->buf_peer = ev->initiator;
- buffer->buf_self = ev->target.nid;
-
- LASSERT(scd->scd_buf_nposted > 0);
- scd->scd_buf_nposted--;
-
- if (sv->sv_shuttingdown) {
- /*
- * Leave buffer on scd->scd_buf_posted since
- * srpc_finish_service needs to traverse it.
- */
- spin_unlock(&scd->scd_lock);
- break;
- }
-
- if (scd->scd_buf_err_stamp &&
- scd->scd_buf_err_stamp < ktime_get_real_seconds()) {
- /* re-enable adding buffer */
- scd->scd_buf_err_stamp = 0;
- scd->scd_buf_err = 0;
- }
-
- if (!scd->scd_buf_err && /* adding buffer is enabled */
- !scd->scd_buf_adjust &&
- scd->scd_buf_nposted < scd->scd_buf_low) {
- scd->scd_buf_adjust = max(scd->scd_buf_total / 2,
- SFW_TEST_WI_MIN);
- swi_schedule_workitem(&scd->scd_buf_wi);
- }
-
- list_del(&buffer->buf_list); /* from scd->scd_buf_posted */
- msg = &buffer->buf_msg;
- type = srpc_service2request(sv->sv_id);
-
- if (ev->status || ev->mlength != sizeof(*msg) ||
- (msg->msg_type != type &&
- msg->msg_type != __swab32(type)) ||
- (msg->msg_magic != SRPC_MSG_MAGIC &&
- msg->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
- CERROR("Dropping RPC (%s) from %s: status %d mlength %d type %u magic %u.\n",
- sv->sv_name, libcfs_id2str(ev->initiator),
- ev->status, ev->mlength,
- msg->msg_type, msg->msg_magic);
-
- /*
- * NB can't call srpc_service_recycle_buffer here since
- * it may call LNetM[DE]Attach. The invalid magic tells
- * srpc_handle_rpc to drop this RPC
- */
- msg->msg_magic = 0;
- }
-
- if (!list_empty(&scd->scd_rpc_free)) {
- srpc = list_entry(scd->scd_rpc_free.next,
- struct srpc_server_rpc,
- srpc_list);
- list_del(&srpc->srpc_list);
-
- srpc_init_server_rpc(srpc, scd, buffer);
- list_add_tail(&srpc->srpc_list,
- &scd->scd_rpc_active);
- swi_schedule_workitem(&srpc->srpc_wi);
- } else {
- list_add_tail(&buffer->buf_list,
- &scd->scd_buf_blocked);
- }
-
- spin_unlock(&scd->scd_lock);
-
- spin_lock(&srpc_data.rpc_glock);
- srpc_data.rpc_counters.rpcs_rcvd++;
- spin_unlock(&srpc_data.rpc_glock);
- break;
-
- case SRPC_BULK_GET_RPLD:
- LASSERT(ev->type == LNET_EVENT_SEND ||
- ev->type == LNET_EVENT_REPLY ||
- ev->type == LNET_EVENT_UNLINK);
-
- if (!ev->unlinked)
- break; /* wait for final event */
- /* fall through */
- case SRPC_BULK_PUT_SENT:
- if (!ev->status && ev->type != LNET_EVENT_UNLINK) {
- spin_lock(&srpc_data.rpc_glock);
-
- if (rpcev->ev_type == SRPC_BULK_GET_RPLD)
- srpc_data.rpc_counters.bulk_get += ev->mlength;
- else
- srpc_data.rpc_counters.bulk_put += ev->mlength;
-
- spin_unlock(&srpc_data.rpc_glock);
- }
- /* fall through */
- case SRPC_REPLY_SENT:
- srpc = rpcev->ev_data;
- scd = srpc->srpc_scd;
-
- LASSERT(rpcev == &srpc->srpc_ev);
-
- spin_lock(&scd->scd_lock);
-
- rpcev->ev_fired = 1;
- rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
- -EINTR : ev->status;
- swi_schedule_workitem(&srpc->srpc_wi);
-
- spin_unlock(&scd->scd_lock);
- break;
- }
-}
-
-int
-srpc_startup(void)
-{
- int rc;
-
- memset(&srpc_data, 0, sizeof(struct smoketest_rpc));
- spin_lock_init(&srpc_data.rpc_glock);
-
- /* 1 second pause to avoid timestamp reuse */
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- srpc_data.rpc_matchbits = ((__u64)ktime_get_real_seconds()) << 48;
-
- srpc_data.rpc_state = SRPC_STATE_NONE;
-
- rc = LNetNIInit(LNET_PID_LUSTRE);
- if (rc < 0) {
- CERROR("LNetNIInit() has failed: %d\n", rc);
- return rc;
- }
-
- srpc_data.rpc_state = SRPC_STATE_NI_INIT;
-
- LNetInvalidateEQHandle(&srpc_data.rpc_lnet_eq);
- rc = LNetEQAlloc(0, srpc_lnet_ev_handler, &srpc_data.rpc_lnet_eq);
- if (rc) {
- CERROR("LNetEQAlloc() has failed: %d\n", rc);
- goto bail;
- }
-
- rc = LNetSetLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL);
- LASSERT(!rc);
- rc = LNetSetLazyPortal(SRPC_REQUEST_PORTAL);
- LASSERT(!rc);
-
- srpc_data.rpc_state = SRPC_STATE_EQ_INIT;
-
- rc = stt_startup();
-
-bail:
- if (rc)
- srpc_shutdown();
- else
- srpc_data.rpc_state = SRPC_STATE_RUNNING;
-
- return rc;
-}
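One property of the matchbits seed above is worth spelling out: shifting the epoch seconds left by 48 bits keeps only the low 16 bits of the timestamp, so seeds repeat after 65536 seconds (about 18 hours); the one-second sleep only guarantees that two module loads within the same second cannot collide. A standalone user-space illustration of the truncation (not kernel code):

        #include <stdint.h>
        #include <stdio.h>
        #include <time.h>

        int main(void)
        {
                uint64_t secs = (uint64_t)time(NULL);
                uint64_t seed = secs << 48;     /* only secs % 65536 survives */

                printf("secs %% 65536  = %llu\n",
                       (unsigned long long)(secs & 0xffff));
                printf("seed >> 48    = %llu\n",
                       (unsigned long long)(seed >> 48));
                return 0;
        }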
-
-void
-srpc_shutdown(void)
-{
- int i;
- int rc;
- int state;
-
- state = srpc_data.rpc_state;
- srpc_data.rpc_state = SRPC_STATE_STOPPING;
-
- switch (state) {
- default:
- LBUG();
- case SRPC_STATE_RUNNING:
- spin_lock(&srpc_data.rpc_glock);
-
- for (i = 0; i <= SRPC_SERVICE_MAX_ID; i++) {
- struct srpc_service *sv = srpc_data.rpc_services[i];
-
- LASSERTF(!sv, "service not empty: id %d, name %s\n",
- i, sv->sv_name);
- }
-
- spin_unlock(&srpc_data.rpc_glock);
-
- stt_shutdown();
- /* fall through */
- case SRPC_STATE_EQ_INIT:
- rc = LNetClearLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL);
- LASSERT(!rc);
- rc = LNetClearLazyPortal(SRPC_REQUEST_PORTAL);
- LASSERT(!rc);
- rc = LNetEQFree(srpc_data.rpc_lnet_eq);
- LASSERT(!rc); /* the EQ should have no user by now */
- /* fall through */
- case SRPC_STATE_NI_INIT:
- LNetNIFini();
- }
-}
diff --git a/drivers/staging/lustre/lnet/selftest/rpc.h b/drivers/staging/lustre/lnet/selftest/rpc.h
deleted file mode 100644
index 465b5b534423..000000000000
--- a/drivers/staging/lustre/lnet/selftest/rpc.h
+++ /dev/null
@@ -1,295 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __SELFTEST_RPC_H__
-#define __SELFTEST_RPC_H__
-
-#include <uapi/linux/lnet/lnetst.h>
-
-/*
- * LST wired structures
- *
- * XXX: *REPLY == *REQST + 1
- */
-enum srpc_msg_type {
- SRPC_MSG_MKSN_REQST = 0,
- SRPC_MSG_MKSN_REPLY = 1,
- SRPC_MSG_RMSN_REQST = 2,
- SRPC_MSG_RMSN_REPLY = 3,
- SRPC_MSG_BATCH_REQST = 4,
- SRPC_MSG_BATCH_REPLY = 5,
- SRPC_MSG_STAT_REQST = 6,
- SRPC_MSG_STAT_REPLY = 7,
- SRPC_MSG_TEST_REQST = 8,
- SRPC_MSG_TEST_REPLY = 9,
- SRPC_MSG_DEBUG_REQST = 10,
- SRPC_MSG_DEBUG_REPLY = 11,
- SRPC_MSG_BRW_REQST = 12,
- SRPC_MSG_BRW_REPLY = 13,
- SRPC_MSG_PING_REQST = 14,
- SRPC_MSG_PING_REPLY = 15,
- SRPC_MSG_JOIN_REQST = 16,
- SRPC_MSG_JOIN_REPLY = 17,
-};
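The XXX note above (*REPLY == *REQST + 1) is what lets srpc_service2reply() in selftest.h derive a reply type by adding one to the request type. A compile-time spot-check of the invariant, shown with C11 _Static_assert (the kernel itself would use BUILD_BUG_ON):

        _Static_assert(SRPC_MSG_MKSN_REPLY == SRPC_MSG_MKSN_REQST + 1,
                       "reply type must directly follow its request type");
        _Static_assert(SRPC_MSG_BRW_REPLY == SRPC_MSG_BRW_REQST + 1,
                       "reply type must directly follow its request type");
        _Static_assert(SRPC_MSG_JOIN_REPLY == SRPC_MSG_JOIN_REQST + 1,
                       "reply type must directly follow its request type");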
-
-/* CAVEAT EMPTOR:
- * The first field of every srpc_*_reqst must be the reply buffer
- * matchbits, and the second field the bulk buffer matchbits, if any.
- *
- * The first field of every srpc_*_reply must be a __u32 status, and
- * the second field the session id, if needed.
- */
-struct srpc_generic_reqst {
- __u64 rpyid; /* reply buffer matchbits */
- __u64 bulkid; /* bulk buffer matchbits */
-} WIRE_ATTR;
-
-struct srpc_generic_reply {
- __u32 status;
- struct lst_sid sid;
-} WIRE_ATTR;
-
-/* FRAMEWORK RPCs */
-struct srpc_mksn_reqst {
- __u64 mksn_rpyid; /* reply buffer matchbits */
- struct lst_sid mksn_sid; /* session id */
- __u32 mksn_force; /* use brute force */
- char mksn_name[LST_NAME_SIZE];
-} WIRE_ATTR; /* make session request */
-
-struct srpc_mksn_reply {
- __u32 mksn_status; /* session status */
- struct lst_sid mksn_sid; /* session id */
- __u32 mksn_timeout; /* session timeout */
- char mksn_name[LST_NAME_SIZE];
-} WIRE_ATTR; /* make session reply */
-
-struct srpc_rmsn_reqst {
- __u64 rmsn_rpyid; /* reply buffer matchbits */
- struct lst_sid rmsn_sid; /* session id */
-} WIRE_ATTR; /* remove session request */
-
-struct srpc_rmsn_reply {
- __u32 rmsn_status;
- struct lst_sid rmsn_sid; /* session id */
-} WIRE_ATTR; /* remove session reply */
-
-struct srpc_join_reqst {
- __u64 join_rpyid; /* reply buffer matchbits */
- struct lst_sid join_sid; /* session id to join */
- char join_group[LST_NAME_SIZE]; /* group name */
-} WIRE_ATTR;
-
-struct srpc_join_reply {
- __u32 join_status; /* returned status */
- struct lst_sid join_sid; /* session id */
- __u32 join_timeout; /* # seconds of inactivity before expiry */
- char join_session[LST_NAME_SIZE]; /* session name */
-} WIRE_ATTR;
-
-struct srpc_debug_reqst {
- __u64 dbg_rpyid; /* reply buffer matchbits */
- struct lst_sid dbg_sid; /* session id */
- __u32 dbg_flags; /* bitmap of debug */
-} WIRE_ATTR;
-
-struct srpc_debug_reply {
- __u32 dbg_status; /* returned code */
- struct lst_sid dbg_sid; /* session id */
- __u32 dbg_timeout; /* session timeout */
- __u32 dbg_nbatch; /* # of batches in the node */
- char dbg_name[LST_NAME_SIZE]; /* session name */
-} WIRE_ATTR;
-
-#define SRPC_BATCH_OPC_RUN 1
-#define SRPC_BATCH_OPC_STOP 2
-#define SRPC_BATCH_OPC_QUERY 3
-
-struct srpc_batch_reqst {
- __u64 bar_rpyid; /* reply buffer matchbits */
- struct lst_sid bar_sid; /* session id */
- struct lst_bid bar_bid; /* batch id */
- __u32 bar_opc; /* create/start/stop batch */
- __u32 bar_testidx; /* index of test */
- __u32 bar_arg; /* parameters */
-} WIRE_ATTR;
-
-struct srpc_batch_reply {
- __u32 bar_status; /* status of request */
- struct lst_sid bar_sid; /* session id */
- __u32 bar_active; /* # of active tests in batch/test */
- __u32 bar_time; /* remaining time */
-} WIRE_ATTR;
-
-struct srpc_stat_reqst {
- __u64 str_rpyid; /* reply buffer matchbits */
- struct lst_sid str_sid; /* session id */
- __u32 str_type; /* type of stat */
-} WIRE_ATTR;
-
-struct srpc_stat_reply {
- __u32 str_status;
- struct lst_sid str_sid;
- struct sfw_counters str_fw;
- struct srpc_counters str_rpc;
- struct lnet_counters str_lnet;
-} WIRE_ATTR;
-
-struct test_bulk_req {
- __u32 blk_opc; /* bulk operation code */
- __u32 blk_npg; /* # of pages */
- __u32 blk_flags; /* reserved flags */
-} WIRE_ATTR;
-
-struct test_bulk_req_v1 {
- __u16 blk_opc; /* bulk operation code */
- __u16 blk_flags; /* data check flags */
- __u32 blk_len; /* data length */
- __u32 blk_offset; /* offset */
-} WIRE_ATTR;
-
-struct test_ping_req {
- __u32 png_size; /* size of ping message */
- __u32 png_flags; /* reserved flags */
-} WIRE_ATTR;
-
-struct srpc_test_reqst {
- __u64 tsr_rpyid; /* reply buffer matchbits */
- __u64 tsr_bulkid; /* bulk buffer matchbits */
- struct lst_sid tsr_sid; /* session id */
- struct lst_bid tsr_bid; /* batch id */
- __u32 tsr_service; /* test type: bulk|ping|... */
- __u32 tsr_loop; /* test client loop count or
- * # server buffers needed
- */
- __u32 tsr_concur; /* concurrency of test */
- __u8 tsr_is_client; /* is test client or not */
- __u8 tsr_stop_onerr; /* stop on error */
- __u32 tsr_ndest; /* # of dest nodes */
-
- union {
- struct test_ping_req ping;
- struct test_bulk_req bulk_v0;
- struct test_bulk_req_v1 bulk_v1;
- } tsr_u;
-} WIRE_ATTR;
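The CAVEAT EMPTOR layout rule can be spot-checked the same way; assuming these packed (WIRE_ATTR) definitions, a user-space check might read:

        #include <stddef.h>

        _Static_assert(offsetof(struct srpc_test_reqst, tsr_rpyid) == 0,
                       "reply matchbits must be the first field");
        _Static_assert(offsetof(struct srpc_test_reqst, tsr_bulkid) == 8,
                       "bulk matchbits must be the second field");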
-
-struct srpc_test_reply {
- __u32 tsr_status; /* returned code */
- struct lst_sid tsr_sid;
-} WIRE_ATTR;
-
-/* TEST RPCs */
-struct srpc_ping_reqst {
- __u64 pnr_rpyid;
- __u32 pnr_magic;
- __u32 pnr_seq;
- __u64 pnr_time_sec;
- __u64 pnr_time_usec;
-} WIRE_ATTR;
-
-struct srpc_ping_reply {
- __u32 pnr_status;
- __u32 pnr_magic;
- __u32 pnr_seq;
-} WIRE_ATTR;
-
-struct srpc_brw_reqst {
- __u64 brw_rpyid; /* reply buffer matchbits */
- __u64 brw_bulkid; /* bulk buffer matchbits */
- __u32 brw_rw; /* read or write */
- __u32 brw_len; /* bulk data len */
- __u32 brw_flags; /* bulk data patterns */
-} WIRE_ATTR; /* bulk r/w request */
-
-struct srpc_brw_reply {
- __u32 brw_status;
-} WIRE_ATTR; /* bulk r/w reply */
-
-#define SRPC_MSG_MAGIC 0xeeb0f00d
-#define SRPC_MSG_VERSION 1
-
-struct srpc_msg {
- __u32 msg_magic; /* magic number */
- __u32 msg_version; /* message version number */
- __u32 msg_type; /* type of message body: srpc_msg_type */
- __u32 msg_reserved0;
- __u32 msg_reserved1;
- __u32 msg_ses_feats; /* test session features */
- union {
- struct srpc_generic_reqst reqst;
- struct srpc_generic_reply reply;
-
- struct srpc_mksn_reqst mksn_reqst;
- struct srpc_mksn_reply mksn_reply;
- struct srpc_rmsn_reqst rmsn_reqst;
- struct srpc_rmsn_reply rmsn_reply;
- struct srpc_debug_reqst dbg_reqst;
- struct srpc_debug_reply dbg_reply;
- struct srpc_batch_reqst bat_reqst;
- struct srpc_batch_reply bat_reply;
- struct srpc_stat_reqst stat_reqst;
- struct srpc_stat_reply stat_reply;
- struct srpc_test_reqst tes_reqst;
- struct srpc_test_reply tes_reply;
- struct srpc_join_reqst join_reqst;
- struct srpc_join_reply join_reply;
-
- struct srpc_ping_reqst ping_reqst;
- struct srpc_ping_reply ping_reply;
- struct srpc_brw_reqst brw_reqst;
- struct srpc_brw_reply brw_reply;
- } msg_body;
-} WIRE_ATTR;
-
-static inline void
-srpc_unpack_msg_hdr(struct srpc_msg *msg)
-{
- if (msg->msg_magic == SRPC_MSG_MAGIC)
- return; /* no flipping needed */
-
- /*
- * We do not swap the magic number here as it is needed to
- * determine whether the body needs to be swapped.
- */
- /* __swab32s(&msg->msg_magic); */
- __swab32s(&msg->msg_type);
- __swab32s(&msg->msg_version);
- __swab32s(&msg->msg_ses_feats);
- __swab32s(&msg->msg_reserved0);
- __swab32s(&msg->msg_reserved1);
-}
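srpc_unpack_msg_hdr() deliberately leaves msg_magic untouched so later code can still tell whether the sender's byte order differed. A standalone sketch of that detection logic, where bswap32() stands in for the kernel's __swab32():

        #include <stdint.h>
        #include <stdio.h>

        #define SRPC_MSG_MAGIC 0xeeb0f00d       /* same value as above */

        static uint32_t bswap32(uint32_t v)
        {
                return (v >> 24) | ((v >> 8) & 0xff00) |
                       ((v << 8) & 0xff0000) | (v << 24);
        }

        static void classify(uint32_t wire_magic)
        {
                if (wire_magic == SRPC_MSG_MAGIC)
                        printf("%#x: same byte order, no swap needed\n",
                               wire_magic);
                else if (wire_magic == bswap32(SRPC_MSG_MAGIC))
                        printf("%#x: peer byte order differs, swap header\n",
                               wire_magic);
                else
                        printf("%#x: not an srpc message\n", wire_magic);
        }

        int main(void)
        {
                classify(SRPC_MSG_MAGIC);
                classify(bswap32(SRPC_MSG_MAGIC));
                classify(0xdeadbeef);
                return 0;
        }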
-
-#endif /* __SELFTEST_RPC_H__ */
diff --git a/drivers/staging/lustre/lnet/selftest/selftest.h b/drivers/staging/lustre/lnet/selftest/selftest.h
deleted file mode 100644
index 8737fa96b192..000000000000
--- a/drivers/staging/lustre/lnet/selftest/selftest.h
+++ /dev/null
@@ -1,622 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/selftest.h
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- */
-#ifndef __SELFTEST_SELFTEST_H__
-#define __SELFTEST_SELFTEST_H__
-
-#define LNET_ONLY
-
-#include <linux/lnet/lib-lnet.h>
-#include <linux/lnet/lib-types.h>
-#include <uapi/linux/lnet/lnetst.h>
-
-#include "rpc.h"
-#include "timer.h"
-
-#ifndef MADE_WITHOUT_COMPROMISE
-#define MADE_WITHOUT_COMPROMISE
-#endif
-
-#define SWI_STATE_NEWBORN 0
-#define SWI_STATE_REPLY_SUBMITTED 1
-#define SWI_STATE_REPLY_SENT 2
-#define SWI_STATE_REQUEST_SUBMITTED 3
-#define SWI_STATE_REQUEST_SENT 4
-#define SWI_STATE_REPLY_RECEIVED 5
-#define SWI_STATE_BULK_STARTED 6
-#define SWI_STATE_DONE 10
-
-/* forward refs */
-struct srpc_service;
-struct srpc_service_cd;
-struct sfw_test_unit;
-struct sfw_test_instance;
-
-/* services below SRPC_FRAMEWORK_SERVICE_MAX_ID are framework
- * services, e.g. create/modify session.
- */
-#define SRPC_SERVICE_DEBUG 0
-#define SRPC_SERVICE_MAKE_SESSION 1
-#define SRPC_SERVICE_REMOVE_SESSION 2
-#define SRPC_SERVICE_BATCH 3
-#define SRPC_SERVICE_TEST 4
-#define SRPC_SERVICE_QUERY_STAT 5
-#define SRPC_SERVICE_JOIN 6
-#define SRPC_FRAMEWORK_SERVICE_MAX_ID 10
-/* other services start from SRPC_FRAMEWORK_SERVICE_MAX_ID+1 */
-#define SRPC_SERVICE_BRW 11
-#define SRPC_SERVICE_PING 12
-#define SRPC_SERVICE_MAX_ID 12
-
-#define SRPC_REQUEST_PORTAL 50
-/* a lazy portal for framework RPC requests */
-#define SRPC_FRAMEWORK_REQUEST_PORTAL 51
-/* all reply/bulk RDMAs go to this portal */
-#define SRPC_RDMA_PORTAL 52
-
-static inline enum srpc_msg_type
-srpc_service2request(int service)
-{
- switch (service) {
- default:
- LBUG();
- case SRPC_SERVICE_DEBUG:
- return SRPC_MSG_DEBUG_REQST;
-
- case SRPC_SERVICE_MAKE_SESSION:
- return SRPC_MSG_MKSN_REQST;
-
- case SRPC_SERVICE_REMOVE_SESSION:
- return SRPC_MSG_RMSN_REQST;
-
- case SRPC_SERVICE_BATCH:
- return SRPC_MSG_BATCH_REQST;
-
- case SRPC_SERVICE_TEST:
- return SRPC_MSG_TEST_REQST;
-
- case SRPC_SERVICE_QUERY_STAT:
- return SRPC_MSG_STAT_REQST;
-
- case SRPC_SERVICE_BRW:
- return SRPC_MSG_BRW_REQST;
-
- case SRPC_SERVICE_PING:
- return SRPC_MSG_PING_REQST;
-
- case SRPC_SERVICE_JOIN:
- return SRPC_MSG_JOIN_REQST;
- }
-}
-
-static inline enum srpc_msg_type
-srpc_service2reply(int service)
-{
- return srpc_service2request(service) + 1;
-}
-
-enum srpc_event_type {
- SRPC_BULK_REQ_RCVD = 1, /* passive bulk request (PUT sink/GET
- * source) received
- */
- SRPC_BULK_PUT_SENT = 2, /* active bulk PUT sent (source) */
- SRPC_BULK_GET_RPLD = 3, /* active bulk GET replied (sink) */
- SRPC_REPLY_RCVD = 4, /* incoming reply received */
- SRPC_REPLY_SENT = 5, /* outgoing reply sent */
- SRPC_REQUEST_RCVD = 6, /* incoming request received */
- SRPC_REQUEST_SENT = 7, /* outgoing request sent */
-};
-
-/* RPC event */
-struct srpc_event {
- enum srpc_event_type ev_type; /* what's up */
- enum lnet_event_kind ev_lnet; /* LNet event type */
- int ev_fired; /* LNet event fired? */
- int ev_status; /* LNet event status */
- void *ev_data; /* owning server/client RPC */
-};
-
-/* bulk descriptor */
-struct srpc_bulk {
- int bk_len; /* len of bulk data */
- struct lnet_handle_md bk_mdh;
- int bk_sink; /* sink/source */
- int bk_niov; /* # iov in bk_iovs */
- struct bio_vec bk_iovs[0];
-};
-
-/* message buffer descriptor */
-struct srpc_buffer {
- struct list_head buf_list; /* chain on srpc_service::*_msgq */
- struct srpc_msg buf_msg;
- struct lnet_handle_md buf_mdh;
- lnet_nid_t buf_self;
- struct lnet_process_id buf_peer;
-};
-
-struct swi_workitem;
-typedef void (*swi_action_t) (struct swi_workitem *);
-
-struct swi_workitem {
- struct workqueue_struct *swi_wq;
- struct work_struct swi_work;
- swi_action_t swi_action;
- int swi_state;
-};
-
-/* server-side state of an RPC */
-struct srpc_server_rpc {
- /* chain on srpc_service::*_rpcq */
- struct list_head srpc_list;
- struct srpc_service_cd *srpc_scd;
- struct swi_workitem srpc_wi;
- struct srpc_event srpc_ev; /* bulk/reply event */
- lnet_nid_t srpc_self;
- struct lnet_process_id srpc_peer;
- struct srpc_msg srpc_replymsg;
- struct lnet_handle_md srpc_replymdh;
- struct srpc_buffer *srpc_reqstbuf;
- struct srpc_bulk *srpc_bulk;
-
- unsigned int srpc_aborted; /* being given up */
- int srpc_status;
- void (*srpc_done)(struct srpc_server_rpc *);
-};
-
-/* client-side state of an RPC */
-struct srpc_client_rpc {
- struct list_head crpc_list; /* chain on user's lists */
- spinlock_t crpc_lock; /* serialize */
- int crpc_service;
- atomic_t crpc_refcount;
- int crpc_timeout; /* # seconds to wait for reply */
- struct stt_timer crpc_timer;
- struct swi_workitem crpc_wi;
- struct lnet_process_id crpc_dest;
-
- void (*crpc_done)(struct srpc_client_rpc *);
- void (*crpc_fini)(struct srpc_client_rpc *);
- int crpc_status; /* completion status */
- void *crpc_priv; /* caller data */
-
- /* state flags */
- unsigned int crpc_aborted:1; /* being given up */
- unsigned int crpc_closed:1; /* completed */
-
- /* RPC events */
- struct srpc_event crpc_bulkev; /* bulk event */
- struct srpc_event crpc_reqstev; /* request event */
- struct srpc_event crpc_replyev; /* reply event */
-
- /* bulk, request(reqst), and reply exchanged on wire */
- struct srpc_msg crpc_reqstmsg;
- struct srpc_msg crpc_replymsg;
- struct lnet_handle_md crpc_reqstmdh;
- struct lnet_handle_md crpc_replymdh;
- struct srpc_bulk crpc_bulk;
-};
-
-#define srpc_client_rpc_size(rpc) \
-offsetof(struct srpc_client_rpc, crpc_bulk.bk_iovs[(rpc)->crpc_bulk.bk_niov])
-
-#define srpc_client_rpc_addref(rpc) \
-do { \
- CDEBUG(D_NET, "RPC[%p] -> %s (%d)++\n", \
- (rpc), libcfs_id2str((rpc)->crpc_dest), \
- atomic_read(&(rpc)->crpc_refcount)); \
- LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \
- atomic_inc(&(rpc)->crpc_refcount); \
-} while (0)
-
-#define srpc_client_rpc_decref(rpc) \
-do { \
- CDEBUG(D_NET, "RPC[%p] -> %s (%d)--\n", \
- (rpc), libcfs_id2str((rpc)->crpc_dest), \
- atomic_read(&(rpc)->crpc_refcount)); \
- LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \
- if (atomic_dec_and_test(&(rpc)->crpc_refcount)) \
- srpc_destroy_client_rpc(rpc); \
-} while (0)
-
-#define srpc_event_pending(rpc) (!(rpc)->crpc_bulkev.ev_fired || \
- !(rpc)->crpc_reqstev.ev_fired || \
- !(rpc)->crpc_replyev.ev_fired)
-
-/* CPU partition data of srpc service */
-struct srpc_service_cd {
- /** serialize */
- spinlock_t scd_lock;
- /** backref to service */
- struct srpc_service *scd_svc;
- /** event buffer */
- struct srpc_event scd_ev;
- /** free RPC descriptors */
- struct list_head scd_rpc_free;
- /** in-flight RPCs */
- struct list_head scd_rpc_active;
- /** workitem for posting buffer */
- struct swi_workitem scd_buf_wi;
- /** CPT id */
- int scd_cpt;
- /** error code for scd_buf_wi */
- int scd_buf_err;
- /** timestamp for scd_buf_err */
- time64_t scd_buf_err_stamp;
- /** total # request buffers */
- int scd_buf_total;
- /** # posted request buffers */
- int scd_buf_nposted;
- /** in progress of buffer posting */
- int scd_buf_posting;
- /** allocate more buffers if scd_buf_nposted < scd_buf_low */
- int scd_buf_low;
- /** increase/decrease some buffers */
- int scd_buf_adjust;
- /** posted message buffers */
- struct list_head scd_buf_posted;
- /** blocked for RPC descriptor */
- struct list_head scd_buf_blocked;
-};
-
-/* number of server workitems (mini-threads) for the test service */
-#define SFW_TEST_WI_MIN 256
-#define SFW_TEST_WI_MAX 2048
-/* extra buffers for tolerating buggy peers, or unbalanced number
- * of peers between partitions
- */
-#define SFW_TEST_WI_EXTRA 64
-
-/* number of server workitems (mini-threads) for the framework service */
-#define SFW_FRWK_WI_MIN 16
-#define SFW_FRWK_WI_MAX 256
-
-struct srpc_service {
- int sv_id; /* service id */
- const char *sv_name; /* human readable name */
- int sv_wi_total; /* total server workitems */
- int sv_shuttingdown;
- int sv_ncpts;
- /* percpt data for srpc_service */
- struct srpc_service_cd **sv_cpt_data;
- /* Service callbacks:
- * - sv_handler: process incoming RPC request
- * - sv_bulk_ready: notify bulk data
- */
- int (*sv_handler)(struct srpc_server_rpc *);
- int (*sv_bulk_ready)(struct srpc_server_rpc *, int);
-};
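For orientation, a service is normally defined statically and registered with srpc_add_service(); a hypothetical sketch modelled on the ping/brw services declared near the end of this header (the handler body and the reuse of SRPC_SERVICE_PING are illustrative only):

        static int my_handler(struct srpc_server_rpc *rpc)
        {
                /* decode rpc->srpc_reqstbuf->buf_msg, fill rpc->srpc_replymsg */
                return 0;
        }

        static struct srpc_service my_service = {
                .sv_id          = SRPC_SERVICE_PING,    /* illustrative id */
                .sv_name        = "my_service",
                .sv_handler     = my_handler,
                .sv_bulk_ready  = NULL,                 /* no bulk phase */
        };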
-
-struct sfw_session {
- struct list_head sn_list; /* chain on fw_zombie_sessions */
- struct lst_sid sn_id; /* unique identifier */
- unsigned int sn_timeout; /* # seconds of inactivity before expiry */
- int sn_timer_active;
- unsigned int sn_features;
- struct stt_timer sn_timer;
- struct list_head sn_batches; /* list of batches */
- char sn_name[LST_NAME_SIZE];
- atomic_t sn_refcount;
- atomic_t sn_brw_errors;
- atomic_t sn_ping_errors;
- unsigned long sn_started;
-};
-
-#define sfw_sid_equal(sid0, sid1) ((sid0).ses_nid == (sid1).ses_nid && \
- (sid0).ses_stamp == (sid1).ses_stamp)
-
-struct sfw_batch {
- struct list_head bat_list; /* chain on sn_batches */
- struct lst_bid bat_id; /* batch id */
- int bat_error; /* error code of batch */
- struct sfw_session *bat_session; /* batch's session */
- atomic_t bat_nactive; /* # of active tests */
- struct list_head bat_tests; /* test instances */
-};
-
-struct sfw_test_client_ops {
- int (*tso_init)(struct sfw_test_instance *tsi); /* initialize test
- * client
- */
- void (*tso_fini)(struct sfw_test_instance *tsi); /* finalize test
- * client
- */
- int (*tso_prep_rpc)(struct sfw_test_unit *tsu,
- struct lnet_process_id dest,
- struct srpc_client_rpc **rpc); /* prepare a test RPC */
- void (*tso_done_rpc)(struct sfw_test_unit *tsu,
- struct srpc_client_rpc *rpc); /* finish a test RPC */
-};
-
-struct sfw_test_instance {
- struct list_head tsi_list; /* chain on batch */
- int tsi_service; /* test type */
- struct sfw_batch *tsi_batch; /* batch */
- struct sfw_test_client_ops *tsi_ops; /* test client operations */
-
- /* public parameter for all test units */
- unsigned int tsi_is_client:1; /* is test client */
- unsigned int tsi_stoptsu_onerr:1; /* stop tsu on error */
- int tsi_concur; /* concurrency */
- int tsi_loop; /* loop count */
-
- /* status of test instance */
- spinlock_t tsi_lock; /* serialize */
- unsigned int tsi_stopping:1; /* test is stopping */
- atomic_t tsi_nactive; /* # of active test
- * unit
- */
- struct list_head tsi_units; /* test units */
- struct list_head tsi_free_rpcs; /* free rpcs */
- struct list_head tsi_active_rpcs; /* active rpcs */
-
- union {
- struct test_ping_req ping; /* ping parameter */
- struct test_bulk_req bulk_v0; /* bulk parameter */
- struct test_bulk_req_v1 bulk_v1; /* bulk v1 parameter */
- } tsi_u;
-};
-
-/*
- * XXX: trailing (PAGE_SIZE % sizeof(struct lnet_process_id_packed)) bytes
- * at the end of pages are not used
- */
-#define SFW_MAX_CONCUR LST_MAX_CONCUR
-#define SFW_ID_PER_PAGE (PAGE_SIZE / sizeof(struct lnet_process_id_packed))
-#define SFW_MAX_NDESTS (LNET_MAX_IOV * SFW_ID_PER_PAGE)
-#define sfw_id_pages(n) (((n) + SFW_ID_PER_PAGE - 1) / SFW_ID_PER_PAGE)
-
-struct sfw_test_unit {
- struct list_head tsu_list; /* chain on lst_test_instance */
- struct lnet_process_id tsu_dest; /* id of dest node */
- int tsu_loop; /* loop count of the test */
- struct sfw_test_instance *tsu_instance; /* pointer to test instance */
- void *tsu_private; /* private data */
- struct swi_workitem tsu_worker; /* workitem of the test unit */
-};
-
-struct sfw_test_case {
- struct list_head tsc_list; /* chain on fw_tests */
- struct srpc_service *tsc_srv_service; /* test service */
- struct sfw_test_client_ops *tsc_cli_ops; /* ops of test client */
-};
-
-struct srpc_client_rpc *
-sfw_create_rpc(struct lnet_process_id peer, int service,
- unsigned int features, int nbulkiov, int bulklen,
- void (*done)(struct srpc_client_rpc *), void *priv);
-int sfw_create_test_rpc(struct sfw_test_unit *tsu,
- struct lnet_process_id peer, unsigned int features,
- int nblk, int blklen, struct srpc_client_rpc **rpc);
-void sfw_abort_rpc(struct srpc_client_rpc *rpc);
-void sfw_post_rpc(struct srpc_client_rpc *rpc);
-void sfw_client_rpc_done(struct srpc_client_rpc *rpc);
-void sfw_unpack_message(struct srpc_msg *msg);
-void sfw_free_pages(struct srpc_server_rpc *rpc);
-void sfw_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i);
-int sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
- int sink);
-int sfw_make_session(struct srpc_mksn_reqst *request,
- struct srpc_mksn_reply *reply);
-
-struct srpc_client_rpc *
-srpc_create_client_rpc(struct lnet_process_id peer, int service,
- int nbulkiov, int bulklen,
- void (*rpc_done)(struct srpc_client_rpc *),
- void (*rpc_fini)(struct srpc_client_rpc *), void *priv);
-void srpc_post_rpc(struct srpc_client_rpc *rpc);
-void srpc_abort_rpc(struct srpc_client_rpc *rpc, int why);
-void srpc_free_bulk(struct srpc_bulk *bk);
-struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned int off,
- unsigned int bulk_npg, unsigned int bulk_len,
- int sink);
-void srpc_send_rpc(struct swi_workitem *wi);
-int srpc_send_reply(struct srpc_server_rpc *rpc);
-int srpc_add_service(struct srpc_service *sv);
-int srpc_remove_service(struct srpc_service *sv);
-void srpc_shutdown_service(struct srpc_service *sv);
-void srpc_abort_service(struct srpc_service *sv);
-int srpc_finish_service(struct srpc_service *sv);
-int srpc_service_add_buffers(struct srpc_service *sv, int nbuffer);
-void srpc_service_remove_buffers(struct srpc_service *sv, int nbuffer);
-void srpc_get_counters(struct srpc_counters *cnt);
-void srpc_set_counters(const struct srpc_counters *cnt);
-
-extern struct workqueue_struct *lst_serial_wq;
-extern struct workqueue_struct **lst_test_wq;
-
-static inline int
-srpc_serv_is_framework(struct srpc_service *svc)
-{
- return svc->sv_id < SRPC_FRAMEWORK_SERVICE_MAX_ID;
-}
-
-static inline void
-swi_wi_action(struct work_struct *wi)
-{
- struct swi_workitem *swi;
-
- swi = container_of(wi, struct swi_workitem, swi_work);
-
- swi->swi_action(swi);
-}
-
-static inline void
-swi_init_workitem(struct swi_workitem *swi,
- swi_action_t action, struct workqueue_struct *wq)
-{
- swi->swi_wq = wq;
- swi->swi_action = action;
- swi->swi_state = SWI_STATE_NEWBORN;
- INIT_WORK(&swi->swi_work, swi_wi_action);
-}
-
-static inline void
-swi_schedule_workitem(struct swi_workitem *wi)
-{
- queue_work(wi->swi_wq, &wi->swi_work);
-}
-
-static inline int
-swi_cancel_workitem(struct swi_workitem *swi)
-{
- return cancel_work_sync(&swi->swi_work);
-}
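A minimal, hypothetical lifecycle for the wrappers above: bind an action to a workqueue, schedule it, and cancel synchronously before the item's storage disappears:

        static void my_action(struct swi_workitem *swi)
        {
                swi->swi_state = SWI_STATE_DONE;
        }

        static void my_workitem_demo(void)
        {
                struct swi_workitem swi;

                swi_init_workitem(&swi, my_action, lst_serial_wq);
                swi_schedule_workitem(&swi);
                /* must finish or be cancelled before 'swi' goes out of scope */
                swi_cancel_workitem(&swi);
        }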
-
-int sfw_startup(void);
-int srpc_startup(void);
-void sfw_shutdown(void);
-void srpc_shutdown(void);
-
-static inline void
-srpc_destroy_client_rpc(struct srpc_client_rpc *rpc)
-{
- LASSERT(rpc);
- LASSERT(!srpc_event_pending(rpc));
- LASSERT(!atomic_read(&rpc->crpc_refcount));
-
- if (!rpc->crpc_fini)
- kfree(rpc);
- else
- (*rpc->crpc_fini)(rpc);
-}
-
-static inline void
-srpc_init_client_rpc(struct srpc_client_rpc *rpc, struct lnet_process_id peer,
- int service, int nbulkiov, int bulklen,
- void (*rpc_done)(struct srpc_client_rpc *),
- void (*rpc_fini)(struct srpc_client_rpc *), void *priv)
-{
- LASSERT(nbulkiov <= LNET_MAX_IOV);
-
- memset(rpc, 0, offsetof(struct srpc_client_rpc,
- crpc_bulk.bk_iovs[nbulkiov]));
-
- INIT_LIST_HEAD(&rpc->crpc_list);
- swi_init_workitem(&rpc->crpc_wi, srpc_send_rpc,
- lst_test_wq[lnet_cpt_of_nid(peer.nid)]);
- spin_lock_init(&rpc->crpc_lock);
- atomic_set(&rpc->crpc_refcount, 1); /* 1 ref for caller */
-
- rpc->crpc_dest = peer;
- rpc->crpc_priv = priv;
- rpc->crpc_service = service;
- rpc->crpc_bulk.bk_len = bulklen;
- rpc->crpc_bulk.bk_niov = nbulkiov;
- rpc->crpc_done = rpc_done;
- rpc->crpc_fini = rpc_fini;
- LNetInvalidateMDHandle(&rpc->crpc_reqstmdh);
- LNetInvalidateMDHandle(&rpc->crpc_replymdh);
- LNetInvalidateMDHandle(&rpc->crpc_bulk.bk_mdh);
-
- /* no event is expected at this point */
- rpc->crpc_bulkev.ev_fired = 1;
- rpc->crpc_reqstev.ev_fired = 1;
- rpc->crpc_replyev.ev_fired = 1;
-
- rpc->crpc_reqstmsg.msg_magic = SRPC_MSG_MAGIC;
- rpc->crpc_reqstmsg.msg_version = SRPC_MSG_VERSION;
- rpc->crpc_reqstmsg.msg_type = srpc_service2request(service);
-}
-
-static inline const char *
-swi_state2str(int state)
-{
-#define STATE2STR(x) case x: return #x
- switch (state) {
- default:
- LBUG();
- STATE2STR(SWI_STATE_NEWBORN);
- STATE2STR(SWI_STATE_REPLY_SUBMITTED);
- STATE2STR(SWI_STATE_REPLY_SENT);
- STATE2STR(SWI_STATE_REQUEST_SUBMITTED);
- STATE2STR(SWI_STATE_REQUEST_SENT);
- STATE2STR(SWI_STATE_REPLY_RECEIVED);
- STATE2STR(SWI_STATE_BULK_STARTED);
- STATE2STR(SWI_STATE_DONE);
- }
-#undef STATE2STR
-}
-
-#define selftest_wait_events() \
- do { \
- set_current_state(TASK_UNINTERRUPTIBLE); \
- schedule_timeout(HZ / 10); \
- } while (0)
-
-#define lst_wait_until(cond, lock, fmt, ...) \
-do { \
- int __I = 2; \
- while (!(cond)) { \
- CDEBUG(is_power_of_2(++__I) ? D_WARNING : D_NET, \
- fmt, ## __VA_ARGS__); \
- spin_unlock(&(lock)); \
- \
- selftest_wait_events(); \
- \
- spin_lock(&(lock)); \
- } \
-} while (0)
-
-static inline void
-srpc_wait_service_shutdown(struct srpc_service *sv)
-{
- int i = 2;
-
- LASSERT(sv->sv_shuttingdown);
-
- while (!srpc_finish_service(sv)) {
- i++;
- CDEBUG(((i & -i) == i) ? D_WARNING : D_NET,
- "Waiting for %s service to shutdown...\n",
- sv->sv_name);
- selftest_wait_events();
- }
-}
-
-extern struct sfw_test_client_ops brw_test_client;
-void brw_init_test_client(void);
-
-extern struct srpc_service brw_test_service;
-void brw_init_test_service(void);
-
-extern struct sfw_test_client_ops ping_test_client;
-void ping_init_test_client(void);
-
-extern struct srpc_service ping_test_service;
-void ping_init_test_service(void);
-
-#endif /* __SELFTEST_SELFTEST_H__ */
diff --git a/drivers/staging/lustre/lnet/selftest/timer.c b/drivers/staging/lustre/lnet/selftest/timer.c
deleted file mode 100644
index 582f252b3e12..000000000000
--- a/drivers/staging/lustre/lnet/selftest/timer.c
+++ /dev/null
@@ -1,244 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/timer.c
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include "selftest.h"
-
-/*
- * Timers are implemented as a sorted queue of expiry times. The queue
- * is slotted, with each slot holding timers which expire in a
- * 2**STTIMER_MINPOLL (8) second period. The timers in each slot are
- * sorted by increasing expiry time. The number of slots is 2**7 (128),
- * to cover a time period of 1024 seconds into the future before wrapping.
- */
-#define STTIMER_MINPOLL 3 /* log2 min poll interval (8 s) */
-#define STTIMER_SLOTTIME BIT(STTIMER_MINPOLL)
-#define STTIMER_SLOTTIMEMASK (~(STTIMER_SLOTTIME - 1))
-#define STTIMER_NSLOTS BIT(7)
-#define STTIMER_SLOT(t) (&stt_data.stt_hash[(((t) >> STTIMER_MINPOLL) & \
- (STTIMER_NSLOTS - 1))])
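A quick standalone check of the slot arithmetic: with 2**7 slots of 2**3 seconds each, two expiry times exactly 1024 seconds apart hash to the same slot, which is the wrap-around horizon described above (user-space sketch, mirroring STTIMER_SLOT's index computation):

        #include <stdio.h>

        int main(void)
        {
                long t = 1000000;       /* arbitrary expiry time, seconds */

                printf("slot(t)        = %ld\n", (t >> 3) & 127);
                printf("slot(t + 1024) = %ld\n", ((t + 1024) >> 3) & 127);
                return 0;
        }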
-
-static struct st_timer_data {
- spinlock_t stt_lock;
- unsigned long stt_prev_slot; /* start time of the slot processed
- * previously
- */
- struct list_head stt_hash[STTIMER_NSLOTS];
- int stt_shuttingdown;
- wait_queue_head_t stt_waitq;
- int stt_nthreads;
-} stt_data;
-
-void
-stt_add_timer(struct stt_timer *timer)
-{
- struct list_head *pos;
-
- spin_lock(&stt_data.stt_lock);
-
- LASSERT(stt_data.stt_nthreads > 0);
- LASSERT(!stt_data.stt_shuttingdown);
- LASSERT(timer->stt_func);
- LASSERT(list_empty(&timer->stt_list));
- LASSERT(timer->stt_expires > ktime_get_real_seconds());
-
- /* a simple insertion sort */
- list_for_each_prev(pos, STTIMER_SLOT(timer->stt_expires)) {
- struct stt_timer *old = list_entry(pos, struct stt_timer,
- stt_list);
-
- if (timer->stt_expires >= old->stt_expires)
- break;
- }
- list_add(&timer->stt_list, pos);
-
- spin_unlock(&stt_data.stt_lock);
-}
-
-/*
- * The function returns whether it has deactivated a pending timer or not.
- * (i.e. del_timer() of an inactive timer returns 0, del_timer() of an
- * active timer returns 1.)
- *
- * CAVEAT EMPTOR:
- * When 0 is returned, it is possible that timer->stt_func _is_ running on
- * another CPU.
- */
-int
-stt_del_timer(struct stt_timer *timer)
-{
- int ret = 0;
-
- spin_lock(&stt_data.stt_lock);
-
- LASSERT(stt_data.stt_nthreads > 0);
- LASSERT(!stt_data.stt_shuttingdown);
-
- if (!list_empty(&timer->stt_list)) {
- ret = 1;
- list_del_init(&timer->stt_list);
- }
-
- spin_unlock(&stt_data.stt_lock);
- return ret;
-}
-
-/* called with stt_data.stt_lock held */
-static int
-stt_expire_list(struct list_head *slot, time64_t now)
-{
- int expired = 0;
- struct stt_timer *timer;
-
- while (!list_empty(slot)) {
- timer = list_entry(slot->next, struct stt_timer, stt_list);
-
- if (timer->stt_expires > now)
- break;
-
- list_del_init(&timer->stt_list);
- spin_unlock(&stt_data.stt_lock);
-
- expired++;
- (*timer->stt_func) (timer->stt_data);
-
- spin_lock(&stt_data.stt_lock);
- }
-
- return expired;
-}
-
-static int
-stt_check_timers(unsigned long *last)
-{
- int expired = 0;
- time64_t now;
- unsigned long this_slot;
-
- now = ktime_get_real_seconds();
- this_slot = now & STTIMER_SLOTTIMEMASK;
-
- spin_lock(&stt_data.stt_lock);
-
- while (time_after_eq(this_slot, *last)) {
- expired += stt_expire_list(STTIMER_SLOT(this_slot), now);
- this_slot = this_slot - STTIMER_SLOTTIME;
- }
-
- *last = now & STTIMER_SLOTTIMEMASK;
- spin_unlock(&stt_data.stt_lock);
- return expired;
-}
-
-static int
-stt_timer_main(void *arg)
-{
- int rc = 0;
-
- while (!stt_data.stt_shuttingdown) {
- stt_check_timers(&stt_data.stt_prev_slot);
-
- rc = wait_event_timeout(stt_data.stt_waitq,
- stt_data.stt_shuttingdown,
- STTIMER_SLOTTIME * HZ);
- }
-
- spin_lock(&stt_data.stt_lock);
- stt_data.stt_nthreads--;
- spin_unlock(&stt_data.stt_lock);
- return rc;
-}
-
-static int
-stt_start_timer_thread(void)
-{
- struct task_struct *task;
-
- LASSERT(!stt_data.stt_shuttingdown);
-
- task = kthread_run(stt_timer_main, NULL, "st_timer");
- if (IS_ERR(task))
- return PTR_ERR(task);
-
- spin_lock(&stt_data.stt_lock);
- stt_data.stt_nthreads++;
- spin_unlock(&stt_data.stt_lock);
- return 0;
-}
-
-int
-stt_startup(void)
-{
- int rc = 0;
- int i;
-
- stt_data.stt_shuttingdown = 0;
- stt_data.stt_prev_slot = ktime_get_real_seconds() & STTIMER_SLOTTIMEMASK;
-
- spin_lock_init(&stt_data.stt_lock);
- for (i = 0; i < STTIMER_NSLOTS; i++)
- INIT_LIST_HEAD(&stt_data.stt_hash[i]);
-
- stt_data.stt_nthreads = 0;
- init_waitqueue_head(&stt_data.stt_waitq);
- rc = stt_start_timer_thread();
- if (rc)
- CERROR("Can't spawn timer thread: %d\n", rc);
-
- return rc;
-}
-
-void
-stt_shutdown(void)
-{
- int i;
-
- spin_lock(&stt_data.stt_lock);
-
- for (i = 0; i < STTIMER_NSLOTS; i++)
- LASSERT(list_empty(&stt_data.stt_hash[i]));
-
- stt_data.stt_shuttingdown = 1;
-
- wake_up(&stt_data.stt_waitq);
- lst_wait_until(!stt_data.stt_nthreads, stt_data.stt_lock,
- "waiting for %d threads to terminate\n",
- stt_data.stt_nthreads);
-
- spin_unlock(&stt_data.stt_lock);
-}
diff --git a/drivers/staging/lustre/lnet/selftest/timer.h b/drivers/staging/lustre/lnet/selftest/timer.h
deleted file mode 100644
index 7f0ef9bd0cda..000000000000
--- a/drivers/staging/lustre/lnet/selftest/timer.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/selftest/timer.h
- *
- * Author: Isaac Huang <isaac@clusterfs.com>
- */
-#ifndef __SELFTEST_TIMER_H__
-#define __SELFTEST_TIMER_H__
-
-struct stt_timer {
- struct list_head stt_list;
- time64_t stt_expires;
- void (*stt_func)(void *);
- void *stt_data;
-};
-
-void stt_add_timer(struct stt_timer *timer);
-int stt_del_timer(struct stt_timer *timer);
-int stt_startup(void);
-void stt_shutdown(void);
-
-#endif /* __SELFTEST_TIMER_H__ */
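
The header above is the wheel's whole public surface: embed a struct stt_timer, fill in the expiry and callback, and hand it to stt_add_timer(). A hedged usage sketch follows; the callback name and payload are hypothetical, and it assumes the selftest module context where stt_startup() has already run:

#include <linux/list.h>
#include <linux/timekeeping.h>
#include <linux/printk.h>

static void demo_expired(void *data)		/* hypothetical callback */
{
	pr_info("selftest demo timer fired, cookie=%p\n", data);
}

static struct stt_timer demo_timer;

static void demo_arm_timer(void)
{
	INIT_LIST_HEAD(&demo_timer.stt_list);	/* not strictly required */
	demo_timer.stt_expires = ktime_get_real_seconds() + 5;
	demo_timer.stt_func = demo_expired;
	demo_timer.stt_data = NULL;
	stt_add_timer(&demo_timer);
	/* stt_del_timer(&demo_timer) would disarm it while still pending */
}
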
diff --git a/drivers/staging/lustre/lustre/Kconfig b/drivers/staging/lustre/lustre/Kconfig
deleted file mode 100644
index ccb78a945995..000000000000
--- a/drivers/staging/lustre/lustre/Kconfig
+++ /dev/null
@@ -1,45 +0,0 @@
-config LUSTRE_FS
- tristate "Lustre file system client support"
- depends on LNET
- select CRYPTO
- select CRYPTO_CRC32
- select CRYPTO_CRC32_PCLMUL if X86
- select CRYPTO_CRC32C
- select CRYPTO_MD5
- select CRYPTO_SHA1
- select CRYPTO_SHA256
- select CRYPTO_SHA512
- depends on MULTIUSER
- help
- This option enables Lustre file system client support. Choose Y
- here if you want to access a Lustre file system cluster. To compile
- this file system support as a module, choose M here: the module will
- be called lustre.
-
- To mount Lustre file systems, you also need to install the user space
- mount.lustre and other user space commands which can be found in the
- lustre-client package, available from
- http://downloads.whamcloud.com/public/lustre/
-
-	  The Lustre file system is widely used in high-performance
-	  computing. Source code for both the kernel-space and user-space
-	  Lustre components can be found at
- http://git.whamcloud.com/?p=fs/lustre-release.git;a=summary
-
- If unsure, say N.
-
- See also http://wiki.lustre.org/
-
-config LUSTRE_DEBUG_EXPENSIVE_CHECK
- bool "Enable Lustre DEBUG checks"
- depends on LUSTRE_FS
- help
-	  This option is mainly for debugging. It enables Lustre code to do
- expensive checks that may have a performance impact.
-
- Use with caution. If unsure, say N.
-
-config LUSTRE_TRANSLATE_ERRNOS
- bool
- depends on LUSTRE_FS && !X86
- default y
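
For anyone rebuilding the client from a tree that still carries this code, a minimal config fragment might look like the following; treat it as a sketch, since the exact prerequisites (networking options, LNET transports) vary by tree:

CONFIG_NET=y
CONFIG_INET=y
CONFIG_STAGING=y
CONFIG_LNET=m
CONFIG_LUSTRE_FS=m
# CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK is not set
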
diff --git a/drivers/staging/lustre/lustre/Makefile b/drivers/staging/lustre/lustre/Makefile
deleted file mode 100644
index 331e4fcdd5a2..000000000000
--- a/drivers/staging/lustre/lustre/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-obj-$(CONFIG_LUSTRE_FS) += obdclass/ ptlrpc/ fld/ osc/ mgc/ \
- fid/ lov/ mdc/ lmv/ llite/ obdecho/
diff --git a/drivers/staging/lustre/lustre/fid/Makefile b/drivers/staging/lustre/lustre/fid/Makefile
deleted file mode 100644
index 77b65b92667d..000000000000
--- a/drivers/staging/lustre/lustre/fid/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include/
-
-obj-$(CONFIG_LUSTRE_FS) += fid.o
-fid-y := fid_request.o fid_lib.o lproc_fid.o
diff --git a/drivers/staging/lustre/lustre/fid/fid_internal.h b/drivers/staging/lustre/lustre/fid/fid_internal.h
deleted file mode 100644
index 14569e969a31..000000000000
--- a/drivers/staging/lustre/lustre/fid/fid_internal.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/fid/fid_internal.h
- *
- * Author: Yury Umanets <umka@clusterfs.com>
- */
-#ifndef __FID_INTERNAL_H
-#define __FID_INTERNAL_H
-
-#include <uapi/linux/lustre/lustre_idl.h>
-
-/* Functions used internally in module. */
-
-extern struct lprocfs_vars seq_client_debugfs_list[];
-
-#endif /* __FID_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/fid/fid_lib.c b/drivers/staging/lustre/lustre/fid/fid_lib.c
deleted file mode 100644
index ac52b378c155..000000000000
--- a/drivers/staging/lustre/lustre/fid/fid_lib.c
+++ /dev/null
@@ -1,87 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/fid/fid_lib.c
- *
- * Miscellaneous fid functions.
- *
- * Author: Nikita Danilov <nikita@clusterfs.com>
- * Author: Yury Umanets <umka@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_FID
-
-#include <linux/module.h>
-#include <lustre_fid.h>
-
-/**
- * A cluster-wide range from which fid-sequences are granted to servers and
- * then clients.
- *
- * Fid namespace:
- * <pre>
- * Normal FID: seq:64 [2^33,2^64-1] oid:32 ver:32
- * IGIF : 0:32, ino:32 gen:32 0:32
- * IDIF : 0:31, 1:1, ost-index:16, objid:48 0:32
- * </pre>
- *
- * The first 0x400 normal sequences are reserved for special purposes:
- * FID_SEQ_START + 1 is for local file id generation;
- * FID_SEQ_START + 2 is for the .lustre directory and its objects.
- */
-const struct lu_seq_range LUSTRE_SEQ_SPACE_RANGE = {
- .lsr_start = FID_SEQ_NORMAL,
- .lsr_end = (__u64)~0ULL,
-};
-
-/* Zero range, used for init and other purposes. */
-const struct lu_seq_range LUSTRE_SEQ_ZERO_RANGE = {
- .lsr_start = 0,
-};
-
-/* Lustre Big Fs Lock fid. */
-const struct lu_fid LUSTRE_BFL_FID = { .f_seq = FID_SEQ_SPECIAL,
- .f_oid = FID_OID_SPECIAL_BFL,
- .f_ver = 0x0000000000000000 };
-EXPORT_SYMBOL(LUSTRE_BFL_FID);
-
-/** Special fid for ".lustre" directory */
-const struct lu_fid LU_DOT_LUSTRE_FID = { .f_seq = FID_SEQ_DOT_LUSTRE,
- .f_oid = FID_OID_DOT_LUSTRE,
- .f_ver = 0x0000000000000000 };
-EXPORT_SYMBOL(LU_DOT_LUSTRE_FID);
-
-/** Special fid for "fid" special object in .lustre */
-const struct lu_fid LU_OBF_FID = { .f_seq = FID_SEQ_DOT_LUSTRE,
- .f_oid = FID_OID_DOT_LUSTRE_OBF,
- .f_ver = 0x0000000000000000 };
-EXPORT_SYMBOL(LU_OBF_FID);
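
The header comment above splits the 64-bit sequence space into reserved, IGIF, IDIF and normal ranges. A small userspace classifier over that split; the FID_SEQ_* boundary values are assumptions read off the ranges in the comment:

#include <stdio.h>
#include <stdint.h>

#define FID_SEQ_IGIF   1ULL          /* inode/generation FIDs */
#define FID_SEQ_IDIF   (1ULL << 32)  /* OST object FIDs */
#define FID_SEQ_START  (1ULL << 33)  /* first "normal" sequence */

static const char *fid_seq_class(uint64_t seq)
{
	if (seq >= FID_SEQ_START)
		return "normal";
	if (seq >= FID_SEQ_IDIF)
		return "IDIF";
	if (seq >= FID_SEQ_IGIF)
		return "IGIF";
	return "reserved/special";
}

int main(void)
{
	uint64_t samples[] = { 0, 42, 1ULL << 32, (1ULL << 33) + 0x400 };
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("seq %#llx -> %s\n",
		       (unsigned long long)samples[i],
		       fid_seq_class(samples[i]));
	return 0;
}
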
diff --git a/drivers/staging/lustre/lustre/fid/fid_request.c b/drivers/staging/lustre/lustre/fid/fid_request.c
deleted file mode 100644
index a34fd90ca5e5..000000000000
--- a/drivers/staging/lustre/lustre/fid/fid_request.c
+++ /dev/null
@@ -1,410 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/fid/fid_request.c
- *
- * Lustre Sequence Manager
- *
- * Author: Yury Umanets <umka@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_FID
-
-#include <linux/module.h>
-
-#include <obd.h>
-#include <obd_class.h>
-#include <obd_support.h>
-#include <lustre_fid.h>
-/* mdc RPC locks */
-#include <lustre_mdc.h>
-#include "fid_internal.h"
-
-static struct dentry *seq_debugfs_dir;
-
-static int seq_client_rpc(struct lu_client_seq *seq,
- struct lu_seq_range *output, __u32 opc,
- const char *opcname)
-{
- struct obd_export *exp = seq->lcs_exp;
- struct ptlrpc_request *req;
- struct lu_seq_range *out, *in;
- __u32 *op;
- unsigned int debug_mask;
- int rc;
-
- LASSERT(exp && !IS_ERR(exp));
- req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), &RQF_SEQ_QUERY,
- LUSTRE_MDS_VERSION, SEQ_QUERY);
- if (!req)
- return -ENOMEM;
-
- /* Init operation code */
- op = req_capsule_client_get(&req->rq_pill, &RMF_SEQ_OPC);
- *op = opc;
-
- /* Zero out input range, this is not recovery yet. */
- in = req_capsule_client_get(&req->rq_pill, &RMF_SEQ_RANGE);
- lu_seq_range_init(in);
-
- ptlrpc_request_set_replen(req);
-
- in->lsr_index = seq->lcs_space.lsr_index;
- if (seq->lcs_type == LUSTRE_SEQ_METADATA)
- fld_range_set_mdt(in);
- else
- fld_range_set_ost(in);
-
- if (opc == SEQ_ALLOC_SUPER) {
- req->rq_request_portal = SEQ_CONTROLLER_PORTAL;
- req->rq_reply_portal = MDC_REPLY_PORTAL;
-		/* While allocating a super sequence for a data object, the
-		 * current thread might hold the export of MDT0 (MDT0 is
-		 * precreating objects on this OST) and it will send the
-		 * request to MDT0 here. So we cannot keep resending the
-		 * request; otherwise, if MDT0 fails (is umounted), it
-		 * cannot release the export of MDT0.
-		 */
- if (seq->lcs_type == LUSTRE_SEQ_DATA) {
- req->rq_no_delay = 1;
- req->rq_no_resend = 1;
- }
- debug_mask = D_CONSOLE;
- } else {
- if (seq->lcs_type == LUSTRE_SEQ_METADATA) {
- req->rq_reply_portal = MDC_REPLY_PORTAL;
- req->rq_request_portal = SEQ_METADATA_PORTAL;
- } else {
- req->rq_reply_portal = OSC_REPLY_PORTAL;
- req->rq_request_portal = SEQ_DATA_PORTAL;
- }
- debug_mask = D_INFO;
- }
-
- ptlrpc_at_set_req_timeout(req);
-
- rc = ptlrpc_queue_wait(req);
- if (rc)
- goto out_req;
-
- out = req_capsule_server_get(&req->rq_pill, &RMF_SEQ_RANGE);
-
- if (!lu_seq_range_is_sane(out)) {
- CERROR("%s: Invalid range received from server: "
- DRANGE "\n", seq->lcs_name, PRANGE(out));
- rc = -EINVAL;
- goto out_req;
- }
-
- if (lu_seq_range_is_exhausted(out)) {
- CERROR("%s: Range received from server is exhausted: "
- DRANGE "]\n", seq->lcs_name, PRANGE(out));
- rc = -EINVAL;
- goto out_req;
- }
-
- *output = *out;
- CDEBUG_LIMIT(debug_mask, "%s: Allocated %s-sequence " DRANGE "]\n",
- seq->lcs_name, opcname, PRANGE(output));
-
-out_req:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-/* Request sequence-controller node to allocate new meta-sequence. */
-static int seq_client_alloc_meta(const struct lu_env *env,
- struct lu_client_seq *seq)
-{
- int rc;
-
- do {
-		/* If the meta server returns -EINPROGRESS or -EAGAIN, it
-		 * might not yet be ready to allocate a super sequence
-		 * from the sequence controller (MDT0).
-		 */
- rc = seq_client_rpc(seq, &seq->lcs_space,
- SEQ_ALLOC_META, "meta");
- } while (rc == -EINPROGRESS || rc == -EAGAIN);
-
- return rc;
-}
-
-/* Allocate new sequence for client. */
-static int seq_client_alloc_seq(const struct lu_env *env,
- struct lu_client_seq *seq, u64 *seqnr)
-{
- int rc;
-
- LASSERT(lu_seq_range_is_sane(&seq->lcs_space));
-
- if (lu_seq_range_is_exhausted(&seq->lcs_space)) {
- rc = seq_client_alloc_meta(env, seq);
- if (rc) {
- CERROR("%s: Can't allocate new meta-sequence, rc %d\n",
- seq->lcs_name, rc);
- *seqnr = U64_MAX;
- return rc;
- }
- CDEBUG(D_INFO, "%s: New range - " DRANGE "\n",
- seq->lcs_name, PRANGE(&seq->lcs_space));
- } else {
- rc = 0;
- }
-
- LASSERT(!lu_seq_range_is_exhausted(&seq->lcs_space));
- *seqnr = seq->lcs_space.lsr_start;
- seq->lcs_space.lsr_start += 1;
-
- CDEBUG(D_INFO, "%s: Allocated sequence [%#llx]\n", seq->lcs_name,
- *seqnr);
-
- return rc;
-}
-
-/* Allocate new fid on passed client @seq and save it to @fid. */
-int seq_client_alloc_fid(const struct lu_env *env,
- struct lu_client_seq *seq, struct lu_fid *fid)
-{
- int rc;
-
- LASSERT(seq);
- LASSERT(fid);
-
- spin_lock(&seq->lcs_lock);
-
- if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_EXHAUST))
- seq->lcs_fid.f_oid = seq->lcs_width;
-
- wait_event_cmd(seq->lcs_waitq,
- (!fid_is_zero(&seq->lcs_fid) &&
- fid_oid(&seq->lcs_fid) < seq->lcs_width) ||
- !seq->lcs_update,
- spin_unlock(&seq->lcs_lock),
- spin_lock(&seq->lcs_lock));
-
- if (!fid_is_zero(&seq->lcs_fid) &&
- fid_oid(&seq->lcs_fid) < seq->lcs_width) {
- /* Just bump last allocated fid and return to caller. */
- seq->lcs_fid.f_oid += 1;
- rc = 0;
- } else {
- u64 seqnr;
-
- LASSERT(seq->lcs_update == 0);
- seq->lcs_update = 1;
- spin_unlock(&seq->lcs_lock);
-
- rc = seq_client_alloc_seq(env, seq, &seqnr);
-
- spin_lock(&seq->lcs_lock);
- seq->lcs_update = 0;
- wake_up(&seq->lcs_waitq);
-
- if (rc) {
- CERROR("%s: Can't allocate new sequence, rc %d\n",
- seq->lcs_name, rc);
- spin_unlock(&seq->lcs_lock);
- return rc;
- }
-
- CDEBUG(D_INFO, "%s: Switch to sequence [0x%16.16llx]\n",
- seq->lcs_name, seqnr);
-
- seq->lcs_fid.f_oid = LUSTRE_FID_INIT_OID;
- seq->lcs_fid.f_seq = seqnr;
- seq->lcs_fid.f_ver = 0;
-
- /*
-		 * Inform the caller that a sequence switch was performed so it
-		 * can set up the FLD for the new sequence.
- */
- rc = 1;
- }
-
- *fid = seq->lcs_fid;
- spin_unlock(&seq->lcs_lock);
-
- CDEBUG(D_INFO,
- "%s: Allocated FID " DFID "\n", seq->lcs_name, PFID(fid));
- return rc;
-}
-EXPORT_SYMBOL(seq_client_alloc_fid);
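
The allocator above has a cheap path and an expensive one: bump f_oid inside the currently granted sequence until lcs_width is exhausted, then fetch a fresh sequence and return 1 so the caller can update the FLD. A toy userspace model of that policy; no locking, and toy_next_seq stands in for the SEQ_ALLOC_META RPC:

#include <stdio.h>
#include <stdint.h>

struct toy_fid { uint64_t seq; uint32_t oid; };

static uint64_t toy_next_seq = 0x200000400ULL;	/* illustrative start */

static int toy_alloc_fid(struct toy_fid *fid, uint32_t width)
{
	if (fid->seq && fid->oid < width) {
		fid->oid++;			/* cheap path: bump last oid */
		return 0;
	}
	fid->seq = toy_next_seq++;		/* stands in for the RPC */
	fid->oid = 1;
	return 1;				/* sequence switch happened */
}

int main(void)
{
	struct toy_fid fid = { 0, 0 };
	int i;

	for (i = 0; i < 5; i++) {
		/* tiny width to force a visible switch */
		int switched = toy_alloc_fid(&fid, 3);

		printf("[%#llx:0x%x] switch=%d\n",
		       (unsigned long long)fid.seq, fid.oid, switched);
	}
	return 0;
}
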
-
-/*
- * Finish the current sequence due to disconnect.
- * See mdc_import_event()
- */
-void seq_client_flush(struct lu_client_seq *seq)
-{
-
- LASSERT(seq);
- spin_lock(&seq->lcs_lock);
-
- wait_event_cmd(seq->lcs_waitq,
- !seq->lcs_update,
- spin_unlock(&seq->lcs_lock),
- spin_lock(&seq->lcs_lock));
-
- fid_zero(&seq->lcs_fid);
-	/*
-	 * This index should not be used for seq range allocation;
-	 * set it to -1 for debug checks.
-	 */
-
- seq->lcs_space.lsr_index = -1;
-
- lu_seq_range_init(&seq->lcs_space);
- spin_unlock(&seq->lcs_lock);
-}
-EXPORT_SYMBOL(seq_client_flush);
-
-static void seq_client_debugfs_fini(struct lu_client_seq *seq)
-{
- debugfs_remove_recursive(seq->lcs_debugfs_entry);
-}
-
-static void seq_client_debugfs_init(struct lu_client_seq *seq)
-{
- seq->lcs_debugfs_entry = debugfs_create_dir(seq->lcs_name,
- seq_debugfs_dir);
-
- ldebugfs_add_vars(seq->lcs_debugfs_entry, seq_client_debugfs_list, seq);
-}
-
-static void seq_client_fini(struct lu_client_seq *seq)
-{
- seq_client_debugfs_fini(seq);
-
- if (seq->lcs_exp) {
- class_export_put(seq->lcs_exp);
- seq->lcs_exp = NULL;
- }
-}
-
-static void seq_client_init(struct lu_client_seq *seq, struct obd_export *exp,
- enum lu_cli_type type, const char *prefix)
-{
- LASSERT(seq);
- LASSERT(prefix);
-
- seq->lcs_type = type;
-
- spin_lock_init(&seq->lcs_lock);
- if (type == LUSTRE_SEQ_METADATA)
- seq->lcs_width = LUSTRE_METADATA_SEQ_MAX_WIDTH;
- else
- seq->lcs_width = LUSTRE_DATA_SEQ_MAX_WIDTH;
-
- init_waitqueue_head(&seq->lcs_waitq);
- /* Make sure that things are clear before work is started. */
- seq_client_flush(seq);
-
- seq->lcs_exp = class_export_get(exp);
-
- snprintf(seq->lcs_name, sizeof(seq->lcs_name),
- "cli-%s", prefix);
-
- seq_client_debugfs_init(seq);
-}
-
-int client_fid_init(struct obd_device *obd,
- struct obd_export *exp, enum lu_cli_type type)
-{
- struct client_obd *cli = &obd->u.cli;
- char *prefix;
- int rc;
-
- cli->cl_seq = kzalloc(sizeof(*cli->cl_seq), GFP_NOFS);
- if (!cli->cl_seq)
- return -ENOMEM;
-
- prefix = kzalloc(MAX_OBD_NAME + 5, GFP_NOFS);
- if (!prefix) {
- rc = -ENOMEM;
- goto out_free_seq;
- }
-
- snprintf(prefix, MAX_OBD_NAME + 5, "cli-%s", obd->obd_name);
-
- /* Init client side sequence-manager */
- seq_client_init(cli->cl_seq, exp, type, prefix);
- kfree(prefix);
-
- return 0;
-out_free_seq:
- kfree(cli->cl_seq);
- cli->cl_seq = NULL;
- return rc;
-}
-EXPORT_SYMBOL(client_fid_init);
-
-int client_fid_fini(struct obd_device *obd)
-{
- struct client_obd *cli = &obd->u.cli;
-
- if (cli->cl_seq) {
- seq_client_fini(cli->cl_seq);
- kfree(cli->cl_seq);
- cli->cl_seq = NULL;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(client_fid_fini);
-
-static int __init fid_init(void)
-{
- int rc;
-
- rc = libcfs_setup();
- if (rc)
- return rc;
-
- seq_debugfs_dir = debugfs_create_dir(LUSTRE_SEQ_NAME,
- debugfs_lustre_root);
- return 0;
-}
-
-static void __exit fid_exit(void)
-{
- debugfs_remove_recursive(seq_debugfs_dir);
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre File IDentifier");
-MODULE_VERSION(LUSTRE_VERSION_STRING);
-MODULE_LICENSE("GPL");
-
-module_init(fid_init);
-module_exit(fid_exit);
diff --git a/drivers/staging/lustre/lustre/fid/lproc_fid.c b/drivers/staging/lustre/lustre/fid/lproc_fid.c
deleted file mode 100644
index 0aabf473c9bd..000000000000
--- a/drivers/staging/lustre/lustre/fid/lproc_fid.c
+++ /dev/null
@@ -1,225 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/fid/lproc_fid.c
- *
- * Lustre Sequence Manager
- *
- * Author: Yury Umanets <umka@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_FID
-
-#include <linux/module.h>
-
-#include <obd.h>
-#include <obd_class.h>
-#include <obd_support.h>
-#include <lustre_req_layout.h>
-#include <lustre_fid.h>
-#include "fid_internal.h"
-
-/* Format: [0x64BIT_INT - 0x64BIT_INT] + 32 bytes just in case */
-#define MAX_FID_RANGE_STRLEN (32 + 2 * 2 * sizeof(__u64))
-/*
- * Note: this function is only used for testing; it is not safe for
- * production use.
- */
-static int
-ldebugfs_fid_write_common(const char __user *buffer, size_t count,
- struct lu_seq_range *range)
-{
- struct lu_seq_range tmp;
- int rc;
- char kernbuf[MAX_FID_RANGE_STRLEN];
-
- LASSERT(range);
-
- if (count >= sizeof(kernbuf))
- return -EINVAL;
-
- if (copy_from_user(kernbuf, buffer, count))
- return -EFAULT;
-
- kernbuf[count] = 0;
-
- if (count == 5 && strcmp(kernbuf, "clear") == 0) {
- memset(range, 0, sizeof(*range));
- return count;
- }
-
-	/* of the form "[0x0000000240000400 - 0x0000000280000400]" */
- rc = sscanf(kernbuf, "[%llx - %llx]\n",
- (unsigned long long *)&tmp.lsr_start,
- (unsigned long long *)&tmp.lsr_end);
- if (rc != 2)
- return -EINVAL;
- if (!lu_seq_range_is_sane(&tmp) || lu_seq_range_is_zero(&tmp) ||
- tmp.lsr_start < range->lsr_start || tmp.lsr_end > range->lsr_end)
- return -EINVAL;
- *range = tmp;
- return count;
-}
-
-/* Client side debugfs stuff */
-static ssize_t
-ldebugfs_fid_space_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct lu_client_seq *seq;
- struct lu_seq_range range;
- int rc;
-
- seq = ((struct seq_file *)file->private_data)->private;
-
- rc = ldebugfs_fid_write_common(buffer, count, &range);
-
- spin_lock(&seq->lcs_lock);
- if (seq->lcs_update)
- /* An RPC call is active to update lcs_space */
- rc = -EBUSY;
- if (rc > 0)
- seq->lcs_space = range;
- spin_unlock(&seq->lcs_lock);
-
- if (rc > 0) {
- CDEBUG(D_INFO, "%s: Space: " DRANGE "\n",
- seq->lcs_name, PRANGE(&range));
- }
-
- return rc;
-}
-
-static int
-ldebugfs_fid_space_seq_show(struct seq_file *m, void *unused)
-{
- struct lu_client_seq *seq = (struct lu_client_seq *)m->private;
- int rc = 0;
-
- spin_lock(&seq->lcs_lock);
- if (seq->lcs_update)
- rc = -EBUSY;
- else
- seq_printf(m, "[%#llx - %#llx]:%x:%s\n", PRANGE(&seq->lcs_space));
- spin_unlock(&seq->lcs_lock);
-
- return rc;
-}
-
-static ssize_t
-ldebugfs_fid_width_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct lu_client_seq *seq;
- __u64 max;
- int rc, val;
-
- seq = ((struct seq_file *)file->private_data)->private;
-
- rc = lprocfs_write_helper(buffer, count, &val);
- if (rc)
- return rc;
-
- spin_lock(&seq->lcs_lock);
- if (seq->lcs_type == LUSTRE_SEQ_DATA)
- max = LUSTRE_DATA_SEQ_MAX_WIDTH;
- else
- max = LUSTRE_METADATA_SEQ_MAX_WIDTH;
-
- if (val <= max && val > 0) {
- seq->lcs_width = val;
-
- CDEBUG(D_INFO, "%s: Sequence size: %llu\n", seq->lcs_name,
- seq->lcs_width);
- }
-
- spin_unlock(&seq->lcs_lock);
-
- return count;
-}
-
-static int
-ldebugfs_fid_width_seq_show(struct seq_file *m, void *unused)
-{
- struct lu_client_seq *seq = (struct lu_client_seq *)m->private;
-
- spin_lock(&seq->lcs_lock);
- seq_printf(m, "%llu\n", seq->lcs_width);
- spin_unlock(&seq->lcs_lock);
-
- return 0;
-}
-
-static int
-ldebugfs_fid_fid_seq_show(struct seq_file *m, void *unused)
-{
- struct lu_client_seq *seq = (struct lu_client_seq *)m->private;
-
- spin_lock(&seq->lcs_lock);
- seq_printf(m, DFID "\n", PFID(&seq->lcs_fid));
- spin_unlock(&seq->lcs_lock);
-
- return 0;
-}
-
-static int
-ldebugfs_fid_server_seq_show(struct seq_file *m, void *unused)
-{
- struct lu_client_seq *seq = (struct lu_client_seq *)m->private;
- struct client_obd *cli;
-
- if (seq->lcs_exp) {
- cli = &seq->lcs_exp->exp_obd->u.cli;
- seq_printf(m, "%s\n", cli->cl_target_uuid.uuid);
- }
-
- return 0;
-}
-
-LPROC_SEQ_FOPS(ldebugfs_fid_space);
-LPROC_SEQ_FOPS(ldebugfs_fid_width);
-LPROC_SEQ_FOPS_RO(ldebugfs_fid_server);
-LPROC_SEQ_FOPS_RO(ldebugfs_fid_fid);
-
-struct lprocfs_vars seq_client_debugfs_list[] = {
- { .name = "space",
- .fops = &ldebugfs_fid_space_fops },
- { .name = "width",
- .fops = &ldebugfs_fid_width_fops },
- { .name = "server",
- .fops = &ldebugfs_fid_server_fops },
- { .name = "fid",
- .fops = &ldebugfs_fid_fid_fops },
- { NULL }
-};
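
The table above publishes four debugfs files per client sequence manager. A userspace sketch that dumps them; the debugfs mount point and the directory name are assumptions (real directories are named after lcs_name):

#include <stdio.h>

int main(void)
{
	const char *files[] = { "space", "width", "fid", "server" };
	char path[256], line[128];
	int i;

	for (i = 0; i < 4; i++) {
		/* hypothetical client directory under debugfs */
		snprintf(path, sizeof(path),
			 "/sys/kernel/debug/lustre/seq/cli-example-MDT0000/%s",
			 files[i]);
		FILE *f = fopen(path, "r");

		if (!f) {
			perror(path);
			continue;
		}
		if (fgets(line, sizeof(line), f))
			printf("%s: %s", files[i], line);
		fclose(f);
	}
	return 0;
}
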
diff --git a/drivers/staging/lustre/lustre/fld/Makefile b/drivers/staging/lustre/lustre/fld/Makefile
deleted file mode 100644
index 426deba8b815..000000000000
--- a/drivers/staging/lustre/lustre/fld/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include/
-
-obj-$(CONFIG_LUSTRE_FS) += fld.o
-fld-y := fld_request.o fld_cache.o lproc_fld.o
diff --git a/drivers/staging/lustre/lustre/fld/fld_cache.c b/drivers/staging/lustre/lustre/fld/fld_cache.c
deleted file mode 100644
index a7415c9a1c28..000000000000
--- a/drivers/staging/lustre/lustre/fld/fld_cache.c
+++ /dev/null
@@ -1,516 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2013, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/fld/fld_cache.c
- *
- * FLD (Fids Location Database)
- *
- * Author: Pravin Shelar <pravin.shelar@sun.com>
- * Author: Yury Umanets <umka@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_FLD
-
-#include <linux/module.h>
-#include <asm/div64.h>
-
-#include <obd.h>
-#include <obd_class.h>
-#include <uapi/linux/lustre/lustre_ver.h>
-#include <obd_support.h>
-#include <lprocfs_status.h>
-
-#include <lustre_req_layout.h>
-#include <lustre_fld.h>
-#include "fld_internal.h"
-
-/**
- * create fld cache.
- */
-struct fld_cache *fld_cache_init(const char *name,
- int cache_size, int cache_threshold)
-{
- struct fld_cache *cache;
-
- LASSERT(name);
- LASSERT(cache_threshold < cache_size);
-
- cache = kzalloc(sizeof(*cache), GFP_NOFS);
- if (!cache)
- return ERR_PTR(-ENOMEM);
-
- INIT_LIST_HEAD(&cache->fci_entries_head);
- INIT_LIST_HEAD(&cache->fci_lru);
-
- cache->fci_cache_count = 0;
- rwlock_init(&cache->fci_lock);
-
- strlcpy(cache->fci_name, name,
- sizeof(cache->fci_name));
-
- cache->fci_cache_size = cache_size;
- cache->fci_threshold = cache_threshold;
-
- /* Init fld cache info. */
- memset(&cache->fci_stat, 0, sizeof(cache->fci_stat));
-
- CDEBUG(D_INFO, "%s: FLD cache - Size: %d, Threshold: %d\n",
- cache->fci_name, cache_size, cache_threshold);
-
- return cache;
-}
-
-/**
- * destroy fld cache.
- */
-void fld_cache_fini(struct fld_cache *cache)
-{
- __u64 pct;
-
- LASSERT(cache);
- fld_cache_flush(cache);
-
- if (cache->fci_stat.fst_count > 0) {
- pct = cache->fci_stat.fst_cache * 100;
- do_div(pct, cache->fci_stat.fst_count);
- } else {
- pct = 0;
- }
-
- CDEBUG(D_INFO, "FLD cache statistics (%s):\n", cache->fci_name);
- CDEBUG(D_INFO, " Total reqs: %llu\n", cache->fci_stat.fst_count);
- CDEBUG(D_INFO, " Cache reqs: %llu\n", cache->fci_stat.fst_cache);
- CDEBUG(D_INFO, " Cache hits: %llu%%\n", pct);
-
- kfree(cache);
-}
-
-/**
- * delete given node from list.
- */
-static void fld_cache_entry_delete(struct fld_cache *cache,
- struct fld_cache_entry *node)
-{
- list_del(&node->fce_list);
- list_del(&node->fce_lru);
- cache->fci_cache_count--;
- kfree(node);
-}
-
-/**
- * Fix the list by checking each entry against the NEXT entry, in order.
- */
-static void fld_fix_new_list(struct fld_cache *cache)
-{
- struct fld_cache_entry *f_curr;
- struct fld_cache_entry *f_next;
- struct lu_seq_range *c_range;
- struct lu_seq_range *n_range;
- struct list_head *head = &cache->fci_entries_head;
-
-restart_fixup:
-
- list_for_each_entry_safe(f_curr, f_next, head, fce_list) {
- c_range = &f_curr->fce_range;
- n_range = &f_next->fce_range;
-
- LASSERT(lu_seq_range_is_sane(c_range));
- if (&f_next->fce_list == head)
- break;
-
- if (c_range->lsr_flags != n_range->lsr_flags)
- continue;
-
- LASSERTF(c_range->lsr_start <= n_range->lsr_start,
- "cur lsr_start " DRANGE " next lsr_start " DRANGE "\n",
- PRANGE(c_range), PRANGE(n_range));
-
- /* check merge possibility with next range */
- if (c_range->lsr_end == n_range->lsr_start) {
- if (c_range->lsr_index != n_range->lsr_index)
- continue;
- n_range->lsr_start = c_range->lsr_start;
- fld_cache_entry_delete(cache, f_curr);
- continue;
- }
-
- /* check if current range overlaps with next range. */
- if (n_range->lsr_start < c_range->lsr_end) {
- if (c_range->lsr_index == n_range->lsr_index) {
- n_range->lsr_start = c_range->lsr_start;
- n_range->lsr_end = max(c_range->lsr_end,
- n_range->lsr_end);
- fld_cache_entry_delete(cache, f_curr);
- } else {
- if (n_range->lsr_end <= c_range->lsr_end) {
- *n_range = *c_range;
- fld_cache_entry_delete(cache, f_curr);
- } else {
- n_range->lsr_start = c_range->lsr_end;
- }
- }
-
-			/* we could overlap the next range
-			 * too, so restart the scan.
-			 */
- goto restart_fixup;
- }
-
- /* kill duplicates */
- if (c_range->lsr_start == n_range->lsr_start &&
- c_range->lsr_end == n_range->lsr_end)
- fld_cache_entry_delete(cache, f_curr);
- }
-}
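
The merge rule above is easiest to see on a flat array: two adjacent entries whose ranges abut and that map to the same MDT index collapse into one. A toy model using arrays instead of list_heads, with flags omitted:

#include <stdio.h>
#include <stdint.h>

struct toy_range { uint64_t start, end; uint32_t index; };

static int toy_merge(struct toy_range *r, int n)
{
	int i, out = 0;

	for (i = 0; i < n; i++) {
		if (out && r[out - 1].end == r[i].start &&
		    r[out - 1].index == r[i].index)
			r[out - 1].end = r[i].end;	/* extend previous */
		else
			r[out++] = r[i];
	}
	return out;
}

int main(void)
{
	struct toy_range r[] = {
		{ 0x100, 0x200, 0 }, { 0x200, 0x300, 0 }, { 0x300, 0x400, 1 },
	};
	int i, n = toy_merge(r, 3);

	for (i = 0; i < n; i++)
		printf("[%#llx, %#llx) -> MDT%u\n",
		       (unsigned long long)r[i].start,
		       (unsigned long long)r[i].end, r[i].index);
	return 0;
}
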
-
-/**
- * add node to fld cache
- */
-static inline void fld_cache_entry_add(struct fld_cache *cache,
- struct fld_cache_entry *f_new,
- struct list_head *pos)
-{
- list_add(&f_new->fce_list, pos);
- list_add(&f_new->fce_lru, &cache->fci_lru);
-
- cache->fci_cache_count++;
- fld_fix_new_list(cache);
-}
-
-/**
- * Check if the cache needs to be shrunk; if so, remove entries from
- * the LRU list one at a time until the cache is small enough.
- */
-static int fld_cache_shrink(struct fld_cache *cache)
-{
- int num = 0;
-
- if (cache->fci_cache_count < cache->fci_cache_size)
- return 0;
-
- while (cache->fci_cache_count + cache->fci_threshold >
- cache->fci_cache_size &&
- !list_empty(&cache->fci_lru)) {
- struct fld_cache_entry *flde =
- list_last_entry(&cache->fci_lru,
- struct fld_cache_entry, fce_lru);
-
- fld_cache_entry_delete(cache, flde);
- num++;
- }
-
- CDEBUG(D_INFO, "%s: FLD cache - Shrunk by %d entries\n",
- cache->fci_name, num);
-
- return 0;
-}
-
-/**
- * kill all fld cache entries.
- */
-void fld_cache_flush(struct fld_cache *cache)
-{
- write_lock(&cache->fci_lock);
- cache->fci_cache_size = 0;
- fld_cache_shrink(cache);
- write_unlock(&cache->fci_lock);
-}
-
-/**
- * Punch a hole in an existing range: divide the range and add the new
- * entry accordingly.
- */
-
-static void fld_cache_punch_hole(struct fld_cache *cache,
- struct fld_cache_entry *f_curr,
- struct fld_cache_entry *f_new)
-{
- const struct lu_seq_range *range = &f_new->fce_range;
- const u64 new_start = range->lsr_start;
- const u64 new_end = range->lsr_end;
- struct fld_cache_entry *fldt;
-
- fldt = kzalloc(sizeof(*fldt), GFP_ATOMIC);
- if (!fldt) {
- kfree(f_new);
- /* overlap is not allowed, so don't mess up list. */
- return;
- }
- /* break f_curr RANGE into three RANGES:
-	 * f_curr, f_new, fldt
- */
-
- /* f_new = *range */
-
- /* fldt */
- fldt->fce_range.lsr_start = new_end;
- fldt->fce_range.lsr_end = f_curr->fce_range.lsr_end;
- fldt->fce_range.lsr_index = f_curr->fce_range.lsr_index;
-
- /* f_curr */
- f_curr->fce_range.lsr_end = new_start;
-
- /* add these two entries to list */
- fld_cache_entry_add(cache, f_new, &f_curr->fce_list);
- fld_cache_entry_add(cache, fldt, &f_new->fce_list);
-
- /* no need to fixup */
-}
-
-/**
- * handle range overlap in fld cache.
- */
-static void fld_cache_overlap_handle(struct fld_cache *cache,
- struct fld_cache_entry *f_curr,
- struct fld_cache_entry *f_new)
-{
- const struct lu_seq_range *range = &f_new->fce_range;
- const u64 new_start = range->lsr_start;
- const u64 new_end = range->lsr_end;
- const u32 mdt = range->lsr_index;
-
-	/* This is the overlap case; these cases only check overlap with the
-	 * previous range. The fixup pass handles overlap with the next range.
-	 */
-
- if (f_curr->fce_range.lsr_index == mdt) {
- f_curr->fce_range.lsr_start = min(f_curr->fce_range.lsr_start,
- new_start);
-
- f_curr->fce_range.lsr_end = max(f_curr->fce_range.lsr_end,
- new_end);
-
- kfree(f_new);
- fld_fix_new_list(cache);
-
- } else if (new_start <= f_curr->fce_range.lsr_start &&
- f_curr->fce_range.lsr_end <= new_end) {
-		/* case 1: the new range completely overshadows the existing
-		 * range, e.g. the whole range was migrated; update the entry.
-		 */
-
- f_curr->fce_range = *range;
- kfree(f_new);
- fld_fix_new_list(cache);
-
- } else if (f_curr->fce_range.lsr_start < new_start &&
- new_end < f_curr->fce_range.lsr_end) {
-		/* case 2: the new range fits within the existing range. */
-
- fld_cache_punch_hole(cache, f_curr, f_new);
-
- } else if (new_end <= f_curr->fce_range.lsr_end) {
- /* case 3: overlap:
- * [new_start [c_start new_end) c_end)
- */
-
- LASSERT(new_start <= f_curr->fce_range.lsr_start);
-
- f_curr->fce_range.lsr_start = new_end;
- fld_cache_entry_add(cache, f_new, f_curr->fce_list.prev);
-
- } else if (f_curr->fce_range.lsr_start <= new_start) {
- /* case 4: overlap:
- * [c_start [new_start c_end) new_end)
- */
-
- LASSERT(f_curr->fce_range.lsr_end <= new_end);
-
- f_curr->fce_range.lsr_end = new_start;
- fld_cache_entry_add(cache, f_new, &f_curr->fce_list);
- } else {
- CERROR("NEW range =" DRANGE " curr = " DRANGE "\n",
- PRANGE(range), PRANGE(&f_curr->fce_range));
- }
-}
-
-struct fld_cache_entry
-*fld_cache_entry_create(const struct lu_seq_range *range)
-{
- struct fld_cache_entry *f_new;
-
- LASSERT(lu_seq_range_is_sane(range));
-
- f_new = kzalloc(sizeof(*f_new), GFP_NOFS);
- if (!f_new)
- return ERR_PTR(-ENOMEM);
-
- f_new->fce_range = *range;
- return f_new;
-}
-
-/**
- * Insert FLD entry in FLD cache.
- *
- * This function handles all cases of merging and breaking up of
- * ranges.
- */
-static int fld_cache_insert_nolock(struct fld_cache *cache,
- struct fld_cache_entry *f_new)
-{
- struct fld_cache_entry *f_curr;
- struct fld_cache_entry *n;
- struct list_head *head;
- struct list_head *prev = NULL;
- const u64 new_start = f_new->fce_range.lsr_start;
- const u64 new_end = f_new->fce_range.lsr_end;
- __u32 new_flags = f_new->fce_range.lsr_flags;
-
- /*
-	 * Duplicate entries are eliminated in the insert op, so we
-	 * don't need to search for the new entry before starting the
-	 * insertion loop.
- */
-
- if (!cache->fci_no_shrink)
- fld_cache_shrink(cache);
-
- head = &cache->fci_entries_head;
-
- list_for_each_entry_safe(f_curr, n, head, fce_list) {
-		/* new range ends before this entry: insertion point found */
- if (new_end < f_curr->fce_range.lsr_start ||
- (new_end == f_curr->fce_range.lsr_start &&
- new_flags != f_curr->fce_range.lsr_flags))
- break;
-
- prev = &f_curr->fce_list;
-		/* check whether the new range overlaps this entry. */
- if (new_start < f_curr->fce_range.lsr_end &&
- new_flags == f_curr->fce_range.lsr_flags) {
- fld_cache_overlap_handle(cache, f_curr, f_new);
- goto out;
- }
- }
-
- if (!prev)
- prev = head;
-
- CDEBUG(D_INFO, "insert range " DRANGE "\n", PRANGE(&f_new->fce_range));
- /* Add new entry to cache and lru list. */
- fld_cache_entry_add(cache, f_new, prev);
-out:
- return 0;
-}
-
-int fld_cache_insert(struct fld_cache *cache,
- const struct lu_seq_range *range)
-{
- struct fld_cache_entry *flde;
- int rc;
-
- flde = fld_cache_entry_create(range);
- if (IS_ERR(flde))
- return PTR_ERR(flde);
-
- write_lock(&cache->fci_lock);
- rc = fld_cache_insert_nolock(cache, flde);
- write_unlock(&cache->fci_lock);
- if (rc)
- kfree(flde);
-
- return rc;
-}
-
-/**
- * Look up an entry matching \a range in the FLD cache, without locking.
- */
-
-struct fld_cache_entry
-*fld_cache_entry_lookup_nolock(struct fld_cache *cache,
- struct lu_seq_range *range)
-{
- struct fld_cache_entry *flde;
- struct fld_cache_entry *got = NULL;
- struct list_head *head;
-
- head = &cache->fci_entries_head;
- list_for_each_entry(flde, head, fce_list) {
- if (range->lsr_start == flde->fce_range.lsr_start ||
- (range->lsr_end == flde->fce_range.lsr_end &&
- range->lsr_flags == flde->fce_range.lsr_flags)) {
- got = flde;
- break;
- }
- }
-
- return got;
-}
-
-/**
- * Look up an entry matching \a range in the fld cache, taking fci_lock.
- */
-struct fld_cache_entry
-*fld_cache_entry_lookup(struct fld_cache *cache, struct lu_seq_range *range)
-{
- struct fld_cache_entry *got = NULL;
-
- read_lock(&cache->fci_lock);
- got = fld_cache_entry_lookup_nolock(cache, range);
- read_unlock(&cache->fci_lock);
- return got;
-}
-
-/**
- * Look up the range containing \a seq in the fld cache.
- */
-int fld_cache_lookup(struct fld_cache *cache,
- const u64 seq, struct lu_seq_range *range)
-{
- struct fld_cache_entry *flde;
- struct fld_cache_entry *prev = NULL;
- struct list_head *head;
-
- read_lock(&cache->fci_lock);
- head = &cache->fci_entries_head;
-
- cache->fci_stat.fst_count++;
- list_for_each_entry(flde, head, fce_list) {
- if (flde->fce_range.lsr_start > seq) {
- if (prev)
- *range = prev->fce_range;
- break;
- }
-
- prev = flde;
- if (lu_seq_range_within(&flde->fce_range, seq)) {
- *range = flde->fce_range;
-
- cache->fci_stat.fst_cache++;
- read_unlock(&cache->fci_lock);
- return 0;
- }
- }
- read_unlock(&cache->fci_lock);
- return -ENOENT;
-}
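
The lookup above leans on the sort order: once an entry starts past the wanted sequence, no later entry can contain it. A simplified standalone sketch (the real code also copies the predecessor's range back to the caller on a miss):

#include <stdio.h>
#include <stdint.h>

struct toy_range { uint64_t start, end; uint32_t index; };

static int toy_lookup(const struct toy_range *r, int n,
		      uint64_t seq, uint32_t *mdt)
{
	int i;

	for (i = 0; i < n; i++) {
		if (r[i].start > seq)	/* sorted: no later entry matches */
			break;
		if (seq < r[i].end) {	/* start <= seq < end: cache hit */
			*mdt = r[i].index;
			return 0;
		}
	}
	return -1;			/* -ENOENT in the real code */
}

int main(void)
{
	const struct toy_range r[] = {
		{ 0x100, 0x300, 0 }, { 0x300, 0x400, 1 },
	};
	uint32_t mdt;

	if (toy_lookup(r, 2, 0x250, &mdt) == 0)
		printf("seq 0x250 -> MDT%u\n", mdt);
	if (toy_lookup(r, 2, 0x500, &mdt) != 0)
		printf("seq 0x500 -> miss\n");
	return 0;
}
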
diff --git a/drivers/staging/lustre/lustre/fld/fld_internal.h b/drivers/staging/lustre/lustre/fld/fld_internal.h
deleted file mode 100644
index e1d6aaa5c2b4..000000000000
--- a/drivers/staging/lustre/lustre/fld/fld_internal.h
+++ /dev/null
@@ -1,170 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/fld/fld_internal.h
- *
- * Subsystem Description:
- * FLD is the FID Location Database, which stores where (i.e., on which
- * MDT) FIDs are located.
- * The database is basically a record file; each record consists of a FID
- * sequence range, an MDT/OST index, and flags. The FLD for the whole FS
- * is currently stored only on the sequence controller (MDT0), but each
- * target also has a local FLD, which stores only the local sequences.
- *
- * The FLD subsystem has two tasks:
- * 1. Maintain the database: when the sequence controller allocates new
- * sequence ranges to nodes, it calls the FLD API to insert the location
- * information <sequence_range, node_index> into the FLDB.
- *
- * 2. Handle requests from other nodes: if a client needs to know where a
- * FID is located and cannot find the information in its local cache, it
- * sends a FLD lookup RPC to the FLD service, which looks up the FLDB
- * entry and returns the location information to the client.
- *
- *
- * Author: Yury Umanets <umka@clusterfs.com>
- * Author: Tom WangDi <wangdi@clusterfs.com>
- */
-#ifndef __FLD_INTERNAL_H
-#define __FLD_INTERNAL_H
-
-#include <uapi/linux/lustre/lustre_idl.h>
-
-#include <lustre_req_layout.h>
-#include <lustre_fld.h>
-
-struct fld_stats {
- __u64 fst_count;
- __u64 fst_cache;
- __u64 fst_inflight;
-};
-
-struct lu_fld_hash {
- const char *fh_name;
- int (*fh_hash_func)(struct lu_client_fld *, __u64);
- struct lu_fld_target *(*fh_scan_func)(struct lu_client_fld *, __u64);
-};
-
-struct fld_cache_entry {
- struct list_head fce_lru;
- struct list_head fce_list;
- /** fld cache entries are sorted on range->lsr_start field. */
- struct lu_seq_range fce_range;
-};
-
-struct fld_cache {
- /**
-	 * Cache guard; mostly protects fci_hash, since the other fields
-	 * are immutable once init has finished.
- */
- rwlock_t fci_lock;
-
- /** Cache shrink threshold */
- int fci_threshold;
-
- /** Preferred number of cached entries */
- int fci_cache_size;
-
- /** Current number of cached entries. Protected by \a fci_lock */
- int fci_cache_count;
-
- /** LRU list fld entries. */
- struct list_head fci_lru;
-
- /** sorted fld entries. */
- struct list_head fci_entries_head;
-
- /** Cache statistics. */
- struct fld_stats fci_stat;
-
- /** Cache name used for debug and messages. */
- char fci_name[LUSTRE_MDT_MAXNAMELEN];
- unsigned int fci_no_shrink:1;
-};
-
-enum {
-	/* 4M of FLD cache will not hurt the server a lot. */
- FLD_SERVER_CACHE_SIZE = (4 * 0x100000),
-
- /* 1M of FLD cache will not hurt client a lot. */
- FLD_CLIENT_CACHE_SIZE = (1 * 0x100000)
-};
-
-enum {
- /* Cache threshold is 10 percent of size. */
- FLD_SERVER_CACHE_THRESHOLD = 10,
-
- /* Cache threshold is 10 percent of size. */
- FLD_CLIENT_CACHE_THRESHOLD = 10
-};
-
-extern struct lu_fld_hash fld_hash[];
-
-int fld_client_rpc(struct obd_export *exp,
- struct lu_seq_range *range, __u32 fld_op,
- struct ptlrpc_request **reqp);
-
-extern struct lprocfs_vars fld_client_debugfs_list[];
-
-struct fld_cache *fld_cache_init(const char *name,
- int cache_size, int cache_threshold);
-
-void fld_cache_fini(struct fld_cache *cache);
-
-void fld_cache_flush(struct fld_cache *cache);
-
-int fld_cache_insert(struct fld_cache *cache,
- const struct lu_seq_range *range);
-
-struct fld_cache_entry
-*fld_cache_entry_create(const struct lu_seq_range *range);
-
-int fld_cache_lookup(struct fld_cache *cache,
- const u64 seq, struct lu_seq_range *range);
-
-struct fld_cache_entry*
-fld_cache_entry_lookup(struct fld_cache *cache, struct lu_seq_range *range);
-
-struct fld_cache_entry
-*fld_cache_entry_lookup_nolock(struct fld_cache *cache,
- struct lu_seq_range *range);
-
-static inline const char *
-fld_target_name(struct lu_fld_target *tar)
-{
- if (tar->ft_srv)
- return tar->ft_srv->lsf_name;
-
- return (const char *)tar->ft_exp->exp_obd->obd_name;
-}
-
-#endif /* __FLD_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/fld/fld_request.c b/drivers/staging/lustre/lustre/fld/fld_request.c
deleted file mode 100644
index 97f7ea632346..000000000000
--- a/drivers/staging/lustre/lustre/fld/fld_request.c
+++ /dev/null
@@ -1,446 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/fld/fld_request.c
- *
- * FLD (Fids Location Database)
- *
- * Author: Yury Umanets <umka@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_FLD
-
-#include <linux/module.h>
-#include <asm/div64.h>
-
-#include <obd.h>
-#include <obd_class.h>
-#include <uapi/linux/lustre/lustre_ver.h>
-#include <obd_support.h>
-#include <lprocfs_status.h>
-
-#include <lustre_req_layout.h>
-#include <lustre_fld.h>
-#include <lustre_mdc.h>
-#include "fld_internal.h"
-
-static int fld_rrb_hash(struct lu_client_fld *fld, u64 seq)
-{
- LASSERT(fld->lcf_count > 0);
- return do_div(seq, fld->lcf_count);
-}
-
-static struct lu_fld_target *
-fld_rrb_scan(struct lu_client_fld *fld, u64 seq)
-{
- struct lu_fld_target *target;
- int hash;
-
-	/* Because almost all special sequences are located on MDT0,
-	 * they should go to index 0 directly instead of being hashed
-	 * again; also, if the other MDTs are not yet connected, the
-	 * fld lookup requests (for seqs on MDT0) should not be
-	 * blocked because of those MDTs.
-	 */
- if (fid_seq_is_norm(seq))
- hash = fld_rrb_hash(fld, seq);
- else
- hash = 0;
-
-again:
- list_for_each_entry(target, &fld->lcf_targets, ft_chain) {
- if (target->ft_idx == hash)
- return target;
- }
-
- if (hash != 0) {
-		/* It is possible that the remote target (MDT) is not yet
-		 * connected to the client, so refer this to MDT0, which
-		 * should have been connected during mount.
-		 */
- hash = 0;
- goto again;
- }
-
- CERROR("%s: Can't find target by hash %d (seq %#llx). Targets (%d):\n",
- fld->lcf_name, hash, seq, fld->lcf_count);
-
- list_for_each_entry(target, &fld->lcf_targets, ft_chain) {
- const char *srv_name = target->ft_srv ?
- target->ft_srv->lsf_name : "<null>";
- const char *exp_name = target->ft_exp ?
- (char *)target->ft_exp->exp_obd->obd_uuid.uuid :
- "<null>";
-
- CERROR(" exp: 0x%p (%s), srv: 0x%p (%s), idx: %llu\n",
- target->ft_exp, exp_name, target->ft_srv,
- srv_name, target->ft_idx);
- }
-
- /*
-	 * If no target is found there is a logic error anyway, so LBUG()
-	 * here to catch the situation.
- */
- LBUG();
- return NULL;
-}
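
The placement policy above is plain round-robin by sequence number, with all special (non-normal) sequences pinned to MDT0. A userspace sketch; the FID_SEQ_START boundary is assumed from the FID layout conventions, and the four-MDT layout is illustrative:

#include <stdio.h>
#include <stdint.h>

#define FID_SEQ_START (1ULL << 33)	/* first normal sequence, assumed */

static unsigned int rrb_index(uint64_t seq, unsigned int count)
{
	if (seq < FID_SEQ_START)	/* special sequences live on MDT0 */
		return 0;
	return (unsigned int)(seq % count);
}

int main(void)
{
	unsigned int count = 4;		/* hypothetical 4-MDT filesystem */
	uint64_t seq;

	for (seq = FID_SEQ_START; seq < FID_SEQ_START + 6; seq++)
		printf("seq %#llx -> MDT%u\n",
		       (unsigned long long)seq, rrb_index(seq, count));
	return 0;
}
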
-
-struct lu_fld_hash fld_hash[] = {
- {
- .fh_name = "RRB",
- .fh_hash_func = fld_rrb_hash,
- .fh_scan_func = fld_rrb_scan
- },
- {
- NULL,
- }
-};
-
-static struct lu_fld_target *
-fld_client_get_target(struct lu_client_fld *fld, u64 seq)
-{
- struct lu_fld_target *target;
-
- LASSERT(fld->lcf_hash);
-
- spin_lock(&fld->lcf_lock);
- target = fld->lcf_hash->fh_scan_func(fld, seq);
- spin_unlock(&fld->lcf_lock);
-
- if (target) {
- CDEBUG(D_INFO, "%s: Found target (idx %llu) by seq %#llx\n",
- fld->lcf_name, target->ft_idx, seq);
- }
-
- return target;
-}
-
-/*
- * Add an export to the FLD. This is usually done by CMM and LMV, as they
- * are the main users of the FLD module.
- */
-int fld_client_add_target(struct lu_client_fld *fld,
- struct lu_fld_target *tar)
-{
- const char *name;
- struct lu_fld_target *target, *tmp;
-
- LASSERT(tar);
- name = fld_target_name(tar);
- LASSERT(name);
- LASSERT(tar->ft_srv || tar->ft_exp);
-
- CDEBUG(D_INFO, "%s: Adding target %s (idx %llu)\n",
- fld->lcf_name, name, tar->ft_idx);
-
- target = kzalloc(sizeof(*target), GFP_NOFS);
- if (!target)
- return -ENOMEM;
-
- spin_lock(&fld->lcf_lock);
- list_for_each_entry(tmp, &fld->lcf_targets, ft_chain) {
- if (tmp->ft_idx == tar->ft_idx) {
- spin_unlock(&fld->lcf_lock);
- kfree(target);
- CERROR("Target %s exists in FLD and known as %s:#%llu\n",
- name, fld_target_name(tmp), tmp->ft_idx);
- return -EEXIST;
- }
- }
-
- target->ft_exp = tar->ft_exp;
- if (target->ft_exp)
- class_export_get(target->ft_exp);
- target->ft_srv = tar->ft_srv;
- target->ft_idx = tar->ft_idx;
-
- list_add_tail(&target->ft_chain, &fld->lcf_targets);
-
- fld->lcf_count++;
- spin_unlock(&fld->lcf_lock);
-
- return 0;
-}
-EXPORT_SYMBOL(fld_client_add_target);
-
-/* Remove export from FLD */
-int fld_client_del_target(struct lu_client_fld *fld, __u64 idx)
-{
- struct lu_fld_target *target, *tmp;
-
- spin_lock(&fld->lcf_lock);
- list_for_each_entry_safe(target, tmp, &fld->lcf_targets, ft_chain) {
- if (target->ft_idx == idx) {
- fld->lcf_count--;
- list_del(&target->ft_chain);
- spin_unlock(&fld->lcf_lock);
-
- if (target->ft_exp)
- class_export_put(target->ft_exp);
-
- kfree(target);
- return 0;
- }
- }
- spin_unlock(&fld->lcf_lock);
- return -ENOENT;
-}
-
-static struct dentry *fld_debugfs_dir;
-
-static void fld_client_debugfs_init(struct lu_client_fld *fld)
-{
- fld->lcf_debugfs_entry = debugfs_create_dir(fld->lcf_name,
- fld_debugfs_dir);
-
- ldebugfs_add_vars(fld->lcf_debugfs_entry, fld_client_debugfs_list, fld);
-}
-
-void fld_client_debugfs_fini(struct lu_client_fld *fld)
-{
- debugfs_remove_recursive(fld->lcf_debugfs_entry);
-}
-EXPORT_SYMBOL(fld_client_debugfs_fini);
-
-static inline int hash_is_sane(int hash)
-{
- return (hash >= 0 && hash < ARRAY_SIZE(fld_hash));
-}
-
-int fld_client_init(struct lu_client_fld *fld,
- const char *prefix, int hash)
-{
- int cache_size, cache_threshold;
- int rc = 0;
-
- snprintf(fld->lcf_name, sizeof(fld->lcf_name),
- "cli-%s", prefix);
-
- if (!hash_is_sane(hash)) {
- CERROR("%s: Wrong hash function %#x\n",
- fld->lcf_name, hash);
- return -EINVAL;
- }
-
- fld->lcf_count = 0;
- spin_lock_init(&fld->lcf_lock);
- fld->lcf_hash = &fld_hash[hash];
- INIT_LIST_HEAD(&fld->lcf_targets);
-
- cache_size = FLD_CLIENT_CACHE_SIZE /
- sizeof(struct fld_cache_entry);
-
- cache_threshold = cache_size *
- FLD_CLIENT_CACHE_THRESHOLD / 100;
-
- fld->lcf_cache = fld_cache_init(fld->lcf_name,
- cache_size, cache_threshold);
- if (IS_ERR(fld->lcf_cache)) {
- rc = PTR_ERR(fld->lcf_cache);
- fld->lcf_cache = NULL;
- goto out;
- }
-
- fld_client_debugfs_init(fld);
-out:
- CDEBUG(D_INFO, "%s: Using \"%s\" hash\n",
- fld->lcf_name, fld->lcf_hash->fh_name);
- return rc;
-}
-EXPORT_SYMBOL(fld_client_init);
-
-void fld_client_fini(struct lu_client_fld *fld)
-{
- struct lu_fld_target *target, *tmp;
-
- spin_lock(&fld->lcf_lock);
- list_for_each_entry_safe(target, tmp, &fld->lcf_targets, ft_chain) {
- fld->lcf_count--;
- list_del(&target->ft_chain);
- if (target->ft_exp)
- class_export_put(target->ft_exp);
- kfree(target);
- }
- spin_unlock(&fld->lcf_lock);
-
- if (fld->lcf_cache) {
- if (!IS_ERR(fld->lcf_cache))
- fld_cache_fini(fld->lcf_cache);
- fld->lcf_cache = NULL;
- }
-}
-EXPORT_SYMBOL(fld_client_fini);
-
-int fld_client_rpc(struct obd_export *exp,
- struct lu_seq_range *range, __u32 fld_op,
- struct ptlrpc_request **reqp)
-{
- struct ptlrpc_request *req = NULL;
- struct lu_seq_range *prange;
- __u32 *op;
- int rc = 0;
- struct obd_import *imp;
-
- LASSERT(exp);
-
- imp = class_exp2cliimp(exp);
- switch (fld_op) {
- case FLD_QUERY:
- req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_QUERY,
- LUSTRE_MDS_VERSION, FLD_QUERY);
- if (!req)
- return -ENOMEM;
-
- /*
-		 * XXX: only needed when talking to an old server (< 2.6);
-		 * remove once < 2.6 servers are no longer supported
- */
- op = req_capsule_client_get(&req->rq_pill, &RMF_FLD_OPC);
- *op = FLD_LOOKUP;
-
- if (imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS)
- req->rq_allow_replay = 1;
- break;
- case FLD_READ:
- req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_READ,
- LUSTRE_MDS_VERSION, FLD_READ);
- if (!req)
- return -ENOMEM;
-
- req_capsule_set_size(&req->rq_pill, &RMF_GENERIC_DATA,
- RCL_SERVER, PAGE_SIZE);
- break;
- default:
- rc = -EINVAL;
- break;
- }
- if (rc)
- return rc;
-
- prange = req_capsule_client_get(&req->rq_pill, &RMF_FLD_MDFLD);
- *prange = *range;
- ptlrpc_request_set_replen(req);
- req->rq_request_portal = FLD_REQUEST_PORTAL;
- req->rq_reply_portal = MDC_REPLY_PORTAL;
- ptlrpc_at_set_req_timeout(req);
-
- obd_get_request_slot(&exp->exp_obd->u.cli);
- rc = ptlrpc_queue_wait(req);
- obd_put_request_slot(&exp->exp_obd->u.cli);
- if (rc)
- goto out_req;
-
- if (fld_op == FLD_QUERY) {
- prange = req_capsule_server_get(&req->rq_pill, &RMF_FLD_MDFLD);
- if (!prange) {
- rc = -EFAULT;
- goto out_req;
- }
- *range = *prange;
- }
-
-out_req:
- if (rc || !reqp) {
- ptlrpc_req_finished(req);
- req = NULL;
- }
-
- if (reqp)
- *reqp = req;
-
- return rc;
-}
-
-int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds,
- __u32 flags, const struct lu_env *env)
-{
- struct lu_seq_range res = { 0 };
- struct lu_fld_target *target;
- int rc;
-
- rc = fld_cache_lookup(fld->lcf_cache, seq, &res);
- if (rc == 0) {
- *mds = res.lsr_index;
- return 0;
- }
-
-	/* Cannot find it in the cache */
- target = fld_client_get_target(fld, seq);
- LASSERT(target);
-
- CDEBUG(D_INFO,
- "%s: Lookup fld entry (seq: %#llx) on target %s (idx %llu)\n",
- fld->lcf_name, seq, fld_target_name(target), target->ft_idx);
-
- res.lsr_start = seq;
- fld_range_set_type(&res, flags);
- rc = fld_client_rpc(target->ft_exp, &res, FLD_QUERY, NULL);
-
- if (rc == 0) {
- *mds = res.lsr_index;
-
- fld_cache_insert(fld->lcf_cache, &res);
- }
- return rc;
-}
-EXPORT_SYMBOL(fld_client_lookup);
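
Taken together, the client lookup is cache-first: consult the local range cache, fall back to an RPC, then insert the server's answer for next time. A self-contained toy model; toy_rpc() is a hypothetical stand-in for fld_client_rpc() that hands back made-up 256-sequence ranges:

#include <stdio.h>
#include <stdint.h>

struct toy_range { uint64_t start, end; uint32_t index; };

static struct toy_range cache[8];
static int cache_n;

static int toy_rpc(uint64_t seq, struct toy_range *res)
{
	res->start = seq & ~0xffULL;	/* pretend ranges span 256 seqs */
	res->end = res->start + 0x100;
	res->index = (uint32_t)(seq >> 8) % 4;
	return 0;
}

static int toy_client_lookup(uint64_t seq, uint32_t *mdt)
{
	struct toy_range res;
	int i, rc;

	for (i = 0; i < cache_n; i++)
		if (seq >= cache[i].start && seq < cache[i].end) {
			*mdt = cache[i].index;	/* cache hit, no RPC */
			return 0;
		}

	rc = toy_rpc(seq, &res);		/* miss: ask the server */
	if (rc)
		return rc;
	if (cache_n < 8)
		cache[cache_n++] = res;		/* remember for next time */
	*mdt = res.index;
	return 0;
}

int main(void)
{
	uint32_t mdt;

	toy_client_lookup(0x1234, &mdt);
	printf("seq 0x1234 -> MDT%u\n", mdt);
	toy_client_lookup(0x12ff, &mdt);	/* served from the cache */
	printf("seq 0x12ff -> MDT%u\n", mdt);
	return 0;
}
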
-
-void fld_client_flush(struct lu_client_fld *fld)
-{
- fld_cache_flush(fld->lcf_cache);
-}
-
-static int __init fld_init(void)
-{
- int rc;
-
- rc = libcfs_setup();
- if (rc)
- return rc;
-
- fld_debugfs_dir = debugfs_create_dir(LUSTRE_FLD_NAME,
- debugfs_lustre_root);
- return 0;
-}
-
-static void __exit fld_exit(void)
-{
- debugfs_remove_recursive(fld_debugfs_dir);
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre FID Location Database");
-MODULE_VERSION(LUSTRE_VERSION_STRING);
-MODULE_LICENSE("GPL");
-
-module_init(fld_init)
-module_exit(fld_exit)
diff --git a/drivers/staging/lustre/lustre/fld/lproc_fld.c b/drivers/staging/lustre/lustre/fld/lproc_fld.c
deleted file mode 100644
index 0bcfb26ef8aa..000000000000
--- a/drivers/staging/lustre/lustre/fld/lproc_fld.c
+++ /dev/null
@@ -1,154 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/fld/lproc_fld.c
- *
- * FLD (FIDs Location Database)
- *
- * Author: Yury Umanets <umka@clusterfs.com>
- * Di Wang <di.wang@whamcloud.com>
- */
-
-#define DEBUG_SUBSYSTEM S_FLD
-
-#include <linux/module.h>
-
-#include <obd.h>
-#include <obd_class.h>
-#include <obd_support.h>
-#include <lustre_req_layout.h>
-#include <lustre_fld.h>
-#include <lustre_fid.h>
-#include "fld_internal.h"
-
-static int
-fld_debugfs_targets_seq_show(struct seq_file *m, void *unused)
-{
- struct lu_client_fld *fld = (struct lu_client_fld *)m->private;
- struct lu_fld_target *target;
-
- spin_lock(&fld->lcf_lock);
- list_for_each_entry(target, &fld->lcf_targets, ft_chain)
- seq_printf(m, "%s\n", fld_target_name(target));
- spin_unlock(&fld->lcf_lock);
-
- return 0;
-}
-
-static int
-fld_debugfs_hash_seq_show(struct seq_file *m, void *unused)
-{
- struct lu_client_fld *fld = (struct lu_client_fld *)m->private;
-
- spin_lock(&fld->lcf_lock);
- seq_printf(m, "%s\n", fld->lcf_hash->fh_name);
- spin_unlock(&fld->lcf_lock);
-
- return 0;
-}
-
-static ssize_t
-fld_debugfs_hash_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct lu_client_fld *fld;
- struct lu_fld_hash *hash = NULL;
- char fh_name[8];
- int i;
-
- if (count > sizeof(fh_name))
- return -ENAMETOOLONG;
-
- if (copy_from_user(fh_name, buffer, count) != 0)
- return -EFAULT;
-
- fld = ((struct seq_file *)file->private_data)->private;
-
- for (i = 0; fld_hash[i].fh_name; i++) {
- if (count != strlen(fld_hash[i].fh_name))
- continue;
-
- if (!strncmp(fld_hash[i].fh_name, fh_name, count)) {
- hash = &fld_hash[i];
- break;
- }
- }
-
- if (hash) {
- spin_lock(&fld->lcf_lock);
- fld->lcf_hash = hash;
- spin_unlock(&fld->lcf_lock);
-
- CDEBUG(D_INFO, "%s: Changed hash to \"%s\"\n",
- fld->lcf_name, hash->fh_name);
- }
-
- return count;
-}
-
-static ssize_t
-fld_debugfs_cache_flush_write(struct file *file, const char __user *buffer,
- size_t count, loff_t *pos)
-{
- struct lu_client_fld *fld = file->private_data;
-
- fld_cache_flush(fld->lcf_cache);
-
- CDEBUG(D_INFO, "%s: Lookup cache is flushed\n", fld->lcf_name);
-
- return count;
-}
-
-static int
-fld_debugfs_cache_flush_release(struct inode *inode, struct file *file)
-{
- file->private_data = NULL;
- return 0;
-}
-
-static const struct file_operations fld_debugfs_cache_flush_fops = {
- .owner = THIS_MODULE,
- .open = simple_open,
- .write = fld_debugfs_cache_flush_write,
- .release = fld_debugfs_cache_flush_release,
-};
-
-LPROC_SEQ_FOPS_RO(fld_debugfs_targets);
-LPROC_SEQ_FOPS(fld_debugfs_hash);
-
-struct lprocfs_vars fld_client_debugfs_list[] = {
- { "targets", &fld_debugfs_targets_fops },
- { "hash", &fld_debugfs_hash_fops },
- { "cache_flush", &fld_debugfs_cache_flush_fops },
- { NULL }
-};
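The write handler above follows a common debugfs pattern: copy a length-bounded, not necessarily NUL-terminated string from user space, then match it against a NULL-terminated table of named options. A minimal user-space sketch of the same matching logic (the table contents and pick_hash() are illustrative stand-ins, not the Lustre structures):

#include <stdio.h>
#include <string.h>

struct hash_entry {
	const char *fh_name;	/* NULL name terminates the table */
};

static const struct hash_entry fld_hash[] = {
	{ "dht" }, { "rrb" }, { NULL }
};

/* Mirror of the loop in fld_debugfs_hash_seq_write(): the candidate
 * buffer is not NUL-terminated, so compare length first, then bytes. */
static const struct hash_entry *pick_hash(const char *buf, size_t count)
{
	int i;

	for (i = 0; fld_hash[i].fh_name; i++) {
		if (count != strlen(fld_hash[i].fh_name))
			continue;
		if (!strncmp(fld_hash[i].fh_name, buf, count))
			return &fld_hash[i];
	}
	return NULL;
}

int main(void)
{
	const struct hash_entry *h = pick_hash("rrb", 3);

	printf("matched: %s\n", h ? h->fh_name : "(none)");
	return 0;
}

Comparing the length first makes the subsequent strncmp() an exact-name match rather than a prefix match.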
diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h
deleted file mode 100644
index 6f7b991be809..000000000000
--- a/drivers/staging/lustre/lustre/include/cl_object.h
+++ /dev/null
@@ -1,2463 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-#ifndef _LUSTRE_CL_OBJECT_H
-#define _LUSTRE_CL_OBJECT_H
-
-/** \defgroup clio clio
- *
- * Client objects implement io operations and cache pages.
- *
- * Examples: lov and osc are implementations of cl interface.
- *
- * Big Theory Statement.
- *
- * Layered objects.
- *
- * Client implementation is based on the following data-types:
- *
- * - cl_object
- *
- * - cl_page
- *
- * - cl_lock represents an extent lock on an object.
- *
- * - cl_io represents high-level i/o activity such as a whole read/write
- * system call, or write-out of pages from under the lock being
- * canceled. cl_io has sub-ios that can be stopped and resumed
- * independently, thus achieving a high degree of transfer
- * parallelism. A single cl_io can be advanced forward by
- * multiple threads (although in the most usual case of a
- * read/write system call it is associated with the single user
- * thread that issued the system call).
- *
- * Terminology
- *
- * - to avoid confusion, a high-level I/O operation like a read or write
- * system call is referred to as "an io", whereas a low-level I/O
- * operation, like an RPC, is referred to as "a transfer"
- *
- * - "generic code" means generic (not file system specific) code in the
- * hosting environment. "cl-code" means code (mostly in cl_*.c files) that
- * is not layer specific.
- *
- * Locking.
- *
- * - i_mutex
- * - PG_locked
- * - cl_object_header::coh_page_guard
- * - lu_site::ls_guard
- *
- * See the top comment in cl_object.c for the description of overall locking and
- * reference-counting design.
- *
- * See comments below for the description of i/o, page, and dlm-locking
- * design.
- *
- * @{
- */
-
-/*
- * super-class definitions.
- */
-#include <lu_object.h>
-#include <lustre_compat.h>
-#include <linux/atomic.h>
-#include <linux/mutex.h>
-#include <linux/radix-tree.h>
-#include <linux/spinlock.h>
-#include <linux/wait.h>
-
-struct inode;
-
-struct cl_device;
-
-struct cl_object;
-
-struct cl_page;
-struct cl_page_slice;
-struct cl_lock;
-struct cl_lock_slice;
-
-struct cl_lock_operations;
-struct cl_page_operations;
-
-struct cl_io;
-struct cl_io_slice;
-
-struct cl_req_attr;
-
-/**
- * Device in the client stack.
- *
- * \see vvp_device, lov_device, lovsub_device, osc_device
- */
-struct cl_device {
- /** Super-class. */
- struct lu_device cd_lu_dev;
-};
-
-/** \addtogroup cl_object cl_object
- * @{
- */
-/**
- * "Data attributes" of cl_object. Data attributes can be updated
- * independently for a sub-object, and top-object's attributes are calculated
- * from sub-objects' ones.
- */
-struct cl_attr {
- /** Object size, in bytes */
- loff_t cat_size;
- /**
- * Known minimal size, in bytes.
- *
- * This is only valid when at least one DLM lock is held.
- */
- loff_t cat_kms;
- /** Modification time. Measured in seconds since epoch. */
- time64_t cat_mtime;
- /** Access time. Measured in seconds since epoch. */
- time64_t cat_atime;
- /** Change time. Measured in seconds since epoch. */
- time64_t cat_ctime;
- /**
- * Blocks allocated to this cl_object on the server file system.
- *
- * \todo XXX An interface for block size is needed.
- */
- __u64 cat_blocks;
- /**
- * User identifier for quota purposes.
- */
- uid_t cat_uid;
- /**
- * Group identifier for quota purposes.
- */
- gid_t cat_gid;
-
- /* nlink of the directory */
- __u64 cat_nlink;
-};
-
-/**
- * Fields in cl_attr that are being set.
- */
-enum cl_attr_valid {
- CAT_SIZE = 1 << 0,
- CAT_KMS = 1 << 1,
- CAT_MTIME = 1 << 3,
- CAT_ATIME = 1 << 4,
- CAT_CTIME = 1 << 5,
- CAT_BLOCKS = 1 << 6,
- CAT_UID = 1 << 7,
- CAT_GID = 1 << 8
-};
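The bits above are consumed as a mask: a caller sets \a valid to the union of the fields it filled in, and coo_attr_update() (declared further down) applies only those. A stand-alone sketch of that convention, with cl_attr reduced to two hypothetical fields:

#include <stdio.h>

/* Cut-down stand-ins for cl_attr and the valid bits above. */
enum { CAT_SIZE = 1 << 0, CAT_MTIME = 1 << 3 };

struct attr { long long size; long long mtime; };

/* Apply only the fields flagged in valid, as coo_attr_update() does. */
static void attr_update(struct attr *dst, const struct attr *src,
			unsigned int valid)
{
	if (valid & CAT_SIZE)
		dst->size = src->size;
	if (valid & CAT_MTIME)
		dst->mtime = src->mtime;
}

int main(void)
{
	struct attr cur = { 0, 0 }, new = { 4096, 1234 };

	attr_update(&cur, &new, CAT_SIZE);	/* mtime left untouched */
	printf("size=%lld mtime=%lld\n", cur.size, cur.mtime);
	return 0;
}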
-
-/**
- * Sub-class of lu_object with methods common for objects on the client
- * stacks.
- *
- * cl_object: represents a regular file system object, both a file and a
- * stripe. cl_object is based on lu_object: it is identified by a fid,
- * layered, cached, hashed, and lrued. An important distinction from the
- * server side, where md_object and dt_object are used, is that cl_object
- * "fans out" at the lov/sns level: depending on the file layout, a single
- * file is represented as a set of "sub-objects" (stripes). At the
- * implementation level, struct lov_object contains an array of cl_objects.
- * Each sub-object is a full-fledged cl_object, having its own fid and
- * living in the lru and hash table.
- *
- * This leads to the next important difference from the server side: on the
- * client, it is quite usual to have objects with different sequences of
- * layers. For example, a typical top-object is composed of the following
- * layers:
- *
- * - vvp
- * - lov
- *
- * whereas its sub-objects are composed of
- *
- * - lovsub
- * - osc
- *
- * layers. Here "lovsub" is a mostly dummy layer, whose purpose is to keep
- * track of the object-subobject relationship.
- *
- * Sub-objects are not cached independently: when top-object is about to
- * be discarded from the memory, all its sub-objects are torn-down and
- * destroyed too.
- *
- * \see vvp_object, lov_object, lovsub_object, osc_object
- */
-struct cl_object {
- /** super class */
- struct lu_object co_lu;
- /** per-object-layer operations */
- const struct cl_object_operations *co_ops;
- /** offset of page slice in cl_page buffer */
- int co_slice_off;
-};
-
-/**
- * Description of the client object configuration. This is used for the
- * creation of a new client object that is identified by a more state than
- * fid.
- */
-struct cl_object_conf {
- /** Super-class. */
- struct lu_object_conf coc_lu;
- union {
- /**
- * Object layout. This is consumed by lov.
- */
- struct lu_buf coc_layout;
- /**
- * Description of particular stripe location in the
- * cluster. This is consumed by osc.
- */
- struct lov_oinfo *coc_oinfo;
- } u;
- /**
- * VFS inode. This is consumed by vvp.
- */
- struct inode *coc_inode;
- /**
- * Layout lock handle.
- */
- struct ldlm_lock *coc_lock;
- /**
- * Operation to handle layout, OBJECT_CONF_XYZ.
- */
- int coc_opc;
-};
-
-enum {
- /** configure layout, set up a new stripe, must be called while
- * holding layout lock.
- */
- OBJECT_CONF_SET = 0,
- /** invalidate the current stripe configuration due to losing
- * layout lock.
- */
- OBJECT_CONF_INVALIDATE = 1,
- /** wait for old layout to go away so that new layout can be set up. */
- OBJECT_CONF_WAIT = 2
-};
-
-enum {
- CL_LAYOUT_GEN_NONE = (u32)-2, /* layout lock was cancelled */
- CL_LAYOUT_GEN_EMPTY = (u32)-1, /* for empty layout */
-};
-
-struct cl_layout {
- /** the buffer to return the layout in lov_mds_md format. */
- struct lu_buf cl_buf;
- /** size of layout in lov_mds_md format. */
- size_t cl_size;
- /** Layout generation. */
- u32 cl_layout_gen;
-};
-
-/**
- * Operations implemented for each cl object layer.
- *
- * \see vvp_ops, lov_ops, lovsub_ops, osc_ops
- */
-struct cl_object_operations {
- /**
- * Initialize page slice for this layer. Called top-to-bottom through
- * every object layer when a new cl_page is instantiated. A layer
- * keeping private per-page data, or requiring its own page operations
- * vector, should allocate these data here and attach them to the page
- * by calling cl_page_slice_add(). \a vmpage is locked (in the VM
- * sense). Optional.
- *
- * \retval NULL success.
- *
- * \retval ERR_PTR(errno) failure code.
- *
- * \retval valid-pointer pointer to already existing referenced page
- * to be used instead of newly created.
- */
- int (*coo_page_init)(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index);
- /**
- * Initialize lock slice for this layer. Called top-to-bottom through
- * every object layer when a new cl_lock is instantiated. A layer
- * keeping private per-lock data, or requiring its own lock operations
- * vector, should allocate these data here and attach them to the lock
- * by calling cl_lock_slice_add(). Mandatory.
- */
- int (*coo_lock_init)(const struct lu_env *env,
- struct cl_object *obj, struct cl_lock *lock,
- const struct cl_io *io);
- /**
- * Initialize io state for a given layer.
- *
- * Called top-to-bottom once per io lifetime to initialize io
- * state. If a layer wants to keep some state for this type of io, it
- * has to embed a struct cl_io_slice in lu_env::le_ses and register
- * the slice with cl_io_slice_add(). It is guaranteed that all threads
- * participating in this io share the same session.
- */
- int (*coo_io_init)(const struct lu_env *env,
- struct cl_object *obj, struct cl_io *io);
- /**
- * Fill portion of \a attr that this layer controls. This method is
- * called top-to-bottom through all object layers.
- *
- * \pre cl_object_header::coh_attr_guard of the top-object is locked.
- *
- * \return 0: to continue
- * \return +ve: to stop iterating through layers (but 0 is returned
- * from enclosing cl_object_attr_get())
- * \return -ve: to signal error
- */
- int (*coo_attr_get)(const struct lu_env *env, struct cl_object *obj,
- struct cl_attr *attr);
- /**
- * Update attributes.
- *
- * \a valid is a bitmask composed from enum #cl_attr_valid, and
- * indicating what attributes are to be set.
- *
- * \pre cl_object_header::coh_attr_guard of the top-object is locked.
- *
- * \return the same convention as for
- * cl_object_operations::coo_attr_get() is used.
- */
- int (*coo_attr_update)(const struct lu_env *env, struct cl_object *obj,
- const struct cl_attr *attr, unsigned int valid);
- /**
- * Update object configuration. Called top-to-bottom to modify object
- * configuration.
- *
- * XXX error conditions and handling.
- */
- int (*coo_conf_set)(const struct lu_env *env, struct cl_object *obj,
- const struct cl_object_conf *conf);
- /**
- * Glimpse ast. Executed when glimpse ast arrives for a lock on this
- * object. Layers are supposed to fill parts of \a lvb that will be
- * shipped to the glimpse originator as a glimpse result.
- *
- * \see vvp_object_glimpse(), lovsub_object_glimpse(),
- * \see osc_object_glimpse()
- */
- int (*coo_glimpse)(const struct lu_env *env,
- const struct cl_object *obj, struct ost_lvb *lvb);
- /**
- * Object prune method. Called when the layout is going to change on
- * this object, therefore each layer has to clean up its cache,
- * mainly pages and locks.
- */
- int (*coo_prune)(const struct lu_env *env, struct cl_object *obj);
- /**
- * Object getstripe method.
- */
- int (*coo_getstripe)(const struct lu_env *env, struct cl_object *obj,
- struct lov_user_md __user *lum);
- /**
- * Get FIEMAP mapping from the object.
- */
- int (*coo_fiemap)(const struct lu_env *env, struct cl_object *obj,
- struct ll_fiemap_info_key *fmkey,
- struct fiemap *fiemap, size_t *buflen);
- /**
- * Get layout and generation of the object.
- */
- int (*coo_layout_get)(const struct lu_env *env, struct cl_object *obj,
- struct cl_layout *layout);
- /**
- * Get maximum size of the object.
- */
- loff_t (*coo_maxbytes)(struct cl_object *obj);
- /**
- * Set request attributes.
- */
- void (*coo_req_attr_set)(const struct lu_env *env,
- struct cl_object *obj,
- struct cl_req_attr *attr);
-};
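The coo_attr_get() return convention above (0 = continue to the next layer, positive = stop while the enclosing call still returns 0, negative = error) is worth making concrete. A stand-alone sketch, where the layer functions and the driver loop are illustrative stand-ins for the real top-to-bottom iteration:

#include <stdio.h>

/* Hypothetical per-layer attr_get hooks following the coo_attr_get()
 * convention: 0 = continue, > 0 = stop iterating (enclosing call still
 * returns 0), < 0 = error. */
typedef int (*attr_get_t)(void);

static int vvp_attr_get(void) { return 0; }	/* nothing to add */
static int lov_attr_get(void) { return +1; }	/* merged; stop here */
static int osc_attr_get(void) { return 0; }	/* never reached */

static int attr_get_through_layers(void)
{
	attr_get_t layers[] = { vvp_attr_get, lov_attr_get, osc_attr_get };
	unsigned int i;
	int rc = 0;

	for (i = 0; i < sizeof(layers) / sizeof(layers[0]); i++) {
		rc = layers[i]();
		if (rc != 0)
			break;
	}
	return rc > 0 ? 0 : rc;
}

int main(void)
{
	printf("attr_get -> %d\n", attr_get_through_layers());
	return 0;
}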
-
-/**
- * Extended header for client object.
- */
-struct cl_object_header {
- /** Standard lu_object_header. cl_object::co_lu::lo_header points
- * here.
- */
- struct lu_object_header coh_lu;
-
- /**
- * Parent object. It is assumed that an object has a well-defined
- * parent, but not a well-defined child (there may be multiple
- * sub-objects, for the same top-object). cl_object_header::coh_parent
- * field allows certain code to be written generically, without
- * limiting possible cl_object layouts unduly.
- */
- struct cl_object_header *coh_parent;
- /**
- * Protects consistency between cl_attr of parent object and
- * attributes of sub-objects that the former is calculated ("merged")
- * from.
- *
- * \todo XXX this can be read/write lock if needed.
- */
- spinlock_t coh_attr_guard;
- /**
- * Size of cl_page + page slices
- */
- unsigned short coh_page_bufsize;
- /**
- * Number of objects above this one: 0 for a top-object, 1 for its
- * sub-object, etc.
- */
- unsigned char coh_nesting;
-};
-
-/**
- * Helper macro: iterate over all layers of the object \a obj, assigning every
- * layer top-to-bottom to \a slice.
- */
-#define cl_object_for_each(slice, obj) \
- list_for_each_entry((slice), \
- &(obj)->co_lu.lo_header->loh_layers, \
- co_lu.lo_linkage)
-/**
- * Helper macro: iterate over all layers of the object \a obj, assigning every
- * layer bottom-to-top to \a slice.
- */
-#define cl_object_for_each_reverse(slice, obj) \
- list_for_each_entry_reverse((slice), \
- &(obj)->co_lu.lo_header->loh_layers, \
- co_lu.lo_linkage)
-/** @} cl_object */
-
-#define CL_PAGE_EOF ((pgoff_t)~0ull)
-
-/** \addtogroup cl_page cl_page
- * @{
- */
-
-/** \struct cl_page
- * Layered client page.
- *
- * cl_page: represents a portion of a file, cached in the memory. All pages
- * of the given file are of the same size, and are kept in the radix tree
- * hanging off the cl_object. cl_page doesn't fan out, but as sub-objects
- * of the top-level file object are first class cl_objects, they have their
- * own radix trees of pages, and hence a page is implemented as a sequence
- * of struct cl_page's, linked into a doubly-linked list through
- * cl_page::cp_parent and cl_page::cp_child pointers, each residing in the
- * corresponding radix tree at the corresponding logical offset.
- *
- * cl_page is associated with the VM page of the hosting environment (struct
- * page in the Linux kernel, for example). It is assumed that this
- * association is implemented by one of the cl_page layers (the top layer in
- * the current design) that
- *
- * - intercepts per-VM-page call-backs made by the environment (e.g.,
- * memory pressure),
- *
- * - translates state (page flag bits) and locking between lustre and
- * environment.
- *
- * The association between cl_page and struct page is immutable and
- * established when cl_page is created.
- *
- * cl_page can be "owned" by a particular cl_io (see below), guaranteeing
- * this io exclusive access to this page w.r.t. other io attempts and
- * various events changing page state (such as transfer completion, or
- * eviction of the page from memory). Note that in general a cl_io
- * cannot be identified with a particular thread, and page ownership is not
- * exactly equal to the current thread holding a lock on the page. The layer
- * implementing the association between cl_page and struct page has to
- * implement ownership on top of available synchronization mechanisms.
- *
- * While the lustre client maintains the notion of page ownership by io,
- * the hosting MM/VM usually has its own page concurrency control
- * mechanisms. For example, in Linux, page access is synchronized by the
- * per-page PG_locked bit-lock, and generic kernel code (generic_file_*())
- * takes care to acquire and release such locks as necessary around the
- * calls to the file system methods (->readpage(), ->prepare_write(),
- * ->commit_write(), etc.). This leads to a situation where there are two
- * different ways to own a page in the client:
- *
- * - client code explicitly and voluntarily owns the page (cl_page_own());
- *
- * - VM locks a page and then calls the client, which has to "assume"
- * the ownership from the VM (cl_page_assume()).
- *
- * Dual methods to release ownership are cl_page_disown() and
- * cl_page_unassume().
- *
- * cl_page is reference counted (cl_page::cp_ref). When reference counter
- * drops to 0, the page is returned to the cache, unless it is in
- * cl_page_state::CPS_FREEING state, in which case it is immediately
- * destroyed.
- *
- * The general logic guaranteeing the absence of "existential races" for
- * pages is the following:
- *
- * - there are fixed known ways for a thread to obtain a new reference
- * to a page:
- *
- * - by doing a lookup in the cl_object radix tree, protected by the
- * spin-lock;
- *
- * - by starting from VM-locked struct page and following some
- * hosting environment method (e.g., following ->private pointer in
- * the case of Linux kernel), see cl_vmpage_page();
- *
- * - when the page enters cl_page_state::CPS_FREEING state, all these
- * ways are severed with the proper synchronization
- * (cl_page_delete());
- *
- * - entry into cl_page_state::CPS_FREEING is serialized by the VM page
- * lock;
- *
- * - no new references to the page in cl_page_state::CPS_FREEING state
- * are allowed (checked in cl_page_get()).
- *
- * Together this guarantees that when the last reference to a
- * cl_page_state::CPS_FREEING page is released, it is safe to destroy the
- * page, as no new references to it can be acquired at that point, and
- * none exist.
- *
- * cl_page is a state machine. States are enumerated in enum
- * cl_page_state. Possible state transitions are enumerated in
- * cl_page_state_set(). State transition process (i.e., actual changing of
- * cl_page::cp_state field) is protected by the lock on the underlying VM
- * page.
- *
- * Linux Kernel implementation.
- *
- * Binding between cl_page and struct page is implemented in the vvp
- * layer. cl_page is attached to the
- * ->private pointer of the struct page, together with the setting of
- * PG_private bit in page->flags, and acquiring additional reference on the
- * struct page (much like struct buffer_head, or any similar file system
- * private data structures).
- *
- * PG_locked lock is used to implement both ownership and transfer
- * synchronization, that is, page is VM-locked in CPS_{OWNED,PAGE{IN,OUT}}
- * states. No additional references are acquired for the duration of the
- * transfer.
- *
- * \warning *THIS IS NOT* the behavior expected by the Linux kernel, where
- * write-out is "protected" by the special PG_writeback bit.
- */
-
-/**
- * States of cl_page. cl_page.c assumes particular order here.
- *
- * The page state machine is rather crude, as it doesn't recognize finer page
- * states like "dirty" or "up to date". This is because such states are not
- * always well defined for the whole stack (see, for example, the
- * implementation of read-ahead, which hides page up-to-dateness to track
- * cache hits accurately). Such sub-states are maintained by the layers that
- * are interested in them.
- */
-enum cl_page_state {
- /**
- * Page is in the cache, un-owned. Page leaves cached state in the
- * following cases:
- *
- * - [cl_page_state::CPS_OWNED] io comes across the page and
- * owns it;
- *
- * - [cl_page_state::CPS_PAGEOUT] page is dirty, the
- * req-formation engine decides that it wants to include this page
- * into an RPC being constructed, and yanks it from the cache;
- *
- * - [cl_page_state::CPS_FREEING] VM callback is executed to
- * evict the page from memory;
- *
- * \invariant cl_page::cp_owner == NULL && cl_page::cp_req == NULL
- */
- CPS_CACHED,
- /**
- * Page is exclusively owned by some cl_io. Page may end up in this
- * state as a result of
- *
- * - io creating new page and immediately owning it;
- *
- * - [cl_page_state::CPS_CACHED] io finding existing cached page
- * and owning it;
- *
- * - [cl_page_state::CPS_OWNED] io finding existing owned page
- * and waiting for owner to release the page;
- *
- * Page leaves owned state in the following cases:
- *
- * - [cl_page_state::CPS_CACHED] io decides to leave the page in
- * the cache, doing nothing;
- *
- * - [cl_page_state::CPS_PAGEIN] io starts read transfer for
- * this page;
- *
- * - [cl_page_state::CPS_PAGEOUT] io starts immediate write
- * transfer for this page;
- *
- * - [cl_page_state::CPS_FREEING] io decides to destroy this
- * page (e.g., as part of truncate or extent lock cancellation).
- *
- * \invariant cl_page::cp_owner != NULL && cl_page::cp_req == NULL
- */
- CPS_OWNED,
- /**
- * Page is being written out, as a part of a transfer. This state is
- * entered when req-formation logic decided that it wants this page to
- * be sent through the wire _now_. Specifically, it means that once
- * this state is achieved, transfer completion handler (with either
- * success or failure indication) is guaranteed to be executed against
- * this page independently of any locks and any scheduling decisions
- * made by the hosting environment (that effectively means that the
- * page is never put into cl_page_state::CPS_PAGEOUT state "in
- * advance". This property is mentioned because it is important when
- * reasoning about possible dead-locks in the system). The page can
- * enter this state as a result of
- *
- * - [cl_page_state::CPS_OWNED] an io requesting an immediate
- * write-out of this page, or
- *
- * - [cl_page_state::CPS_CACHED] req-forming engine deciding
- * that it has enough dirty pages cached to issue a "good"
- * transfer.
- *
- * The page leaves cl_page_state::CPS_PAGEOUT state when the transfer
- * is completed---it is moved into cl_page_state::CPS_CACHED state.
- *
- * Underlying VM page is locked for the duration of transfer.
- *
- * \invariant: cl_page::cp_owner == NULL && cl_page::cp_req != NULL
- */
- CPS_PAGEOUT,
- /**
- * Page is being read in, as a part of a transfer. This is quite
- * similar to the cl_page_state::CPS_PAGEOUT state, except that
- * read-in is always "immediate"---there is no such thing as a sudden
- * construction of a read request from cached, presumably not up to date,
- * pages.
- *
- * Underlying VM page is locked for the duration of transfer.
- *
- * \invariant: cl_page::cp_owner == NULL && cl_page::cp_req != NULL
- */
- CPS_PAGEIN,
- /**
- * Page is being destroyed. This state is entered when client decides
- * that page has to be deleted from its host object, as, e.g., a part
- * of truncate.
- *
- * Once this state is reached, there is no way to escape it.
- *
- * \invariant: cl_page::cp_owner == NULL && cl_page::cp_req == NULL
- */
- CPS_FREEING,
- CPS_NR
-};
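The per-state comments above effectively pin down the legal transitions. A stand-alone sketch of that transition matrix, reconstructed from the comments (the authoritative table is in cl_page_state_set(), as noted above):

#include <stdbool.h>
#include <stdio.h>

enum cl_page_state {
	CPS_CACHED, CPS_OWNED, CPS_PAGEOUT, CPS_PAGEIN, CPS_FREEING, CPS_NR
};

/* Transition matrix reconstructed from the per-state comments above;
 * CPS_FREEING is terminal, so its row stays all-false by default. */
static const bool allowed[CPS_NR][CPS_NR] = {
	[CPS_CACHED]  = { [CPS_OWNED] = true, [CPS_PAGEOUT] = true,
			  [CPS_FREEING] = true },
	[CPS_OWNED]   = { [CPS_CACHED] = true, [CPS_PAGEIN] = true,
			  [CPS_PAGEOUT] = true, [CPS_FREEING] = true },
	[CPS_PAGEOUT] = { [CPS_CACHED] = true },
	[CPS_PAGEIN]  = { [CPS_CACHED] = true },
};

int main(void)
{
	printf("OWNED -> PAGEOUT: %d\n", allowed[CPS_OWNED][CPS_PAGEOUT]);
	printf("FREEING -> CACHED: %d\n", allowed[CPS_FREEING][CPS_CACHED]);
	return 0;
}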
-
-enum cl_page_type {
- /** Host page, the page is from the host inode to which the cl_page
- * belongs.
- */
- CPT_CACHEABLE = 1,
-
- /** Transient page, the transient cl_page is used to bind a cl_page
- * to a vmpage that does not belong to the same object as the cl_page.
- * It is used in DirectIO and lockless IO.
- */
- CPT_TRANSIENT,
-};
-
-/**
- * Fields are protected by the lock on struct page, except for atomics and
- * immutables.
- *
- * \invariant Data type invariants are in cl_page_invariant(). Basically:
- * cl_page::cp_parent and cl_page::cp_child are a well-formed double-linked
- * list, consistent with the parent/child pointers in the cl_page::cp_obj and
- * cl_page::cp_owner (when set).
- */
-struct cl_page {
- /** Reference counter. */
- atomic_t cp_ref;
- /** An object this page is a part of. Immutable after creation. */
- struct cl_object *cp_obj;
- /** vmpage */
- struct page *cp_vmpage;
- /** Linkage of pages within group. Pages must be owned */
- struct list_head cp_batch;
- /** List of slices. Immutable after creation. */
- struct list_head cp_layers;
- /**
- * Page state. This field is const to avoid accidental update, it is
- * modified only internally within cl_page.c. Protected by a VM lock.
- */
- const enum cl_page_state cp_state;
- /**
- * Page type. Only CPT_TRANSIENT is used so far. Immutable after
- * creation.
- */
- enum cl_page_type cp_type;
-
- /**
- * Owning IO in cl_page_state::CPS_OWNED state. Sub-page can be owned
- * by sub-io. Protected by a VM lock.
- */
- struct cl_io *cp_owner;
- /** List of references to this page, for debugging. */
- struct lu_ref cp_reference;
- /** Link to an object, for debugging. */
- struct lu_ref_link cp_obj_ref;
- /** Link to a queue, for debugging. */
- struct lu_ref_link cp_queue_ref;
- /** Assigned if doing a sync_io */
- struct cl_sync_io *cp_sync_io;
-};
-
-/**
- * Per-layer part of cl_page.
- *
- * \see vvp_page, lov_page, osc_page
- */
-struct cl_page_slice {
- struct cl_page *cpl_page;
- pgoff_t cpl_index;
- /**
- * Object slice corresponding to this page slice. Immutable after
- * creation.
- */
- struct cl_object *cpl_obj;
- const struct cl_page_operations *cpl_ops;
- /** Linkage into cl_page::cp_layers. Immutable after creation. */
- struct list_head cpl_linkage;
-};
-
-/**
- * Lock mode. For the client extent locks.
- *
- * \ingroup cl_lock
- */
-enum cl_lock_mode {
- CLM_READ,
- CLM_WRITE,
- CLM_GROUP
-};
-
-/**
- * Requested transfer type.
- */
-enum cl_req_type {
- CRT_READ,
- CRT_WRITE,
- CRT_NR
-};
-
-/**
- * Per-layer page operations.
- *
- * Methods taking an \a io argument are for the activity happening in the
- * context of given \a io. Page is assumed to be owned by that io, except for
- * the obvious cases (like cl_page_operations::cpo_own()).
- *
- * \see vvp_page_ops, lov_page_ops, osc_page_ops
- */
-struct cl_page_operations {
- /**
- * cl_page<->struct page methods. Only one layer in the stack has to
- * implement these. Current code assumes that this functionality is
- * provided by the topmost layer, see cl_page_disown0() as an example.
- */
-
- /**
- * Called when \a io acquires this page into exclusive
- * ownership. When this method returns, it is guaranteed that the page
- * is not owned by another io, and no transfer is going on against
- * it. Optional.
- *
- * \see cl_page_own()
- * \see vvp_page_own(), lov_page_own()
- */
- int (*cpo_own)(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *io, int nonblock);
- /** Called when ownership is yielded. Optional.
- *
- * \see cl_page_disown()
- * \see vvp_page_disown()
- */
- void (*cpo_disown)(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io);
- /**
- * Called for a page that is already "owned" by \a io from VM point of
- * view. Optional.
- *
- * \see cl_page_assume()
- * \see vvp_page_assume(), lov_page_assume()
- */
- void (*cpo_assume)(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io);
- /** Dual to cl_page_operations::cpo_assume(). Optional. Called
- * bottom-to-top when IO releases a page without actually unlocking
- * it.
- *
- * \see cl_page_unassume()
- * \see vvp_page_unassume()
- */
- void (*cpo_unassume)(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *io);
- /**
- * Announces whether the page contains valid data or not by \a uptodate.
- *
- * \see cl_page_export()
- * \see vvp_page_export()
- */
- void (*cpo_export)(const struct lu_env *env,
- const struct cl_page_slice *slice, int uptodate);
- /**
- * Checks whether underlying VM page is locked (in the suitable
- * sense). Used for assertions.
- *
- * \retval -EBUSY: page is protected by a lock of a given mode;
- * \retval -ENODATA: page is not protected by a lock;
- * \retval 0: this layer cannot decide. (Should never happen.)
- */
- int (*cpo_is_vmlocked)(const struct lu_env *env,
- const struct cl_page_slice *slice);
- /**
- * Page destruction.
- */
-
- /**
- * Called when page is truncated from the object. Optional.
- *
- * \see cl_page_discard()
- * \see vvp_page_discard(), osc_page_discard()
- */
- void (*cpo_discard)(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *io);
- /**
- * Called when the page is removed from the cache, and is about to be
- * destroyed. Optional.
- *
- * \see cl_page_delete()
- * \see vvp_page_delete(), osc_page_delete()
- */
- void (*cpo_delete)(const struct lu_env *env,
- const struct cl_page_slice *slice);
- /** Destructor. Frees resources and slice itself. */
- void (*cpo_fini)(const struct lu_env *env,
- struct cl_page_slice *slice);
- /**
- * Optional debugging helper. Prints given page slice.
- *
- * \see cl_page_print()
- */
- int (*cpo_print)(const struct lu_env *env,
- const struct cl_page_slice *slice,
- void *cookie, lu_printer_t p);
- /**
- * \name transfer
- *
- * Transfer methods.
- *
- * @{
- */
- /**
- * Request type dependent vector of operations.
- *
- * Transfer operations depend on transfer mode (cl_req_type). To avoid
- * passing transfer mode to each and every of these methods, and to
- * avoid branching on request type inside of the methods, separate
- * methods for cl_req_type:CRT_READ and cl_req_type:CRT_WRITE are
- * provided. That is, method invocation usually looks like
- *
- * slice->cp_ops.io[req->crq_type].cpo_method(env, slice, ...);
- */
- struct {
- /**
- * Called when a page is submitted for a transfer as a part of
- * cl_page_list.
- *
- * \return 0 : page is eligible for submission;
- * \return -EALREADY : skip this page;
- * \return -ve : error.
- *
- * \see cl_page_prep()
- */
- int (*cpo_prep)(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *io);
- /**
- * Completion handler. This is guaranteed to be eventually
- * fired after cl_page_operations::cpo_prep() or
- * cl_page_operations::cpo_make_ready() call.
- *
- * This method can be called in a non-blocking context. It is
- * guaranteed however, that the page involved and its object
- * are pinned in memory (and, hence, calling cl_page_put() is
- * safe).
- *
- * \see cl_page_completion()
- */
- void (*cpo_completion)(const struct lu_env *env,
- const struct cl_page_slice *slice,
- int ioret);
- /**
- * Called when cached page is about to be added to the
- * ptlrpc request as a part of req formation.
- *
- * \return 0 : proceed with this page;
- * \return -EAGAIN : skip this page;
- * \return -ve : error.
- *
- * \see cl_page_make_ready()
- */
- int (*cpo_make_ready)(const struct lu_env *env,
- const struct cl_page_slice *slice);
- } io[CRT_NR];
- /**
- * Tell the transfer engine that only the [from, to] part of a page
- * should be transmitted.
- *
- * This is used for immediate transfers.
- *
- * \todo XXX this is not very good interface. It would be much better
- * if all transfer parameters were supplied as arguments to
- * cl_io_operations::cio_submit() call, but it is not clear how to do
- * this for page queues.
- *
- * \see cl_page_clip()
- */
- void (*cpo_clip)(const struct lu_env *env,
- const struct cl_page_slice *slice,
- int from, int to);
- /**
- * \pre the page was queued for transferring.
- * \post page is removed from the client's pending list, or -EBUSY
- * is returned if it is already being transferred.
- *
- * This is one of the few page operations that is:
- * 0. called from the top level;
- * 1. called without the vmpage locked;
- * 2. required to synchronize execution of ->cpo_cancel() with
- * completion handlers at every layer. Osc uses the client obd
- * lock for this purpose. Since there is no vvp_page_cancel() or
- * lov_page_cancel(), cpo_cancel() is de facto protected by the
- * client lock.
- *
- * \see osc_page_cancel().
- */
- int (*cpo_cancel)(const struct lu_env *env,
- const struct cl_page_slice *slice);
- /**
- * Write out a page on behalf of the kernel. This is only called by
- * ll_writepage() right now.
- *
- * \see cl_page_flush()
- */
- int (*cpo_flush)(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *io);
- /** @} transfer */
-};
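The io[CRT_NR] vector above replaces per-method branching on the request type with array indexing. A stand-alone sketch of that dispatch shape (the types and functions here are illustrative stand-ins):

#include <stdio.h>

enum req_type { CRT_READ, CRT_WRITE, CRT_NR };

/* Per-request-type method table, mirroring the io[CRT_NR] vector above. */
struct ops {
	struct {
		int (*cpo_prep)(void);
	} io[CRT_NR];
};

static int read_prep(void)  { printf("prep for read\n");  return 0; }
static int write_prep(void) { printf("prep for write\n"); return 0; }

static const struct ops page_ops = {
	.io = {
		[CRT_READ]  = { .cpo_prep = read_prep },
		[CRT_WRITE] = { .cpo_prep = write_prep },
	},
};

int main(void)
{
	enum req_type crt = CRT_WRITE;

	/* No branching on the type inside the method: index the vector. */
	return page_ops.io[crt].cpo_prep();
}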
-
-/**
- * Helper macro, dumping detailed information about \a page into a log.
- */
-#define CL_PAGE_DEBUG(mask, env, page, format, ...) \
-do { \
- if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
- cl_page_print(env, &msgdata, lu_cdebug_printer, page); \
- CDEBUG(mask, format, ## __VA_ARGS__); \
- } \
-} while (0)
-
-/**
- * Helper macro, dumping shorter information about \a page into a log.
- */
-#define CL_PAGE_HEADER(mask, env, page, format, ...) \
-do { \
- if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
- cl_page_header_print(env, &msgdata, lu_cdebug_printer, page); \
- CDEBUG(mask, format, ## __VA_ARGS__); \
- } \
-} while (0)
-
-static inline struct page *cl_page_vmpage(struct cl_page *page)
-{
- LASSERT(page->cp_vmpage);
- return page->cp_vmpage;
-}
-
-/**
- * Check if a cl_page is in use.
- *
- * The client cache holds a refcount; this refcount is dropped when
- * the page is taken out of the cache, see vvp_page_delete().
- */
-static inline bool __page_in_use(const struct cl_page *page, int refc)
-{
- return (atomic_read(&page->cp_ref) > refc + 1);
-}
-
-/**
- * Caller itself holds a refcount of cl_page.
- */
-#define cl_page_in_use(pg) __page_in_use(pg, 1)
-/**
- * Caller doesn't hold a refcount.
- */
-#define cl_page_in_use_noref(pg) __page_in_use(pg, 0)
-
-/** @} cl_page */
-
-/** \addtogroup cl_lock cl_lock
- * @{
- */
-/** \struct cl_lock
- *
- * Extent locking on the client.
- *
- * LAYERING
- *
- * The locking model of the new client code is built around
- *
- * struct cl_lock
- *
- * data-type representing an extent lock on a regular file. cl_lock is a
- * layered object (much like cl_object and cl_page), it consists of a header
- * (struct cl_lock) and a list of layers (struct cl_lock_slice), linked to
- * cl_lock::cll_layers list through cl_lock_slice::cls_linkage.
- *
- * Typical cl_lock consists of the two layers:
- *
- * - vvp_lock (vvp specific data), and
- * - lov_lock (lov specific data).
- *
- * lov_lock contains an array of sub-locks. Each of these sub-locks is a
- * normal cl_lock: it has a header (struct cl_lock) and a list of layers:
- *
- * - lovsub_lock, and
- * - osc_lock
- *
- * Each sub-lock is associated with a cl_object (representing a stripe
- * sub-object or the file to which the top-level cl_lock belongs), and is
- * linked into that cl_object::coh_locks. In this respect cl_lock is similar to
- * cl_object (that at lov layer also fans out into multiple sub-objects), and
- * is different from cl_page, that doesn't fan out (there is usually exactly
- * one osc_page for every vvp_page). We shall call vvp-lov portion of the lock
- * a "top-lock" and its lovsub-osc portion a "sub-lock".
- *
- * LIFE CYCLE
- *
- * cl_lock is a cacheless data container for the requirements of locks to
- * complete the IO. cl_lock is created before I/O starts and destroyed when the
- * I/O is complete.
- *
- * cl_lock depends on LDLM lock to fulfill lock semantics. LDLM lock is attached
- * to cl_lock at OSC layer. LDLM lock is still cacheable.
- *
- * INTERFACE AND USAGE
- *
- * Two major methods are supported for cl_lock: clo_enqueue and clo_cancel. A
- * cl_lock is enqueued by cl_lock_request(), which will call clo_enqueue()
- * methods for each layer to enqueue the lock. At the LOV layer, if a cl_lock
- * consists of multiple sub cl_locks, each sub-lock will be enqueued
- * correspondingly. At the OSC layer, the lock enqueue request will tend to
- * reuse a cached LDLM lock; otherwise a new LDLM lock will have to be
- * requested from the OST side.
- *
- * cl_lock_cancel() must be called to release a cl_lock after use. clo_cancel()
- * method will be called for each layer to release the resource held by this
- * lock. At OSC layer, the reference count of LDLM lock, which is held at
- * clo_enqueue time, is released.
- *
- * LDLM lock can only be canceled if there is no cl_lock using it.
- *
- * Overall process of the locking during IO operation is as following:
- *
- * - once parameters for IO are setup in cl_io, cl_io_operations::cio_lock()
- * is called on each layer. Responsibility of this method is to add locks,
- * needed by a given layer into cl_io.ci_lockset.
- *
- * - once locks for all layers were collected, they are sorted to avoid
- * dead-locks (cl_io_locks_sort()), and enqueued.
- *
- * - when all locks are acquired, IO is performed;
- *
- * - locks are released after IO is complete.
- *
- * Striping introduces major additional complexity into locking. The
- * fundamental problem is that it is generally unsafe to actively use (hold)
- * two locks on the different OST servers at the same time, as this introduces
- * inter-server dependency and can lead to cascading evictions.
- *
- * Basic solution is to sub-divide large read/write IOs into smaller pieces so
- * that no multi-stripe locks are taken (note that this design abandons POSIX
- * read/write semantics). Such pieces ideally can be executed concurrently. At
- * the same time, certain types of IO cannot be sub-divided without
- * sacrificing correctness. This includes:
- *
- * - O_APPEND write, where [0, EOF] lock has to be taken, to guarantee
- * atomicity;
- *
- * - ftruncate(fd, offset), where [offset, EOF] lock has to be taken.
- *
- * Also, in the case of read(fd, buf, count) or write(fd, buf, count), where
- * buf is a part of memory mapped Lustre file, a lock or locks protecting buf
- * has to be held together with the usual lock on [offset, offset + count].
- *
- * Interaction with DLM
- *
- * In the expected setup, cl_lock is ultimately backed up by a collection of
- * DLM locks (struct ldlm_lock). Association between cl_lock and DLM lock is
- * implemented in osc layer, that also matches DLM events (ASTs, cancellation,
- * etc.) into cl_lock_operation calls. See struct osc_lock for a more detailed
- * description of interaction with DLM.
- */
-
-/**
- * Lock description.
- */
-struct cl_lock_descr {
- /** Object this lock is granted for. */
- struct cl_object *cld_obj;
- /** Index of the first page protected by this lock. */
- pgoff_t cld_start;
- /** Index of the last page (inclusive) protected by this lock. */
- pgoff_t cld_end;
- /** Group ID, for group lock */
- __u64 cld_gid;
- /** Lock mode. */
- enum cl_lock_mode cld_mode;
- /**
- * flags to enqueue lock. A combination of bit-flags from
- * enum cl_enq_flags.
- */
- __u32 cld_enq_flags;
-};
-
-#define DDESCR "%s(%d):[%lu, %lu]:%x"
-#define PDESCR(descr) \
- cl_lock_mode_name((descr)->cld_mode), (descr)->cld_mode, \
- (descr)->cld_start, (descr)->cld_end, (descr)->cld_enq_flags
-
-const char *cl_lock_mode_name(const enum cl_lock_mode mode);
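DDESCR/PDESCR pair a format string with a macro expanding to the matching argument list, keeping call sites in sync. A stand-alone sketch of the same idiom (lock_mode_name() is a stand-in for cl_lock_mode_name()):

#include <stdio.h>

enum cl_lock_mode { CLM_READ, CLM_WRITE, CLM_GROUP };

struct lock_descr {
	unsigned long cld_start, cld_end;
	enum cl_lock_mode cld_mode;
	unsigned int cld_enq_flags;
};

/* Format string plus a macro expanding to the matching argument list. */
#define DDESCR "%s(%d):[%lu, %lu]:%x"
#define PDESCR(d) \
	lock_mode_name((d)->cld_mode), (d)->cld_mode, \
	(d)->cld_start, (d)->cld_end, (d)->cld_enq_flags

static const char *lock_mode_name(enum cl_lock_mode mode)
{
	static const char * const names[] = { "READ", "WRITE", "GROUP" };

	return names[mode];
}

int main(void)
{
	struct lock_descr d = { 0, 255, CLM_WRITE, 0x8 };

	printf(DDESCR "\n", PDESCR(&d));
	return 0;
}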
-
-/**
- * Layered client lock.
- */
-struct cl_lock {
- /** List of slices. Immutable after creation. */
- struct list_head cll_layers;
- /** lock attribute, extent, cl_object, etc. */
- struct cl_lock_descr cll_descr;
-};
-
-/**
- * Per-layer part of cl_lock
- *
- * \see vvp_lock, lov_lock, lovsub_lock, osc_lock
- */
-struct cl_lock_slice {
- struct cl_lock *cls_lock;
- /** Object slice corresponding to this lock slice. Immutable after
- * creation.
- */
- struct cl_object *cls_obj;
- const struct cl_lock_operations *cls_ops;
- /** Linkage into cl_lock::cll_layers. Immutable after creation. */
- struct list_head cls_linkage;
-};
-
-/**
- *
- * \see vvp_lock_ops, lov_lock_ops, lovsub_lock_ops, osc_lock_ops
- */
-struct cl_lock_operations {
- /** @{ */
- /**
- * Attempts to enqueue the lock. Called top-to-bottom.
- *
- * \retval 0 this layer has enqueued the lock successfully
- * \retval >0 this layer has enqueued the lock, but need to wait on
- * @anchor for resources
- * \retval -ve failure
- *
- * \see vvp_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(),
- * \see osc_lock_enqueue()
- */
- int (*clo_enqueue)(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- struct cl_io *io, struct cl_sync_io *anchor);
- /**
- * Cancel a lock and release its DLM lock ref, without cancelling the
- * DLM lock itself.
- */
- void (*clo_cancel)(const struct lu_env *env,
- const struct cl_lock_slice *slice);
- /** @} */
- /**
- * Destructor. Frees resources and the slice.
- *
- * \see vvp_lock_fini(), lov_lock_fini(), lovsub_lock_fini(),
- * \see osc_lock_fini()
- */
- void (*clo_fini)(const struct lu_env *env, struct cl_lock_slice *slice);
- /**
- * Optional debugging helper. Prints given lock slice.
- */
- int (*clo_print)(const struct lu_env *env,
- void *cookie, lu_printer_t p,
- const struct cl_lock_slice *slice);
-};
-
-#define CL_LOCK_DEBUG(mask, env, lock, format, ...) \
-do { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
- \
- if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
- cl_lock_print(env, &msgdata, lu_cdebug_printer, lock); \
- CDEBUG(mask, format, ## __VA_ARGS__); \
- } \
-} while (0)
-
-#define CL_LOCK_ASSERT(expr, env, lock) do { \
- if (likely(expr)) \
- break; \
- \
- CL_LOCK_DEBUG(D_ERROR, env, lock, "failed at %s.\n", #expr); \
- LBUG(); \
-} while (0)
-
-/** @} cl_lock */
-
-/** \addtogroup cl_page_list cl_page_list
- * Page list used to perform collective operations on a group of pages.
- *
- * Pages are added to the list one by one. cl_page_list acquires a reference
- * for every page in it. Page list is used to perform collective operations on
- * pages:
- *
- * - submit pages for an immediate transfer,
- *
- * - own pages on behalf of certain io (waiting for each page in turn),
- *
- * - discard pages.
- *
- * When list is finalized, it releases references on all pages it still has.
- *
- * \todo XXX concurrency control.
- *
- * @{
- */
-struct cl_page_list {
- unsigned int pl_nr;
- struct list_head pl_pages;
- struct task_struct *pl_owner;
-};
-
-/**
- * A 2-queue of pages. A convenience data-type for common use case, 2-queue
- * contains an incoming page list and an outgoing page list.
- */
-struct cl_2queue {
- struct cl_page_list c2_qin;
- struct cl_page_list c2_qout;
-};
-
-/** @} cl_page_list */
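A stand-alone sketch of the reference discipline described above: adding a page to the list takes a reference, and finalizing the list drops whatever references remain (the types and helpers are stand-ins, not the cl_page_list API):

#include <stdio.h>

/* Stand-ins: a refcounted page and a page list akin to cl_page_list. */
struct page { int ref; struct page *next; };
struct page_list { unsigned int pl_nr; struct page *head; };

/* Adding to the list takes a reference, as cl_page_list does. */
static void list_add_page(struct page_list *pl, struct page *pg)
{
	pg->ref++;
	pg->next = pl->head;
	pl->head = pg;
	pl->pl_nr++;
}

/* Finalizing releases every reference still held by the list. */
static void list_fini(struct page_list *pl)
{
	struct page *pg;

	while ((pg = pl->head) != NULL) {
		pl->head = pg->next;
		pl->pl_nr--;
		pg->ref--;
	}
}

int main(void)
{
	struct page_list pl = { 0, NULL };
	struct page a = { 1, NULL }, b = { 1, NULL };

	list_add_page(&pl, &a);
	list_add_page(&pl, &b);
	printf("nr=%u a.ref=%d\n", pl.pl_nr, a.ref);
	list_fini(&pl);
	printf("nr=%u a.ref=%d\n", pl.pl_nr, a.ref);
	return 0;
}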
-
-/** \addtogroup cl_io cl_io
- * @{
- */
-/** \struct cl_io
- * I/O
- *
- * cl_io represents a high level I/O activity like
- * read(2)/write(2)/truncate(2) system call, or cancellation of an extent
- * lock.
- *
- * cl_io is a layered object, much like cl_{object,page,lock} but with one
- * important distinction. We want to minimize the number of calls to the
- * allocator in the fast path, e.g., in the case of read(2) when everything
- * is cached: the client already owns the lock over the region being read,
- * and data are cached
- * due to read-ahead. To avoid allocation of cl_io layers in such situations,
- * per-layer io state is stored in the session, associated with the io, see
- * struct {vvp,lov,osc}_io for example. Sessions allocation is amortized
- * by using free-lists, see cl_env_get().
- *
- * There is a small predefined number of possible io types, enumerated in enum
- * cl_io_type.
- *
- * cl_io is a state machine that can be advanced concurrently by multiple
- * threads. It is up to these threads to control the concurrency and,
- * specifically, to detect when io is done, and its state can be safely
- * released.
- *
- * For read/write io, the overall execution plan is as follows:
- *
- * (0) initialize io state through all layers;
- *
- * (1) loop: prepare chunk of work to do
- *
- * (2) call all layers to collect locks they need to process current chunk
- *
- * (3) sort all locks to avoid dead-locks, and acquire them
- *
- * (4) process the chunk: call per-page methods
- * (cl_io_operations::cio_prepare_write(),
- * cl_io_operations::cio_commit_write() for write)
- *
- * (5) release locks
- *
- * (6) repeat loop.
- *
- * To implement the "parallel IO mode", lov layer creates sub-io's (lazily to
- * address allocation efficiency issues mentioned above), and returns with the
- * special error condition from per-page method when current sub-io has to
- * block. This causes io loop to be repeated, and lov switches to the next
- * sub-io in its cl_io_operations::cio_iter_init() implementation.
- */
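A stand-alone sketch of steps (1)-(6) of the plan above, with each step collapsed to a hypothetical single-layer stub (the real driver fans each step out across all layers):

#include <stdio.h>

/* Hypothetical single-layer hooks mirroring the loop above. */
static int  io_iter_init(int it) { return it < 2 ? 0 : 1; /* 1: done */ }
static void io_lock(void)        { printf("collect+sort+enqueue locks\n"); }
static void io_start(void)       { printf("process chunk\n"); }
static void io_unlock(void)      { printf("release locks\n"); }
static void io_iter_fini(void)   { }

static void io_loop(void)
{
	int it = 0;

	/* steps (1)-(6): iterate until an iteration reports completion */
	while (io_iter_init(it) == 0) {
		io_lock();	/* (2)-(3) */
		io_start();	/* (4) */
		io_unlock();	/* (5) */
		io_iter_fini();
		it++;		/* (6) repeat */
	}
}

int main(void)
{
	io_loop();	/* step (0), init, is assumed done by the caller */
	return 0;
}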
-
-/** IO types */
-enum cl_io_type {
- /** read system call */
- CIT_READ = 1,
- /** write system call */
- CIT_WRITE,
- /** truncate, utime system calls */
- CIT_SETATTR,
- /** get data version */
- CIT_DATA_VERSION,
- /**
- * page fault handling
- */
- CIT_FAULT,
- /**
- * fsync system call handling
- * To write out a range of file
- */
- CIT_FSYNC,
- /**
- * Miscellaneous io. This is used for occasional io activity that
- * doesn't fit into other types. Currently this is used for:
- *
- * - cancellation of an extent lock. This io exists as a context
- * to write dirty pages from under the lock being canceled back
- * to the server;
- *
- * - VM induced page write-out. An io context for writing page out
- * for memory cleansing;
- *
- * - glimpse. An io context to acquire glimpse lock.
- *
- * - grouplock. An io context to acquire group lock.
- *
- * CIT_MISC io is used simply as a context in which locks and pages
- * are manipulated. Such io has no internal "process", that is,
- * cl_io_loop() is never called for it.
- */
- CIT_MISC,
- CIT_OP_NR
-};
-
-/**
- * States of cl_io state machine
- */
-enum cl_io_state {
- /** Not initialized. */
- CIS_ZERO,
- /** Initialized. */
- CIS_INIT,
- /** IO iteration started. */
- CIS_IT_STARTED,
- /** Locks taken. */
- CIS_LOCKED,
- /** Actual IO is in progress. */
- CIS_IO_GOING,
- /** IO for the current iteration finished. */
- CIS_IO_FINISHED,
- /** Locks released. */
- CIS_UNLOCKED,
- /** Iteration completed. */
- CIS_IT_ENDED,
- /** cl_io finalized. */
- CIS_FINI
-};
-
-/**
- * IO state private for a layer.
- *
- * This is usually embedded into layer session data, rather than allocated
- * dynamically.
- *
- * \see vvp_io, lov_io, osc_io
- */
-struct cl_io_slice {
- struct cl_io *cis_io;
- /** corresponding object slice. Immutable after creation. */
- struct cl_object *cis_obj;
- /** io operations. Immutable after creation. */
- const struct cl_io_operations *cis_iop;
- /**
- * linkage into a list of all slices for a given cl_io, hanging off
- * cl_io::ci_layers. Immutable after creation.
- */
- struct list_head cis_linkage;
-};
-
-typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *,
- struct cl_page *);
-
-struct cl_read_ahead {
- /*
- * Maximum page index at which the readahead window will end.
- * This is determined by DLM lock coverage, RPC and stripe boundaries.
- * cra_end is inclusive.
- */
- pgoff_t cra_end;
- /* optimal RPC size for this read, by pages */
- unsigned long cra_rpc_size;
- /*
- * Release callback. If readahead holds resources underneath, this
- * function should be called to release them.
- */
- void (*cra_release)(const struct lu_env *env, void *cbdata);
- /* Callback data for cra_release routine */
- void *cra_cbdata;
-};
-
-static inline void cl_read_ahead_release(const struct lu_env *env,
- struct cl_read_ahead *ra)
-{
- if (ra->cra_release)
- ra->cra_release(env, ra->cra_cbdata);
- memset(ra, 0, sizeof(*ra));
-}
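cl_read_ahead_release() above shows the release-hook idiom: invoke the optional callback, then zero the result so a repeated release is harmless. A stand-alone sketch under that reading (all names are stand-ins):

#include <stdio.h>
#include <string.h>

/* Stand-in for cl_read_ahead: a result plus an optional release hook. */
struct read_ahead {
	unsigned long cra_end;
	void (*cra_release)(void *cbdata);
	void *cra_cbdata;
};

static void drop_lock_ref(void *cbdata)
{
	printf("releasing %s\n", (const char *)cbdata);
}

/* Same shape as cl_read_ahead_release(): call the hook, then reset. */
static void read_ahead_release(struct read_ahead *ra)
{
	if (ra->cra_release)
		ra->cra_release(ra->cra_cbdata);
	memset(ra, 0, sizeof(*ra));
}

int main(void)
{
	struct read_ahead ra = { 1024, drop_lock_ref, (void *)"dlm lock ref" };

	read_ahead_release(&ra);
	read_ahead_release(&ra);	/* second call is a no-op */
	return 0;
}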
-
-/**
- * Per-layer io operations.
- * \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops
- */
-struct cl_io_operations {
- /**
- * Vector of io state transition methods for every io type.
- *
- * \see cl_page_operations::io
- */
- struct {
- /**
- * Prepare io iteration at a given layer.
- *
- * Called top-to-bottom at the beginning of each iteration of
- * "io loop" (if it makes sense for this type of io). Here
- * layer selects what work it will do during this iteration.
- *
- * \see cl_io_operations::cio_iter_fini()
- */
- int (*cio_iter_init)(const struct lu_env *env,
- const struct cl_io_slice *slice);
- /**
- * Finalize io iteration.
- *
- * Called bottom-to-top at the end of each iteration of "io
- * loop". Here layers can decide whether IO has to be
- * continued.
- *
- * \see cl_io_operations::cio_iter_init()
- */
- void (*cio_iter_fini)(const struct lu_env *env,
- const struct cl_io_slice *slice);
- /**
- * Collect locks for the current iteration of io.
- *
- * Called top-to-bottom to collect all locks necessary for
- * this iteration. This method shouldn't actually enqueue
- * anything, instead it should post a lock through
- * cl_io_lock_add(). Once all locks are collected, they are
- * sorted and enqueued in the proper order.
- */
- int (*cio_lock)(const struct lu_env *env,
- const struct cl_io_slice *slice);
- /**
- * Finalize unlocking.
- *
- * Called bottom-to-top to finish layer specific unlocking
- * functionality, after generic code released all locks
- * acquired by cl_io_operations::cio_lock().
- */
- void (*cio_unlock)(const struct lu_env *env,
- const struct cl_io_slice *slice);
- /**
- * Start io iteration.
- *
- * Once all locks are acquired, called top-to-bottom to
- * commence actual IO. In the current implementation,
- * top-level vvp_io_{read,write}_start() does all the work
- * synchronously by calling generic_file_*(), so other layers
- * are called when everything is done.
- */
- int (*cio_start)(const struct lu_env *env,
- const struct cl_io_slice *slice);
- /**
- * Called top-to-bottom at the end of the io loop. Here a layer
- * might wait for an unfinished asynchronous io.
- */
- void (*cio_end)(const struct lu_env *env,
- const struct cl_io_slice *slice);
- /**
- * Called bottom-to-top to notify layers that read/write IO
- * iteration finished, with \a nob bytes transferred.
- */
- void (*cio_advance)(const struct lu_env *env,
- const struct cl_io_slice *slice,
- size_t nob);
- /**
- * Called once per io, bottom-to-top to release io resources.
- */
- void (*cio_fini)(const struct lu_env *env,
- const struct cl_io_slice *slice);
- } op[CIT_OP_NR];
-
- /**
- * Submit pages from \a queue->c2_qin for IO, and move
- * successfully submitted pages into \a queue->c2_qout. Return
- * non-zero if it failed to submit even a single page. If
- * submission failed after some pages were moved into \a
- * queue->c2_qout, completion callback with non-zero ioret is
- * executed on them.
- */
- int (*cio_submit)(const struct lu_env *env,
- const struct cl_io_slice *slice,
- enum cl_req_type crt,
- struct cl_2queue *queue);
- /**
- * Queue async page for write.
- * The difference between cio_submit and cio_commit_async is that
- * cio_submit is for urgent requests.
- */
- int (*cio_commit_async)(const struct lu_env *env,
- const struct cl_io_slice *slice,
- struct cl_page_list *queue, int from, int to,
- cl_commit_cbt cb);
- /**
- * Decide maximum read ahead extent
- *
- * \pre io->ci_type == CIT_READ
- */
- int (*cio_read_ahead)(const struct lu_env *env,
- const struct cl_io_slice *slice,
- pgoff_t start, struct cl_read_ahead *ra);
- /**
- * Optional debugging helper. Print given io slice.
- */
- int (*cio_print)(const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct cl_io_slice *slice);
-};
-
-/**
- * Flags to lock enqueue procedure.
- * \ingroup cl_lock
- */
-enum cl_enq_flags {
- /**
- * instruct the server not to block if a conflicting lock is found;
- * instead, -EWOULDBLOCK is returned immediately.
- */
- CEF_NONBLOCK = 0x00000001,
- /**
- * take lock asynchronously (out of order), as it cannot
- * deadlock. This is for LDLM_FL_HAS_INTENT locks used for glimpsing.
- */
- CEF_ASYNC = 0x00000002,
- /**
- * tell the server to instruct (through a flag in the blocking ast) the
- * owner of the conflicting lock that it can drop dirty pages
- * protected by this lock, without sending them to the server.
- */
- CEF_DISCARD_DATA = 0x00000004,
- /**
- * tell the sub layers that it must be a `real' lock. This is used for
- * mmapped-buffer locks and glimpse locks that must never be converted
- * into lockless mode.
- *
- * \see vvp_mmap_locks(), cl_glimpse_lock().
- */
- CEF_MUST = 0x00000008,
- /**
- * tell the sub layers to never request a `real' lock. This flag is
- * not used currently.
- *
- * cl_io::ci_lockreq and CEF_{MUST,NEVER} flags specify lockless
- * conversion policy: ci_lockreq describes generic information of lock
- * requirement for this IO, especially for locks which belong to the
- * object doing IO; however, lock itself may have precise requirements
- * that are described by the enqueue flags.
- */
- CEF_NEVER = 0x00000010,
- /**
- * for async glimpse lock.
- */
- CEF_AGL = 0x00000020,
- /**
- * enqueue a lock to test DLM lock existence.
- */
- CEF_PEEK = 0x00000040,
- /**
- * Lock match only. Used by group lock in I/O as group lock
- * is known to exist.
- */
- CEF_LOCK_MATCH = BIT(7),
- /**
- * mask of enq_flags.
- */
- CEF_MASK = 0x000000ff,
-};
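These are plain bit-flags that get combined into cl_lock_descr::cld_enq_flags. A stand-alone sketch of composing them and checking against the mask (values copied from above, reduced to two flags):

#include <stdio.h>

/* The bit values above, reproduced for a stand-alone illustration. */
enum { CEF_NONBLOCK = 0x01, CEF_MUST = 0x08, CEF_MASK = 0xff };

int main(void)
{
	/* e.g. a mmapped-buffer lock that must be "real" and must not
	 * block on a conflict: combine bits into cld_enq_flags. */
	unsigned int cld_enq_flags = CEF_MUST | CEF_NONBLOCK;

	printf("enq_flags=0x%x in-mask=%d\n",
	       cld_enq_flags, !(cld_enq_flags & ~CEF_MASK));
	return 0;
}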
-
-/**
- * Link between lock and io. An intermediate structure is needed because the
- * same lock can be part of multiple io's simultaneously.
- */
-struct cl_io_lock_link {
- /** linkage into one of cl_lockset lists. */
- struct list_head cill_linkage;
- struct cl_lock cill_lock;
- /** optional destructor */
- void (*cill_fini)(const struct lu_env *env,
- struct cl_io_lock_link *link);
-};
-#define cill_descr cill_lock.cll_descr
-
-/**
- * A lock-set represents a collection of locks that an io needs at a
- * time. Generally speaking, the client tries to avoid holding multiple
- * locks when possible, because
- *
- * - holding extent locks over multiple ost's introduces the danger of
- * "cascading timeouts";
- *
- * - holding multiple locks over the same ost is still dead-lock prone,
- * see comment in osc_lock_enqueue(),
- *
- * but there are certain situations where this is unavoidable:
- *
- * - O_APPEND writes have to take [0, EOF] lock for correctness;
- *
- * - truncate has to take [new-size, EOF] lock for correctness;
- *
- * - SNS has to take locks across full stripe for correctness;
- *
- * - in the case when a user level buffer, supplied to {read,write}(file0),
- *   is a part of a memory mapped lustre file, the client has to take dlm
- *   locks on file0 and on all files that back up the buffer (or the part
- *   of the buffer that is being processed in the current chunk); in any
- *   case, there are situations where at least 2 locks are necessary.
- *
- * In such cases we at least try to take locks in the same consistent
- * order. To this end, all locks are first collected, then sorted, and then
- * enqueued.
- */
-struct cl_lockset {
- /** locks to be acquired. */
- struct list_head cls_todo;
- /** locks acquired. */
- struct list_head cls_done;
-};
-
-/**
- * Lock requirements (demand) for IO. It should be cl_io_lock_req,
- * but 'req' is always thought of as 'request' :-)
- */
-enum cl_io_lock_dmd {
- /** Always lock data (e.g., O_APPEND). */
- CILR_MANDATORY = 0,
- /** Layers are free to decide between local and global locking. */
- CILR_MAYBE,
- /** Never lock: there is no cache (e.g., lockless IO). */
- CILR_NEVER
-};
-
-enum cl_fsync_mode {
- /** start writeback, do not wait for them to finish */
- CL_FSYNC_NONE = 0,
- /** start writeback and wait for them to finish */
- CL_FSYNC_LOCAL = 1,
- /** discard all of dirty pages in a specific file range */
- CL_FSYNC_DISCARD = 2,
- /** start writeback and make sure the pages have reached storage before
- * returning. An OST_SYNC RPC must be issued and finished
- */
- CL_FSYNC_ALL = 3
-};
-
-struct cl_io_rw_common {
- loff_t crw_pos;
- size_t crw_count;
- int crw_nonblock;
-};
-
-/**
- * State for io.
- *
- * cl_io is shared by all threads participating in this IO (in the current
- * implementation only one thread advances an IO, but the parallel IO design
- * and concurrent copy_*_user() require multiple threads acting on the same
- * IO). It is up to these threads to serialize their activities, including
- * updates to
- * mutable cl_io fields.
- */
-struct cl_io {
- /** type of this IO. Immutable after creation. */
- enum cl_io_type ci_type;
- /** current state of cl_io state machine. */
- enum cl_io_state ci_state;
- /** main object this io is against. Immutable after creation. */
- struct cl_object *ci_obj;
- /**
- * Upper layer io, of which this io is a part of. Immutable after
- * creation.
- */
- struct cl_io *ci_parent;
- /** List of slices. Immutable after creation. */
- struct list_head ci_layers;
- /** list of locks (to be) acquired by this io. */
- struct cl_lockset ci_lockset;
- /** lock requirements, this is just a help info for sublayers. */
- enum cl_io_lock_dmd ci_lockreq;
- union {
- struct cl_rd_io {
- struct cl_io_rw_common rd;
- } ci_rd;
- struct cl_wr_io {
- struct cl_io_rw_common wr;
- int wr_append;
- int wr_sync;
- } ci_wr;
- struct cl_io_rw_common ci_rw;
- struct cl_setattr_io {
- struct ost_lvb sa_attr;
- unsigned int sa_attr_flags;
- unsigned int sa_valid;
- int sa_stripe_index;
- const struct lu_fid *sa_parent_fid;
- } ci_setattr;
- struct cl_data_version_io {
- u64 dv_data_version;
- int dv_flags;
- } ci_data_version;
- struct cl_fault_io {
- /** page index within file. */
- pgoff_t ft_index;
- /** number of valid bytes on the faulted page. */
- size_t ft_nob;
- /** writable page? for nopage() only */
- int ft_writable;
- /** page of an executable? */
- int ft_executable;
- /** page_mkwrite() */
- int ft_mkwrite;
- /** resulting page */
- struct cl_page *ft_page;
- } ci_fault;
- struct cl_fsync_io {
- loff_t fi_start;
- loff_t fi_end;
- /** file system level fid */
- struct lu_fid *fi_fid;
- enum cl_fsync_mode fi_mode;
- /* how many pages were written/discarded */
- unsigned int fi_nr_written;
- } ci_fsync;
- } u;
- struct cl_2queue ci_queue;
- size_t ci_nob;
- int ci_result;
- unsigned int ci_continue:1,
- /**
- * This io holds a grouplock, informing sublayers that they
- * must not do lockless i/o.
- */
- ci_no_srvlock:1,
- /**
- * The whole IO needs to be restarted because the layout has changed
- */
- ci_need_restart:1,
- /**
- * do not refresh the layout - the IO issuer knows that the layout won't
- * change (page operations; a layout change causes all pages to be
- * discarded), or it doesn't matter if it changes (sync).
- */
- ci_ignore_layout:1,
- /**
- * Check whether the layout changed after the IO finishes. Mainly for the
- * HSM requirement. IO against open files does not need to verify the
- * layout, because HSM won't release open files.
- * Right now, only two operations need to verify the layout: glimpse
- * and setattr.
- */
- ci_verify_layout:1,
- /**
- * file is released, restore has to be triggered by vvp layer
- */
- ci_restore_needed:1,
- /**
- * O_NOATIME
- */
- ci_noatime:1;
- /**
- * Number of pages owned by this IO. For invariant checking.
- */
- unsigned int ci_owned_nr;
-};
-
-/** @} cl_io */
-
-/**
- * Per-transfer attributes.
- */
-struct cl_req_attr {
- enum cl_req_type cra_type;
- u64 cra_flags;
- struct cl_page *cra_page;
-
- /** Generic attributes for the server consumption. */
- struct obdo *cra_oa;
- /** Jobid */
- char cra_jobid[LUSTRE_JOBID_SIZE];
-};
-
-enum cache_stats_item {
- /** how many cache lookups were performed */
- CS_lookup = 0,
- /** how many times cache lookup resulted in a hit */
- CS_hit,
- /** how many entities are in the cache right now */
- CS_total,
- /** how many entities in the cache are actively used (and cannot be
- * evicted) right now
- */
- CS_busy,
- /** how many entities were created at all */
- CS_create,
- CS_NR
-};
-
-#define CS_NAMES { "lookup", "hit", "total", "busy", "create" }
-
-/**
- * Stats for a generic cache (similar to inode, lu_object, etc. caches).
- */
-struct cache_stats {
- const char *cs_name;
- atomic_t cs_stats[CS_NR];
-};
-
-/** These are not exported so far */
-void cache_stats_init(struct cache_stats *cs, const char *name);
-
-/**
- * Client-side site. This represents a particular client stack. "Global"
- * variables should (directly or indirectly) be added here to allow multiple
- * clients to co-exist in a single address space.
- */
-struct cl_site {
- struct lu_site cs_lu;
- /**
- * Statistical counters. Atomics do not scale, something better like
- * per-cpu counters is needed.
- *
- * These are exported as /sys/kernel/debug/lustre/llite/.../site
- *
- * When interpreting keep in mind that both sub-locks (and sub-pages)
- * and top-locks (and top-pages) are accounted here.
- */
- struct cache_stats cs_pages;
- atomic_t cs_pages_state[CPS_NR];
-};
-
-int cl_site_init(struct cl_site *s, struct cl_device *top);
-void cl_site_fini(struct cl_site *s);
-void cl_stack_fini(const struct lu_env *env, struct cl_device *cl);
-
-/**
- * Output client site statistical counters into a buffer. Suitable for
- * ll_rd_*()-style functions.
- */
-int cl_site_stats_print(const struct cl_site *site, struct seq_file *m);
-
-/**
- * \name helpers
- *
- * Type conversion and accessory functions.
- */
-/** @{ */
-
-static inline struct cl_site *lu2cl_site(const struct lu_site *site)
-{
- return container_of(site, struct cl_site, cs_lu);
-}
-
-static inline int lu_device_is_cl(const struct lu_device *d)
-{
- return d->ld_type->ldt_tags & LU_DEVICE_CL;
-}
-
-static inline struct cl_device *lu2cl_dev(const struct lu_device *d)
-{
- LASSERT(!d || IS_ERR(d) || lu_device_is_cl(d));
- return container_of_safe(d, struct cl_device, cd_lu_dev);
-}
-
-static inline struct lu_device *cl2lu_dev(struct cl_device *d)
-{
- return &d->cd_lu_dev;
-}
-
-static inline struct cl_object *lu2cl(const struct lu_object *o)
-{
- LASSERT(!o || IS_ERR(o) || lu_device_is_cl(o->lo_dev));
- return container_of_safe(o, struct cl_object, co_lu);
-}
-
-static inline const struct cl_object_conf *
-lu2cl_conf(const struct lu_object_conf *conf)
-{
- return container_of_safe(conf, struct cl_object_conf, coc_lu);
-}
-
-static inline struct cl_object *cl_object_next(const struct cl_object *obj)
-{
- return obj ? lu2cl(lu_object_next(&obj->co_lu)) : NULL;
-}
-
-static inline struct cl_device *cl_object_device(const struct cl_object *o)
-{
- LASSERT(!o || IS_ERR(o) || lu_device_is_cl(o->co_lu.lo_dev));
- return container_of_safe(o->co_lu.lo_dev, struct cl_device, cd_lu_dev);
-}
-
-static inline struct cl_object_header *luh2coh(const struct lu_object_header *h)
-{
- return container_of_safe(h, struct cl_object_header, coh_lu);
-}
-
-static inline struct cl_site *cl_object_site(const struct cl_object *obj)
-{
- return lu2cl_site(obj->co_lu.lo_dev->ld_site);
-}
-
-static inline
-struct cl_object_header *cl_object_header(const struct cl_object *obj)
-{
- return luh2coh(obj->co_lu.lo_header);
-}
-
-static inline int cl_device_init(struct cl_device *d, struct lu_device_type *t)
-{
- return lu_device_init(&d->cd_lu_dev, t);
-}
-
-static inline void cl_device_fini(struct cl_device *d)
-{
- lu_device_fini(&d->cd_lu_dev);
-}
-
-void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
- struct cl_object *obj, pgoff_t index,
- const struct cl_page_operations *ops);
-void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
- struct cl_object *obj,
- const struct cl_lock_operations *ops);
-void cl_io_slice_add(struct cl_io *io, struct cl_io_slice *slice,
- struct cl_object *obj, const struct cl_io_operations *ops);
-/** @} helpers */
-
-/** \defgroup cl_object cl_object
- * @{
- */
-struct cl_object *cl_object_top(struct cl_object *o);
-struct cl_object *cl_object_find(const struct lu_env *env, struct cl_device *cd,
- const struct lu_fid *fid,
- const struct cl_object_conf *c);
-
-int cl_object_header_init(struct cl_object_header *h);
-void cl_object_put(const struct lu_env *env, struct cl_object *o);
-void cl_object_get(struct cl_object *o);
-void cl_object_attr_lock(struct cl_object *o);
-void cl_object_attr_unlock(struct cl_object *o);
-int cl_object_attr_get(const struct lu_env *env, struct cl_object *obj,
- struct cl_attr *attr);
-int cl_object_attr_update(const struct lu_env *env, struct cl_object *obj,
- const struct cl_attr *attr, unsigned int valid);
-int cl_object_glimpse(const struct lu_env *env, struct cl_object *obj,
- struct ost_lvb *lvb);
-int cl_conf_set(const struct lu_env *env, struct cl_object *obj,
- const struct cl_object_conf *conf);
-int cl_object_prune(const struct lu_env *env, struct cl_object *obj);
-void cl_object_kill(const struct lu_env *env, struct cl_object *obj);
-int cl_object_getstripe(const struct lu_env *env, struct cl_object *obj,
- struct lov_user_md __user *lum);
-int cl_object_fiemap(const struct lu_env *env, struct cl_object *obj,
- struct ll_fiemap_info_key *fmkey, struct fiemap *fiemap,
- size_t *buflen);
-int cl_object_layout_get(const struct lu_env *env, struct cl_object *obj,
- struct cl_layout *cl);
-loff_t cl_object_maxbytes(struct cl_object *obj);
-
-/**
- * Returns true iff \a o0 and \a o1 are slices of the same object.
- */
-static inline int cl_object_same(struct cl_object *o0, struct cl_object *o1)
-{
- return cl_object_header(o0) == cl_object_header(o1);
-}
-
-static inline void cl_object_page_init(struct cl_object *clob, int size)
-{
- clob->co_slice_off = cl_object_header(clob)->coh_page_bufsize;
- cl_object_header(clob)->coh_page_bufsize += cfs_size_round(size);
- WARN_ON(cl_object_header(clob)->coh_page_bufsize > 512);
-}
-
-static inline void *cl_object_page_slice(struct cl_object *clob,
- struct cl_page *page)
-{
- return (void *)((char *)page + clob->co_slice_off);
-}
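A sketch of how a layer is expected to use this pair - reserve slice space when the object is initialized, then recover the slice from a page later; struct my_page_slice and the wrappers are hypothetical:

	/* Hypothetical per-layer page slice. */
	struct my_page_slice {
		struct cl_page_slice mps_cl;
		int                  mps_private;
	};

	static void my_object_page_init(struct cl_object *clob)
	{
		/* records this layer's offset and grows coh_page_bufsize */
		cl_object_page_init(clob, sizeof(struct my_page_slice));
	}

	static struct my_page_slice *my_page_slice_get(struct cl_object *clob,
						       struct cl_page *page)
	{
		/* slice lives at the offset recorded at init time */
		return cl_object_page_slice(clob, page);
	}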
-
-/**
- * Return refcount of cl_object.
- */
-static inline int cl_object_refc(struct cl_object *clob)
-{
- struct lu_object_header *header = clob->co_lu.lo_header;
-
- return atomic_read(&header->loh_ref);
-}
-
-/** @} cl_object */
-
-/** \defgroup cl_page cl_page
- * @{
- */
-/* result codes used by gang page lookup and its page callback */
-enum {
- CLP_GANG_OKAY = 0,
- CLP_GANG_RESCHED,
- CLP_GANG_AGAIN,
- CLP_GANG_ABORT
-};
-
-struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *obj,
- pgoff_t idx, struct page *vmpage,
- enum cl_page_type type);
-struct cl_page *cl_page_alloc(const struct lu_env *env,
- struct cl_object *o, pgoff_t ind,
- struct page *vmpage,
- enum cl_page_type type);
-void cl_page_get(struct cl_page *page);
-void cl_page_put(const struct lu_env *env, struct cl_page *page);
-void cl_page_print(const struct lu_env *env, void *cookie, lu_printer_t printer,
- const struct cl_page *pg);
-void cl_page_header_print(const struct lu_env *env, void *cookie,
- lu_printer_t printer, const struct cl_page *pg);
-struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj);
-
-const struct cl_page_slice *cl_page_at(const struct cl_page *page,
- const struct lu_device_type *dtype);
-
-/**
- * \name ownership
- *
- * Functions dealing with the ownership of page by io.
- */
-/** @{ */
-
-int cl_page_own(const struct lu_env *env,
- struct cl_io *io, struct cl_page *page);
-int cl_page_own_try(const struct lu_env *env,
- struct cl_io *io, struct cl_page *page);
-void cl_page_assume(const struct lu_env *env,
- struct cl_io *io, struct cl_page *page);
-void cl_page_unassume(const struct lu_env *env,
- struct cl_io *io, struct cl_page *pg);
-void cl_page_disown(const struct lu_env *env,
- struct cl_io *io, struct cl_page *page);
-void cl_page_disown0(const struct lu_env *env,
- struct cl_io *io, struct cl_page *pg);
-int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io);
-
-/** @} ownership */
-
-/**
- * \name transfer
- *
- * Functions dealing with the preparation of a page for a transfer, and
- * tracking transfer state.
- */
-/** @{ */
-int cl_page_prep(const struct lu_env *env, struct cl_io *io,
- struct cl_page *pg, enum cl_req_type crt);
-void cl_page_completion(const struct lu_env *env,
- struct cl_page *pg, enum cl_req_type crt, int ioret);
-int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
- enum cl_req_type crt);
-int cl_page_cache_add(const struct lu_env *env, struct cl_io *io,
- struct cl_page *pg, enum cl_req_type crt);
-void cl_page_clip(const struct lu_env *env, struct cl_page *pg,
- int from, int to);
-int cl_page_cancel(const struct lu_env *env, struct cl_page *page);
-int cl_page_flush(const struct lu_env *env, struct cl_io *io,
- struct cl_page *pg);
-
-/** @} transfer */
-
-/**
- * \name helper routines
- * Functions to discard, delete and export a cl_page.
- */
-/** @{ */
-void cl_page_discard(const struct lu_env *env, struct cl_io *io,
- struct cl_page *pg);
-void cl_page_delete(const struct lu_env *env, struct cl_page *pg);
-int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg);
-void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate);
-loff_t cl_offset(const struct cl_object *obj, pgoff_t idx);
-pgoff_t cl_index(const struct cl_object *obj, loff_t offset);
-size_t cl_page_size(const struct cl_object *obj);
-int cl_pages_prune(const struct lu_env *env, struct cl_object *obj);
-
-void cl_lock_print(const struct lu_env *env, void *cookie,
- lu_printer_t printer, const struct cl_lock *lock);
-void cl_lock_descr_print(const struct lu_env *env, void *cookie,
- lu_printer_t printer,
- const struct cl_lock_descr *descr);
-/* @} helper */
-
-/**
- * Data structure managing a client's cached pages. It maintains a count
- * of "unstable" pages and an LRU of clean pages; "unstable" pages are
- * pages pinned by the ptlrpc layer for recovery purposes.
- */
-struct cl_client_cache {
- /**
- * Reference count on the client cache:
- * # of users (OSCs) + 2 (held by llite and lov)
- */
- atomic_t ccc_users;
- /**
- * # of threads currently doing shrinking
- */
- unsigned int ccc_lru_shrinkers;
- /**
- * # of LRU entries available
- */
- atomic_long_t ccc_lru_left;
- /**
- * List of entities (OSCs) for this LRU cache
- */
- struct list_head ccc_lru;
- /**
- * Max # of LRU entries
- */
- unsigned long ccc_lru_max;
- /**
- * Lock to protect ccc_lru list
- */
- spinlock_t ccc_lru_lock;
- /**
- * Set if unstable check is enabled
- */
- unsigned int ccc_unstable_check:1;
- /**
- * # of unstable pages for this mount point
- */
- atomic_long_t ccc_unstable_nr;
- /**
- * Waitq for waiting for the number of unstable pages to reach zero.
- * Used at unmount time and signaled on BRW commit.
- */
- wait_queue_head_t ccc_unstable_waitq;
-
-};
-
-/**
- * cl_cache functions
- */
-struct cl_client_cache *cl_cache_init(unsigned long lru_page_max);
-void cl_cache_incref(struct cl_client_cache *cache);
-void cl_cache_decref(struct cl_client_cache *cache);
-
-/** @} cl_page */
-
-/** \defgroup cl_lock cl_lock
- * @{
- */
-
-int cl_lock_request(const struct lu_env *env, struct cl_io *io,
- struct cl_lock *lock);
-int cl_lock_init(const struct lu_env *env, struct cl_lock *lock,
- const struct cl_io *io);
-void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock);
-const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
- const struct lu_device_type *dtype);
-void cl_lock_release(const struct lu_env *env, struct cl_lock *lock);
-int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io,
- struct cl_lock *lock, struct cl_sync_io *anchor);
-void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock);
-
-/** @} cl_lock */
-
-/** \defgroup cl_io cl_io
- * @{
- */
-
-int cl_io_init(const struct lu_env *env, struct cl_io *io,
- enum cl_io_type iot, struct cl_object *obj);
-int cl_io_sub_init(const struct lu_env *env, struct cl_io *io,
- enum cl_io_type iot, struct cl_object *obj);
-int cl_io_rw_init(const struct lu_env *env, struct cl_io *io,
- enum cl_io_type iot, loff_t pos, size_t count);
-int cl_io_loop(const struct lu_env *env, struct cl_io *io);
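Taken together these form the top-level entry: initialize the io, let cl_io_loop() drive the iterate/lock/start/end state machine, and finalize. A sketch of a write driven through this interface; it assumes the caller supplies env, a per-thread cl_io, and the object, and that cl_io_rw_init() initializes against the pre-set ci_obj, as in the llite callers:

	/* Sketch only: write "count" bytes at "pos" via the io machinery. */
	static int my_write(const struct lu_env *env, struct cl_io *io,
			    struct cl_object *obj, loff_t pos, size_t count)
	{
		int rc;

		io->ci_obj = obj;	/* assumed to be consumed by rw_init */
		rc = cl_io_rw_init(env, io, CIT_WRITE, pos, count);
		if (rc == 0)
			rc = cl_io_loop(env, io); /* runs the io state machine */
		cl_io_fini(env, io);
		return rc;
	}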
-
-void cl_io_fini(const struct lu_env *env, struct cl_io *io);
-int cl_io_iter_init(const struct lu_env *env, struct cl_io *io);
-void cl_io_iter_fini(const struct lu_env *env, struct cl_io *io);
-int cl_io_lock(const struct lu_env *env, struct cl_io *io);
-void cl_io_unlock(const struct lu_env *env, struct cl_io *io);
-int cl_io_start(const struct lu_env *env, struct cl_io *io);
-void cl_io_end(const struct lu_env *env, struct cl_io *io);
-int cl_io_lock_add(const struct lu_env *env, struct cl_io *io,
- struct cl_io_lock_link *link);
-int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io,
- struct cl_lock_descr *descr);
-int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
- enum cl_req_type iot, struct cl_2queue *queue);
-int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
- enum cl_req_type iot, struct cl_2queue *queue,
- long timeout);
-int cl_io_commit_async(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue, int from, int to,
- cl_commit_cbt cb);
-int cl_io_read_ahead(const struct lu_env *env, struct cl_io *io,
- pgoff_t start, struct cl_read_ahead *ra);
-int cl_io_is_going(const struct lu_env *env);
-
-/**
- * True, iff \a io is an O_APPEND write(2).
- */
-static inline int cl_io_is_append(const struct cl_io *io)
-{
- return io->ci_type == CIT_WRITE && io->u.ci_wr.wr_append;
-}
-
-static inline int cl_io_is_sync_write(const struct cl_io *io)
-{
- return io->ci_type == CIT_WRITE && io->u.ci_wr.wr_sync;
-}
-
-static inline int cl_io_is_mkwrite(const struct cl_io *io)
-{
- return io->ci_type == CIT_FAULT && io->u.ci_fault.ft_mkwrite;
-}
-
-/**
- * True, iff \a io is a truncate(2).
- */
-static inline int cl_io_is_trunc(const struct cl_io *io)
-{
- return io->ci_type == CIT_SETATTR &&
- (io->u.ci_setattr.sa_valid & ATTR_SIZE);
-}
-
-struct cl_io *cl_io_top(struct cl_io *io);
-
-#define CL_IO_SLICE_CLEAN(foo_io, base) \
-do { \
- typeof(foo_io) __foo_io = (foo_io); \
- \
- BUILD_BUG_ON(offsetof(typeof(*__foo_io), base) != 0); \
- memset(&__foo_io->base + 1, 0, \
- sizeof(*__foo_io) - sizeof(__foo_io->base)); \
-} while (0)
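A usage sketch: the macro zeroes everything in a layer's io structure except the leading slice, which the BUILD_BUG_ON requires to sit at offset 0 (struct my_io is hypothetical):

	struct my_io {
		struct cl_io_slice mi_cl;	/* must be the first member */
		int                mi_flags;
		loff_t             mi_saved_pos;
	};

	static void my_io_reset(struct my_io *mio)
	{
		/* clears mi_flags and mi_saved_pos; leaves mi_cl intact */
		CL_IO_SLICE_CLEAN(mio, mi_cl);
	}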
-
-/** @} cl_io */
-
-/** \defgroup cl_page_list cl_page_list
- * @{
- */
-
-/**
- * Last page in the page list.
- */
-static inline struct cl_page *cl_page_list_last(struct cl_page_list *plist)
-{
- LASSERT(plist->pl_nr > 0);
- return list_entry(plist->pl_pages.prev, struct cl_page, cp_batch);
-}
-
-static inline struct cl_page *cl_page_list_first(struct cl_page_list *plist)
-{
- LASSERT(plist->pl_nr > 0);
- return list_entry(plist->pl_pages.next, struct cl_page, cp_batch);
-}
-
-/**
- * Iterate over pages in a page list.
- */
-#define cl_page_list_for_each(page, list) \
- list_for_each_entry((page), &(list)->pl_pages, cp_batch)
-
-/**
- * Iterate over pages in a page list, taking possible removals into account.
- */
-#define cl_page_list_for_each_safe(page, temp, list) \
- list_for_each_entry_safe((page), (temp), &(list)->pl_pages, cp_batch)
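The _safe variant matters whenever the loop body unlinks pages, as when draining one list into another; a sketch using the list primitives declared below:

	/* Sketch: move every page from "src" to "dst" while walking safely. */
	static void my_drain(struct cl_page_list *src, struct cl_page_list *dst)
	{
		struct cl_page *page;
		struct cl_page *tmp;

		cl_page_list_for_each_safe(page, tmp, src)
			cl_page_list_move(dst, src, page);
	}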
-
-void cl_page_list_init(struct cl_page_list *plist);
-void cl_page_list_add(struct cl_page_list *plist, struct cl_page *page);
-void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src,
- struct cl_page *page);
-void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src,
- struct cl_page *page);
-void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head);
-void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist,
- struct cl_page *page);
-void cl_page_list_disown(const struct lu_env *env,
- struct cl_io *io, struct cl_page_list *plist);
-void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist);
-
-void cl_2queue_init(struct cl_2queue *queue);
-void cl_2queue_disown(const struct lu_env *env,
- struct cl_io *io, struct cl_2queue *queue);
-void cl_2queue_discard(const struct lu_env *env,
- struct cl_io *io, struct cl_2queue *queue);
-void cl_2queue_fini(const struct lu_env *env, struct cl_2queue *queue);
-void cl_2queue_init_page(struct cl_2queue *queue, struct cl_page *page);
-
-/** @} cl_page_list */
-
-void cl_req_attr_set(const struct lu_env *env, struct cl_object *obj,
- struct cl_req_attr *attr);
-
-/** \defgroup cl_sync_io cl_sync_io
- * @{
- */
-
-/**
- * Anchor for synchronous transfer. This is allocated on the stack by the
- * thread doing a synchronous transfer, and a pointer to this structure is
- * set up in every page submitted for transfer. The transfer completion
- * routine updates the anchor and wakes up the waiting thread when the
- * transfer is complete.
- */
-struct cl_sync_io {
- /** number of pages yet to be transferred. */
- atomic_t csi_sync_nr;
- /** error code. */
- int csi_sync_rc;
- /** barrier protecting destruction of this structure */
- atomic_t csi_barrier;
- /** completion to be signaled when transfer is complete. */
- wait_queue_head_t csi_waitq;
- /** callback to invoke when this IO is finished */
- void (*csi_end_io)(const struct lu_env *,
- struct cl_sync_io *);
-};
-
-void cl_sync_io_init(struct cl_sync_io *anchor, int nr,
- void (*end)(const struct lu_env *, struct cl_sync_io *));
-int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor,
- long timeout);
-void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor,
- int ioret);
-void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor);
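The expected pattern, sketched below: initialize the anchor to the number of pages, submit them, then wait. Every completed page is assumed to end up in cl_sync_io_note(), which decrements csi_sync_nr and wakes the waiter at zero; the submission helper is hypothetical, and timeout 0 is assumed to mean "no timeout":

	static int my_sync_transfer(const struct lu_env *env, int nr_pages)
	{
		struct cl_sync_io anchor;

		cl_sync_io_init(&anchor, nr_pages, cl_sync_io_end);
		my_submit_pages(env, &anchor);	/* hypothetical: completion of
						 * each page is assumed to call
						 * cl_sync_io_note() */
		return cl_sync_io_wait(env, &anchor, 0);
	}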
-
-/** @} cl_sync_io */
-
-/** \defgroup cl_env cl_env
- *
- * lu_env handling for a client.
- *
- * lu_env is an environment within which lustre code executes. Its major part
- * is lu_context---a fast memory allocation mechanism that is used to conserve
- * precious kernel stack space. Originally lu_env was designed for a server,
- * where
- *
- * - there is a (mostly) fixed number of threads, and
- *
- * - call chains have no non-lustre portions inserted between lustre calls.
- *
- * On a client both of these assumptions fail, because every user thread can
- * potentially execute lustre code as part of a system call, and lustre calls
- * into the VFS or MM, which can call back into lustre.
- *
- * To deal with that, cl_env wrapper functions implement the following
- * optimizations:
- *
- * - allocation and destruction of environments are amortized by caching
- *   no-longer-used environments instead of destroying them;
- *
- * \see lu_env, lu_context, lu_context_key
- * @{
- */
-
-struct lu_env *cl_env_get(u16 *refcheck);
-struct lu_env *cl_env_alloc(u16 *refcheck, __u32 tags);
-void cl_env_put(struct lu_env *env, u16 *refcheck);
-unsigned int cl_env_cache_purge(unsigned int nr);
-struct lu_env *cl_env_percpu_get(void);
-void cl_env_percpu_put(struct lu_env *env);
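A sketch of the standard check-out/check-in pattern; the refcheck cookie lets cl_env_put() catch unbalanced pairs (the work function is hypothetical):

	static int my_entry_point(void)
	{
		struct lu_env *env;
		u16 refcheck;
		int rc;

		env = cl_env_get(&refcheck);
		if (IS_ERR(env))
			return PTR_ERR(env);

		rc = my_do_work(env);	/* hypothetical lustre work */

		cl_env_put(env, &refcheck);
		return rc;
	}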
-
-/** @} cl_env */
-
-/*
- * Misc
- */
-void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb);
-
-struct cl_device *cl_type_setup(const struct lu_env *env, struct lu_site *site,
- struct lu_device_type *ldt,
- struct lu_device *next);
-/** @} clio */
-
-int cl_global_init(void);
-void cl_global_fini(void);
-
-#endif /* _LINUX_CL_OBJECT_H */
diff --git a/drivers/staging/lustre/lustre/include/interval_tree.h b/drivers/staging/lustre/lustre/include/interval_tree.h
deleted file mode 100644
index 7d119c1a0469..000000000000
--- a/drivers/staging/lustre/lustre/include/interval_tree.h
+++ /dev/null
@@ -1,119 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/include/interval_tree.h
- *
- * Author: Huang Wei <huangwei@clusterfs.com>
- * Author: Jay Xiong <jinshan.xiong@sun.com>
- */
-
-#ifndef _INTERVAL_H__
-#define _INTERVAL_H__
-
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-
-struct interval_node {
- struct interval_node *in_left;
- struct interval_node *in_right;
- struct interval_node *in_parent;
- unsigned in_color:1,
- in_intree:1, /** set if the node is in the tree */
- in_res1:30;
- __u8 in_res2[4]; /** tags, 8-byte aligned */
- __u64 in_max_high;
- struct interval_node_extent {
- __u64 start;
- __u64 end;
- } in_extent;
-};
-
-enum interval_iter {
- INTERVAL_ITER_CONT = 1,
- INTERVAL_ITER_STOP = 2
-};
-
-static inline int interval_is_intree(struct interval_node *node)
-{
- return node->in_intree == 1;
-}
-
-static inline __u64 interval_low(struct interval_node *node)
-{
- return node->in_extent.start;
-}
-
-static inline __u64 interval_high(struct interval_node *node)
-{
- return node->in_extent.end;
-}
-
-static inline int interval_set(struct interval_node *node,
- __u64 start, __u64 end)
-{
- if (start > end)
- return -ERANGE;
- node->in_extent.start = start;
- node->in_extent.end = end;
- node->in_max_high = end;
- return 0;
-}
-
-/*
- * Rules for writing an interval callback:
- * - the callback returns INTERVAL_ITER_STOP when it decides the iteration
- *   should stop; the iteration function then returns immediately with
- *   return value INTERVAL_ITER_STOP.
- * - callbacks for interval_iterate and interval_iterate_reverse: every
- *   node in the tree is passed as @node before the callback is called.
- * - callback for interval_search: only overlapping nodes are passed as
- *   @node before the callback is called.
- */
-typedef enum interval_iter (*interval_callback_t)(struct interval_node *node,
- void *args);
-
-struct interval_node *interval_insert(struct interval_node *node,
- struct interval_node **root);
-void interval_erase(struct interval_node *node, struct interval_node **root);
-
-/*
- * Search the extents in the tree and call @func for each overlapping
- * extent.
- */
-enum interval_iter interval_search(struct interval_node *root,
- struct interval_node_extent *ex,
- interval_callback_t func, void *data);
-
-enum interval_iter interval_iterate_reverse(struct interval_node *root,
- interval_callback_t func,
- void *data);
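Putting the interface together - set an extent, insert it, and search for overlaps with a callback that follows the rules above. A sketch, with node storage managed by the caller:

	/* Count how many stored extents overlap [start, end]. */
	static enum interval_iter my_count_cb(struct interval_node *node,
					      void *args)
	{
		(*(int *)args)++;
		return INTERVAL_ITER_CONT;	/* keep visiting overlaps */
	}

	static int my_count_overlaps(struct interval_node **root,
				     struct interval_node *node,
				     __u64 start, __u64 end)
	{
		struct interval_node_extent ext = { .start = start, .end = end };
		int count = 0;

		if (interval_set(node, start, end) == 0)
			interval_insert(node, root);
		interval_search(*root, &ext, my_count_cb, &count);
		return count;
	}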
-
-#endif
diff --git a/drivers/staging/lustre/lustre/include/llog_swab.h b/drivers/staging/lustre/lustre/include/llog_swab.h
deleted file mode 100644
index 0433b79efdcb..000000000000
--- a/drivers/staging/lustre/lustre/include/llog_swab.h
+++ /dev/null
@@ -1,67 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2014, Intel Corporation.
- *
- * Copyright 2015 Cray Inc, all rights reserved.
- * Author: Ben Evans.
- *
- * We assume all nodes are either little-endian or big-endian, and we
- * always send messages in the sender's native format. The receiver
- * detects the message format by checking the 'magic' field of the message
- * (see lustre_msg_swabbed() below).
- *
- * Each type has a corresponding 'lustre_swab_xxxtypexxx()' routine,
- * implemented in ptlrpc/pack_generic.c. These 'swabbers' convert the
- * type from the "other" endianness, in place in the message buffer.
- *
- * A swabber takes a single pointer argument. The caller must already have
- * verified that the length of the message buffer is >= sizeof(type).
- *
- * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine
- * may be defined that swabs just the variable part, after the caller has
- * verified that the message buffer is large enough.
- */
-
-#ifndef _LLOG_SWAB_H_
-#define _LLOG_SWAB_H_
-
-#include <uapi/linux/lustre/lustre_idl.h>
-
-struct lustre_cfg;
-
-void lustre_swab_lu_fid(struct lu_fid *fid);
-void lustre_swab_ost_id(struct ost_id *oid);
-void lustre_swab_llogd_body(struct llogd_body *d);
-void lustre_swab_llog_hdr(struct llog_log_hdr *h);
-void lustre_swab_llogd_conn_body(struct llogd_conn_body *d);
-void lustre_swab_llog_rec(struct llog_rec_hdr *rec);
-void lustre_swab_lu_seq_range(struct lu_seq_range *range);
-void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg);
-void lustre_swab_cfg_marker(struct cfg_marker *marker,
- int swab, int size);
-
-#endif
diff --git a/drivers/staging/lustre/lustre/include/lprocfs_status.h b/drivers/staging/lustre/lustre/include/lprocfs_status.h
deleted file mode 100644
index 495e6f5f676b..000000000000
--- a/drivers/staging/lustre/lustre/include/lprocfs_status.h
+++ /dev/null
@@ -1,646 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/include/lprocfs_status.h
- *
- * Top level header file for LProc SNMP
- *
- * Author: Hariharan Thantry thantry@users.sourceforge.net
- */
-#ifndef _LPROCFS_SNMP_H
-#define _LPROCFS_SNMP_H
-
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/device.h>
-
-#include <uapi/linux/lustre/lustre_cfg.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-
-struct lprocfs_vars {
- const char *name;
- const struct file_operations *fops;
- void *data;
- /**
- * sysfs file mode.
- */
- umode_t proc_mode;
-};
-
-struct lprocfs_static_vars {
- struct lprocfs_vars *obd_vars;
- const struct attribute_group *sysfs_vars;
-};
-
-/* if we find more consumers this could be generalized */
-#define OBD_HIST_MAX 32
-struct obd_histogram {
- spinlock_t oh_lock;
- unsigned long oh_buckets[OBD_HIST_MAX];
-};
-
-enum {
- BRW_R_PAGES = 0,
- BRW_W_PAGES,
- BRW_R_RPC_HIST,
- BRW_W_RPC_HIST,
- BRW_R_IO_TIME,
- BRW_W_IO_TIME,
- BRW_R_DISCONT_PAGES,
- BRW_W_DISCONT_PAGES,
- BRW_R_DISCONT_BLOCKS,
- BRW_W_DISCONT_BLOCKS,
- BRW_R_DISK_IOSIZE,
- BRW_W_DISK_IOSIZE,
- BRW_R_DIO_FRAGS,
- BRW_W_DIO_FRAGS,
- BRW_LAST,
-};
-
-struct brw_stats {
- struct obd_histogram hist[BRW_LAST];
-};
-
-enum {
- RENAME_SAMEDIR_SIZE = 0,
- RENAME_CROSSDIR_SRC_SIZE,
- RENAME_CROSSDIR_TGT_SIZE,
- RENAME_LAST,
-};
-
-struct rename_stats {
- struct obd_histogram hist[RENAME_LAST];
-};
-
-/* An lprocfs counter can be configured using the enum bit masks below.
- *
- * LPROCFS_CNTR_EXTERNALLOCK indicates that an external lock already
- * protects this counter from concurrent updates. If not specified,
- * lprocfs uses an internal per-counter lock variable. External locks
- * are not used to protect counter increments, but are used to protect
- * counter readout and resets.
- *
- * LPROCFS_CNTR_AVGMINMAX indicates that the counter takes multi-valued
- * samples (i.e. the counter can be incremented by more than "1"). When
- * specified, the counter maintains min, max and sum in addition to a
- * simple invocation count, so that averages can be computed.
- * If not specified, the counter is an increment-by-1 counter;
- * min, max, sum, etc. are not maintained.
- *
- * LPROCFS_CNTR_STDDEV indicates that the counter should track sum of
- * squares (for multi-valued counter samples only). This allows
- * external computation of standard deviation, but involves a 64-bit
- * multiply per counter increment.
- */
-
-enum {
- LPROCFS_CNTR_EXTERNALLOCK = 0x0001,
- LPROCFS_CNTR_AVGMINMAX = 0x0002,
- LPROCFS_CNTR_STDDEV = 0x0004,
-
- /* counter data type */
- LPROCFS_TYPE_REGS = 0x0100,
- LPROCFS_TYPE_BYTES = 0x0200,
- LPROCFS_TYPE_PAGES = 0x0400,
- LPROCFS_TYPE_CYCLE = 0x0800,
-};
-
-#define LC_MIN_INIT ((~(__u64)0) >> 1)
-
-struct lprocfs_counter_header {
- unsigned int lc_config;
- const char *lc_name; /* must be static */
- const char *lc_units; /* must be static */
-};
-
-struct lprocfs_counter {
- __s64 lc_count;
- __s64 lc_min;
- __s64 lc_max;
- __s64 lc_sumsquare;
- /*
- * Every counter has lc_array_sum[0]; lc_array_sum[1] exists only
- * for irq-context counters, i.e. counters belonging to stats
- * created with the LPROCFS_STATS_FLAG_IRQ_SAFE flag.
- */
- __s64 lc_array_sum[1];
-};
-
-#define lc_sum lc_array_sum[0]
-#define lc_sum_irq lc_array_sum[1]
-
-struct lprocfs_percpu {
-#ifndef __GNUC__
- __s64 pad;
-#endif
- struct lprocfs_counter lp_cntr[0];
-};
-
-enum lprocfs_stats_lock_ops {
- LPROCFS_GET_NUM_CPU = 0x0001, /* number of allocated per-CPU stats */
- LPROCFS_GET_SMP_ID = 0x0002, /* current stat to be updated */
-};
-
-enum lprocfs_stats_flags {
- LPROCFS_STATS_FLAG_NONE = 0x0000, /* per cpu counter */
- LPROCFS_STATS_FLAG_NOPERCPU = 0x0001, /* stats have no percpu
- * area and need locking
- */
- LPROCFS_STATS_FLAG_IRQ_SAFE = 0x0002, /* alloc need irq safe */
-};
-
-enum lprocfs_fields_flags {
- LPROCFS_FIELDS_FLAGS_CONFIG = 0x0001,
- LPROCFS_FIELDS_FLAGS_SUM = 0x0002,
- LPROCFS_FIELDS_FLAGS_MIN = 0x0003,
- LPROCFS_FIELDS_FLAGS_MAX = 0x0004,
- LPROCFS_FIELDS_FLAGS_AVG = 0x0005,
- LPROCFS_FIELDS_FLAGS_SUMSQUARE = 0x0006,
- LPROCFS_FIELDS_FLAGS_COUNT = 0x0007,
-};
-
-struct lprocfs_stats {
- /* # of counters */
- unsigned short ls_num;
- /* 1 + the biggest cpu # whose ls_percpu slot has been allocated */
- unsigned short ls_biggest_alloc_num;
- enum lprocfs_stats_flags ls_flags;
- /* Lock used when there are no percpu stats areas; for percpu stats,
- * it is used to protect changes to ls_biggest_alloc_num
- */
- spinlock_t ls_lock;
-
- /* has ls_num of counter headers */
- struct lprocfs_counter_header *ls_cnt_header;
- struct lprocfs_percpu *ls_percpu[0];
-};
-
-#define OPC_RANGE(seg) (seg ## _LAST_OPC - seg ## _FIRST_OPC)
-
-/* Pack all opcodes down into a single monotonically increasing index */
-static inline int opcode_offset(__u32 opc)
-{
- if (opc < OST_LAST_OPC) {
- /* OST opcode */
- return (opc - OST_FIRST_OPC);
- } else if (opc < MDS_LAST_OPC) {
- /* MDS opcode */
- return (opc - MDS_FIRST_OPC +
- OPC_RANGE(OST));
- } else if (opc < LDLM_LAST_OPC) {
- /* LDLM Opcode */
- return (opc - LDLM_FIRST_OPC +
- OPC_RANGE(MDS) +
- OPC_RANGE(OST));
- } else if (opc < MGS_LAST_OPC) {
- /* MGS Opcode */
- return (opc - MGS_FIRST_OPC +
- OPC_RANGE(LDLM) +
- OPC_RANGE(MDS) +
- OPC_RANGE(OST));
- } else if (opc < OBD_LAST_OPC) {
- /* OBD Ping */
- return (opc - OBD_FIRST_OPC +
- OPC_RANGE(MGS) +
- OPC_RANGE(LDLM) +
- OPC_RANGE(MDS) +
- OPC_RANGE(OST));
- } else if (opc < LLOG_LAST_OPC) {
- /* LLOG Opcode */
- return (opc - LLOG_FIRST_OPC +
- OPC_RANGE(OBD) +
- OPC_RANGE(MGS) +
- OPC_RANGE(LDLM) +
- OPC_RANGE(MDS) +
- OPC_RANGE(OST));
- } else if (opc < QUOTA_LAST_OPC) {
- /* LQUOTA Opcode */
- return (opc - QUOTA_FIRST_OPC +
- OPC_RANGE(LLOG) +
- OPC_RANGE(OBD) +
- OPC_RANGE(MGS) +
- OPC_RANGE(LDLM) +
- OPC_RANGE(MDS) +
- OPC_RANGE(OST));
- } else if (opc < SEQ_LAST_OPC) {
- /* SEQ opcode */
- return (opc - SEQ_FIRST_OPC +
- OPC_RANGE(QUOTA) +
- OPC_RANGE(LLOG) +
- OPC_RANGE(OBD) +
- OPC_RANGE(MGS) +
- OPC_RANGE(LDLM) +
- OPC_RANGE(MDS) +
- OPC_RANGE(OST));
- } else if (opc < SEC_LAST_OPC) {
- /* SEC opcode */
- return (opc - SEC_FIRST_OPC +
- OPC_RANGE(SEQ) +
- OPC_RANGE(QUOTA) +
- OPC_RANGE(LLOG) +
- OPC_RANGE(OBD) +
- OPC_RANGE(MGS) +
- OPC_RANGE(LDLM) +
- OPC_RANGE(MDS) +
- OPC_RANGE(OST));
- } else if (opc < FLD_LAST_OPC) {
- /* FLD opcode */
- return (opc - FLD_FIRST_OPC +
- OPC_RANGE(SEC) +
- OPC_RANGE(SEQ) +
- OPC_RANGE(QUOTA) +
- OPC_RANGE(LLOG) +
- OPC_RANGE(OBD) +
- OPC_RANGE(MGS) +
- OPC_RANGE(LDLM) +
- OPC_RANGE(MDS) +
- OPC_RANGE(OST));
- } else {
- /* Unknown Opcode */
- return -1;
- }
-}
-
-#define LUSTRE_MAX_OPCODES (OPC_RANGE(OST) + \
- OPC_RANGE(MDS) + \
- OPC_RANGE(LDLM) + \
- OPC_RANGE(MGS) + \
- OPC_RANGE(OBD) + \
- OPC_RANGE(LLOG) + \
- OPC_RANGE(QUOTA) + \
- OPC_RANGE(SEQ) + \
- OPC_RANGE(SEC) + \
- OPC_RANGE(FLD))
-
-#define EXTRA_MAX_OPCODES ((PTLRPC_LAST_CNTR - PTLRPC_FIRST_CNTR) + \
- OPC_RANGE(EXTRA))
-
-enum {
- PTLRPC_REQWAIT_CNTR = 0,
- PTLRPC_REQQDEPTH_CNTR,
- PTLRPC_REQACTIVE_CNTR,
- PTLRPC_TIMEOUT,
- PTLRPC_REQBUF_AVAIL_CNTR,
- PTLRPC_LAST_CNTR
-};
-
-#define PTLRPC_FIRST_CNTR PTLRPC_REQWAIT_CNTR
-
-enum {
- LDLM_GLIMPSE_ENQUEUE = 0,
- LDLM_PLAIN_ENQUEUE,
- LDLM_EXTENT_ENQUEUE,
- LDLM_FLOCK_ENQUEUE,
- LDLM_IBITS_ENQUEUE,
- MDS_REINT_SETATTR,
- MDS_REINT_CREATE,
- MDS_REINT_LINK,
- MDS_REINT_UNLINK,
- MDS_REINT_RENAME,
- MDS_REINT_OPEN,
- MDS_REINT_SETXATTR,
- BRW_READ_BYTES,
- BRW_WRITE_BYTES,
- EXTRA_LAST_OPC
-};
-
-#define EXTRA_FIRST_OPC LDLM_GLIMPSE_ENQUEUE
-/* class_obd.c */
-extern struct dentry *debugfs_lustre_root;
-extern struct kobject *lustre_kobj;
-
-struct obd_device;
-struct obd_histogram;
-
-/* Days / hours / mins / seconds format */
-struct dhms {
- int d, h, m, s;
-};
-
-static inline void s2dhms(struct dhms *ts, time64_t secs64)
-{
- unsigned int secs;
-
- ts->d = div_u64_rem(secs64, 86400, &secs);
- ts->h = secs / 3600;
- secs = secs % 3600;
- ts->m = secs / 60;
- ts->s = secs % 60;
-}
-
-#define DHMS_FMT "%dd%dh%02dm%02ds"
-#define DHMS_VARS(x) (x)->d, (x)->h, (x)->m, (x)->s
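A sketch of the intended use - converting an age in seconds to the days/hours/minutes/seconds form in a seq_file handler:

	static void my_print_age(struct seq_file *m, time64_t age_seconds)
	{
		struct dhms ts;

		s2dhms(&ts, age_seconds);
		seq_printf(m, "age: " DHMS_FMT "\n", DHMS_VARS(&ts));
	}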
-
-#define JOBSTATS_JOBID_VAR_MAX_LEN 20
-#define JOBSTATS_DISABLE "disable"
-#define JOBSTATS_PROCNAME_UID "procname_uid"
-#define JOBSTATS_NODELOCAL "nodelocal"
-
-/* obd_config.c */
-void lustre_register_client_process_config(int (*cpc)(struct lustre_cfg *lcfg));
-
-int lprocfs_write_frac_helper(const char __user *buffer,
- unsigned long count, int *val, int mult);
-int lprocfs_read_frac_helper(char *buffer, unsigned long count,
- long val, int mult);
-
-int lprocfs_stats_alloc_one(struct lprocfs_stats *stats,
- unsigned int cpuid);
-int lprocfs_stats_lock(struct lprocfs_stats *stats,
- enum lprocfs_stats_lock_ops opc,
- unsigned long *flags);
-void lprocfs_stats_unlock(struct lprocfs_stats *stats,
- enum lprocfs_stats_lock_ops opc,
- unsigned long *flags);
-
-static inline unsigned int
-lprocfs_stats_counter_size(struct lprocfs_stats *stats)
-{
- unsigned int percpusize;
-
- percpusize = offsetof(struct lprocfs_percpu, lp_cntr[stats->ls_num]);
-
- /* irq safe stats need lc_array_sum[1] */
- if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
- percpusize += stats->ls_num * sizeof(__s64);
-
- if ((stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) == 0)
- percpusize = L1_CACHE_ALIGN(percpusize);
-
- return percpusize;
-}
-
-static inline struct lprocfs_counter *
-lprocfs_stats_counter_get(struct lprocfs_stats *stats, unsigned int cpuid,
- int index)
-{
- struct lprocfs_counter *cntr;
-
- cntr = &stats->ls_percpu[cpuid]->lp_cntr[index];
-
- if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
- cntr = (void *)cntr + index * sizeof(__s64);
-
- return cntr;
-}
-
-/* Two optimized LPROCFS counter increment functions are provided:
- * lprocfs_counter_incr(stats, idx) - optimized for by-one counters
- * lprocfs_counter_add(stats, idx, amount) - use for multi-valued counters
- * Counter data layout allows config flag, counter lock and the
- * count itself to reside within a single cache line.
- */
-
-void lprocfs_counter_add(struct lprocfs_stats *stats, int idx, long amount);
-void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx, long amount);
-
-#define lprocfs_counter_incr(stats, idx) \
- lprocfs_counter_add(stats, idx, 1)
-#define lprocfs_counter_decr(stats, idx) \
- lprocfs_counter_sub(stats, idx, 1)
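A sketch of the whole life cycle - allocate a stats block, register a by-one counter and a multi-valued one, then update them; the indices and names are hypothetical:

	enum { MY_OPEN = 0, MY_READ_BYTES, MY_LAST };

	static struct lprocfs_stats *my_stats_setup(void)
	{
		struct lprocfs_stats *stats;

		stats = lprocfs_alloc_stats(MY_LAST, LPROCFS_STATS_FLAG_NONE);
		if (!stats)
			return NULL;
		lprocfs_counter_init(stats, MY_OPEN, 0, "open", "reqs");
		lprocfs_counter_init(stats, MY_READ_BYTES,
				     LPROCFS_CNTR_AVGMINMAX,
				     "read_bytes", "bytes");
		return stats;
	}

	static void my_account_read(struct lprocfs_stats *stats, long bytes)
	{
		lprocfs_counter_incr(stats, MY_OPEN);		  /* by-one */
		lprocfs_counter_add(stats, MY_READ_BYTES, bytes); /* multi-valued */
	}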
-
-__s64 lprocfs_read_helper(struct lprocfs_counter *lc,
- struct lprocfs_counter_header *header,
- enum lprocfs_stats_flags flags,
- enum lprocfs_fields_flags field);
-__u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx,
- enum lprocfs_fields_flags field);
-
-extern struct lprocfs_stats *
-lprocfs_alloc_stats(unsigned int num, enum lprocfs_stats_flags flags);
-void lprocfs_clear_stats(struct lprocfs_stats *stats);
-void lprocfs_free_stats(struct lprocfs_stats **stats);
-void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
- unsigned int conf, const char *name,
- const char *units);
-struct obd_export;
-int lprocfs_exp_cleanup(struct obd_export *exp);
-extern const struct file_operations lprocfs_stats_seq_fops;
-
-/* lprocfs_status.c */
-void ldebugfs_add_vars(struct dentry *parent, struct lprocfs_vars *var,
- void *data);
-
-int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list,
- const struct attribute_group *attrs);
-int lprocfs_obd_cleanup(struct obd_device *obd);
-
-/* Generic callbacks */
-
-int lprocfs_rd_uint(struct seq_file *m, void *data);
-int lprocfs_wr_uint(struct file *file, const char __user *buffer,
- unsigned long count, void *data);
-int lprocfs_rd_server_uuid(struct seq_file *m, void *data);
-int lprocfs_rd_conn_uuid(struct seq_file *m, void *data);
-int lprocfs_rd_import(struct seq_file *m, void *data);
-int lprocfs_rd_state(struct seq_file *m, void *data);
-int lprocfs_rd_connect_flags(struct seq_file *m, void *data);
-
-struct adaptive_timeout;
-int lprocfs_at_hist_helper(struct seq_file *m, struct adaptive_timeout *at);
-int lprocfs_rd_timeouts(struct seq_file *m, void *data);
-int lprocfs_wr_ping(struct file *file, const char __user *buffer,
- size_t count, loff_t *off);
-int lprocfs_wr_import(struct file *file, const char __user *buffer,
- size_t count, loff_t *off);
-int lprocfs_rd_pinger_recov(struct seq_file *m, void *n);
-int lprocfs_wr_pinger_recov(struct file *file, const char __user *buffer,
- size_t count, loff_t *off);
-
-/* Statfs helpers */
-
-int lprocfs_write_helper(const char __user *buffer, unsigned long count,
- int *val);
-int lprocfs_write_u64_helper(const char __user *buffer,
- unsigned long count, __u64 *val);
-int lprocfs_write_frac_u64_helper(const char __user *buffer,
- unsigned long count,
- __u64 *val, int mult);
-char *lprocfs_find_named_value(const char *buffer, const char *name,
- size_t *count);
-void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value);
-void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value);
-void lprocfs_oh_clear(struct obd_histogram *oh);
-unsigned long lprocfs_oh_sum(struct obd_histogram *oh);
-
-void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx,
- struct lprocfs_counter *cnt);
-
-int lprocfs_single_release(struct inode *inode, struct file *file);
-int lprocfs_seq_release(struct inode *inode, struct file *file);
-
-/* Write the name##_seq_show function and call LPROC_SEQ_FOPS_RO for a
- * read-only proc entry; otherwise, also define a name##_seq_write function
- * for a read-write proc entry and call LPROC_SEQ_FOPS instead. Finally,
- * call ldebugfs_obd_seq_create(obd, filename, 0444, &name##_fops, data);
- */
-#define __LPROC_SEQ_FOPS(name, custom_seq_write) \
-static int name##_single_open(struct inode *inode, struct file *file) \
-{ \
- return single_open(file, name##_seq_show, inode->i_private); \
-} \
-static const struct file_operations name##_fops = { \
- .owner = THIS_MODULE, \
- .open = name##_single_open, \
- .read = seq_read, \
- .write = custom_seq_write, \
- .llseek = seq_lseek, \
- .release = lprocfs_single_release, \
-}
-
-#define LPROC_SEQ_FOPS_RO(name) __LPROC_SEQ_FOPS(name, NULL)
-#define LPROC_SEQ_FOPS(name) __LPROC_SEQ_FOPS(name, name##_seq_write)
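So a read-only entry reduces to one show function plus a macro invocation; a sketch, with the name and the printed value purely illustrative:

	static int my_uuid_seq_show(struct seq_file *m, void *v)
	{
		seq_printf(m, "%s\n", "my-uuid");	/* placeholder value */
		return 0;
	}
	LPROC_SEQ_FOPS_RO(my_uuid);
	/* my_uuid_fops can then be listed in a struct lprocfs_vars table */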
-
-#define LPROC_SEQ_FOPS_RO_TYPE(name, type) \
- static int name##_##type##_seq_show(struct seq_file *m, void *v)\
- { \
- return lprocfs_rd_##type(m, m->private); \
- } \
- LPROC_SEQ_FOPS_RO(name##_##type)
-
-#define LPROC_SEQ_FOPS_RW_TYPE(name, type) \
- static int name##_##type##_seq_show(struct seq_file *m, void *v)\
- { \
- return lprocfs_rd_##type(m, m->private); \
- } \
- static ssize_t name##_##type##_seq_write(struct file *file, \
- const char __user *buffer, size_t count, \
- loff_t *off) \
- { \
- struct seq_file *seq = file->private_data; \
- return lprocfs_wr_##type(file, buffer, \
- count, seq->private); \
- } \
- LPROC_SEQ_FOPS(name##_##type)
-
-#define LPROC_SEQ_FOPS_WR_ONLY(name, type) \
- static ssize_t name##_##type##_write(struct file *file, \
- const char __user *buffer, size_t count, \
- loff_t *off) \
- { \
- return lprocfs_wr_##type(file, buffer, count, off); \
- } \
- static int name##_##type##_open(struct inode *inode, struct file *file) \
- { \
- return single_open(file, NULL, inode->i_private); \
- } \
- static const struct file_operations name##_##type##_fops = { \
- .open = name##_##type##_open, \
- .write = name##_##type##_write, \
- .release = lprocfs_single_release, \
- }
-
-struct lustre_attr {
- struct attribute attr;
- ssize_t (*show)(struct kobject *kobj, struct attribute *attr,
- char *buf);
- ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
- const char *buf, size_t len);
-};
-
-#define LUSTRE_ATTR(name, mode, show, store) \
-static struct lustre_attr lustre_attr_##name = __ATTR(name, mode, show, store)
-
-#define LUSTRE_RO_ATTR(name) LUSTRE_ATTR(name, 0444, name##_show, NULL)
-#define LUSTRE_RW_ATTR(name) LUSTRE_ATTR(name, 0644, name##_show, name##_store)
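A sysfs attribute then needs only show/store callbacks whose names match the macro argument. A sketch, with the attribute name and backing variable hypothetical:

	static unsigned int my_max_pages;

	static ssize_t max_pages_show(struct kobject *kobj,
				      struct attribute *attr, char *buf)
	{
		return sprintf(buf, "%u\n", my_max_pages);
	}

	static ssize_t max_pages_store(struct kobject *kobj,
				       struct attribute *attr,
				       const char *buf, size_t count)
	{
		int rc = kstrtouint(buf, 10, &my_max_pages);

		return rc ? rc : count;
	}
	LUSTRE_RW_ATTR(max_pages);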
-
-extern const struct sysfs_ops lustre_sysfs_ops;
-
-struct root_squash_info;
-int lprocfs_wr_root_squash(const char __user *buffer, unsigned long count,
- struct root_squash_info *squash, char *name);
-int lprocfs_wr_nosquash_nids(const char __user *buffer, unsigned long count,
- struct root_squash_info *squash, char *name);
-
-/* all quota proc functions */
-int lprocfs_quota_rd_bunit(char *page, char **start,
- loff_t off, int count,
- int *eof, void *data);
-int lprocfs_quota_wr_bunit(struct file *file, const char *buffer,
- unsigned long count, void *data);
-int lprocfs_quota_rd_btune(char *page, char **start,
- loff_t off, int count,
- int *eof, void *data);
-int lprocfs_quota_wr_btune(struct file *file, const char *buffer,
- unsigned long count, void *data);
-int lprocfs_quota_rd_iunit(char *page, char **start,
- loff_t off, int count,
- int *eof, void *data);
-int lprocfs_quota_wr_iunit(struct file *file, const char *buffer,
- unsigned long count, void *data);
-int lprocfs_quota_rd_itune(char *page, char **start,
- loff_t off, int count,
- int *eof, void *data);
-int lprocfs_quota_wr_itune(struct file *file, const char *buffer,
- unsigned long count, void *data);
-int lprocfs_quota_rd_type(char *page, char **start, loff_t off, int count,
- int *eof, void *data);
-int lprocfs_quota_wr_type(struct file *file, const char *buffer,
- unsigned long count, void *data);
-int lprocfs_quota_rd_switch_seconds(char *page, char **start, loff_t off,
- int count, int *eof, void *data);
-int lprocfs_quota_wr_switch_seconds(struct file *file,
- const char *buffer,
- unsigned long count, void *data);
-int lprocfs_quota_rd_sync_blk(char *page, char **start, loff_t off,
- int count, int *eof, void *data);
-int lprocfs_quota_wr_sync_blk(struct file *file, const char *buffer,
- unsigned long count, void *data);
-int lprocfs_quota_rd_switch_qs(char *page, char **start, loff_t off,
- int count, int *eof, void *data);
-int lprocfs_quota_wr_switch_qs(struct file *file,
- const char *buffer, unsigned long count,
- void *data);
-int lprocfs_quota_rd_boundary_factor(char *page, char **start, loff_t off,
- int count, int *eof, void *data);
-int lprocfs_quota_wr_boundary_factor(struct file *file,
- const char *buffer, unsigned long count,
- void *data);
-int lprocfs_quota_rd_least_bunit(char *page, char **start, loff_t off,
- int count, int *eof, void *data);
-int lprocfs_quota_wr_least_bunit(struct file *file,
- const char *buffer, unsigned long count,
- void *data);
-int lprocfs_quota_rd_least_iunit(char *page, char **start, loff_t off,
- int count, int *eof, void *data);
-int lprocfs_quota_wr_least_iunit(struct file *file,
- const char *buffer, unsigned long count,
- void *data);
-int lprocfs_quota_rd_qs_factor(char *page, char **start, loff_t off,
- int count, int *eof, void *data);
-int lprocfs_quota_wr_qs_factor(struct file *file,
- const char *buffer, unsigned long count,
- void *data);
-#endif /* LPROCFS_SNMP_H */
diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h
deleted file mode 100644
index f29bbca5af65..000000000000
--- a/drivers/staging/lustre/lustre/include/lu_object.h
+++ /dev/null
@@ -1,1305 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __LUSTRE_LU_OBJECT_H
-#define __LUSTRE_LU_OBJECT_H
-
-#include <stdarg.h>
-#include <linux/percpu_counter.h>
-#include <linux/libcfs/libcfs.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <lu_ref.h>
-
-struct seq_file;
-struct lustre_cfg;
-struct lprocfs_stats;
-
-/** \defgroup lu lu
- * lu_* data-types represent server-side entities shared by data and meta-data
- * stacks.
- *
- * Design goals:
- *
- * -# support for layering.
- *
- * Server side object is split into layers, one per device in the
- * corresponding device stack. Individual layer is represented by struct
- * lu_object. Compound layered object --- by struct lu_object_header. Most
- * interface functions take lu_object as an argument and operate on the
- * whole compound object. This decision was made due to the following
- * reasons:
- *
- * - it's envisaged that lu_object will be used much more often than
- * lu_object_header;
- *
- * - we want lower (non-top) layers to be able to initiate operations
- * on the whole object.
- *
- * Generic code supports layering more complex than simple stacking, e.g.,
- * it is possible that at some layer object "spawns" multiple sub-objects
- * on the lower layer.
- *
- * -# fid-based identification.
- *
- * Compound object is uniquely identified by its fid. Objects are indexed
- * by their fids (hash table is used for index).
- *
- * -# caching and life-cycle management.
- *
- * Object's life-time is controlled by reference counting. When reference
- * count drops to 0, object is returned to cache. Cached objects still
- * retain their identity (i.e., fid), and can be recovered from cache.
- *
- * Objects are kept in the global LRU list, and lu_site_purge() function
- * can be used to reclaim given number of unused objects from the tail of
- * the LRU.
- *
- * -# avoiding recursion.
- *
- * Generic code tries to replace recursion through layers with iteration
- * where possible. Additionally, to reduce stack consumption, data are,
- * whenever practical, allocated through the lu_context_key interface
- * rather than on the stack.
- * @{
- */
-
-struct lu_site;
-struct lu_object;
-struct lu_device;
-struct lu_object_header;
-struct lu_context;
-struct lu_env;
-
-/**
- * Operations common for data and meta-data devices.
- */
-struct lu_device_operations {
- /**
- * Allocate object for the given device (without lower-layer
- * parts). This is called by lu_object_operations::loo_object_init()
- * from the parent layer, and should setup at least lu_object::lo_dev
- * and lu_object::lo_ops fields of resulting lu_object.
- *
- * Object creation protocol.
- *
- * Due to design goal of avoiding recursion, object creation (see
- * lu_object_alloc()) is somewhat involved:
- *
- * - first, lu_device_operations::ldo_object_alloc() method of the
- * top-level device in the stack is called. It should allocate top
- * level object (including lu_object_header), but without any
- * lower-layer sub-object(s).
- *
- * - then lu_object_alloc() sets fid in the header of newly created
- * object.
- *
- * - then lu_object_operations::loo_object_init() is called. It has
- * to allocate lower-layer object(s). To do this,
- * lu_object_operations::loo_object_init() calls ldo_object_alloc()
- * of the lower-layer device(s).
- *
- * - for all new objects allocated by
- * lu_object_operations::loo_object_init() (and inserted into object
- * stack), lu_object_operations::loo_object_init() is called again
- * repeatedly, until no new objects are created.
- *
- * \post ergo(!IS_ERR(result), result->lo_dev == d &&
- * result->lo_ops != NULL);
- */
- struct lu_object *(*ldo_object_alloc)(const struct lu_env *env,
- const struct lu_object_header *h,
- struct lu_device *d);
- /**
- * process config specific for device.
- */
- int (*ldo_process_config)(const struct lu_env *env,
- struct lu_device *, struct lustre_cfg *);
- int (*ldo_recovery_complete)(const struct lu_env *,
- struct lu_device *);
-
- /**
- * initialize local objects for device. this method called after layer
- * has been initialized (after LCFG_SETUP stage) and before it starts
- * serving user requests.
- */
-
- int (*ldo_prepare)(const struct lu_env *,
- struct lu_device *parent,
- struct lu_device *dev);
-
-};
-
-/**
- * For lu_object_conf flags
- */
-enum loc_flags {
- /* This is a new object to be allocated, or the file
- * corresponding to the object does not exist.
- */
- LOC_F_NEW = 0x00000001,
-};
-
-/**
- * Object configuration, describing particulars of the object being
- * created. On the server this is not used, as server objects are fully
- * identified by fid. On the client the configuration contains struct
- * lustre_md.
- */
-struct lu_object_conf {
- /**
- * Some hints for obj find and alloc.
- */
- enum loc_flags loc_flags;
-};
-
-/**
- * Type of "printer" function used by lu_object_operations::loo_object_print()
- * method.
- *
- * Printer function is needed to provide some flexibility in (semi-)debugging
- * output: possible implementations: printk, CDEBUG, sysfs/seq_file
- */
-typedef int (*lu_printer_t)(const struct lu_env *env,
- void *cookie, const char *format, ...)
- __printf(3, 4);
-
-/**
- * Operations specific for particular lu_object.
- */
-struct lu_object_operations {
- /**
- * Allocate lower-layer parts of the object by calling
- * lu_device_operations::ldo_object_alloc() of the corresponding
- * underlying device.
- *
- * This method is called once for each object inserted into the object
- * stack. It is the responsibility of this method to insert the
- * lower-layer object(s) it creates into the appropriate places in the
- * object stack.
- */
- int (*loo_object_init)(const struct lu_env *env,
- struct lu_object *o,
- const struct lu_object_conf *conf);
- /**
- * Called (in top-to-bottom order) during object allocation after all
- * layers were allocated and initialized. Can be used to perform
- * initialization depending on lower layers.
- */
- int (*loo_object_start)(const struct lu_env *env,
- struct lu_object *o);
- /**
- * Called before lu_object_operations::loo_object_free() to signal
- * that object is being destroyed. Dual to
- * lu_object_operations::loo_object_init().
- */
- void (*loo_object_delete)(const struct lu_env *env,
- struct lu_object *o);
- /**
- * Dual to lu_device_operations::ldo_object_alloc(). Called when
- * object is removed from memory.
- */
- void (*loo_object_free)(const struct lu_env *env,
- struct lu_object *o);
- /**
- * Called when last active reference to the object is released (and
- * object returns to the cache). This method is optional.
- */
- void (*loo_object_release)(const struct lu_env *env,
- struct lu_object *o);
- /**
- * Optional debugging helper. Print given object.
- */
- int (*loo_object_print)(const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct lu_object *o);
- /**
- * Optional debugging method. Returns true iff the object is internally
- * consistent.
- */
- int (*loo_object_invariant)(const struct lu_object *o);
-};
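-
-/**
- * A hedged sketch of how a middle layer might implement
- * lu_object_operations::loo_object_init() (the foo_* names, including the
- * foo_next_device() helper, are hypothetical): it asks the next lower
- * device to allocate its slice and adds that slice to the object stack.
- *
- * \code
- * static int foo_object_init(const struct lu_env *env, struct lu_object *o,
- *                            const struct lu_object_conf *conf)
- * {
- *         struct lu_device *next = foo_next_device(o->lo_dev);
- *         struct lu_object *below;
- *
- *         below = next->ld_ops->ldo_object_alloc(env, o->lo_header, next);
- *         if (IS_ERR(below))
- *                 return PTR_ERR(below);
- *         lu_object_add(o, below);
- *         return 0;
- * }
- * \endcode
- */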
-
-/**
- * Type of lu_device.
- */
-struct lu_device_type;
-
-/**
- * Device: a layer in the server side abstraction stacking.
- */
-struct lu_device {
- /**
- * reference count. This is incremented, in particular, on each object
- * created at this layer.
- *
- * \todo XXX which means that atomic_t is probably too small.
- */
- atomic_t ld_ref;
- /**
- * Pointer to device type. Never modified once set.
- */
- struct lu_device_type *ld_type;
- /**
- * Operation vector for this device.
- */
- const struct lu_device_operations *ld_ops;
- /**
- * Stack this device belongs to.
- */
- struct lu_site *ld_site;
-
- /** \todo XXX: temporary back pointer into obd. */
- struct obd_device *ld_obd;
- /**
- * A list of references to this object, for debugging.
- */
- struct lu_ref ld_reference;
- /**
- * Link the device to the site.
- **/
- struct list_head ld_linkage;
-};
-
-struct lu_device_type_operations;
-
-/**
- * Tag bits for device type. They are used to distinguish certain groups of
- * device types.
- */
-enum lu_device_tag {
- /** this is meta-data device */
- LU_DEVICE_MD = (1 << 0),
- /** this is data device */
- LU_DEVICE_DT = (1 << 1),
- /** data device in the client stack */
- LU_DEVICE_CL = (1 << 2)
-};
-
-/**
- * Type of device.
- */
-struct lu_device_type {
- /**
- * Tag bits. Taken from enum lu_device_tag. Never modified once set.
- */
- __u32 ldt_tags;
- /**
- * Name of this class. Unique system-wide. Never modified once set.
- */
- char *ldt_name;
- /**
- * Operations for this type.
- */
- const struct lu_device_type_operations *ldt_ops;
- /**
- * \todo XXX: temporary pointer to associated obd_type.
- */
- struct obd_type *ldt_obd_type;
- /**
- * \todo XXX: temporary: context tags used by obd_*() calls.
- */
- __u32 ldt_ctx_tags;
- /**
- * Number of existing device instances of this type.
- */
- atomic_t ldt_device_nr;
- /**
- * Linkage into a global list of all device types.
- *
- * \see lu_device_types.
- */
- struct list_head ldt_linkage;
-};
-
-/**
- * Operations on a device type.
- */
-struct lu_device_type_operations {
- /**
- * Allocate new device.
- */
- struct lu_device *(*ldto_device_alloc)(const struct lu_env *env,
- struct lu_device_type *t,
- struct lustre_cfg *lcfg);
- /**
- * Free device. Dual to
- * lu_device_type_operations::ldto_device_alloc(). Returns pointer to
- * the next device in the stack.
- */
- struct lu_device *(*ldto_device_free)(const struct lu_env *,
- struct lu_device *);
-
- /**
- * Initialize the devices after allocation
- */
- int (*ldto_device_init)(const struct lu_env *env,
- struct lu_device *, const char *,
- struct lu_device *);
- /**
- * Finalize device. Dual to
- * lu_device_type_operations::ldto_device_init(). Returns pointer to
- * the next device in the stack.
- */
- struct lu_device *(*ldto_device_fini)(const struct lu_env *env,
- struct lu_device *);
- /**
- * Initialize device type. This is called on module load.
- */
- int (*ldto_init)(struct lu_device_type *t);
- /**
- * Finalize device type. Dual to
- * lu_device_type_operations::ldto_init(). Called on module unload.
- */
- void (*ldto_fini)(struct lu_device_type *t);
- /**
- * Called when the first device is created.
- */
- void (*ldto_start)(struct lu_device_type *t);
- /**
- * Called when number of devices drops to 0.
- */
- void (*ldto_stop)(struct lu_device_type *t);
-};
-
-static inline int lu_device_is_md(const struct lu_device *d)
-{
- return ergo(d, d->ld_type->ldt_tags & LU_DEVICE_MD);
-}
-
-/**
- * Common object attributes.
- */
-struct lu_attr {
- /** size in bytes */
- __u64 la_size;
- /** modification time in seconds since Epoch */
- s64 la_mtime;
- /** access time in seconds since Epoch */
- s64 la_atime;
- /** change time in seconds since Epoch */
- s64 la_ctime;
- /** 512-byte blocks allocated to object */
- __u64 la_blocks;
- /** permission bits and file type */
- __u32 la_mode;
- /** owner id */
- __u32 la_uid;
- /** group id */
- __u32 la_gid;
- /** object flags */
- __u32 la_flags;
- /** number of persistent references to this object */
- __u32 la_nlink;
- /** block bits of the object */
- __u32 la_blkbits;
- /** block size of the object */
- __u32 la_blksize;
- /** real device */
- __u32 la_rdev;
- /**
- * valid bits
- *
- * \see enum la_valid
- */
- __u64 la_valid;
-};
-
-/** Bit-mask of valid attributes */
-enum la_valid {
- LA_ATIME = 1 << 0,
- LA_MTIME = 1 << 1,
- LA_CTIME = 1 << 2,
- LA_SIZE = 1 << 3,
- LA_MODE = 1 << 4,
- LA_UID = 1 << 5,
- LA_GID = 1 << 6,
- LA_BLOCKS = 1 << 7,
- LA_TYPE = 1 << 8,
- LA_FLAGS = 1 << 9,
- LA_NLINK = 1 << 10,
- LA_RDEV = 1 << 11,
- LA_BLKSIZE = 1 << 12,
- LA_KILL_SUID = 1 << 13,
- LA_KILL_SGID = 1 << 14,
-};
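-
-/**
- * Illustrative sketch only: a caller filling in a struct lu_attr marks the
- * fields it has set in la_valid, so that consumers know which members to
- * trust (treating the S_IFMT file-type bits in la_mode as the LA_TYPE
- * attribute is an assumption here).
- *
- * \code
- * struct lu_attr attr = { 0 };
- *
- * attr.la_size  = 0;
- * attr.la_mode  = S_IFREG | 0644;
- * attr.la_valid = LA_SIZE | LA_MODE | LA_TYPE;
- * \endcode
- */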
-
-/**
- * Layer in the layered object.
- */
-struct lu_object {
- /**
- * Header for this object.
- */
- struct lu_object_header *lo_header;
- /**
- * Device for this layer.
- */
- struct lu_device *lo_dev;
- /**
- * Operations for this object.
- */
- const struct lu_object_operations *lo_ops;
- /**
- * Linkage into list of all layers.
- */
- struct list_head lo_linkage;
- /**
- * Link to the device, for debugging.
- */
- struct lu_ref_link lo_dev_ref;
-};
-
-enum lu_object_header_flags {
- /**
- * Don't keep this object in cache. Object will be destroyed as soon
- * as last reference to it is released. This flag cannot be cleared
- * once set.
- */
- LU_OBJECT_HEARD_BANSHEE = 0,
- /**
- * Marks that this object has already been taken out of the cache.
- */
- LU_OBJECT_UNHASHED = 1,
-};
-
-enum lu_object_header_attr {
- LOHA_EXISTS = 1 << 0,
- LOHA_REMOTE = 1 << 1,
- /**
- * UNIX file type is stored in S_IFMT bits.
- */
- LOHA_FT_START = 001 << 12, /**< S_IFIFO */
- LOHA_FT_END = 017 << 12, /**< S_IFMT */
-};
-
-/**
- * "Compound" object, consisting of multiple layers.
- *
- * A compound object with a given fid is unique within a given lu_site.
- *
- * Note that the object does *not* necessarily correspond to a real object
- * in persistent storage: the object is an anchor for locking and method
- * calling, so it is created for things like a not-yet-existing child
- * created by mkdir or create calls. lu_object_exists() can be used to
- * check whether the object is backed by a persistent storage entity.
- */
-struct lu_object_header {
- /**
- * Fid, uniquely identifying this object.
- */
- struct lu_fid loh_fid;
- /**
- * Object flags from enum lu_object_header_flags. Set and checked
- * atomically.
- */
- unsigned long loh_flags;
- /**
- * Object reference count. Protected by lu_site::ls_guard.
- */
- atomic_t loh_ref;
- /**
- * Common object attributes, cached for efficiency. From enum
- * lu_object_header_attr.
- */
- __u32 loh_attr;
- /**
- * Linkage into per-site hash table. Protected by lu_site::ls_guard.
- */
- struct hlist_node loh_hash;
- /**
- * Linkage into per-site LRU list. Protected by lu_site::ls_guard.
- */
- struct list_head loh_lru;
- /**
- * Linkage into the list of layers. Never modified once set (except late
- * in object destruction). No locking is necessary.
- */
- struct list_head loh_layers;
- /**
- * A list of references to this object, for debugging.
- */
- struct lu_ref loh_reference;
-};
-
-struct fld;
-struct lu_site_bkt_data;
-
-enum {
- LU_SS_CREATED = 0,
- LU_SS_CACHE_HIT,
- LU_SS_CACHE_MISS,
- LU_SS_CACHE_RACE,
- LU_SS_CACHE_DEATH_RACE,
- LU_SS_LRU_PURGED,
- LU_SS_LAST_STAT
-};
-
-/**
- * lu_site is a "compartment" within which objects are unique, and LRU
- * discipline is maintained.
- *
- * lu_site exists so that multiple layered stacks can co-exist in the same
- * address space.
- *
- * lu_site has the same relation to lu_device as lu_object_header to
- * lu_object.
- */
-struct lu_site {
- /**
- * objects hash table
- */
- struct cfs_hash *ls_obj_hash;
- /**
- * index of bucket on hash table while purging
- */
- unsigned int ls_purge_start;
- /**
- * Top-level device for this stack.
- */
- struct lu_device *ls_top_dev;
- /**
- * Bottom-level device for this stack
- */
- struct lu_device *ls_bottom_dev;
- /**
- * Linkage into global list of sites.
- */
- struct list_head ls_linkage;
- /**
- * List of lu_device instances for this site, protected
- * by ls_ld_lock.
- **/
- struct list_head ls_ld_linkage;
- spinlock_t ls_ld_lock;
-
- /**
- * Lock to serialize site purge.
- */
- struct mutex ls_purge_mutex;
-
- /**
- * lu_site stats
- */
- struct lprocfs_stats *ls_stats;
- /**
- * XXX: a hack! fld has to find md_site via site, remove when possible
- */
- struct seq_server_site *ld_seq_site;
- /**
- * Number of objects in lsb_lru_lists - used for shrinking
- */
- struct percpu_counter ls_lru_len_counter;
-};
-
-wait_queue_head_t *
-lu_site_wq_from_fid(struct lu_site *site, struct lu_fid *fid);
-
-static inline struct seq_server_site *lu_site2seq(const struct lu_site *s)
-{
- return s->ld_seq_site;
-}
-
-/** \name ctors
- * Constructors/destructors.
- * @{
- */
-
-int lu_site_init(struct lu_site *s, struct lu_device *d);
-void lu_site_fini(struct lu_site *s);
-int lu_site_init_finish(struct lu_site *s);
-void lu_stack_fini(const struct lu_env *env, struct lu_device *top);
-void lu_device_get(struct lu_device *d);
-void lu_device_put(struct lu_device *d);
-int lu_device_init(struct lu_device *d, struct lu_device_type *t);
-void lu_device_fini(struct lu_device *d);
-int lu_object_header_init(struct lu_object_header *h);
-void lu_object_header_fini(struct lu_object_header *h);
-int lu_object_init(struct lu_object *o,
- struct lu_object_header *h, struct lu_device *d);
-void lu_object_fini(struct lu_object *o);
-void lu_object_add_top(struct lu_object_header *h, struct lu_object *o);
-void lu_object_add(struct lu_object *before, struct lu_object *o);
-
-/**
- * Helpers to initialize and finalize device types.
- */
-
-int lu_device_type_init(struct lu_device_type *ldt);
-void lu_device_type_fini(struct lu_device_type *ldt);
-
-/** @} ctors */
-
-/** \name caching
- * Caching and reference counting.
- * @{
- */
-
-/**
- * Acquire an additional reference to the given object. To acquire the
- * initial reference, use lu_object_find().
- */
-static inline void lu_object_get(struct lu_object *o)
-{
- LASSERT(atomic_read(&o->lo_header->loh_ref) > 0);
- atomic_inc(&o->lo_header->loh_ref);
-}
-
-/**
- * Return true if the object will not be cached after the last reference to
- * it is released.
- */
-static inline int lu_object_is_dying(const struct lu_object_header *h)
-{
- return test_bit(LU_OBJECT_HEARD_BANSHEE, &h->loh_flags);
-}
-
-void lu_object_put(const struct lu_env *env, struct lu_object *o);
-void lu_object_unhash(const struct lu_env *env, struct lu_object *o);
-int lu_site_purge_objects(const struct lu_env *env, struct lu_site *s, int nr,
- bool canblock);
-
-static inline int lu_site_purge(const struct lu_env *env, struct lu_site *s,
- int nr)
-{
- return lu_site_purge_objects(env, s, nr, true);
-}
-
-void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie,
- lu_printer_t printer);
-struct lu_object *lu_object_find_at(const struct lu_env *env,
- struct lu_device *dev,
- const struct lu_fid *f,
- const struct lu_object_conf *conf);
-struct lu_object *lu_object_find_slice(const struct lu_env *env,
- struct lu_device *dev,
- const struct lu_fid *f,
- const struct lu_object_conf *conf);
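-
-/**
- * A minimal usage sketch (assuming a valid env, dev, and fid are at hand,
- * and that passing a NULL conf is acceptable for the lookup): look an
- * object up by fid, use it, then drop the reference.
- *
- * \code
- * struct lu_object *o;
- *
- * o = lu_object_find_at(env, dev, fid, NULL);
- * if (IS_ERR(o))
- *         return PTR_ERR(o);
- * // ... use the object ...
- * lu_object_put(env, o);
- * \endcode
- */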
-/** @} caching */
-
-/** \name helpers
- * Helpers.
- * @{
- */
-
-/**
- * First (topmost) sub-object of given compound object
- */
-static inline struct lu_object *lu_object_top(struct lu_object_header *h)
-{
- LASSERT(!list_empty(&h->loh_layers));
- return list_first_entry(&h->loh_layers, struct lu_object, lo_linkage);
-}
-
-/**
- * Next sub-object in the layering
- */
-static inline const struct lu_object *lu_object_next(const struct lu_object *o)
-{
- return list_next_entry(o, lo_linkage);
-}
-
-/**
- * Pointer to the fid of this object.
- */
-static inline const struct lu_fid *lu_object_fid(const struct lu_object *o)
-{
- return &o->lo_header->loh_fid;
-}
-
-/**
- * Return the device operations vector for this object.
- */
-static inline const struct lu_device_operations *
-lu_object_ops(const struct lu_object *o)
-{
- return o->lo_dev->ld_ops;
-}
-
-/**
- * Given a compound object, find its slice, corresponding to the device type
- * \a dtype.
- */
-struct lu_object *lu_object_locate(struct lu_object_header *h,
- const struct lu_device_type *dtype);
-
-/**
- * Printer function emitting messages through libcfs_debug_msg().
- */
-int lu_cdebug_printer(const struct lu_env *env,
- void *cookie, const char *format, ...);
-
-/**
- * Print object description followed by a user-supplied message.
- */
-#define LU_OBJECT_DEBUG(mask, env, object, format, ...) \
-do { \
- if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
- lu_object_print(env, &msgdata, lu_cdebug_printer, object);\
- CDEBUG(mask, format "\n", ## __VA_ARGS__); \
- } \
-} while (0)
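-
-/**
- * Usage sketch (illustrative): dump an object followed by a short note at
- * the D_INFO debug level.
- *
- * \code
- * LU_OBJECT_DEBUG(D_INFO, env, o, "found in cache");
- * \endcode
- */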
-
-/**
- * Print short object description followed by a user-supplied message.
- */
-#define LU_OBJECT_HEADER(mask, env, object, format, ...) \
-do { \
- if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
- lu_object_header_print(env, &msgdata, lu_cdebug_printer,\
- (object)->lo_header); \
- lu_cdebug_printer(env, &msgdata, "\n"); \
- CDEBUG(mask, format, ## __VA_ARGS__); \
- } \
-} while (0)
-
-void lu_object_print(const struct lu_env *env, void *cookie,
- lu_printer_t printer, const struct lu_object *o);
-void lu_object_header_print(const struct lu_env *env, void *cookie,
- lu_printer_t printer,
- const struct lu_object_header *hdr);
-
-/**
- * Check object consistency.
- */
-int lu_object_invariant(const struct lu_object *o);
-
-/**
- * Check whether the object exists, on either local or remote storage.
- * Note: LOHA_EXISTS is set once someone has created the object; it does
- * not need to be committed to storage for the flag to be set.
- */
-#define lu_object_exists(o) ((o)->lo_header->loh_attr & LOHA_EXISTS)
-
-/**
- * Check whether the object is on remote storage.
- */
-#define lu_object_remote(o) unlikely((o)->lo_header->loh_attr & LOHA_REMOTE)
-
-static inline int lu_object_assert_exists(const struct lu_object *o)
-{
- return lu_object_exists(o);
-}
-
-static inline int lu_object_assert_not_exists(const struct lu_object *o)
-{
- return !lu_object_exists(o);
-}
-
-/**
- * Attr of this object.
- */
-static inline __u32 lu_object_attr(const struct lu_object *o)
-{
- LASSERT(lu_object_exists(o) != 0);
- return o->lo_header->loh_attr;
-}
-
-static inline void lu_object_ref_add(struct lu_object *o,
- const char *scope,
- const void *source)
-{
- lu_ref_add(&o->lo_header->loh_reference, scope, source);
-}
-
-static inline void lu_object_ref_add_at(struct lu_object *o,
- struct lu_ref_link *link,
- const char *scope,
- const void *source)
-{
- lu_ref_add_at(&o->lo_header->loh_reference, link, scope, source);
-}
-
-static inline void lu_object_ref_del(struct lu_object *o,
- const char *scope, const void *source)
-{
- lu_ref_del(&o->lo_header->loh_reference, scope, source);
-}
-
-static inline void lu_object_ref_del_at(struct lu_object *o,
- struct lu_ref_link *link,
- const char *scope, const void *source)
-{
- lu_ref_del_at(&o->lo_header->loh_reference, link, scope, source);
-}
-
-/** input params, should be filled out by mdt */
-struct lu_rdpg {
- /** hash */
- __u64 rp_hash;
- /** count in bytes */
- unsigned int rp_count;
- /** number of pages */
- unsigned int rp_npages;
- /** requested attr */
- __u32 rp_attrs;
- /** pointers to pages */
- struct page **rp_pages;
-};
-
-enum lu_xattr_flags {
- LU_XATTR_REPLACE = (1 << 0),
- LU_XATTR_CREATE = (1 << 1)
-};
-
-/** @} helpers */
-
-/** \name lu_context
- * @{
- */
-
-/** For lu_context health-checks */
-enum lu_context_state {
- LCS_INITIALIZED = 1,
- LCS_ENTERED,
- LCS_LEFT,
- LCS_FINALIZED
-};
-
-/**
- * lu_context. Execution context for lu_object methods. Currently associated
- * with a thread.
- *
- * All lu_object methods, except device and device type methods (called
- * during system initialization and shutdown), are executed "within" some
- * lu_context. This means that a pointer to the "current" lu_context is
- * passed as an argument to all methods.
- *
- * All service ptlrpc threads create lu_context as part of their
- * initialization. It is possible to create "stand-alone" context for other
- * execution environments (like system calls).
- *
- * lu_object methods mainly use lu_context through lu_context_key interface
- * that allows each layer to associate arbitrary pieces of data with each
- * context (see pthread_key_create(3) for similar interface).
- *
- * On a client, lu_context is bound to a thread, see cl_env_get().
- *
- * \see lu_context_key
- */
-struct lu_context {
- /**
- * lu_context is used on the client side too. Yet we don't want to
- * allocate values of server-side keys for the client contexts and
- * vice versa.
- *
- * To achieve this, a set of tags is introduced. Contexts and keys are
- * marked with tags. A key value is created only for a context whose set
- * of tags has a non-empty intersection with that of the key. Tags are
- * taken from enum lu_context_tag.
- */
- __u32 lc_tags;
- enum lu_context_state lc_state;
- /**
- * Pointer to the home service thread. NULL for other execution
- * contexts.
- */
- struct ptlrpc_thread *lc_thread;
- /**
- * Pointer to an array with key values. Internal implementation
- * detail.
- */
- void **lc_value;
- /**
- * Linkage into a list of all remembered contexts. Only
- * `non-transient' contexts, i.e., ones created for service threads,
- * are placed here.
- */
- struct list_head lc_remember;
- /**
- * Version counter used to skip calls to lu_context_refill() when no
- * keys were registered.
- */
- unsigned int lc_version;
- /**
- * Debugging cookie.
- */
- unsigned int lc_cookie;
-};
-
-/**
- * lu_context_key interface. Similar to pthread_key.
- */
-
-enum lu_context_tag {
- /**
- * Thread on md server
- */
- LCT_MD_THREAD = 1 << 0,
- /**
- * Thread on dt server
- */
- LCT_DT_THREAD = 1 << 1,
- /**
- * Context for transaction handle
- */
- LCT_TX_HANDLE = 1 << 2,
- /**
- * Thread on client
- */
- LCT_CL_THREAD = 1 << 3,
- /**
- * A per-request session on a server, and a per-system-call session on
- * a client.
- */
- LCT_SESSION = 1 << 4,
- /**
- * A per-request data on OSP device
- */
- LCT_OSP_THREAD = 1 << 5,
- /**
- * MGS device thread
- */
- LCT_MG_THREAD = 1 << 6,
- /**
- * Context for local operations
- */
- LCT_LOCAL = 1 << 7,
- /**
- * session for server thread
- **/
- LCT_SERVER_SESSION = BIT(8),
- /**
- * Set when at least one of the keys having values in this context has a
- * non-NULL lu_context_key::lct_exit() method. This is used to
- * optimize the lu_context_exit() call.
- */
- LCT_HAS_EXIT = 1 << 28,
- /**
- * Don't add references for modules creating key values in that context.
- * This is only for contexts used internally by lu_object framework.
- */
- LCT_NOREF = 1 << 29,
- /**
- * Key is being prepared for retiring, don't create new values for it.
- */
- LCT_QUIESCENT = 1 << 30,
- /**
- * Context should be remembered.
- */
- LCT_REMEMBER = 1 << 31,
- /**
- * Contexts usable in cache shrinker thread.
- */
- LCT_SHRINKER = LCT_MD_THREAD | LCT_DT_THREAD | LCT_CL_THREAD |
- LCT_NOREF
-};
-
-/**
- * Key. Represents per-context value slot.
- *
- * Keys are usually registered when the module owning the key is initialized,
- * and de-registered when the module is unloaded. Once a key is registered,
- * all new contexts with matching tags will get the key value. "Old" contexts,
- * already initialized at the time of key registration, can be forced to get
- * the key value by calling lu_context_refill().
- *
- * Every key value is counted in lu_context_key::lct_used and acquires a
- * reference on the owning module. This means that all key values have to be
- * destroyed before the module can be unloaded. This is usually achieved by
- * stopping the threads started by the module, which created contexts in
- * their entry functions. The situation is complicated by threads shared
- * between multiple modules, like the ptlrpcd daemon on a client. To work
- * around this problem, contexts created in such threads are `remembered'
- * (see LCT_REMEMBER)---i.e., added to a global list. When a module is
- * preparing for unloading it does the following:
- *
- * - marks its keys as `quiescent' (lu_context_tag::LCT_QUIESCENT)
- * preventing new key values from being allocated in the new contexts,
- * and
- *
- * - scans a list of remembered contexts, destroying values of module
- * keys, thus releasing references to the module.
- *
- * This is done by lu_context_key_quiesce(). If the module is re-activated
- * before the key has been de-registered, a lu_context_key_revive() call
- * clears the `quiescent' marker.
- *
- * lu_context code doesn't provide any internal synchronization for these
- * activities---it's assumed that startup (including threads start-up) and
- * shutdown are serialized by some external means.
- *
- * \see lu_context
- */
-struct lu_context_key {
- /**
- * Set of tags for which values of this key are to be instantiated.
- */
- __u32 lct_tags;
- /**
- * Value constructor. This is called when a new value is created for a
- * context. Returns a pointer to the new value or an error pointer.
- */
- void *(*lct_init)(const struct lu_context *ctx,
- struct lu_context_key *key);
- /**
- * Value destructor. Called when context with previously allocated
- * value of this slot is destroyed. \a data is a value that was returned
- * by a matching call to lu_context_key::lct_init().
- */
- void (*lct_fini)(const struct lu_context *ctx,
- struct lu_context_key *key, void *data);
- /**
- * Optional method called on lu_context_exit() for all allocated
- * keys. Can be used by debugging code checking that locks are
- * released, etc.
- */
- void (*lct_exit)(const struct lu_context *ctx,
- struct lu_context_key *key, void *data);
- /**
- * Internal implementation detail: index within lu_context::lc_value[]
- * reserved for this key.
- */
- int lct_index;
- /**
- * Internal implementation detail: number of values created for this
- * key.
- */
- atomic_t lct_used;
- /**
- * Internal implementation detail: module for this key.
- */
- struct module *lct_owner;
- /**
- * References to this key. For debugging.
- */
- struct lu_ref lct_reference;
-};
-
-#define LU_KEY_INIT(mod, type) \
- static void *mod##_key_init(const struct lu_context *ctx, \
- struct lu_context_key *key) \
- { \
- type *value; \
- \
- BUILD_BUG_ON(sizeof(*value) > PAGE_SIZE); \
- \
- value = kzalloc(sizeof(*value), GFP_NOFS); \
- if (!value) \
- value = ERR_PTR(-ENOMEM); \
- \
- return value; \
- } \
- struct __##mod##__dummy_init {; } /* semicolon catcher */
-
-#define LU_KEY_FINI(mod, type) \
- static void mod##_key_fini(const struct lu_context *ctx, \
- struct lu_context_key *key, void *data) \
- { \
- type *info = data; \
- \
- kfree(info); \
- } \
- struct __##mod##__dummy_fini {; } /* semicolon catcher */
-
-#define LU_KEY_INIT_FINI(mod, type) \
- LU_KEY_INIT(mod, type); \
- LU_KEY_FINI(mod, type)
-
-#define LU_CONTEXT_KEY_DEFINE(mod, tags) \
- struct lu_context_key mod##_thread_key = { \
- .lct_tags = tags, \
- .lct_init = mod##_key_init, \
- .lct_fini = mod##_key_fini \
- }
-
-#define LU_CONTEXT_KEY_INIT(key) \
-do { \
- (key)->lct_owner = THIS_MODULE; \
-} while (0)
-
-int lu_context_key_register(struct lu_context_key *key);
-void lu_context_key_degister(struct lu_context_key *key);
-void *lu_context_key_get(const struct lu_context *ctx,
- const struct lu_context_key *key);
-void lu_context_key_quiesce(struct lu_context_key *key);
-void lu_context_key_revive(struct lu_context_key *key);
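-
-/**
- * A hedged sketch of the typical key-definition pattern built from the
- * macros above (the foo_* names are hypothetical). The key still has to be
- * registered, either directly via lu_context_key_register() or through the
- * LU_TYPE_INIT() helpers below.
- *
- * \code
- * struct foo_thread_info {
- *         int fti_scratch;
- * };
- *
- * LU_KEY_INIT_FINI(foo, struct foo_thread_info);
- * LU_CONTEXT_KEY_DEFINE(foo, LCT_MD_THREAD);
- *
- * // Retrieving the per-context value from a struct lu_context *ctx:
- * struct foo_thread_info *info;
- *
- * info = lu_context_key_get(ctx, &foo_thread_key);
- * \endcode
- */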
-
-/*
- * LU_KEY_INIT_GENERIC() has to be a macro to correctly determine an
- * owning module.
- */
-
-#define LU_KEY_INIT_GENERIC(mod) \
- static void mod##_key_init_generic(struct lu_context_key *k, ...) \
- { \
- struct lu_context_key *key = k; \
- va_list args; \
- \
- va_start(args, k); \
- do { \
- LU_CONTEXT_KEY_INIT(key); \
- key = va_arg(args, struct lu_context_key *); \
- } while (key); \
- va_end(args); \
- }
-
-#define LU_TYPE_INIT(mod, ...) \
- LU_KEY_INIT_GENERIC(mod) \
- static int mod##_type_init(struct lu_device_type *t) \
- { \
- mod##_key_init_generic(__VA_ARGS__, NULL); \
- return lu_context_key_register_many(__VA_ARGS__, NULL); \
- } \
- struct __##mod##_dummy_type_init {; }
-
-#define LU_TYPE_FINI(mod, ...) \
- static void mod##_type_fini(struct lu_device_type *t) \
- { \
- lu_context_key_degister_many(__VA_ARGS__, NULL); \
- } \
- struct __##mod##_dummy_type_fini {; }
-
-#define LU_TYPE_START(mod, ...) \
- static void mod##_type_start(struct lu_device_type *t) \
- { \
- lu_context_key_revive_many(__VA_ARGS__, NULL); \
- } \
- struct __##mod##_dummy_type_start {; }
-
-#define LU_TYPE_STOP(mod, ...) \
- static void mod##_type_stop(struct lu_device_type *t) \
- { \
- lu_context_key_quiesce_many(__VA_ARGS__, NULL); \
- } \
- struct __##mod##_dummy_type_stop {; }
-
-#define LU_TYPE_INIT_FINI(mod, ...) \
- LU_TYPE_INIT(mod, __VA_ARGS__); \
- LU_TYPE_FINI(mod, __VA_ARGS__); \
- LU_TYPE_START(mod, __VA_ARGS__); \
- LU_TYPE_STOP(mod, __VA_ARGS__)
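-
-/**
- * Sketch (hypothetical foo names): LU_TYPE_INIT_FINI() generates
- * foo_type_{init,fini,start,stop}(), which slot directly into a
- * struct lu_device_type_operations:
- *
- * \code
- * LU_TYPE_INIT_FINI(foo, &foo_thread_key);
- *
- * static const struct lu_device_type_operations foo_device_type_ops = {
- *         .ldto_init  = foo_type_init,
- *         .ldto_fini  = foo_type_fini,
- *         .ldto_start = foo_type_start,
- *         .ldto_stop  = foo_type_stop,
- * };
- * \endcode
- */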
-
-int lu_context_init(struct lu_context *ctx, __u32 tags);
-void lu_context_fini(struct lu_context *ctx);
-void lu_context_enter(struct lu_context *ctx);
-void lu_context_exit(struct lu_context *ctx);
-int lu_context_refill(struct lu_context *ctx);
-
-/*
- * Helper functions to operate on multiple keys. These are used by the default
- * device type operations, defined by LU_TYPE_INIT_FINI().
- */
-
-int lu_context_key_register_many(struct lu_context_key *k, ...);
-void lu_context_key_degister_many(struct lu_context_key *k, ...);
-void lu_context_key_revive_many(struct lu_context_key *k, ...);
-void lu_context_key_quiesce_many(struct lu_context_key *k, ...);
-
-/**
- * Environment.
- */
-struct lu_env {
- /**
- * "Local" context, used to store data instead of stack.
- */
- struct lu_context le_ctx;
- /**
- * "Session" context for per-request data.
- */
- struct lu_context *le_ses;
-};
-
-int lu_env_init(struct lu_env *env, __u32 tags);
-void lu_env_fini(struct lu_env *env);
-int lu_env_refill(struct lu_env *env);
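-
-/**
- * Minimal lifecycle sketch (assuming LCT_LOCAL tags suffice for the keys
- * the caller needs): set up an environment on the stack, use it, tear it
- * down.
- *
- * \code
- * struct lu_env env;
- * int rc;
- *
- * rc = lu_env_init(&env, LCT_LOCAL);
- * if (rc)
- *         return rc;
- * // ... use env.le_ctx with lu_context_key_get(), object methods, etc.
- * lu_env_fini(&env);
- * \endcode
- */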
-
-/** @} lu_context */
-
-/**
- * Output site statistical counters into a buffer. Suitable for
- * ll_rd_*()-style functions.
- */
-int lu_site_stats_print(const struct lu_site *s, struct seq_file *m);
-
-/**
- * Common name structure to be passed around for various name related methods.
- */
-struct lu_name {
- const char *ln_name;
- int ln_namelen;
-};
-
-/**
- * Validate names (path components)
- *
- * To be valid, \a name must be non-empty, '\0'-terminated, of length
- * \a name_len, and must not contain '/'. The maximum length of a name
- * (before, say, -ENAMETOOLONG is returned) is really controlled by llite
- * and the server. Here we only check for something insane coming from bad
- * integer handling.
- */
-static inline bool lu_name_is_valid_2(const char *name, size_t name_len)
-{
- return name && name_len > 0 && name_len < INT_MAX &&
- name[name_len] == '\0' && strlen(name) == name_len &&
- !memchr(name, '/', name_len);
-}
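-
-/**
- * A convenience wrapper one might build on top (a sketch; not declared in
- * this header): validate a struct lu_name by delegating to
- * lu_name_is_valid_2().
- *
- * \code
- * static inline bool lu_name_is_valid(const struct lu_name *ln)
- * {
- *         return lu_name_is_valid_2(ln->ln_name, ln->ln_namelen);
- * }
- * \endcode
- */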
-
-/**
- * Common buffer structure to be passed around for various xattr_{s,g}et()
- * methods.
- */
-struct lu_buf {
- void *lb_buf;
- size_t lb_len;
-};
-
-/**
- * One-time initializers, called at obdclass module initialization, not
- * exported.
- */
-
-/**
- * Initialization of global lu_* data.
- */
-int lu_global_init(void);
-
-/**
- * Dual to lu_global_init().
- */
-void lu_global_fini(void);
-
-struct lu_kmem_descr {
- struct kmem_cache **ckd_cache;
- const char *ckd_name;
- const size_t ckd_size;
-};
-
-int lu_kmem_init(struct lu_kmem_descr *caches);
-void lu_kmem_fini(struct lu_kmem_descr *caches);
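-
-/**
- * Usage sketch (hypothetical foo names, and assuming the descriptor array
- * is terminated by an entry with a NULL ckd_cache): describe a batch of
- * caches and create/destroy them together.
- *
- * \code
- * static struct kmem_cache *foo_object_kmem;
- *
- * static struct lu_kmem_descr foo_caches[] = {
- *         {
- *                 .ckd_cache = &foo_object_kmem,
- *                 .ckd_name  = "foo_object_kmem",
- *                 .ckd_size  = sizeof(struct foo_object)
- *         },
- *         {
- *                 .ckd_cache = NULL
- *         }
- * };
- *
- * int rc = lu_kmem_init(foo_caches); // paired with lu_kmem_fini(foo_caches)
- * \endcode
- */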
-
-extern __u32 lu_context_tags_default;
-extern __u32 lu_session_tags_default;
-
-/** @} lu */
-#endif /* __LUSTRE_LU_OBJECT_H */
diff --git a/drivers/staging/lustre/lustre/include/lu_ref.h b/drivers/staging/lustre/lustre/include/lu_ref.h
deleted file mode 100644
index ad0c24d29ffa..000000000000
--- a/drivers/staging/lustre/lustre/include/lu_ref.h
+++ /dev/null
@@ -1,178 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#ifndef __LUSTRE_LU_REF_H
-#define __LUSTRE_LU_REF_H
-
-#include <linux/list.h>
-
-/** \defgroup lu_ref lu_ref
- *
- * An interface to track references between objects. Mostly for debugging.
- *
- * Suppose there is a reference counted data-structure struct foo. To track
- * who acquired references to instance of struct foo, add lu_ref field to it:
- *
- * \code
- * struct foo {
- * atomic_t foo_refcount;
- * struct lu_ref foo_reference;
- * ...
- * };
- * \endcode
- *
- * foo::foo_reference has to be initialized by calling
- * lu_ref_init(). Typically there will be functions or macros to increment and
- * decrement foo::foo_refcount, let's say they are foo_get(struct foo *foo)
- * and foo_put(struct foo *foo), respectively.
- *
- * Whenever foo_get() is called to acquire a reference on a foo, lu_ref_add()
- * has to be called to insert into foo::foo_reference a record, describing
- * acquired reference. Dually, lu_ref_del() removes matching record. Typical
- * usages are:
- *
- * \code
- * struct bar *bar;
- *
- * // bar owns a reference to foo.
- * bar->bar_foo = foo_get(foo);
- * lu_ref_add(&foo->foo_reference, "bar", bar);
- *
- * ...
- *
- * // reference from bar to foo is released.
- * lu_ref_del(&foo->foo_reference, "bar", bar);
- * foo_put(bar->bar_foo);
- *
- *
- * // current thread acquired a temporary reference to foo.
- * foo_get(foo);
- * lu_ref_add(&foo->reference, __func__, current);
- *
- * ...
- *
- * // temporary reference is released.
- * lu_ref_del(&foo->reference, __func__, current);
- * foo_put(foo);
- * \endcode
- *
- * \e Et \e cetera. Often it makes sense to include lu_ref_add() and
- * lu_ref_del() calls into foo_get() and foo_put(). When an instance of struct
- * foo is destroyed, lu_ref_fini() has to be called that checks that no
- * pending references remain. lu_ref_print() can be used to dump a list of
- * pending references, while hunting down a leak.
- *
- * For objects to which a large number of references can be acquired,
- * lu_ref_del() can become cpu consuming, as it has to scan the list of
- * references. To work around this, remember result of lu_ref_add() (usually
- * in the same place where pointer to struct foo is stored), and use
- * lu_ref_del_at():
- *
- * \code
- * // There is a large number of bar's for a single foo.
- * bar->bar_foo = foo_get(foo);
- * bar->bar_foo_ref = lu_ref_add(&foo->foo_reference, "bar", bar);
- *
- * ...
- *
- * // reference from bar to foo is released.
- * lu_ref_del_at(&foo->foo_reference, bar->bar_foo_ref, "bar", bar);
- * foo_put(bar->bar_foo);
- * \endcode
- *
- * lu_ref interface degrades gracefully in case of memory shortages.
- *
- * @{
- */
-
-/*
- * dummy data structures/functions to pass compile for now.
- * We need to reimplement them with kref.
- */
-struct lu_ref {};
-struct lu_ref_link {};
-
-static inline void lu_ref_init(struct lu_ref *ref)
-{
-}
-
-static inline void lu_ref_fini(struct lu_ref *ref)
-{
-}
-
-static inline struct lu_ref_link *lu_ref_add(struct lu_ref *ref,
- const char *scope,
- const void *source)
-{
- return NULL;
-}
-
-static inline struct lu_ref_link *lu_ref_add_atomic(struct lu_ref *ref,
- const char *scope,
- const void *source)
-{
- return NULL;
-}
-
-static inline void lu_ref_add_at(struct lu_ref *ref,
- struct lu_ref_link *link,
- const char *scope,
- const void *source)
-{
-}
-
-static inline void lu_ref_del(struct lu_ref *ref, const char *scope,
- const void *source)
-{
-}
-
-static inline void lu_ref_set_at(struct lu_ref *ref, struct lu_ref_link *link,
- const char *scope, const void *source0,
- const void *source1)
-{
-}
-
-static inline void lu_ref_del_at(struct lu_ref *ref, struct lu_ref_link *link,
- const char *scope, const void *source)
-{
-}
-
-static inline int lu_ref_global_init(void)
-{
- return 0;
-}
-
-static inline void lu_ref_global_fini(void)
-{
-}
-
-static inline void lu_ref_print(const struct lu_ref *ref)
-{
-}
-
-static inline void lu_ref_print_all(void)
-{
-}
-
-/** @} lu */
-
-#endif /* __LUSTRE_LU_REF_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_acl.h b/drivers/staging/lustre/lustre/include/lustre_acl.h
deleted file mode 100644
index e7575a172b5f..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_acl.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/include/lustre_acl.h
- */
-
-#ifndef _LUSTRE_ACL_H
-#define _LUSTRE_ACL_H
-
-#include <linux/fs.h>
-#include <linux/dcache.h>
-#ifdef CONFIG_FS_POSIX_ACL
-#include <linux/posix_acl_xattr.h>
-
-#define LUSTRE_POSIX_ACL_MAX_ENTRIES 32
-#define LUSTRE_POSIX_ACL_MAX_SIZE_OLD \
- (sizeof(struct posix_acl_xattr_header) + \
- LUSTRE_POSIX_ACL_MAX_ENTRIES * sizeof(struct posix_acl_xattr_entry))
-
-#else /* ! CONFIG_FS_POSIX_ACL */
-#define LUSTRE_POSIX_ACL_MAX_SIZE_OLD 0
-#endif /* CONFIG_FS_POSIX_ACL */
-
-#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_compat.h b/drivers/staging/lustre/lustre/include/lustre_compat.h
deleted file mode 100644
index 3c6db0d632dc..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_compat.h
+++ /dev/null
@@ -1,82 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef _LUSTRE_COMPAT_H
-#define _LUSTRE_COMPAT_H
-
-#include <linux/fs_struct.h>
-#include <linux/namei.h>
-#include <linux/cred.h>
-#include <linux/module.h>
-
-#include <lustre_patchless_compat.h>
-
-/*
- * set ATTR_BLOCKS to a high value to avoid any risk of collision with other
- * ATTR_* attributes (see bug 13828)
- */
-#define ATTR_BLOCKS (1 << 27)
-
-#define current_ngroups current_cred()->group_info->ngroups
-#define current_groups current_cred()->group_info->small_block
-
-/*
- * OBD need working random driver, thus all our
- * initialization routines must be called after device
- * driver initialization
- */
-#ifndef MODULE
-#undef module_init
-#define module_init(a) late_initcall(a)
-#endif
-
-#define LTIME_S(time) (time.tv_sec)
-
-#ifndef QUOTA_OK
-# define QUOTA_OK 0
-#endif
-#ifndef NO_QUOTA
-# define NO_QUOTA (-EDQUOT)
-#endif
-
-#if !defined(_ASM_GENERIC_BITOPS_EXT2_NON_ATOMIC_H_) && !defined(ext2_set_bit)
-# define ext2_set_bit __test_and_set_bit_le
-# define ext2_clear_bit __test_and_clear_bit_le
-# define ext2_test_bit test_bit_le
-# define ext2_find_first_zero_bit find_first_zero_bit_le
-# define ext2_find_next_zero_bit find_next_zero_bit_le
-#endif
-
-#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
-
-#endif /* _LUSTRE_COMPAT_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_debug.h b/drivers/staging/lustre/lustre/include/lustre_debug.h
deleted file mode 100644
index 721a81f923e3..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_debug.h
+++ /dev/null
@@ -1,52 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef _LUSTRE_DEBUG_H
-#define _LUSTRE_DEBUG_H
-
-/** \defgroup debug debug
- *
- * @{
- */
-
-#include <lustre_net.h>
-#include <obd.h>
-
-/* lib/debug.c */
-int dump_req(struct ptlrpc_request *req);
-int block_debug_setup(void *addr, int len, __u64 off, __u64 id);
-int block_debug_check(char *who, void *addr, int len, __u64 off, __u64 id);
-
-/** @} debug */
-
-#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_disk.h b/drivers/staging/lustre/lustre/include/lustre_disk.h
deleted file mode 100644
index 886e817644d6..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_disk.h
+++ /dev/null
@@ -1,152 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/include/lustre_disk.h
- *
- * Lustre disk format definitions.
- *
- * Author: Nathan Rutman <nathan@clusterfs.com>
- */
-
-#ifndef _LUSTRE_DISK_H
-#define _LUSTRE_DISK_H
-
-/** \defgroup disk disk
- *
- * @{
- */
-
-#include <asm/byteorder.h>
-#include <linux/types.h>
-#include <linux/backing-dev.h>
-
-/****************** persistent mount data *********************/
-
-#define LDD_F_SV_TYPE_MDT 0x0001
-#define LDD_F_SV_TYPE_OST 0x0002
-#define LDD_F_SV_TYPE_MGS 0x0004
-#define LDD_F_SV_TYPE_MASK (LDD_F_SV_TYPE_MDT | \
- LDD_F_SV_TYPE_OST | \
- LDD_F_SV_TYPE_MGS)
-#define LDD_F_SV_ALL 0x0008
-
-/****************** mount command *********************/
-
-/* The lmd is only used internally by Lustre; mount simply passes
- * everything as string options
- */
-
-#define LMD_MAGIC 0xbdacbd03
-#define LMD_PARAMS_MAXLEN 4096
-
-/* gleaned from the mount command - no persistent info here */
-struct lustre_mount_data {
- __u32 lmd_magic;
- __u32 lmd_flags; /* lustre mount flags */
- int lmd_mgs_failnodes; /* mgs failover node count */
- int lmd_exclude_count;
- int lmd_recovery_time_soft;
- int lmd_recovery_time_hard;
- char *lmd_dev; /* device name */
- char *lmd_profile; /* client only */
- char *lmd_mgssec; /* sptlrpc flavor to mgs */
- char *lmd_opts; /* lustre mount options (as opposed to
- * _device_ mount options)
- */
- char *lmd_params; /* lustre params */
- __u32 *lmd_exclude; /* array of OSTs to ignore */
- char *lmd_mgs; /* MGS nid */
- char *lmd_osd_type; /* OSD type */
-};
-
-#define LMD_FLG_SERVER 0x0001 /* Mounting a server */
-#define LMD_FLG_CLIENT 0x0002 /* Mounting a client */
-#define LMD_FLG_ABORT_RECOV 0x0008 /* Abort recovery */
-#define LMD_FLG_NOSVC 0x0010 /* Only start MGS/MGC for servers,
- * no other services
- */
-#define LMD_FLG_NOMGS 0x0020 /* Only start target for servers,
- * reusing existing MGS services
- */
-#define LMD_FLG_WRITECONF 0x0040 /* Rewrite config log */
-#define LMD_FLG_NOIR 0x0080 /* NO imperative recovery */
-#define LMD_FLG_NOSCRUB 0x0100 /* Do not trigger scrub automatically */
-#define LMD_FLG_MGS 0x0200 /* Also start MGS along with server */
-#define LMD_FLG_IAM 0x0400 /* IAM dir */
-#define LMD_FLG_NO_PRIMNODE 0x0800 /* all nodes are service nodes */
-#define LMD_FLG_VIRGIN 0x1000 /* the service registers first time */
-#define LMD_FLG_UPDATE 0x2000 /* update parameters */
-#define LMD_FLG_HSM 0x4000 /* Start coordinator */
-
-#define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT)
-
-/****************** superblock additional info *********************/
-
-struct ll_sb_info;
-
-struct lustre_sb_info {
- int lsi_flags;
- struct obd_device *lsi_mgc; /* mgc obd */
- struct lustre_mount_data *lsi_lmd; /* mount command info */
- struct ll_sb_info *lsi_llsbi; /* add'l client sbi info */
- struct dt_device *lsi_dt_dev; /* dt device to access disk fs*/
- atomic_t lsi_mounts; /* references to the srv_mnt */
- char lsi_svname[MTI_NAME_MAXLEN];
- char lsi_osd_obdname[64];
- char lsi_osd_uuid[64];
- struct obd_export *lsi_osd_exp;
- char lsi_osd_type[16];
- char lsi_fstype[16];
-};
-
-#define LSI_UMOUNT_FAILOVER 0x00200000
-
-#define s2lsi(sb) ((struct lustre_sb_info *)((sb)->s_fs_info))
-#define s2lsi_nocast(sb) ((sb)->s_fs_info)
-
-#define get_profile_name(sb) (s2lsi(sb)->lsi_lmd->lmd_profile)
-
-/****************** prototypes *********************/
-
-/* obd_mount.c */
-
-int lustre_start_mgc(struct super_block *sb);
-void lustre_register_super_ops(struct module *mod,
- int (*cfs)(struct super_block *sb),
- void (*ksc)(struct super_block *sb));
-int lustre_common_put_super(struct super_block *sb);
-
-int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type);
-
-/** @} disk */
-
-#endif /* _LUSTRE_DISK_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h
deleted file mode 100644
index 2c55241258cc..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_dlm.h
+++ /dev/null
@@ -1,1346 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2010, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-/** \defgroup LDLM Lustre Distributed Lock Manager
- *
- * Lustre DLM is based on VAX DLM.
- * Its two main roles are:
- * - To provide locking assuring consistency of data on all Lustre nodes.
- * - To allow clients to cache state protected by a lock by holding the
- * lock until a conflicting lock is requested or it is expired by the LRU.
- *
- * @{
- */
-
-#ifndef _LUSTRE_DLM_H__
-#define _LUSTRE_DLM_H__
-
-#include <lustre_lib.h>
-#include <lustre_net.h>
-#include <lustre_import.h>
-#include <lustre_handles.h>
-#include <interval_tree.h> /* for interval_node{}, ldlm_extent */
-#include <lu_ref.h>
-
-#include "lustre_dlm_flags.h"
-
-struct obd_ops;
-struct obd_device;
-
-#define OBD_LDLM_DEVICENAME "ldlm"
-
-#define LDLM_DEFAULT_LRU_SIZE (100 * num_online_cpus())
-#define LDLM_DEFAULT_MAX_ALIVE (65 * 60 * HZ) /* 65 min */
-#define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024
-
-/**
- * LDLM non-error return states
- */
-enum ldlm_error {
- ELDLM_OK = 0,
- ELDLM_LOCK_MATCHED = 1,
-
- ELDLM_LOCK_CHANGED = 300,
- ELDLM_LOCK_ABORTED = 301,
- ELDLM_LOCK_REPLACED = 302,
- ELDLM_NO_LOCK_DATA = 303,
- ELDLM_LOCK_WOULDBLOCK = 304,
-
- ELDLM_NAMESPACE_EXISTS = 400,
- ELDLM_BAD_NAMESPACE = 401
-};
-
-/**
- * LDLM namespace type.
- * The "client" type is actually an indication that this is a narrow local view
- * into complete namespace on the server. Such namespaces cannot make any
- * decisions about lack of conflicts or do any autonomous lock granting without
- * first speaking to a server.
- */
-enum ldlm_side {
- LDLM_NAMESPACE_SERVER = 1 << 0,
- LDLM_NAMESPACE_CLIENT = 1 << 1
-};
-
-/**
- * The blocking callback is overloaded to perform two functions. These flags
- * indicate which operation should be performed.
- */
-#define LDLM_CB_BLOCKING 1
-#define LDLM_CB_CANCELING 2
-
-/**
- * \name Lock Compatibility Matrix.
- *
- * A lock has both a type (extent, flock, inode bits, or plain) and a mode.
- * Lock types are described in their respective implementation files:
- * ldlm_{extent,flock,inodebits,plain}.c.
- *
- * There are six lock modes along with a compatibility matrix to indicate if
- * two locks are compatible.
- *
- * - EX: Exclusive mode. Before a new file is created, MDS requests EX lock
- * on the parent.
- * - PW: Protective Write (normal write) mode. When a client requests a write
- * lock from an OST, a lock with PW mode will be issued.
- * - PR: Protective Read (normal read) mode. When a client requests a read from
- * an OST, a lock with PR mode will be issued. Also, if the client opens a
- * file for execution, it is granted a lock with PR mode.
- * - CW: Concurrent Write mode. The type of lock that the MDS grants if a client
- * requests a write lock during a file open operation.
- * - CR: Concurrent Read mode. When a client performs a path lookup, the MDS
- * grants an inodebits lock with the CR mode on the intermediate path
- * component.
- * - NL: Null mode.
- *
- * <PRE>
- * NL CR CW PR PW EX
- * NL 1 1 1 1 1 1
- * CR 1 1 1 1 1 0
- * CW 1 1 1 0 0 0
- * PR 1 1 0 1 0 0
- * PW 1 1 0 0 0 0
- * EX 1 0 0 0 0 0
- * </PRE>
- */
-/** @{ */
-#define LCK_COMPAT_EX LCK_NL
-#define LCK_COMPAT_PW (LCK_COMPAT_EX | LCK_CR)
-#define LCK_COMPAT_PR (LCK_COMPAT_PW | LCK_PR)
-#define LCK_COMPAT_CW (LCK_COMPAT_PW | LCK_CW)
-#define LCK_COMPAT_CR (LCK_COMPAT_CW | LCK_PR | LCK_PW)
-#define LCK_COMPAT_NL (LCK_COMPAT_CR | LCK_EX | LCK_GROUP)
-#define LCK_COMPAT_GROUP (LCK_GROUP | LCK_NL)
-#define LCK_COMPAT_COS (LCK_COS)
-/** @} Lock Compatibility Matrix */
-
-extern enum ldlm_mode lck_compat_array[];
-
-static inline void lockmode_verify(enum ldlm_mode mode)
-{
- LASSERT(mode > LCK_MINMODE && mode < LCK_MAXMODE);
-}
-
-static inline int lockmode_compat(enum ldlm_mode exist_mode,
- enum ldlm_mode new_mode)
-{
- return (lck_compat_array[exist_mode] & new_mode);
-}
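-
-/**
- * Reading the compatibility matrix above through lockmode_compat(), for
- * example:
- *
- * \code
- * lockmode_compat(LCK_PR, LCK_CR); // non-zero: PR and CR are compatible
- * lockmode_compat(LCK_PR, LCK_PW); // 0: PR and PW conflict
- * \endcode
- */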
-
-/*
- *
- * cluster name spaces
- *
- */
-
-#define DLM_OST_NAMESPACE 1
-#define DLM_MDS_NAMESPACE 2
-
-/* XXX
- - do we just separate this by security domains and use a prefix for
- multiple namespaces in the same domain?
- -
-*/
-
-/**
- * Locking rules for LDLM:
- *
- * lr_lock
- *
- * lr_lock
- * waiting_locks_spinlock
- *
- * lr_lock
- * led_lock
- *
- * lr_lock
- * ns_lock
- *
- * lr_lvb_mutex
- * lr_lock
- *
- */
-
-struct ldlm_pool;
-struct ldlm_lock;
-struct ldlm_resource;
-struct ldlm_namespace;
-
-/**
- * Operations on LDLM pools.
- * An LDLM pool is a pool of locks in a namespace without any implicitly
- * specified limits.
- * Locks in the pool are organized in an LRU list.
- * Local memory pressure or server instructions (e.g. memory pressure on the
- * server) can trigger the freeing of locks from the pool.
- */
-struct ldlm_pool_ops {
- /** Recalculate pool \a pl usage */
- int (*po_recalc)(struct ldlm_pool *pl);
- /** Cancel at least \a nr locks from pool \a pl */
- int (*po_shrink)(struct ldlm_pool *pl, int nr,
- gfp_t gfp_mask);
-};
-
-/** One-second check interval for the pools thread. Each pool has its own period. */
-#define LDLM_POOLS_THREAD_PERIOD (1)
-
-/** ~6% margin for modest pools. See ldlm_pool.c for details. */
-#define LDLM_POOLS_MODEST_MARGIN_SHIFT (4)
-
-/** Default recalc period for server side pools in sec. */
-#define LDLM_POOL_SRV_DEF_RECALC_PERIOD (1)
-
-/** Default recalc period for client side pools in sec. */
-#define LDLM_POOL_CLI_DEF_RECALC_PERIOD (10)
-
-/**
- * LDLM pool structure to track granted locks.
- * For purposes of determining when to release locks on e.g. memory pressure.
- * This feature is commonly referred to as lru_resize.
- */
-struct ldlm_pool {
- /** Pool debugfs directory. */
- struct dentry *pl_debugfs_entry;
- /** Pool name, must be long enough to hold compound proc entry name. */
- char pl_name[100];
- /** Lock for protecting SLV/CLV updates. */
- spinlock_t pl_lock;
- /** Number of allowed locks in the pool, on both the client and server side. */
- atomic_t pl_limit;
- /** Number of granted locks in the pool. */
- atomic_t pl_granted;
- /** Grant rate per T. */
- atomic_t pl_grant_rate;
- /** Cancel rate per T. */
- atomic_t pl_cancel_rate;
- /** Server lock volume (SLV). Protected by pl_lock. */
- __u64 pl_server_lock_volume;
- /** Current biggest client lock volume. Protected by pl_lock. */
- __u64 pl_client_lock_volume;
- /** Lock volume factor. The SLV on a client is calculated as follows:
- * server_slv * lock_volume_factor.
- */
- atomic_t pl_lock_volume_factor;
- /** Time when last SLV from server was obtained. */
- time64_t pl_recalc_time;
- /** Recalculation period for pool. */
- time64_t pl_recalc_period;
- /** Recalculation and shrink operations. */
- const struct ldlm_pool_ops *pl_ops;
- /** Number of planned locks for next period. */
- int pl_grant_plan;
- /** Pool statistics. */
- struct lprocfs_stats *pl_stats;
-
- /* sysfs object */
- struct kobject pl_kobj;
- struct completion pl_kobj_unregister;
-};
-
-typedef int (*ldlm_cancel_cbt)(struct ldlm_lock *lock);
-
-/**
- * LVB operations.
- * LVB is Lock Value Block. This is a special opaque (to LDLM) value that could
- * be associated with an LDLM lock and transferred from client to server and
- * back.
- *
- * Currently LVBs are used by:
- * - OSC-OST code to maintain current object size/times
- * - layout lock code to return the layout when the layout lock is granted
- */
-struct ldlm_valblock_ops {
- int (*lvbo_init)(struct ldlm_resource *res);
- int (*lvbo_update)(struct ldlm_resource *res,
- struct ptlrpc_request *r,
- int increase);
- int (*lvbo_free)(struct ldlm_resource *res);
- /* Return the size of the LVB data, so an appropriately sized RPC buffer can be reserved */
- int (*lvbo_size)(struct ldlm_lock *lock);
- /* Called to fill in lvb data to RPC buffer @buf */
- int (*lvbo_fill)(struct ldlm_lock *lock, void *buf, int buflen);
-};
-
-/**
- * LDLM pools related: the type of lock pool in the namespace.
- * Greedy means cached locks are released aggressively.
- */
-enum ldlm_appetite {
- LDLM_NAMESPACE_GREEDY = 1 << 0,
- LDLM_NAMESPACE_MODEST = 1 << 1
-};
-
-struct ldlm_ns_bucket {
- /** back pointer to namespace */
- struct ldlm_namespace *nsb_namespace;
- /**
- * Estimated lock callback time. Used by adaptive timeout code to
- * avoid spurious client evictions due to unresponsiveness when in
- * fact the network or overall system load is at fault
- */
- struct adaptive_timeout nsb_at_estimate;
-};
-
-enum {
- /** LDLM namespace lock stats */
- LDLM_NSS_LOCKS = 0,
- LDLM_NSS_LAST
-};
-
-enum ldlm_ns_type {
- /** invalid type */
- LDLM_NS_TYPE_UNKNOWN = 0,
- /** mdc namespace */
- LDLM_NS_TYPE_MDC,
- /** mds namespace */
- LDLM_NS_TYPE_MDT,
- /** osc namespace */
- LDLM_NS_TYPE_OSC,
- /** ost namespace */
- LDLM_NS_TYPE_OST,
- /** mgc namespace */
- LDLM_NS_TYPE_MGC,
- /** mgs namespace */
- LDLM_NS_TYPE_MGT,
-};
-
-/**
- * LDLM Namespace.
- *
- * Namespace serves to contain locks related to a particular service.
- * There are two kinds of namespaces:
- * - Server namespace has knowledge of all locks and is therefore authoritative
- * to make decisions like what locks could be granted and what conflicts
- * exist during new lock enqueue.
- * - Client namespace only has limited knowledge about locks in the namespace,
- * only seeing locks held by the client.
- *
- * Every Lustre service has one server namespace present on the server serving
- * that service. Every client connected to the service has a client namespace
- * for it.
- * Every lock obtained by a client in that namespace is actually represented
- * by two in-memory locks, one on the server and one on the client. The locks
- * are linked by a special cookie, by which one node can tell the other which
- * lock it actually means during communication. Such locks are called remote
- * locks. Locks held by the server only, without any reference to a client,
- * are called local locks.
- */
-struct ldlm_namespace {
- /** Backward link to OBD, required for LDLM pool to store new SLV. */
- struct obd_device *ns_obd;
-
- /** Flag indicating if namespace is on client instead of server */
- enum ldlm_side ns_client;
-
- /** name of this namespace */
- char *ns_name;
-
- /** Resource hash table for namespace. */
- struct cfs_hash *ns_rs_hash;
-
- /** serialize */
- spinlock_t ns_lock;
-
- /** big refcount (by bucket) */
- atomic_t ns_bref;
-
- /**
- * Namespace connect flags supported by server (may be changed via
- * sysfs, LRU resize may be disabled/enabled).
- */
- __u64 ns_connect_flags;
-
- /** Client side original connect flags supported by server. */
- __u64 ns_orig_connect_flags;
-
- /* namespace debugfs dir entry */
- struct dentry *ns_debugfs_entry;
-
- /**
- * Position in global namespace list linking all namespaces on
- * the node.
- */
- struct list_head ns_list_chain;
-
- /**
- * List of unused locks for this namespace. This list is also called
- * LRU lock list.
- * Unused locks are locks with zero reader/writer reference counts.
- * This list is only used on clients for lock caching purposes.
- * When we want to release some locks voluntarily or if server wants
- * us to release some locks due to e.g. memory pressure, we take locks
- * to release from the head of this list.
- * Locks are linked via l_lru field in \see struct ldlm_lock.
- */
- struct list_head ns_unused_list;
- /** Number of locks in the LRU list above */
- int ns_nr_unused;
-
- /**
- * Maximum number of locks permitted in the LRU. If 0, locks are
- * managed by the pools and there is no preset limit; instead it is
- * all controlled by the available memory on this client and on the server.
- */
- unsigned int ns_max_unused;
- /** Maximum allowed age (last used time) for locks in the LRU */
- unsigned int ns_max_age;
-
- /**
- * Used to rate-limit ldlm_namespace_dump calls.
- * \see ldlm_namespace_dump. Increased by 10 seconds every time
- * it is called.
- */
- unsigned long ns_next_dump;
-
- /**
- * LVB operations for this namespace.
- * \see struct ldlm_valblock_ops
- */
- struct ldlm_valblock_ops *ns_lvbo;
-
- /**
- * Used by filter code to store pointer to OBD of the service.
- * Should be dropped in favor of \a ns_obd
- */
- void *ns_lvbp;
-
- /**
- * Wait queue used by __ldlm_namespace_free. Gets woken up every time
- * a resource is removed.
- */
- wait_queue_head_t ns_waitq;
- /** LDLM pool structure for this namespace */
- struct ldlm_pool ns_pool;
- /** Definition of how eagerly unused locks will be released from LRU */
- enum ldlm_appetite ns_appetite;
-
- /** Limit of parallel AST RPC count. */
- unsigned ns_max_parallel_ast;
-
- /**
- * Callback to check if a lock is good to be canceled by ELC or
- * during recovery.
- */
- ldlm_cancel_cbt ns_cancel;
-
- /** LDLM lock stats */
- struct lprocfs_stats *ns_stats;
-
- /**
- * Flag to indicate namespace is being freed. Used to determine if
- * recalculation of LDLM pool statistics should be skipped.
- */
- unsigned ns_stopping:1;
-
- struct kobject ns_kobj; /* sysfs object */
- struct completion ns_kobj_unregister;
-};
-
-/**
- * Returns 1 if namespace \a ns supports early lock cancel (ELC).
- */
-static inline int ns_connect_cancelset(struct ldlm_namespace *ns)
-{
- return !!(ns->ns_connect_flags & OBD_CONNECT_CANCELSET);
-}
-
-/**
- * Returns 1 if this namespace supports lru_resize.
- */
-static inline int ns_connect_lru_resize(struct ldlm_namespace *ns)
-{
- return !!(ns->ns_connect_flags & OBD_CONNECT_LRU_RESIZE);
-}
-
-static inline void ns_register_cancel(struct ldlm_namespace *ns,
- ldlm_cancel_cbt arg)
-{
- ns->ns_cancel = arg;
-}
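
Callers gate optional behaviour on these predicates. For instance, a client could collect local cancels for piggybacking only when the server advertised ELC; a sketch, assuming ns, res, exp and req are in scope:

    LIST_HEAD(cancels);
    int count = 0, rc;

    if (ns_connect_cancelset(ns))
            /* ELC: cancel conflicting local locks and pack those cancels
             * into the upcoming enqueue RPC instead of a separate one
             */
            count = ldlm_cancel_resource_local(res, &cancels, NULL, LCK_PW,
                                               0, 0, NULL);
    rc = ldlm_prep_enqueue_req(exp, req, &cancels, count);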
-
-struct ldlm_lock;
-
-/** Type for blocking callback function of a lock. */
-typedef int (*ldlm_blocking_callback)(struct ldlm_lock *lock,
- struct ldlm_lock_desc *new, void *data,
- int flag);
-/** Type for completion callback function of a lock. */
-typedef int (*ldlm_completion_callback)(struct ldlm_lock *lock, __u64 flags,
- void *data);
-/** Type for glimpse callback function of a lock. */
-typedef int (*ldlm_glimpse_callback)(struct ldlm_lock *lock, void *data);
-
-/** Work list for sending GL ASTs to multiple locks. */
-struct ldlm_glimpse_work {
- struct ldlm_lock *gl_lock; /* lock to glimpse */
- struct list_head gl_list; /* linkage to other gl work structs */
- __u32 gl_flags;/* see LDLM_GL_WORK_* below */
- union ldlm_gl_desc *gl_desc; /* glimpse descriptor to be packed in
- * glimpse callback request
- */
-};
-
-/** The ldlm_glimpse_work is allocated on the stack and should not be freed. */
-#define LDLM_GL_WORK_NOFREE 0x1
-
-/** Interval node data for each LDLM_EXTENT lock. */
-struct ldlm_interval {
- struct interval_node li_node; /* node for tree management */
- struct list_head li_group; /* locks that share the same policy,
- * i.e. the group for that policy
- */
-};
-
-#define to_ldlm_interval(n) container_of(n, struct ldlm_interval, li_node)
-
-/**
- * Interval tree for extent locks.
- * The interval tree must be accessed under the resource lock.
- * Interval trees are used for granted extent locks to speed up conflicts
- * lookup. See ldlm/interval_tree.c for more details.
- */
-struct ldlm_interval_tree {
- /** Tree size. */
- int lit_size;
- enum ldlm_mode lit_mode; /* lock mode */
- struct interval_node *lit_root; /* actual ldlm_interval */
-};
-
-/** Whether to track references to exports by LDLM locks. */
-#define LUSTRE_TRACKS_LOCK_EXP_REFS (0)
-
-/** Cancel flags. */
-enum ldlm_cancel_flags {
- LCF_ASYNC = 0x1, /* Cancel locks asynchronously. */
- LCF_LOCAL = 0x2, /* Cancel locks locally, not notifying the server */
- LCF_BL_AST = 0x4, /* Cancel locks marked as LDLM_FL_BL_AST
- * in the same RPC
- */
-};
-
-struct ldlm_flock {
- __u64 start;
- __u64 end;
- __u64 owner;
- __u64 blocking_owner;
- struct obd_export *blocking_export;
- __u32 pid;
-};
-
-union ldlm_policy_data {
- struct ldlm_extent l_extent;
- struct ldlm_flock l_flock;
- struct ldlm_inodebits l_inodebits;
-};
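
Only the arm matching the lock type is meaningful. For example, a client matching a whole-file extent lock fills l_extent before calling ldlm_lock_match(); a sketch, with ns and res_id assumed in scope and OBD_OBJECT_EOF as the usual end-of-object marker:

    union ldlm_policy_data policy = {
            .l_extent = { .start = 0, .end = OBD_OBJECT_EOF },
    };
    struct lustre_handle lockh;
    enum ldlm_mode mode;

    mode = ldlm_lock_match(ns, LDLM_FL_LVB_READY, &res_id, LDLM_EXTENT,
                           &policy, LCK_PR | LCK_PW, &lockh, 0);
    if (mode)               /* matched: lockh now holds a reference */
            ldlm_lock_decref(&lockh, mode);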
-
-void ldlm_convert_policy_to_local(struct obd_export *exp, enum ldlm_type type,
- const union ldlm_wire_policy_data *wpolicy,
- union ldlm_policy_data *lpolicy);
-
-enum lvb_type {
- LVB_T_NONE = 0,
- LVB_T_OST = 1,
- LVB_T_LQUOTA = 2,
- LVB_T_LAYOUT = 3,
-};
-
-/**
- * LDLM_GID_ANY is used to match any group id in ldlm_lock_match().
- */
-#define LDLM_GID_ANY ((__u64)-1)
-
-/**
- * LDLM lock structure
- *
- * Represents a single LDLM lock and its state in memory. Each lock is
- * associated with a single ldlm_resource, the object which is being
- * locked. There may be multiple ldlm_locks on a single resource,
- * depending on the lock type and whether the locks are conflicting or
- * not.
- */
-struct ldlm_lock {
- /**
- * Local lock handle.
- * When the remote side wants to tell us about a lock, it addresses
- * it by this opaque handle. The handle does not hold a reference on
- * the ldlm_lock, so it can be safely passed to other threads or
- * nodes. When the lock needs to be accessed from the handle, it is
- * looked up again in the lock table and may no longer exist.
- *
- * Must be first in the structure.
- */
- struct portals_handle l_handle;
- /**
- * Lock reference count.
- * This is how many users have pointers to actual structure, so that
- * we do not accidentally free lock structure that is in use.
- */
- atomic_t l_refc;
- /**
- * Internal spinlock protects l_resource. We should hold this lock
- * first before taking res_lock.
- */
- spinlock_t l_lock;
- /**
- * Pointer to actual resource this lock is in.
- * ldlm_lock_change_resource() can change this.
- */
- struct ldlm_resource *l_resource;
- /**
- * List item for client side LRU list.
- * Protected by ns_lock in struct ldlm_namespace.
- */
- struct list_head l_lru;
- /**
- * Linkage to resource's lock queues according to current lock state.
- * (could be granted, waiting or converting)
- * Protected by lr_lock in struct ldlm_resource.
- */
- struct list_head l_res_link;
- /**
- * Tree node for ldlm_extent.
- */
- struct ldlm_interval *l_tree_node;
- /**
- * Requested mode.
- * Protected by lr_lock.
- */
- enum ldlm_mode l_req_mode;
- /**
- * Granted mode, also protected by lr_lock.
- */
- enum ldlm_mode l_granted_mode;
- /** Lock completion handler pointer. Called when lock is granted. */
- ldlm_completion_callback l_completion_ast;
- /**
- * Lock blocking AST handler pointer.
- * It plays two roles:
- * - as a notification of an attempt to queue a conflicting lock (once)
- * - as a notification when the lock is being cancelled.
- *
- * As such it's typically called twice: once for the initial conflict
- * and then once more when the last user goes away and the lock is
- * cancelled (which can happen recursively).
- */
- ldlm_blocking_callback l_blocking_ast;
- /**
- * Lock glimpse handler.
- * Glimpse handler is used to obtain LVB updates from a client by
- * server
- */
- ldlm_glimpse_callback l_glimpse_ast;
-
- /**
- * Lock export.
- * This is a pointer to the actual client export for locks that were
- * granted to clients. Used server-side.
- */
- struct obd_export *l_export;
- /**
- * Lock connection export.
- * Pointer to server export on a client.
- */
- struct obd_export *l_conn_export;
-
- /**
- * Remote lock handle.
- * If the lock is remote, this is the handle of the other side's lock
- * (its l_handle)
- */
- struct lustre_handle l_remote_handle;
-
- /**
- * Private data specific to the lock type, e.g. the extent range for
- * an extent lock or the bit mask for inodebits locks.
- */
- union ldlm_policy_data l_policy_data;
-
- /**
- * Lock state flags. Protected by lr_lock.
- * \see lustre_dlm_flags.h where the bits are defined.
- */
- __u64 l_flags;
-
- /**
- * Lock r/w usage counters.
- * Protected by lr_lock.
- */
- __u32 l_readers;
- __u32 l_writers;
- /**
- * If the lock is granted, a process sleeps on this waitq to learn when
- * it's no longer in use. If the lock is not granted, a process sleeps
- * on this waitq to learn when it becomes granted.
- */
- wait_queue_head_t l_waitq;
-
- /**
- * Seconds. It will be updated if there is any activity related to
- * the lock, e.g. enqueuing the lock or sending a blocking AST.
- */
- time64_t l_last_activity;
-
- /**
- * Time the lock was last used, e.g. matched by ldlm_lock_match().
- * In jiffies. Should be converted to wallclock time if needed.
- */
- unsigned long l_last_used;
-
- /** Originally requested extent for the extent lock. */
- struct ldlm_extent l_req_extent;
-
- /*
- * Client-side-only members.
- */
-
- enum lvb_type l_lvb_type;
-
- /**
- * Temporary storage for a LVB received during an enqueue operation.
- */
- __u32 l_lvb_len;
- void *l_lvb_data;
-
- /** Private storage for lock user. Opaque to LDLM. */
- void *l_ast_data;
-
- /*
- * Server-side-only members.
- */
-
- /**
- * Connection cookie for the client originating the operation.
- * Used by Commit on Share (COS) code. Currently only used for
- * inodebits locks on MDS.
- */
- __u64 l_client_cookie;
-
- /**
- * List item for locks waiting for cancellation from clients.
- * The lists this could be linked into are:
- * waiting_locks_list (protected by waiting_locks_spinlock),
- * then if the lock timed out, it is moved to
- * expired_lock_thread.elt_expired_locks for further processing.
- * Protected by elt_lock.
- */
- struct list_head l_pending_chain;
-
- /**
- * Set when lock is sent a blocking AST. Time in seconds when timeout
- * is reached and client holding this lock could be evicted.
- * This timeout could be further extended by e.g. certain IO activity
- * under this lock.
- * \see ost_rw_prolong_locks
- */
- unsigned long l_callback_timeout;
-
- /** Local PID of process which created this lock. */
- __u32 l_pid;
-
- /**
- * Number of times a blocking AST was sent for this lock.
- * This is for debugging. Valid values are 0 and 1; if there is an
- * attempt to send a blocking AST more than once, an assertion will
- * be hit. \see ldlm_work_bl_ast_lock
- */
- int l_bl_ast_run;
- /** List item ldlm_add_ast_work_item() for case of blocking ASTs. */
- struct list_head l_bl_ast;
- /** List item ldlm_add_ast_work_item() for case of completion ASTs. */
- struct list_head l_cp_ast;
- /** For ldlm_add_ast_work_item() for "revoke" AST used in COS. */
- struct list_head l_rk_ast;
-
- /**
- * Pointer to a conflicting lock that caused blocking AST to be sent
- * for this lock
- */
- struct ldlm_lock *l_blocking_lock;
-
- /**
- * Protected by lr_lock, linkages to "skip lists".
- * For more explanations of skip lists see ldlm/ldlm_inodebits.c
- */
- struct list_head l_sl_mode;
- struct list_head l_sl_policy;
-
- /** Reference tracking structure to debug leaked locks. */
- struct lu_ref l_reference;
-#if LUSTRE_TRACKS_LOCK_EXP_REFS
- /* Debugging stuff for bug 20498, for tracking export references. */
- /** number of export references taken */
- int l_exp_refs_nr;
- /** link all locks referencing one export */
- struct list_head l_exp_refs_link;
- /** referenced export object */
- struct obd_export *l_exp_refs_target;
-#endif
-};
-
-/**
- * LDLM resource description.
- * Basically, a resource is the representation of a single object.
- * An object has a name, which is currently four 64-bit integers. The LDLM
- * user is responsible for creating a mapping between the objects it wants
- * protected and resource names.
- *
- * A resource can only hold locks of a single lock type, though there may be
- * multiple ldlm_locks on a single resource, depending on the lock type and
- * whether the locks are conflicting or not.
- */
-struct ldlm_resource {
- struct ldlm_ns_bucket *lr_ns_bucket;
-
- /**
- * List item for list in namespace hash.
- * protected by ns_lock
- */
- struct hlist_node lr_hash;
-
- /** Spinlock to protect locks under this resource. */
- spinlock_t lr_lock;
-
- /**
- * protected by lr_lock
- * @{
- */
- /** List of locks in granted state */
- struct list_head lr_granted;
- /**
- * List of locks that could not be granted due to conflicts and
- * that are waiting for conflicts to go away
- */
- struct list_head lr_waiting;
- /** @} */
-
- /** Type of locks this resource can hold. Only one type per resource. */
- enum ldlm_type lr_type; /* LDLM_{PLAIN,EXTENT,FLOCK,IBITS} */
-
- /** Resource name */
- struct ldlm_res_id lr_name;
- /** Reference count for this resource */
- atomic_t lr_refcount;
-
- /**
- * Interval trees (only for extent locks) for all modes of this resource
- */
- struct ldlm_interval_tree lr_itree[LCK_MODE_NUM];
-
- /**
- * Server-side-only lock value block elements.
- * To serialize lvbo_init.
- */
- struct mutex lr_lvb_mutex;
- int lr_lvb_len;
-
- /** When the resource was considered as contended. */
- unsigned long lr_contention_time;
- /** List of references to this resource. For debugging. */
- struct lu_ref lr_reference;
-
- struct inode *lr_lvb_inode;
-};
-
-static inline bool ldlm_has_layout(struct ldlm_lock *lock)
-{
- return lock->l_resource->lr_type == LDLM_IBITS &&
- lock->l_policy_data.l_inodebits.bits & MDS_INODELOCK_LAYOUT;
-}
-
-static inline char *
-ldlm_ns_name(struct ldlm_namespace *ns)
-{
- return ns->ns_name;
-}
-
-static inline struct ldlm_namespace *
-ldlm_res_to_ns(struct ldlm_resource *res)
-{
- return res->lr_ns_bucket->nsb_namespace;
-}
-
-static inline struct ldlm_namespace *
-ldlm_lock_to_ns(struct ldlm_lock *lock)
-{
- return ldlm_res_to_ns(lock->l_resource);
-}
-
-static inline char *
-ldlm_lock_to_ns_name(struct ldlm_lock *lock)
-{
- return ldlm_ns_name(ldlm_lock_to_ns(lock));
-}
-
-static inline struct adaptive_timeout *
-ldlm_lock_to_ns_at(struct ldlm_lock *lock)
-{
- return &lock->l_resource->lr_ns_bucket->nsb_at_estimate;
-}
-
-static inline int ldlm_lvbo_init(struct ldlm_resource *res)
-{
- struct ldlm_namespace *ns = ldlm_res_to_ns(res);
-
- if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init)
- return ns->ns_lvbo->lvbo_init(res);
-
- return 0;
-}
-
-static inline int ldlm_lvbo_size(struct ldlm_lock *lock)
-{
- struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
-
- if (ns->ns_lvbo && ns->ns_lvbo->lvbo_size)
- return ns->ns_lvbo->lvbo_size(lock);
-
- return 0;
-}
-
-static inline int ldlm_lvbo_fill(struct ldlm_lock *lock, void *buf, int len)
-{
- struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
-
- if (ns->ns_lvbo)
- return ns->ns_lvbo->lvbo_fill(lock, buf, len);
-
- return 0;
-}
-
-struct ldlm_ast_work {
- struct ldlm_lock *w_lock;
- int w_blocking;
- struct ldlm_lock_desc w_desc;
- struct list_head w_list;
- int w_flags;
- void *w_data;
- int w_datalen;
-};
-
-/**
- * Common ldlm_enqueue parameters
- */
-struct ldlm_enqueue_info {
- enum ldlm_type ei_type; /** Type of the lock being enqueued. */
- enum ldlm_mode ei_mode; /** Mode of the lock being enqueued. */
- void *ei_cb_bl; /** blocking lock callback */
- void *ei_cb_cp; /** lock completion callback */
- void *ei_cb_gl; /** lock glimpse callback */
- void *ei_cbdata; /** Data to be passed into callbacks. */
- unsigned int ei_enq_slave:1; /* whether to enqueue slave stripes */
-};
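
The callbacks are stored as void pointers so that one descriptor type serves all lock types. A hedged sketch of filling it for an inodebits enqueue; my_blocking_ast is hypothetical, while ldlm_completion_ast is the default completion handler declared later in this header:

    static int my_blocking_ast(struct ldlm_lock *lock,
                               struct ldlm_lock_desc *desc,
                               void *data, int flag)
    {
            return 0;       /* no-op blocking handler for illustration */
    }

    struct ldlm_enqueue_info einfo = {
            .ei_type   = LDLM_IBITS,
            .ei_mode   = LCK_PR,
            .ei_cb_bl  = my_blocking_ast,
            .ei_cb_cp  = ldlm_completion_ast,
            .ei_cbdata = NULL,
    };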
-
-extern struct obd_ops ldlm_obd_ops;
-
-extern char *ldlm_lockname[];
-const char *ldlm_it2str(enum ldlm_intent_flags it);
-
-/**
- * Just a fancy CDEBUG call with log level preset to LDLM_DEBUG.
- * For the cases where we do not have an actual lock to print along
- * with an LDLM-related debugging message.
- */
-#define LDLM_DEBUG_NOLOCK(format, a...) \
- CDEBUG(D_DLMTRACE, "### " format "\n", ##a)
-
-/**
- * Support function for lock information printing into debug logs.
- * \see LDLM_DEBUG
- */
-#define ldlm_lock_debug(msgdata, mask, cdls, lock, fmt, a...) do { \
- CFS_CHECK_STACK(msgdata, mask, cdls); \
- \
- if (((mask) & D_CANTMASK) != 0 || \
- ((libcfs_debug & (mask)) != 0 && \
- (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \
- _ldlm_lock_debug(lock, msgdata, fmt, ##a); \
-} while (0)
-
-void _ldlm_lock_debug(struct ldlm_lock *lock,
- struct libcfs_debug_msg_data *data,
- const char *fmt, ...)
- __printf(3, 4);
-
-/**
- * Rate-limited version of lock printing function.
- */
-#define LDLM_DEBUG_LIMIT(mask, lock, fmt, a...) do { \
- static struct cfs_debug_limit_state _ldlm_cdls; \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, &_ldlm_cdls); \
- ldlm_lock_debug(&msgdata, mask, &_ldlm_cdls, lock, "### " fmt, ##a);\
-} while (0)
-
-#define LDLM_ERROR(lock, fmt, a...) LDLM_DEBUG_LIMIT(D_ERROR, lock, fmt, ## a)
-#define LDLM_WARN(lock, fmt, a...) LDLM_DEBUG_LIMIT(D_WARNING, lock, fmt, ## a)
-
-/** Non-rate-limited lock printing function for debugging purposes. */
-#define LDLM_DEBUG(lock, fmt, a...) do { \
- if (likely(lock)) { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_DLMTRACE, NULL); \
- ldlm_lock_debug(&msgdata, D_DLMTRACE, NULL, lock, \
- "### " fmt, ##a); \
- } else { \
- LDLM_DEBUG_NOLOCK("no dlm lock: " fmt, ##a); \
- } \
-} while (0)
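
These are used like CDEBUG(), with the lock as the first argument. Illustrative calls, assuming lock, flags, delay and ns are in scope:

    LDLM_DEBUG(lock, "client-side enqueue, flags %#llx", flags);
    LDLM_ERROR(lock, "lock callback timer expired after %lus", delay);
    LDLM_DEBUG_NOLOCK("cleaning namespace %s", ldlm_ns_name(ns));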
-
-typedef int (*ldlm_processing_policy)(struct ldlm_lock *lock, __u64 *flags,
- int first_enq, enum ldlm_error *err,
- struct list_head *work_list);
-
-/**
- * Return values for lock iterators.
- * Also used during deciding of lock grants and cancellations.
- */
-#define LDLM_ITER_CONTINUE 1 /* keep iterating */
-#define LDLM_ITER_STOP 2 /* stop iterating */
-
-typedef int (*ldlm_iterator_t)(struct ldlm_lock *, void *);
-typedef int (*ldlm_res_iterator_t)(struct ldlm_resource *, void *);
-
-/** \defgroup ldlm_iterator Lock iterators
- *
- * LDLM provides for a way to iterate through every lock on a resource or
- * namespace or every resource in a namespace.
- * @{
- */
-int ldlm_resource_iterate(struct ldlm_namespace *, const struct ldlm_res_id *,
- ldlm_iterator_t iter, void *data);
-/** @} ldlm_iterator */
-
-int ldlm_replay_locks(struct obd_import *imp);
-
-/* ldlm_flock.c */
-int ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data);
-
-/* ldlm_extent.c */
-__u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms);
-
-struct ldlm_callback_suite {
- ldlm_completion_callback lcs_completion;
- ldlm_blocking_callback lcs_blocking;
- ldlm_glimpse_callback lcs_glimpse;
-};
-
-/* ldlm_lockd.c */
-int ldlm_get_ref(void);
-void ldlm_put_ref(void);
-struct ldlm_lock *ldlm_request_lock(struct ptlrpc_request *req);
-
-/* ldlm_lock.c */
-void ldlm_lock2handle(const struct ldlm_lock *lock,
- struct lustre_handle *lockh);
-struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *, __u64 flags);
-void ldlm_cancel_callback(struct ldlm_lock *);
-int ldlm_lock_remove_from_lru(struct ldlm_lock *);
-int ldlm_lock_set_data(const struct lustre_handle *lockh, void *data);
-
-/**
- * Obtain a lock reference by its handle.
- */
-static inline struct ldlm_lock *ldlm_handle2lock(const struct lustre_handle *h)
-{
- return __ldlm_handle2lock(h, 0);
-}
-
-#define LDLM_LOCK_REF_DEL(lock) \
- lu_ref_del(&lock->l_reference, "handle", current)
-
-static inline struct ldlm_lock *
-ldlm_handle2lock_long(const struct lustre_handle *h, __u64 flags)
-{
- struct ldlm_lock *lock;
-
- lock = __ldlm_handle2lock(h, flags);
- if (lock)
- LDLM_LOCK_REF_DEL(lock);
- return lock;
-}
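
The temporary reference obtained through a handle must be dropped with LDLM_LOCK_PUT() when done, and a failed lookup is normal if the lock has since been destroyed. A sketch:

    struct ldlm_lock *lock;

    lock = ldlm_handle2lock(lockh);
    if (!lock)
            return -ESTALE; /* handle no longer resolves to a live lock */
    /* ... inspect or update the lock ... */
    LDLM_LOCK_PUT(lock);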
-
-/**
- * Update the resource's Lock Value Block via its LVB operations (LVBO),
- * taking into account data from request \a r
- */
-static inline int ldlm_res_lvbo_update(struct ldlm_resource *res,
- struct ptlrpc_request *r, int increase)
-{
- if (ldlm_res_to_ns(res)->ns_lvbo &&
- ldlm_res_to_ns(res)->ns_lvbo->lvbo_update) {
- return ldlm_res_to_ns(res)->ns_lvbo->lvbo_update(res, r,
- increase);
- }
- return 0;
-}
-
-int ldlm_error2errno(enum ldlm_error error);
-
-#if LUSTRE_TRACKS_LOCK_EXP_REFS
-void ldlm_dump_export_locks(struct obd_export *exp);
-#endif
-
-/**
- * Release a temporary lock reference obtained by ldlm_handle2lock() or
- * __ldlm_handle2lock().
- */
-#define LDLM_LOCK_PUT(lock) \
-do { \
- LDLM_LOCK_REF_DEL(lock); \
- /*LDLM_DEBUG((lock), "put");*/ \
- ldlm_lock_put(lock); \
-} while (0)
-
-/**
- * Release a lock reference obtained by some other means (see
- * LDLM_LOCK_PUT()).
- */
-#define LDLM_LOCK_RELEASE(lock) \
-do { \
- /*LDLM_DEBUG((lock), "put");*/ \
- ldlm_lock_put(lock); \
-} while (0)
-
-#define LDLM_LOCK_GET(lock) \
-({ \
- ldlm_lock_get(lock); \
- /*LDLM_DEBUG((lock), "get");*/ \
- lock; \
-})
-
-#define ldlm_lock_list_put(head, member, count) \
-({ \
- struct ldlm_lock *_lock, *_next; \
- int c = count; \
- list_for_each_entry_safe(_lock, _next, head, member) { \
- if (c-- == 0) \
- break; \
- list_del_init(&_lock->member); \
- LDLM_LOCK_RELEASE(_lock); \
- } \
- LASSERT(c <= 0); \
-})
-
-struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock);
-void ldlm_lock_put(struct ldlm_lock *lock);
-void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc);
-void ldlm_lock_addref(const struct lustre_handle *lockh, enum ldlm_mode mode);
-int ldlm_lock_addref_try(const struct lustre_handle *lockh,
- enum ldlm_mode mode);
-void ldlm_lock_decref(const struct lustre_handle *lockh, enum ldlm_mode mode);
-void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh,
- enum ldlm_mode mode);
-void ldlm_lock_fail_match_locked(struct ldlm_lock *lock);
-void ldlm_lock_allow_match(struct ldlm_lock *lock);
-void ldlm_lock_allow_match_locked(struct ldlm_lock *lock);
-enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
- const struct ldlm_res_id *,
- enum ldlm_type type, union ldlm_policy_data *,
- enum ldlm_mode mode, struct lustre_handle *,
- int unref);
-enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh,
- __u64 *bits);
-void ldlm_lock_cancel(struct ldlm_lock *lock);
-void ldlm_lock_dump_handle(int level, const struct lustre_handle *);
-void ldlm_unlink_lock_skiplist(struct ldlm_lock *req);
-
-/* resource.c */
-struct ldlm_namespace *
-ldlm_namespace_new(struct obd_device *obd, char *name,
- enum ldlm_side client, enum ldlm_appetite apt,
- enum ldlm_ns_type ns_type);
-int ldlm_namespace_cleanup(struct ldlm_namespace *ns, __u64 flags);
-void ldlm_namespace_free_prior(struct ldlm_namespace *ns,
- struct obd_import *imp,
- int force);
-void ldlm_namespace_free_post(struct ldlm_namespace *ns);
-void ldlm_namespace_get(struct ldlm_namespace *ns);
-void ldlm_namespace_put(struct ldlm_namespace *ns);
-void ldlm_debugfs_setup(void);
-void ldlm_debugfs_cleanup(void);
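
A typical client-side lifecycle pairs these as follows; a sketch with obd and imp assumed in scope, and LDLM_NAMESPACE_CLIENT being the client value of enum ldlm_side:

    struct ldlm_namespace *ns;

    ns = ldlm_namespace_new(obd, "mdc-example", LDLM_NAMESPACE_CLIENT,
                            LDLM_NAMESPACE_MODEST, LDLM_NS_TYPE_MDC);
    if (!ns)
            return -ENOMEM;

    /* ... enqueue, match and cancel locks in the namespace ... */

    ldlm_namespace_free_prior(ns, imp, 1); /* flush locks, notify the server */
    ldlm_namespace_free_post(ns);          /* tear down the structure itself */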
-
-/* resource.c - internal */
-struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns,
- struct ldlm_resource *parent,
- const struct ldlm_res_id *,
- enum ldlm_type type, int create);
-void ldlm_resource_putref(struct ldlm_resource *res);
-void ldlm_resource_add_lock(struct ldlm_resource *res,
- struct list_head *head,
- struct ldlm_lock *lock);
-void ldlm_resource_unlink_lock(struct ldlm_lock *lock);
-void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc);
-void ldlm_dump_all_namespaces(enum ldlm_side client, int level);
-void ldlm_namespace_dump(int level, struct ldlm_namespace *);
-void ldlm_resource_dump(int level, struct ldlm_resource *);
-int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *,
- const struct ldlm_res_id *);
-
-#define LDLM_RESOURCE_ADDREF(res) do { \
- lu_ref_add_atomic(&(res)->lr_reference, __func__, current); \
-} while (0)
-
-#define LDLM_RESOURCE_DELREF(res) do { \
- lu_ref_del(&(res)->lr_reference, __func__, current); \
-} while (0)
-
-/* ldlm_request.c */
-/** \defgroup ldlm_local_ast Default AST handlers for local locks
- * These AST handlers are typically used for server-side local locks and are
- * also used by client-side lock handlers to perform the minimum level
- * of base processing.
- * @{
- */
-int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data);
-/** @} ldlm_local_ast */
-
-/** \defgroup ldlm_cli_api API to operate on locks from actual LDLM users.
- * These are typically used by client and server (*_local versions)
- * to obtain and release locks.
- * @{
- */
-int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
- struct ldlm_enqueue_info *einfo,
- const struct ldlm_res_id *res_id,
- union ldlm_policy_data const *policy, __u64 *flags,
- void *lvb, __u32 lvb_len, enum lvb_type lvb_type,
- struct lustre_handle *lockh, int async);
-int ldlm_prep_enqueue_req(struct obd_export *exp,
- struct ptlrpc_request *req,
- struct list_head *cancels,
- int count);
-int ldlm_prep_elc_req(struct obd_export *exp,
- struct ptlrpc_request *req,
- int version, int opc, int canceloff,
- struct list_head *cancels, int count);
-
-int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
- enum ldlm_type type, __u8 with_policy,
- enum ldlm_mode mode,
- __u64 *flags, void *lvb, __u32 lvb_len,
- const struct lustre_handle *lockh, int rc);
-int ldlm_cli_update_pool(struct ptlrpc_request *req);
-int ldlm_cli_cancel(const struct lustre_handle *lockh,
- enum ldlm_cancel_flags cancel_flags);
-int ldlm_cli_cancel_unused(struct ldlm_namespace *, const struct ldlm_res_id *,
- enum ldlm_cancel_flags flags, void *opaque);
-int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
- const struct ldlm_res_id *res_id,
- union ldlm_policy_data *policy,
- enum ldlm_mode mode,
- enum ldlm_cancel_flags flags,
- void *opaque);
-int ldlm_cancel_resource_local(struct ldlm_resource *res,
- struct list_head *cancels,
- union ldlm_policy_data *policy,
- enum ldlm_mode mode, __u64 lock_flags,
- enum ldlm_cancel_flags cancel_flags,
- void *opaque);
-int ldlm_cli_cancel_list_local(struct list_head *cancels, int count,
- enum ldlm_cancel_flags flags);
-int ldlm_cli_cancel_list(struct list_head *head, int count,
- struct ptlrpc_request *req,
- enum ldlm_cancel_flags flags);
-/** @} ldlm_cli_api */
-
-/* mds/handler.c */
-/* This has to be here because recursive inclusion sucks. */
-int intent_disposition(struct ldlm_reply *rep, int flag);
-void intent_set_disposition(struct ldlm_reply *rep, int flag);
-
-/**
- * "Modes" of acquiring lock_res, necessary to tell lockdep that taking more
- * than one lock_res is deadlock-safe.
- */
-enum lock_res_type {
- LRT_NORMAL,
- LRT_NEW
-};
-
-/** Lock resource. */
-static inline void lock_res(struct ldlm_resource *res)
-{
- spin_lock(&res->lr_lock);
-}
-
-/** Lock resource, instructing the lockdep code that the nesting is safe. */
-static inline void lock_res_nested(struct ldlm_resource *res,
- enum lock_res_type mode)
-{
- spin_lock_nested(&res->lr_lock, mode);
-}
-
-/** Unlock resource. */
-static inline void unlock_res(struct ldlm_resource *res)
-{
- spin_unlock(&res->lr_lock);
-}
-
-/** Check if resource is already locked, assert if not. */
-static inline void check_res_locked(struct ldlm_resource *res)
-{
- assert_spin_locked(&res->lr_lock);
-}
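
A traversal of a resource's queues brackets the walk with these helpers; callees that require the resource already locked assert it with check_res_locked(). A sketch:

    struct ldlm_lock *lck;

    lock_res(res);
    list_for_each_entry(lck, &res->lr_granted, l_res_link) {
            check_res_locked(res);  /* what a callee would assert */
            /* ... examine lck->l_granted_mode, lck->l_policy_data, ... */
    }
    unlock_res(res);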
-
-struct ldlm_resource *lock_res_and_lock(struct ldlm_lock *lock);
-void unlock_res_and_lock(struct ldlm_lock *lock);
-
-/* ldlm_pool.c */
-/** \defgroup ldlm_pools Various LDLM pool related functions
- * These are not used outside of LDLM.
- * @{
- */
-int ldlm_pools_init(void);
-void ldlm_pools_fini(void);
-
-int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns,
- int idx, enum ldlm_side client);
-void ldlm_pool_fini(struct ldlm_pool *pl);
-void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock);
-void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock);
-/** @} */
-
-static inline int ldlm_extent_overlap(const struct ldlm_extent *ex1,
- const struct ldlm_extent *ex2)
-{
- return ex1->start <= ex2->end && ex2->start <= ex1->end;
-}
-
-/* check if @ex1 contains @ex2 */
-static inline int ldlm_extent_contain(const struct ldlm_extent *ex1,
- const struct ldlm_extent *ex2)
-{
- return ex1->start <= ex2->start && ex1->end >= ex2->end;
-}
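
Both helpers treat extents as closed intervals. For example, [0, 4095] and [4096, 8191] neither overlap nor contain one another, while [0, 8191] covers both:

    struct ldlm_extent a = { .start = 0,    .end = 8191 };
    struct ldlm_extent b = { .start = 0,    .end = 4095 };
    struct ldlm_extent c = { .start = 4096, .end = 8191 };

    /* ldlm_extent_overlap(&b, &c) == 0, ldlm_extent_overlap(&a, &b) == 1 */
    /* ldlm_extent_contain(&a, &c) == 1, ldlm_extent_contain(&b, &a) == 0 */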
-
-#endif
-/** @} LDLM */
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
deleted file mode 100644
index 53db031c4c8c..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
+++ /dev/null
@@ -1,402 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* -*- buffer-read-only: t -*- vi: set ro:
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-/**
- * \file lustre_dlm_flags.h
- * The flags and collections of flags (masks) for \see struct ldlm_lock.
- *
- * \addtogroup LDLM Lustre Distributed Lock Manager
- * @{
- *
- * \name flags
- * The flags and collections of flags (masks) for \see struct ldlm_lock.
- * @{
- */
-#ifndef LDLM_ALL_FLAGS_MASK
-
-/** l_flags bits marked as "all_flags" bits */
-#define LDLM_FL_ALL_FLAGS_MASK 0x00FFFFFFC08F932FULL
-
-/** extent, mode, or resource changed */
-#define LDLM_FL_LOCK_CHANGED 0x0000000000000001ULL /* bit 0 */
-#define ldlm_is_lock_changed(_l) LDLM_TEST_FLAG((_l), 1ULL << 0)
-#define ldlm_set_lock_changed(_l) LDLM_SET_FLAG((_l), 1ULL << 0)
-#define ldlm_clear_lock_changed(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 0)
-
-/**
- * Server placed lock on granted list, or a recovering client wants the
- * lock added to the granted list, no questions asked.
- */
-#define LDLM_FL_BLOCK_GRANTED 0x0000000000000002ULL /* bit 1 */
-#define ldlm_is_block_granted(_l) LDLM_TEST_FLAG((_l), 1ULL << 1)
-#define ldlm_set_block_granted(_l) LDLM_SET_FLAG((_l), 1ULL << 1)
-#define ldlm_clear_block_granted(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 1)
-
-/**
- * Server placed lock on conv list, or a recovering client wants the lock
- * added to the conv list, no questions asked.
- */
-#define LDLM_FL_BLOCK_CONV 0x0000000000000004ULL /* bit 2 */
-#define ldlm_is_block_conv(_l) LDLM_TEST_FLAG((_l), 1ULL << 2)
-#define ldlm_set_block_conv(_l) LDLM_SET_FLAG((_l), 1ULL << 2)
-#define ldlm_clear_block_conv(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 2)
-
-/**
- * Server placed lock on wait list, or a recovering client wants the lock
- * added to the wait list, no questions asked.
- */
-#define LDLM_FL_BLOCK_WAIT 0x0000000000000008ULL /* bit 3 */
-#define ldlm_is_block_wait(_l) LDLM_TEST_FLAG((_l), 1ULL << 3)
-#define ldlm_set_block_wait(_l) LDLM_SET_FLAG((_l), 1ULL << 3)
-#define ldlm_clear_block_wait(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 3)
-
-/** blocking or cancel packet was queued for sending. */
-#define LDLM_FL_AST_SENT 0x0000000000000020ULL /* bit 5 */
-#define ldlm_is_ast_sent(_l) LDLM_TEST_FLAG((_l), 1ULL << 5)
-#define ldlm_set_ast_sent(_l) LDLM_SET_FLAG((_l), 1ULL << 5)
-#define ldlm_clear_ast_sent(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 5)
-
-/**
- * Lock is being replayed. This could probably be implied by the fact that
- * one of BLOCK_{GRANTED,CONV,WAIT} is set, but that is pretty dangerous.
- */
-#define LDLM_FL_REPLAY 0x0000000000000100ULL /* bit 8 */
-#define ldlm_is_replay(_l) LDLM_TEST_FLAG((_l), 1ULL << 8)
-#define ldlm_set_replay(_l) LDLM_SET_FLAG((_l), 1ULL << 8)
-#define ldlm_clear_replay(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 8)
-
-/** Don't grant lock, just do intent. */
-#define LDLM_FL_INTENT_ONLY 0x0000000000000200ULL /* bit 9 */
-#define ldlm_is_intent_only(_l) LDLM_TEST_FLAG((_l), 1ULL << 9)
-#define ldlm_set_intent_only(_l) LDLM_SET_FLAG((_l), 1ULL << 9)
-#define ldlm_clear_intent_only(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 9)
-
-/** lock request has intent */
-#define LDLM_FL_HAS_INTENT 0x0000000000001000ULL /* bit 12 */
-#define ldlm_is_has_intent(_l) LDLM_TEST_FLAG((_l), 1ULL << 12)
-#define ldlm_set_has_intent(_l) LDLM_SET_FLAG((_l), 1ULL << 12)
-#define ldlm_clear_has_intent(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 12)
-
-/** flock deadlock detected */
-#define LDLM_FL_FLOCK_DEADLOCK 0x0000000000008000ULL /* bit 15 */
-#define ldlm_is_flock_deadlock(_l) LDLM_TEST_FLAG((_l), 1ULL << 15)
-#define ldlm_set_flock_deadlock(_l) LDLM_SET_FLAG((_l), 1ULL << 15)
-#define ldlm_clear_flock_deadlock(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 15)
-
-/** discard (no writeback) on cancel */
-#define LDLM_FL_DISCARD_DATA 0x0000000000010000ULL /* bit 16 */
-#define ldlm_is_discard_data(_l) LDLM_TEST_FLAG((_l), 1ULL << 16)
-#define ldlm_set_discard_data(_l) LDLM_SET_FLAG((_l), 1ULL << 16)
-#define ldlm_clear_discard_data(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 16)
-
-/** Blocked by group lock - wait indefinitely */
-#define LDLM_FL_NO_TIMEOUT 0x0000000000020000ULL /* bit 17 */
-#define ldlm_is_no_timeout(_l) LDLM_TEST_FLAG((_l), 1ULL << 17)
-#define ldlm_set_no_timeout(_l) LDLM_SET_FLAG((_l), 1ULL << 17)
-#define ldlm_clear_no_timeout(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 17)
-
-/**
- * The server was told not to wait if blocked. For AGL, the OST will not
- * send a glimpse callback.
- */
-#define LDLM_FL_BLOCK_NOWAIT 0x0000000000040000ULL /* bit 18 */
-#define ldlm_is_block_nowait(_l) LDLM_TEST_FLAG((_l), 1ULL << 18)
-#define ldlm_set_block_nowait(_l) LDLM_SET_FLAG((_l), 1ULL << 18)
-#define ldlm_clear_block_nowait(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 18)
-
-/** return blocking lock */
-#define LDLM_FL_TEST_LOCK 0x0000000000080000ULL /* bit 19 */
-#define ldlm_is_test_lock(_l) LDLM_TEST_FLAG((_l), 1ULL << 19)
-#define ldlm_set_test_lock(_l) LDLM_SET_FLAG((_l), 1ULL << 19)
-#define ldlm_clear_test_lock(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 19)
-
-/** match lock only */
-#define LDLM_FL_MATCH_LOCK 0x0000000000100000ULL /* bit 20 */
-
-/**
- * Immediately cancel such locks when they block some other locks. Send
- * cancel notification to original lock holder, but expect no reply. This
- * is for clients (like liblustre) that cannot be expected to reliably
- * respond to a blocking AST.
- */
-#define LDLM_FL_CANCEL_ON_BLOCK 0x0000000000800000ULL /* bit 23 */
-#define ldlm_is_cancel_on_block(_l) LDLM_TEST_FLAG((_l), 1ULL << 23)
-#define ldlm_set_cancel_on_block(_l) LDLM_SET_FLAG((_l), 1ULL << 23)
-#define ldlm_clear_cancel_on_block(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 23)
-
-/**
- * measure lock contention and return -EUSERS if locking contention is high
- */
-#define LDLM_FL_DENY_ON_CONTENTION 0x0000000040000000ULL /* bit 30 */
-#define ldlm_is_deny_on_contention(_l) LDLM_TEST_FLAG((_l), 1ULL << 30)
-#define ldlm_set_deny_on_contention(_l) LDLM_SET_FLAG((_l), 1ULL << 30)
-#define ldlm_clear_deny_on_contention(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 30)
-
-/**
- * These are flags that are mapped into the flags and ASTs of blocking
- * locks. Add FL_DISCARD to blocking ASTs.
- */
-#define LDLM_FL_AST_DISCARD_DATA 0x0000000080000000ULL /* bit 31 */
-#define ldlm_is_ast_discard_data(_l) LDLM_TEST_FLAG((_l), 1ULL << 31)
-#define ldlm_set_ast_discard_data(_l) LDLM_SET_FLAG((_l), 1ULL << 31)
-#define ldlm_clear_ast_discard_data(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 31)
-
-/**
- * Used to mark a lock as a target for -EINTR during cp_ast sleep
- * emulation racing with an upcoming bl_ast.
- */
-#define LDLM_FL_FAIL_LOC 0x0000000100000000ULL /* bit 32 */
-#define ldlm_is_fail_loc(_l) LDLM_TEST_FLAG((_l), 1ULL << 32)
-#define ldlm_set_fail_loc(_l) LDLM_SET_FLAG((_l), 1ULL << 32)
-#define ldlm_clear_fail_loc(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 32)
-
-/**
- * Used while processing the unused list to know that we have already
- * handled this lock and decided to skip it.
- */
-#define LDLM_FL_SKIPPED 0x0000000200000000ULL /* bit 33 */
-#define ldlm_is_skipped(_l) LDLM_TEST_FLAG((_l), 1ULL << 33)
-#define ldlm_set_skipped(_l) LDLM_SET_FLAG((_l), 1ULL << 33)
-#define ldlm_clear_skipped(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 33)
-
-/** this lock is being destroyed */
-#define LDLM_FL_CBPENDING 0x0000000400000000ULL /* bit 34 */
-#define ldlm_is_cbpending(_l) LDLM_TEST_FLAG((_l), 1ULL << 34)
-#define ldlm_set_cbpending(_l) LDLM_SET_FLAG((_l), 1ULL << 34)
-#define ldlm_clear_cbpending(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 34)
-
-/** not a real flag, not saved in lock */
-#define LDLM_FL_WAIT_NOREPROC 0x0000000800000000ULL /* bit 35 */
-#define ldlm_is_wait_noreproc(_l) LDLM_TEST_FLAG((_l), 1ULL << 35)
-#define ldlm_set_wait_noreproc(_l) LDLM_SET_FLAG((_l), 1ULL << 35)
-#define ldlm_clear_wait_noreproc(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 35)
-
-/** cancellation callback already run */
-#define LDLM_FL_CANCEL 0x0000001000000000ULL /* bit 36 */
-#define ldlm_is_cancel(_l) LDLM_TEST_FLAG((_l), 1ULL << 36)
-#define ldlm_set_cancel(_l) LDLM_SET_FLAG((_l), 1ULL << 36)
-#define ldlm_clear_cancel(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 36)
-
-/** whatever it might mean -- never transmitted? */
-#define LDLM_FL_LOCAL_ONLY 0x0000002000000000ULL /* bit 37 */
-#define ldlm_is_local_only(_l) LDLM_TEST_FLAG((_l), 1ULL << 37)
-#define ldlm_set_local_only(_l) LDLM_SET_FLAG((_l), 1ULL << 37)
-#define ldlm_clear_local_only(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 37)
-
-/** don't run the cancel callback under ldlm_cli_cancel_unused */
-#define LDLM_FL_FAILED 0x0000004000000000ULL /* bit 38 */
-#define ldlm_is_failed(_l) LDLM_TEST_FLAG((_l), 1ULL << 38)
-#define ldlm_set_failed(_l) LDLM_SET_FLAG((_l), 1ULL << 38)
-#define ldlm_clear_failed(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 38)
-
-/** lock cancel has already been sent */
-#define LDLM_FL_CANCELING 0x0000008000000000ULL /* bit 39 */
-#define ldlm_is_canceling(_l) LDLM_TEST_FLAG((_l), 1ULL << 39)
-#define ldlm_set_canceling(_l) LDLM_SET_FLAG((_l), 1ULL << 39)
-#define ldlm_clear_canceling(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 39)
-
-/** local lock (ie, no srv/cli split) */
-#define LDLM_FL_LOCAL 0x0000010000000000ULL /* bit 40 */
-#define ldlm_is_local(_l) LDLM_TEST_FLAG((_l), 1ULL << 40)
-#define ldlm_set_local(_l) LDLM_SET_FLAG((_l), 1ULL << 40)
-#define ldlm_clear_local(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 40)
-
-/**
- * XXX FIXME: This is being added to b_size as a low-risk fix to the
- * fact that the LVB filling happens _after_ the lock has been granted,
- * so another thread can match it before the LVB has been updated. As a
- * dirty hack, we set LDLM_FL_LVB_READY only after we've finished filling
- * in the LVB. This is only needed on LOV/OSC now, where the LVB is
- * actually used and callers must set it in the input flags.
- *
- * The proper fix is to do the granting inside of the completion AST,
- * which can be replaced with a LVB-aware wrapping function for OSC locks.
- * That change is pretty high-risk, though, and would need a lot more
- * testing.
- */
-#define LDLM_FL_LVB_READY 0x0000020000000000ULL /* bit 41 */
-#define ldlm_is_lvb_ready(_l) LDLM_TEST_FLAG((_l), 1ULL << 41)
-#define ldlm_set_lvb_ready(_l) LDLM_SET_FLAG((_l), 1ULL << 41)
-#define ldlm_clear_lvb_ready(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 41)
-
-/**
- * A lock contributes to the known minimum size (KMS) calculation until it
- * has finished the part of its cancellation that performs write back on its
- * dirty pages. It can remain on the granted list during this whole time.
- * Threads racing to update the KMS after performing their writeback need
- * to know to exclude each other's locks from the calculation as they walk
- * the granted list.
- */
-#define LDLM_FL_KMS_IGNORE 0x0000040000000000ULL /* bit 42 */
-#define ldlm_is_kms_ignore(_l) LDLM_TEST_FLAG((_l), 1ULL << 42)
-#define ldlm_set_kms_ignore(_l) LDLM_SET_FLAG((_l), 1ULL << 42)
-#define ldlm_clear_kms_ignore(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 42)
-
-/** completion AST to be executed */
-#define LDLM_FL_CP_REQD 0x0000080000000000ULL /* bit 43 */
-#define ldlm_is_cp_reqd(_l) LDLM_TEST_FLAG((_l), 1ULL << 43)
-#define ldlm_set_cp_reqd(_l) LDLM_SET_FLAG((_l), 1ULL << 43)
-#define ldlm_clear_cp_reqd(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 43)
-
-/** cleanup_resource has already handled the lock */
-#define LDLM_FL_CLEANED 0x0000100000000000ULL /* bit 44 */
-#define ldlm_is_cleaned(_l) LDLM_TEST_FLAG((_l), 1ULL << 44)
-#define ldlm_set_cleaned(_l) LDLM_SET_FLAG((_l), 1ULL << 44)
-#define ldlm_clear_cleaned(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 44)
-
-/**
- * Optimization hint: LDLM can run the blocking callback from the current
- * context without involving a separate thread, in order to decrease the
- * context-switch rate.
- */
-#define LDLM_FL_ATOMIC_CB 0x0000200000000000ULL /* bit 45 */
-#define ldlm_is_atomic_cb(_l) LDLM_TEST_FLAG((_l), 1ULL << 45)
-#define ldlm_set_atomic_cb(_l) LDLM_SET_FLAG((_l), 1ULL << 45)
-#define ldlm_clear_atomic_cb(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 45)
-
-/**
- * It may happen that a client initiates two operations, e.g. unlink and
- * mkdir, such that the server sends a blocking AST for conflicting locks
- * to this client for the first operation, whereas the second operation
- * has canceled this lock and is waiting for rpc_lock which is taken by
- * the first operation. LDLM_FL_BL_AST is set by ldlm_callback_handler() in
- * the lock to prevent the Early Lock Cancel (ELC) code from cancelling it.
- */
-#define LDLM_FL_BL_AST 0x0000400000000000ULL /* bit 46 */
-#define ldlm_is_bl_ast(_l) LDLM_TEST_FLAG((_l), 1ULL << 46)
-#define ldlm_set_bl_ast(_l) LDLM_SET_FLAG((_l), 1ULL << 46)
-#define ldlm_clear_bl_ast(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 46)
-
-/**
- * Set by ldlm_cancel_callback() when lock cache is dropped to let
- * ldlm_callback_handler() return EINVAL to the server. It is used when
- * ELC RPC is already prepared and is waiting for rpc_lock, too late to
- * send a separate CANCEL RPC.
- */
-#define LDLM_FL_BL_DONE 0x0000800000000000ULL /* bit 47 */
-#define ldlm_is_bl_done(_l) LDLM_TEST_FLAG((_l), 1ULL << 47)
-#define ldlm_set_bl_done(_l) LDLM_SET_FLAG((_l), 1ULL << 47)
-#define ldlm_clear_bl_done(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 47)
-
-/**
- * Don't put the lock into the LRU list, so that it is not canceled due
- * to aging. Used by MGC locks; they are cancelled only at unmount or
- * by callback.
- */
-#define LDLM_FL_NO_LRU 0x0001000000000000ULL /* bit 48 */
-#define ldlm_is_no_lru(_l) LDLM_TEST_FLAG((_l), 1ULL << 48)
-#define ldlm_set_no_lru(_l) LDLM_SET_FLAG((_l), 1ULL << 48)
-#define ldlm_clear_no_lru(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 48)
-
-/**
- * Set for locks that failed and where the server has been notified.
- *
- * Protected by lock and resource locks.
- */
-#define LDLM_FL_FAIL_NOTIFIED 0x0002000000000000ULL /* bit 49 */
-#define ldlm_is_fail_notified(_l) LDLM_TEST_FLAG((_l), 1ULL << 49)
-#define ldlm_set_fail_notified(_l) LDLM_SET_FLAG((_l), 1ULL << 49)
-#define ldlm_clear_fail_notified(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 49)
-
-/**
- * Set for locks that were removed from class hash table and will
- * be destroyed when last reference to them is released. Set by
- * ldlm_lock_destroy_internal().
- *
- * Protected by lock and resource locks.
- */
-#define LDLM_FL_DESTROYED 0x0004000000000000ULL /* bit 50 */
-#define ldlm_is_destroyed(_l) LDLM_TEST_FLAG((_l), 1ULL << 50)
-#define ldlm_set_destroyed(_l) LDLM_SET_FLAG((_l), 1ULL << 50)
-#define ldlm_clear_destroyed(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 50)
-
-/** flag whether this is a server namespace lock */
-#define LDLM_FL_SERVER_LOCK 0x0008000000000000ULL /* bit 51 */
-#define ldlm_is_server_lock(_l) LDLM_TEST_FLAG((_l), 1ULL << 51)
-#define ldlm_set_server_lock(_l) LDLM_SET_FLAG((_l), 1ULL << 51)
-#define ldlm_clear_server_lock(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 51)
-
-/**
- * It's set in lock_res_and_lock() and unset in unlock_res_and_lock().
- *
- * NB: compared with check_res_locked(), checking this bit is cheaper.
- * Also, spin_is_locked() is deprecated in kernel code; one reason is
- * that it works only on SMP, so the user needs to add extra macros
- * like LASSERT_SPIN_LOCKED for uniprocessor kernels.
- */
-#define LDLM_FL_RES_LOCKED 0x0010000000000000ULL /* bit 52 */
-#define ldlm_is_res_locked(_l) LDLM_TEST_FLAG((_l), 1ULL << 52)
-#define ldlm_set_res_locked(_l) LDLM_SET_FLAG((_l), 1ULL << 52)
-#define ldlm_clear_res_locked(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 52)
-
-/**
- * It's set once we call ldlm_add_waiting_lock_res_locked() to start the
- * lock-timeout timer and it will never be reset.
- *
- * Protected by lock and resource locks.
- */
-#define LDLM_FL_WAITED 0x0020000000000000ULL /* bit 53 */
-#define ldlm_is_waited(_l) LDLM_TEST_FLAG((_l), 1ULL << 53)
-#define ldlm_set_waited(_l) LDLM_SET_FLAG((_l), 1ULL << 53)
-#define ldlm_clear_waited(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 53)
-
-/** Flag whether this is a server namespace lock. */
-#define LDLM_FL_NS_SRV 0x0040000000000000ULL /* bit 54 */
-#define ldlm_is_ns_srv(_l) LDLM_TEST_FLAG((_l), 1ULL << 54)
-#define ldlm_set_ns_srv(_l) LDLM_SET_FLAG((_l), 1ULL << 54)
-#define ldlm_clear_ns_srv(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 54)
-
-/** Flag whether this lock can be reused. Used by exclusive open. */
-#define LDLM_FL_EXCL 0x0080000000000000ULL /* bit 55 */
-#define ldlm_is_excl(_l) LDLM_TEST_FLAG((_l), 1ULL << 55)
-#define ldlm_set_excl(_l) LDLM_SET_FLAG((_l), 1ULL << 55)
-#define ldlm_clear_excl(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 55)
-
-/** l_flags bits marked as "ast" bits */
-#define LDLM_FL_AST_MASK (LDLM_FL_FLOCK_DEADLOCK |\
- LDLM_FL_AST_DISCARD_DATA)
-
-/** l_flags bits marked as "blocked" bits */
-#define LDLM_FL_BLOCKED_MASK (LDLM_FL_BLOCK_GRANTED |\
- LDLM_FL_BLOCK_CONV |\
- LDLM_FL_BLOCK_WAIT)
-
-/** l_flags bits marked as "gone" bits */
-#define LDLM_FL_GONE_MASK (LDLM_FL_DESTROYED |\
- LDLM_FL_FAILED)
-
-/** l_flags bits marked as "inherit" bits */
-/* Flags inherited from wire on enqueue/reply between client/server. */
-/* NO_TIMEOUT flag to force ldlm_lock_match() to wait with no timeout. */
-/* TEST_LOCK flag to not let TEST lock to be granted. */
-#define LDLM_FL_INHERIT_MASK (LDLM_FL_CANCEL_ON_BLOCK |\
- LDLM_FL_NO_TIMEOUT |\
- LDLM_FL_TEST_LOCK)
-
-/** test for ldlm_lock flag bit set */
-#define LDLM_TEST_FLAG(_l, _b) (((_l)->l_flags & (_b)) != 0)
-
-/** multi-bit test: are any of mask bits set? */
-#define LDLM_HAVE_MASK(_l, _m) ((_l)->l_flags & LDLM_FL_##_m##_MASK)
-
-/** set a ldlm_lock flag bit */
-#define LDLM_SET_FLAG(_l, _b) ((_l)->l_flags |= (_b))
-
-/** clear a ldlm_lock flag bit */
-#define LDLM_CLEAR_FLAG(_l, _b) ((_l)->l_flags &= ~(_b))
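
The generated per-flag helpers above are thin wrappers over these primitives, and whole masks can be tested in one shot with LDLM_HAVE_MASK(). Illustrative use; handle_blocked_lock() is hypothetical:

    if (ldlm_is_cancel_on_block(lock))      /* LDLM_TEST_FLAG(lock, 1ULL << 23) */
            ldlm_set_discard_data(lock);    /* LDLM_SET_FLAG(lock, 1ULL << 16) */

    if (LDLM_HAVE_MASK(lock, BLOCKED))      /* any of the three BLOCK_* bits */
            handle_blocked_lock(lock);      /* hypothetical helper */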
-
-/** @} subgroup */
-/** @} group */
-
-#endif /* LDLM_ALL_FLAGS_MASK */
diff --git a/drivers/staging/lustre/lustre/include/lustre_errno.h b/drivers/staging/lustre/lustre/include/lustre_errno.h
deleted file mode 100644
index 59fbb9f47ff1..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_errno.h
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.txt
- *
- * GPL HEADER END
- */
-/*
- * Copyright (C) 2011 FUJITSU LIMITED. All rights reserved.
- *
- * Copyright (c) 2013, Intel Corporation.
- */
-
-#ifndef LUSTRE_ERRNO_H
-#define LUSTRE_ERRNO_H
-
-/*
- * Only "network" errnos, which are defined below, are allowed on wire (or on
- * disk). Generic routines exist to help translate between these and a subset
- * of the "host" errnos. Some host errnos (e.g., EDEADLOCK) are intentionally
- * left out. See also the comment on lustre_errno_hton_mapping[].
- *
- * To maintain compatibility with existing x86 clients and servers, each of
- * these network errnos has the same numerical value as its corresponding host
- * errno on x86.
- */
-#define LUSTRE_EPERM 1 /* Operation not permitted */
-#define LUSTRE_ENOENT 2 /* No such file or directory */
-#define LUSTRE_ESRCH 3 /* No such process */
-#define LUSTRE_EINTR 4 /* Interrupted system call */
-#define LUSTRE_EIO 5 /* I/O error */
-#define LUSTRE_ENXIO 6 /* No such device or address */
-#define LUSTRE_E2BIG 7 /* Argument list too long */
-#define LUSTRE_ENOEXEC 8 /* Exec format error */
-#define LUSTRE_EBADF 9 /* Bad file number */
-#define LUSTRE_ECHILD 10 /* No child processes */
-#define LUSTRE_EAGAIN 11 /* Try again */
-#define LUSTRE_ENOMEM 12 /* Out of memory */
-#define LUSTRE_EACCES 13 /* Permission denied */
-#define LUSTRE_EFAULT 14 /* Bad address */
-#define LUSTRE_ENOTBLK 15 /* Block device required */
-#define LUSTRE_EBUSY 16 /* Device or resource busy */
-#define LUSTRE_EEXIST 17 /* File exists */
-#define LUSTRE_EXDEV 18 /* Cross-device link */
-#define LUSTRE_ENODEV 19 /* No such device */
-#define LUSTRE_ENOTDIR 20 /* Not a directory */
-#define LUSTRE_EISDIR 21 /* Is a directory */
-#define LUSTRE_EINVAL 22 /* Invalid argument */
-#define LUSTRE_ENFILE 23 /* File table overflow */
-#define LUSTRE_EMFILE 24 /* Too many open files */
-#define LUSTRE_ENOTTY 25 /* Not a typewriter */
-#define LUSTRE_ETXTBSY 26 /* Text file busy */
-#define LUSTRE_EFBIG 27 /* File too large */
-#define LUSTRE_ENOSPC 28 /* No space left on device */
-#define LUSTRE_ESPIPE 29 /* Illegal seek */
-#define LUSTRE_EROFS 30 /* Read-only file system */
-#define LUSTRE_EMLINK 31 /* Too many links */
-#define LUSTRE_EPIPE 32 /* Broken pipe */
-#define LUSTRE_EDOM 33 /* Math argument out of func domain */
-#define LUSTRE_ERANGE 34 /* Math result not representable */
-#define LUSTRE_EDEADLK 35 /* Resource deadlock would occur */
-#define LUSTRE_ENAMETOOLONG 36 /* File name too long */
-#define LUSTRE_ENOLCK 37 /* No record locks available */
-#define LUSTRE_ENOSYS 38 /* Function not implemented */
-#define LUSTRE_ENOTEMPTY 39 /* Directory not empty */
-#define LUSTRE_ELOOP 40 /* Too many symbolic links found */
-#define LUSTRE_ENOMSG 42 /* No message of desired type */
-#define LUSTRE_EIDRM 43 /* Identifier removed */
-#define LUSTRE_ECHRNG 44 /* Channel number out of range */
-#define LUSTRE_EL2NSYNC 45 /* Level 2 not synchronized */
-#define LUSTRE_EL3HLT 46 /* Level 3 halted */
-#define LUSTRE_EL3RST 47 /* Level 3 reset */
-#define LUSTRE_ELNRNG 48 /* Link number out of range */
-#define LUSTRE_EUNATCH 49 /* Protocol driver not attached */
-#define LUSTRE_ENOCSI 50 /* No CSI structure available */
-#define LUSTRE_EL2HLT 51 /* Level 2 halted */
-#define LUSTRE_EBADE 52 /* Invalid exchange */
-#define LUSTRE_EBADR 53 /* Invalid request descriptor */
-#define LUSTRE_EXFULL 54 /* Exchange full */
-#define LUSTRE_ENOANO 55 /* No anode */
-#define LUSTRE_EBADRQC 56 /* Invalid request code */
-#define LUSTRE_EBADSLT 57 /* Invalid slot */
-#define LUSTRE_EBFONT 59 /* Bad font file format */
-#define LUSTRE_ENOSTR 60 /* Device not a stream */
-#define LUSTRE_ENODATA 61 /* No data available */
-#define LUSTRE_ETIME 62 /* Timer expired */
-#define LUSTRE_ENOSR 63 /* Out of streams resources */
-#define LUSTRE_ENONET 64 /* Machine is not on the network */
-#define LUSTRE_ENOPKG 65 /* Package not installed */
-#define LUSTRE_EREMOTE 66 /* Object is remote */
-#define LUSTRE_ENOLINK 67 /* Link has been severed */
-#define LUSTRE_EADV 68 /* Advertise error */
-#define LUSTRE_ESRMNT 69 /* Srmount error */
-#define LUSTRE_ECOMM 70 /* Communication error on send */
-#define LUSTRE_EPROTO 71 /* Protocol error */
-#define LUSTRE_EMULTIHOP 72 /* Multihop attempted */
-#define LUSTRE_EDOTDOT 73 /* RFS specific error */
-#define LUSTRE_EBADMSG 74 /* Not a data message */
-#define LUSTRE_EOVERFLOW 75 /* Value too large for data type */
-#define LUSTRE_ENOTUNIQ 76 /* Name not unique on network */
-#define LUSTRE_EBADFD 77 /* File descriptor in bad state */
-#define LUSTRE_EREMCHG 78 /* Remote address changed */
-#define LUSTRE_ELIBACC 79 /* Can't access needed shared library */
-#define LUSTRE_ELIBBAD 80 /* Access corrupted shared library */
-#define LUSTRE_ELIBSCN 81 /* .lib section in a.out corrupted */
-#define LUSTRE_ELIBMAX 82 /* Trying to link too many libraries */
-#define LUSTRE_ELIBEXEC 83 /* Cannot exec a shared lib directly */
-#define LUSTRE_EILSEQ 84 /* Illegal byte sequence */
-#define LUSTRE_ERESTART 85 /* Restart interrupted system call */
-#define LUSTRE_ESTRPIPE 86 /* Streams pipe error */
-#define LUSTRE_EUSERS 87 /* Too many users */
-#define LUSTRE_ENOTSOCK 88 /* Socket operation on non-socket */
-#define LUSTRE_EDESTADDRREQ 89 /* Destination address required */
-#define LUSTRE_EMSGSIZE 90 /* Message too long */
-#define LUSTRE_EPROTOTYPE 91 /* Protocol wrong type for socket */
-#define LUSTRE_ENOPROTOOPT 92 /* Protocol not available */
-#define LUSTRE_EPROTONOSUPPORT 93 /* Protocol not supported */
-#define LUSTRE_ESOCKTNOSUPPORT 94 /* Socket type not supported */
-#define LUSTRE_EOPNOTSUPP 95 /* Operation not supported */
-#define LUSTRE_EPFNOSUPPORT 96 /* Protocol family not supported */
-#define LUSTRE_EAFNOSUPPORT 97 /* Address family not supported */
-#define LUSTRE_EADDRINUSE 98 /* Address already in use */
-#define LUSTRE_EADDRNOTAVAIL 99 /* Cannot assign requested address */
-#define LUSTRE_ENETDOWN 100 /* Network is down */
-#define LUSTRE_ENETUNREACH 101 /* Network is unreachable */
-#define LUSTRE_ENETRESET 102 /* Network dropped connection on reset */
-#define LUSTRE_ECONNABORTED 103 /* Software caused connection abort */
-#define LUSTRE_ECONNRESET 104 /* Connection reset by peer */
-#define LUSTRE_ENOBUFS 105 /* No buffer space available */
-#define LUSTRE_EISCONN 106 /* Transport endpoint is connected */
-#define LUSTRE_ENOTCONN 107 /* Transport endpoint not connected */
-#define LUSTRE_ESHUTDOWN 108 /* Cannot send after shutdown */
-#define LUSTRE_ETOOMANYREFS 109 /* Too many references: cannot splice */
-#define LUSTRE_ETIMEDOUT 110 /* Connection timed out */
-#define LUSTRE_ECONNREFUSED 111 /* Connection refused */
-#define LUSTRE_EHOSTDOWN 112 /* Host is down */
-#define LUSTRE_EHOSTUNREACH 113 /* No route to host */
-#define LUSTRE_EALREADY 114 /* Operation already in progress */
-#define LUSTRE_EINPROGRESS 115 /* Operation now in progress */
-#define LUSTRE_ESTALE 116 /* Stale file handle */
-#define LUSTRE_EUCLEAN 117 /* Structure needs cleaning */
-#define LUSTRE_ENOTNAM 118 /* Not a XENIX named type file */
-#define LUSTRE_ENAVAIL 119 /* No XENIX semaphores available */
-#define LUSTRE_EISNAM 120 /* Is a named type file */
-#define LUSTRE_EREMOTEIO 121 /* Remote I/O error */
-#define LUSTRE_EDQUOT 122 /* Quota exceeded */
-#define LUSTRE_ENOMEDIUM 123 /* No medium found */
-#define LUSTRE_EMEDIUMTYPE 124 /* Wrong medium type */
-#define LUSTRE_ECANCELED 125 /* Operation Canceled */
-#define LUSTRE_ENOKEY 126 /* Required key not available */
-#define LUSTRE_EKEYEXPIRED 127 /* Key has expired */
-#define LUSTRE_EKEYREVOKED 128 /* Key has been revoked */
-#define LUSTRE_EKEYREJECTED 129 /* Key was rejected by service */
-#define LUSTRE_EOWNERDEAD 130 /* Owner died */
-#define LUSTRE_ENOTRECOVERABLE 131 /* State not recoverable */
-#define LUSTRE_ERESTARTSYS 512
-#define LUSTRE_ERESTARTNOINTR 513
-#define LUSTRE_ERESTARTNOHAND 514 /* restart if no handler.. */
-#define LUSTRE_ENOIOCTLCMD 515 /* No ioctl command */
-#define LUSTRE_ERESTART_RESTARTBLOCK 516 /* restart via sys_restart_syscall */
-#define LUSTRE_EBADHANDLE 521 /* Illegal NFS file handle */
-#define LUSTRE_ENOTSYNC 522 /* Update synchronization mismatch */
-#define LUSTRE_EBADCOOKIE 523 /* Cookie is stale */
-#define LUSTRE_ENOTSUPP 524 /* Operation is not supported */
-#define LUSTRE_ETOOSMALL 525 /* Buffer or request is too small */
-#define LUSTRE_ESERVERFAULT 526 /* An untranslatable error occurred */
-#define LUSTRE_EBADTYPE 527 /* Type not supported by server */
-#define LUSTRE_EJUKEBOX 528 /* Request won't finish until timeout */
-#define LUSTRE_EIOCBQUEUED 529 /* iocb queued await completion event */
-#define LUSTRE_EIOCBRETRY 530 /* iocb queued, will trigger a retry */
-
-/*
- * Translations are optimized away on x86. Host errnos that shouldn't be put
- * on wire could leak through as a result. Do not count on this side effect.
- */
-#ifdef CONFIG_LUSTRE_TRANSLATE_ERRNOS
-unsigned int lustre_errno_hton(unsigned int h);
-unsigned int lustre_errno_ntoh(unsigned int n);
-#else
-#define lustre_errno_hton(h) (h)
-#define lustre_errno_ntoh(n) (n)
-#endif
-
-#endif /* LUSTRE_ERRNO_H */
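
When CONFIG_LUSTRE_TRANSLATE_ERRNOS is set, the two prototypes above imply a
table-driven mapping between host and wire errno values. A minimal sketch of
such a translation, assuming a hypothetical lustre_errno_hton_mapping[] table
(names and fallback policy here are illustrative, not the actual obdclass
implementation):

    /* Sketch only: translate a host errno to its on-wire value.
     * Unknown or out-of-range host errnos fall back to LUSTRE_EIO
     * rather than leaking a host-specific value onto the wire.
     */
    static const unsigned int lustre_errno_hton_mapping[] = {
        [EPERM]  = LUSTRE_EPERM,
        [ENOENT] = LUSTRE_ENOENT,
        [EIO]    = LUSTRE_EIO,
        /* ... one entry per translatable errno ... */
    };

    unsigned int lustre_errno_hton(unsigned int h)
    {
        unsigned int n;

        if (h == 0)
            return 0;               /* success passes through unchanged */
        if (h < ARRAY_SIZE(lustre_errno_hton_mapping)) {
            n = lustre_errno_hton_mapping[h];
            if (n != 0)
                return n;
        }
        return LUSTRE_EIO;          /* untranslatable host errno */
    }
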
diff --git a/drivers/staging/lustre/lustre/include/lustre_export.h b/drivers/staging/lustre/lustre/include/lustre_export.h
deleted file mode 100644
index 79ad5aae86b9..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_export.h
+++ /dev/null
@@ -1,250 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-/** \defgroup obd_export PortalRPC export definitions
- *
- * @{
- */
-
-#ifndef __EXPORT_H
-#define __EXPORT_H
-
-/** \defgroup export export
- *
- * @{
- */
-
-#include <lprocfs_status.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <lustre_dlm.h>
-
-enum obd_option {
- OBD_OPT_FORCE = 0x0001,
- OBD_OPT_FAILOVER = 0x0002,
- OBD_OPT_ABORT_RECOV = 0x0004,
-};
-
-/**
- * Export structure. Represents the target side of a connection in portals.
- * Also used in Lustre to connect between layers on the same node when
- * there is no network connection in between.
- * For every connected client there is an export structure on the server
- * attached to the same obd device.
- */
-struct obd_export {
- /**
- * Export handle; its id is provided to the client on connect.
- * Subsequent client RPCs contain this handle id to identify
- * which export they are talking to.
- */
- struct portals_handle exp_handle;
- atomic_t exp_refcount;
- /**
- * Set of counters below is to track where export references are
- * kept. The exp_rpc_count is used for reconnect handling also,
- * the cb_count and locks_count are for debug purposes only for now.
- * The sum of them should be less than exp_refcount by 3
- */
- atomic_t exp_rpc_count; /* RPC references */
- atomic_t exp_cb_count; /* Commit callback references */
- /** Number of queued replay requests to be processed */
- atomic_t exp_replay_count;
- atomic_t exp_locks_count; /** Lock references */
-#if LUSTRE_TRACKS_LOCK_EXP_REFS
- struct list_head exp_locks_list;
- spinlock_t exp_locks_list_guard;
-#endif
- /** UUID of client connected to this export */
- struct obd_uuid exp_client_uuid;
- /** To link all exports on an obd device */
- struct list_head exp_obd_chain;
- /** work_struct for destruction of export */
- struct work_struct exp_zombie_work;
- struct rhash_head exp_uuid_hash; /** uuid-export hash*/
- /** Obd device of this export */
- struct obd_device *exp_obd;
- /**
- * "reverse" import to send requests (e.g. from ldlm) back to client
- * exp_lock protect its change
- */
- struct obd_import *exp_imp_reverse;
- struct lprocfs_stats *exp_md_stats;
- /** Active connection */
- struct ptlrpc_connection *exp_connection;
- /** Connection count value from last successful reconnect rpc */
- __u32 exp_conn_cnt;
- struct list_head exp_outstanding_replies;
- struct list_head exp_uncommitted_replies;
- spinlock_t exp_uncommitted_replies_lock;
- /** Last committed transno for this export */
- __u64 exp_last_committed;
- /** On replay all requests waiting for replay are linked here */
- struct list_head exp_req_replay_queue;
- /**
- * protects exp_flags, exp_outstanding_replies and the change
- * of exp_imp_reverse
- */
- spinlock_t exp_lock;
- /** Compatibility flags for this export are embedded into
- * exp_connect_data
- */
- struct obd_connect_data exp_connect_data;
- enum obd_option exp_flags;
- unsigned long exp_failed:1,
- exp_disconnected:1,
- exp_connecting:1,
- exp_flvr_changed:1,
- exp_flvr_adapt:1;
- /* also protected by exp_lock */
- enum lustre_sec_part exp_sp_peer;
- struct sptlrpc_flavor exp_flvr; /* current */
- struct sptlrpc_flavor exp_flvr_old[2]; /* about-to-expire */
- time64_t exp_flvr_expire[2]; /* seconds */
-
- /** protects exp_hp_rpcs */
- spinlock_t exp_rpc_lock;
- struct list_head exp_hp_rpcs; /* (potential) HP RPCs */
-
- /** blocking dlm lock list, protected by exp_bl_list_lock */
- struct list_head exp_bl_list;
- spinlock_t exp_bl_list_lock;
-};
-
-static inline __u64 *exp_connect_flags_ptr(struct obd_export *exp)
-{
- return &exp->exp_connect_data.ocd_connect_flags;
-}
-
-static inline __u64 exp_connect_flags(struct obd_export *exp)
-{
- return *exp_connect_flags_ptr(exp);
-}
-
-static inline int exp_max_brw_size(struct obd_export *exp)
-{
- if (exp_connect_flags(exp) & OBD_CONNECT_BRW_SIZE)
- return exp->exp_connect_data.ocd_brw_size;
-
- return ONE_MB_BRW_SIZE;
-}
-
-static inline int exp_connect_multibulk(struct obd_export *exp)
-{
- return exp_max_brw_size(exp) > ONE_MB_BRW_SIZE;
-}
-
-static inline int exp_connect_cancelset(struct obd_export *exp)
-{
- return !!(exp_connect_flags(exp) & OBD_CONNECT_CANCELSET);
-}
-
-static inline int exp_connect_lru_resize(struct obd_export *exp)
-{
- return !!(exp_connect_flags(exp) & OBD_CONNECT_LRU_RESIZE);
-}
-
-static inline int exp_connect_vbr(struct obd_export *exp)
-{
- return !!(exp_connect_flags(exp) & OBD_CONNECT_VBR);
-}
-
-static inline int exp_connect_som(struct obd_export *exp)
-{
- return !!(exp_connect_flags(exp) & OBD_CONNECT_SOM);
-}
-
-static inline int exp_connect_umask(struct obd_export *exp)
-{
- return !!(exp_connect_flags(exp) & OBD_CONNECT_UMASK);
-}
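
The inline helpers above all follow one pattern: test a negotiated bit in
ocd_connect_flags before relying on the corresponding feature. A hedged usage
sketch (the function name is invented for illustration):

    /* Sketch only: gate bulk I/O size on the negotiated connect flags */
    static int my_choose_brw_size(struct obd_export *exp)  /* hypothetical */
    {
        /* exp_max_brw_size() already falls back to ONE_MB_BRW_SIZE when
         * the peer did not negotiate OBD_CONNECT_BRW_SIZE.
         */
        int brw_size = exp_max_brw_size(exp);

        if (exp_connect_multibulk(exp))
            return brw_size;        /* peer accepts RPCs larger than 1MB */
        return ONE_MB_BRW_SIZE;
    }
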
-
-static inline int imp_connect_lru_resize(struct obd_import *imp)
-{
- struct obd_connect_data *ocd;
-
- ocd = &imp->imp_connect_data;
- return !!(ocd->ocd_connect_flags & OBD_CONNECT_LRU_RESIZE);
-}
-
-static inline int exp_connect_layout(struct obd_export *exp)
-{
- return !!(exp_connect_flags(exp) & OBD_CONNECT_LAYOUTLOCK);
-}
-
-static inline bool exp_connect_lvb_type(struct obd_export *exp)
-{
- if (exp_connect_flags(exp) & OBD_CONNECT_LVB_TYPE)
- return true;
- else
- return false;
-}
-
-static inline bool imp_connect_lvb_type(struct obd_import *imp)
-{
- struct obd_connect_data *ocd;
-
- ocd = &imp->imp_connect_data;
- if (ocd->ocd_connect_flags & OBD_CONNECT_LVB_TYPE)
- return true;
- else
- return false;
-}
-
-static inline __u64 exp_connect_ibits(struct obd_export *exp)
-{
- struct obd_connect_data *ocd;
-
- ocd = &exp->exp_connect_data;
- return ocd->ocd_ibits_known;
-}
-
-static inline bool imp_connect_disp_stripe(struct obd_import *imp)
-{
- struct obd_connect_data *ocd;
-
- ocd = &imp->imp_connect_data;
- return ocd->ocd_connect_flags & OBD_CONNECT_DISP_STRIPE;
-}
-
-struct obd_export *class_conn2export(struct lustre_handle *conn);
-
-#define KKUC_CT_DATA_MAGIC 0x092013cea
-struct kkuc_ct_data {
- __u32 kcd_magic;
- struct obd_uuid kcd_uuid;
- __u32 kcd_archive;
-};
-
-/** @} export */
-
-#endif /* __EXPORT_H */
-/** @} obd_export */
diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h
deleted file mode 100644
index 094ad282de2c..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_fid.h
+++ /dev/null
@@ -1,676 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/include/lustre_fid.h
- *
- * Author: Yury Umanets <umka@clusterfs.com>
- */
-
-#ifndef __LUSTRE_FID_H
-#define __LUSTRE_FID_H
-
-/** \defgroup fid fid
- *
- * @{
- *
- * http://wiki.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs
- * describes the FID namespace and interoperability requirements for FIDs.
- * The important parts of that document are included here for reference.
- *
- * FID
- * File IDentifier generated by client from range allocated by the SEQuence
- * service and stored in struct lu_fid. The FID is composed of three parts:
- * SEQuence, ObjectID, and VERsion. The SEQ component is a filesystem
- * unique 64-bit integer, and only one client is ever assigned any SEQ value.
- * The first 0x400 FID_SEQ_NORMAL [2^33, 2^33 + 0x400] values are reserved
- * for system use. The OID component is a 32-bit value generated by the
- * client on a per-SEQ basis to allow creating many unique FIDs without
- * communication with the server. The VER component is a 32-bit value that
- * distinguishes between different FID instantiations, such as snapshots or
- * separate subtrees within the filesystem. FIDs with the same VER field
- * are considered part of the same namespace.
- *
- * OLD filesystems are those upgraded from Lustre 1.x that predate FIDs, and
- * MDTs use 32-bit ldiskfs internal inode/generation numbers (IGIFs), while
- * OSTs use 64-bit Lustre object IDs and generation numbers.
- *
- * NEW filesystems are those formatted since the introduction of FIDs.
- *
- * IGIF
- * Inode and Generation In FID, a surrogate FID used to globally identify
- * an existing object on OLD formatted MDT file system. This would only be
- * used on MDT0 in a DNE filesystem, because there cannot be more than one
- * MDT in an OLD formatted filesystem. Belongs to sequence in [12, 2^32 - 1]
- * range, where inode number is stored in SEQ, and inode generation is in OID.
- * NOTE: This assumes no more than 2^32-1 inodes exist in the MDT filesystem,
- * which is the maximum possible for an ldiskfs backend. It also assumes
- * that the reserved ext3/ext4/ldiskfs inode numbers [0-11] are never visible
- * to clients, which has always been true.
- *
- * IDIF
- * object ID In FID, a surrogate FID used to globally identify an existing
- * OST object on OLD formatted OST file system. Belongs to a sequence in
- * [2^32, 2^33 - 1]. Sequence number is calculated as:
- *
- * 1 << 32 | (ost_index << 16) | ((objid >> 32) & 0xffff)
- *
- * that is, SEQ consists of 16-bit OST index, and higher 16 bits of object
- * ID. The generation of unique SEQ values per OST allows the IDIF FIDs to
- * be identified in the FLD correctly. The OID field is calculated as:
- *
- * objid & 0xffffffff
- *
- * that is, it consists of lower 32 bits of object ID. For objects within
- * the IDIF range, object ID extraction will be:
- *
- * o_id = ((fid->f_seq & 0xffff) << 32) | fid->f_oid;
- * o_seq = 0; // formerly group number
- *
- * NOTE: This assumes that no more than 2^48-1 objects have ever been created
- * on any OST, and that no more than 65535 OSTs are in use. Both are very
- * reasonable assumptions, i.e. an IDIF can uniquely map all objects assuming
- * a maximum creation rate of 1M objects per second for a maximum of 9 years,
- * or combinations thereof.
- *
- * OST_MDT0
- * Surrogate FID used to identify an existing object on OLD formatted OST
- * filesystem. Belongs to the reserved SEQuence 0, and is used prior to
- * the introduction of FID-on-OST, at which point IDIF will be used to
- * identify objects as residing on a specific OST.
- *
- * LLOG
- * For Lustre Log objects the object sequence 1 is used. This is compatible
- * with both OLD and NEW namespaces, as this SEQ number is in the
- * ext3/ldiskfs reserved inode range and does not conflict with IGIF
- * sequence numbers.
- *
- * ECHO
- * For testing OST IO performance the object sequence 2 is used. This is
- * compatible with both OLD and NEW namespaces, as this SEQ number is in
- * the ext3/ldiskfs reserved inode range and does not conflict with IGIF
- * sequence numbers.
- *
- * OST_MDT1 .. OST_MAX
- * For testing with multiple MDTs the object sequence 3 through 9 is used,
- * allowing direct mapping of MDTs 1 through 7 respectively, for a total
- * of 8 MDTs including OST_MDT0. This matches the legacy CMD project "group"
- * mappings. However, this SEQ range is only for testing prior to any
- * production DNE release, as the objects in this range conflict across all
- * OSTs, as the OST index is not part of the FID. For production DNE usage,
- * OST objects created by MDT1+ will use FID_SEQ_NORMAL FIDs.
- *
- * DLM OST objid to IDIF mapping
- * For compatibility with existing OLD OST network protocol structures, the
- * FID must map onto the o_id and o_seq in a manner that ensures existing
- * objects are identified consistently for IO, as well as onto the LDLM
- * namespace so that, for IDIFs, there is only a single resource name for
- * any object in the DLM. The OLD OST object DLM resource mapping is:
- *
- * resource[] = {o_id, o_seq, 0, 0}; // o_seq == 0 for production releases
- *
- * The NEW OST object DLM resource mapping is the same for both MDT and OST:
- *
- * resource[] = {SEQ, OID, VER, HASH};
- *
- * NOTE: for mapping IDIF values to DLM resource names the o_id may be
- * larger than the 2^33 reserved sequence numbers for IDIF, so it is possible
- * for the o_id numbers to overlap FID SEQ numbers in the resource. However,
- * in all production releases the OLD o_seq field is always zero, and all
- * valid FID OID values are non-zero, so the lock resources will not collide.
- * Even so, the MDT and OST resources are also in different LDLM namespaces.
- */
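
As a concrete check of the IDIF packing described above, a self-contained
sketch (userspace, values invented for illustration) that round-trips an OST
index and object id:

    /* Sketch only: pack and recover an object id via the IDIF rules above */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t objid = 0x123456789abULL;   /* arbitrary 48-bit object id */
        uint32_t ost_index = 7;

        /* SEQ = 1 << 32 | (ost_index << 16) | ((objid >> 32) & 0xffff) */
        uint64_t seq = (1ULL << 32) | ((uint64_t)ost_index << 16) |
                       ((objid >> 32) & 0xffff);
        /* OID = objid & 0xffffffff */
        uint32_t oid = objid & 0xffffffff;

        /* the low 16 bits of SEQ restore bits 32..47 of the object id */
        assert((((seq & 0xffff) << 32) | oid) == objid);
        return 0;
    }
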
-
-#include <linux/libcfs/libcfs.h>
-#include <uapi/linux/lustre/lustre_fid.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <uapi/linux/lustre/lustre_ostid.h>
-
-struct lu_env;
-struct lu_site;
-struct lu_context;
-struct obd_device;
-struct obd_export;
-
-/* Whole sequences space range and zero range definitions */
-extern const struct lu_seq_range LUSTRE_SEQ_SPACE_RANGE;
-extern const struct lu_seq_range LUSTRE_SEQ_ZERO_RANGE;
-extern const struct lu_fid LUSTRE_BFL_FID;
-extern const struct lu_fid LU_OBF_FID;
-extern const struct lu_fid LU_DOT_LUSTRE_FID;
-
-enum {
- /*
- * This is how many metadata FIDs may be allocated in one sequence (128k)
- */
- LUSTRE_METADATA_SEQ_MAX_WIDTH = 0x0000000000020000ULL,
-
- /*
- * This is how many data FIDs could be allocated in one sequence (4B - 1)
- */
- LUSTRE_DATA_SEQ_MAX_WIDTH = 0x00000000FFFFFFFFULL,
-
- /*
- * How many sequences to allocate to a client at once.
- */
- LUSTRE_SEQ_META_WIDTH = 0x0000000000000001ULL,
-
- /*
- * seq allocation pool size.
- */
- LUSTRE_SEQ_BATCH_WIDTH = LUSTRE_SEQ_META_WIDTH * 1000,
-
- /*
- * This is how many sequences may be in one super-sequence allocated to
- * MDTs.
- */
- LUSTRE_SEQ_SUPER_WIDTH = ((1ULL << 30ULL) * LUSTRE_SEQ_META_WIDTH)
-};
-
-enum {
- /** 2^6 FIDs for OI containers */
- OSD_OI_FID_OID_BITS = 6,
- /** reserve enough FIDs in case we want more in the future */
- OSD_OI_FID_OID_BITS_MAX = 10,
-};
-
-/** special OID for local objects */
-enum local_oid {
- /** \see fld_mod_init */
- FLD_INDEX_OID = 3UL,
- /** \see fid_mod_init */
- FID_SEQ_CTL_OID = 4UL,
- FID_SEQ_SRV_OID = 5UL,
- /** \see mdd_mod_init */
- MDD_ROOT_INDEX_OID = 6UL, /* deprecated in 2.4 */
- MDD_ORPHAN_OID = 7UL, /* deprecated in 2.4 */
- MDD_LOV_OBJ_OID = 8UL,
- MDD_CAPA_KEYS_OID = 9UL,
- /** \see mdt_mod_init */
- LAST_RECV_OID = 11UL,
- OSD_FS_ROOT_OID = 13UL,
- ACCT_USER_OID = 15UL,
- ACCT_GROUP_OID = 16UL,
- LFSCK_BOOKMARK_OID = 17UL,
- OTABLE_IT_OID = 18UL,
- /* These two definitions are obsolete
- * OFD_GROUP0_LAST_OID = 20UL,
- * OFD_GROUP4K_LAST_OID = 20UL+4096,
- */
- OFD_LAST_GROUP_OID = 4117UL,
- LLOG_CATALOGS_OID = 4118UL,
- MGS_CONFIGS_OID = 4119UL,
- OFD_HEALTH_CHECK_OID = 4120UL,
- MDD_LOV_OBJ_OSEQ = 4121UL,
- LFSCK_NAMESPACE_OID = 4122UL,
- REMOTE_PARENT_DIR_OID = 4123UL,
- SLAVE_LLOG_CATALOGS_OID = 4124UL,
-};
-
-static inline void lu_local_obj_fid(struct lu_fid *fid, __u32 oid)
-{
- fid->f_seq = FID_SEQ_LOCAL_FILE;
- fid->f_oid = oid;
- fid->f_ver = 0;
-}
-
-static inline void lu_local_name_obj_fid(struct lu_fid *fid, __u32 oid)
-{
- fid->f_seq = FID_SEQ_LOCAL_NAME;
- fid->f_oid = oid;
- fid->f_ver = 0;
-}
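
For instance, building the FID of the otable iterator object from the enum
above (a hedged fragment, not part of the header itself):

    /* Sketch only: local FID for the otable iterator object */
    struct lu_fid fid;

    lu_local_obj_fid(&fid, OTABLE_IT_OID);
    /* fid is now [FID_SEQ_LOCAL_FILE : OTABLE_IT_OID : 0];
     * fid_is_otable_it() below returns true for it.
     */
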
-
-/* For a new FS (>= 2.4) the root FID will be changed to
- * [FID_SEQ_ROOT:1:0]; for an existing FS (upgraded to 2.4)
- * the root FID will still be an IGIF
- */
-static inline int fid_is_root(const struct lu_fid *fid)
-{
- return unlikely((fid_seq(fid) == FID_SEQ_ROOT &&
- fid_oid(fid) == 1));
-}
-
-static inline int fid_is_dot_lustre(const struct lu_fid *fid)
-{
- return unlikely(fid_seq(fid) == FID_SEQ_DOT_LUSTRE &&
- fid_oid(fid) == FID_OID_DOT_LUSTRE);
-}
-
-static inline int fid_is_obf(const struct lu_fid *fid)
-{
- return unlikely(fid_seq(fid) == FID_SEQ_DOT_LUSTRE &&
- fid_oid(fid) == FID_OID_DOT_LUSTRE_OBF);
-}
-
-static inline int fid_is_otable_it(const struct lu_fid *fid)
-{
- return unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE &&
- fid_oid(fid) == OTABLE_IT_OID);
-}
-
-static inline int fid_is_acct(const struct lu_fid *fid)
-{
- return fid_seq(fid) == FID_SEQ_LOCAL_FILE &&
- (fid_oid(fid) == ACCT_USER_OID ||
- fid_oid(fid) == ACCT_GROUP_OID);
-}
-
-static inline int fid_is_quota(const struct lu_fid *fid)
-{
- return fid_seq(fid) == FID_SEQ_QUOTA ||
- fid_seq(fid) == FID_SEQ_QUOTA_GLB;
-}
-
-static inline int fid_seq_in_fldb(__u64 seq)
-{
- return fid_seq_is_igif(seq) || fid_seq_is_norm(seq) ||
- fid_seq_is_root(seq) || fid_seq_is_dot(seq);
-}
-
-static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq, __u32 ost_idx)
-{
- if (fid_seq_is_mdt0(seq)) {
- fid->f_seq = fid_idif_seq(0, ost_idx);
- } else {
- LASSERTF(fid_seq_is_norm(seq) || fid_seq_is_echo(seq) ||
- fid_seq_is_idif(seq), "%#llx\n", seq);
- fid->f_seq = seq;
- }
- fid->f_oid = 0;
- fid->f_ver = 0;
-}
-
-/* seq client type */
-enum lu_cli_type {
- LUSTRE_SEQ_METADATA = 1,
- LUSTRE_SEQ_DATA
-};
-
-enum lu_mgr_type {
- LUSTRE_SEQ_SERVER,
- LUSTRE_SEQ_CONTROLLER
-};
-
-/* Client sequence manager interface. */
-struct lu_client_seq {
- /* Sequence-controller export. */
- struct obd_export *lcs_exp;
- spinlock_t lcs_lock;
-
- /*
- * Range of sequences allowed for allocation. When lu_client_seq is used
- * on clients, this contains the meta-sequence range; on servers it
- * contains the super-sequence range.
- */
- struct lu_seq_range lcs_space;
-
- /* Seq related proc */
- struct dentry *lcs_debugfs_entry;
-
- /* This holds last allocated fid in last obtained seq */
- struct lu_fid lcs_fid;
-
- /* LUSTRE_SEQ_METADATA or LUSTRE_SEQ_DATA */
- enum lu_cli_type lcs_type;
-
- /*
- * Service uuid, combined with the seq name passed from the MDT to form
- * a unique seq name for use with procfs.
- */
- char lcs_name[LUSTRE_MDT_MAXNAMELEN];
-
- /*
- * Sequence width, that is how many objects may be allocated in one
- * sequence. Default value for it is LUSTRE_SEQ_MAX_WIDTH.
- */
- __u64 lcs_width;
-
- /* wait queue for fid allocation and update indicator */
- wait_queue_head_t lcs_waitq;
- int lcs_update;
-};
-
-/* Client methods */
-void seq_client_flush(struct lu_client_seq *seq);
-
-int seq_client_alloc_fid(const struct lu_env *env, struct lu_client_seq *seq,
- struct lu_fid *fid);
-/* Fids common stuff */
-int fid_is_local(const struct lu_env *env,
- struct lu_site *site, const struct lu_fid *fid);
-
-enum lu_cli_type;
-int client_fid_init(struct obd_device *obd, struct obd_export *exp,
- enum lu_cli_type type);
-int client_fid_fini(struct obd_device *obd);
-
-/* fid locking */
-
-struct ldlm_namespace;
-
-/*
- * Build (DLM) resource name from FID.
- *
- * NOTE: until Lustre 1.8.7/2.1.1 the fid_ver() was packed into name[2],
- * but was moved into name[1] along with the OID to avoid consuming the
- * remaining name[2,3] fields that need to be used for the quota identifier.
- */
-static inline void
-fid_build_reg_res_name(const struct lu_fid *fid, struct ldlm_res_id *res)
-{
- memset(res, 0, sizeof(*res));
- res->name[LUSTRE_RES_ID_SEQ_OFF] = fid_seq(fid);
- res->name[LUSTRE_RES_ID_VER_OID_OFF] = fid_ver_oid(fid);
-}
-
-/*
- * Return true if resource is for object identified by FID.
- */
-static inline bool fid_res_name_eq(const struct lu_fid *fid,
- const struct ldlm_res_id *res)
-{
- return res->name[LUSTRE_RES_ID_SEQ_OFF] == fid_seq(fid) &&
- res->name[LUSTRE_RES_ID_VER_OID_OFF] == fid_ver_oid(fid);
-}
-
-/*
- * Extract FID from LDLM resource. Reverse of fid_build_reg_res_name().
- */
-static inline void
-fid_extract_from_res_name(struct lu_fid *fid, const struct ldlm_res_id *res)
-{
- fid->f_seq = res->name[LUSTRE_RES_ID_SEQ_OFF];
- fid->f_oid = (__u32)(res->name[LUSTRE_RES_ID_VER_OID_OFF]);
- fid->f_ver = (__u32)(res->name[LUSTRE_RES_ID_VER_OID_OFF] >> 32);
- LASSERT(fid_res_name_eq(fid, res));
-}
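
A hedged round trip through the two helpers above, showing the name[] layout
(the SEQ/OID values are invented for illustration):

    /* Sketch only: FID -> DLM resource name -> FID round trip */
    struct lu_fid fid = { .f_seq = FID_SEQ_NORMAL, .f_oid = 42, .f_ver = 0 };
    struct ldlm_res_id res;
    struct lu_fid out;

    fid_build_reg_res_name(&fid, &res);
    /* res.name[LUSTRE_RES_ID_SEQ_OFF] == fid_seq(&fid);
     * res.name[LUSTRE_RES_ID_VER_OID_OFF] packs f_ver:f_oid
     */
    fid_extract_from_res_name(&out, &res);  /* out now equals fid */
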
-
-/*
- * Build (DLM) resource identifier from global quota FID and quota ID.
- */
-static inline void
-fid_build_quota_res_name(const struct lu_fid *glb_fid, union lquota_id *qid,
- struct ldlm_res_id *res)
-{
- fid_build_reg_res_name(glb_fid, res);
- res->name[LUSTRE_RES_ID_QUOTA_SEQ_OFF] = fid_seq(&qid->qid_fid);
- res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF] = fid_ver_oid(&qid->qid_fid);
-}
-
-/*
- * Extract global FID and quota ID from resource name
- */
-static inline void fid_extract_from_quota_res(struct lu_fid *glb_fid,
- union lquota_id *qid,
- const struct ldlm_res_id *res)
-{
- fid_extract_from_res_name(glb_fid, res);
- qid->qid_fid.f_seq = res->name[LUSTRE_RES_ID_QUOTA_SEQ_OFF];
- qid->qid_fid.f_oid = (__u32)res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF];
- qid->qid_fid.f_ver =
- (__u32)(res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF] >> 32);
-}
-
-static inline void
-fid_build_pdo_res_name(const struct lu_fid *fid, unsigned int hash,
- struct ldlm_res_id *res)
-{
- fid_build_reg_res_name(fid, res);
- res->name[LUSTRE_RES_ID_HSH_OFF] = hash;
-}
-
-/**
- * Build DLM resource name from object id & seq, which will be removed
- * finally, when we replace ost_id with FID in data stack.
- *
- * Currently, a resid from an old client, where res[0] = object_id and
- * res[1] = object_seq, is just the opposite of a metadata
- * resid, where res[0] = fid->f_seq and res[1] = fid->f_oid.
- * To unify resid identification, we reverse the data
- * resid to match the metadata resid, i.e.
- *
- * For resid from the old client,
- * res[0] = objid, res[1] = 0, still keep the original order,
- * for compatibility.
- *
- * For new resid
- * res will be built from normal FID directly, i.e. res[0] = f_seq,
- * res[1] = f_oid + f_ver.
- */
-static inline void ostid_build_res_name(const struct ost_id *oi,
- struct ldlm_res_id *name)
-{
- memset(name, 0, sizeof(*name));
- if (fid_seq_is_mdt0(ostid_seq(oi))) {
- name->name[LUSTRE_RES_ID_SEQ_OFF] = ostid_id(oi);
- name->name[LUSTRE_RES_ID_VER_OID_OFF] = ostid_seq(oi);
- } else {
- fid_build_reg_res_name(&oi->oi_fid, name);
- }
-}
-
-/**
- * Return true if the resource is for the object identified by this id & group.
- */
-static inline int ostid_res_name_eq(const struct ost_id *oi,
- const struct ldlm_res_id *name)
-{
- /* Note: it is just a trick here to save some effort; probably the
- * correct way would be to turn them into FIDs and compare them
- */
- if (fid_seq_is_mdt0(ostid_seq(oi))) {
- return name->name[LUSTRE_RES_ID_SEQ_OFF] == ostid_id(oi) &&
- name->name[LUSTRE_RES_ID_VER_OID_OFF] == ostid_seq(oi);
- } else {
- return name->name[LUSTRE_RES_ID_SEQ_OFF] == ostid_seq(oi) &&
- name->name[LUSTRE_RES_ID_VER_OID_OFF] == ostid_id(oi);
- }
-}
-
-/**
- * Note: we need to check oi_seq to decide where to set oi_id,
- * so oi_seq should always be set before oi_id.
- */
-static inline int ostid_set_id(struct ost_id *oi, __u64 oid)
-{
- if (fid_seq_is_mdt0(oi->oi.oi_seq)) {
- if (oid >= IDIF_MAX_OID)
- return -E2BIG;
- oi->oi.oi_id = oid;
- } else if (fid_is_idif(&oi->oi_fid)) {
- if (oid >= IDIF_MAX_OID)
- return -E2BIG;
- oi->oi_fid.f_seq = fid_idif_seq(oid,
- fid_idif_ost_idx(&oi->oi_fid));
- oi->oi_fid.f_oid = oid;
- oi->oi_fid.f_ver = oid >> 48;
- } else {
- if (oid >= OBIF_MAX_OID)
- return -E2BIG;
- oi->oi_fid.f_oid = oid;
- }
- return 0;
-}
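
The -E2BIG paths above enforce per-namespace OID limits; a hedged fragment
showing the failure mode:

    /* Sketch only: object ids must stay below the namespace maximum */
    struct ost_id oi;

    ostid_set_seq_mdt0(&oi);                 /* SEQ 0: OLD MDT0 namespace */
    if (ostid_set_id(&oi, IDIF_MAX_OID) == -E2BIG)
        ;   /* a 48-bit IDIF cannot represent this object id */
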
-
-/* pack any OST FID into an ostid (id/seq) for the wire/disk */
-static inline int fid_to_ostid(const struct lu_fid *fid, struct ost_id *ostid)
-{
- int rc = 0;
-
- if (fid_seq_is_igif(fid->f_seq))
- return -EBADF;
-
- if (fid_is_idif(fid)) {
- u64 objid = fid_idif_id(fid_seq(fid), fid_oid(fid),
- fid_ver(fid));
-
- ostid_set_seq_mdt0(ostid);
- rc = ostid_set_id(ostid, objid);
- } else {
- ostid->oi_fid = *fid;
- }
-
- return rc;
-}
-
-/* The same as osc_build_res_name() */
-static inline void ost_fid_build_resid(const struct lu_fid *fid,
- struct ldlm_res_id *resname)
-{
- if (fid_is_mdt0(fid) || fid_is_idif(fid)) {
- struct ost_id oi;
-
- oi.oi.oi_id = 0; /* gcc 4.7.2 complains otherwise */
- if (fid_to_ostid(fid, &oi) != 0)
- return;
- ostid_build_res_name(&oi, resname);
- } else {
- fid_build_reg_res_name(fid, resname);
- }
-}
-
-/**
- * Flatten 128-bit FID values into a 64-bit value for use as an inode number.
- * For non-IGIF FIDs this starts just over 2^32, and continues without
- * conflict until 2^64, at which point we wrap the high 24 bits of the SEQ
- * into the range where there may not be many OID values in use, to minimize
- * the risk of conflict.
- *
- * Assuming LUSTRE_SEQ_MAX_WIDTH is less than (1 << 24), which is currently
- * true, the time between reused inode numbers is very long: 2^40 SEQ
- * numbers, or about 2^40 client mounts, if clients create fewer than 2^24
- * files per mount.
- */
-static inline __u64 fid_flatten(const struct lu_fid *fid)
-{
- __u64 ino;
- __u64 seq;
-
- if (fid_is_igif(fid)) {
- ino = lu_igif_ino(fid);
- return ino;
- }
-
- seq = fid_seq(fid);
-
- ino = (seq << 24) + ((seq >> 24) & 0xffffff0000ULL) + fid_oid(fid);
-
- return ino ? ino : fid_oid(fid);
-}
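
Working the arithmetic once with invented values: for a normal-range FID the
shifted SEQ fills the high bits of the inode, and the wrapped high SEQ bits
land where few OIDs live:

    /* Sketch only: one evaluation of the flattening formula above */
    __u64 seq = 0x200000401ULL;              /* hypothetical normal SEQ */
    __u32 oid = 7;
    __u64 ino = (seq << 24) + ((seq >> 24) & 0xffffff0000ULL) + oid;
    /* seq << 24 fills the high bits; the high 24 bits of SEQ wrap into
     * bits 16..39, where OID values are expected to be sparse.
     */
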
-
-static inline __u32 fid_hash(const struct lu_fid *f, int bits)
-{
- /* all objects with the same id and different versions will belong to
- * the same collision list.
- */
- return hash_long(fid_flatten(f), bits);
-}
-
-/**
- * map fid to 32 bit value for ino on 32bit systems.
- */
-static inline __u32 fid_flatten32(const struct lu_fid *fid)
-{
- __u32 ino;
- __u64 seq;
-
- if (fid_is_igif(fid)) {
- ino = lu_igif_ino(fid);
- return ino;
- }
-
- seq = fid_seq(fid) - FID_SEQ_START;
-
- /* Map the high bits of the OID into higher bits of the inode number so
- * that inodes generated at about the same time have a reduced chance
- * of collisions. This will give a period of 2^12 = 4096 unique clients
- * (from SEQ) and up to min(LUSTRE_SEQ_MAX_WIDTH, 2^20) = 128k objects
- * (from OID), or up to 512M inodes without collisions for new files.
- */
- ino = ((seq & 0x000fffffULL) << 12) + ((seq >> 8) & 0xfffff000) +
- (seq >> (64 - (40 - 8)) & 0xffffff00) +
- (fid_oid(fid) & 0xff000fff) + ((fid_oid(fid) & 0x00fff000) << 8);
-
- return ino ? ino : fid_oid(fid);
-}
-
-static inline int lu_fid_diff(const struct lu_fid *fid1,
- const struct lu_fid *fid2)
-{
- LASSERTF(fid_seq(fid1) == fid_seq(fid2), "fid1:" DFID ", fid2:" DFID "\n",
- PFID(fid1), PFID(fid2));
-
- if (fid_is_idif(fid1) && fid_is_idif(fid2))
- return fid_idif_id(fid1->f_seq, fid1->f_oid, fid1->f_ver) -
- fid_idif_id(fid2->f_seq, fid2->f_oid, fid2->f_ver);
-
- return fid_oid(fid1) - fid_oid(fid2);
-}
-
-#define LUSTRE_SEQ_SRV_NAME "seq_srv"
-#define LUSTRE_SEQ_CTL_NAME "seq_ctl"
-
-/* Range common stuff */
-static inline void range_cpu_to_le(struct lu_seq_range *dst, const struct lu_seq_range *src)
-{
- dst->lsr_start = cpu_to_le64(src->lsr_start);
- dst->lsr_end = cpu_to_le64(src->lsr_end);
- dst->lsr_index = cpu_to_le32(src->lsr_index);
- dst->lsr_flags = cpu_to_le32(src->lsr_flags);
-}
-
-static inline void range_le_to_cpu(struct lu_seq_range *dst, const struct lu_seq_range *src)
-{
- dst->lsr_start = le64_to_cpu(src->lsr_start);
- dst->lsr_end = le64_to_cpu(src->lsr_end);
- dst->lsr_index = le32_to_cpu(src->lsr_index);
- dst->lsr_flags = le32_to_cpu(src->lsr_flags);
-}
-
-static inline void range_cpu_to_be(struct lu_seq_range *dst, const struct lu_seq_range *src)
-{
- dst->lsr_start = cpu_to_be64(src->lsr_start);
- dst->lsr_end = cpu_to_be64(src->lsr_end);
- dst->lsr_index = cpu_to_be32(src->lsr_index);
- dst->lsr_flags = cpu_to_be32(src->lsr_flags);
-}
-
-static inline void range_be_to_cpu(struct lu_seq_range *dst, const struct lu_seq_range *src)
-{
- dst->lsr_start = be64_to_cpu(src->lsr_start);
- dst->lsr_end = be64_to_cpu(src->lsr_end);
- dst->lsr_index = be32_to_cpu(src->lsr_index);
- dst->lsr_flags = be32_to_cpu(src->lsr_flags);
-}
-
-/** @} fid */
-
-#endif /* __LUSTRE_FID_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_fld.h b/drivers/staging/lustre/lustre/include/lustre_fld.h
deleted file mode 100644
index f42122a4dfaa..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_fld.h
+++ /dev/null
@@ -1,137 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __LINUX_FLD_H
-#define __LINUX_FLD_H
-
-/** \defgroup fld fld
- *
- * @{
- */
-
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <seq_range.h>
-
-struct lu_client_fld;
-struct lu_server_fld;
-struct lu_fld_hash;
-struct fld_cache;
-
-extern const struct dt_index_features fld_index_features;
-extern const char fld_index_name[];
-
-/*
- * FLD (Fid Location Database) interface.
- */
-enum {
- LUSTRE_CLI_FLD_HASH_DHT = 0,
- LUSTRE_CLI_FLD_HASH_RRB
-};
-
-struct lu_fld_target {
- struct list_head ft_chain;
- struct obd_export *ft_exp;
- struct lu_server_fld *ft_srv;
- __u64 ft_idx;
-};
-
-struct lu_server_fld {
- /**
- * super sequence controller export, needed to forward fld
- * lookup request.
- */
- struct obd_export *lsf_control_exp;
-
- /** Client FLD cache. */
- struct fld_cache *lsf_cache;
-
- /** Protect index modifications */
- struct mutex lsf_lock;
-
- /** Fld service name in form "fld-srv-lustre-MDTXXX" */
- char lsf_name[LUSTRE_MDT_MAXNAMELEN];
-
-};
-
-struct lu_client_fld {
- /** Client side debugfs entry. */
- struct dentry *lcf_debugfs_entry;
-
- /** List of exports client FLD knows about. */
- struct list_head lcf_targets;
-
- /** Current hash to be used to choose an export. */
- struct lu_fld_hash *lcf_hash;
-
- /** Exports count. */
- int lcf_count;
-
- /** Lock protecting exports list and fld_hash. */
- spinlock_t lcf_lock;
-
- /** Client FLD cache. */
- struct fld_cache *lcf_cache;
-
- /** Client fld debugfs entry name. */
- char lcf_name[LUSTRE_MDT_MAXNAMELEN];
-};
-
-/* Client methods */
-int fld_client_init(struct lu_client_fld *fld,
- const char *prefix, int hash);
-
-void fld_client_fini(struct lu_client_fld *fld);
-
-void fld_client_flush(struct lu_client_fld *fld);
-
-int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds,
- __u32 flags, const struct lu_env *env);
-
-int fld_client_create(struct lu_client_fld *fld,
- struct lu_seq_range *range,
- const struct lu_env *env);
-
-int fld_client_delete(struct lu_client_fld *fld, u64 seq,
- const struct lu_env *env);
-
-int fld_client_add_target(struct lu_client_fld *fld,
- struct lu_fld_target *tar);
-
-int fld_client_del_target(struct lu_client_fld *fld,
- __u64 idx);
-
-void fld_client_debugfs_fini(struct lu_client_fld *fld);
-
-/** @} fld */
-
-#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_ha.h b/drivers/staging/lustre/lustre/include/lustre_ha.h
deleted file mode 100644
index cbd68985ada9..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_ha.h
+++ /dev/null
@@ -1,61 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef _LUSTRE_HA_H
-#define _LUSTRE_HA_H
-
-/** \defgroup ha ha
- *
- * @{
- */
-
-struct obd_import;
-struct obd_export;
-struct obd_device;
-struct ptlrpc_request;
-
-int ptlrpc_replay(struct obd_import *imp);
-int ptlrpc_resend(struct obd_import *imp);
-void ptlrpc_free_committed(struct obd_import *imp);
-void ptlrpc_wake_delayed(struct obd_import *imp);
-int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async);
-int ptlrpc_set_import_active(struct obd_import *imp, int active);
-void ptlrpc_activate_import(struct obd_import *imp);
-void ptlrpc_deactivate_import(struct obd_import *imp);
-void ptlrpc_invalidate_import(struct obd_import *imp);
-void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt);
-void ptlrpc_pinger_force(struct obd_import *imp);
-
-/** @} ha */
-
-#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_handles.h b/drivers/staging/lustre/lustre/include/lustre_handles.h
deleted file mode 100644
index 3556ce8d94e8..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_handles.h
+++ /dev/null
@@ -1,91 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __LUSTRE_HANDLES_H_
-#define __LUSTRE_HANDLES_H_
-
-/** \defgroup handles handles
- *
- * @{
- */
-
-#include <linux/atomic.h>
-#include <linux/list.h>
-#include <linux/rcupdate.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-
-struct portals_handle_ops {
- void (*hop_addref)(void *object);
- void (*hop_free)(void *object, int size);
-};
-
-/* These handles are most easily used by having them appear at the very top of
- * whatever object you want to make handles for, i.e.:
- *
- * struct ldlm_lock {
- * struct portals_handle handle;
- * ...
- * };
- *
- * Now you're able to assign the results of cookie2handle directly to an
- * ldlm_lock. If it's not at the top, you'll want to use container_of()
- * to compute the start of the structure based on the handle field.
- */
-struct portals_handle {
- struct list_head h_link;
- __u64 h_cookie;
- const void *h_owner;
- struct portals_handle_ops *h_ops;
-
- /* newly added fields to handle the RCU issue. -jxiong */
- struct rcu_head h_rcu;
- spinlock_t h_lock;
- unsigned int h_size:31;
- unsigned int h_in:1;
-};
-
-/* handles.c */
-
-/* Add a handle to the hash table */
-void class_handle_hash(struct portals_handle *,
- struct portals_handle_ops *ops);
-void class_handle_unhash(struct portals_handle *);
-void *class_handle2object(__u64 cookie, const void *owner);
-void class_handle_free_cb(struct rcu_head *rcu);
-int class_handle_init(void);
-void class_handle_cleanup(void);
-
-/** @} handles */
-
-#endif
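
A hedged lifecycle sketch for these prototypes, embedding the handle at the
top of an object as the comment above recommends (my_object and its ops are
invented for illustration):

    /* Sketch only: embed a portals_handle and look the object up by cookie */
    struct my_object {                      /* hypothetical object type */
        struct portals_handle h;            /* must be first, see above */
        int payload;
    };

    static void my_addref(void *object) { /* take a reference on object */ }
    static void my_free(void *object, int size) { kfree(object); }

    static struct portals_handle_ops my_ops = {
        .hop_addref = my_addref,
        .hop_free   = my_free,
    };

    static struct my_object *my_lookup(struct my_object *obj)
    {
        class_handle_hash(&obj->h, &my_ops);
        /* later, any holder of the cookie can recover the object */
        return class_handle2object(obj->h.h_cookie, obj->h.h_owner);
    }
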
diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h
deleted file mode 100644
index ac3805ead620..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_import.h
+++ /dev/null
@@ -1,369 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-/** \defgroup obd_import PtlRPC import definitions
- * Imports are client-side representation of remote obd target.
- *
- * @{
- */
-
-#ifndef __IMPORT_H
-#define __IMPORT_H
-
-/** \defgroup export export
- *
- * @{
- */
-
-#include <linux/libcfs/libcfs.h>
-#include <lustre_handles.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-
-/**
- * Adaptive Timeout stuff
- *
- * @{
- */
-#define D_ADAPTTO D_OTHER
-#define AT_BINS 4 /* "bin" means "N seconds of history" */
-#define AT_FLG_NOHIST 0x1 /* use last reported value only */
-
-struct adaptive_timeout {
- time64_t at_binstart; /* bin start time */
- unsigned int at_hist[AT_BINS]; /* timeout history bins */
- unsigned int at_flags;
- unsigned int at_current; /* current timeout value */
- unsigned int at_worst_ever; /* worst-ever timeout value */
- time64_t at_worst_time; /* worst-ever timeout timestamp */
- spinlock_t at_lock;
-};
-
-struct ptlrpc_at_array {
- struct list_head *paa_reqs_array; /** array to hold requests */
- __u32 paa_size; /** the size of array */
- __u32 paa_count; /** the total count of reqs */
- time64_t paa_deadline; /** the earliest deadline of reqs */
- __u32 *paa_reqs_count; /** the count of reqs in each entry */
-};
-
-#define IMP_AT_MAX_PORTALS 8
-struct imp_at {
- int iat_portal[IMP_AT_MAX_PORTALS];
- struct adaptive_timeout iat_net_latency;
- struct adaptive_timeout iat_service_estimate[IMP_AT_MAX_PORTALS];
-};
-
-/** @} */
-
-/** Possible import states */
-enum lustre_imp_state {
- LUSTRE_IMP_CLOSED = 1,
- LUSTRE_IMP_NEW = 2,
- LUSTRE_IMP_DISCON = 3,
- LUSTRE_IMP_CONNECTING = 4,
- LUSTRE_IMP_REPLAY = 5,
- LUSTRE_IMP_REPLAY_LOCKS = 6,
- LUSTRE_IMP_REPLAY_WAIT = 7,
- LUSTRE_IMP_RECOVER = 8,
- LUSTRE_IMP_FULL = 9,
- LUSTRE_IMP_EVICTED = 10,
-};
-
-/** Returns the text string representation of numeric import state \a state */
-static inline char *ptlrpc_import_state_name(enum lustre_imp_state state)
-{
- static char *import_state_names[] = {
- "<UNKNOWN>", "CLOSED", "NEW", "DISCONN",
- "CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT",
- "RECOVER", "FULL", "EVICTED",
- };
-
- LASSERT(state <= LUSTRE_IMP_EVICTED);
- return import_state_names[state];
-}
-
-/**
- * List of import event types
- */
-enum obd_import_event {
- IMP_EVENT_DISCON = 0x808001,
- IMP_EVENT_INACTIVE = 0x808002,
- IMP_EVENT_INVALIDATE = 0x808003,
- IMP_EVENT_ACTIVE = 0x808004,
- IMP_EVENT_OCD = 0x808005,
- IMP_EVENT_DEACTIVATE = 0x808006,
- IMP_EVENT_ACTIVATE = 0x808007,
-};
-
-/**
- * Definition of import connection structure
- */
-struct obd_import_conn {
- /** Item for linking connections together */
- struct list_head oic_item;
- /** Pointer to actual PortalRPC connection */
- struct ptlrpc_connection *oic_conn;
- /** uuid of remote side */
- struct obd_uuid oic_uuid;
- /**
- * Time (64 bit jiffies) of last connection attempt on this connection
- */
- __u64 oic_last_attempt;
-};
-
-/* state history */
-#define IMP_STATE_HIST_LEN 16
-struct import_state_hist {
- enum lustre_imp_state ish_state;
- time64_t ish_time;
-};
-
-/**
- * Definition of PortalRPC import structure.
- * Imports are representing client-side view to remote target.
- */
-struct obd_import {
- /** Local handle (== id) for this import. */
- struct portals_handle imp_handle;
- /** Reference counter */
- atomic_t imp_refcount;
- struct lustre_handle imp_dlm_handle; /* client's ldlm export */
- /** Currently active connection */
- struct ptlrpc_connection *imp_connection;
- /** PortalRPC client structure for this import */
- struct ptlrpc_client *imp_client;
- /** List element for linking into pinger chain */
- struct list_head imp_pinger_chain;
- /** work struct for destruction of import */
- struct work_struct imp_zombie_work;
-
- /**
- * Lists of requests that are retained for replay, waiting for a reply,
- * or waiting for recovery to complete, respectively.
- * @{
- */
- struct list_head imp_replay_list;
- struct list_head imp_sending_list;
- struct list_head imp_delayed_list;
- /** @} */
-
- /**
- * List of requests that are retained for committed open replay. Once
- * open is committed, open replay request will be moved from the
- * imp_replay_list into the imp_committed_list.
- * The imp_replay_cursor is for accelerating searching during replay.
- * @{
- */
- struct list_head imp_committed_list;
- struct list_head *imp_replay_cursor;
- /** @} */
-
- /** List of not replied requests */
- struct list_head imp_unreplied_list;
- /** Known maximal replied XID */
- __u64 imp_known_replied_xid;
-
- /** obd device for this import */
- struct obd_device *imp_obd;
-
- /**
- * some security-related fields
- * @{
- */
- struct ptlrpc_sec *imp_sec;
- struct mutex imp_sec_mutex;
- time64_t imp_sec_expire;
- /** @} */
-
- /** Wait queue for those who need to wait for recovery completion */
- wait_queue_head_t imp_recovery_waitq;
-
- /** Number of requests currently in-flight */
- atomic_t imp_inflight;
- /** Number of requests currently unregistering */
- atomic_t imp_unregistering;
- /** Number of replay requests inflight */
- atomic_t imp_replay_inflight;
- /** Number of currently happening import invalidations */
- atomic_t imp_inval_count;
- /** Number of request timeouts */
- atomic_t imp_timeouts;
- /** Current import state */
- enum lustre_imp_state imp_state;
- /** Last replay state */
- enum lustre_imp_state imp_replay_state;
- /** History of import states */
- struct import_state_hist imp_state_hist[IMP_STATE_HIST_LEN];
- int imp_state_hist_idx;
- /** Current import generation. Incremented on every reconnect */
- int imp_generation;
- /** Incremented every time we send reconnection request */
- __u32 imp_conn_cnt;
- /**
- * \see ptlrpc_free_committed remembers imp_generation value here
- * after a check to save on unnecessary replay list iterations
- */
- int imp_last_generation_checked;
- /** Last transno we replayed */
- __u64 imp_last_replay_transno;
- /** Last transno committed on remote side */
- __u64 imp_peer_committed_transno;
- /**
- * \see ptlrpc_free_committed remembers last_transno since its last
- * check here and if last_transno did not change since last run of
- * ptlrpc_free_committed and import generation is the same, we can
- * skip looking for requests to remove from replay list as optimisation
- */
- __u64 imp_last_transno_checked;
- /**
- * Remote export handle. This is how remote side knows what export
- * we are talking to. Filled from response to connect request
- */
- struct lustre_handle imp_remote_handle;
- /** When to perform next ping. time in jiffies. */
- unsigned long imp_next_ping;
- /** When we last successfully connected. time in 64bit jiffies */
- __u64 imp_last_success_conn;
-
- /** List of all possible connection for import. */
- struct list_head imp_conn_list;
- /**
- * Current connection. \a imp_connection is imp_conn_current->oic_conn
- */
- struct obd_import_conn *imp_conn_current;
-
- /** Protects flags, level, generation, conn_cnt, *_list */
- spinlock_t imp_lock;
-
- /* flags */
- unsigned long imp_no_timeout:1, /* timeouts are disabled */
- imp_invalid:1, /* evicted */
- /* administratively disabled */
- imp_deactive:1,
- /* try to recover the import */
- imp_replayable:1,
- /* don't run recovery (timeout instead) */
- imp_dlm_fake:1,
- /* use 1/2 timeout on MDS' OSCs */
- imp_server_timeout:1,
- /* VBR: imp in delayed recovery */
- imp_delayed_recovery:1,
- /* VBR: if gap was found then no lock replays
- */
- imp_no_lock_replay:1,
- /* recovery by versions was failed */
- imp_vbr_failed:1,
- /* force an immediate ping */
- imp_force_verify:1,
- /* force a scheduled ping */
- imp_force_next_verify:1,
- /* pingable */
- imp_pingable:1,
- /* resend for replay */
- imp_resend_replay:1,
- /* disable normal recovery, for test only. */
- imp_no_pinger_recover:1,
-#if OBD_OCD_VERSION(3, 0, 53, 0) > LUSTRE_VERSION_CODE
- /* need IR MNE swab */
- imp_need_mne_swab:1,
-#endif
- /* import must be reconnected instead of
- * choosing a new connection
- */
- imp_force_reconnect:1,
- /* import has tried to connect with server */
- imp_connect_tried:1,
- /* connected but not FULL yet */
- imp_connected:1;
- __u32 imp_connect_op;
- struct obd_connect_data imp_connect_data;
- __u64 imp_connect_flags_orig;
- int imp_connect_error;
-
- __u32 imp_msg_magic;
- __u32 imp_msghdr_flags; /* adjusted based on server capability */
-
- struct imp_at imp_at; /* adaptive timeout data */
- time64_t imp_last_reply_time; /* for health check */
-};
-
-/* import.c */
-static inline unsigned int at_est2timeout(unsigned int val)
-{
- /* add an arbitrary minimum: 125% +5 sec */
- return (val + (val >> 2) + 5);
-}
-
-static inline unsigned int at_timeout2est(unsigned int val)
-{
- /* restore estimate value from timeout: e=4/5(t-5) */
- LASSERT(val);
- return (max((val << 2) / 5, 5U) - 4);
-}
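
To make the inverse relationship concrete: for an estimate of 20 seconds,
at_est2timeout() yields 20 + 20/4 + 5 = 30, and at_timeout2est(30) =
max((30 << 2) / 5, 5) - 4 = 20, recovering the estimate. A usage fragment:

    /* Sketch only: the two conversions invert each other (est = 20) */
    unsigned int timeout = at_est2timeout(20);   /* 20 + 5 + 5 = 30 */
    unsigned int est = at_timeout2est(timeout);  /* max(24, 5) - 4 = 20 */
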
-
-static inline void at_reset(struct adaptive_timeout *at, int val)
-{
- spin_lock(&at->at_lock);
- at->at_current = val;
- at->at_worst_ever = val;
- at->at_worst_time = ktime_get_real_seconds();
- spin_unlock(&at->at_lock);
-}
-
-static inline void at_init(struct adaptive_timeout *at, int val, int flags)
-{
- memset(at, 0, sizeof(*at));
- spin_lock_init(&at->at_lock);
- at->at_flags = flags;
- at_reset(at, val);
-}
-
-extern unsigned int at_min;
-static inline int at_get(struct adaptive_timeout *at)
-{
- return (at->at_current > at_min) ? at->at_current : at_min;
-}
-
-int at_measured(struct adaptive_timeout *at, unsigned int val);
-int import_at_get_index(struct obd_import *imp, int portal);
-extern unsigned int at_max;
-#define AT_OFF (at_max == 0)
-
-/* genops.c */
-struct obd_export;
-struct obd_import *class_exp2cliimp(struct obd_export *);
-
-/** @} import */
-
-#endif /* __IMPORT_H */
-
-/** @} obd_import */
diff --git a/drivers/staging/lustre/lustre/include/lustre_intent.h b/drivers/staging/lustre/lustre/include/lustre_intent.h
deleted file mode 100644
index 51e5c0e03872..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_intent.h
+++ /dev/null
@@ -1,71 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef LUSTRE_INTENT_H
-#define LUSTRE_INTENT_H
-
-#include <linux/types.h>
-
-/* intent IT_XXX are defined in lustre/include/obd.h */
-
-struct lookup_intent {
- int it_op;
- int it_create_mode;
- __u64 it_flags;
- int it_disposition;
- int it_status;
- __u64 it_lock_handle;
- __u64 it_lock_bits;
- int it_lock_mode;
- int it_remote_lock_mode;
- __u64 it_remote_lock_handle;
- struct ptlrpc_request *it_request;
- unsigned int it_lock_set:1;
-};
-
-static inline int it_disposition(struct lookup_intent *it, int flag)
-{
- return it->it_disposition & flag;
-}
-
-static inline void it_set_disposition(struct lookup_intent *it, int flag)
-{
- it->it_disposition |= flag;
-}
-
-static inline void it_clear_disposition(struct lookup_intent *it, int flag)
-{
- it->it_disposition &= ~flag;
-}
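
A hedged usage fragment for the disposition helpers (DISP_IT_EXECD is a
disposition bit defined elsewhere in the Lustre headers; it is used here only
for illustration):

    /* Sketch only: record, test, and clear an intent disposition bit */
    struct lookup_intent it = { .it_op = 0 };

    it_set_disposition(&it, DISP_IT_EXECD);
    if (it_disposition(&it, DISP_IT_EXECD))
        it_clear_disposition(&it, DISP_IT_EXECD);
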
-
-#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_kernelcomm.h b/drivers/staging/lustre/lustre/include/lustre_kernelcomm.h
deleted file mode 100644
index 2b3fa8430185..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_kernelcomm.h
+++ /dev/null
@@ -1,56 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2013 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- *
- * Author: Nathan Rutman <nathan.rutman@sun.com>
- *
- * Kernel <-> userspace communication routines.
- * The definitions below are used in the kernel and userspace.
- */
-
-#ifndef __LUSTRE_KERNELCOMM_H__
-#define __LUSTRE_KERNELCOMM_H__
-
-/* For declarations shared with userspace */
-#include <uapi/linux/lustre/lustre_kernelcomm.h>
-
-/* prototype for callback function on kuc groups */
-typedef int (*libcfs_kkuc_cb_t)(void *data, void *cb_arg);
-
-/* Kernel methods */
-int libcfs_kkuc_msg_put(struct file *fp, void *payload);
-int libcfs_kkuc_group_put(unsigned int group, void *payload);
-int libcfs_kkuc_group_add(struct file *fp, int uid, unsigned int group,
- void *data, size_t data_len);
-int libcfs_kkuc_group_rem(int uid, unsigned int group);
-int libcfs_kkuc_group_foreach(unsigned int group, libcfs_kkuc_cb_t cb_func,
- void *cb_arg);
-
-#endif /* __LUSTRE_KERNELCOMM_H__ */
diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h
deleted file mode 100644
index 87748e9902a7..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_lib.h
+++ /dev/null
@@ -1,126 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/include/lustre_lib.h
- *
- * Basic Lustre library routines.
- */
-
-#ifndef _LUSTRE_LIB_H
-#define _LUSTRE_LIB_H
-
-/** \defgroup lib lib
- *
- * @{
- */
-
-#include <linux/sched/signal.h>
-#include <linux/signal.h>
-#include <linux/types.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <uapi/linux/lustre/lustre_ver.h>
-#include <uapi/linux/lustre/lustre_cfg.h>
-
-/* target.c */
-struct ptlrpc_request;
-struct obd_export;
-struct lu_target;
-struct l_wait_info;
-#include <lustre_ha.h>
-#include <lustre_net.h>
-
-#define LI_POISON 0x5a5a5a5a
-#if BITS_PER_LONG > 32
-# define LL_POISON 0x5a5a5a5a5a5a5a5aL
-#else
-# define LL_POISON 0x5a5a5a5aL
-#endif
-#define LP_POISON ((void *)LL_POISON)
-
-int target_pack_pool_reply(struct ptlrpc_request *req);
-int do_set_info_async(struct obd_import *imp,
- int opcode, int version,
- u32 keylen, void *key,
- u32 vallen, void *val,
- struct ptlrpc_request_set *set);
-
-void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id);
-
-#define LUSTRE_FATAL_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) | \
- sigmask(SIGTERM) | sigmask(SIGQUIT) | \
- sigmask(SIGALRM))
-static inline int l_fatal_signal_pending(struct task_struct *p)
-{
- return signal_pending(p) && sigtestsetmask(&p->pending.signal, LUSTRE_FATAL_SIGS);
-}
-
-/** @} lib */
-
-
-
-/* l_wait_event_abortable() is a bit like wait_event_killable()
- * except there is a fixed set of signals which will abort:
- * LUSTRE_FATAL_SIGS
- */
-#define l_wait_event_abortable(wq, condition) \
-({ \
- sigset_t __new_blocked, __old_blocked; \
- int __ret = 0; \
- siginitset(&__new_blocked, LUSTRE_FATAL_SIGS); \
- sigprocmask(SIG_BLOCK, &__new_blocked, &__old_blocked); \
- __ret = wait_event_interruptible(wq, condition); \
- sigprocmask(SIG_SETMASK, &__old_blocked, NULL); \
- __ret; \
-})
-
-#define l_wait_event_abortable_timeout(wq, condition, timeout) \
-({ \
- sigset_t __new_blocked, __old_blocked; \
- int __ret = 0; \
- siginitset(&__new_blocked, LUSTRE_FATAL_SIGS); \
- sigprocmask(SIG_BLOCK, &__new_blocked, &__old_blocked); \
- __ret = wait_event_interruptible_timeout(wq, condition, timeout);\
- sigprocmask(SIG_SETMASK, &__old_blocked, NULL); \
- __ret; \
-})
-
-#define l_wait_event_abortable_exclusive(wq, condition) \
-({ \
- sigset_t __new_blocked, __old_blocked; \
- int __ret = 0; \
- siginitset(&__new_blocked, LUSTRE_FATAL_SIGS); \
- sigprocmask(SIG_BLOCK, &__new_blocked, &__old_blocked); \
- __ret = wait_event_interruptible_exclusive(wq, condition); \
- sigprocmask(SIG_SETMASK, &__old_blocked, NULL); \
- __ret; \
-})
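-
-/*
- * Usage sketch (illustrative): block until a caller-provided condition
- * holds, aborting only on one of LUSTRE_FATAL_SIGS; all other signals
- * stay blocked for the duration of the wait.
- *
- *	rc = l_wait_event_abortable(waitq, atomic_read(&done) != 0);
- *	if (rc < 0)
- *		return rc;	(a fatal signal arrived first)
- */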
-#endif /* _LUSTRE_LIB_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_linkea.h b/drivers/staging/lustre/lustre/include/lustre_linkea.h
deleted file mode 100644
index 03db1511bfd3..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_linkea.h
+++ /dev/null
@@ -1,93 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2013, 2014, Intel Corporation.
- * Use is subject to license terms.
- *
- * Author: di wang <di.wang@intel.com>
- */
-
-/* There are several reasons to restrict the linkEA size:
- *
- * 1. Under DNE mode, if we do not restrict the linkEA size and there
- * are too many cross-MDT hard links to the same object, the llog
- * will overflow.
- *
- * 2. Some backends have a limited EA size. For example, without the
- * large EA feature enabled, ldiskfs makes all EAs share a single
- * (4KB) EA block.
- *
- * 3. Too many entries in the linkEA seriously hurt linkEA performance,
- * because entries can only be located by scanning them consecutively.
- */
-#define MAX_LINKEA_SIZE 4096
-
-struct linkea_data {
- /**
- * Buffer to keep link EA body.
- */
- struct lu_buf *ld_buf;
- /**
- * The matched header, entry and its length in the EA
- */
- struct link_ea_header *ld_leh;
- struct link_ea_entry *ld_lee;
- int ld_reclen;
-};
-
-int linkea_data_new(struct linkea_data *ldata, struct lu_buf *buf);
-int linkea_init(struct linkea_data *ldata);
-int linkea_init_with_rec(struct linkea_data *ldata);
-void linkea_entry_unpack(const struct link_ea_entry *lee, int *reclen,
- struct lu_name *lname, struct lu_fid *pfid);
-int linkea_entry_pack(struct link_ea_entry *lee, const struct lu_name *lname,
- const struct lu_fid *pfid);
-int linkea_add_buf(struct linkea_data *ldata, const struct lu_name *lname,
- const struct lu_fid *pfid);
-void linkea_del_buf(struct linkea_data *ldata, const struct lu_name *lname);
-int linkea_links_find(struct linkea_data *ldata, const struct lu_name *lname,
- const struct lu_fid *pfid);
-
-static inline void linkea_first_entry(struct linkea_data *ldata)
-{
- LASSERT(ldata);
- LASSERT(ldata->ld_leh);
-
- if (ldata->ld_leh->leh_reccount == 0)
- ldata->ld_lee = NULL;
- else
- ldata->ld_lee = (struct link_ea_entry *)(ldata->ld_leh + 1);
-}
-
-static inline void linkea_next_entry(struct linkea_data *ldata)
-{
- LASSERT(ldata);
- LASSERT(ldata->ld_leh);
-
- if (ldata->ld_lee) {
- ldata->ld_lee = (struct link_ea_entry *)((char *)ldata->ld_lee +
- ldata->ld_reclen);
- if ((char *)ldata->ld_lee >= ((char *)ldata->ld_leh +
- ldata->ld_leh->leh_len))
- ldata->ld_lee = NULL;
- }
-}
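-
-/*
- * Iteration sketch (illustrative): walk every link entry in an
- * initialized linkea_data, unpacking each into a name/parent-FID pair.
- *
- *	struct lu_name lname;
- *	struct lu_fid pfid;
- *
- *	for (linkea_first_entry(ldata); ldata->ld_lee != NULL;
- *	     linkea_next_entry(ldata)) {
- *		linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen,
- *				    &lname, &pfid);
- *		(use lname/pfid here)
- *	}
- */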
diff --git a/drivers/staging/lustre/lustre/include/lustre_lmv.h b/drivers/staging/lustre/lustre/include/lustre_lmv.h
deleted file mode 100644
index 080ec1f8e19f..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_lmv.h
+++ /dev/null
@@ -1,174 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License version 2 for more details. A copy is
- * included in the COPYING file that accompanied this code.
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2013, Intel Corporation.
- */
-/*
- * lustre/include/lustre_lmv.h
- *
- * Lustre LMV structures and functions.
- *
- * Author: Di Wang <di.wang@intel.com>
- */
-
-#ifndef _LUSTRE_LMV_H
-#define _LUSTRE_LMV_H
-#include <uapi/linux/lustre/lustre_idl.h>
-
-struct lmv_oinfo {
- struct lu_fid lmo_fid;
- u32 lmo_mds;
- struct inode *lmo_root;
-};
-
-struct lmv_stripe_md {
- __u32 lsm_md_magic;
- __u32 lsm_md_stripe_count;
- __u32 lsm_md_master_mdt_index;
- __u32 lsm_md_hash_type;
- __u32 lsm_md_layout_version;
- __u32 lsm_md_default_count;
- __u32 lsm_md_default_index;
- char lsm_md_pool_name[LOV_MAXPOOLNAME + 1];
- struct lmv_oinfo lsm_md_oinfo[0];
-};
-
-static inline bool
-lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
-{
- __u32 idx;
-
- if (lsm1->lsm_md_magic != lsm2->lsm_md_magic ||
- lsm1->lsm_md_stripe_count != lsm2->lsm_md_stripe_count ||
- lsm1->lsm_md_master_mdt_index != lsm2->lsm_md_master_mdt_index ||
- lsm1->lsm_md_hash_type != lsm2->lsm_md_hash_type ||
- lsm1->lsm_md_layout_version != lsm2->lsm_md_layout_version ||
- strcmp(lsm1->lsm_md_pool_name, lsm2->lsm_md_pool_name) != 0)
- return false;
-
- for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) {
- if (!lu_fid_eq(&lsm1->lsm_md_oinfo[idx].lmo_fid,
- &lsm2->lsm_md_oinfo[idx].lmo_fid))
- return false;
- }
-
- return true;
-}
-
-union lmv_mds_md;
-
-void lmv_free_memmd(struct lmv_stripe_md *lsm);
-
-static inline void lmv1_le_to_cpu(struct lmv_mds_md_v1 *lmv_dst,
- const struct lmv_mds_md_v1 *lmv_src)
-{
- __u32 i;
-
- lmv_dst->lmv_magic = le32_to_cpu(lmv_src->lmv_magic);
- lmv_dst->lmv_stripe_count = le32_to_cpu(lmv_src->lmv_stripe_count);
- lmv_dst->lmv_master_mdt_index =
- le32_to_cpu(lmv_src->lmv_master_mdt_index);
- lmv_dst->lmv_hash_type = le32_to_cpu(lmv_src->lmv_hash_type);
- lmv_dst->lmv_layout_version = le32_to_cpu(lmv_src->lmv_layout_version);
-
- for (i = 0; i < lmv_src->lmv_stripe_count; i++)
- fid_le_to_cpu(&lmv_dst->lmv_stripe_fids[i],
- &lmv_src->lmv_stripe_fids[i]);
-}
-
-static inline void lmv_le_to_cpu(union lmv_mds_md *lmv_dst,
- const union lmv_mds_md *lmv_src)
-{
- switch (le32_to_cpu(lmv_src->lmv_magic)) {
- case LMV_MAGIC_V1:
- lmv1_le_to_cpu(&lmv_dst->lmv_md_v1, &lmv_src->lmv_md_v1);
- break;
- default:
- break;
- }
-}
-
-/* This hash is only for testing purposes */
-static inline unsigned int
-lmv_hash_all_chars(unsigned int count, const char *name, int namelen)
-{
- const unsigned char *p = (const unsigned char *)name;
- unsigned int c = 0;
-
- while (--namelen >= 0)
- c += p[namelen];
-
- c = c % count;
-
- return c;
-}
-
-static inline unsigned int
-lmv_hash_fnv1a(unsigned int count, const char *name, int namelen)
-{
- __u64 hash;
-
- hash = lustre_hash_fnv_1a_64(name, namelen);
-
- return do_div(hash, count);
-}
-
-static inline int lmv_name_to_stripe_index(__u32 lmv_hash_type,
- unsigned int stripe_count,
- const char *name, int namelen)
-{
- __u32 hash_type = lmv_hash_type & LMV_HASH_TYPE_MASK;
- int idx;
-
- LASSERT(namelen > 0);
- if (stripe_count <= 1)
- return 0;
-
- /* for a migrating object, always start from stripe 0 */
- if (lmv_hash_type & LMV_HASH_FLAG_MIGRATION)
- return 0;
-
- switch (hash_type) {
- case LMV_HASH_TYPE_ALL_CHARS:
- idx = lmv_hash_all_chars(stripe_count, name, namelen);
- break;
- case LMV_HASH_TYPE_FNV_1A_64:
- idx = lmv_hash_fnv1a(stripe_count, name, namelen);
- break;
- default:
- idx = -EBADFD;
- break;
- }
- CDEBUG(D_INFO, "name %.*s hash_type %d idx %d\n", namelen, name,
- hash_type, idx);
-
- return idx;
-}
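-
-/*
- * Usage sketch (illustrative): given a striped directory's LMV stripe
- * metadata, map a child name to the stripe (and thus MDT) holding it.
- *
- *	int idx = lmv_name_to_stripe_index(lsm->lsm_md_hash_type,
- *					   lsm->lsm_md_stripe_count,
- *					   name, namelen);
- *	if (idx < 0)
- *		return idx;	(unknown hash type: -EBADFD)
- *	oinfo = &lsm->lsm_md_oinfo[idx];
- */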
-
-static inline bool lmv_is_known_hash_type(__u32 type)
-{
- return (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_FNV_1A_64 ||
- (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_ALL_CHARS;
-}
-
-#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_log.h b/drivers/staging/lustre/lustre/include/lustre_log.h
deleted file mode 100644
index 07f4e600386b..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_log.h
+++ /dev/null
@@ -1,382 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/include/lustre_log.h
- *
- * Generic infrastructure for managing a collection of logs.
- * These logs are used for:
- *
- * - orphan recovery: OST adds record on create
- * - mtime/size consistency: the OST adds a record on first write
- * - open/unlinked objects: OST adds a record on destroy
- *
- * - mds unlink log: the MDS adds an entry upon delete
- *
- * - raid1 replication log between OSTs
- * - MDS replication logs
- */
-
-#ifndef _LUSTRE_LOG_H
-#define _LUSTRE_LOG_H
-
-/** \defgroup log log
- *
- * @{
- */
-
-#include <obd_class.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-
-#define LOG_NAME_LIMIT(logname, name) \
- snprintf(logname, sizeof(logname), "LOGS/%s", name)
-#define LLOG_EEMPTY 4711
-
-enum llog_open_param {
- LLOG_OPEN_EXISTS = 0x0000,
- LLOG_OPEN_NEW = 0x0001,
-};
-
-struct plain_handle_data {
- struct list_head phd_entry;
- struct llog_handle *phd_cat_handle;
- struct llog_cookie phd_cookie; /* cookie of this log in its cat */
-};
-
-struct cat_handle_data {
- struct list_head chd_head;
- struct llog_handle *chd_current_log; /* currently open log */
- struct llog_handle *chd_next_log; /* llog to be used next */
-};
-
-struct llog_handle;
-
-/* llog.c - general API */
-int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
- int flags, struct obd_uuid *uuid);
-int llog_process(const struct lu_env *env, struct llog_handle *loghandle,
- llog_cb_t cb, void *data, void *catdata);
-int llog_process_or_fork(const struct lu_env *env,
- struct llog_handle *loghandle,
- llog_cb_t cb, void *data, void *catdata, bool fork);
-int llog_open(const struct lu_env *env, struct llog_ctxt *ctxt,
- struct llog_handle **lgh, struct llog_logid *logid,
- char *name, enum llog_open_param open_param);
-int llog_close(const struct lu_env *env, struct llog_handle *cathandle);
-
-/* llog_process flags */
-#define LLOG_FLAG_NODEAMON 0x0001
-
-/* llog_cat.c - catalog api */
-struct llog_process_data {
- /**
- * Any useful data needed while processing catalog. This is
- * passed later to process callback.
- */
- void *lpd_data;
- /**
- * Catalog process callback function, called for each record
- * in catalog.
- */
- llog_cb_t lpd_cb;
- /**
- * Start processing the catalog from startcat/startidx
- */
- int lpd_startcat;
- int lpd_startidx;
-};
-
-struct llog_process_cat_data {
- /**
- * Temporary stored first_idx while scanning log.
- */
- int lpcd_first_idx;
- /**
- * Temporary stored last_idx while scanning log.
- */
- int lpcd_last_idx;
-};
-
-struct thandle;
-
-int llog_cat_close(const struct lu_env *env, struct llog_handle *cathandle);
-int llog_cat_process(const struct lu_env *env, struct llog_handle *cat_llh,
- llog_cb_t cb, void *data, int startcat, int startidx);
-
-/* llog_obd.c */
-int llog_setup(const struct lu_env *env, struct obd_device *obd,
- struct obd_llog_group *olg, int index,
- struct obd_device *disk_obd, struct llog_operations *op);
-int __llog_ctxt_put(const struct lu_env *env, struct llog_ctxt *ctxt);
-int llog_cleanup(const struct lu_env *env, struct llog_ctxt *);
-
-/* llog_net.c */
-int llog_initiator_connect(struct llog_ctxt *ctxt);
-
-struct llog_operations {
- int (*lop_next_block)(const struct lu_env *env, struct llog_handle *h,
- int *curr_idx, int next_idx, __u64 *offset,
- void *buf, int len);
- int (*lop_prev_block)(const struct lu_env *env, struct llog_handle *h,
- int prev_idx, void *buf, int len);
- int (*lop_read_header)(const struct lu_env *env,
- struct llog_handle *handle);
- int (*lop_setup)(const struct lu_env *env, struct obd_device *obd,
- struct obd_llog_group *olg, int ctxt_idx,
- struct obd_device *disk_obd);
- int (*lop_sync)(struct llog_ctxt *ctxt, struct obd_export *exp,
- int flags);
- int (*lop_cleanup)(const struct lu_env *env, struct llog_ctxt *ctxt);
- int (*lop_cancel)(const struct lu_env *env, struct llog_ctxt *ctxt,
- struct llog_cookie *cookies, int flags);
- int (*lop_connect)(struct llog_ctxt *ctxt, struct llog_logid *logid,
- struct llog_gen *gen, struct obd_uuid *uuid);
- /**
- * Any llog file must first be opened using llog_open(). An llog can be
- * opened by name, by logid, or with neither, in which case a new logid
- * will be generated.
- */
- int (*lop_open)(const struct lu_env *env, struct llog_handle *lgh,
- struct llog_logid *logid, char *name,
- enum llog_open_param);
- /**
- * An opened llog may not exist; where needed, this must be checked
- * with the llog_exist() call.
- */
- int (*lop_exist)(struct llog_handle *lgh);
- /**
- * Close the llog file and call llog_free_handle() implicitly.
- * Any opened llog must be closed with an llog_close() call.
- */
- int (*lop_close)(const struct lu_env *env, struct llog_handle *handle);
- /**
- * Create a new llog file. The llog must already be opened.
- * Must be used only for local llog operations.
- */
- int (*lop_declare_create)(const struct lu_env *env,
- struct llog_handle *handle,
- struct thandle *th);
- /**
- * Write a new record to the llog. Usually appends records, but can
- * edit existing records too.
- */
- int (*lop_declare_write_rec)(const struct lu_env *env,
- struct llog_handle *lgh,
- struct llog_rec_hdr *rec,
- int idx, struct thandle *th);
- int (*lop_write_rec)(const struct lu_env *env,
- struct llog_handle *loghandle,
- struct llog_rec_hdr *rec,
- struct llog_cookie *cookie, int cookiecount,
- void *buf, int idx, struct thandle *th);
- /**
- * Add a new record to the llog catalog. Does the same as
- * llog_write_rec() but through the llog catalog.
- */
- int (*lop_declare_add)(const struct lu_env *env,
- struct llog_handle *lgh,
- struct llog_rec_hdr *rec, struct thandle *th);
- int (*lop_add)(const struct lu_env *env, struct llog_handle *lgh,
- struct llog_rec_hdr *rec, struct llog_cookie *cookie,
- void *buf, struct thandle *th);
-};
-
-/* In-memory descriptor for a log object or log catalog */
-struct llog_handle {
- struct rw_semaphore lgh_lock;
- spinlock_t lgh_hdr_lock; /* protect lgh_hdr data */
- struct llog_logid lgh_id; /* id of this log */
- struct llog_log_hdr *lgh_hdr;
- size_t lgh_hdr_size;
- int lgh_last_idx;
- int lgh_cur_idx; /* used during llog_process */
- __u64 lgh_cur_offset; /* used during llog_process */
- struct llog_ctxt *lgh_ctxt;
- union {
- struct plain_handle_data phd;
- struct cat_handle_data chd;
- } u;
- char *lgh_name;
- void *private_data;
- struct llog_operations *lgh_logops;
- atomic_t lgh_refcount;
-};
-
-#define LLOG_CTXT_FLAG_UNINITIALIZED 0x00000001
-#define LLOG_CTXT_FLAG_STOP 0x00000002
-
-struct llog_ctxt {
- int loc_idx; /* my index in the obd array of ctxts */
- struct obd_device *loc_obd; /* points back to the containing obd*/
- struct obd_llog_group *loc_olg; /* group containing that ctxt */
- struct obd_export *loc_exp; /* parent "disk" export (e.g. MDS) */
- struct obd_import *loc_imp; /* to use in RPCs; can be a backward-
- * pointing import
- */
- struct llog_operations *loc_logops;
- struct llog_handle *loc_handle;
- struct mutex loc_mutex; /* protect loc_imp */
- atomic_t loc_refcount;
- long loc_flags; /* flags, see above defines */
- /*
- * llog chunk size; an llog record cannot be bigger than
- * loc_chunk_size
- */
- __u32 loc_chunk_size;
-};
-
-#define LLOG_PROC_BREAK 0x0001
-#define LLOG_DEL_RECORD 0x0002
-
-static inline int llog_handle2ops(struct llog_handle *loghandle,
- struct llog_operations **lop)
-{
- if (!loghandle || !loghandle->lgh_logops)
- return -EINVAL;
-
- *lop = loghandle->lgh_logops;
- return 0;
-}
-
-static inline struct llog_ctxt *llog_ctxt_get(struct llog_ctxt *ctxt)
-{
- atomic_inc(&ctxt->loc_refcount);
- CDEBUG(D_INFO, "GETting ctxt %p : new refcount %d\n", ctxt,
- atomic_read(&ctxt->loc_refcount));
- return ctxt;
-}
-
-static inline void llog_ctxt_put(struct llog_ctxt *ctxt)
-{
- if (!ctxt)
- return;
- LASSERT_ATOMIC_GT_LT(&ctxt->loc_refcount, 0, LI_POISON);
- CDEBUG(D_INFO, "PUTting ctxt %p : new refcount %d\n", ctxt,
- atomic_read(&ctxt->loc_refcount) - 1);
- __llog_ctxt_put(NULL, ctxt);
-}
-
-static inline void llog_group_init(struct obd_llog_group *olg)
-{
- init_waitqueue_head(&olg->olg_waitq);
- spin_lock_init(&olg->olg_lock);
- mutex_init(&olg->olg_cat_processing);
-}
-
-static inline int llog_group_set_ctxt(struct obd_llog_group *olg,
- struct llog_ctxt *ctxt, int index)
-{
- LASSERT(index >= 0 && index < LLOG_MAX_CTXTS);
-
- spin_lock(&olg->olg_lock);
- if (olg->olg_ctxts[index]) {
- spin_unlock(&olg->olg_lock);
- return -EEXIST;
- }
- olg->olg_ctxts[index] = ctxt;
- spin_unlock(&olg->olg_lock);
- return 0;
-}
-
-static inline struct llog_ctxt *llog_group_get_ctxt(struct obd_llog_group *olg,
- int index)
-{
- struct llog_ctxt *ctxt;
-
- LASSERT(index >= 0 && index < LLOG_MAX_CTXTS);
-
- spin_lock(&olg->olg_lock);
- if (!olg->olg_ctxts[index])
- ctxt = NULL;
- else
- ctxt = llog_ctxt_get(olg->olg_ctxts[index]);
- spin_unlock(&olg->olg_lock);
- return ctxt;
-}
-
-static inline void llog_group_clear_ctxt(struct obd_llog_group *olg, int index)
-{
- LASSERT(index >= 0 && index < LLOG_MAX_CTXTS);
- spin_lock(&olg->olg_lock);
- olg->olg_ctxts[index] = NULL;
- spin_unlock(&olg->olg_lock);
-}
-
-static inline struct llog_ctxt *llog_get_context(struct obd_device *obd,
- int index)
-{
- return llog_group_get_ctxt(&obd->obd_olg, index);
-}
-
-static inline int llog_group_ctxt_null(struct obd_llog_group *olg, int index)
-{
- return (!olg->olg_ctxts[index]);
-}
-
-static inline int llog_ctxt_null(struct obd_device *obd, int index)
-{
- return llog_group_ctxt_null(&obd->obd_olg, index);
-}
-
-static inline int llog_next_block(const struct lu_env *env,
- struct llog_handle *loghandle, int *cur_idx,
- int next_idx, __u64 *cur_offset, void *buf,
- int len)
-{
- struct llog_operations *lop;
- int rc;
-
- rc = llog_handle2ops(loghandle, &lop);
- if (rc)
- return rc;
- if (!lop->lop_next_block)
- return -EOPNOTSUPP;
-
- rc = lop->lop_next_block(env, loghandle, cur_idx, next_idx,
- cur_offset, buf, len);
- return rc;
-}
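-
-/*
- * Usage sketch (illustrative; end-of-log handling depends on the
- * backing lop_next_block method): read log blocks sequentially until
- * the wanted index is reached or an error is returned. Both the block
- * index and the file offset cursors are advanced by the call.
- *
- *	int cur_idx = 0;
- *	__u64 cur_offset = 0;
- *
- *	do {
- *		rc = llog_next_block(env, loghandle, &cur_idx, next_idx,
- *				     &cur_offset, buf, len);
- *		(process records in buf)
- *	} while (rc == 0 && cur_idx < next_idx);
- */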
-
-/* llog.c */
-int llog_declare_write_rec(const struct lu_env *env,
- struct llog_handle *handle,
- struct llog_rec_hdr *rec, int idx,
- struct thandle *th);
-int llog_write_rec(const struct lu_env *env, struct llog_handle *handle,
- struct llog_rec_hdr *rec, struct llog_cookie *logcookies,
- int numcookies, void *buf, int idx, struct thandle *th);
-int lustre_process_log(struct super_block *sb, char *logname,
- struct config_llog_instance *cfg);
-int lustre_end_log(struct super_block *sb, char *logname,
- struct config_llog_instance *cfg);
-/** @} log */
-
-#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h
deleted file mode 100644
index a9c9992a2502..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_mdc.h
+++ /dev/null
@@ -1,229 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/include/lustre_mdc.h
- *
- * MDS data structures.
- * See also lustre_idl.h for wire formats of requests.
- */
-
-#ifndef _LUSTRE_MDC_H
-#define _LUSTRE_MDC_H
-
-/** \defgroup mdc mdc
- *
- * @{
- */
-
-#include <linux/fs.h>
-#include <linux/dcache.h>
-#include <lustre_intent.h>
-#include <lustre_handles.h>
-#include <linux/libcfs/libcfs.h>
-#include <obd_class.h>
-#include <lustre_lib.h>
-#include <lustre_dlm.h>
-#include <lustre_export.h>
-
-struct ptlrpc_client;
-struct obd_export;
-struct ptlrpc_request;
-struct obd_device;
-
-/**
- * Serializes in-flight MDT-modifying RPC requests to preserve idempotency.
- *
- * This mutex is used to implement execute-once semantics on the MDT.
- * The MDT stores the last transaction ID and result for every client in
- * its last_rcvd file. If the client doesn't get a reply, it can safely
- * resend the request and the MDT will reconstruct the reply being aware
- * that the request has already been executed. Without this lock,
- * execution status of concurrent in-flight requests would be
- * overwritten.
- *
- * This design limits the extent to which we can keep a full pipeline of
- * in-flight requests from a single client. This limitation could be
- * overcome by allowing multiple slots per client in the last_rcvd file.
- */
-struct mdc_rpc_lock {
- /** Lock protecting in-flight RPC concurrency. */
- struct mutex rpcl_mutex;
- /** Intent associated with currently executing request. */
- struct lookup_intent *rpcl_it;
- /** Used for MDS/RPC load testing purposes. */
- int rpcl_fakes;
-};
-
-#define MDC_FAKE_RPCL_IT ((void *)0x2c0012bfUL)
-
-static inline void mdc_init_rpc_lock(struct mdc_rpc_lock *lck)
-{
- mutex_init(&lck->rpcl_mutex);
- lck->rpcl_it = NULL;
-}
-
-static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck,
- struct lookup_intent *it)
-{
- if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
- it->it_op == IT_LAYOUT || it->it_op == IT_READDIR))
- return;
-
- /* This would normally block until the existing request finishes.
- * If fail_loc is set it will block until the regular request is
- * done, then set rpcl_it to MDC_FAKE_RPCL_IT. Once that is set
- * it will only be cleared when all fake requests are finished.
- * Only when all fake requests are finished can normal requests
- * be sent, to ensure they are recoverable again.
- */
- again:
- mutex_lock(&lck->rpcl_mutex);
-
- if (CFS_FAIL_CHECK_QUIET(OBD_FAIL_MDC_RPCS_SEM)) {
- lck->rpcl_it = MDC_FAKE_RPCL_IT;
- lck->rpcl_fakes++;
- mutex_unlock(&lck->rpcl_mutex);
- return;
- }
-
- /* This will only happen when the CFS_FAIL_CHECK() was
- * just turned off but there are still requests in progress.
- * Wait until they finish. It doesn't need to be efficient
- * in this extremely rare case, just have low overhead in
- * the common case when it isn't true.
- */
- while (unlikely(lck->rpcl_it == MDC_FAKE_RPCL_IT)) {
- mutex_unlock(&lck->rpcl_mutex);
- schedule_timeout(HZ / 4);
- goto again;
- }
-
- LASSERT(!lck->rpcl_it);
- lck->rpcl_it = it;
-}
-
-static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
- struct lookup_intent *it)
-{
- if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
- it->it_op == IT_LAYOUT || it->it_op == IT_READDIR))
- return;
-
- if (lck->rpcl_it == MDC_FAKE_RPCL_IT) { /* OBD_FAIL_MDC_RPCS_SEM */
- mutex_lock(&lck->rpcl_mutex);
-
- LASSERTF(lck->rpcl_fakes > 0, "%d\n", lck->rpcl_fakes);
- lck->rpcl_fakes--;
-
- if (lck->rpcl_fakes == 0)
- lck->rpcl_it = NULL;
-
- } else {
- LASSERTF(it == lck->rpcl_it, "%p != %p\n", it, lck->rpcl_it);
- lck->rpcl_it = NULL;
- }
-
- mutex_unlock(&lck->rpcl_mutex);
-}
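-
-/*
- * Pairing sketch (illustrative, assuming the synchronous
- * ptlrpc_queue_wait() helper): every MDT-modifying request takes the
- * lock before being sent and drops it once the reply (or an error) is
- * in, so at most one modifying RPC per client is in flight.
- *
- *	mdc_get_rpc_lock(rpc_lock, it);
- *	rc = ptlrpc_queue_wait(req);
- *	mdc_put_rpc_lock(rpc_lock, it);
- */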
-
-static inline void mdc_get_mod_rpc_slot(struct ptlrpc_request *req,
- struct lookup_intent *it)
-{
- struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
- u32 opc;
- u16 tag;
-
- opc = lustre_msg_get_opc(req->rq_reqmsg);
- tag = obd_get_mod_rpc_slot(cli, opc, it);
- lustre_msg_set_tag(req->rq_reqmsg, tag);
-}
-
-static inline void mdc_put_mod_rpc_slot(struct ptlrpc_request *req,
- struct lookup_intent *it)
-{
- struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
- u32 opc;
- u16 tag;
-
- opc = lustre_msg_get_opc(req->rq_reqmsg);
- tag = lustre_msg_get_tag(req->rq_reqmsg);
- obd_put_mod_rpc_slot(cli, opc, it, tag);
-}
-
-/**
- * Update the maximum possible easize.
- *
- * This value is learned from ptlrpc replies sent by the MDT. The
- * default easize is initialized to the minimum value but allowed
- * to grow up to a single page in size if required to handle the
- * common case.
- *
- * \see client_obd::cl_default_mds_easize
- *
- * \param[in] exp export for MDC device
- * \param[in] body body of ptlrpc reply from MDT
- *
- */
-static inline void mdc_update_max_ea_from_body(struct obd_export *exp,
- struct mdt_body *body)
-{
- if (body->mbo_valid & OBD_MD_FLMODEASIZE) {
- struct client_obd *cli = &exp->exp_obd->u.cli;
- u32 def_easize;
-
- if (cli->cl_max_mds_easize < body->mbo_max_mdsize)
- cli->cl_max_mds_easize = body->mbo_max_mdsize;
-
- def_easize = min_t(__u32, body->mbo_max_mdsize,
- OBD_MAX_DEFAULT_EA_SIZE);
- cli->cl_default_mds_easize = def_easize;
- }
-}
-
-/* mdc/mdc_locks.c */
-int it_open_error(int phase, struct lookup_intent *it);
-
-static inline bool cl_is_lov_delay_create(unsigned int flags)
-{
- return (flags & O_LOV_DELAY_CREATE) == O_LOV_DELAY_CREATE;
-}
-
-static inline void cl_lov_delay_create_clear(unsigned int *flags)
-{
- if ((*flags & O_LOV_DELAY_CREATE) == O_LOV_DELAY_CREATE)
- *flags &= ~O_LOV_DELAY_CREATE;
-}
-
-/** @} mdc */
-
-#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_mds.h b/drivers/staging/lustre/lustre/include/lustre_mds.h
deleted file mode 100644
index f665556556ec..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_mds.h
+++ /dev/null
@@ -1,62 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/include/lustre_mds.h
- *
- * MDS data structures.
- * See also lustre_idl.h for wire formats of requests.
- */
-
-#ifndef _LUSTRE_MDS_H
-#define _LUSTRE_MDS_H
-
-/** \defgroup mds mds
- *
- * @{
- */
-
-#include <lustre_handles.h>
-#include <lustre_lib.h>
-#include <lustre_dlm.h>
-#include <lustre_export.h>
-
-struct mds_group_info {
- struct obd_uuid *uuid;
- int group;
-};
-
-#define MDD_OBD_NAME "mdd_obd"
-#define MDD_OBD_UUID "mdd_obd_uuid"
-
-/** @} mds */
-
-#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h
deleted file mode 100644
index 35b43a77eb18..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_net.h
+++ /dev/null
@@ -1,2360 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2010, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-/** \defgroup PtlRPC Portal RPC and networking module.
- *
- * PortalRPC is the layer used by the rest of the Lustre code for network
- * communication: establishing connections with corresponding export and
- * import states, listening for a service, and sending and receiving RPCs.
- * PortalRPC also includes the base recovery framework: packet resending
- * and replaying, reconnections, and the pinger.
- *
- * PortalRPC utilizes LNet as its transport layer.
- *
- * @{
- */
-
-#ifndef _LUSTRE_NET_H
-#define _LUSTRE_NET_H
-
-/** \defgroup net net
- *
- * @{
- */
-
-#include <linux/uio.h>
-#include <linux/libcfs/libcfs.h>
-#include <uapi/linux/lnet/nidstr.h>
-#include <linux/lnet/api.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <lustre_errno.h>
-#include <lustre_ha.h>
-#include <lustre_sec.h>
-#include <lustre_import.h>
-#include <lprocfs_status.h>
-#include <lu_object.h>
-#include <lustre_req_layout.h>
-
-#include <obd_support.h>
-#include <uapi/linux/lustre/lustre_ver.h>
-
-#include <linux/rhashtable.h>
-
-/* MD flags we _always_ use */
-#define PTLRPC_MD_OPTIONS 0
-
-/**
- * log2 max # of bulk operations in one request: 2=4MB/RPC, 5=32MB/RPC, ...
- * In order for the client and server to properly negotiate the maximum
- * possible transfer size, PTLRPC_BULK_OPS_COUNT must be a power-of-two
- * value. The client is free to limit the actual RPC size for any bulk
- * transfer via cl_max_pages_per_rpc to some non-power-of-two value.
- * NOTE: This is limited to 16 (=64GB RPCs) by IOOBJ_MAX_BRW_BITS.
- */
-#define PTLRPC_BULK_OPS_BITS 4
-#if PTLRPC_BULK_OPS_BITS > 16
-#error "More than 65536 BRW RPCs not allowed by IOOBJ_MAX_BRW_BITS."
-#endif
-#define PTLRPC_BULK_OPS_COUNT (1U << PTLRPC_BULK_OPS_BITS)
-/**
- * PTLRPC_BULK_OPS_MASK is for the convenience of the client only, and
- * should not be used on the server at all. Otherwise, it imposes a
- * protocol limitation on the maximum RPC size that can be used by any
- * RPC sent to that server in the future. Instead, the server should
- * use the negotiated per-client ocd_brw_size to determine the bulk
- * RPC count.
- */
-#define PTLRPC_BULK_OPS_MASK (~((__u64)PTLRPC_BULK_OPS_COUNT - 1))
-
-/**
- * Define maxima for bulk I/O.
- *
- * A single PTLRPC BRW request is sent via up to PTLRPC_BULK_OPS_COUNT
- * of LNET_MTU sized RDMA transfers. Clients and servers negotiate the
- * currently supported maximum between peers at connect via ocd_brw_size.
- */
-#define PTLRPC_MAX_BRW_BITS (LNET_MTU_BITS + PTLRPC_BULK_OPS_BITS)
-#define PTLRPC_MAX_BRW_SIZE (1 << PTLRPC_MAX_BRW_BITS)
-#define PTLRPC_MAX_BRW_PAGES (PTLRPC_MAX_BRW_SIZE >> PAGE_SHIFT)
-
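-/*
- * Worked example, assuming LNET_MTU_BITS = 20 (a 1MB LNet MTU) and
- * 4KB pages:
- * PTLRPC_MAX_BRW_BITS = 20 + 4 = 24
- * PTLRPC_MAX_BRW_SIZE = 1 << 24 = 16MB per BRW RPC
- * PTLRPC_MAX_BRW_PAGES = 16MB >> PAGE_SHIFT = 4096 pages
- */
-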
-#define ONE_MB_BRW_SIZE (1 << LNET_MTU_BITS)
-#define MD_MAX_BRW_SIZE (1 << LNET_MTU_BITS)
-#define MD_MAX_BRW_PAGES (MD_MAX_BRW_SIZE >> PAGE_SHIFT)
-#define DT_MAX_BRW_SIZE PTLRPC_MAX_BRW_SIZE
-#define DT_MAX_BRW_PAGES (DT_MAX_BRW_SIZE >> PAGE_SHIFT)
-#define OFD_MAX_BRW_SIZE (1 << LNET_MTU_BITS)
-
-/* When PAGE_SIZE is a constant, we can check our arithmetic here with cpp! */
-# if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0)
-# error "PTLRPC_MAX_BRW_PAGES isn't a power of two"
-# endif
-# if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * PAGE_SIZE))
-# error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * PAGE_SIZE"
-# endif
-# if (PTLRPC_MAX_BRW_SIZE > LNET_MTU * PTLRPC_BULK_OPS_COUNT)
-# error "PTLRPC_MAX_BRW_SIZE too big"
-# endif
-# if (PTLRPC_MAX_BRW_PAGES > LNET_MAX_IOV * PTLRPC_BULK_OPS_COUNT)
-# error "PTLRPC_MAX_BRW_PAGES too big"
-# endif
-
-#define PTLRPC_NTHRS_INIT 2
-
-/**
- * Buffer Constants
- *
- * Constants determine how memory is used to buffer incoming service requests.
- *
- * ?_NBUFS # buffers to allocate when growing the pool
- * ?_BUFSIZE # bytes in a single request buffer
- * ?_MAXREQSIZE # maximum request service will receive
- *
- * When fewer than ?_NBUFS/2 buffers are posted for receive, another chunk
- * of ?_NBUFS is added to the pool.
- *
- * Messages larger than ?_MAXREQSIZE are dropped. Request buffers are
- * considered full when less than ?_MAXREQSIZE is left in them.
- */
-/**
- * Thread Constants
- *
- * Constants determine how threads are created for ptlrpc service.
- *
- * ?_NTHRS_INIT # threads to create for each service partition at
- * initialization. For a non-affinity service with
- * only one partition, it is the overall number of
- * threads for the service at initialization.
- * ?_NTHRS_BASE minimum # threads created for each ptlrpc
- * partition to keep the service healthy. It is the
- * low-water mark of the per-partition thread
- * upper limit.
- * ?_THR_FACTOR # threads that can be added to the per-partition
- * upper limit for each CPU core. This factor is only
- * a reference; it may be decreased if the number of
- * cores per CPT is above a limit.
- * ?_NTHRS_MAX overall # threads that can be created for a
- * service. It is a soft limit: if the service runs
- * on a machine with hundreds of cores and tens of
- * CPU partitions, each partition must still get
- * ?_NTHRS_BASE threads, so the total can be
- * ?_NTHRS_BASE * number_of_cpts, which may exceed
- * ?_NTHRS_MAX.
- *
- * Examples
- *
- * #define MDS_NTHRS_INIT 2
- * #define MDS_NTHRS_BASE 64
- * #define MDS_NTHRS_FACTOR 8
- * #define MDS_NTHRS_MAX 1024
- *
- * Example 1):
- * ---------------------------------------------------------------------
- * Server(A) has 16 cores, user configured it to 4 partitions so each
- * partition has 4 cores, then actual number of service threads on each
- * partition is:
- * MDS_NTHRS_BASE(64) + cores(4) * MDS_NTHRS_FACTOR(8) = 96
- *
- * Total number of threads for the service is:
- * 96 * partitions(4) = 384
- *
- * Example 2):
- * ---------------------------------------------------------------------
- * Server(B) has 32 cores, user configured it to 4 partitions so each
- * partition has 8 cores, then actual number of service threads on each
- * partition is:
- * MDS_NTHRS_BASE(64) + cores(8) * MDS_NTHRS_FACTOR(8) = 128
- *
- * Total number of threads for the service is:
- * 128 * partitions(4) = 512
- *
- * Example 3):
- * ---------------------------------------------------------------------
- * Server(B) has 96 cores, user configured it to 8 partitions so each
- * partition has 12 cores, then actual number of service threads on each
- * partition is:
- * MDS_NTHRS_BASE(64) + cores(12) * MDS_NTHRS_FACTOR(8) = 160
- *
- * Total number of threads for the service is:
- * 160 * partitions(8) = 1280
- *
- * However, it's above the soft limit MDS_NTHRS_MAX, so we choose this number
- * as upper limit of threads number for each partition:
- * MDS_NTHRS_MAX(1024) / partitions(8) = 128
- *
- * Example 4):
- * ---------------------------------------------------------------------
- * Server(C) has a thousand cores and the user configured it to 32 partitions:
- * MDS_NTHRS_BASE(64) * 32 = 2048
- *
- * This is already above the soft limit MDS_NTHRS_MAX(1024), but we still
- * need to guarantee that each partition has at least MDS_NTHRS_BASE(64)
- * threads to keep the service healthy, so the total will simply be 2048.
- *
- * NB: we don't suggest choosing a server with that many cores, because the
- * backend filesystem itself, the buffer cache, or the underlying network
- * stack might have SMP scalability issues at that scale.
- *
- * If the user already has a fat machine with hundreds or thousands of
- * cores, there are two configuration choices:
- * a) create a CPU table from a subset of all CPUs and run Lustre on
- * top of this subset
- * b) bind service threads to a few partitions; see the module parameters
- * of MDS and OSS for details
- *
- * NB: these calculations (and the examples below) are simplified to aid
- * understanding; the real implementation is a little more complex.
- * Please see ptlrpc_server_nthreads_check() for details.
- *
- */
-
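-/*
- * The per-partition sizing rule from the examples above, as an
- * illustrative sketch only (the real logic, with more refinements,
- * lives in ptlrpc_server_nthreads_check()):
- *
- *	static int nthrs_per_partition(int base, int factor, int soft_max,
- *				       int cores_per_cpt, int ncpts)
- *	{
- *		int nthrs = base + cores_per_cpt * factor;
- *
- *		if (nthrs * ncpts > soft_max)
- *			nthrs = soft_max / ncpts;
- *		return max(nthrs, base);	(base is always guaranteed)
- *	}
- */
-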
- /*
- * LDLM threads constants:
- *
- * Given a factor of 8 and a base thread count of 24:
- *
- * example 1)
- * On 4-core machine we will have 24 + 8 * 4 = 56 threads.
- *
- * example 2)
- * On 8-core machine with 2 partitions we will have 24 + 4 * 8 = 56
- * threads for each partition and total threads number will be 112.
- *
- * example 3)
- * On 64-core machine with 8 partitions we will need LDLM_NTHRS_BASE(24)
- * threads for each partition to keep service healthy, so total threads
- * number should be 24 * 8 = 192.
- *
- * So with these constants, the thread count will be at a similar level
- * to old versions, unless the target machine has over a hundred cores.
- */
-#define LDLM_THR_FACTOR 8
-#define LDLM_NTHRS_INIT PTLRPC_NTHRS_INIT
-#define LDLM_NTHRS_BASE 24
-#define LDLM_NTHRS_MAX (num_online_cpus() == 1 ? 64 : 128)
-
-#define LDLM_BL_THREADS LDLM_NTHRS_AUTO_INIT
-#define LDLM_CLIENT_NBUFS 1
-#define LDLM_SERVER_NBUFS 64
-#define LDLM_BUFSIZE (8 * 1024)
-#define LDLM_MAXREQSIZE (5 * 1024)
-#define LDLM_MAXREPSIZE (1024)
-
-#define MDS_MAXREQSIZE (5 * 1024) /* >= 4736 */
-
-/**
- * FIEMAP request can be 4K+ for now
- */
-#define OST_MAXREQSIZE (16 * 1024)
-
-/* Macro to hide a typecast. */
-#define ptlrpc_req_async_args(req) ((void *)&req->rq_async_args)
-
-struct ptlrpc_replay_async_args {
- int praa_old_state;
- int praa_old_status;
-};
-
-/**
- * Structure to single define portal connection.
- */
-struct ptlrpc_connection {
- /** linkage for connections hash table */
- struct rhash_head c_hash;
- /** Our own lnet nid for this connection */
- lnet_nid_t c_self;
- /** Remote side nid for this connection */
- struct lnet_process_id c_peer;
- /** UUID of the other side */
- struct obd_uuid c_remote_uuid;
- /** reference counter for this connection */
- atomic_t c_refcount;
-};
-
-/** Client definition for PortalRPC */
-struct ptlrpc_client {
- /** What lnet portal does this client send messages to by default */
- __u32 cli_request_portal;
- /** What portal do we expect replies on */
- __u32 cli_reply_portal;
- /** Name of the client */
- char *cli_name;
-};
-
-/** state flags of requests */
-/* XXX only ones left are those used by the bulk descs as well! */
-#define PTL_RPC_FL_INTR (1 << 0) /* reply wait was interrupted by user */
-#define PTL_RPC_FL_TIMEOUT (1 << 7) /* request timed out waiting for reply */
-
-#define REQ_MAX_ACK_LOCKS 8
-
-union ptlrpc_async_args {
- /**
- * Scratchpad for passing args to completion interpreter. Users
- * cast to the struct of their choosing, and BUILD_BUG_ON oversized
- * arguments. For _tons_ of context, kmalloc a struct and store
- * a pointer to it here. The pointer_arg ensures this struct is at
- * least big enough for that.
- */
- void *pointer_arg[11];
- __u64 space[7];
-};
-
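-/*
- * Usage sketch (illustrative, with a hypothetical argument struct):
- * stash interpreter context directly in the request, guarding against
- * overflowing the scratchpad at compile time.
- *
- *	struct my_async_args {
- *		struct obd_export *exp;
- *		int flags;
- *	};
- *
- *	BUILD_BUG_ON(sizeof(struct my_async_args) >
- *		     sizeof(union ptlrpc_async_args));
- *	struct my_async_args *aa = ptlrpc_req_async_args(req);
- *	aa->exp = exp;
- */
-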
-struct ptlrpc_request_set;
-typedef int (*set_interpreter_func)(struct ptlrpc_request_set *, void *, int);
-typedef int (*set_producer_func)(struct ptlrpc_request_set *, void *);
-
-/**
- * Definition of request set structure.
- * A request set is a list of requests (not necessarily to the same target)
- * that, once populated with RPCs, can be sent in parallel.
- * There are two kinds of request sets: general purpose, and those with a
- * dedicated serving thread, e.g. the ptlrpcd set.
- * For general purpose sets, once the set has started sending it is
- * impossible to add new requests to it.
- * Provides a way to call "completion callbacks" when all requests in the
- * set have returned.
- */
-struct ptlrpc_request_set {
- atomic_t set_refcount;
- /** number of in queue requests */
- atomic_t set_new_count;
- /** number of uncompleted requests */
- atomic_t set_remaining;
- /** wait queue to wait on for request events */
- wait_queue_head_t set_waitq;
- wait_queue_head_t *set_wakeup_ptr;
- /** List of requests in the set */
- struct list_head set_requests;
- /**
- * List of completion callbacks to be called when the set is completed.
- * This is only used if \a set_interpret is NULL.
- * Links struct ptlrpc_set_cbdata.
- */
- struct list_head set_cblist;
- /** Completion callback, if only one. */
- set_interpreter_func set_interpret;
- /** opaque argument passed to the completion \a set_interpret callback. */
- void *set_arg;
- /**
- * Lock for \a set_new_requests manipulation;
- * taken so that any caller can hand requests over to the set holder,
- * who can then fold them into the lock-free set
- */
- spinlock_t set_new_req_lock;
- /** List of new yet unsent requests. Only used with ptlrpcd now. */
- struct list_head set_new_requests;
-
- /** rq_status of requests that have been freed already */
- int set_rc;
- /** Additional fields used by the flow control extension */
- /** Maximum number of RPCs in flight */
- int set_max_inflight;
- /** Callback function used to generate RPCs */
- set_producer_func set_producer;
- /** opaque argument passed to the producer callback */
- void *set_producer_arg;
-};
-
-/**
- * Description of a single ptlrpc_set callback
- */
-struct ptlrpc_set_cbdata {
- /** List linkage item */
- struct list_head psc_item;
- /** Pointer to interpreting function */
- set_interpreter_func psc_interpret;
- /** Opaque argument to pass to the callback */
- void *psc_data;
-};
-
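-/*
- * Usage sketch (illustrative, assuming the set prep/add/wait/destroy
- * helpers declared elsewhere in this header): populate a set, send all
- * requests in parallel, and wait for every completion.
- *
- *	set = ptlrpc_prep_set();
- *	if (set == NULL)
- *		return -ENOMEM;
- *	ptlrpc_set_add_req(set, req1);
- *	ptlrpc_set_add_req(set, req2);
- *	rc = ptlrpc_set_wait(set);
- *	ptlrpc_set_destroy(set);
- */
-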
-struct ptlrpc_bulk_desc;
-struct ptlrpc_service_part;
-struct ptlrpc_service;
-
-/**
- * ptlrpc callback & work item stuff
- */
-struct ptlrpc_cb_id {
- void (*cbid_fn)(struct lnet_event *ev); /* specific callback fn */
- void *cbid_arg; /* additional arg */
-};
-
-/** Maximum number of locks to fit into reply state */
-#define RS_MAX_LOCKS 8
-#define RS_DEBUG 0
-
-/**
- * Structure to define reply state on the server
- * Reply state holds various reply message information. Also for "difficult"
- * replies (rep-ack case) we store the state after sending the reply and wait
- * for the client to acknowledge the reception. In these cases locks could be
- * added to the state for replay/failover consistency guarantees.
- */
-struct ptlrpc_reply_state {
- /** Callback description */
- struct ptlrpc_cb_id rs_cb_id;
- /** Linkage for list of all reply states in a system */
- struct list_head rs_list;
- /** Linkage for list of all reply states on same export */
- struct list_head rs_exp_list;
- /** Linkage for list of all reply states for same obd */
- struct list_head rs_obd_list;
-#if RS_DEBUG
- struct list_head rs_debug_list;
-#endif
- /** A spinlock to protect the reply state flags */
- spinlock_t rs_lock;
- /** Reply state flags */
- unsigned long rs_difficult:1; /* ACK/commit stuff */
- unsigned long rs_no_ack:1; /* no ACK, even for
- * difficult requests
- */
- unsigned long rs_scheduled:1; /* being handled? */
- unsigned long rs_scheduled_ever:1;/* any schedule attempts? */
- unsigned long rs_handled:1; /* been handled yet? */
- unsigned long rs_on_net:1; /* reply_out_callback pending? */
- unsigned long rs_prealloc:1; /* rs from prealloc list */
- unsigned long rs_committed:1;/* the transaction was committed
- * and the rs was dispatched
- */
- atomic_t rs_refcount; /* number of users */
- /** Number of locks awaiting client ACK */
- int rs_nlocks;
-
- /** Size of the state */
- int rs_size;
- /** opcode */
- __u32 rs_opc;
- /** Transaction number */
- __u64 rs_transno;
- /** xid */
- __u64 rs_xid;
- struct obd_export *rs_export;
- struct ptlrpc_service_part *rs_svcpt;
- /** Lnet metadata handle for the reply */
- struct lnet_handle_md rs_md_h;
-
- /** Context for the service thread */
- struct ptlrpc_svc_ctx *rs_svc_ctx;
- /** Reply buffer (actually sent to the client), encoded if needed */
- struct lustre_msg *rs_repbuf; /* wrapper */
- /** Size of the reply buffer */
- int rs_repbuf_len; /* wrapper buf length */
- /** Size of the reply message */
- int rs_repdata_len; /* wrapper msg length */
- /**
- * Actual reply message. Its content is encrypted (if needed) to
- * produce reply buffer for actual sending. In simple case
- * of no network encryption we just set \a rs_repbuf to \a rs_msg
- */
- struct lustre_msg *rs_msg; /* reply message */
-
- /** Handles of locks awaiting client reply ACK */
- struct lustre_handle rs_locks[RS_MAX_LOCKS];
- /** Lock modes of locks in \a rs_locks */
- enum ldlm_mode rs_modes[RS_MAX_LOCKS];
-};
-
-struct ptlrpc_thread;
-
-/** RPC stages */
-enum rq_phase {
- RQ_PHASE_NEW = 0xebc0de00,
- RQ_PHASE_RPC = 0xebc0de01,
- RQ_PHASE_BULK = 0xebc0de02,
- RQ_PHASE_INTERPRET = 0xebc0de03,
- RQ_PHASE_COMPLETE = 0xebc0de04,
- RQ_PHASE_UNREG_RPC = 0xebc0de05,
- RQ_PHASE_UNREG_BULK = 0xebc0de06,
- RQ_PHASE_UNDEFINED = 0xebc0de07
-};
-
-/** Type of request interpreter call-back */
-typedef int (*ptlrpc_interpterer_t)(const struct lu_env *env,
- struct ptlrpc_request *req,
- void *arg, int rc);
-
-/**
- * Definition of request pool structure.
- * The pool is used to store empty preallocated requests for the case
- * when we would actually need to send something without performing
- * any allocations (to avoid e.g. OOM).
- */
-struct ptlrpc_request_pool {
- /** Locks the list */
- spinlock_t prp_lock;
- /** list of ptlrpc_request structs */
- struct list_head prp_req_list;
- /** Maximum message size that would fit into a request from this pool */
- int prp_rq_size;
- /** Function to allocate more requests for this pool */
- int (*prp_populate)(struct ptlrpc_request_pool *, int);
-};
-
-struct lu_context;
-struct lu_env;
-
-struct ldlm_lock;
-
-#include <lustre_nrs.h>
-
-/**
- * Basic request prioritization operations structure.
- * The whole idea is centered around locks and RPCs that might affect locks.
- * When a lock is contended, we try to give priority to RPCs that might
- * lead to the fastest release of that lock.
- * Currently implemented only for OSTs, in a way that gives IO and
- * truncate RPCs coming from a locked region whose lock is contended
- * priority over other requests.
- */
-struct ptlrpc_hpreq_ops {
- /**
- * Check if the lock handle of the given lock is the same as the
- * one taken from the request.
- */
- int (*hpreq_lock_match)(struct ptlrpc_request *, struct ldlm_lock *);
- /**
- * Check if the request is a high priority one.
- */
- int (*hpreq_check)(struct ptlrpc_request *);
- /**
- * Called after the request has been handled.
- */
- void (*hpreq_fini)(struct ptlrpc_request *);
-};
-
-struct ptlrpc_cli_req {
- /** For bulk requests on client only: bulk descriptor */
- struct ptlrpc_bulk_desc *cr_bulk;
- /** optional time limit for send attempts */
- long cr_delay_limit;
- /** time request was first queued */
- unsigned long cr_queued_time;
- /** request sent timeval */
- struct timespec64 cr_sent_tv;
- /** time when the request was actually sent out */
- time64_t cr_sent_out;
- /** when req reply unlink must finish. */
- time64_t cr_reply_deadline;
- /** when req bulk unlink must finish. */
- time64_t cr_bulk_deadline;
- /** when req unlink must finish. */
- time64_t cr_req_deadline;
- /** Portal to which this request would be sent */
- short cr_req_ptl;
- /** Portal where to wait for reply and where reply would be sent */
- short cr_rep_ptl;
- /** request resending number */
- unsigned int cr_resend_nr;
- /** What was import generation when this request was sent */
- int cr_imp_gen;
- enum lustre_imp_state cr_send_state;
- /** Per-request waitq introduced by bug 21938 for recovery waiting */
- wait_queue_head_t cr_set_waitq;
- /** Link item for request set lists */
- struct list_head cr_set_chain;
- /** link to waited ctx */
- struct list_head cr_ctx_chain;
-
- /** client's half ctx */
- struct ptlrpc_cli_ctx *cr_cli_ctx;
- /** Link back to the request set */
- struct ptlrpc_request_set *cr_set;
- /** outgoing request MD handle */
- struct lnet_handle_md cr_req_md_h;
- /** request-out callback parameter */
- struct ptlrpc_cb_id cr_req_cbid;
- /** incoming reply MD handle */
- struct lnet_handle_md cr_reply_md_h;
- wait_queue_head_t cr_reply_waitq;
- /** reply callback parameter */
- struct ptlrpc_cb_id cr_reply_cbid;
- /** Async completion handler, called when reply is received */
- ptlrpc_interpterer_t cr_reply_interp;
- /** Async completion context */
- union ptlrpc_async_args cr_async_args;
- /** Opaque data for replay and commit callbacks. */
- void *cr_cb_data;
- /** Link to the imp->imp_unreplied_list */
- struct list_head cr_unreplied_list;
- /**
- * Commit callback, called when request is committed and about to be
- * freed.
- */
- void (*cr_commit_cb)(struct ptlrpc_request *);
- /** Replay callback, called after request is replayed at recovery */
- void (*cr_replay_cb)(struct ptlrpc_request *);
-};
-
-/** client request member alias */
-/* NB: these aliases should NOT be used by any new code; instead they should
- * be removed step by step to avoid potential abuse
- */
-#define rq_bulk rq_cli.cr_bulk
-#define rq_delay_limit rq_cli.cr_delay_limit
-#define rq_queued_time rq_cli.cr_queued_time
-#define rq_sent_tv rq_cli.cr_sent_tv
-#define rq_real_sent rq_cli.cr_sent_out
-#define rq_reply_deadline rq_cli.cr_reply_deadline
-#define rq_bulk_deadline rq_cli.cr_bulk_deadline
-#define rq_req_deadline rq_cli.cr_req_deadline
-#define rq_nr_resend rq_cli.cr_resend_nr
-#define rq_request_portal rq_cli.cr_req_ptl
-#define rq_reply_portal rq_cli.cr_rep_ptl
-#define rq_import_generation rq_cli.cr_imp_gen
-#define rq_send_state rq_cli.cr_send_state
-#define rq_set_chain rq_cli.cr_set_chain
-#define rq_ctx_chain rq_cli.cr_ctx_chain
-#define rq_set rq_cli.cr_set
-#define rq_set_waitq rq_cli.cr_set_waitq
-#define rq_cli_ctx rq_cli.cr_cli_ctx
-#define rq_req_md_h rq_cli.cr_req_md_h
-#define rq_req_cbid rq_cli.cr_req_cbid
-#define rq_reply_md_h rq_cli.cr_reply_md_h
-#define rq_reply_waitq rq_cli.cr_reply_waitq
-#define rq_reply_cbid rq_cli.cr_reply_cbid
-#define rq_interpret_reply rq_cli.cr_reply_interp
-#define rq_async_args rq_cli.cr_async_args
-#define rq_cb_data rq_cli.cr_cb_data
-#define rq_unreplied_list rq_cli.cr_unreplied_list
-#define rq_commit_cb rq_cli.cr_commit_cb
-#define rq_replay_cb rq_cli.cr_replay_cb
-
-struct ptlrpc_srv_req {
- /** initial thread servicing this request */
- struct ptlrpc_thread *sr_svc_thread;
- /**
- * Server-side list of incoming unserved requests sorted by arrival
- * time. Traversed from time to time to notice about-to-expire
- * requests and send back "early replies" to clients, letting them
- * know the server is alive and well, just too busy to service their
- * requests in time.
- */
- struct list_head sr_timed_list;
- /** server-side per-export list */
- struct list_head sr_exp_list;
- /** server-side history, used for debugging purposes. */
- struct list_head sr_hist_list;
- /** history sequence # */
- __u64 sr_hist_seq;
- /** the index of service's srv_at_array into which request is linked */
- time64_t sr_at_index;
- /** authed uid */
- uid_t sr_auth_uid;
- /** authed uid mapped to */
- uid_t sr_auth_mapped_uid;
- /** RPC is generated from what part of Lustre */
- enum lustre_sec_part sr_sp_from;
- /** request session context */
- struct lu_context sr_ses;
- /** \addtogroup nrs
- * @{
- */
- /** stub for NRS request */
- struct ptlrpc_nrs_request sr_nrq;
- /** @} nrs */
- /** request arrival time */
- struct timespec64 sr_arrival_time;
- /** server's half ctx */
- struct ptlrpc_svc_ctx *sr_svc_ctx;
- /** (server side), pointed directly into req buffer */
- struct ptlrpc_user_desc *sr_user_desc;
- /** separated reply state */
- struct ptlrpc_reply_state *sr_reply_state;
- /** server-side hp handlers */
- struct ptlrpc_hpreq_ops *sr_ops;
- /** incoming request buffer */
- struct ptlrpc_request_buffer_desc *sr_rqbd;
-};
-
-/** server request member alias */
-/* NB: these aliases should NOT be used by any new code; instead they should
- * be removed step by step to avoid potential abuse
- */
-#define rq_svc_thread rq_srv.sr_svc_thread
-#define rq_timed_list rq_srv.sr_timed_list
-#define rq_exp_list rq_srv.sr_exp_list
-#define rq_history_list rq_srv.sr_hist_list
-#define rq_history_seq rq_srv.sr_hist_seq
-#define rq_at_index rq_srv.sr_at_index
-#define rq_auth_uid rq_srv.sr_auth_uid
-#define rq_auth_mapped_uid rq_srv.sr_auth_mapped_uid
-#define rq_sp_from rq_srv.sr_sp_from
-#define rq_session rq_srv.sr_ses
-#define rq_nrq rq_srv.sr_nrq
-#define rq_arrival_time rq_srv.sr_arrival_time
-#define rq_reply_state rq_srv.sr_reply_state
-#define rq_svc_ctx rq_srv.sr_svc_ctx
-#define rq_user_desc rq_srv.sr_user_desc
-#define rq_ops rq_srv.sr_ops
-#define rq_rqbd rq_srv.sr_rqbd
-
-/**
- * Represents remote procedure call.
- *
- * This is a staple structure used by everybody wanting to send a request
- * in Lustre.
- */
-struct ptlrpc_request {
- /* Request type: one of PTL_RPC_MSG_* */
- int rq_type;
- /** Result of request processing */
- int rq_status;
- /**
- * Linkage item through which this request is included into
- * sending/delayed lists on client and into rqbd list on server
- */
- struct list_head rq_list;
- /** Lock to protect request flags and some other important bits, like
- * rq_list
- */
- spinlock_t rq_lock;
- /** client-side flags are serialized by rq_lock @{ */
- unsigned int rq_intr:1, rq_replied:1, rq_err:1,
- rq_timedout:1, rq_resend:1, rq_restart:1,
- /**
- * when ->rq_replay is set, the request is kept by the client even
- * after the server commits the corresponding transaction. This is
- * used for operations that require a sequence of multiple
- * requests to be replayed. The only example currently is file
- * open/close. When the last request in such a sequence is
- * committed, ->rq_replay is cleared on all requests in the
- * sequence.
- */
- rq_replay:1,
- rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1,
- rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1,
- rq_early:1,
- rq_req_unlinked:1, /* unlinked request buffer from lnet */
- rq_reply_unlinked:1, /* unlinked reply buffer from lnet */
- rq_memalloc:1, /* req originated from "kswapd" */
- rq_committed:1,
- rq_reply_truncated:1,
- /** whether the "rq_set" is a valid one */
- rq_invalid_rqset:1,
- rq_generation_set:1,
- /** do not resend request on -EINPROGRESS */
- rq_no_retry_einprogress:1,
- /* allow the req to be sent if the import is in recovery
- * status
- */
- rq_allow_replay:1,
- /* bulk request, sent to server, but uncommitted */
- rq_unstable:1;
- /** @} */
-
- /** server-side flags @{ */
- unsigned int
- rq_hp:1, /**< high priority RPC */
- rq_at_linked:1, /**< link into service's srv_at_array */
- rq_packed_final:1; /**< packed final reply */
- /** @} */
-
- /** one of RQ_PHASE_* */
- enum rq_phase rq_phase;
- /** one of RQ_PHASE_* to be used next */
- enum rq_phase rq_next_phase;
- /**
- * client-side refcount for SENT race, server-side refcount
- * for multiple replies
- */
- atomic_t rq_refcount;
- /**
- * client-side:
- * !rq_truncate : # reply bytes actually received,
- * rq_truncate : required repbuf_len for resend
- */
- int rq_nob_received;
- /** Request length */
- int rq_reqlen;
- /** Reply length */
- int rq_replen;
- /** Pool if request is from preallocated list */
- struct ptlrpc_request_pool *rq_pool;
- /** Request message - what client sent */
- struct lustre_msg *rq_reqmsg;
- /** Reply message - server response */
- struct lustre_msg *rq_repmsg;
- /** Transaction number */
- __u64 rq_transno;
- /** xid */
- __u64 rq_xid;
- /** bulk match bits */
- u64 rq_mbits;
- /**
- * List item for the replay list. Not-yet-committed requests get linked
- * there.
- * Also see the \a rq_replay comment above.
- * It is also the link chain on obd_export::exp_req_replay_queue.
- */
- struct list_head rq_replay_list;
- /** non-shared members for client & server request*/
- union {
- struct ptlrpc_cli_req rq_cli;
- struct ptlrpc_srv_req rq_srv;
- };
- /**
- * security and encryption data
- * @{
- */
- /** description of flavors for client & server */
- struct sptlrpc_flavor rq_flvr;
-
- /* client/server security flags */
- unsigned int
- rq_ctx_init:1, /* context initiation */
- rq_ctx_fini:1, /* context destroy */
- rq_bulk_read:1, /* request bulk read */
- rq_bulk_write:1, /* request bulk write */
- /* server authentication flags */
- rq_auth_gss:1, /* authenticated by gss */
- rq_auth_usr_root:1, /* authed as root */
- rq_auth_usr_mdt:1, /* authed as mdt */
- rq_auth_usr_ost:1, /* authed as ost */
- /* security tfm flags */
- rq_pack_udesc:1,
- rq_pack_bulk:1,
- /* doesn't expect reply FIXME */
- rq_no_reply:1,
- rq_pill_init:1, /* pill initialized */
- rq_srv_req:1; /* server request */
-
- /** various buffer pointers */
- struct lustre_msg *rq_reqbuf; /**< req wrapper */
- char *rq_repbuf; /**< rep buffer */
- struct lustre_msg *rq_repdata; /**< rep wrapper msg */
- /** only in priv mode */
- struct lustre_msg *rq_clrbuf;
- int rq_reqbuf_len; /* req wrapper buf len */
- int rq_reqdata_len; /* req wrapper msg len */
- int rq_repbuf_len; /* rep buffer len */
- int rq_repdata_len; /* rep wrapper msg len */
- int rq_clrbuf_len; /* only in priv mode */
- int rq_clrdata_len; /* only in priv mode */
-
- /** early replies go to offset 0, regular replies go after that */
- unsigned int rq_reply_off;
-
- /** @} */
-
- /** Fields that help to see if request and reply were swabbed or not */
- __u32 rq_req_swab_mask;
- __u32 rq_rep_swab_mask;
-
- /** how many early replies (for stats) */
- int rq_early_count;
-
- /** Server-side, export on which request was received */
- struct obd_export *rq_export;
- /** import where request is being sent */
- struct obd_import *rq_import;
- /** our LNet NID */
- lnet_nid_t rq_self;
- /** Peer description (the other side) */
- struct lnet_process_id rq_peer;
- /**
- * service time estimate (secs)
- * If the request is not served by this time, it is marked as timed out.
- */
- int rq_timeout;
- /**
- * when request/reply sent (secs), or time when request should be sent
- */
- time64_t rq_sent;
- /** when request must finish. */
- time64_t rq_deadline;
- /** request format description */
- struct req_capsule rq_pill;
-};
-
-/**
- * Call the completion handler for the RPC if any; return its status, or
- * the original rc if there was no handler defined for this request.
- */
-static inline int ptlrpc_req_interpret(const struct lu_env *env,
- struct ptlrpc_request *req, int rc)
-{
- if (req->rq_interpret_reply) {
- req->rq_status = req->rq_interpret_reply(env, req,
- &req->rq_async_args,
- rc);
- return req->rq_status;
- }
- return rc;
-}
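-
-/*
- * Illustrative sketch (the my_* names are hypothetical, not part of this
- * file): a caller typically installs the interpret callback and fills the
- * embedded argument space before handing the request to ptlrpcd, so that
- * ptlrpc_req_interpret() above runs the handler on completion:
- *
- *	struct my_async_args *aa;
- *
- *	req->rq_interpret_reply = my_interpret;
- *	aa = ptlrpc_req_async_args(req);	 // -> &req->rq_async_args
- *	aa->aa_flags = 0;
- *	ptlrpcd_add_req(req);
- */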
-
-/*
- * Can the request be moved from the regular NRS head to the high-priority NRS
- * head (of the same PTLRPC service partition), if any?
- *
- * For a reliable result, this should be checked under svcpt->scp_req_lock.
- */
-static inline bool ptlrpc_nrs_req_can_move(struct ptlrpc_request *req)
-{
- struct ptlrpc_nrs_request *nrq = &req->rq_nrq;
-
- /**
- * LU-898: Check ptlrpc_nrs_request::nr_enqueued to make sure the
- * request has been enqueued first, and ptlrpc_nrs_request::nr_started
- * to make sure it has not been scheduled yet (analogous to the previous
- * (non-NRS) check of !list_empty(&ptlrpc_request::rq_list)).
- */
- return nrq->nr_enqueued && !nrq->nr_started && !req->rq_hp;
-}
-
-/** @} nrs */
-
-/**
- * Returns 1 if request buffer at offset \a index was already swabbed
- */
-static inline int lustre_req_swabbed(struct ptlrpc_request *req, size_t index)
-{
- LASSERT(index < sizeof(req->rq_req_swab_mask) * 8);
- return req->rq_req_swab_mask & (1 << index);
-}
-
-/**
- * Returns 1 if request reply buffer at offset \a index was already swabbed
- */
-static inline int lustre_rep_swabbed(struct ptlrpc_request *req, size_t index)
-{
- LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8);
- return req->rq_rep_swab_mask & (1 << index);
-}
-
-/**
- * Returns 1 if request needs to be swabbed into local cpu byteorder
- */
-static inline int ptlrpc_req_need_swab(struct ptlrpc_request *req)
-{
- return lustre_req_swabbed(req, MSG_PTLRPC_HEADER_OFF);
-}
-
-/**
- * Returns 1 if request reply needs to be swabbed into local cpu byteorder
- */
-static inline int ptlrpc_rep_need_swab(struct ptlrpc_request *req)
-{
- return lustre_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF);
-}
-
-/**
- * Mark request buffer at offset \a index that it was already swabbed
- */
-static inline void lustre_set_req_swabbed(struct ptlrpc_request *req,
- size_t index)
-{
- LASSERT(index < sizeof(req->rq_req_swab_mask) * 8);
- LASSERT((req->rq_req_swab_mask & (1 << index)) == 0);
- req->rq_req_swab_mask |= 1 << index;
-}
-
-/**
- * Mark request reply buffer at offset \a index that it was already swabbed
- */
-static inline void lustre_set_rep_swabbed(struct ptlrpc_request *req,
- size_t index)
-{
- LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8);
- LASSERT((req->rq_rep_swab_mask & (1 << index)) == 0);
- req->rq_rep_swab_mask |= 1 << index;
-}
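-
-/*
- * Illustrative helper (not part of the original interface): the mask-based
- * guards above are meant to ensure each buffer is swabbed at most once.
- * A minimal sketch for a 32-bit reply field at buffer offset \a index:
- */
-static inline void lustre_rep_swab_u32(struct ptlrpc_request *req,
-				       size_t index, __u32 *val)
-{
-	if (ptlrpc_rep_need_swab(req) && !lustre_rep_swabbed(req, index)) {
-		__swab32s(val);				/* swap once... */
-		lustre_set_rep_swabbed(req, index);	/* ...then record it */
-	}
-}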
-
-/**
- * Convert numerical request phase value \a phase into text string description
- */
-static inline const char *
-ptlrpc_phase2str(enum rq_phase phase)
-{
- switch (phase) {
- case RQ_PHASE_NEW:
- return "New";
- case RQ_PHASE_RPC:
- return "Rpc";
- case RQ_PHASE_BULK:
- return "Bulk";
- case RQ_PHASE_INTERPRET:
- return "Interpret";
- case RQ_PHASE_COMPLETE:
- return "Complete";
- case RQ_PHASE_UNREG_RPC:
- return "UnregRPC";
- case RQ_PHASE_UNREG_BULK:
- return "UnregBULK";
- default:
- return "?Phase?";
- }
-}
-
-/**
- * Convert the numerical request phase of the request \a req into a text
- * string description
- */
-static inline const char *
-ptlrpc_rqphase2str(struct ptlrpc_request *req)
-{
- return ptlrpc_phase2str(req->rq_phase);
-}
-
-/**
- * Debugging functions and helpers to print request structure into debug log
- * @{
- */
-/* Spare the preprocessor, spoil the bugs. */
-#define FLAG(field, str) (field ? str : "")
-
-/** Convert bit flags into a string */
-#define DEBUG_REQ_FLAGS(req) \
- ptlrpc_rqphase2str(req), \
- FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \
- FLAG(req->rq_err, "E"), FLAG(req->rq_net_err, "e"), \
- FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \
- FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \
- FLAG(req->rq_no_resend, "N"), \
- FLAG(req->rq_waiting, "W"), \
- FLAG(req->rq_wait_ctx, "C"), FLAG(req->rq_hp, "H"), \
- FLAG(req->rq_committed, "M")
-
-#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s%s"
-
-void _debug_req(struct ptlrpc_request *req,
- struct libcfs_debug_msg_data *data, const char *fmt, ...)
- __printf(3, 4);
-
-/**
- * Helper that decides if we need to print request according to current debug
- * level settings
- */
-#define debug_req(msgdata, mask, cdls, req, fmt, a...) \
-do { \
- CFS_CHECK_STACK(msgdata, mask, cdls); \
- \
- if (((mask) & D_CANTMASK) != 0 || \
- ((libcfs_debug & (mask)) != 0 && \
- (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \
- _debug_req((req), msgdata, fmt, ##a); \
-} while (0)
-
-/**
- * This is the debug print function you need to use to print request structure
- * content into lustre debug log.
- * for most callers (level is a constant) this is resolved at compile time
- */
-#define DEBUG_REQ(level, req, fmt, args...) \
-do { \
- if ((level) & (D_ERROR | D_WARNING)) { \
- static struct cfs_debug_limit_state cdls; \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls); \
- debug_req(&msgdata, level, &cdls, req, "@@@ "fmt" ", ## args);\
- } else { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, NULL); \
- debug_req(&msgdata, level, NULL, req, "@@@ "fmt" ", ## args); \
- } \
-} while (0)
-/** @} */
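-
-/*
- * Example (illustrative): DEBUG_REQ() is used like CDEBUG(); the request
- * state (phase and the flag string built above) is printed by _debug_req(),
- * e.g.:
- *
- *	DEBUG_REQ(D_NET, req, "timed out after %ds", req->rq_timeout);
- */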
-
-/**
- * Structure that defines a single page of a bulk transfer
- */
-struct ptlrpc_bulk_page {
- /** Linkage to list of pages in a bulk */
- struct list_head bp_link;
- /**
- * Number of bytes in a page to transfer starting from \a bp_pageoffset
- */
- int bp_buflen;
- /** offset within a page */
- int bp_pageoffset;
- /** The page itself */
- struct page *bp_page;
-};
-
-enum ptlrpc_bulk_op_type {
- PTLRPC_BULK_OP_ACTIVE = 0x00000001,
- PTLRPC_BULK_OP_PASSIVE = 0x00000002,
- PTLRPC_BULK_OP_PUT = 0x00000004,
- PTLRPC_BULK_OP_GET = 0x00000008,
- PTLRPC_BULK_BUF_KVEC = 0x00000010,
- PTLRPC_BULK_BUF_KIOV = 0x00000020,
- PTLRPC_BULK_GET_SOURCE = PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_GET,
- PTLRPC_BULK_PUT_SINK = PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_PUT,
- PTLRPC_BULK_GET_SINK = PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_GET,
- PTLRPC_BULK_PUT_SOURCE = PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_PUT,
-};
-
-static inline bool ptlrpc_is_bulk_op_get(enum ptlrpc_bulk_op_type type)
-{
- return (type & PTLRPC_BULK_OP_GET) == PTLRPC_BULK_OP_GET;
-}
-
-static inline bool ptlrpc_is_bulk_get_source(enum ptlrpc_bulk_op_type type)
-{
- return (type & PTLRPC_BULK_GET_SOURCE) == PTLRPC_BULK_GET_SOURCE;
-}
-
-static inline bool ptlrpc_is_bulk_put_sink(enum ptlrpc_bulk_op_type type)
-{
- return (type & PTLRPC_BULK_PUT_SINK) == PTLRPC_BULK_PUT_SINK;
-}
-
-static inline bool ptlrpc_is_bulk_get_sink(enum ptlrpc_bulk_op_type type)
-{
- return (type & PTLRPC_BULK_GET_SINK) == PTLRPC_BULK_GET_SINK;
-}
-
-static inline bool ptlrpc_is_bulk_put_source(enum ptlrpc_bulk_op_type type)
-{
- return (type & PTLRPC_BULK_PUT_SOURCE) == PTLRPC_BULK_PUT_SOURCE;
-}
-
-static inline bool ptlrpc_is_bulk_desc_kvec(enum ptlrpc_bulk_op_type type)
-{
- return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV))
- == PTLRPC_BULK_BUF_KVEC;
-}
-
-static inline bool ptlrpc_is_bulk_desc_kiov(enum ptlrpc_bulk_op_type type)
-{
- return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV))
- == PTLRPC_BULK_BUF_KIOV;
-}
-
-static inline bool ptlrpc_is_bulk_op_active(enum ptlrpc_bulk_op_type type)
-{
- return ((type & PTLRPC_BULK_OP_ACTIVE) |
- (type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_ACTIVE;
-}
-
-static inline bool ptlrpc_is_bulk_op_passive(enum ptlrpc_bulk_op_type type)
-{
- return ((type & PTLRPC_BULK_OP_ACTIVE) |
- (type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_PASSIVE;
-}
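-
-/*
- * Illustrative note (composition assumed from the flag definitions above):
- * a client WRITE descriptor is typically created as
- * PTLRPC_BULK_GET_SOURCE | PTLRPC_BULK_BUF_KIOV, i.e. the passive side of
- * a server GET, so ptlrpc_is_bulk_get_source() and
- * ptlrpc_is_bulk_op_passive() both return true for it, while
- * ptlrpc_is_bulk_op_active() returns false.
- */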
-
-struct ptlrpc_bulk_frag_ops {
- /**
- * Add a page \a page to the bulk descriptor \a desc
- * Data to transfer in the page starts at offset \a pageoffset and
- * amount of data to transfer from the page is \a len
- */
- void (*add_kiov_frag)(struct ptlrpc_bulk_desc *desc,
- struct page *page, int pageoffset, int len);
-
- /*
- * Add a \a fragment to the bulk descriptor \a desc.
- * Data to transfer in the fragment is pointed to by \a frag
- * The size of the fragment is \a len
- */
- int (*add_iov_frag)(struct ptlrpc_bulk_desc *desc, void *frag, int len);
-
- /**
- * Uninitialize and free bulk descriptor \a desc.
- * Works on bulk descriptors both from server and client side.
- */
- void (*release_frags)(struct ptlrpc_bulk_desc *desc);
-};
-
-extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_pin_ops;
-extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_nopin_ops;
-
-/**
- * Definition of bulk descriptor.
- * Bulks are special "two-phase" RPCs where the initial request message
- * is sent first and is followed by a transfer (or receipt) of a large
- * amount of data to be settled into pages referenced from the bulk descriptors.
- * Bulk transfers (the actual data following the small requests) are done
- * on separate LNet portals.
- * In Lustre we use bulk transfers for READ and WRITE transfers from/to OSTs.
- * Another user is readpage for the MDT.
- */
-struct ptlrpc_bulk_desc {
- /** completed with failure */
- unsigned long bd_failure:1;
- /** client side */
- unsigned long bd_registered:1;
- /** For serialization with callback */
- spinlock_t bd_lock;
- /** Import generation when request for this bulk was sent */
- int bd_import_generation;
- /** {put,get}{source,sink}{kvec,kiov} */
- enum ptlrpc_bulk_op_type bd_type;
- /** LNet portal for this bulk */
- __u32 bd_portal;
- /** Server side - export this bulk was created for */
- struct obd_export *bd_export;
- /** Client side - import this bulk was sent on */
- struct obd_import *bd_import;
- /** Back pointer to the request */
- struct ptlrpc_request *bd_req;
- struct ptlrpc_bulk_frag_ops *bd_frag_ops;
- wait_queue_head_t bd_waitq; /* server side only WQ */
- int bd_iov_count; /* # entries in bd_iov */
- int bd_max_iov; /* allocated size of bd_iov */
- int bd_nob; /* # bytes covered */
- int bd_nob_transferred; /* # bytes GOT/PUT */
-
- u64 bd_last_mbits;
-
- struct ptlrpc_cb_id bd_cbid; /* network callback info */
- lnet_nid_t bd_sender; /* stash event::sender */
- int bd_md_count; /* # valid entries in bd_mds */
- int bd_md_max_brw; /* max entries in bd_mds */
- /** array of associated MDs */
- struct lnet_handle_md bd_mds[PTLRPC_BULK_OPS_COUNT];
-
- union {
- struct {
- /*
- * encrypt iov, size is either 0 or bd_iov_count.
- */
- struct bio_vec *bd_enc_vec;
- struct bio_vec *bd_vec; /* Array of bio_vecs */
- } bd_kiov;
-
- struct {
- struct kvec *bd_enc_kvec;
- struct kvec *bd_kvec; /* Array of kvecs */
- } bd_kvec;
- } bd_u;
-};
-
-#define GET_KIOV(desc) ((desc)->bd_u.bd_kiov.bd_vec)
-#define BD_GET_KIOV(desc, i) ((desc)->bd_u.bd_kiov.bd_vec[i])
-#define GET_ENC_KIOV(desc) ((desc)->bd_u.bd_kiov.bd_enc_vec)
-#define BD_GET_ENC_KIOV(desc, i) ((desc)->bd_u.bd_kiov.bd_enc_vec[i])
-#define GET_KVEC(desc) ((desc)->bd_u.bd_kvec.bd_kvec)
-#define BD_GET_KVEC(desc, i) ((desc)->bd_u.bd_kvec.bd_kvec[i])
-#define GET_ENC_KVEC(desc) ((desc)->bd_u.bd_kvec.bd_enc_kvec)
-#define BD_GET_ENC_KVEC(desc, i) ((desc)->bd_u.bd_kvec.bd_enc_kvec[i])
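-
-/*
- * Illustrative helper (not in the original file): the accessors above are
- * how descriptor fragments are reached, e.g. summing the bytes described
- * by a kiov-based bulk:
- */
-static inline int ptlrpc_bulk_desc_bytes(struct ptlrpc_bulk_desc *desc)
-{
-	int i, nob = 0;
-
-	LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type));
-	for (i = 0; i < desc->bd_iov_count; i++)
-		nob += BD_GET_KIOV(desc, i).bv_len;	/* fragment length */
-	return nob;
-}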
-
-enum {
- SVC_STOPPED = 1 << 0,
- SVC_STOPPING = 1 << 1,
- SVC_STARTING = 1 << 2,
- SVC_RUNNING = 1 << 3,
-};
-
-#define PTLRPC_THR_NAME_LEN 32
-/**
- * Definition of server service thread structure
- */
-struct ptlrpc_thread {
- /**
- * List of active threads in svc->srv_threads
- */
- struct list_head t_link;
- /**
- * thread-private data (preallocated memory)
- */
- void *t_data;
- __u32 t_flags;
- /**
- * service thread index, from ptlrpc_start_threads
- */
- unsigned int t_id;
- /**
- * service thread pid
- */
- pid_t t_pid;
- /**
- * put watchdog in the structure per thread b=14840
- *
- * The Lustre watchdog is removed for the client in the hope
- * that a generic watchdog can be merged into the kernel.
- * When that happens, we should add the field below back.
- *
- * struct lc_watchdog *t_watchdog;
- */
- /**
- * the svc this thread belongs to b=18582
- */
- struct ptlrpc_service_part *t_svcpt;
- wait_queue_head_t t_ctl_waitq;
- struct lu_env *t_env;
- char t_name[PTLRPC_THR_NAME_LEN];
-};
-
-static inline int thread_is_stopped(struct ptlrpc_thread *thread)
-{
- return !!(thread->t_flags & SVC_STOPPED);
-}
-
-static inline int thread_is_stopping(struct ptlrpc_thread *thread)
-{
- return !!(thread->t_flags & SVC_STOPPING);
-}
-
-static inline int thread_is_starting(struct ptlrpc_thread *thread)
-{
- return !!(thread->t_flags & SVC_STARTING);
-}
-
-static inline int thread_is_running(struct ptlrpc_thread *thread)
-{
- return !!(thread->t_flags & SVC_RUNNING);
-}
-
-static inline void thread_clear_flags(struct ptlrpc_thread *thread, __u32 flags)
-{
- thread->t_flags &= ~flags;
-}
-
-static inline void thread_set_flags(struct ptlrpc_thread *thread, __u32 flags)
-{
- thread->t_flags = flags;
-}
-
-static inline void thread_add_flags(struct ptlrpc_thread *thread, __u32 flags)
-{
- thread->t_flags |= flags;
-}
-
-static inline int thread_test_and_clear_flags(struct ptlrpc_thread *thread,
- __u32 flags)
-{
- if (thread->t_flags & flags) {
- thread->t_flags &= ~flags;
- return 1;
- }
- return 0;
-}
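-
-/*
- * Illustrative sketch (simplified; the real sequence lives in
- * ptlrpc_main()): the flag helpers above drive the thread lifecycle
- * roughly as:
- *
- *	thread_add_flags(thread, SVC_RUNNING);
- *	wake_up(&thread->t_ctl_waitq);
- *	while (!thread_is_stopping(thread) && !thread_is_stopped(thread))
- *		... handle requests ...
- *	thread_add_flags(thread, SVC_STOPPED);
- *	wake_up(&thread->t_ctl_waitq);
- */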
-
-/**
- * Request buffer descriptor structure.
- * This is a structure that contains one posted request buffer for service.
- * Once data lands in a buffer, the event callback creates the actual request
- * and wakes one of the service threads to process the new incoming request.
- * More than one request can fit into the buffer.
- */
-struct ptlrpc_request_buffer_desc {
- /** Link item for rqbds on a service */
- struct list_head rqbd_list;
- /** History of requests for this buffer */
- struct list_head rqbd_reqs;
- /** Back pointer to service for which this buffer is registered */
- struct ptlrpc_service_part *rqbd_svcpt;
- /** LNet descriptor */
- struct lnet_handle_md rqbd_md_h;
- int rqbd_refcount;
- /** The buffer itself */
- char *rqbd_buffer;
- struct ptlrpc_cb_id rqbd_cbid;
- /**
- * This "embedded" request structure is only used for the
- * last request to fit into the buffer
- */
- struct ptlrpc_request rqbd_req;
-};
-
-typedef int (*svc_handler_t)(struct ptlrpc_request *req);
-
-struct ptlrpc_service_ops {
- /**
- * if non-NULL called during thread creation (ptlrpc_start_thread())
- * to initialize service specific per-thread state.
- */
- int (*so_thr_init)(struct ptlrpc_thread *thr);
- /**
- * if non-NULL called during thread shutdown (ptlrpc_main()) to
- * destroy state created by ->so_thr_init().
- */
- void (*so_thr_done)(struct ptlrpc_thread *thr);
- /**
- * Handler function for incoming requests for this service
- */
- int (*so_req_handler)(struct ptlrpc_request *req);
- /**
- * function to determine priority of the request, it's called
- * on every new request
- */
- int (*so_hpreq_handler)(struct ptlrpc_request *);
- /**
- * service-specific print fn
- */
- void (*so_req_printer)(void *, struct ptlrpc_request *);
-};
-
-#ifndef __cfs_cacheline_aligned
-/* NB: put it here for reducing patch dependence */
-# define __cfs_cacheline_aligned
-#endif
-
-/**
- * How many high priority requests to serve before serving one normal
- * priority request
- */
-#define PTLRPC_SVC_HP_RATIO 10
-
-/**
- * Definition of PortalRPC service.
- * The service listens on a particular portal (like a tcp port)
- * and performs actions for a specific server, like the IO service for
- * the OST or the general metadata service for the MDS.
- */
-struct ptlrpc_service {
- /** serialize sysfs operations */
- spinlock_t srv_lock;
- /** most often accessed fields */
- /** chain thru all services */
- struct list_head srv_list;
- /** service operations table */
- struct ptlrpc_service_ops srv_ops;
- /** only statically allocated strings here; we don't clean them */
- char *srv_name;
- /** only statically allocated strings here; we don't clean them */
- char *srv_thread_name;
- /** service thread list */
- struct list_head srv_threads;
- /** # of threads to create for each partition at initialization */
- int srv_nthrs_cpt_init;
- /** limit of threads number for each partition */
- int srv_nthrs_cpt_limit;
- /** Root of debugfs dir tree for this service */
- struct dentry *srv_debugfs_entry;
- /** Pointer to statistic data for this service */
- struct lprocfs_stats *srv_stats;
- /** # hp per lp reqs to handle */
- int srv_hpreq_ratio;
- /** biggest request to receive */
- int srv_max_req_size;
- /** biggest reply to send */
- int srv_max_reply_size;
- /** size of individual buffers */
- int srv_buf_size;
- /** # buffers to allocate in 1 group */
- int srv_nbuf_per_group;
- /** Local portal on which to receive requests */
- __u32 srv_req_portal;
- /** Portal on the client to send replies to */
- __u32 srv_rep_portal;
- /**
- * Tags for lu_context associated with this thread, see struct
- * lu_context.
- */
- __u32 srv_ctx_tags;
- /** soft watchdog timeout multiplier */
- int srv_watchdog_factor;
- /** under unregister_service */
- unsigned srv_is_stopping:1;
-
- /** max # request buffers in history per partition */
- int srv_hist_nrqbds_cpt_max;
- /** number of CPTs this service bound on */
- int srv_ncpts;
- /** CPTs array this service bound on */
- __u32 *srv_cpts;
- /** 2^srv_cpt_bits >= cfs_cpt_number(srv_cptable) */
- int srv_cpt_bits;
- /** CPT table this service is running over */
- struct cfs_cpt_table *srv_cptable;
-
- /* sysfs object */
- struct kobject srv_kobj;
- struct completion srv_kobj_unregister;
- /**
- * partition data for ptlrpc service
- */
- struct ptlrpc_service_part *srv_parts[0];
-};
-
-/**
- * Definition of PortalRPC service partition data.
- * Although a service only has one instance of it right now, we
- * will have multiple instances very soon (one instance per CPT).
- *
- * it has four locks:
- * \a scp_lock
- * serialize operations on rqbd and requests waiting for preprocess
- * \a scp_req_lock
- * serialize operations on active requests sent to this portal
- * \a scp_at_lock
- * serialize adaptive timeout stuff
- * \a scp_rep_lock
- * serialize operations on RS list (reply states)
- *
- * We don't have any use-case to take two or more locks at the same time
- * for now, so there is no lock order issue.
- */
-struct ptlrpc_service_part {
- /** back reference to owner */
- struct ptlrpc_service *scp_service __cfs_cacheline_aligned;
- /* CPT id, reserved */
- int scp_cpt;
- /** always increasing number */
- int scp_thr_nextid;
- /** # of starting threads */
- int scp_nthrs_starting;
- /** # of stopping threads, reserved for shrinking threads */
- int scp_nthrs_stopping;
- /** # running threads */
- int scp_nthrs_running;
- /** service threads list */
- struct list_head scp_threads;
-
- /**
- * serialize the following fields, used for protecting
- * rqbd list and incoming requests waiting for preprocess,
- * threads starting & stopping are also protected by this lock.
- */
- spinlock_t scp_lock __cfs_cacheline_aligned;
- /** total # req buffer descs allocated */
- int scp_nrqbds_total;
- /** # posted request buffers for receiving */
- int scp_nrqbds_posted;
- /** rqbd allocation in progress */
- int scp_rqbd_allocating;
- /** # incoming reqs */
- int scp_nreqs_incoming;
- /** request buffers to be reposted */
- struct list_head scp_rqbd_idle;
- /** req buffers receiving */
- struct list_head scp_rqbd_posted;
- /** incoming reqs */
- struct list_head scp_req_incoming;
- /** timeout before re-posting reqs, in ticks */
- long scp_rqbd_timeout;
- /**
- * all threads sleep on this. This wait queue is signalled when a new
- * incoming request arrives and when a difficult reply has to be handled.
- */
- wait_queue_head_t scp_waitq;
-
- /** request history */
- struct list_head scp_hist_reqs;
- /** request buffer history */
- struct list_head scp_hist_rqbds;
- /** # request buffers in history */
- int scp_hist_nrqbds;
- /** sequence number for request */
- __u64 scp_hist_seq;
- /** highest seq culled from history */
- __u64 scp_hist_seq_culled;
-
- /**
- * serialize the following fields, used for processing requests
- * sent to this portal
- */
- spinlock_t scp_req_lock __cfs_cacheline_aligned;
- /** # reqs in either of the NRS heads below */
- /** # reqs being served */
- int scp_nreqs_active;
- /** # HPreqs being served */
- int scp_nhreqs_active;
- /** # hp requests handled */
- int scp_hreq_count;
-
- /** NRS head for regular requests */
- struct ptlrpc_nrs scp_nrs_reg;
- /** NRS head for HP requests; this is only valid for services that can
- * handle HP requests
- */
- struct ptlrpc_nrs *scp_nrs_hp;
-
- /** AT stuff */
- /** @{ */
- /**
- * serialize the following fields, used for changes on
- * adaptive timeout
- */
- spinlock_t scp_at_lock __cfs_cacheline_aligned;
- /** estimated rpc service time */
- struct adaptive_timeout scp_at_estimate;
- /** reqs waiting for replies */
- struct ptlrpc_at_array scp_at_array;
- /** early reply timer */
- struct timer_list scp_at_timer;
- /** debug */
- unsigned long scp_at_checktime;
- /** check early replies */
- unsigned scp_at_check;
- /** @} */
-
- /**
- * serialize the following fields, used for processing
- * replies for this portal
- */
- spinlock_t scp_rep_lock __cfs_cacheline_aligned;
- /** all the active replies */
- struct list_head scp_rep_active;
- /** List of free reply_states */
- struct list_head scp_rep_idle;
- /** waitq signalled when entries are added to the free reply-state list */
- wait_queue_head_t scp_rep_waitq;
- /** # 'difficult' replies */
- atomic_t scp_nreps_difficult;
-};
-
-#define ptlrpc_service_for_each_part(part, i, svc) \
- for (i = 0; \
- i < (svc)->srv_ncpts && \
- (svc)->srv_parts && \
- ((part) = (svc)->srv_parts[i]); i++)
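-
-/*
- * Example (illustrative): the iterator above is the usual way to visit
- * every partition of a service, e.g. to total the queued requests:
- *
- *	struct ptlrpc_service_part *svcpt;
- *	int i, incoming = 0;
- *
- *	ptlrpc_service_for_each_part(svcpt, i, svc)
- *		incoming += svcpt->scp_nreqs_incoming;
- */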
-
-/**
- * Declaration of ptlrpcd control structure
- */
-struct ptlrpcd_ctl {
- /**
- * Ptlrpc thread control flags (LIOD_START, LIOD_STOP, LIOD_FORCE)
- */
- unsigned long pc_flags;
- /**
- * Thread lock protecting structure fields.
- */
- spinlock_t pc_lock;
- /**
- * Start completion.
- */
- struct completion pc_starting;
- /**
- * Stop completion.
- */
- struct completion pc_finishing;
- /**
- * Thread requests set.
- */
- struct ptlrpc_request_set *pc_set;
- /**
- * Thread name used in kthread_run()
- */
- char pc_name[16];
- /**
- * CPT the thread is bound on.
- */
- int pc_cpt;
- /**
- * Index of ptlrpcd thread in the array.
- */
- int pc_index;
- /**
- * Pointer to the array of partners' ptlrpcd_ctl structure.
- */
- struct ptlrpcd_ctl **pc_partners;
- /**
- * Number of the ptlrpcd's partners.
- */
- int pc_npartners;
- /**
- * Record the partner index to be processed next.
- */
- int pc_cursor;
- /**
- * Error code if the thread failed to fully start.
- */
- int pc_error;
-};
-
-/* Bits for pc_flags */
-enum ptlrpcd_ctl_flags {
- /**
- * Ptlrpc thread start flag.
- */
- LIOD_START = 1 << 0,
- /**
- * Ptlrpc thread stop flag.
- */
- LIOD_STOP = 1 << 1,
- /**
- * Ptlrpc thread force flag (only force stop so far).
- * This causes any in-flight RPCs handled by the thread
- * to be aborted if LIOD_STOP is also specified.
- */
- LIOD_FORCE = 1 << 2,
- /**
- * This is a recovery ptlrpc thread.
- */
- LIOD_RECOVERY = 1 << 3,
-};
-
-/**
- * \addtogroup nrs
- * @{
- *
- * Service compatibility function; the policy is compatible with all services.
- *
- * \param[in] svc The service the policy is attempting to register with.
- * \param[in] desc The policy descriptor
- *
- * \retval true The policy is compatible with the service
- *
- * \see ptlrpc_nrs_pol_desc::pd_compat()
- */
-static inline bool nrs_policy_compat_all(const struct ptlrpc_service *svc,
- const struct ptlrpc_nrs_pol_desc *desc)
-{
- return true;
-}
-
-/**
- * Service compatibility function; the policy is compatible with only a specific
- * service which is identified by its human-readable name at
- * ptlrpc_service::srv_name.
- *
- * \param[in] svc The service the policy is attempting to register with.
- * \param[in] desc The policy descriptor
- *
- * \retval false The policy is not compatible with the service
- * \retval true The policy is compatible with the service
- *
- * \see ptlrpc_nrs_pol_desc::pd_compat()
- */
-static inline bool nrs_policy_compat_one(const struct ptlrpc_service *svc,
- const struct ptlrpc_nrs_pol_desc *desc)
-{
- return strcmp(svc->srv_name, desc->pd_compat_svc_name) == 0;
-}
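-
-/*
- * Sketch (hypothetical policy; the nc_* field names of struct
- * ptlrpc_nrs_pol_conf are assumed): a policy descriptor selects one of the
- * compatibility functions above, e.g.:
- *
- *	static struct ptlrpc_nrs_pol_conf nrs_conf_example = {
- *		.nc_name		= "example",
- *		.nc_compat		= nrs_policy_compat_one,
- *		.nc_compat_svc_name	= "ost_io",
- *	};
- */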
-
-/** @} nrs */
-
-/* ptlrpc/events.c */
-extern struct lnet_handle_eq ptlrpc_eq_h;
-int ptlrpc_uuid_to_peer(struct obd_uuid *uuid,
- struct lnet_process_id *peer, lnet_nid_t *self);
-/**
- * These callbacks are invoked by LNet when something happens to the
- * underlying buffer
- * @{
- */
-void request_out_callback(struct lnet_event *ev);
-void reply_in_callback(struct lnet_event *ev);
-void client_bulk_callback(struct lnet_event *ev);
-void request_in_callback(struct lnet_event *ev);
-void reply_out_callback(struct lnet_event *ev);
-/** @} */
-
-/* ptlrpc/connection.c */
-struct ptlrpc_connection *ptlrpc_connection_get(struct lnet_process_id peer,
- lnet_nid_t self,
- struct obd_uuid *uuid);
-int ptlrpc_connection_put(struct ptlrpc_connection *c);
-struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *);
-int ptlrpc_connection_init(void);
-void ptlrpc_connection_fini(void);
-
-/* ptlrpc/niobuf.c */
-/**
- * Actual interfacing with LNet to put/get/register/unregister stuff
- * @{
- */
-
-int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async);
-
-static inline int ptlrpc_client_bulk_active(struct ptlrpc_request *req)
-{
- struct ptlrpc_bulk_desc *desc;
- int rc;
-
- desc = req->rq_bulk;
-
- if (req->rq_bulk_deadline > ktime_get_real_seconds())
- return 1;
-
- if (!desc)
- return 0;
-
- spin_lock(&desc->bd_lock);
- rc = desc->bd_md_count;
- spin_unlock(&desc->bd_lock);
- return rc;
-}
-
-#define PTLRPC_REPLY_MAYBE_DIFFICULT 0x01
-#define PTLRPC_REPLY_EARLY 0x02
-int ptlrpc_send_reply(struct ptlrpc_request *req, int flags);
-int ptlrpc_reply(struct ptlrpc_request *req);
-int ptlrpc_send_error(struct ptlrpc_request *req, int difficult);
-int ptlrpc_error(struct ptlrpc_request *req);
-int ptlrpc_at_get_net_latency(struct ptlrpc_request *req);
-int ptl_send_rpc(struct ptlrpc_request *request, int noreply);
-int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd);
-/** @} */
-
-/* ptlrpc/client.c */
-/**
- * Client-side portals API. Everything to send requests, receive replies,
- * request queues, request management, etc.
- * @{
- */
-void ptlrpc_request_committed(struct ptlrpc_request *req, int force);
-
-int ptlrpc_inc_ref(void);
-void ptlrpc_dec_ref(void);
-
-void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
- struct ptlrpc_client *);
-struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid);
-
-int ptlrpc_queue_wait(struct ptlrpc_request *req);
-int ptlrpc_replay_req(struct ptlrpc_request *req);
-void ptlrpc_abort_inflight(struct obd_import *imp);
-void ptlrpc_abort_set(struct ptlrpc_request_set *set);
-
-struct ptlrpc_request_set *ptlrpc_prep_set(void);
-struct ptlrpc_request_set *ptlrpc_prep_fcset(int max, set_producer_func func,
- void *arg);
-int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set);
-int ptlrpc_set_wait(struct ptlrpc_request_set *);
-void ptlrpc_mark_interrupted(struct ptlrpc_request *req);
-void ptlrpc_set_destroy(struct ptlrpc_request_set *);
-void ptlrpc_set_add_req(struct ptlrpc_request_set *, struct ptlrpc_request *);
-
-void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool);
-int ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq);
-
-struct ptlrpc_request_pool *
-ptlrpc_init_rq_pool(int, int,
- int (*populate_pool)(struct ptlrpc_request_pool *, int));
-
-void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req);
-struct ptlrpc_request *ptlrpc_request_alloc(struct obd_import *imp,
- const struct req_format *format);
-struct ptlrpc_request *ptlrpc_request_alloc_pool(struct obd_import *imp,
- struct ptlrpc_request_pool *,
- const struct req_format *);
-void ptlrpc_request_free(struct ptlrpc_request *request);
-int ptlrpc_request_pack(struct ptlrpc_request *request,
- __u32 version, int opcode);
-struct ptlrpc_request *ptlrpc_request_alloc_pack(struct obd_import *,
- const struct req_format *,
- __u32, int);
-int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
- __u32 version, int opcode, char **bufs,
- struct ptlrpc_cli_ctx *ctx);
-void ptlrpc_req_finished(struct ptlrpc_request *request);
-struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req);
-struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req,
- unsigned int nfrags,
- unsigned int max_brw,
- unsigned int type,
- unsigned int portal,
- const struct ptlrpc_bulk_frag_ops *ops);
-
-int ptlrpc_prep_bulk_frag(struct ptlrpc_bulk_desc *desc,
- void *frag, int len);
-void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
- struct page *page, int pageoffset, int len,
- int pin);
-static inline void ptlrpc_prep_bulk_page_pin(struct ptlrpc_bulk_desc *desc,
- struct page *page, int pageoffset,
- int len)
-{
- __ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 1);
-}
-
-static inline void ptlrpc_prep_bulk_page_nopin(struct ptlrpc_bulk_desc *desc,
- struct page *page, int pageoffset,
- int len)
-{
- __ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 0);
-}
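-
-/*
- * Illustrative sketch (the page array and count are hypothetical): a client
- * bulk READ is typically assembled with the helpers above, one page per
- * fragment:
- *
- *	desc = ptlrpc_prep_bulk_imp(req, npages, 1,
- *				    PTLRPC_BULK_PUT_SINK | PTLRPC_BULK_BUF_KIOV,
- *				    OST_BULK_PORTAL,
- *				    &ptlrpc_bulk_kiov_pin_ops);
- *	for (i = 0; i < npages; i++)
- *		ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, PAGE_SIZE);
- */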
-
-void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk);
-
-static inline void ptlrpc_release_bulk_page_pin(struct ptlrpc_bulk_desc *desc)
-{
- int i;
-
- for (i = 0; i < desc->bd_iov_count ; i++)
- put_page(BD_GET_KIOV(desc, i).bv_page);
-}
-
-void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
- struct obd_import *imp);
-__u64 ptlrpc_next_xid(void);
-__u64 ptlrpc_sample_next_xid(void);
-__u64 ptlrpc_req_xid(struct ptlrpc_request *request);
-
-/* Set of routines to run a function in ptlrpcd context */
-void *ptlrpcd_alloc_work(struct obd_import *imp,
- int (*cb)(const struct lu_env *, void *), void *data);
-void ptlrpcd_destroy_work(void *handler);
-int ptlrpcd_queue_work(void *handler);
-
-/** @} */
-struct ptlrpc_service_buf_conf {
- /* nbufs is the # of buffers to allocate when growing the pool */
- unsigned int bc_nbufs;
- /* buffer size to post */
- unsigned int bc_buf_size;
- /* portal to listen for requests on */
- unsigned int bc_req_portal;
- /* portal to send replies to */
- unsigned int bc_rep_portal;
- /* maximum request size to be accepted for this service */
- unsigned int bc_req_max_size;
- /* maximum reply size this service can ever send */
- unsigned int bc_rep_max_size;
-};
-
-struct ptlrpc_service_thr_conf {
- /* threadname should be 8 characters or less - 6 will be added on */
- char *tc_thr_name;
- /* threads increasing factor for each CPU */
- unsigned int tc_thr_factor;
- /* service threads # to start on each partition while initializing */
- unsigned int tc_nthrs_init;
- /*
- * low-water mark for the per-partition thread-number upper limit while
- * running; service availability may be impacted if the thread count is
- * lower than this value. It can be ZERO if the service doesn't require
- * CPU affinity or there is only one partition.
- */
- unsigned int tc_nthrs_base;
- /* "soft" limit for total threads number */
- unsigned int tc_nthrs_max;
- /* user-specified thread count; it will be validated against the
- * other members of this structure.
- */
- unsigned int tc_nthrs_user;
- /* set NUMA node affinity for service threads */
- unsigned int tc_cpu_affinity;
- /* Tags for lu_context associated with service thread */
- __u32 tc_ctx_tags;
-};
-
-struct ptlrpc_service_cpt_conf {
- struct cfs_cpt_table *cc_cptable;
- /* string pattern to describe CPTs for a service */
- char *cc_pattern;
-};
-
-struct ptlrpc_service_conf {
- /* service name */
- char *psc_name;
- /* soft watchdog timeout multiplier to print stuck service traces */
- unsigned int psc_watchdog_factor;
- /* buffer information */
- struct ptlrpc_service_buf_conf psc_buf;
- /* thread information */
- struct ptlrpc_service_thr_conf psc_thr;
- /* CPU partition information */
- struct ptlrpc_service_cpt_conf psc_cpt;
- /* function table */
- struct ptlrpc_service_ops psc_ops;
-};
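-
-/*
- * Example (illustrative; names and values hypothetical): the conf is filled
- * in and handed to ptlrpc_register_service(), declared below:
- *
- *	static struct ptlrpc_service_conf conf = {
- *		.psc_name		= "example",
- *		.psc_watchdog_factor	= 2,
- *		.psc_buf		= {
- *			.bc_nbufs	 = 64,
- *			.bc_buf_size	 = 8192,
- *			.bc_req_max_size = 4096,
- *			.bc_rep_max_size = 4096,
- *		},
- *		.psc_ops		= {
- *			.so_req_handler	= example_req_handler,
- *		},
- *	};
- */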
-
-/* ptlrpc/service.c */
-/**
- * Server-side services API. Register/unregister service, request state
- * management, service thread management
- *
- * @{
- */
-void ptlrpc_dispatch_difficult_reply(struct ptlrpc_reply_state *rs);
-void ptlrpc_schedule_difficult_reply(struct ptlrpc_reply_state *rs);
-struct ptlrpc_service *ptlrpc_register_service(struct ptlrpc_service_conf *conf,
- struct kset *parent,
- struct dentry *debugfs_entry);
-
-int ptlrpc_start_threads(struct ptlrpc_service *svc);
-int ptlrpc_unregister_service(struct ptlrpc_service *service);
-
-int ptlrpc_hr_init(void);
-void ptlrpc_hr_fini(void);
-
-/** @} */
-
-/* ptlrpc/import.c */
-/**
- * Import API
- * @{
- */
-int ptlrpc_connect_import(struct obd_import *imp);
-int ptlrpc_init_import(struct obd_import *imp);
-int ptlrpc_disconnect_import(struct obd_import *imp, int noclose);
-int ptlrpc_import_recovery_state_machine(struct obd_import *imp);
-
-/* ptlrpc/pack_generic.c */
-int ptlrpc_reconnect_import(struct obd_import *imp);
-/** @} */
-
-/**
- * ptlrpc msg buffer and swab interface
- *
- * @{
- */
-int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout,
- u32 index);
-void ptlrpc_buf_set_swabbed(struct ptlrpc_request *req, const int inout,
- u32 index);
-int ptlrpc_unpack_rep_msg(struct ptlrpc_request *req, int len);
-int ptlrpc_unpack_req_msg(struct ptlrpc_request *req, int len);
-
-void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, __u32 *lens,
- char **bufs);
-int lustre_pack_request(struct ptlrpc_request *, __u32 magic, int count,
- __u32 *lens, char **bufs);
-int lustre_pack_reply(struct ptlrpc_request *, int count, __u32 *lens,
- char **bufs);
-int lustre_pack_reply_v2(struct ptlrpc_request *req, int count,
- __u32 *lens, char **bufs, int flags);
-#define LPRFL_EARLY_REPLY 1
-int lustre_pack_reply_flags(struct ptlrpc_request *, int count, __u32 *lens,
- char **bufs, int flags);
-int lustre_shrink_msg(struct lustre_msg *msg, int segment,
- unsigned int newlen, int move_data);
-void lustre_free_reply_state(struct ptlrpc_reply_state *rs);
-int __lustre_unpack_msg(struct lustre_msg *m, int len);
-u32 lustre_msg_hdr_size(__u32 magic, u32 count);
-u32 lustre_msg_size(__u32 magic, int count, __u32 *lengths);
-u32 lustre_msg_size_v2(int count, __u32 *lengths);
-u32 lustre_packed_msg_size(struct lustre_msg *msg);
-u32 lustre_msg_early_size(void);
-void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, u32 n, u32 min_size);
-void *lustre_msg_buf(struct lustre_msg *m, u32 n, u32 minlen);
-u32 lustre_msg_buflen(struct lustre_msg *m, u32 n);
-u32 lustre_msg_bufcount(struct lustre_msg *m);
-char *lustre_msg_string(struct lustre_msg *m, u32 n, u32 max_len);
-__u32 lustre_msghdr_get_flags(struct lustre_msg *msg);
-void lustre_msghdr_set_flags(struct lustre_msg *msg, __u32 flags);
-__u32 lustre_msg_get_flags(struct lustre_msg *msg);
-void lustre_msg_add_flags(struct lustre_msg *msg, u32 flags);
-void lustre_msg_set_flags(struct lustre_msg *msg, u32 flags);
-void lustre_msg_clear_flags(struct lustre_msg *msg, u32 flags);
-__u32 lustre_msg_get_op_flags(struct lustre_msg *msg);
-void lustre_msg_add_op_flags(struct lustre_msg *msg, u32 flags);
-struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg);
-__u32 lustre_msg_get_type(struct lustre_msg *msg);
-void lustre_msg_add_version(struct lustre_msg *msg, u32 version);
-__u32 lustre_msg_get_opc(struct lustre_msg *msg);
-__u16 lustre_msg_get_tag(struct lustre_msg *msg);
-__u64 lustre_msg_get_last_committed(struct lustre_msg *msg);
-__u64 *lustre_msg_get_versions(struct lustre_msg *msg);
-__u64 lustre_msg_get_transno(struct lustre_msg *msg);
-__u64 lustre_msg_get_slv(struct lustre_msg *msg);
-__u32 lustre_msg_get_limit(struct lustre_msg *msg);
-void lustre_msg_set_slv(struct lustre_msg *msg, __u64 slv);
-void lustre_msg_set_limit(struct lustre_msg *msg, __u64 limit);
-int lustre_msg_get_status(struct lustre_msg *msg);
-__u32 lustre_msg_get_conn_cnt(struct lustre_msg *msg);
-__u32 lustre_msg_get_magic(struct lustre_msg *msg);
-__u32 lustre_msg_get_timeout(struct lustre_msg *msg);
-__u32 lustre_msg_get_service_time(struct lustre_msg *msg);
-__u32 lustre_msg_get_cksum(struct lustre_msg *msg);
-__u32 lustre_msg_calc_cksum(struct lustre_msg *msg);
-void lustre_msg_set_handle(struct lustre_msg *msg,
- struct lustre_handle *handle);
-void lustre_msg_set_type(struct lustre_msg *msg, __u32 type);
-void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc);
-void lustre_msg_set_last_xid(struct lustre_msg *msg, u64 last_xid);
-void lustre_msg_set_tag(struct lustre_msg *msg, __u16 tag);
-void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions);
-void lustre_msg_set_transno(struct lustre_msg *msg, __u64 transno);
-void lustre_msg_set_status(struct lustre_msg *msg, __u32 status);
-void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt);
-void ptlrpc_request_set_replen(struct ptlrpc_request *req);
-void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout);
-void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time);
-void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid);
-void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum);
-void lustre_msg_set_mbits(struct lustre_msg *msg, u64 mbits);
-
-static inline void
-lustre_shrink_reply(struct ptlrpc_request *req, int segment,
- unsigned int newlen, int move_data)
-{
- LASSERT(req->rq_reply_state);
- LASSERT(req->rq_repmsg);
- req->rq_replen = lustre_shrink_msg(req->rq_repmsg, segment,
- newlen, move_data);
-}
-
-#ifdef CONFIG_LUSTRE_TRANSLATE_ERRNOS
-
-static inline int ptlrpc_status_hton(int h)
-{
- /*
- * Positive errnos must be network errnos, such as LUSTRE_EDEADLK,
- * ELDLM_LOCK_ABORTED, etc.
- */
- if (h < 0)
- return -lustre_errno_hton(-h);
- else
- return h;
-}
-
-static inline int ptlrpc_status_ntoh(int n)
-{
- /*
- * See the comment in ptlrpc_status_hton().
- */
- if (n < 0)
- return -lustre_errno_ntoh(-n);
- else
- return n;
-}
-
-#else
-
-#define ptlrpc_status_hton(h) (h)
-#define ptlrpc_status_ntoh(n) (n)
-
-#endif
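-
-/*
- * Illustrative note: for errnos known to the translation table the two
- * conversions are inverses, e.g.
- * ptlrpc_status_ntoh(ptlrpc_status_hton(-EDEADLK)) == -EDEADLK; positive
- * (network) values pass through unchanged in both directions.
- */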
-/** @} */
-
-/** Change request phase of \a req to \a new_phase */
-static inline void
-ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase)
-{
- if (req->rq_phase == new_phase)
- return;
-
- if (new_phase == RQ_PHASE_UNREG_RPC ||
- new_phase == RQ_PHASE_UNREG_BULK) {
- /* No embedded unregistering phases */
- if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
- req->rq_phase == RQ_PHASE_UNREG_BULK)
- return;
-
- req->rq_next_phase = req->rq_phase;
- if (req->rq_import)
- atomic_inc(&req->rq_import->imp_unregistering);
- }
-
- if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
- req->rq_phase == RQ_PHASE_UNREG_BULK) {
- if (req->rq_import)
- atomic_dec(&req->rq_import->imp_unregistering);
- }
-
- DEBUG_REQ(D_INFO, req, "move req \"%s\" -> \"%s\"",
- ptlrpc_rqphase2str(req), ptlrpc_phase2str(new_phase));
-
- req->rq_phase = new_phase;
-}
-
-/**
- * Returns true if request \a req got early reply and hard deadline is not met
- */
-static inline int
-ptlrpc_client_early(struct ptlrpc_request *req)
-{
- return req->rq_early;
-}
-
-/**
- * Returns true if we got real reply from server for this request
- */
-static inline int
-ptlrpc_client_replied(struct ptlrpc_request *req)
-{
- if (req->rq_reply_deadline > ktime_get_real_seconds())
- return 0;
- return req->rq_replied;
-}
-
-/** Returns true if request \a req is in process of receiving server reply */
-static inline int
-ptlrpc_client_recv(struct ptlrpc_request *req)
-{
- if (req->rq_reply_deadline > ktime_get_real_seconds())
- return 1;
- return req->rq_receiving_reply;
-}
-
-static inline int
-ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req)
-{
- int rc;
-
- spin_lock(&req->rq_lock);
- if (req->rq_reply_deadline > ktime_get_real_seconds()) {
- spin_unlock(&req->rq_lock);
- return 1;
- }
- if (req->rq_req_deadline > ktime_get_real_seconds()) {
- spin_unlock(&req->rq_lock);
- return 1;
- }
- rc = !req->rq_req_unlinked || !req->rq_reply_unlinked ||
- req->rq_receiving_reply;
- spin_unlock(&req->rq_lock);
- return rc;
-}
-
-static inline void
-ptlrpc_client_wake_req(struct ptlrpc_request *req)
-{
- if (!req->rq_set)
- wake_up(&req->rq_reply_waitq);
- else
- wake_up(&req->rq_set->set_waitq);
-}
-
-static inline void
-ptlrpc_rs_addref(struct ptlrpc_reply_state *rs)
-{
- LASSERT(atomic_read(&rs->rs_refcount) > 0);
- atomic_inc(&rs->rs_refcount);
-}
-
-static inline void
-ptlrpc_rs_decref(struct ptlrpc_reply_state *rs)
-{
- LASSERT(atomic_read(&rs->rs_refcount) > 0);
- if (atomic_dec_and_test(&rs->rs_refcount))
- lustre_free_reply_state(rs);
-}
-
-/* Should only be called once per req */
-static inline void ptlrpc_req_drop_rs(struct ptlrpc_request *req)
-{
- if (!req->rq_reply_state)
- return; /* shouldn't occur */
- ptlrpc_rs_decref(req->rq_reply_state);
- req->rq_reply_state = NULL;
- req->rq_repmsg = NULL;
-}
-
-static inline __u32 lustre_request_magic(struct ptlrpc_request *req)
-{
- return lustre_msg_get_magic(req->rq_reqmsg);
-}
-
-static inline int ptlrpc_req_get_repsize(struct ptlrpc_request *req)
-{
- switch (req->rq_reqmsg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2:
- return req->rq_reqmsg->lm_repsize;
- default:
- LASSERTF(0, "incorrect message magic: %08x\n",
- req->rq_reqmsg->lm_magic);
- return -EFAULT;
- }
-}
-
-static inline int ptlrpc_send_limit_expired(struct ptlrpc_request *req)
-{
- if (req->rq_delay_limit != 0 &&
- time_before(req->rq_queued_time + req->rq_delay_limit * HZ,
- jiffies)) {
- return 1;
- }
- return 0;
-}
-
-static inline int ptlrpc_no_resend(struct ptlrpc_request *req)
-{
- if (!req->rq_no_resend && ptlrpc_send_limit_expired(req)) {
- spin_lock(&req->rq_lock);
- req->rq_no_resend = 1;
- spin_unlock(&req->rq_lock);
- }
- return req->rq_no_resend;
-}
-
-static inline int
-ptlrpc_server_get_timeout(struct ptlrpc_service_part *svcpt)
-{
- int at = AT_OFF ? 0 : at_get(&svcpt->scp_at_estimate);
-
- return svcpt->scp_service->srv_watchdog_factor *
- max_t(int, at, obd_timeout);
-}
-
-static inline struct ptlrpc_service *
-ptlrpc_req2svc(struct ptlrpc_request *req)
-{
- return req->rq_rqbd->rqbd_svcpt->scp_service;
-}
-
-/* ldlm/ldlm_lib.c */
-/**
- * Target client logic
- * @{
- */
-int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg);
-int client_obd_cleanup(struct obd_device *obddev);
-int client_connect_import(const struct lu_env *env,
- struct obd_export **exp, struct obd_device *obd,
- struct obd_uuid *cluuid, struct obd_connect_data *,
- void *localdata);
-int client_disconnect_export(struct obd_export *exp);
-int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid,
- int priority);
-int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid);
-int client_import_find_conn(struct obd_import *imp, lnet_nid_t peer,
- struct obd_uuid *uuid);
-int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid);
-void client_destroy_import(struct obd_import *imp);
-/** @} */
-
-/* ptlrpc/pinger.c */
-/**
- * Pinger API (client side only)
- * @{
- */
-enum timeout_event {
- TIMEOUT_GRANT = 1
-};
-
-struct timeout_item;
-typedef int (*timeout_cb_t)(struct timeout_item *, void *);
-int ptlrpc_pinger_add_import(struct obd_import *imp);
-int ptlrpc_pinger_del_import(struct obd_import *imp);
-int ptlrpc_add_timeout_client(int time, enum timeout_event event,
- timeout_cb_t cb, void *data,
- struct list_head *obd_list);
-int ptlrpc_del_timeout_client(struct list_head *obd_list,
- enum timeout_event event);
-struct ptlrpc_request *ptlrpc_prep_ping(struct obd_import *imp);
-int ptlrpc_obd_ping(struct obd_device *obd);
-void ptlrpc_pinger_ir_up(void);
-void ptlrpc_pinger_ir_down(void);
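-
-/*
- * Example (illustrative; the callback and list head are hypothetical): a
- * client registers a periodic timeout with the pinger as:
- *
- *	ptlrpc_add_timeout_client(30, TIMEOUT_GRANT, my_grant_cb, my_data,
- *				  &my_obd_list);
- */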
-/** @} */
-int ptlrpc_pinger_suppress_pings(void);
-
-/* ptlrpc/ptlrpcd.c */
-void ptlrpcd_stop(struct ptlrpcd_ctl *pc, int force);
-void ptlrpcd_free(struct ptlrpcd_ctl *pc);
-void ptlrpcd_wake(struct ptlrpc_request *req);
-void ptlrpcd_add_req(struct ptlrpc_request *req);
-int ptlrpcd_addref(void);
-void ptlrpcd_decref(void);
-
-/* ptlrpc/lproc_ptlrpc.c */
-/**
- * procfs output related functions
- * @{
- */
-const char *ll_opcode2str(__u32 opcode);
-void ptlrpc_lprocfs_register_obd(struct obd_device *obd);
-void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd);
-void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes);
-/** @} */
-
-/* ptlrpc/llog_client.c */
-extern struct llog_operations llog_client_ops;
-/** @} net */
-
-#endif
-/** @} PtlRPC */
diff --git a/drivers/staging/lustre/lustre/include/lustre_nrs.h b/drivers/staging/lustre/lustre/include/lustre_nrs.h
deleted file mode 100644
index ffa7317da35b..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_nrs.h
+++ /dev/null
@@ -1,718 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License version 2 for more details.
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2014, Intel Corporation.
- *
- * Copyright 2012 Xyratex Technology Limited
- */
-/*
- *
- * Network Request Scheduler (NRS)
- *
- */
-
-#ifndef _LUSTRE_NRS_H
-#define _LUSTRE_NRS_H
-
-/**
- * \defgroup nrs Network Request Scheduler
- * @{
- */
-struct ptlrpc_nrs_policy;
-struct ptlrpc_nrs_resource;
-struct ptlrpc_nrs_request;
-
-/**
- * NRS control operations.
- *
- * These are common for all policies.
- */
-enum ptlrpc_nrs_ctl {
- /**
- * Not a valid opcode.
- */
- PTLRPC_NRS_CTL_INVALID,
- /**
- * Activate the policy.
- */
- PTLRPC_NRS_CTL_START,
- /**
- * Reserved for multiple primary policies, which may be a possibility
- * in the future.
- */
- PTLRPC_NRS_CTL_STOP,
- /**
- * Policies can start using opcodes from this value and onwards for
- * their own purposes; the assigned value itself is arbitrary.
- */
- PTLRPC_NRS_CTL_1ST_POL_SPEC = 0x20,
-};
-
-/**
- * NRS policy operations.
- *
- * These determine the behaviour of a policy, and are called in response to
- * NRS core events.
- */
-struct ptlrpc_nrs_pol_ops {
- /**
- * Called during policy registration; this operation is optional.
- *
- * \param[in,out] policy The policy being initialized
- */
- int (*op_policy_init)(struct ptlrpc_nrs_policy *policy);
- /**
- * Called during policy unregistration; this operation is optional.
- *
- * \param[in,out] policy The policy being unregistered/finalized
- */
- void (*op_policy_fini)(struct ptlrpc_nrs_policy *policy);
- /**
- * Called when activating a policy via lprocfs; policies allocate and
- * initialize their resources here; this operation is optional.
- *
- * \param[in,out] policy The policy being started
- *
- * \see nrs_policy_start_locked()
- */
- int (*op_policy_start)(struct ptlrpc_nrs_policy *policy);
- /**
- * Called when deactivating a policy via lprocfs; policies deallocate
- * their resources here; this operation is optional
- *
- * \param[in,out] policy The policy being stopped
- *
- * \see nrs_policy_stop0()
- */
- void (*op_policy_stop)(struct ptlrpc_nrs_policy *policy);
- /**
- * Used for policy-specific operations; i.e. not generic ones like
- * \e PTLRPC_NRS_CTL_START and \e PTLRPC_NRS_CTL_GET_INFO; analogous
- * to an ioctl; this operation is optional.
- *
- * \param[in,out] policy The policy carrying out operation \a opc
- * \param[in] opc The command operation being carried out
- * \param[in,out] arg A generic buffer for communication between the
- * user and the control operation
- *
- * \retval -ve error
- * \retval 0 success
- *
- * \see ptlrpc_nrs_policy_control()
- */
- int (*op_policy_ctl)(struct ptlrpc_nrs_policy *policy,
- enum ptlrpc_nrs_ctl opc, void *arg);
-
- /**
- * Called when obtaining references to the resources of the resource
- * hierarchy for a request that has arrived for handling at the PTLRPC
- * service. Policies should return -ve for requests they do not wish
- * to handle. This operation is mandatory.
- *
- * \param[in,out] policy The policy we're getting resources for.
- * \param[in,out] nrq The request we are getting resources for.
- * \param[in] parent The parent resource of the resource being
- * requested; set to NULL if none.
- * \param[out] resp The resource is to be returned here; the
- * fallback policy in an NRS head should
- * \e always return a non-NULL pointer value.
- * \param[in] moving_req When set, signifies that this is an attempt
- * to obtain resources for a request being moved
- * to the high-priority NRS head by
- * ldlm_lock_reorder_req().
- * This implies two things:
- * 1. We are under obd_export::exp_rpc_lock and
- * so should not sleep.
- * 2. We should not perform non-idempotent operations,
- * and we can skip idempotent operations that were
- * already carried out when resources were first
- * taken for the request when it was initialized
- * in ptlrpc_nrs_req_initialize().
- *
- * \retval 0, +ve The level of the returned resource in the resource
- * hierarchy; currently only 0 (for a non-leaf resource)
- * and 1 (for a leaf resource) are supported by the
- * framework.
- * \retval -ve error
- *
- * \see ptlrpc_nrs_req_initialize()
- * \see ptlrpc_nrs_hpreq_add_nolock()
- * \see ptlrpc_nrs_req_hp_move()
- */
- int (*op_res_get)(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq,
- const struct ptlrpc_nrs_resource *parent,
- struct ptlrpc_nrs_resource **resp,
- bool moving_req);
- /**
- * Called when releasing references taken for resources in the resource
- * hierarchy for the request; this operation is optional.
- *
- * \param[in,out] policy The policy the resource belongs to
- * \param[in] res The resource to be freed
- *
- * \see ptlrpc_nrs_req_finalize()
- * \see ptlrpc_nrs_hpreq_add_nolock()
- * \see ptlrpc_nrs_req_hp_move()
- */
- void (*op_res_put)(struct ptlrpc_nrs_policy *policy,
- const struct ptlrpc_nrs_resource *res);
-
- /**
- * Obtains a request for handling from the policy, and optionally
- * removes the request from the policy; this operation is mandatory.
- *
- * \param[in,out] policy The policy to poll
- * \param[in] peek When set, signifies that we just want to
- * examine the request, and not handle it, so the
- * request is not removed from the policy.
- * \param[in] force When set, it will force a policy to return a
- * request if it has one queued.
- *
- * \retval NULL No request available for handling
- * \retval valid-pointer The request polled for handling
- *
- * \see ptlrpc_nrs_req_get_nolock()
- */
- struct ptlrpc_nrs_request *
- (*op_req_get)(struct ptlrpc_nrs_policy *policy, bool peek,
- bool force);
- /**
- * Called when attempting to add a request to a policy for later
- * handling; this operation is mandatory.
- *
- * \param[in,out] policy The policy on which to enqueue \a nrq
- * \param[in,out] nrq The request to enqueue
- *
- * \retval 0 success
- * \retval != 0 error
- *
- * \see ptlrpc_nrs_req_add_nolock()
- */
- int (*op_req_enqueue)(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq);
- /**
- * Removes a request from the policy's set of pending requests. Normally
- * called after a request has been polled successfully from the policy
- * for handling; this operation is mandatory.
- *
- * \param[in,out] policy The policy the request \a nrq belongs to
- * \param[in,out] nrq The request to dequeue
- *
- * \see ptlrpc_nrs_req_del_nolock()
- */
- void (*op_req_dequeue)(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq);
- /**
- * Called after the request has been carried out. Could be used for
- * job/resource control; this operation is optional.
- *
- * \param[in,out] policy The policy that is finishing handling request
- * \a nrq
- * \param[in,out] nrq The request
- *
- * \pre assert_spin_locked(&svcpt->scp_req_lock)
- *
- * \see ptlrpc_nrs_req_stop_nolock()
- */
- void (*op_req_stop)(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq);
- /**
- * Registers the policy's lprocfs interface with a PTLRPC service.
- *
- * \param[in] svc The service
- *
- * \retval 0 success
- * \retval != 0 error
- */
- int (*op_lprocfs_init)(struct ptlrpc_service *svc);
- /**
-	 * Unregisters the policy's lprocfs interface from a PTLRPC service.
- *
- * In cases of failed policy registration in
- * \e ptlrpc_nrs_policy_register(), this function may be called for a
- * service which has not registered the policy successfully, so
- * implementations of this method should make sure their operations are
- * safe in such cases.
- *
- * \param[in] svc The service
- */
- void (*op_lprocfs_fini)(struct ptlrpc_service *svc);
-};
-
-/**
- * Policy flags
- */
-enum nrs_policy_flags {
- /**
-	 * Fallback policy; this flag may be set on only one policy per
-	 * service. It cannot be combined with
-	 * \e PTLRPC_NRS_FL_REG_EXTERN
- */
- PTLRPC_NRS_FL_FALLBACK = BIT(0),
- /**
- * Start policy immediately after registering.
- */
- PTLRPC_NRS_FL_REG_START = BIT(1),
- /**
-	 * This is a policy registering from a module other than the one NRS
-	 * core ships in (currently ptlrpc).
- */
- PTLRPC_NRS_FL_REG_EXTERN = BIT(2),
-};
-
-/**
- * NRS queue type.
- *
- * Denotes whether an NRS instance is for handling normal or high-priority
- * RPCs, or whether an operation pertains to one or both of the NRS instances
- * in a service.
- */
-enum ptlrpc_nrs_queue_type {
- PTLRPC_NRS_QUEUE_REG = BIT(0),
- PTLRPC_NRS_QUEUE_HP = BIT(1),
- PTLRPC_NRS_QUEUE_BOTH = (PTLRPC_NRS_QUEUE_REG | PTLRPC_NRS_QUEUE_HP)
-};
-
-/**
- * NRS head
- *
- * A PTLRPC service has at least one NRS head instance for handling normal
- * priority RPCs, and may optionally have a second NRS head instance for
- * handling high-priority RPCs. Each NRS head maintains a list of available
- * policies, of which one and only one policy is acting as the fallback policy,
- * and optionally a different policy may be acting as the primary policy. For
- * all RPCs handled by this NRS head instance, NRS core will first attempt to
- * enqueue the RPC using the primary policy (if any). The fallback policy is
- * used in the following cases:
- * - when there was no primary policy in the
- *   ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the request
- *   was initialized.
- * - when the primary policy that was in the
- *   ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the
- *   RPC was initialized denoted that it did not wish, or for some other
- *   reason was not able, to handle the request, by returning a non-valid NRS
- *   resource reference.
- * - when the primary policy that was in the
- *   ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the
- *   RPC was initialized fails later, during the request enqueueing stage.
- * (A simplified selection sketch appears after struct ptlrpc_nrs_policy
- * below.)
- *
- * \see nrs_resource_get_safe()
- * \see nrs_request_enqueue()
- */
-struct ptlrpc_nrs {
- spinlock_t nrs_lock;
- /** XXX Possibly replace svcpt->scp_req_lock with another lock here. */
- /**
- * List of registered policies
- */
- struct list_head nrs_policy_list;
- /**
- * List of policies with queued requests. Policies that have any
- * outstanding requests are queued here, and this list is queried
- * in a round-robin manner from NRS core when obtaining a request
- * for handling. This ensures that requests from policies that at some
- * point transition away from the
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state are drained.
- */
- struct list_head nrs_policy_queued;
- /**
- * Service partition for this NRS head
- */
- struct ptlrpc_service_part *nrs_svcpt;
- /**
- * Primary policy, which is the preferred policy for handling RPCs
- */
- struct ptlrpc_nrs_policy *nrs_policy_primary;
- /**
- * Fallback policy, which is the backup policy for handling RPCs
- */
- struct ptlrpc_nrs_policy *nrs_policy_fallback;
- /**
- * This NRS head handles either HP or regular requests
- */
- enum ptlrpc_nrs_queue_type nrs_queue_type;
- /**
- * # queued requests from all policies in this NRS head
- */
- unsigned long nrs_req_queued;
- /**
- * # scheduled requests from all policies in this NRS head
- */
- unsigned long nrs_req_started;
- /**
- * # policies on this NRS
- */
- unsigned int nrs_num_pols;
- /**
-	 * This NRS head is in the process of starting a policy
- */
- unsigned int nrs_policy_starting:1;
- /**
-	 * In the process of shutting down the whole NRS head; used during
- * unregistration
- */
- unsigned int nrs_stopping:1;
- /**
-	 * An NRS policy is throttling requests
- */
- unsigned int nrs_throttling:1;
-};
-
-#define NRS_POL_NAME_MAX 16
-#define NRS_POL_ARG_MAX 16
-
-struct ptlrpc_nrs_pol_desc;
-
-/**
- * Service compatibility predicate; this determines whether a policy is adequate
- * for handling RPCs of a particular PTLRPC service.
- *
- * XXX: This should give the same result during policy registration and
- * unregistration, and for all partitions of a service; the result should
- * therefore not depend on temporal service state or other properties that
- * may change over time.
- */
-typedef bool (*nrs_pol_desc_compat_t)(const struct ptlrpc_service *svc,
- const struct ptlrpc_nrs_pol_desc *desc);
-
-struct ptlrpc_nrs_pol_conf {
- /**
- * Human-readable policy name
- */
- char nc_name[NRS_POL_NAME_MAX];
- /**
- * NRS operations for this policy
- */
- const struct ptlrpc_nrs_pol_ops *nc_ops;
- /**
- * Service compatibility predicate
- */
- nrs_pol_desc_compat_t nc_compat;
- /**
- * Set for policies that support a single ptlrpc service, i.e. ones that
-	 * have \a pd_compat set to nrs_policy_compat_one(). The value gives
-	 * the name of the single service that such policies are
- * compatible with.
- */
- const char *nc_compat_svc_name;
- /**
-	 * Owner module for this policy descriptor; policies registering from
-	 * a module other than the one the NRS framework resides in (currently
-	 * ptlrpc) should set this field to THIS_MODULE.
- */
- struct module *nc_owner;
- /**
- * Policy registration flags; a bitmask of \e nrs_policy_flags
- */
- unsigned int nc_flags;
-};
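-
-/*
- * Illustrative sketch (editorial addition, not part of the original
- * header): how an external module might fill in a ptlrpc_nrs_pol_conf
- * and register it. "example_pol_ops" and "example_pol_compat" are
- * hypothetical names; ptlrpc_nrs_policy_register() is the registration
- * entry point referred to elsewhere in this header.
- */
-#if 0	/* hypothetical module init */
-static int __init example_policy_init(void)
-{
-	static struct ptlrpc_nrs_pol_conf conf = {
-		.nc_name	= "example",
-		.nc_ops		= &example_pol_ops,
-		.nc_compat	= example_pol_compat,
-		.nc_owner	= THIS_MODULE,	/* registering externally */
-		.nc_flags	= PTLRPC_NRS_FL_REG_EXTERN,
-	};
-
-	return ptlrpc_nrs_policy_register(&conf);
-}
-#endif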
-
-/**
- * NRS policy registering descriptor
- *
- * Is used to hold a description of a policy that can be passed to NRS core in
- * order to register the policy with NRS heads in different PTLRPC services.
- */
-struct ptlrpc_nrs_pol_desc {
- /**
- * Human-readable policy name
- */
- char pd_name[NRS_POL_NAME_MAX];
- /**
- * Link into nrs_core::nrs_policies
- */
- struct list_head pd_list;
- /**
- * NRS operations for this policy
- */
- const struct ptlrpc_nrs_pol_ops *pd_ops;
- /**
- * Service compatibility predicate
- */
- nrs_pol_desc_compat_t pd_compat;
- /**
- * Set for policies that are compatible with only one PTLRPC service.
- *
- * \see ptlrpc_nrs_pol_conf::nc_compat_svc_name
- */
- const char *pd_compat_svc_name;
- /**
- * Owner module for this policy descriptor.
- *
- * We need to hold a reference to the module whenever we might make use
- * of any of the module's contents, i.e.
-	 * - If one or more instances of the policy are in a state where they
-	 *   might be handling a request, i.e.
-	 *   ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED or
-	 *   ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, as we will have to
-	 *   call into the policy's ptlrpc_nrs_pol_ops() handlers. A reference
-	 *   is taken on the module when
-	 *   \e ptlrpc_nrs_pol_desc::pd_refs becomes 1, and released when it
-	 *   becomes 0, so that we hold at most one reference to the module
-	 *   at any time.
- *
- * We do not need to hold a reference to the module, even though we
- * might use code and data from the module, in the following cases:
- * - During external policy registration, because this should happen in
- * the module's init() function, in which case the module is safe from
- * removal because a reference is being held on the module by the
-	 *   kernel, and kmod will serialize any racing module load/unload
-	 *   operations properly anyway.
- * - During external policy unregistration, because this should happen
- * in a module's exit() function, and any attempts to start a policy
- * instance would need to take a reference on the module, and this is
- * not possible once we have reached the point where the exit()
- * handler is called.
- * - During service registration and unregistration, as service setup
- * and cleanup, and policy registration, unregistration and policy
- * instance starting, are serialized by \e nrs_core::nrs_mutex, so
- * as long as users adhere to the convention of registering policies
- * in init() and unregistering them in module exit() functions, there
- * should not be a race between these operations.
- * - During any policy-specific lprocfs operations, because a reference
- * is held by the kernel on a proc entry that has been entered by a
- * syscall, so as long as proc entries are removed during
- * unregistration time, then unregistration and lprocfs operations
- * will be properly serialized.
- */
- struct module *pd_owner;
- /**
- * Bitmask of \e nrs_policy_flags
- */
- unsigned int pd_flags;
- /**
- * # of references on this descriptor
- */
- atomic_t pd_refs;
-};
-
-/**
- * NRS policy state
- *
- * Policies transition from one state to the other during their lifetime
- */
-enum ptlrpc_nrs_pol_state {
- /**
- * Not a valid policy state.
- */
- NRS_POL_STATE_INVALID,
- /**
- * Policies are at this state either at the start of their life, or
- * transition here when the user selects a different policy to act
- * as the primary one.
- */
- NRS_POL_STATE_STOPPED,
- /**
-	 * Policy is in the process of stopping
- */
- NRS_POL_STATE_STOPPING,
- /**
-	 * Policy is in the process of starting
- */
- NRS_POL_STATE_STARTING,
- /**
- * A policy is in this state in two cases:
- * - it is the fallback policy, which is always in this state.
-	 * - it has been activated by the user, i.e. it is the primary policy.
- */
- NRS_POL_STATE_STARTED,
-};
-
-/**
- * NRS policy information
- *
- * Used for obtaining information for the status of a policy via lprocfs
- */
-struct ptlrpc_nrs_pol_info {
- /**
- * Policy name
- */
- char pi_name[NRS_POL_NAME_MAX];
- /**
- * Policy argument
- */
- char pi_arg[NRS_POL_ARG_MAX];
- /**
- * Current policy state
- */
- enum ptlrpc_nrs_pol_state pi_state;
- /**
- * # RPCs enqueued for later dispatching by the policy
- */
- long pi_req_queued;
- /**
- * # RPCs started for dispatch by the policy
- */
- long pi_req_started;
- /**
- * Is this a fallback policy?
- */
- unsigned pi_fallback:1;
-};
-
-/**
- * NRS policy
- *
- * There is one instance of this for each policy in each NRS head of each
- * PTLRPC service partition.
- */
-struct ptlrpc_nrs_policy {
- /**
- * Linkage into the NRS head's list of policies,
- * ptlrpc_nrs:nrs_policy_list
- */
- struct list_head pol_list;
- /**
- * Linkage into the NRS head's list of policies with enqueued
- * requests ptlrpc_nrs:nrs_policy_queued
- */
- struct list_head pol_list_queued;
- /**
- * Current state of this policy
- */
- enum ptlrpc_nrs_pol_state pol_state;
- /**
- * Bitmask of nrs_policy_flags
- */
- unsigned int pol_flags;
- /**
- * # RPCs enqueued for later dispatching by the policy
- */
- long pol_req_queued;
- /**
- * # RPCs started for dispatch by the policy
- */
- long pol_req_started;
- /**
-	 * Usage reference count taken on the policy instance
- */
- long pol_ref;
- /**
- * Human-readable policy argument
- */
- char pol_arg[NRS_POL_ARG_MAX];
- /**
-	 * The NRS head this policy was created on
- */
- struct ptlrpc_nrs *pol_nrs;
- /**
- * Private policy data; varies by policy type
- */
- void *pol_private;
- /**
- * Policy descriptor for this policy instance.
- */
- struct ptlrpc_nrs_pol_desc *pol_desc;
-};
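-
-/*
- * Illustrative sketch (editorial addition): how an NRS head might choose
- * between its primary and fallback policies, per the description of
- * struct ptlrpc_nrs above. The in-tree logic lives in
- * nrs_resource_get_safe()/nrs_request_enqueue(); this is a simplified
- * approximation, not the actual implementation.
- */
-static inline struct ptlrpc_nrs_policy *
-nrs_example_policy_pick(struct ptlrpc_nrs *nrs)
-{
-	struct ptlrpc_nrs_policy *primary = nrs->nrs_policy_primary;
-
-	/* use the primary policy only while it is fully started */
-	if (primary && primary->pol_state == NRS_POL_STATE_STARTED)
-		return primary;
-	/* otherwise the fallback policy must take the request */
-	return nrs->nrs_policy_fallback;
-}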
-
-/**
- * NRS resource
- *
- * Resources are embedded into two types of NRS entities:
- * - Inside NRS policies, in the policy's private data in
- * ptlrpc_nrs_policy::pol_private
- * - In objects that act as prime-level scheduling entities in different NRS
- * policies; e.g. on a policy that performs round robin or similar order
- * scheduling across client NIDs, there would be one NRS resource per unique
- * client NID. On a policy which performs round robin scheduling across
- * backend filesystem objects, there would be one resource associated with
- * each of the backend filesystem objects partaking in the scheduling
- * performed by the policy.
- *
- * NRS resources share a parent-child relationship, in which resources embedded
- * in policy instances are the parent entities, with all scheduling entities
- * a policy schedules across being the children, thus forming a simple resource
- * hierarchy. This hierarchy may be extended with one or more levels in the
- * future if the ability to have more than one primary policy is added.
- *
- * Upon request initialization, references to the then active NRS policies are
- * taken and used to later handle the dispatching of the request with one of
- * these policies.
- *
- * \see nrs_resource_get_safe()
- * \see ptlrpc_nrs_req_add()
- */
-struct ptlrpc_nrs_resource {
- /**
- * This NRS resource's parent; is NULL for resources embedded in NRS
- * policy instances; i.e. those are top-level ones.
- */
- struct ptlrpc_nrs_resource *res_parent;
- /**
- * The policy associated with this resource.
- */
- struct ptlrpc_nrs_policy *res_policy;
-};
-
-enum {
- NRS_RES_FALLBACK,
- NRS_RES_PRIMARY,
- NRS_RES_MAX
-};
-
-#include <lustre_nrs_fifo.h>
-
-/**
- * NRS request
- *
- * Instances of this object exist embedded within ptlrpc_request; the main
- * purpose of this object is to hold references to the request's resources
- * for the lifetime of the request, and to hold properties that policies
- * use for determining the request's scheduling priority.
- */
-struct ptlrpc_nrs_request {
- /**
- * The request's resource hierarchy.
- */
- struct ptlrpc_nrs_resource *nr_res_ptrs[NRS_RES_MAX];
- /**
- * Index into ptlrpc_nrs_request::nr_res_ptrs of the resource of the
- * policy that was used to enqueue the request.
- *
- * \see nrs_request_enqueue()
- */
- unsigned int nr_res_idx;
- unsigned int nr_initialized:1;
- unsigned int nr_enqueued:1;
- unsigned int nr_started:1;
- unsigned int nr_finalized:1;
-
- /**
- * Policy-specific fields, used for determining a request's scheduling
- * priority, and other supporting functionality.
- */
- union {
- /**
- * Fields for the FIFO policy
- */
- struct nrs_fifo_req fifo;
- } nr_u;
- /**
- * Externally-registering policies may want to use this to allocate
- * their own request properties.
- */
- void *ext;
-};
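-
-/*
- * Illustrative sketch (editorial addition): a minimal op_req_get() body
- * for a list-backed policy, honouring the \a peek semantics documented
- * with struct ptlrpc_nrs_pol_ops above. It reuses the FIFO member
- * nr_u.fifo.fr_list and assumes linux/list.h is visible, as the list
- * usage in this header already implies.
- */
-static inline struct ptlrpc_nrs_request *
-nrs_example_req_get(struct nrs_fifo_head *head, bool peek, bool force)
-{
-	struct ptlrpc_nrs_request *nrq;
-
-	/* \a force is irrelevant here: the head request is always eligible */
-	nrq = list_first_entry_or_null(&head->fh_list,
-				       struct ptlrpc_nrs_request,
-				       nr_u.fifo.fr_list);
-	/* when only examining the queue, leave the request where it is */
-	if (nrq && !peek)
-		list_del_init(&nrq->nr_u.fifo.fr_list);
-	return nrq;
-}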
-
-/** @} nrs */
-#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h b/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h
deleted file mode 100644
index b70d97d4acbb..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h
+++ /dev/null
@@ -1,71 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License version 2 for more details.
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2014, Intel Corporation.
- *
- * Copyright 2012 Xyratex Technology Limited
- */
-/*
- *
- * Network Request Scheduler (NRS) First-in First-out (FIFO) policy
- *
- */
-
-#ifndef _LUSTRE_NRS_FIFO_H
-#define _LUSTRE_NRS_FIFO_H
-
-/* \name fifo
- *
- * FIFO policy
- *
- * This policy is a logical wrapper around previous, non-NRS functionality.
- * It dispatches RPCs in the same order as they arrive from the network. This
- * policy is currently used as the fallback policy, and the only enabled policy
- * on all NRS heads of all PTLRPC service partitions.
- * @{
- */
-
-/**
- * Private data structure for the FIFO policy
- */
-struct nrs_fifo_head {
- /**
- * Resource object for policy instance.
- */
- struct ptlrpc_nrs_resource fh_res;
- /**
- * List of queued requests.
- */
- struct list_head fh_list;
- /**
- * For debugging purposes.
- */
- __u64 fh_sequence;
-};
-
-struct nrs_fifo_req {
- struct list_head fr_list;
- __u64 fr_sequence;
-};
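-
-/*
- * Illustrative sketch (editorial addition, not part of the original
- * header): how the FIFO policy links a request into its private data,
- * tagging it with the debugging sequence number along the way. The
- * in-tree enqueue path works on struct ptlrpc_nrs_request; this
- * stripped-down variant operates on the embedded nrs_fifo_req directly.
- */
-static inline void nrs_fifo_example_req_add(struct nrs_fifo_head *head,
-					    struct nrs_fifo_req *req)
-{
-	req->fr_sequence = head->fh_sequence++;
-	/* arrival order == dispatch order: always append at the tail */
-	list_add_tail(&req->fr_list, &head->fh_list);
-}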
-
-/** @} fifo */
-#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_obdo.h b/drivers/staging/lustre/lustre/include/lustre_obdo.h
deleted file mode 100644
index d67dcbb84f18..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_obdo.h
+++ /dev/null
@@ -1,55 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2014, Intel Corporation.
- *
- * Copyright 2015 Cray Inc, all rights reserved.
- * Author: Ben Evans.
- *
- * Define obdo-associated functions
- * obdo: OBject Device o...
- */
-
-#ifndef _LUSTRE_OBDO_H_
-#define _LUSTRE_OBDO_H_
-
-#include <uapi/linux/lustre/lustre_idl.h>
-
-/**
- * Create an obdo to send over the wire
- */
-void lustre_set_wire_obdo(const struct obd_connect_data *ocd,
- struct obdo *wobdo,
- const struct obdo *lobdo);
-
-/**
- * Create a local obdo from a wire-based obdo
- */
-void lustre_get_wire_obdo(const struct obd_connect_data *ocd,
- struct obdo *lobdo,
- const struct obdo *wobdo);
-
-#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_patchless_compat.h b/drivers/staging/lustre/lustre/include/lustre_patchless_compat.h
deleted file mode 100644
index 298476ea7557..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_patchless_compat.h
+++ /dev/null
@@ -1,68 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef LUSTRE_PATCHLESS_COMPAT_H
-#define LUSTRE_PATCHLESS_COMPAT_H
-
-#include <linux/fs.h>
-
-#include <linux/list.h>
-#include <linux/mm.h>
-#include <linux/hash.h>
-#include <linux/pagemap.h>
-
-#define ll_delete_from_page_cache(page) delete_from_page_cache(page)
-
-static inline void
-truncate_complete_page(struct address_space *mapping, struct page *page)
-{
-	/* Bail out if the page was already truncated or migrated away. */
-	if (page->mapping != mapping)
-		return;
-
-	/* Let the filesystem drop any private (e.g. buffer-head) state. */
-	if (PagePrivate(page))
-		page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
-
- cancel_dirty_page(page);
- ClearPageMappedToDisk(page);
- ll_delete_from_page_cache(page);
-}
-
-#ifndef ATTR_CTIME_SET
-/*
- * set ATTR_CTIME_SET to a high value to avoid any risk of collision with other
- * ATTR_* attributes (see bug 13828)
- */
-#define ATTR_CTIME_SET (1 << 28)
-#endif
-
-#endif /* LUSTRE_PATCHLESS_COMPAT_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_req_layout.h b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
deleted file mode 100644
index 213d0a01adcf..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_req_layout.h
+++ /dev/null
@@ -1,307 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/include/lustre_req_layout.h
- *
- * Lustre Metadata Target (mdt) request handler
- *
- * Author: Nikita Danilov <nikita@clusterfs.com>
- */
-
-#ifndef _LUSTRE_REQ_LAYOUT_H__
-#define _LUSTRE_REQ_LAYOUT_H__
-
-#include <linux/types.h>
-
-/** \defgroup req_layout req_layout
- *
- * @{
- */
-
-struct req_msg_field;
-struct req_format;
-struct req_capsule;
-
-struct ptlrpc_request;
-
-enum req_location {
- RCL_CLIENT,
- RCL_SERVER,
- RCL_NR
-};
-
-/* Maximal number of fields (buffers) in a request message. */
-#define REQ_MAX_FIELD_NR 9
-
-struct req_capsule {
- struct ptlrpc_request *rc_req;
- const struct req_format *rc_fmt;
- enum req_location rc_loc;
- __u32 rc_area[RCL_NR][REQ_MAX_FIELD_NR];
-};
-
-void req_capsule_init(struct req_capsule *pill, struct ptlrpc_request *req,
- enum req_location location);
-void req_capsule_fini(struct req_capsule *pill);
-
-void req_capsule_set(struct req_capsule *pill, const struct req_format *fmt);
-size_t req_capsule_filled_sizes(struct req_capsule *pill,
- enum req_location loc);
-int req_capsule_server_pack(struct req_capsule *pill);
-
-void *req_capsule_client_get(struct req_capsule *pill,
- const struct req_msg_field *field);
-void *req_capsule_client_swab_get(struct req_capsule *pill,
- const struct req_msg_field *field,
- void *swabber);
-void *req_capsule_client_sized_get(struct req_capsule *pill,
- const struct req_msg_field *field,
- u32 len);
-void *req_capsule_server_get(struct req_capsule *pill,
- const struct req_msg_field *field);
-void *req_capsule_server_sized_get(struct req_capsule *pill,
- const struct req_msg_field *field,
- u32 len);
-void *req_capsule_server_swab_get(struct req_capsule *pill,
- const struct req_msg_field *field,
- void *swabber);
-void *req_capsule_server_sized_swab_get(struct req_capsule *pill,
- const struct req_msg_field *field,
- u32 len, void *swabber);
-
-void req_capsule_set_size(struct req_capsule *pill,
- const struct req_msg_field *field,
- enum req_location loc, u32 size);
-u32 req_capsule_get_size(const struct req_capsule *pill,
- const struct req_msg_field *field,
- enum req_location loc);
-u32 req_capsule_msg_size(struct req_capsule *pill, enum req_location loc);
-u32 req_capsule_fmt_size(__u32 magic, const struct req_format *fmt,
- enum req_location loc);
-void req_capsule_extend(struct req_capsule *pill, const struct req_format *fmt);
-
-int req_capsule_has_field(const struct req_capsule *pill,
- const struct req_msg_field *field,
- enum req_location loc);
-void req_capsule_shrink(struct req_capsule *pill,
- const struct req_msg_field *field,
- u32 newlen, enum req_location loc);
-int req_layout_init(void);
-void req_layout_fini(void);
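-
-/*
- * Illustrative sketch (editorial addition): typical server-side use of a
- * request capsule for an incoming ptlrpc_request "req", using formats and
- * fields declared below. The capsule normally lives embedded in the
- * request; the exact embedding and error handling are omitted, so this
- * fragment is kept compiled out.
- */
-#if 0	/* example only */
-	struct req_capsule pill;
-	struct mdt_body *body;
-	int rc;
-
-	req_capsule_init(&pill, req, RCL_SERVER);
-	req_capsule_set(&pill, &RQF_MDS_GETATTR);
-	body = req_capsule_client_get(&pill, &RMF_MDT_BODY);
-	if (!body)
-		return -EPROTO;
-	rc = req_capsule_server_pack(&pill);	/* lay out the reply buffers */
-#endif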
-
-extern struct req_format RQF_OBD_PING;
-extern struct req_format RQF_OBD_SET_INFO;
-extern struct req_format RQF_SEC_CTX;
-/* MGS req_format */
-extern struct req_format RQF_MGS_TARGET_REG;
-extern struct req_format RQF_MGS_SET_INFO;
-extern struct req_format RQF_MGS_CONFIG_READ;
-/* fid/fld req_format */
-extern struct req_format RQF_SEQ_QUERY;
-extern struct req_format RQF_FLD_QUERY;
-extern struct req_format RQF_FLD_READ;
-/* MDS req_format */
-extern struct req_format RQF_MDS_CONNECT;
-extern struct req_format RQF_MDS_DISCONNECT;
-extern struct req_format RQF_MDS_STATFS;
-extern struct req_format RQF_MDS_GETSTATUS;
-extern struct req_format RQF_MDS_SYNC;
-extern struct req_format RQF_MDS_GETXATTR;
-extern struct req_format RQF_MDS_GETATTR;
-
-/*
- * This is format of direct (non-intent) MDS_GETATTR_NAME request.
- */
-extern struct req_format RQF_MDS_GETATTR_NAME;
-extern struct req_format RQF_MDS_CLOSE;
-extern struct req_format RQF_MDS_INTENT_CLOSE;
-extern struct req_format RQF_MDS_CONNECT;
-extern struct req_format RQF_MDS_DISCONNECT;
-extern struct req_format RQF_MDS_GET_INFO;
-extern struct req_format RQF_MDS_READPAGE;
-extern struct req_format RQF_MDS_WRITEPAGE;
-extern struct req_format RQF_MDS_REINT;
-extern struct req_format RQF_MDS_REINT_CREATE;
-extern struct req_format RQF_MDS_REINT_CREATE_ACL;
-extern struct req_format RQF_MDS_REINT_CREATE_SLAVE;
-extern struct req_format RQF_MDS_REINT_CREATE_SYM;
-extern struct req_format RQF_MDS_REINT_OPEN;
-extern struct req_format RQF_MDS_REINT_UNLINK;
-extern struct req_format RQF_MDS_REINT_LINK;
-extern struct req_format RQF_MDS_REINT_RENAME;
-extern struct req_format RQF_MDS_REINT_SETATTR;
-extern struct req_format RQF_MDS_REINT_SETXATTR;
-extern struct req_format RQF_MDS_QUOTACTL;
-extern struct req_format RQF_MDS_SWAP_LAYOUTS;
-extern struct req_format RQF_MDS_REINT_MIGRATE;
-/* MDS hsm formats */
-extern struct req_format RQF_MDS_HSM_STATE_GET;
-extern struct req_format RQF_MDS_HSM_STATE_SET;
-extern struct req_format RQF_MDS_HSM_ACTION;
-extern struct req_format RQF_MDS_HSM_PROGRESS;
-extern struct req_format RQF_MDS_HSM_CT_REGISTER;
-extern struct req_format RQF_MDS_HSM_CT_UNREGISTER;
-extern struct req_format RQF_MDS_HSM_REQUEST;
-/* OST req_format */
-extern struct req_format RQF_OST_CONNECT;
-extern struct req_format RQF_OST_DISCONNECT;
-extern struct req_format RQF_OST_QUOTACTL;
-extern struct req_format RQF_OST_GETATTR;
-extern struct req_format RQF_OST_SETATTR;
-extern struct req_format RQF_OST_CREATE;
-extern struct req_format RQF_OST_PUNCH;
-extern struct req_format RQF_OST_SYNC;
-extern struct req_format RQF_OST_DESTROY;
-extern struct req_format RQF_OST_BRW_READ;
-extern struct req_format RQF_OST_BRW_WRITE;
-extern struct req_format RQF_OST_STATFS;
-extern struct req_format RQF_OST_SET_GRANT_INFO;
-extern struct req_format RQF_OST_GET_INFO;
-extern struct req_format RQF_OST_GET_INFO_LAST_ID;
-extern struct req_format RQF_OST_GET_INFO_LAST_FID;
-extern struct req_format RQF_OST_SET_INFO_LAST_FID;
-extern struct req_format RQF_OST_GET_INFO_FIEMAP;
-
-/* LDLM req_format */
-extern struct req_format RQF_LDLM_ENQUEUE;
-extern struct req_format RQF_LDLM_ENQUEUE_LVB;
-extern struct req_format RQF_LDLM_CONVERT;
-extern struct req_format RQF_LDLM_INTENT;
-extern struct req_format RQF_LDLM_INTENT_BASIC;
-extern struct req_format RQF_LDLM_INTENT_LAYOUT;
-extern struct req_format RQF_LDLM_INTENT_GETATTR;
-extern struct req_format RQF_LDLM_INTENT_OPEN;
-extern struct req_format RQF_LDLM_INTENT_CREATE;
-extern struct req_format RQF_LDLM_INTENT_UNLINK;
-extern struct req_format RQF_LDLM_INTENT_GETXATTR;
-extern struct req_format RQF_LDLM_CANCEL;
-extern struct req_format RQF_LDLM_CALLBACK;
-extern struct req_format RQF_LDLM_CP_CALLBACK;
-extern struct req_format RQF_LDLM_BL_CALLBACK;
-extern struct req_format RQF_LDLM_GL_CALLBACK;
-extern struct req_format RQF_LDLM_GL_DESC_CALLBACK;
-/* LOG req_format */
-extern struct req_format RQF_LOG_CANCEL;
-extern struct req_format RQF_LLOG_ORIGIN_HANDLE_CREATE;
-extern struct req_format RQF_LLOG_ORIGIN_HANDLE_DESTROY;
-extern struct req_format RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK;
-extern struct req_format RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK;
-extern struct req_format RQF_LLOG_ORIGIN_HANDLE_READ_HEADER;
-extern struct req_format RQF_LLOG_ORIGIN_CONNECT;
-
-extern struct req_format RQF_CONNECT;
-
-extern struct req_msg_field RMF_GENERIC_DATA;
-extern struct req_msg_field RMF_PTLRPC_BODY;
-extern struct req_msg_field RMF_MDT_BODY;
-extern struct req_msg_field RMF_MDT_EPOCH;
-extern struct req_msg_field RMF_OBD_STATFS;
-extern struct req_msg_field RMF_NAME;
-extern struct req_msg_field RMF_SYMTGT;
-extern struct req_msg_field RMF_TGTUUID;
-extern struct req_msg_field RMF_CLUUID;
-extern struct req_msg_field RMF_SETINFO_VAL;
-extern struct req_msg_field RMF_SETINFO_KEY;
-extern struct req_msg_field RMF_GETINFO_VAL;
-extern struct req_msg_field RMF_GETINFO_VALLEN;
-extern struct req_msg_field RMF_GETINFO_KEY;
-extern struct req_msg_field RMF_CLOSE_DATA;
-
-/*
- * connection handle received in MDS_CONNECT request.
- */
-extern struct req_msg_field RMF_CONN;
-extern struct req_msg_field RMF_CONNECT_DATA;
-extern struct req_msg_field RMF_DLM_REQ;
-extern struct req_msg_field RMF_DLM_REP;
-extern struct req_msg_field RMF_DLM_LVB;
-extern struct req_msg_field RMF_DLM_GL_DESC;
-extern struct req_msg_field RMF_LDLM_INTENT;
-extern struct req_msg_field RMF_LAYOUT_INTENT;
-extern struct req_msg_field RMF_MDT_MD;
-extern struct req_msg_field RMF_REC_REINT;
-extern struct req_msg_field RMF_EADATA;
-extern struct req_msg_field RMF_EAVALS;
-extern struct req_msg_field RMF_EAVALS_LENS;
-extern struct req_msg_field RMF_ACL;
-extern struct req_msg_field RMF_LOGCOOKIES;
-extern struct req_msg_field RMF_CAPA1;
-extern struct req_msg_field RMF_CAPA2;
-extern struct req_msg_field RMF_OBD_QUOTACHECK;
-extern struct req_msg_field RMF_OBD_QUOTACTL;
-extern struct req_msg_field RMF_STRING;
-extern struct req_msg_field RMF_SWAP_LAYOUTS;
-extern struct req_msg_field RMF_MDS_HSM_PROGRESS;
-extern struct req_msg_field RMF_MDS_HSM_REQUEST;
-extern struct req_msg_field RMF_MDS_HSM_USER_ITEM;
-extern struct req_msg_field RMF_MDS_HSM_ARCHIVE;
-extern struct req_msg_field RMF_HSM_USER_STATE;
-extern struct req_msg_field RMF_HSM_STATE_SET;
-extern struct req_msg_field RMF_MDS_HSM_CURRENT_ACTION;
-extern struct req_msg_field RMF_MDS_HSM_REQUEST;
-
-/* seq-mgr fields */
-extern struct req_msg_field RMF_SEQ_OPC;
-extern struct req_msg_field RMF_SEQ_RANGE;
-extern struct req_msg_field RMF_FID_SPACE;
-
-/* FLD fields */
-extern struct req_msg_field RMF_FLD_OPC;
-extern struct req_msg_field RMF_FLD_MDFLD;
-
-extern struct req_msg_field RMF_LLOGD_BODY;
-extern struct req_msg_field RMF_LLOG_LOG_HDR;
-extern struct req_msg_field RMF_LLOGD_CONN_BODY;
-
-extern struct req_msg_field RMF_MGS_TARGET_INFO;
-extern struct req_msg_field RMF_MGS_SEND_PARAM;
-
-extern struct req_msg_field RMF_OST_BODY;
-extern struct req_msg_field RMF_OBD_IOOBJ;
-extern struct req_msg_field RMF_OBD_ID;
-extern struct req_msg_field RMF_FID;
-extern struct req_msg_field RMF_NIOBUF_REMOTE;
-extern struct req_msg_field RMF_RCS;
-extern struct req_msg_field RMF_FIEMAP_KEY;
-extern struct req_msg_field RMF_FIEMAP_VAL;
-extern struct req_msg_field RMF_OST_ID;
-
-/* MGS config read message format */
-extern struct req_msg_field RMF_MGS_CONFIG_BODY;
-extern struct req_msg_field RMF_MGS_CONFIG_RES;
-
-/* generic uint32 */
-extern struct req_msg_field RMF_U32;
-
-/** @} req_layout */
-
-#endif /* _LUSTRE_REQ_LAYOUT_H__ */
diff --git a/drivers/staging/lustre/lustre/include/lustre_sec.h b/drivers/staging/lustre/lustre/include/lustre_sec.h
deleted file mode 100644
index d35bcbc98831..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_sec.h
+++ /dev/null
@@ -1,1072 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef _LUSTRE_SEC_H_
-#define _LUSTRE_SEC_H_
-
-#include <linux/module.h>
-
-/** \defgroup sptlrpc sptlrpc
- *
- * @{
- */
-
-/*
- * to avoid include
- */
-struct obd_import;
-struct obd_export;
-struct ptlrpc_request;
-struct ptlrpc_reply_state;
-struct ptlrpc_bulk_desc;
-struct brw_page;
-/* Linux specific */
-struct key;
-struct seq_file;
-struct lustre_cfg;
-
-/*
- * forward declaration
- */
-struct ptlrpc_sec_policy;
-struct ptlrpc_sec_cops;
-struct ptlrpc_sec_sops;
-struct ptlrpc_sec;
-struct ptlrpc_svc_ctx;
-struct ptlrpc_cli_ctx;
-struct ptlrpc_ctx_ops;
-
-/**
- * \addtogroup flavor flavor
- *
- * RPC flavor is represented by a 32-bit integer. The high 12 bits are
- * currently unused and must be set to 0, reserving them for future expansion.
- * <pre>
- * ------------------------------------------------------------------------
- * | 4b (bulk svc) | 4b (bulk type) | 4b (svc) | 4b (mech) | 4b (policy) |
- * ------------------------------------------------------------------------
- * </pre>
- *
- * @{
- */
-
-/*
- * flavor constants
- */
-enum sptlrpc_policy {
- SPTLRPC_POLICY_NULL = 0,
- SPTLRPC_POLICY_PLAIN = 1,
- SPTLRPC_POLICY_GSS = 2,
- SPTLRPC_POLICY_MAX,
-};
-
-enum sptlrpc_mech_null {
- SPTLRPC_MECH_NULL = 0,
- SPTLRPC_MECH_NULL_MAX,
-};
-
-enum sptlrpc_mech_plain {
- SPTLRPC_MECH_PLAIN = 0,
- SPTLRPC_MECH_PLAIN_MAX,
-};
-
-enum sptlrpc_mech_gss {
- SPTLRPC_MECH_GSS_NULL = 0,
- SPTLRPC_MECH_GSS_KRB5 = 1,
- SPTLRPC_MECH_GSS_MAX,
-};
-
-enum sptlrpc_service_type {
- SPTLRPC_SVC_NULL = 0, /**< no security */
- SPTLRPC_SVC_AUTH = 1, /**< authentication only */
- SPTLRPC_SVC_INTG = 2, /**< integrity */
- SPTLRPC_SVC_PRIV = 3, /**< privacy */
- SPTLRPC_SVC_MAX,
-};
-
-enum sptlrpc_bulk_type {
- SPTLRPC_BULK_DEFAULT = 0, /**< follow rpc flavor */
- SPTLRPC_BULK_HASH = 1, /**< hash integrity */
- SPTLRPC_BULK_MAX,
-};
-
-enum sptlrpc_bulk_service {
- SPTLRPC_BULK_SVC_NULL = 0, /**< no security */
- SPTLRPC_BULK_SVC_AUTH = 1, /**< authentication only */
- SPTLRPC_BULK_SVC_INTG = 2, /**< integrity */
- SPTLRPC_BULK_SVC_PRIV = 3, /**< privacy */
- SPTLRPC_BULK_SVC_MAX,
-};
-
-/*
- * compose/extract macros
- */
-#define FLVR_POLICY_OFFSET (0)
-#define FLVR_MECH_OFFSET (4)
-#define FLVR_SVC_OFFSET (8)
-#define FLVR_BULK_TYPE_OFFSET (12)
-#define FLVR_BULK_SVC_OFFSET (16)
-
-#define MAKE_FLVR(policy, mech, svc, btype, bsvc) \
- (((__u32)(policy) << FLVR_POLICY_OFFSET) | \
- ((__u32)(mech) << FLVR_MECH_OFFSET) | \
- ((__u32)(svc) << FLVR_SVC_OFFSET) | \
- ((__u32)(btype) << FLVR_BULK_TYPE_OFFSET) | \
- ((__u32)(bsvc) << FLVR_BULK_SVC_OFFSET))
-
-/*
- * extraction
- */
-#define SPTLRPC_FLVR_POLICY(flavor) \
- ((((__u32)(flavor)) >> FLVR_POLICY_OFFSET) & 0xF)
-#define SPTLRPC_FLVR_MECH(flavor) \
- ((((__u32)(flavor)) >> FLVR_MECH_OFFSET) & 0xF)
-#define SPTLRPC_FLVR_SVC(flavor) \
- ((((__u32)(flavor)) >> FLVR_SVC_OFFSET) & 0xF)
-#define SPTLRPC_FLVR_BULK_TYPE(flavor) \
- ((((__u32)(flavor)) >> FLVR_BULK_TYPE_OFFSET) & 0xF)
-#define SPTLRPC_FLVR_BULK_SVC(flavor) \
- ((((__u32)(flavor)) >> FLVR_BULK_SVC_OFFSET) & 0xF)
-
-#define SPTLRPC_FLVR_BASE(flavor) \
- ((((__u32)(flavor)) >> FLVR_POLICY_OFFSET) & 0xFFF)
-#define SPTLRPC_FLVR_BASE_SUB(flavor) \
- ((((__u32)(flavor)) >> FLVR_MECH_OFFSET) & 0xFF)
-
-/*
- * gss subflavors
- */
-#define MAKE_BASE_SUBFLVR(mech, svc) \
- ((__u32)(mech) | \
- ((__u32)(svc) << (FLVR_SVC_OFFSET - FLVR_MECH_OFFSET)))
-
-#define SPTLRPC_SUBFLVR_KRB5N \
- MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_NULL)
-#define SPTLRPC_SUBFLVR_KRB5A \
- MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_AUTH)
-#define SPTLRPC_SUBFLVR_KRB5I \
- MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_INTG)
-#define SPTLRPC_SUBFLVR_KRB5P \
- MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_PRIV)
-
-/*
- * "end user" flavors
- */
-#define SPTLRPC_FLVR_NULL \
- MAKE_FLVR(SPTLRPC_POLICY_NULL, \
- SPTLRPC_MECH_NULL, \
- SPTLRPC_SVC_NULL, \
- SPTLRPC_BULK_DEFAULT, \
- SPTLRPC_BULK_SVC_NULL)
-#define SPTLRPC_FLVR_PLAIN \
- MAKE_FLVR(SPTLRPC_POLICY_PLAIN, \
- SPTLRPC_MECH_PLAIN, \
- SPTLRPC_SVC_NULL, \
- SPTLRPC_BULK_HASH, \
- SPTLRPC_BULK_SVC_INTG)
-#define SPTLRPC_FLVR_KRB5N \
- MAKE_FLVR(SPTLRPC_POLICY_GSS, \
- SPTLRPC_MECH_GSS_KRB5, \
- SPTLRPC_SVC_NULL, \
- SPTLRPC_BULK_DEFAULT, \
- SPTLRPC_BULK_SVC_NULL)
-#define SPTLRPC_FLVR_KRB5A \
- MAKE_FLVR(SPTLRPC_POLICY_GSS, \
- SPTLRPC_MECH_GSS_KRB5, \
- SPTLRPC_SVC_AUTH, \
- SPTLRPC_BULK_DEFAULT, \
- SPTLRPC_BULK_SVC_NULL)
-#define SPTLRPC_FLVR_KRB5I \
- MAKE_FLVR(SPTLRPC_POLICY_GSS, \
- SPTLRPC_MECH_GSS_KRB5, \
- SPTLRPC_SVC_INTG, \
- SPTLRPC_BULK_DEFAULT, \
- SPTLRPC_BULK_SVC_INTG)
-#define SPTLRPC_FLVR_KRB5P \
- MAKE_FLVR(SPTLRPC_POLICY_GSS, \
- SPTLRPC_MECH_GSS_KRB5, \
- SPTLRPC_SVC_PRIV, \
- SPTLRPC_BULK_DEFAULT, \
- SPTLRPC_BULK_SVC_PRIV)
-
-#define SPTLRPC_FLVR_DEFAULT SPTLRPC_FLVR_NULL
-
-#define SPTLRPC_FLVR_INVALID ((__u32)0xFFFFFFFF)
-#define SPTLRPC_FLVR_ANY ((__u32)0xFFF00000)
-
-/**
- * extract the useful part from wire flavor
- */
-#define WIRE_FLVR(wflvr) (((__u32)(wflvr)) & 0x000FFFFF)
-
-/** @} flavor */
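-
-/*
- * Worked example (editorial addition): SPTLRPC_FLVR_KRB5I packs policy
- * GSS (2), mechanism KRB5 (1), service INTG (2), default bulk type (0)
- * and bulk service INTG (2) into the value 0x20212; each extraction
- * macro recovers one nibble of that packed value.
- */
-static inline void sptlrpc_flavor_example(void)
-{
-	__u32 flvr = SPTLRPC_FLVR_KRB5I;	/* == 0x20212 */
-
-	(void)SPTLRPC_FLVR_POLICY(flvr);	/* SPTLRPC_POLICY_GSS (2) */
-	(void)SPTLRPC_FLVR_MECH(flvr);		/* SPTLRPC_MECH_GSS_KRB5 (1) */
-	(void)SPTLRPC_FLVR_SVC(flvr);		/* SPTLRPC_SVC_INTG (2) */
-	(void)SPTLRPC_FLVR_BULK_TYPE(flvr);	/* SPTLRPC_BULK_DEFAULT (0) */
-	(void)SPTLRPC_FLVR_BULK_SVC(flvr);	/* SPTLRPC_BULK_SVC_INTG (2) */
-}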
-
-static inline void flvr_set_svc(__u32 *flvr, __u32 svc)
-{
- LASSERT(svc < SPTLRPC_SVC_MAX);
- *flvr = MAKE_FLVR(SPTLRPC_FLVR_POLICY(*flvr),
- SPTLRPC_FLVR_MECH(*flvr),
- svc,
- SPTLRPC_FLVR_BULK_TYPE(*flvr),
- SPTLRPC_FLVR_BULK_SVC(*flvr));
-}
-
-static inline void flvr_set_bulk_svc(__u32 *flvr, __u32 svc)
-{
- LASSERT(svc < SPTLRPC_BULK_SVC_MAX);
- *flvr = MAKE_FLVR(SPTLRPC_FLVR_POLICY(*flvr),
- SPTLRPC_FLVR_MECH(*flvr),
- SPTLRPC_FLVR_SVC(*flvr),
- SPTLRPC_FLVR_BULK_TYPE(*flvr),
- svc);
-}
-
-struct bulk_spec_hash {
- __u8 hash_alg;
-};
-
-/**
- * Full description of flavors being used on a ptlrpc connection, including
- * both regular RPC and bulk transfer parts.
- */
-struct sptlrpc_flavor {
- /**
- * wire flavor, should be renamed to sf_wire.
- */
- __u32 sf_rpc;
- /**
- * general flags of PTLRPC_SEC_FL_*
- */
- __u32 sf_flags;
- /**
- * rpc flavor specification
- */
- union {
- /* nothing for now */
- } u_rpc;
- /**
- * bulk flavor specification
- */
- union {
- struct bulk_spec_hash hash;
- } u_bulk;
-};
-
-/**
- * Identifies which part of Lustre the RPC was generated from. It is encoded
- * into RPC requests and checked by the ptlrpc service.
- */
-enum lustre_sec_part {
- LUSTRE_SP_CLI = 0,
- LUSTRE_SP_MDT,
- LUSTRE_SP_OST,
- LUSTRE_SP_MGC,
- LUSTRE_SP_MGS,
- LUSTRE_SP_ANY = 0xFF
-};
-
-enum lustre_sec_part sptlrpc_target_sec_part(struct obd_device *obd);
-
-/**
- * A rule specifies a flavor to be used by a ptlrpc connection between
- * two Lustre parts.
- */
-struct sptlrpc_rule {
- __u32 sr_netid; /* LNET network ID */
- __u8 sr_from; /* sec_part */
- __u8 sr_to; /* sec_part */
- __u16 sr_padding;
- struct sptlrpc_flavor sr_flvr;
-};
-
-/**
- * A set of rules in memory.
- *
- * Rules are generated and stored on MGS, and propagated to MDT, OST,
- * and client when needed.
- */
-struct sptlrpc_rule_set {
- int srs_nslot;
- int srs_nrule;
- struct sptlrpc_rule *srs_rules;
-};
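-
-/*
- * Illustrative sketch (editorial addition): a rule saying "clients
- * talking to MDTs on LNET network id 0 use krb5i". The flavor part of a
- * rule is normally parsed from a configuration string via
- * sptlrpc_parse_flavor(), declared just below; wildcard network ids and
- * rule-set insertion are omitted here.
- */
-#if 0	/* example only */
-	struct sptlrpc_rule rule = {
-		.sr_netid	= 0,		/* a specific LNET net */
-		.sr_from	= LUSTRE_SP_CLI,
-		.sr_to		= LUSTRE_SP_MDT,
-		.sr_flvr	= { .sf_rpc = SPTLRPC_FLVR_KRB5I },
-	};
-#endif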
-
-int sptlrpc_parse_flavor(const char *str, struct sptlrpc_flavor *flvr);
-bool sptlrpc_flavor_has_bulk(struct sptlrpc_flavor *flvr);
-
-static inline void sptlrpc_rule_set_init(struct sptlrpc_rule_set *set)
-{
- memset(set, 0, sizeof(*set));
-}
-
-int sptlrpc_process_config(struct lustre_cfg *lcfg);
-void sptlrpc_conf_log_start(const char *logname);
-void sptlrpc_conf_log_stop(const char *logname);
-void sptlrpc_conf_log_update_begin(const char *logname);
-void sptlrpc_conf_log_update_end(const char *logname);
-void sptlrpc_conf_client_adapt(struct obd_device *obd);
-
-/* The maximum length of the security payload. 1024 is enough for Kerberos 5,
- * and is expected to suffice for future mechanisms, though this is not
- * guaranteed. Only used by the pre-allocated request/reply pool.
- */
-#define SPTLRPC_MAX_PAYLOAD (1024)
-
-struct vfs_cred {
- u32 vc_uid;
- u32 vc_gid;
-};
-
-struct ptlrpc_ctx_ops {
- /**
- * To determine whether it's suitable to use the \a ctx for \a vcred.
- */
- int (*match)(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred);
-
- /**
-	 * To bring the \a ctx up to date.
- */
- int (*refresh)(struct ptlrpc_cli_ctx *ctx);
-
- /**
- * Validate the \a ctx.
- */
- int (*validate)(struct ptlrpc_cli_ctx *ctx);
-
- /**
- * Force the \a ctx to die.
- */
- void (*force_die)(struct ptlrpc_cli_ctx *ctx, int grace);
- int (*display)(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize);
-
- /**
- * Sign the request message using \a ctx.
- *
-	 * \pre req->rq_reqmsg points to the request message.
-	 * \pre req->rq_reqlen is the request message length.
-	 * \post req->rq_reqbuf points to the request message with signature.
-	 * \post req->rq_reqdata_len is set to the final request message size.
- *
- * \see null_ctx_sign(), plain_ctx_sign(), gss_cli_ctx_sign().
- */
- int (*sign)(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req);
-
- /**
- * Verify the reply message using \a ctx.
- *
-	 * \pre req->rq_repdata points to the reply message with signature.
-	 * \pre req->rq_repdata_len is the total reply message length.
-	 * \post req->rq_repmsg points to the reply message without signature.
-	 * \post req->rq_replen is the reply message length.
- *
- * \see null_ctx_verify(), plain_ctx_verify(), gss_cli_ctx_verify().
- */
- int (*verify)(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req);
-
- /**
- * Encrypt the request message using \a ctx.
- *
-	 * \pre req->rq_reqmsg points to the request message in clear text.
-	 * \pre req->rq_reqlen is the request message length.
-	 * \post req->rq_reqbuf points to the request message.
-	 * \post req->rq_reqdata_len is set to the final request message size.
- *
- * \see gss_cli_ctx_seal().
- */
- int (*seal)(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req);
-
- /**
- * Decrypt the reply message using \a ctx.
- *
-	 * \pre req->rq_repdata points to the encrypted reply message.
-	 * \pre req->rq_repdata_len is the total cipher text length.
-	 * \post req->rq_repmsg points to the reply message in clear text.
-	 * \post req->rq_replen is the reply message length in clear text.
- *
- * \see gss_cli_ctx_unseal().
- */
- int (*unseal)(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req);
-
- /**
- * Wrap bulk request data. This is called before wrapping RPC
- * request message.
- *
-	 * \pre the bulk buffer is described by desc->bd_iov and
-	 * desc->bd_iov_count. Note that for a read it is just a buffer and
-	 * no data needs to be sent, while for a write it contains data in
-	 * clear text.
-	 * \post when necessary, ptlrpc_bulk_sec_desc was properly prepared
-	 * (usually inside the RPC request message).
-	 * - encryption: the cipher text bulk buffer is described by
-	 *   desc->bd_enc_iov and desc->bd_iov_count (currently the iov
-	 *   count is assumed to remain the same).
-	 * - otherwise: the bulk buffer is still desc->bd_iov and
-	 *   desc->bd_iov_count.
- *
- * \return 0: success.
-	 * \return -ve: error code.
- *
- * \see plain_cli_wrap_bulk(), gss_cli_ctx_wrap_bulk().
- */
- int (*wrap_bulk)(struct ptlrpc_cli_ctx *ctx,
- struct ptlrpc_request *req,
- struct ptlrpc_bulk_desc *desc);
-
- /**
- * Unwrap bulk reply data. This is called after wrapping RPC
- * reply message.
- *
-	 * \pre the bulk buffer is described by desc->bd_iov/desc->bd_enc_iov and
- * desc->bd_iov_count, according to wrap_bulk().
- * \post final bulk data in clear text is placed in buffer described
- * by desc->bd_iov and desc->bd_iov_count.
- * \return +ve nob of actual bulk data in clear text.
- * \return -ve error code.
- *
- * \see plain_cli_unwrap_bulk(), gss_cli_ctx_unwrap_bulk().
- */
- int (*unwrap_bulk)(struct ptlrpc_cli_ctx *ctx,
- struct ptlrpc_request *req,
- struct ptlrpc_bulk_desc *desc);
-};
-
-#define PTLRPC_CTX_NEW_BIT (0) /* newly created */
-#define PTLRPC_CTX_UPTODATE_BIT (1) /* uptodate */
-#define PTLRPC_CTX_DEAD_BIT (2) /* mark expired gracefully */
-#define PTLRPC_CTX_ERROR_BIT (3) /* fatal error (refresh, etc.) */
-#define PTLRPC_CTX_CACHED_BIT (8) /* in ctx cache (hash etc.) */
-#define PTLRPC_CTX_ETERNAL_BIT (9) /* always valid */
-
-#define PTLRPC_CTX_NEW (1 << PTLRPC_CTX_NEW_BIT)
-#define PTLRPC_CTX_UPTODATE (1 << PTLRPC_CTX_UPTODATE_BIT)
-#define PTLRPC_CTX_DEAD (1 << PTLRPC_CTX_DEAD_BIT)
-#define PTLRPC_CTX_ERROR (1 << PTLRPC_CTX_ERROR_BIT)
-#define PTLRPC_CTX_CACHED (1 << PTLRPC_CTX_CACHED_BIT)
-#define PTLRPC_CTX_ETERNAL (1 << PTLRPC_CTX_ETERNAL_BIT)
-
-/* Note: PTLRPC_CTX_NEW is not part of the status mask; a merely new
- * context has neither completed nor failed a refresh yet.
- */
-#define PTLRPC_CTX_STATUS_MASK	(PTLRPC_CTX_UPTODATE | \
-				 PTLRPC_CTX_DEAD | \
-				 PTLRPC_CTX_ERROR)
-
-struct ptlrpc_cli_ctx {
- struct hlist_node cc_cache; /* linked into ctx cache */
- atomic_t cc_refcount;
- struct ptlrpc_sec *cc_sec;
- struct ptlrpc_ctx_ops *cc_ops;
- unsigned long cc_expire; /* in seconds */
- unsigned int cc_early_expire:1;
- unsigned long cc_flags;
- struct vfs_cred cc_vcred;
- spinlock_t cc_lock;
- struct list_head cc_req_list; /* waiting reqs linked here */
- struct list_head cc_gc_chain; /* linked to gc chain */
-};
-
-/**
- * client side policy operation vector.
- */
-struct ptlrpc_sec_cops {
- /**
- * Given an \a imp, create and initialize a ptlrpc_sec structure.
- * \param ctx service context:
- * - regular import: \a ctx should be NULL;
- * - reverse import: \a ctx is obtained from incoming request.
- * \param flavor specify what flavor to use.
- *
-	 * When necessary, the policy module is responsible for taking a
-	 * reference on the import.
- *
- * \see null_create_sec(), plain_create_sec(), gss_sec_create_kr().
- */
- struct ptlrpc_sec *(*create_sec)(struct obd_import *imp,
- struct ptlrpc_svc_ctx *ctx,
- struct sptlrpc_flavor *flavor);
-
- /**
-	 * Destructor of ptlrpc_sec. When called, the refcount has been
-	 * dropped to 0 and all contexts have been destroyed.
- *
- * \see null_destroy_sec(), plain_destroy_sec(), gss_sec_destroy_kr().
- */
- void (*destroy_sec)(struct ptlrpc_sec *sec);
-
- /**
-	 * Notify that this ptlrpc_sec is going to die. The policy module
-	 * is expected to set sec->ps_dying and take whatever actions are
-	 * necessary.
- *
- * \see plain_kill_sec(), gss_sec_kill().
- */
- void (*kill_sec)(struct ptlrpc_sec *sec);
-
- /**
- * Given \a vcred, lookup and/or create its context. The policy module
- * is supposed to maintain its own context cache.
-	 * XXX: currently \a create and \a remove_dead are always 1, so
-	 * perhaps they should be removed completely.
- *
- * \see null_lookup_ctx(), plain_lookup_ctx(), gss_sec_lookup_ctx_kr().
- */
- struct ptlrpc_cli_ctx *(*lookup_ctx)(struct ptlrpc_sec *sec,
- struct vfs_cred *vcred,
- int create, int remove_dead);
-
- /**
-	 * Called when the reference count of \a ctx drops to 0. The policy module
- * is supposed to destroy this context or whatever else according to
- * its cache maintenance mechanism.
- *
- * \param sync if zero, we shouldn't wait for the context being
- * destroyed completely.
- *
- * \see plain_release_ctx(), gss_sec_release_ctx_kr().
- */
- void (*release_ctx)(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *ctx,
- int sync);
-
- /**
- * Flush the context cache.
- *
- * \param uid context of which user, -1 means all contexts.
- * \param grace if zero, the PTLRPC_CTX_UPTODATE_BIT of affected
- * contexts should be cleared immediately.
- * \param force if zero, only idle contexts will be flushed.
- *
- * \see plain_flush_ctx_cache(), gss_sec_flush_ctx_cache_kr().
- */
- int (*flush_ctx_cache)(struct ptlrpc_sec *sec, uid_t uid,
- int grace, int force);
-
- /**
- * Called periodically by garbage collector to remove dead contexts
- * from cache.
- *
- * \see gss_sec_gc_ctx_kr().
- */
- void (*gc_ctx)(struct ptlrpc_sec *sec);
-
- /**
-	 * Given a context \a ctx, install a corresponding reverse service
-	 * context on the client side.
-	 * XXX: currently only used by the GSS module; maybe this should be
-	 * removed from the general API.
- */
- int (*install_rctx)(struct obd_import *imp, struct ptlrpc_sec *sec,
- struct ptlrpc_cli_ctx *ctx);
-
- /**
- * To allocate request buffer for \a req.
- *
- * \pre req->rq_reqmsg == NULL.
-	 * \pre req->rq_reqbuf == NULL; otherwise it was pre-allocated and
-	 * we are not supposed to free it.
-	 * \post on success, req->rq_reqmsg points to a buffer with size
- * at least \a lustre_msg_size.
- *
- * \see null_alloc_reqbuf(), plain_alloc_reqbuf(), gss_alloc_reqbuf().
- */
- int (*alloc_reqbuf)(struct ptlrpc_sec *sec, struct ptlrpc_request *req,
- int lustre_msg_size);
-
- /**
- * To free request buffer for \a req.
- *
- * \pre req->rq_reqbuf != NULL.
- *
- * \see null_free_reqbuf(), plain_free_reqbuf(), gss_free_reqbuf().
- */
- void (*free_reqbuf)(struct ptlrpc_sec *sec, struct ptlrpc_request *req);
-
- /**
- * To allocate reply buffer for \a req.
- *
- * \pre req->rq_repbuf == NULL.
-	 * \post on success, req->rq_repbuf points to a buffer of size
-	 * req->rq_repbuf_len, large enough to receive a reply transformed
-	 * from \a lustre_msg_size bytes of clear text.
- *
- * \see null_alloc_repbuf(), plain_alloc_repbuf(), gss_alloc_repbuf().
- */
- int (*alloc_repbuf)(struct ptlrpc_sec *sec, struct ptlrpc_request *req,
- int lustre_msg_size);
-
- /**
- * To free reply buffer for \a req.
- *
- * \pre req->rq_repbuf != NULL.
- * \post req->rq_repbuf == NULL.
- * \post req->rq_repbuf_len == 0.
- *
- * \see null_free_repbuf(), plain_free_repbuf(), gss_free_repbuf().
- */
- void (*free_repbuf)(struct ptlrpc_sec *sec, struct ptlrpc_request *req);
-
- /**
-	 * To expand the request buffer of \a req, so that the \a segment in
-	 * the request message pointed to by req->rq_reqmsg can accommodate
-	 * at least \a newsize bytes of data.
- *
- * \pre req->rq_reqmsg->lm_buflens[segment] < newsize.
- *
- * \see null_enlarge_reqbuf(), plain_enlarge_reqbuf(),
- * gss_enlarge_reqbuf().
- */
- int (*enlarge_reqbuf)(struct ptlrpc_sec *sec,
- struct ptlrpc_request *req,
- int segment, int newsize);
- /*
- * misc
- */
- int (*display)(struct ptlrpc_sec *sec, struct seq_file *seq);
-};
-
-/**
- * server side policy operation vector.
- */
-struct ptlrpc_sec_sops {
- /**
- * verify an incoming request.
- *
-	 * \pre the request message is pointed to by req->rq_reqbuf, its
-	 * size is req->rq_reqdata_len, and the message has been unpacked
-	 * to host byte order.
-	 *
-	 * \retval SECSVC_OK success; req->rq_reqmsg points to the request
-	 * message in clear text, size is req->rq_reqlen; req->rq_svc_ctx is
-	 * set; req->rq_sp_from is decoded from the request.
-	 * \retval SECSVC_COMPLETE success; the request has been fully
-	 * processed, and the reply message has been prepared; req->rq_sp_from
-	 * is decoded from the request.
-	 * \retval SECSVC_DROP failed; this request should be dropped.
- *
- * \see null_accept(), plain_accept(), gss_svc_accept_kr().
- */
- int (*accept)(struct ptlrpc_request *req);
-
- /**
- * Perform security transformation upon reply message.
- *
-	 * \pre the reply message is pointed to by req->rq_reply_state->rs_msg,
-	 * its size is req->rq_replen.
-	 * \post req->rq_reply_state->rs_repdata_len is the final message size.
- * \post req->rq_reply_off is set.
- *
- * \see null_authorize(), plain_authorize(), gss_svc_authorize().
- */
- int (*authorize)(struct ptlrpc_request *req);
-
- /**
- * Invalidate server context \a ctx.
- *
- * \see gss_svc_invalidate_ctx().
- */
- void (*invalidate_ctx)(struct ptlrpc_svc_ctx *ctx);
-
- /**
- * Allocate a ptlrpc_reply_state.
- *
- * \param msgsize size of the reply message in clear text.
-	 * \pre if req->rq_reply_state != NULL, then it is pre-allocated and
-	 * we should simply use it; otherwise we are responsible for
-	 * allocating a new one.
- * \post req->rq_reply_state != NULL;
- * \post req->rq_reply_state->rs_msg != NULL;
- *
- * \see null_alloc_rs(), plain_alloc_rs(), gss_svc_alloc_rs().
- */
- int (*alloc_rs)(struct ptlrpc_request *req, int msgsize);
-
- /**
- * Free a ptlrpc_reply_state.
- */
- void (*free_rs)(struct ptlrpc_reply_state *rs);
-
- /**
- * Release the server context \a ctx.
- *
- * \see gss_svc_free_ctx().
- */
- void (*free_ctx)(struct ptlrpc_svc_ctx *ctx);
-
- /**
- * Install a reverse context based on the server context \a ctx.
- *
- * \see gss_svc_install_rctx_kr().
- */
- int (*install_rctx)(struct obd_import *imp, struct ptlrpc_svc_ctx *ctx);
-
- /**
- * Prepare buffer for incoming bulk write.
- *
-	 * \pre desc->bd_iov and desc->bd_iov_count describe the buffer
-	 * intended to receive the write.
- *
- * \see gss_svc_prep_bulk().
- */
- int (*prep_bulk)(struct ptlrpc_request *req,
- struct ptlrpc_bulk_desc *desc);
-
- /**
- * Unwrap the bulk write data.
- *
- * \see plain_svc_unwrap_bulk(), gss_svc_unwrap_bulk().
- */
- int (*unwrap_bulk)(struct ptlrpc_request *req,
- struct ptlrpc_bulk_desc *desc);
-
- /**
- * Wrap the bulk read data.
- *
- * \see plain_svc_wrap_bulk(), gss_svc_wrap_bulk().
- */
- int (*wrap_bulk)(struct ptlrpc_request *req,
- struct ptlrpc_bulk_desc *desc);
-};
-
-struct ptlrpc_sec_policy {
- struct module *sp_owner;
- char *sp_name;
- __u16 sp_policy; /* policy number */
- struct ptlrpc_sec_cops *sp_cops; /* client ops */
- struct ptlrpc_sec_sops *sp_sops; /* server ops */
-};
-
-#define PTLRPC_SEC_FL_REVERSE 0x0001 /* reverse sec */
-#define PTLRPC_SEC_FL_ROOTONLY 0x0002 /* treat everyone as root */
-#define PTLRPC_SEC_FL_UDESC 0x0004 /* ship udesc */
-#define PTLRPC_SEC_FL_BULK 0x0008 /* intensive bulk i/o expected */
-#define PTLRPC_SEC_FL_PAG 0x0010 /* PAG mode */
-
-/**
- * The ptlrpc_sec represents the client side ptlrpc security facilities,
- * each obd_import (both regular and reverse import) must associate with
- * a ptlrpc_sec.
- *
- * \see sptlrpc_import_sec_adapt().
- */
-struct ptlrpc_sec {
- struct ptlrpc_sec_policy *ps_policy;
- atomic_t ps_refcount;
- /** statistic only */
- atomic_t ps_nctx;
- /** unique identifier */
- int ps_id;
- struct sptlrpc_flavor ps_flvr;
- enum lustre_sec_part ps_part;
-	/** once set, no new contexts will be created */
- unsigned int ps_dying:1;
- /** owning import */
- struct obd_import *ps_import;
- spinlock_t ps_lock;
-
- /*
- * garbage collection
- */
- struct list_head ps_gc_list;
- unsigned long ps_gc_interval; /* in seconds */
- time64_t ps_gc_next; /* in seconds */
-};
-
-static inline int sec_is_reverse(struct ptlrpc_sec *sec)
-{
- return (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_REVERSE);
-}
-
-static inline int sec_is_rootonly(struct ptlrpc_sec *sec)
-{
- return (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_ROOTONLY);
-}
-
-struct ptlrpc_svc_ctx {
- atomic_t sc_refcount;
- struct ptlrpc_sec_policy *sc_policy;
-};
-
-/*
- * user identity descriptor
- */
-#define LUSTRE_MAX_GROUPS (128)
-
-struct ptlrpc_user_desc {
- __u32 pud_uid;
- __u32 pud_gid;
- __u32 pud_fsuid;
- __u32 pud_fsgid;
- __u32 pud_cap;
- __u32 pud_ngroups;
- __u32 pud_groups[0];
-};
-
-/*
- * bulk flavors
- */
-enum sptlrpc_bulk_hash_alg {
- BULK_HASH_ALG_NULL = 0,
- BULK_HASH_ALG_ADLER32,
- BULK_HASH_ALG_CRC32,
- BULK_HASH_ALG_MD5,
- BULK_HASH_ALG_SHA1,
- BULK_HASH_ALG_SHA256,
- BULK_HASH_ALG_SHA384,
- BULK_HASH_ALG_SHA512,
- BULK_HASH_ALG_MAX
-};
-
-const char *sptlrpc_get_hash_name(__u8 hash_alg);
-__u8 sptlrpc_get_hash_alg(const char *algname);
-
-enum {
- BSD_FL_ERR = 1,
-};
-
-struct ptlrpc_bulk_sec_desc {
- __u8 bsd_version; /* 0 */
- __u8 bsd_type; /* SPTLRPC_BULK_XXX */
- __u8 bsd_svc; /* SPTLRPC_BULK_SVC_XXXX */
- __u8 bsd_flags; /* flags */
- __u32 bsd_nob; /* nob of bulk data */
- __u8 bsd_data[0]; /* policy-specific token */
-};
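-
-/*
- * Illustrative note (not from the original header): the fixed part of
- * this descriptor is 8 bytes (four __u8 fields plus one __u32), so an
- * on-wire descriptor carrying a 16-byte policy token occupies
- * 8 + 16 = 24 bytes in the message buffer.
- */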
-
-/*
- * round size up to the next power of 2, for slab allocation.
- * @size must be sane (must not overflow after rounding up)
- */
-static inline int size_roundup_power2(int size)
-{
- size--;
- size |= size >> 1;
- size |= size >> 2;
- size |= size >> 4;
- size |= size >> 8;
- size |= size >> 16;
- size++;
- return size;
-}
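-
-/*
- * Worked examples (illustrative, not in the original header):
- * size_roundup_power2(3) == 4, size_roundup_power2(1000) == 1024, and
- * exact powers of two such as 4096 are returned unchanged; a 700-byte
- * token would therefore be served from a 1024-byte slab object.
- */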
-
-/*
- * internal support libraries
- */
-void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg,
- int segment, int newsize);
-
-/*
- * security policies
- */
-int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy);
-int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy);
-
-__u32 sptlrpc_name2flavor_base(const char *name);
-const char *sptlrpc_flavor2name_base(__u32 flvr);
-char *sptlrpc_flavor2name_bulk(struct sptlrpc_flavor *sf,
- char *buf, int bufsize);
-char *sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize);
-
-static inline
-struct ptlrpc_sec_policy *sptlrpc_policy_get(struct ptlrpc_sec_policy *policy)
-{
- __module_get(policy->sp_owner);
- return policy;
-}
-
-static inline
-void sptlrpc_policy_put(struct ptlrpc_sec_policy *policy)
-{
- module_put(policy->sp_owner);
-}
-
-/*
- * client credential
- */
-static inline
-unsigned long cli_ctx_status(struct ptlrpc_cli_ctx *ctx)
-{
- return (ctx->cc_flags & PTLRPC_CTX_STATUS_MASK);
-}
-
-static inline
-int cli_ctx_is_ready(struct ptlrpc_cli_ctx *ctx)
-{
- return (cli_ctx_status(ctx) == PTLRPC_CTX_UPTODATE);
-}
-
-static inline
-int cli_ctx_is_refreshed(struct ptlrpc_cli_ctx *ctx)
-{
- return (cli_ctx_status(ctx) != 0);
-}
-
-static inline
-int cli_ctx_is_uptodate(struct ptlrpc_cli_ctx *ctx)
-{
- return ((ctx->cc_flags & PTLRPC_CTX_UPTODATE) != 0);
-}
-
-static inline
-int cli_ctx_is_error(struct ptlrpc_cli_ctx *ctx)
-{
- return ((ctx->cc_flags & PTLRPC_CTX_ERROR) != 0);
-}
-
-static inline
-int cli_ctx_is_dead(struct ptlrpc_cli_ctx *ctx)
-{
- return ((ctx->cc_flags & (PTLRPC_CTX_DEAD | PTLRPC_CTX_ERROR)) != 0);
-}
-
-static inline
-int cli_ctx_is_eternal(struct ptlrpc_cli_ctx *ctx)
-{
- return ((ctx->cc_flags & PTLRPC_CTX_ETERNAL) != 0);
-}
-
-/*
- * sec get/put
- */
-void sptlrpc_sec_put(struct ptlrpc_sec *sec);
-
-/*
- * internal APIs used only by policy implementations
- */
-int sptlrpc_get_next_secid(void);
-
-/*
- * exported client context api
- */
-struct ptlrpc_cli_ctx *sptlrpc_cli_ctx_get(struct ptlrpc_cli_ctx *ctx);
-void sptlrpc_cli_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync);
-
-/*
- * exported client context wrap/buffers
- */
-int sptlrpc_cli_wrap_request(struct ptlrpc_request *req);
-int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req);
-int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize);
-void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req);
-int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize);
-void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req);
-int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req,
- int segment, int newsize);
-int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req,
- struct ptlrpc_request **req_ret);
-void sptlrpc_cli_finish_early_reply(struct ptlrpc_request *early_req);
-
-void sptlrpc_request_out_callback(struct ptlrpc_request *req);
-
-/*
- * exported higher-level interface for imports & requests
- */
-int sptlrpc_import_sec_adapt(struct obd_import *imp,
- struct ptlrpc_svc_ctx *ctx,
- struct sptlrpc_flavor *flvr);
-struct ptlrpc_sec *sptlrpc_import_sec_ref(struct obd_import *imp);
-void sptlrpc_import_sec_put(struct obd_import *imp);
-
-int sptlrpc_import_check_ctx(struct obd_import *imp);
-void sptlrpc_import_flush_root_ctx(struct obd_import *imp);
-void sptlrpc_import_flush_my_ctx(struct obd_import *imp);
-void sptlrpc_import_flush_all_ctx(struct obd_import *imp);
-int sptlrpc_req_get_ctx(struct ptlrpc_request *req);
-void sptlrpc_req_put_ctx(struct ptlrpc_request *req, int sync);
-int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout);
-void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode);
-
-/* gc */
-void sptlrpc_gc_add_sec(struct ptlrpc_sec *sec);
-void sptlrpc_gc_del_sec(struct ptlrpc_sec *sec);
-
-/* misc */
-const char *sec2target_str(struct ptlrpc_sec *sec);
-/*
- * lprocfs
- */
-int sptlrpc_lprocfs_cliobd_attach(struct obd_device *dev);
-
-/*
- * server side
- */
-enum secsvc_accept_res {
- SECSVC_OK = 0,
- SECSVC_COMPLETE,
- SECSVC_DROP,
-};
-
-int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req);
-int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, int msglen);
-int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req);
-void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs);
-void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req);
-void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req);
-
-int sptlrpc_target_export_check(struct obd_export *exp,
- struct ptlrpc_request *req);
-
-/* bulk security api */
-void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc);
-int get_free_pages_in_pool(void);
-int pool_is_at_full_capacity(void);
-
-int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
- struct ptlrpc_bulk_desc *desc);
-int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
- struct ptlrpc_bulk_desc *desc,
- int nob);
-int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
- struct ptlrpc_bulk_desc *desc);
-
-/* bulk helpers (internal use only by policies) */
-int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
- void *buf, int buflen);
-
-int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed);
-
-/* user descriptor helpers */
-static inline int sptlrpc_user_desc_size(int ngroups)
-{
- return sizeof(struct ptlrpc_user_desc) + ngroups * sizeof(__u32);
-}
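-
-/*
- * Worked example (illustrative, not in the original header): with six
- * fixed __u32 fields in struct ptlrpc_user_desc,
- * sptlrpc_user_desc_size(4) == 24 + 4 * 4 == 40 bytes.
- */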
-
-int sptlrpc_current_user_desc_size(void);
-int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset);
-int sptlrpc_unpack_user_desc(struct lustre_msg *req, int offset, int swabbed);
-
-enum {
- LUSTRE_SEC_NONE = 0,
- LUSTRE_SEC_REMOTE = 1,
- LUSTRE_SEC_SPECIFY = 2,
- LUSTRE_SEC_ALL = 3
-};
-
-/** @} sptlrpc */
-
-#endif /* _LUSTRE_SEC_H_ */
diff --git a/drivers/staging/lustre/lustre/include/lustre_swab.h b/drivers/staging/lustre/lustre/include/lustre_swab.h
deleted file mode 100644
index 9d786bbe7f3f..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_swab.h
+++ /dev/null
@@ -1,109 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2014, Intel Corporation.
- *
- * Copyright 2015 Cray Inc, all rights reserved.
- * Author: Ben Evans.
- *
- * We assume all nodes are either little-endian or big-endian, and we
- * always send messages in the sender's native format. The receiver
- * detects the message format by checking the 'magic' field of the message
- * (see lustre_msg_swabbed()).
- *
- * Each wire type has a corresponding 'lustre_swab_xxxtypexxx()' routine,
- * implemented in ptlrpc/lustre_swab.c. These 'swabbers' convert the
- * type from the "other" endian, in-place in the message buffer.
- *
- * A swabber takes a single pointer argument. The caller must already have
- * verified that the length of the message buffer >= sizeof (type).
- *
- * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine
- * may be defined that swabs just the variable part, after the caller has
- * verified that the message buffer is large enough.
- */
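-
-/*
- * A minimal sketch of such a swabber (illustrative only; the real
- * implementations live in ptlrpc/lustre_swab.c):
- *
- *     void lustre_swab_ost_last_id(__u64 *id)
- *     {
- *         __swab64s(id);
- *     }
- *
- * __swab64s() byte-reverses the value in place; per the contract above,
- * the caller has already verified that the buffer holds at least
- * sizeof(*id) bytes.
- */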
-
-#ifndef _LUSTRE_SWAB_H_
-#define _LUSTRE_SWAB_H_
-
-#include <uapi/linux/lustre/lustre_idl.h>
-
-void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
-void lustre_swab_connect(struct obd_connect_data *ocd);
-void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
-void lustre_swab_hsm_state_set(struct hsm_state_set *hss);
-void lustre_swab_obd_statfs(struct obd_statfs *os);
-void lustre_swab_obd_ioobj(struct obd_ioobj *ioo);
-void lustre_swab_niobuf_remote(struct niobuf_remote *nbr);
-void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb);
-void lustre_swab_ost_lvb(struct ost_lvb *lvb);
-void lustre_swab_obd_quotactl(struct obd_quotactl *q);
-void lustre_swab_lquota_lvb(struct lquota_lvb *lvb);
-void lustre_swab_generic_32s(__u32 *val);
-void lustre_swab_mdt_body(struct mdt_body *b);
-void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b);
-void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa);
-void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr);
-void lustre_swab_lmv_desc(struct lmv_desc *ld);
-void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm);
-void lustre_swab_lov_desc(struct lov_desc *ld);
-void lustre_swab_gl_desc(union ldlm_gl_desc *desc);
-void lustre_swab_ldlm_intent(struct ldlm_intent *i);
-void lustre_swab_ldlm_request(struct ldlm_request *rq);
-void lustre_swab_ldlm_reply(struct ldlm_reply *r);
-void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo);
-void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *oinfo);
-void lustre_swab_mgs_config_body(struct mgs_config_body *body);
-void lustre_swab_mgs_config_res(struct mgs_config_res *body);
-void lustre_swab_ost_body(struct ost_body *b);
-void lustre_swab_ost_last_id(__u64 *id);
-void lustre_swab_fiemap(struct fiemap *fiemap);
-void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum);
-void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum);
-void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
- int stripe_count);
-void lustre_swab_lov_mds_md(struct lov_mds_md *lmm);
-void lustre_swab_lustre_capa(struct lustre_capa *c);
-void lustre_swab_lustre_capa_key(struct lustre_capa_key *k);
-void lustre_swab_fid2path(struct getinfo_fid2path *gf);
-void lustre_swab_layout_intent(struct layout_intent *li);
-void lustre_swab_hsm_current_action(struct hsm_current_action *action);
-void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk);
-void lustre_swab_hsm_user_item(struct hsm_user_item *hui);
-void lustre_swab_hsm_request(struct hsm_request *hr);
-void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl);
-void lustre_swab_close_data(struct close_data *data);
-void lustre_swab_lmv_user_md(struct lmv_user_md *lum);
-
-/* Functions for dumping PTLRPC fields */
-void dump_rniobuf(struct niobuf_remote *rnb);
-void dump_ioo(struct obd_ioobj *nb);
-void dump_ost_body(struct ost_body *ob);
-void dump_rcs(__u32 *rc);
-
-#endif
diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h
deleted file mode 100644
index b1907bbffb19..000000000000
--- a/drivers/staging/lustre/lustre/include/obd.h
+++ /dev/null
@@ -1,1114 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __OBD_H
-#define __OBD_H
-
-#include <linux/spinlock.h>
-
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <lustre_lib.h>
-#include <lu_ref.h>
-#include <lustre_export.h>
-#include <lustre_fid.h>
-#include <lustre_fld.h>
-#include <lustre_handles.h>
-#include <lustre_intent.h>
-#include <cl_object.h>
-
-#include <linux/rhashtable.h>
-
-#define MAX_OBD_DEVICES 8192
-
-struct osc_async_rc {
- int ar_rc;
- int ar_force_sync;
- __u64 ar_min_xid;
-};
-
-struct lov_oinfo { /* per-stripe data structure */
- struct ost_id loi_oi; /* object ID/Sequence on the target OST */
- int loi_ost_idx; /* OST stripe index in lov_tgt_desc->tgts */
- int loi_ost_gen; /* generation of this loi_ost_idx */
-
- unsigned long loi_kms_valid:1;
- __u64 loi_kms; /* known minimum size */
- struct ost_lvb loi_lvb;
- struct osc_async_rc loi_ar;
-};
-
-static inline void loi_kms_set(struct lov_oinfo *oinfo, __u64 kms)
-{
- oinfo->loi_kms = kms;
- oinfo->loi_kms_valid = 1;
-}
-
-static inline void loi_init(struct lov_oinfo *loi)
-{
-}
-
-struct lov_stripe_md;
-struct obd_info;
-
-int lov_read_and_clear_async_rc(struct cl_object *clob);
-
-typedef int (*obd_enqueue_update_f)(void *cookie, int rc);
-
-/* obd info for a particular level (lov, osc). */
-struct obd_info {
- /* OBD_STATFS_* flags */
- __u64 oi_flags;
- /* lsm data specific for every OSC. */
- struct lov_stripe_md *oi_md;
- /* statfs data specific for every OSC, if needed at all. */
- struct obd_statfs *oi_osfs;
- /* An update callback which is called to update some data at an upper
- * level. E.g. it is used to update lsm->lsm_oinfo at every received
- * request at the osc level for enqueue requests. It is also possible
- * to update some caller data from the LOV layer if needed.
- */
- obd_enqueue_update_f oi_cb_up;
-};
-
-struct obd_type {
- struct list_head typ_chain;
- struct obd_ops *typ_dt_ops;
- struct md_ops *typ_md_ops;
- struct dentry *typ_debugfs_entry;
- char *typ_name;
- int typ_refcnt;
- struct lu_device_type *typ_lu;
- spinlock_t obd_type_lock;
- struct kobject *typ_kobj;
-};
-
-struct brw_page {
- u64 off;
- struct page *pg;
- unsigned int count;
- u32 flag;
-};
-
-struct timeout_item {
- enum timeout_event ti_event;
- unsigned long ti_timeout;
- timeout_cb_t ti_cb;
- void *ti_cb_data;
- struct list_head ti_obd_list;
- struct list_head ti_chain;
-};
-
-#define OBD_MAX_RIF_DEFAULT 8
-#define OBD_MAX_RIF_MAX 512
-#define OSC_MAX_RIF_MAX 256
-#define OSC_MAX_DIRTY_DEFAULT (OBD_MAX_RIF_DEFAULT * 4)
-#define OSC_MAX_DIRTY_MB_MAX 2048 /* arbitrary, but < LONG_MAX bytes */
-#define OSC_DEFAULT_RESENDS 10
-
-/* possible values for fo_sync_lock_cancel */
-enum {
- NEVER_SYNC_ON_CANCEL = 0,
- BLOCKING_SYNC_ON_CANCEL = 1,
- ALWAYS_SYNC_ON_CANCEL = 2,
- NUM_SYNC_ON_CANCEL_STATES
-};
-
-enum obd_cl_sem_lock_class {
- OBD_CLI_SEM_NORMAL,
- OBD_CLI_SEM_MGC,
- OBD_CLI_SEM_MDCOSC,
-};
-
-/*
- * Limit reply buffer size for striping data to one x86_64 page. This
- * value is chosen to fit the striping data for common use cases while
- * staying well below the limit at which the buffer must be backed by
- * vmalloc(). Excessive use of vmalloc() may cause spinlock contention
- * on the MDS.
- */
-#define OBD_MAX_DEFAULT_EA_SIZE 4096
-
-struct mdc_rpc_lock;
-struct obd_import;
-struct client_obd {
- struct rw_semaphore cl_sem;
- struct obd_uuid cl_target_uuid;
- struct obd_import *cl_import; /* ptlrpc connection state */
- size_t cl_conn_count;
- /*
- * Cache maximum and default values for easize. This is
- * strictly a performance optimization to minimize calls to
- * obd_size_diskmd(). The default values are used to calculate the
- * initial size of a request buffer. The ptlrpc layer will resize the
- * buffer as needed to accommodate a larger reply from the
- * server. The default values should be small enough to avoid wasted
- * memory and excessive use of vmalloc(), yet large enough to avoid
- * reallocating the buffer in the common use case.
- */
- /*
- * Default EA size for striping attributes. It is initialized at
- * mount-time based on the default stripe width of the filesystem,
- * then it tracks the largest observed EA size advertised by
- * the MDT, up to a maximum value of OBD_MAX_DEFAULT_EA_SIZE.
- */
- u32 cl_default_mds_easize;
- /* Maximum possible EA size computed at mount-time based on
- * the number of OSTs in the filesystem. May be increased at
- * run-time if a larger observed size is advertised by the MDT.
- */
- u32 cl_max_mds_easize;
-
- enum lustre_sec_part cl_sp_me;
- enum lustre_sec_part cl_sp_to;
- struct sptlrpc_flavor cl_flvr_mgc; /* fixed flavor of mgc->mgs */
-
- /* the grant values are protected by loi_list_lock below */
- unsigned long cl_dirty_pages; /* all _dirty_ in pages */
- unsigned long cl_dirty_max_pages; /* allowed w/o rpc */
- unsigned long cl_dirty_transit; /* dirty synchronous */
- unsigned long cl_avail_grant; /* bytes of credit for ost */
- unsigned long cl_lost_grant; /* lost credits (trunc) */
-
- /* since we allocate grant by blocks, we don't know how much grant will
- * be used to add a page into the cache. As a solution, we reserve the
- * maximum grant before trying to dirty a page and unreserve the rest.
- * See osc_{reserve|unreserve}_grant for details.
- */
- long cl_reserved_grant;
- struct list_head cl_cache_waiters; /* waiting for cache/grant */
- unsigned long cl_next_shrink_grant; /* jiffies */
- struct list_head cl_grant_shrink_list; /* Timeout event list */
- int cl_grant_shrink_interval; /* seconds */
-
- /* A chunk is an optimal size used by osc_extent to determine
- * the extent size. A chunk is max(PAGE_SIZE, OST block size)
- */
- int cl_chunkbits;
- unsigned int cl_extent_tax; /* extent overhead, in bytes */
-
- /* keep track of objects that have lois that contain pages which
- * have been queued for async brw. this lock also protects the
- * lists of osc_client_pages that hang off of the loi
- */
- /*
- * ->cl_loi_list_lock protects consistency of
- * ->cl_loi_{ready,read,write}_list. ->ap_make_ready() and
- * ->ap_completion() call-backs are executed under this lock. As we
- * cannot guarantee that these call-backs never block on all platforms
- * (as a matter of fact they do block on Mac OS X), type of
- * ->cl_loi_list_lock is platform dependent: it's a spin-lock on Linux
- * and blocking mutex on Mac OS X. (Alternative is to make this lock
- * blocking everywhere, but we don't want to slow down fast-path of
- * our main platform.)
- *
- * NB by Jinshan: though the field names are still _loi_, it is
- * actually osc_object{}s that are on these lists.
- */
- spinlock_t cl_loi_list_lock;
- struct list_head cl_loi_ready_list;
- struct list_head cl_loi_hp_ready_list;
- struct list_head cl_loi_write_list;
- struct list_head cl_loi_read_list;
- __u32 cl_r_in_flight;
- __u32 cl_w_in_flight;
- /* just a sum of the loi/lop pending numbers to be exported by sysfs */
- atomic_t cl_pending_w_pages;
- atomic_t cl_pending_r_pages;
- __u32 cl_max_pages_per_rpc;
- __u32 cl_max_rpcs_in_flight;
- struct obd_histogram cl_read_rpc_hist;
- struct obd_histogram cl_write_rpc_hist;
- struct obd_histogram cl_read_page_hist;
- struct obd_histogram cl_write_page_hist;
- struct obd_histogram cl_read_offset_hist;
- struct obd_histogram cl_write_offset_hist;
-
- /* LRU for osc caching pages */
- struct cl_client_cache *cl_cache;
- /** member of cl_cache->ccc_lru */
- struct list_head cl_lru_osc;
- /** # of available LRU slots left in the per-OSC cache.
- * Available LRU slots are shared by all OSCs of the same file system,
- * therefore this is a pointer to cl_client_cache::ccc_lru_left.
- */
- atomic_long_t *cl_lru_left;
- /** # of busy LRU pages. A page is considered busy if it's in the
- * writeback queue or in transfer. Busy pages can't be discarded, so
- * they are not in the LRU cache.
- */
- atomic_long_t cl_lru_busy;
- /** # of LRU pages in the cache for this client_obd */
- atomic_long_t cl_lru_in_list;
- /** # of threads shrinking the LRU cache. To avoid contention, multiple
- * threads are not allowed to shrink the LRU cache at the same time.
- */
- atomic_t cl_lru_shrinkers;
- /** The time when this LRU cache was last used. */
- time64_t cl_lru_last_used;
- /** stats: how many reclaims have happened for this client_obd.
- * Reclaim vs. shrink: shrink is async, voluntary rebalancing;
- * reclaim is sync, initiated by an IO thread when LRU slots are
- * in short supply.
- */
- u64 cl_lru_reclaim;
- /** List of LRU pages for this client_obd */
- struct list_head cl_lru_list;
- /** Lock for LRU page list */
- spinlock_t cl_lru_list_lock;
- /** # of unstable pages in this client_obd.
- * An unstable page is one whose WRITE RPC has finished but whose
- * transaction has NOT yet committed.
- */
- atomic_long_t cl_unstable_count;
- /** Link to osc_shrinker_list */
- struct list_head cl_shrink_list;
-
- /* the number of in-flight destroy RPCs is limited to max_rpcs_in_flight */
- atomic_t cl_destroy_in_flight;
- wait_queue_head_t cl_destroy_waitq;
-
- struct mdc_rpc_lock *cl_rpc_lock;
-
- /* modify RPCs in flight;
- * currently used for metadata only
- */
- spinlock_t cl_mod_rpcs_lock;
- u16 cl_max_mod_rpcs_in_flight;
- u16 cl_mod_rpcs_in_flight;
- u16 cl_close_rpcs_in_flight;
- wait_queue_head_t cl_mod_rpcs_waitq;
- unsigned long *cl_mod_tag_bitmap;
- struct obd_histogram cl_mod_rpcs_hist;
-
- /* mgc datastruct */
- atomic_t cl_mgc_refcount;
- struct obd_export *cl_mgc_mgsexp;
-
- /* checksumming for data sent over the network */
- unsigned int cl_checksum:1; /* 0 = disabled, 1 = enabled */
- /* supported checksum types that are worked out at connect time */
- __u32 cl_supp_cksum_types;
- /* checksum algorithm to be used */
- enum cksum_type cl_cksum_type;
-
- /* also protected by the poorly named _loi_list_lock lock above */
- struct osc_async_rc cl_ar;
-
- /* sequence manager */
- struct lu_client_seq *cl_seq;
-
- atomic_t cl_resends; /* resend count */
-
- /* ptlrpc work for writeback in ptlrpcd context */
- void *cl_writeback_work;
- void *cl_lru_work;
- /* hash tables for osc_quota_info */
- struct rhashtable cl_quota_hash[MAXQUOTAS];
-};
-
-#define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid)
-
-struct obd_id_info {
- __u32 idx;
- u64 *data;
-};
-
-struct echo_client_obd {
- struct obd_export *ec_exp; /* the local connection to osc/lov */
- spinlock_t ec_lock;
- struct list_head ec_objects;
- struct list_head ec_locks;
- __u64 ec_unique;
-};
-
-/* Generic subset of OSTs */
-struct ost_pool {
- __u32 *op_array; /* array of indices into lov_obd->lov_tgts */
- unsigned int op_count; /* number of OSTs in the array */
- unsigned int op_size; /* allocated size of op_array */
- struct rw_semaphore op_rw_sem; /* to protect ost_pool use */
-};
-
-/* allow statfs data caching for 1 second */
-#define OBD_STATFS_CACHE_SECONDS 1
-
-struct lov_tgt_desc {
- struct list_head ltd_kill;
- struct obd_uuid ltd_uuid;
- struct obd_device *ltd_obd;
- struct obd_export *ltd_exp;
- __u32 ltd_gen;
- __u32 ltd_index; /* index in lov_obd->tgts */
- unsigned long ltd_active:1,/* is this target up for requests */
- ltd_activate:1,/* should target be activated */
- ltd_reap:1; /* should this target be deleted */
-};
-
-struct lov_obd {
- struct lov_desc desc;
- struct lov_tgt_desc **lov_tgts; /* sparse array */
- struct ost_pool lov_packed; /* all OSTs in a packed array */
- struct mutex lov_lock;
- struct obd_connect_data lov_ocd;
- atomic_t lov_refcount;
- __u32 lov_death_row;/* tgts scheduled to be deleted */
- __u32 lov_tgt_size; /* size of tgts array */
- int lov_connects;
- int lov_pool_count;
- struct rhashtable lov_pools_hash_body; /* used for key access */
- struct list_head lov_pool_list; /* used for sequential access */
- struct dentry *lov_pool_debugfs_entry;
- enum lustre_sec_part lov_sp_me;
-
- /* Cached LRU and unstable data from upper layer */
- struct cl_client_cache *lov_cache;
-
- struct rw_semaphore lov_notify_lock;
-
- struct kobject *lov_tgts_kobj;
-};
-
-struct lmv_tgt_desc {
- struct obd_uuid ltd_uuid;
- struct obd_export *ltd_exp;
- u32 ltd_idx;
- struct mutex ltd_fid_mutex;
- unsigned long ltd_active:1; /* target up for requests */
-};
-
-struct lmv_obd {
- struct lu_client_fld lmv_fld;
- spinlock_t lmv_lock;
- struct lmv_desc desc;
- struct obd_uuid cluuid;
-
- struct mutex lmv_init_mutex;
- int connected;
- int max_easize;
- int max_def_easize;
-
- u32 tgts_size; /* size of tgts array */
- struct lmv_tgt_desc **tgts;
-
- struct obd_connect_data conn_data;
- struct kobject *lmv_tgts_kobj;
-};
-
-struct niobuf_local {
- __u64 lnb_file_offset;
- __u32 lnb_page_offset;
- __u32 lnb_len;
- __u32 lnb_flags;
- int lnb_rc;
- struct page *lnb_page;
- void *lnb_data;
-};
-
-#define LUSTRE_FLD_NAME "fld"
-#define LUSTRE_SEQ_NAME "seq"
-
-#define LUSTRE_MDD_NAME "mdd"
-#define LUSTRE_OSD_LDISKFS_NAME "osd-ldiskfs"
-#define LUSTRE_OSD_ZFS_NAME "osd-zfs"
-#define LUSTRE_VVP_NAME "vvp"
-#define LUSTRE_LMV_NAME "lmv"
-#define LUSTRE_SLP_NAME "slp"
-#define LUSTRE_LOD_NAME "lod"
-#define LUSTRE_OSP_NAME "osp"
-#define LUSTRE_LWP_NAME "lwp"
-
-/* obd device type names */
- /* FIXME all the references to LUSTRE_MDS_NAME should be swapped with LUSTRE_MDT_NAME */
-#define LUSTRE_MDS_NAME "mds"
-#define LUSTRE_MDT_NAME "mdt"
-#define LUSTRE_MDC_NAME "mdc"
-#define LUSTRE_OSS_NAME "ost" /* FIXME change name to oss */
-#define LUSTRE_OST_NAME "obdfilter" /* FIXME change name to ost */
-#define LUSTRE_OSC_NAME "osc"
-#define LUSTRE_LOV_NAME "lov"
-#define LUSTRE_MGS_NAME "mgs"
-#define LUSTRE_MGC_NAME "mgc"
-
-#define LUSTRE_ECHO_NAME "obdecho"
-#define LUSTRE_ECHO_CLIENT_NAME "echo_client"
-#define LUSTRE_QMT_NAME "qmt"
-
-/* Constant obd names (post-rename) */
-#define LUSTRE_MDS_OBDNAME "MDS"
-#define LUSTRE_OSS_OBDNAME "OSS"
-#define LUSTRE_MGS_OBDNAME "MGS"
-#define LUSTRE_MGC_OBDNAME "MGC"
-
-/* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */
-#define N_LOCAL_TEMP_PAGE 0x10000000
-
-/*
- * Events signalled through obd_notify() upcall-chain.
- */
-enum obd_notify_event {
- /* Device connect start */
- OBD_NOTIFY_CONNECT,
- /* Device activated */
- OBD_NOTIFY_ACTIVE,
- /* Device deactivated */
- OBD_NOTIFY_INACTIVE,
- /* Connect data for the import has changed */
- OBD_NOTIFY_OCD,
- /* Sync request */
- OBD_NOTIFY_SYNC_NONBLOCK,
- OBD_NOTIFY_SYNC,
- /* Configuration event */
- OBD_NOTIFY_CONFIG,
- /* Administratively deactivate/activate event */
- OBD_NOTIFY_DEACTIVATE,
- OBD_NOTIFY_ACTIVATE
-};
-
-/*
- * Data structure used to pass obd_notify() events to non-obd listeners
- * (llite being the main example).
- */
-struct obd_notify_upcall {
- int (*onu_upcall)(struct obd_device *host, struct obd_device *watched,
- enum obd_notify_event ev, void *owner, void *data);
- /* Opaque datum supplied by upper layer listener */
- void *onu_owner;
-};
-
-struct target_recovery_data {
- svc_handler_t trd_recovery_handler;
- pid_t trd_processing_task;
- struct completion trd_starting;
- struct completion trd_finishing;
-};
-
-struct obd_llog_group {
- struct llog_ctxt *olg_ctxts[LLOG_MAX_CTXTS];
- wait_queue_head_t olg_waitq;
- spinlock_t olg_lock;
- struct mutex olg_cat_processing;
-};
-
-/* corresponds to one of the OBD devices */
-#define OBD_DEVICE_MAGIC 0XAB5CD6EF
-
-struct lvfs_run_ctxt {
- struct dt_device *dt;
-};
-
-struct obd_device {
- struct obd_type *obd_type;
- u32 obd_magic; /* OBD_DEVICE_MAGIC */
- int obd_minor; /* device number: lctl dl */
- struct lu_device *obd_lu_dev;
-
- /* common and UUID name of this device */
- struct obd_uuid obd_uuid;
- char obd_name[MAX_OBD_NAME];
-
- /* bitfield modification is protected by obd_dev_lock */
- unsigned long obd_attached:1, /* finished attach */
- obd_set_up:1, /* finished setup */
- obd_version_recov:1, /* obd uses version checking */
- obd_replayable:1,/* recovery is enabled; inform clients */
- obd_no_transno:1, /* no committed-transno notification */
- obd_no_recov:1, /* fail instead of retry messages */
- obd_stopping:1, /* started cleanup */
- obd_starting:1, /* started setup */
- obd_force:1, /* cleanup with > 0 obd refcount */
- obd_fail:1, /* cleanup with failover */
- obd_no_conn:1, /* deny new connections */
- obd_inactive:1, /* device active/inactive (for sysfs status only!!) */
- obd_no_ir:1, /* no imperative recovery. */
- obd_process_conf:1; /* device is processing mgs config */
- /* use a separate field as it is set in interrupt context, so as not
- * to mess with the protection of the other bits via the _bh lock
- */
- unsigned long obd_recovery_expired:1;
- /* uuid-export hash body */
- struct rhashtable obd_uuid_hash;
- wait_queue_head_t obd_refcount_waitq;
- struct list_head obd_exports;
- struct list_head obd_unlinked_exports;
- struct list_head obd_delayed_exports;
- atomic_t obd_refcount;
- int obd_num_exports;
- spinlock_t obd_nid_lock;
- struct ldlm_namespace *obd_namespace;
- struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */
- /* a spinlock is OK for what we do now, may need a semaphore later */
- spinlock_t obd_dev_lock; /* protect OBD bitfield above */
- spinlock_t obd_osfs_lock;
- struct obd_statfs obd_osfs; /* locked by obd_osfs_lock */
- __u64 obd_osfs_age;
- u64 obd_last_committed;
- struct mutex obd_dev_mutex;
- struct lvfs_run_ctxt obd_lvfs_ctxt;
- struct obd_llog_group obd_olg; /* default llog group */
- struct obd_device *obd_observer;
- struct rw_semaphore obd_observer_link_sem;
- struct obd_notify_upcall obd_upcall;
- struct obd_export *obd_self_export;
-
- union {
- struct client_obd cli;
- struct echo_client_obd echo_client;
- struct lov_obd lov;
- struct lmv_obd lmv;
- } u;
-
- /* Fields used by LProcFS */
- struct lprocfs_stats *obd_stats;
- unsigned int obd_cntr_base;
-
- struct lprocfs_stats *md_stats;
- unsigned int md_cntr_base;
-
- struct dentry *obd_debugfs_entry;
- struct dentry *obd_svc_debugfs_entry;
- struct lprocfs_stats *obd_svc_stats;
- atomic_t obd_evict_inprogress;
- wait_queue_head_t obd_evict_inprogress_waitq;
- struct list_head obd_evict_list; /* protected with pet_lock */
-
- /**
- * Ldlm pool part. Save last calculated SLV and Limit.
- */
- rwlock_t obd_pool_lock;
- u64 obd_pool_slv;
- int obd_pool_limit;
-
- int obd_conn_inprogress;
-
- /**
- * A list of outstanding class_incref()'s against this obd. For
- * debugging.
- */
- struct lu_ref obd_reference;
-
- struct kobject obd_kobj; /* sysfs object */
- struct completion obd_kobj_unregister;
-};
-
-int obd_uuid_add(struct obd_device *obd, struct obd_export *export);
-void obd_uuid_del(struct obd_device *obd, struct obd_export *export);
-
-/* get/set_info keys */
-#define KEY_ASYNC "async"
-#define KEY_CHANGELOG_CLEAR "changelog_clear"
-#define KEY_FID2PATH "fid2path"
-#define KEY_CHECKSUM "checksum"
-#define KEY_CLEAR_FS "clear_fs"
-#define KEY_CONN_DATA "conn_data"
-#define KEY_EVICT_BY_NID "evict_by_nid"
-#define KEY_FIEMAP "fiemap"
-#define KEY_FLUSH_CTX "flush_ctx"
-#define KEY_GRANT_SHRINK "grant_shrink"
-#define KEY_HSM_COPYTOOL_SEND "hsm_send"
-#define KEY_INIT_RECOV_BACKUP "init_recov_bk"
-#define KEY_INTERMDS "inter_mds"
-#define KEY_LAST_ID "last_id"
-#define KEY_LAST_FID "last_fid"
-#define KEY_MAX_EASIZE "max_easize"
-#define KEY_DEFAULT_EASIZE "default_easize"
-#define KEY_MGSSEC "mgssec"
-#define KEY_READ_ONLY "read-only"
-#define KEY_REGISTER_TARGET "register_target"
-#define KEY_SET_FS "set_fs"
-#define KEY_TGT_COUNT "tgt_count"
-/* KEY_SET_INFO in lustre_idl.h */
-#define KEY_SPTLRPC_CONF "sptlrpc_conf"
-
-#define KEY_CACHE_SET "cache_set"
-#define KEY_CACHE_LRU_SHRINK "cache_lru_shrink"
-
-struct lu_context;
-
-static inline int it_to_lock_mode(struct lookup_intent *it)
-{
- /* CREAT needs to be tested before open (both could be set) */
- if (it->it_op & IT_CREAT)
- return LCK_CW;
- else if (it->it_op & (IT_GETATTR | IT_OPEN | IT_LOOKUP |
- IT_LAYOUT))
- return LCK_CR;
- else if (it->it_op & IT_READDIR)
- return LCK_PR;
- else if (it->it_op & IT_GETXATTR)
- return LCK_PR;
- else if (it->it_op & IT_SETXATTR)
- return LCK_PW;
-
- LASSERTF(0, "Invalid it_op: %d\n", it->it_op);
- return -EINVAL;
-}
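-
-/*
- * For example (illustrative): an intent with
- * it_op == (IT_OPEN | IT_CREAT) yields LCK_CW, because the CREAT bit is
- * tested before the OPEN bit.
- */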
-
-enum md_op_flags {
- MF_MDC_CANCEL_FID1 = BIT(0),
- MF_MDC_CANCEL_FID2 = BIT(1),
- MF_MDC_CANCEL_FID3 = BIT(2),
- MF_MDC_CANCEL_FID4 = BIT(3),
- MF_GET_MDT_IDX = BIT(4),
-};
-
-enum md_cli_flags {
- CLI_SET_MEA = BIT(0),
- CLI_RM_ENTRY = BIT(1),
- CLI_HASH64 = BIT(2),
- CLI_API32 = BIT(3),
- CLI_MIGRATE = BIT(4),
-};
-
-/**
- * GETXATTR is not included as only a couple of fields in the reply body
- * are filled, but not the FID, which is needed for common intent handling
- * in mdc_finish_intent_lock().
- */
-static inline bool it_has_reply_body(const struct lookup_intent *it)
-{
- return it->it_op & (IT_OPEN | IT_UNLINK | IT_LOOKUP | IT_GETATTR);
-}
-
-struct md_op_data {
- struct lu_fid op_fid1; /* operation fid1 (usually parent) */
- struct lu_fid op_fid2; /* operation fid2 (usually child) */
- struct lu_fid op_fid3; /* 2 extra fids to find conflicting */
- struct lu_fid op_fid4; /* to the operation locks. */
- u32 op_mds; /* which MDS server the open will go to */
- struct lustre_handle op_handle;
- s64 op_mod_time;
- const char *op_name;
- size_t op_namelen;
- __u32 op_mode;
- struct lmv_stripe_md *op_mea1;
- struct lmv_stripe_md *op_mea2;
- __u32 op_suppgids[2];
- __u32 op_fsuid;
- __u32 op_fsgid;
- kernel_cap_t op_cap;
- void *op_data;
- size_t op_data_size;
-
- /* iattr fields and blocks. */
- struct iattr op_attr;
- unsigned int op_attr_flags;
- __u64 op_valid;
- loff_t op_attr_blocks;
-
- __u32 op_flags;
-
- /* Various operation flags. */
- enum mds_op_bias op_bias;
-
- /* Used by readdir */
- __u64 op_offset;
-
- /* Used by readdir */
- __u32 op_max_pages;
-
- /* used to transfer info between the stacks of the MD client;
- * see enum md_cli_flags
- */
- enum md_cli_flags op_cli_flags;
-
- /* File object data version for HSM release, on client */
- __u64 op_data_version;
- struct lustre_handle op_lease_handle;
-
- /* default stripe offset */
- __u32 op_default_stripe_offset;
-};
-
-struct md_callback {
- int (*md_blocking_ast)(struct ldlm_lock *lock,
- struct ldlm_lock_desc *desc,
- void *data, int flag);
-};
-
-struct md_enqueue_info;
-/* metadata stat-ahead */
-
-struct md_enqueue_info {
- struct md_op_data mi_data;
- struct lookup_intent mi_it;
- struct lustre_handle mi_lockh;
- struct inode *mi_dir;
- struct ldlm_enqueue_info mi_einfo;
- int (*mi_cb)(struct ptlrpc_request *req,
- struct md_enqueue_info *minfo, int rc);
- void *mi_cbdata;
-};
-
-struct obd_ops {
- struct module *owner;
- int (*iocontrol)(unsigned int cmd, struct obd_export *exp, int len,
- void *karg, void __user *uarg);
- int (*get_info)(const struct lu_env *env, struct obd_export *,
- __u32 keylen, void *key, __u32 *vallen, void *val);
- int (*set_info_async)(const struct lu_env *, struct obd_export *,
- __u32 keylen, void *key,
- __u32 vallen, void *val,
- struct ptlrpc_request_set *set);
- int (*setup)(struct obd_device *dev, struct lustre_cfg *cfg);
- int (*precleanup)(struct obd_device *dev);
- int (*cleanup)(struct obd_device *dev);
- int (*process_config)(struct obd_device *dev, u32 len, void *data);
- int (*postrecov)(struct obd_device *dev);
- int (*add_conn)(struct obd_import *imp, struct obd_uuid *uuid,
- int priority);
- int (*del_conn)(struct obd_import *imp, struct obd_uuid *uuid);
- /* connect to the target device with given connection
- * data. @ocd->ocd_connect_flags is modified to reflect flags actually
- * granted by the target, which are guaranteed to be a subset of flags
- * asked for. If @ocd == NULL, use default parameters.
- */
- int (*connect)(const struct lu_env *env,
- struct obd_export **exp, struct obd_device *src,
- struct obd_uuid *cluuid, struct obd_connect_data *ocd,
- void *localdata);
- int (*reconnect)(const struct lu_env *env,
- struct obd_export *exp, struct obd_device *src,
- struct obd_uuid *cluuid,
- struct obd_connect_data *ocd,
- void *localdata);
- int (*disconnect)(struct obd_export *exp);
-
- /* Initialize/finalize fids infrastructure. */
- int (*fid_init)(struct obd_device *obd,
- struct obd_export *exp, enum lu_cli_type type);
- int (*fid_fini)(struct obd_device *obd);
-
- /* Allocate a new fid according to the hint passed in @op_data. */
- int (*fid_alloc)(const struct lu_env *env, struct obd_export *exp,
- struct lu_fid *fid, struct md_op_data *op_data);
-
- /*
- * Object with @fid is getting deleted, we may want to do something
- * about this.
- */
- int (*statfs)(const struct lu_env *, struct obd_export *exp,
- struct obd_statfs *osfs, __u64 max_age, __u32 flags);
- int (*statfs_async)(struct obd_export *exp, struct obd_info *oinfo,
- __u64 max_age, struct ptlrpc_request_set *set);
- int (*create)(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa);
- int (*destroy)(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa);
- int (*setattr)(const struct lu_env *, struct obd_export *exp,
- struct obdo *oa);
- int (*getattr)(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa);
- int (*preprw)(const struct lu_env *env, int cmd,
- struct obd_export *exp, struct obdo *oa, int objcount,
- struct obd_ioobj *obj, struct niobuf_remote *remote,
- int *nr_pages, struct niobuf_local *local);
- int (*commitrw)(const struct lu_env *env, int cmd,
- struct obd_export *exp, struct obdo *oa,
- int objcount, struct obd_ioobj *obj,
- struct niobuf_remote *remote, int pages,
- struct niobuf_local *local, int rc);
- int (*init_export)(struct obd_export *exp);
- int (*destroy_export)(struct obd_export *exp);
-
- /* metadata-only methods */
- int (*import_event)(struct obd_device *, struct obd_import *,
- enum obd_import_event);
-
- int (*notify)(struct obd_device *obd, struct obd_device *watched,
- enum obd_notify_event ev, void *data);
-
- int (*health_check)(const struct lu_env *env, struct obd_device *);
- struct obd_uuid *(*get_uuid)(struct obd_export *exp);
-
- /* quota methods */
- int (*quotactl)(struct obd_device *, struct obd_export *,
- struct obd_quotactl *);
-
- /* pools methods */
- int (*pool_new)(struct obd_device *obd, char *poolname);
- int (*pool_del)(struct obd_device *obd, char *poolname);
- int (*pool_add)(struct obd_device *obd, char *poolname,
- char *ostname);
- int (*pool_rem)(struct obd_device *obd, char *poolname,
- char *ostname);
- void (*getref)(struct obd_device *obd);
- void (*putref)(struct obd_device *obd);
- /*
- * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line
- * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c.
- * Also, add a wrapper function in include/linux/obd_class.h.
- */
-};
-
-/* lmv structures */
-struct lustre_md {
- struct mdt_body *body;
- struct lu_buf layout;
- struct lmv_stripe_md *lmv;
-#ifdef CONFIG_FS_POSIX_ACL
- struct posix_acl *posix_acl;
-#endif
- struct mdt_remote_perm *remote_perm;
-};
-
-struct md_open_data {
- struct obd_client_handle *mod_och;
- struct ptlrpc_request *mod_open_req;
- struct ptlrpc_request *mod_close_req;
- atomic_t mod_refcount;
- bool mod_is_create;
-};
-
-struct obd_client_handle {
- struct lustre_handle och_fh;
- struct lu_fid och_fid;
- struct md_open_data *och_mod;
- struct lustre_handle och_lease_handle; /* open lock for lease */
- __u32 och_magic;
- fmode_t och_flags;
-};
-
-#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
-
-struct lookup_intent;
-struct cl_attr;
-
-struct md_ops {
- int (*getstatus)(struct obd_export *, struct lu_fid *);
- int (*null_inode)(struct obd_export *, const struct lu_fid *);
- int (*close)(struct obd_export *, struct md_op_data *,
- struct md_open_data *, struct ptlrpc_request **);
- int (*create)(struct obd_export *, struct md_op_data *,
- const void *, size_t, umode_t, uid_t, gid_t,
- kernel_cap_t, __u64, struct ptlrpc_request **);
- int (*enqueue)(struct obd_export *, struct ldlm_enqueue_info *,
- const union ldlm_policy_data *, struct md_op_data *,
- struct lustre_handle *, __u64);
- int (*getattr)(struct obd_export *, struct md_op_data *,
- struct ptlrpc_request **);
- int (*getattr_name)(struct obd_export *, struct md_op_data *,
- struct ptlrpc_request **);
- int (*intent_lock)(struct obd_export *, struct md_op_data *,
- struct lookup_intent *,
- struct ptlrpc_request **,
- ldlm_blocking_callback, __u64);
- int (*link)(struct obd_export *, struct md_op_data *,
- struct ptlrpc_request **);
- int (*rename)(struct obd_export *, struct md_op_data *,
- const char *, size_t, const char *, size_t,
- struct ptlrpc_request **);
- int (*setattr)(struct obd_export *, struct md_op_data *, void *,
- size_t, struct ptlrpc_request **);
- int (*sync)(struct obd_export *, const struct lu_fid *,
- struct ptlrpc_request **);
- int (*read_page)(struct obd_export *, struct md_op_data *,
- struct md_callback *cb_op, __u64 hash_offset,
- struct page **ppage);
- int (*unlink)(struct obd_export *, struct md_op_data *,
- struct ptlrpc_request **);
-
- int (*setxattr)(struct obd_export *, const struct lu_fid *,
- u64, const char *, const void *, size_t, unsigned int,
- u32, struct ptlrpc_request **);
-
- int (*getxattr)(struct obd_export *, const struct lu_fid *,
- u64, const char *, size_t, struct ptlrpc_request **);
-
- int (*init_ea_size)(struct obd_export *, u32, u32);
-
- int (*get_lustre_md)(struct obd_export *, struct ptlrpc_request *,
- struct obd_export *, struct obd_export *,
- struct lustre_md *);
-
- int (*free_lustre_md)(struct obd_export *, struct lustre_md *);
-
- int (*merge_attr)(struct obd_export *,
- const struct lmv_stripe_md *lsm,
- struct cl_attr *attr, ldlm_blocking_callback);
-
- int (*set_open_replay_data)(struct obd_export *,
- struct obd_client_handle *,
- struct lookup_intent *);
- int (*clear_open_replay_data)(struct obd_export *,
- struct obd_client_handle *);
- int (*set_lock_data)(struct obd_export *, const struct lustre_handle *,
- void *, __u64 *);
-
- enum ldlm_mode (*lock_match)(struct obd_export *, __u64,
- const struct lu_fid *, enum ldlm_type,
- union ldlm_policy_data *, enum ldlm_mode,
- struct lustre_handle *);
-
- int (*cancel_unused)(struct obd_export *, const struct lu_fid *,
- union ldlm_policy_data *, enum ldlm_mode,
- enum ldlm_cancel_flags flags, void *opaque);
-
- int (*get_fid_from_lsm)(struct obd_export *,
- const struct lmv_stripe_md *,
- const char *name, int namelen,
- struct lu_fid *fid);
-
- int (*intent_getattr_async)(struct obd_export *,
- struct md_enqueue_info *);
-
- int (*revalidate_lock)(struct obd_export *, struct lookup_intent *,
- struct lu_fid *, __u64 *bits);
-
- int (*unpackmd)(struct obd_export *exp, struct lmv_stripe_md **plsm,
- const union lmv_mds_md *lmv, size_t lmv_size);
- /*
- * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to
- * lprocfs_alloc_md_stats() in obdclass/lprocfs_status.c. Also, add a
- * wrapper function in include/linux/obd_class.h.
- */
-};
-
-static inline struct md_open_data *obd_mod_alloc(void)
-{
- struct md_open_data *mod;
-
- mod = kzalloc(sizeof(*mod), GFP_NOFS);
- if (!mod)
- return NULL;
- atomic_set(&mod->mod_refcount, 1);
- return mod;
-}
-
-#define obd_mod_get(mod) atomic_inc(&(mod)->mod_refcount)
-#define obd_mod_put(mod) \
-({ \
- if (atomic_dec_and_test(&(mod)->mod_refcount)) { \
- if ((mod)->mod_open_req) \
- ptlrpc_req_finished((mod)->mod_open_req); \
- kfree(mod); \
- } \
-})
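-
-/*
- * Illustrative usage of the refcounting helpers above (not part of the
- * original header):
- *
- *     struct md_open_data *mod = obd_mod_alloc();  // refcount == 1
- *
- *     if (mod) {
- *         obd_mod_get(mod);   // share it: refcount == 2
- *         ...
- *         obd_mod_put(mod);   // drop the shared reference
- *         obd_mod_put(mod);   // final put finishes mod_open_req
- *                             // (if set) and frees mod
- *     }
- */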
-
-void obdo_from_inode(struct obdo *dst, struct inode *src, u32 valid);
-void obdo_set_parent_fid(struct obdo *dst, const struct lu_fid *parent);
-
-/* return 1 if the client should resend the request */
-static inline int client_should_resend(int resend, struct client_obd *cli)
-{
- return atomic_read(&cli->cl_resends) ?
- atomic_read(&cli->cl_resends) > resend : 1;
-}
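-
-/*
- * Illustrative reading: with cl_resends == 10 a request is resent while
- * its resend count is below 10; cl_resends == 0 means resend without
- * limit.
- */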
-
-/**
- * Return the device name for this device
- *
- * XXX: lu_device is declared before obd_device, while a pointer in
- * lu_device points back to obd_device, so this helper function is
- * defined here instead of in lu_object.h
- */
-static inline const char *lu_dev_name(const struct lu_device *lu_dev)
-{
- return lu_dev->ld_obd->obd_name;
-}
-
-static inline bool filename_is_volatile(const char *name, size_t namelen,
- int *idx)
-{
- const char *start;
- char *end;
-
- if (strncmp(name, LUSTRE_VOLATILE_HDR, LUSTRE_VOLATILE_HDR_LEN) != 0)
- return false;
-
- /* caller does not care about idx */
- if (!idx)
- return true;
-
- /* Volatile file: the MDT index can be encoded in the name, with the
- * format LUSTRE_VOLATILE_HDR:[idx]:. If no MDT is specified, use the
- * standard way.
- */
- if (namelen < LUSTRE_VOLATILE_HDR_LEN + 2)
- goto bad_format;
- /* test for no MDT idx case */
- if ((*(name + LUSTRE_VOLATILE_HDR_LEN) == ':') &&
- (*(name + LUSTRE_VOLATILE_HDR_LEN + 1) == ':')) {
- *idx = -1;
- return true;
- }
- /* we have an idx, read it */
- start = name + LUSTRE_VOLATILE_HDR_LEN + 1;
- *idx = simple_strtoul(start, &end, 0);
- /* error cases:
- * no digit, no trailing :, negative value
- */
- if (((*idx == 0) && (end == start)) ||
- (*end != ':') || (*idx < 0))
- goto bad_format;
-
- return true;
-bad_format:
- /* bad format of the MDT idx; we cannot return an error to the
- * caller, so fall back to the hash algorithm
- */
- CERROR("Bad volatile file name format: %s\n",
- name + LUSTRE_VOLATILE_HDR_LEN);
- return false;
-}
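-
-/*
- * Illustrative behaviour (not from the original header): a name of the
- * form LUSTRE_VOLATILE_HDR "::" returns true with *idx == -1 (no MDT
- * specified); LUSTRE_VOLATILE_HDR ":3:" returns true with *idx == 3; a
- * malformed index logs an error and returns false, so the caller falls
- * back to the hash algorithm.
- */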
-
-static inline int cli_brw_size(struct obd_device *obd)
-{
- return obd->u.cli.cl_max_pages_per_rpc << PAGE_SHIFT;
-}
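-
-/*
- * Worked example (illustrative): with cl_max_pages_per_rpc == 256 and
- * 4 KiB pages (PAGE_SHIFT == 12), cli_brw_size() returns
- * 256 << 12 == 1 MiB.
- */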
-
-/*
- * When the RPC size or the max number of RPCs in flight is increased,
- * the max dirty pages of the client should be increased accordingly,
- * so that the client does not run out of dirty space and send
- * fragmented RPCs over the network while many RPCs are being generated.
- */
-static inline void client_adjust_max_dirty(struct client_obd *cli)
-{
- /* initializing */
- if (cli->cl_dirty_max_pages <= 0)
- cli->cl_dirty_max_pages =
- (OSC_MAX_DIRTY_DEFAULT * 1024 * 1024) >> PAGE_SHIFT;
- else {
- unsigned long dirty_max = cli->cl_max_rpcs_in_flight *
- cli->cl_max_pages_per_rpc;
-
- if (dirty_max > cli->cl_dirty_max_pages)
- cli->cl_dirty_max_pages = dirty_max;
- }
-
- if (cli->cl_dirty_max_pages > totalram_pages / 8)
- cli->cl_dirty_max_pages = totalram_pages / 8;
-}
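-
-/*
- * Worked example (illustrative): with cl_max_rpcs_in_flight == 8 and
- * cl_max_pages_per_rpc == 256, dirty_max is 2048 pages (8 MiB with
- * 4 KiB pages); the result is always capped at totalram_pages / 8.
- */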
-
-#endif /* __OBD_H */
diff --git a/drivers/staging/lustre/lustre/include/obd_cksum.h b/drivers/staging/lustre/lustre/include/obd_cksum.h
deleted file mode 100644
index e5f7bb20415d..000000000000
--- a/drivers/staging/lustre/lustre/include/obd_cksum.h
+++ /dev/null
@@ -1,153 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __OBD_CKSUM
-#define __OBD_CKSUM
-#include <linux/libcfs/libcfs.h>
-#include <linux/libcfs/libcfs_crypto.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-
-static inline unsigned char cksum_obd2cfs(enum cksum_type cksum_type)
-{
- switch (cksum_type) {
- case OBD_CKSUM_CRC32:
- return CFS_HASH_ALG_CRC32;
- case OBD_CKSUM_ADLER:
- return CFS_HASH_ALG_ADLER32;
- case OBD_CKSUM_CRC32C:
- return CFS_HASH_ALG_CRC32C;
- default:
- CERROR("Unknown checksum type (%x)!!!\n", cksum_type);
- LBUG();
- }
- return 0;
-}
-
-/* The OBD_FL_CKSUM_* flags are packed into 5 bits of o_flags, since there
- * can only be a single checksum type per RPC.
- *
- * The OBD_CHECKSUM_* type bits passed in ocd_cksum_types are a 32-bit bitmask
- * since they need to represent the full range of checksum algorithms that
- * both the client and server can understand.
- *
- * In case of unsupported types/flags we fall back to ADLER,
- * because that has been supported by all clients since 1.8.
- *
- * When multiple algorithms are supported, the best one is used.
- */
-static inline u32 cksum_type_pack(enum cksum_type cksum_type)
-{
- unsigned int performance = 0, tmp;
- u32 flag = OBD_FL_CKSUM_ADLER;
-
- if (cksum_type & OBD_CKSUM_CRC32) {
- tmp = cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32));
- if (tmp > performance) {
- performance = tmp;
- flag = OBD_FL_CKSUM_CRC32;
- }
- }
- if (cksum_type & OBD_CKSUM_CRC32C) {
- tmp = cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32C));
- if (tmp > performance) {
- performance = tmp;
- flag = OBD_FL_CKSUM_CRC32C;
- }
- }
- if (cksum_type & OBD_CKSUM_ADLER) {
- tmp = cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_ADLER));
- if (tmp > performance) {
- performance = tmp;
- flag = OBD_FL_CKSUM_ADLER;
- }
- }
- if (unlikely(cksum_type && !(cksum_type & (OBD_CKSUM_CRC32C |
- OBD_CKSUM_CRC32 |
- OBD_CKSUM_ADLER))))
- CWARN("unknown cksum type %x\n", cksum_type);
-
- return flag;
-}
-
-static inline enum cksum_type cksum_type_unpack(u32 o_flags)
-{
- switch (o_flags & OBD_FL_CKSUM_ALL) {
- case OBD_FL_CKSUM_CRC32C:
- return OBD_CKSUM_CRC32C;
- case OBD_FL_CKSUM_CRC32:
- return OBD_CKSUM_CRC32;
- default:
- break;
- }
-
- return OBD_CKSUM_ADLER;
-}
-
-/* Return a bitmask of the checksum types supported on this system.
- * ADLER is the baseline, supported by all clients since 1.8, and does
- * not depend on hardware. The client uses all locally available
- * algorithms.
- */
-static inline enum cksum_type cksum_types_supported_client(void)
-{
- enum cksum_type ret = OBD_CKSUM_ADLER;
-
- CDEBUG(D_INFO, "Crypto hash speed: crc %d, crc32c %d, adler %d\n",
- cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32)),
- cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32C)),
- cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_ADLER)));
-
- if (cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32C)) > 0)
- ret |= OBD_CKSUM_CRC32C;
- if (cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32)) > 0)
- ret |= OBD_CKSUM_CRC32;
-
- return ret;
-}
-
-/* Select the best checksum algorithm among those supplied in the cksum_types
- * input.
- *
- * Currently, calling cksum_type_pack() with a mask will return the fastest
- * checksum type due to its benchmarking at libcfs module load.
- * Caution is advised, however, since what is fastest on a single client may
- * not be the fastest or most efficient algorithm on the server.
- */
-static inline enum cksum_type cksum_type_select(enum cksum_type cksum_types)
-{
- return cksum_type_unpack(cksum_type_pack(cksum_types));
-}
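-
-/*
- * Illustrative example: a client handed ocd_cksum_types ==
- * (OBD_CKSUM_ADLER | OBD_CKSUM_CRC32C) by the server would get back
- * from cksum_type_select() whichever of the two algorithms benchmarked
- * fastest at libcfs module load time.
- */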
-
-/* Checksum algorithm names. Must be defined in the same order as the
- * OBD_CKSUM_* flags.
- */
-#define DECLARE_CKSUM_NAME char *cksum_name[] = {"crc32", "adler", "crc32c"}
-
-#endif /* __OBD_CKSUM */
diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h
deleted file mode 100644
index fc9c7720fee0..000000000000
--- a/drivers/staging/lustre/lustre/include/obd_class.h
+++ /dev/null
@@ -1,1603 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-#ifndef __CLASS_OBD_H
-#define __CLASS_OBD_H
-
-#include <obd_support.h>
-#include <lustre_import.h>
-#include <lustre_net.h>
-#include <obd.h>
-#include <lustre_lib.h>
-#include <lprocfs_status.h>
-
-/* requests should be sent without delay and without resends, to avoid deadlocks */
-#define OBD_STATFS_NODELAY 0x0001
-/* the statfs callback should not update obd_osfs_age */
-#define OBD_STATFS_FROM_CACHE 0x0002
-/* the statfs is only for retrieving information from MDT0 */
-#define OBD_STATFS_FOR_MDT0 0x0004
-
-/* OBD Device Declarations */
-extern struct obd_device *obd_devs[MAX_OBD_DEVICES];
-extern rwlock_t obd_dev_lock;
-
-/* OBD Operations Declarations */
-struct obd_device *class_exp2obd(struct obd_export *exp);
-int class_handle_ioctl(unsigned int cmd, unsigned long arg);
-int lustre_get_jobid(char *jobid);
-
-struct lu_device_type;
-
-/* genops.c */
-extern struct list_head obd_types;
-struct obd_export *class_conn2export(struct lustre_handle *conn);
-int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops,
- const char *name, struct lu_device_type *ldt);
-int class_unregister_type(const char *name);
-
-struct obd_device *class_newdev(const char *type_name, const char *name);
-void class_release_dev(struct obd_device *obd);
-
-int class_name2dev(const char *name);
-struct obd_device *class_name2obd(const char *name);
-int class_uuid2dev(struct obd_uuid *uuid);
-struct obd_device *class_find_client_obd(struct obd_uuid *tgt_uuid,
- const char *typ_name,
- struct obd_uuid *grp_uuid);
-struct obd_device *class_devices_in_group(struct obd_uuid *grp_uuid,
- int *next);
-struct obd_device *class_num2obd(int num);
-
-int class_notify_sptlrpc_conf(const char *fsname, int namelen);
-
-int obd_connect_flags2str(char *page, int count, __u64 flags, char *sep);
-
-int obd_zombie_impexp_init(void);
-void obd_zombie_impexp_stop(void);
-void obd_zombie_barrier(void);
-
-int obd_get_request_slot(struct client_obd *cli);
-void obd_put_request_slot(struct client_obd *cli);
-__u32 obd_get_max_rpcs_in_flight(struct client_obd *cli);
-int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max);
-int obd_set_max_mod_rpcs_in_flight(struct client_obd *cli, u16 max);
-int obd_mod_rpc_stats_seq_show(struct client_obd *cli, struct seq_file *seq);
-
-u16 obd_get_mod_rpc_slot(struct client_obd *cli, u32 opc,
- struct lookup_intent *it);
-void obd_put_mod_rpc_slot(struct client_obd *cli, u32 opc,
- struct lookup_intent *it, u16 tag);
-
-struct llog_handle;
-struct llog_rec_hdr;
-typedef int (*llog_cb_t)(const struct lu_env *, struct llog_handle *,
- struct llog_rec_hdr *, void *);
-
-/* obd_config.c */
-char *lustre_cfg_string(struct lustre_cfg *lcfg, u32 index);
-int class_process_config(struct lustre_cfg *lcfg);
-int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
- struct lustre_cfg *lcfg, void *data);
-
-/* For interoperability */
-struct cfg_interop_param {
- char *old_param;
- char *new_param;
-};
-
-int class_find_param(char *buf, char *key, char **valp);
-struct cfg_interop_param *class_find_old_param(const char *param,
- struct cfg_interop_param *ptr);
-int class_get_next_param(char **params, char *copy);
-int class_parse_nid(char *buf, lnet_nid_t *nid, char **endh);
-int class_parse_nid_quiet(char *buf, lnet_nid_t *nid, char **endh);
-int class_parse_net(char *buf, u32 *net, char **endh);
-int class_match_nid(char *buf, char *key, lnet_nid_t nid);
-int class_match_net(char *buf, char *key, u32 net);
-
-struct obd_device *class_incref(struct obd_device *obd,
- const char *scope, const void *source);
-void class_decref(struct obd_device *obd,
- const char *scope, const void *source);
-int class_config_llog_handler(const struct lu_env *env,
- struct llog_handle *handle,
- struct llog_rec_hdr *rec, void *data);
-int class_add_uuid(const char *uuid, __u64 nid);
-
-/* obdecho */
-void lprocfs_echo_init_vars(struct lprocfs_static_vars *lvars);
-
-#define CFG_F_START 0x01 /* Set when we start updating from a log */
-#define CFG_F_MARKER 0x02 /* We are within a marker */
-#define CFG_F_SKIP 0x04 /* We should ignore this cfg command */
-#define CFG_F_COMPAT146 0x08 /* Allow old-style logs */
-#define CFG_F_EXCLUDE 0x10 /* OST exclusion list */
-
-/* Passed as data param to class_config_parse_llog */
-struct config_llog_instance {
- char *cfg_obdname;
- void *cfg_instance;
- struct super_block *cfg_sb;
- struct obd_uuid cfg_uuid;
- llog_cb_t cfg_callback;
- int cfg_last_idx; /* for partial llog processing */
- int cfg_flags;
-};
-
-int class_config_parse_llog(const struct lu_env *env, struct llog_ctxt *ctxt,
- char *name, struct config_llog_instance *cfg);
-enum {
- CONFIG_T_CONFIG = 0,
- CONFIG_T_SPTLRPC = 1,
- CONFIG_T_RECOVER = 2,
- CONFIG_T_PARAMS = 3,
- CONFIG_T_MAX = 4
-};
-
-#define PARAMS_FILENAME "params"
-#define LCTL_UPCALL "lctl"
-
-/* list of active configuration logs */
-struct config_llog_data {
- struct ldlm_res_id cld_resid;
- struct config_llog_instance cld_cfg;
- struct list_head cld_list_chain;
- atomic_t cld_refcount;
- struct config_llog_data *cld_sptlrpc; /* dependent sptlrpc log */
- struct config_llog_data *cld_params; /* common parameters log */
- struct config_llog_data *cld_recover; /* imperative recovery log */
- struct obd_export *cld_mgcexp;
- struct mutex cld_lock;
- int cld_type;
- unsigned int cld_stopping:1, /*
- * we were told to stop
- * watching
- */
- cld_lostlock:1; /* lock not requeued */
- char cld_logname[0];
-};
-
-struct lustre_profile {
- struct list_head lp_list;
- char *lp_profile;
- char *lp_dt;
- char *lp_md;
- int lp_refs;
- bool lp_list_deleted;
-};
-
-struct lustre_profile *class_get_profile(const char *prof);
-void class_del_profile(const char *prof);
-void class_put_profile(struct lustre_profile *lprof);
-void class_del_profiles(void);
-
-#if LUSTRE_TRACKS_LOCK_EXP_REFS
-
-void __class_export_add_lock_ref(struct obd_export *exp,
- struct ldlm_lock *lock);
-void __class_export_del_lock_ref(struct obd_export *exp,
- struct ldlm_lock *lock);
-extern void (*class_export_dump_hook)(struct obd_export *exp);
-
-#else
-
-#define __class_export_add_lock_ref(exp, lock) do {} while (0)
-#define __class_export_del_lock_ref(exp, lock) do {} while (0)
-
-#endif
-
-/* genops.c */
-struct obd_export *class_export_get(struct obd_export *exp);
-void class_export_put(struct obd_export *exp);
-struct obd_export *class_new_export(struct obd_device *obddev,
- struct obd_uuid *cluuid);
-void class_unlink_export(struct obd_export *exp);
-
-struct obd_import *class_import_get(struct obd_import *imp);
-void class_import_put(struct obd_import *imp);
-struct obd_import *class_new_import(struct obd_device *obd);
-void class_destroy_import(struct obd_import *exp);
-
-void class_put_type(struct obd_type *type);
-int class_connect(struct lustre_handle *conn, struct obd_device *obd,
- struct obd_uuid *cluuid);
-int class_disconnect(struct obd_export *exp);
-void class_fail_export(struct obd_export *exp);
-int class_manual_cleanup(struct obd_device *obd);
-
-static inline void class_export_rpc_inc(struct obd_export *exp)
-{
- atomic_inc(&(exp)->exp_rpc_count);
- CDEBUG(D_INFO, "RPC GETting export %p : new rpc_count %d\n",
- (exp), atomic_read(&(exp)->exp_rpc_count));
-}
-
-static inline void class_export_rpc_dec(struct obd_export *exp)
-{
- LASSERT_ATOMIC_POS(&exp->exp_rpc_count);
- atomic_dec(&(exp)->exp_rpc_count);
- CDEBUG(D_INFO, "RPC PUTting export %p : new rpc_count %d\n",
- (exp), atomic_read(&(exp)->exp_rpc_count));
-}
-
-static inline struct obd_export *class_export_lock_get(struct obd_export *exp,
- struct ldlm_lock *lock)
-{
- atomic_inc(&(exp)->exp_locks_count);
- __class_export_add_lock_ref(exp, lock);
- CDEBUG(D_INFO, "lock GETting export %p : new locks_count %d\n",
- (exp), atomic_read(&(exp)->exp_locks_count));
- return class_export_get(exp);
-}
-
-static inline void class_export_lock_put(struct obd_export *exp,
- struct ldlm_lock *lock)
-{
- LASSERT_ATOMIC_POS(&exp->exp_locks_count);
- atomic_dec(&(exp)->exp_locks_count);
- __class_export_del_lock_ref(exp, lock);
- CDEBUG(D_INFO, "lock PUTting export %p : new locks_count %d\n",
- (exp), atomic_read(&(exp)->exp_locks_count));
- class_export_put(exp);
-}
-
-static inline enum obd_option exp_flags_from_obd(struct obd_device *obd)
-{
- return ((obd->obd_fail ? OBD_OPT_FAILOVER : 0) |
- (obd->obd_force ? OBD_OPT_FORCE : 0) |
- 0);
-}
-
-static inline int lprocfs_climp_check(struct obd_device *obd)
-{
- down_read(&(obd)->u.cli.cl_sem);
- if (!(obd)->u.cli.cl_import) {
- up_read(&(obd)->u.cli.cl_sem);
- return -ENODEV;
- }
- return 0;
-}
-
-struct inode;
-struct lu_attr;
-struct obdo;
-
-void obdo_to_ioobj(const struct obdo *oa, struct obd_ioobj *ioobj);
-
-#define OBT(dev) ((dev)->obd_type)
-#define OBP(dev, op) ((dev)->obd_type->typ_dt_ops->op)
-#define MDP(dev, op) ((dev)->obd_type->typ_md_ops->op)
-#define CTXTP(ctxt, op) ((ctxt)->loc_logops->lop_##op)
-
-/*
- * Ensure obd_setup: used by cleanup paths, which must remain callable
- * while the obd is stopping
- */
-static inline int obd_check_dev(struct obd_device *obd)
-{
- if (!obd) {
- CERROR("NULL device\n");
- return -ENODEV;
- }
- return 0;
-}
-
-/* ensure obd_setup and !obd_stopping */
-static inline int obd_check_dev_active(struct obd_device *obd)
-{
- int rc;
-
- rc = obd_check_dev(obd);
- if (rc)
- return rc;
- if (!obd->obd_set_up || obd->obd_stopping) {
- CERROR("Device %d not setup\n", obd->obd_minor);
- return -ENODEV;
- }
- return rc;
-}
-
-#define OBD_COUNTER_OFFSET(op) \
- ((offsetof(struct obd_ops, op) - \
- offsetof(struct obd_ops, iocontrol)) \
- / sizeof(((struct obd_ops *)(0))->iocontrol))
-
-#define OBD_COUNTER_INCREMENT(obdx, op) \
-do { \
- if ((obdx)->obd_stats) { \
- unsigned int coffset; \
- coffset = (unsigned int)((obdx)->obd_cntr_base) + \
- OBD_COUNTER_OFFSET(op); \
- LASSERT(coffset < (obdx)->obd_stats->ls_num); \
- lprocfs_counter_incr((obdx)->obd_stats, coffset); \
- } \
-} while (0)
-
-#define EXP_COUNTER_INCREMENT(export, op) \
-do { \
- if ((export)->exp_obd->obd_stats) { \
- unsigned int coffset; \
- coffset = (unsigned int)((export)->exp_obd->obd_cntr_base) + \
- OBD_COUNTER_OFFSET(op); \
- LASSERT(coffset < (export)->exp_obd->obd_stats->ls_num); \
- lprocfs_counter_incr((export)->exp_obd->obd_stats, coffset); \
- } \
-} while (0)
-
-#define MD_COUNTER_OFFSET(op) \
- ((offsetof(struct md_ops, op) - \
- offsetof(struct md_ops, getstatus)) \
- / sizeof(((struct md_ops *)(0))->getstatus))
-
-#define MD_COUNTER_INCREMENT(obdx, op) \
-do { \
- if ((obdx)->md_stats) { \
- unsigned int coffset; \
- coffset = (unsigned int)((obdx)->md_cntr_base) + \
- MD_COUNTER_OFFSET(op); \
- LASSERT(coffset < (obdx)->md_stats->ls_num); \
- lprocfs_counter_incr((obdx)->md_stats, coffset); \
- } \
-} while (0)
-
-#define EXP_MD_COUNTER_INCREMENT(export, op) \
-do { \
- if ((export)->exp_obd->md_stats) { \
- unsigned int coffset; \
- coffset = (unsigned int)((export)->exp_obd->md_cntr_base) + \
- MD_COUNTER_OFFSET(op); \
- LASSERT(coffset < (export)->exp_obd->md_stats->ls_num); \
- lprocfs_counter_incr((export)->exp_obd->md_stats, coffset); \
- if ((export)->exp_md_stats) \
- lprocfs_counter_incr( \
- (export)->exp_md_stats, coffset); \
- } \
-} while (0)
-
-#define EXP_CHECK_MD_OP(exp, op) \
-do { \
- if (!(exp)) { \
- CERROR("obd_" #op ": NULL export\n"); \
- return -ENODEV; \
- } \
- if (!(exp)->exp_obd || !OBT((exp)->exp_obd)) { \
- CERROR("obd_" #op ": cleaned up obd\n"); \
- return -EOPNOTSUPP; \
- } \
- if (!OBT((exp)->exp_obd) || !MDP((exp)->exp_obd, op)) { \
- CERROR("obd_" #op ": dev %s/%d no operation\n", \
- (exp)->exp_obd->obd_name, \
- (exp)->exp_obd->obd_minor); \
- return -EOPNOTSUPP; \
- } \
-} while (0)
-
-#define OBD_CHECK_DT_OP(obd, op, err) \
-do { \
- if (!OBT(obd) || !OBP((obd), op)) { \
- if (err) \
- CERROR("obd_" #op ": dev %d no operation\n", \
- obd->obd_minor); \
- return err; \
- } \
-} while (0)
-
-#define EXP_CHECK_DT_OP(exp, op) \
-do { \
- if (!(exp)) { \
- CERROR("obd_" #op ": NULL export\n"); \
- return -ENODEV; \
- } \
- if (!(exp)->exp_obd || !OBT((exp)->exp_obd)) { \
- CERROR("obd_" #op ": cleaned up obd\n"); \
- return -EOPNOTSUPP; \
- } \
- if (!OBT((exp)->exp_obd) || !OBP((exp)->exp_obd, op)) { \
- CERROR("obd_" #op ": dev %d no operation\n", \
- (exp)->exp_obd->obd_minor); \
- return -EOPNOTSUPP; \
- } \
-} while (0)
-
-#define CTXT_CHECK_OP(ctxt, op, err) \
-do { \
- if (!OBT(ctxt->loc_obd) || !CTXTP((ctxt), op)) { \
- if (err) \
- CERROR("lop_" #op ": dev %d no operation\n", \
- ctxt->loc_obd->obd_minor); \
- return err; \
- } \
-} while (0)
-
-static inline int class_devno_max(void)
-{
- return MAX_OBD_DEVICES;
-}
-
-static inline int obd_get_info(const struct lu_env *env,
- struct obd_export *exp, __u32 keylen,
- void *key, __u32 *vallen, void *val)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, get_info);
- EXP_COUNTER_INCREMENT(exp, get_info);
-
- rc = OBP(exp->exp_obd, get_info)(env, exp, keylen, key, vallen, val);
- return rc;
-}
-
-static inline int obd_set_info_async(const struct lu_env *env,
- struct obd_export *exp, u32 keylen,
- void *key, u32 vallen, void *val,
- struct ptlrpc_request_set *set)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, set_info_async);
- EXP_COUNTER_INCREMENT(exp, set_info_async);
-
- rc = OBP(exp->exp_obd, set_info_async)(env, exp, keylen, key, vallen,
- val, set);
- return rc;
-}
-
-/*
- * obd-lu integration.
- *
- * Functionality is being moved into new lu_device-based layering, but some
- * pieces of configuration process are still based on obd devices.
- *
- * Specifically, lu_device_type_operations::ldto_device_alloc() methods fully
- * subsume ->o_setup() methods of obd devices they replace. The same for
- * lu_device_operations::ldo_process_config() and ->o_process_config(). As a
- * result, obd_setup() and obd_process_config() branch and call one XOR
- * another.
- *
- * Yet neither lu_device_type_operations::ldto_device_fini() nor
- * lu_device_type_operations::ldto_device_free() fully implement the
- * functionality of ->o_precleanup() and ->o_cleanup() they override. Hence,
- * obd_precleanup() and obd_cleanup() call both lu_device and obd operations.
- */
-
-static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg)
-{
- int rc;
- struct lu_device_type *ldt;
- struct lu_device *d;
-
- ldt = obd->obd_type->typ_lu;
- if (ldt) {
- struct lu_context session_ctx;
- struct lu_env env;
-
- lu_context_init(&session_ctx, LCT_SESSION | LCT_SERVER_SESSION);
- session_ctx.lc_thread = NULL;
- lu_context_enter(&session_ctx);
-
- rc = lu_env_init(&env, ldt->ldt_ctx_tags);
- if (rc == 0) {
- env.le_ses = &session_ctx;
- d = ldt->ldt_ops->ldto_device_alloc(&env, ldt, cfg);
- lu_env_fini(&env);
- if (!IS_ERR(d)) {
- obd->obd_lu_dev = d;
- d->ld_obd = obd;
- rc = 0;
- } else {
- rc = PTR_ERR(d);
- }
- }
- lu_context_exit(&session_ctx);
- lu_context_fini(&session_ctx);
-
- } else {
- OBD_CHECK_DT_OP(obd, setup, -EOPNOTSUPP);
- OBD_COUNTER_INCREMENT(obd, setup);
- rc = OBP(obd, setup)(obd, cfg);
- }
- return rc;
-}
-
-static inline int obd_precleanup(struct obd_device *obd)
-{
- int rc;
- struct lu_device_type *ldt;
- struct lu_device *d;
-
- rc = obd_check_dev(obd);
- if (rc)
- return rc;
- ldt = obd->obd_type->typ_lu;
- d = obd->obd_lu_dev;
- if (ldt && d) {
- struct lu_env env;
-
- rc = lu_env_init(&env, ldt->ldt_ctx_tags);
- if (!rc) {
- ldt->ldt_ops->ldto_device_fini(&env, d);
- lu_env_fini(&env);
- }
- }
- OBD_CHECK_DT_OP(obd, precleanup, 0);
- OBD_COUNTER_INCREMENT(obd, precleanup);
-
- rc = OBP(obd, precleanup)(obd);
- return rc;
-}
-
-static inline int obd_cleanup(struct obd_device *obd)
-{
- int rc;
- struct lu_device_type *ldt;
- struct lu_device *d;
-
- rc = obd_check_dev(obd);
- if (rc)
- return rc;
-
- ldt = obd->obd_type->typ_lu;
- d = obd->obd_lu_dev;
- if (ldt && d) {
- struct lu_env env;
-
- rc = lu_env_init(&env, ldt->ldt_ctx_tags);
- if (rc == 0) {
- ldt->ldt_ops->ldto_device_free(&env, d);
- lu_env_fini(&env);
- obd->obd_lu_dev = NULL;
- }
- }
- OBD_CHECK_DT_OP(obd, cleanup, 0);
- OBD_COUNTER_INCREMENT(obd, cleanup);
-
- rc = OBP(obd, cleanup)(obd);
- return rc;
-}
-
-static inline void obd_cleanup_client_import(struct obd_device *obd)
-{
- /*
- * If we set up but never connected, the
- * client import will not have been cleaned.
- */
- down_write(&obd->u.cli.cl_sem);
- if (obd->u.cli.cl_import) {
- struct obd_import *imp;
-
- imp = obd->u.cli.cl_import;
- CDEBUG(D_CONFIG, "%s: client import never connected\n",
- obd->obd_name);
- ptlrpc_invalidate_import(imp);
- client_destroy_import(imp);
- obd->u.cli.cl_import = NULL;
- }
- up_write(&obd->u.cli.cl_sem);
-}
-
-static inline int
-obd_process_config(struct obd_device *obd, int datalen, void *data)
-{
- int rc;
- struct lu_device_type *ldt;
- struct lu_device *d;
-
- rc = obd_check_dev(obd);
- if (rc)
- return rc;
-
- obd->obd_process_conf = 1;
- ldt = obd->obd_type->typ_lu;
- d = obd->obd_lu_dev;
- if (ldt && d) {
- struct lu_env env;
-
- rc = lu_env_init(&env, ldt->ldt_ctx_tags);
- if (rc == 0) {
- rc = d->ld_ops->ldo_process_config(&env, d, data);
- lu_env_fini(&env);
- }
- } else {
- OBD_CHECK_DT_OP(obd, process_config, -EOPNOTSUPP);
- rc = OBP(obd, process_config)(obd, datalen, data);
- }
- OBD_COUNTER_INCREMENT(obd, process_config);
- obd->obd_process_conf = 0;
-
- return rc;
-}
-
-static inline int obd_create(const struct lu_env *env, struct obd_export *exp,
- struct obdo *obdo)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, create);
- EXP_COUNTER_INCREMENT(exp, create);
-
- rc = OBP(exp->exp_obd, create)(env, exp, obdo);
- return rc;
-}
-
-static inline int obd_destroy(const struct lu_env *env, struct obd_export *exp,
- struct obdo *obdo)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, destroy);
- EXP_COUNTER_INCREMENT(exp, destroy);
-
- rc = OBP(exp->exp_obd, destroy)(env, exp, obdo);
- return rc;
-}
-
-static inline int obd_getattr(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, getattr);
- EXP_COUNTER_INCREMENT(exp, getattr);
-
- rc = OBP(exp->exp_obd, getattr)(env, exp, oa);
- return rc;
-}
-
-static inline int obd_setattr(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, setattr);
- EXP_COUNTER_INCREMENT(exp, setattr);
-
- rc = OBP(exp->exp_obd, setattr)(env, exp, oa);
- return rc;
-}
-
-static inline int obd_add_conn(struct obd_import *imp, struct obd_uuid *uuid,
- int priority)
-{
- struct obd_device *obd = imp->imp_obd;
- int rc;
-
- rc = obd_check_dev_active(obd);
- if (rc)
- return rc;
- OBD_CHECK_DT_OP(obd, add_conn, -EOPNOTSUPP);
- OBD_COUNTER_INCREMENT(obd, add_conn);
-
- rc = OBP(obd, add_conn)(imp, uuid, priority);
- return rc;
-}
-
-static inline int obd_del_conn(struct obd_import *imp, struct obd_uuid *uuid)
-{
- struct obd_device *obd = imp->imp_obd;
- int rc;
-
- rc = obd_check_dev_active(obd);
- if (rc)
- return rc;
- OBD_CHECK_DT_OP(obd, del_conn, -EOPNOTSUPP);
- OBD_COUNTER_INCREMENT(obd, del_conn);
-
- rc = OBP(obd, del_conn)(imp, uuid);
- return rc;
-}
-
-static inline struct obd_uuid *obd_get_uuid(struct obd_export *exp)
-{
- struct obd_uuid *uuid;
-
- OBD_CHECK_DT_OP(exp->exp_obd, get_uuid, NULL);
- EXP_COUNTER_INCREMENT(exp, get_uuid);
-
- uuid = OBP(exp->exp_obd, get_uuid)(exp);
- return uuid;
-}
-
-/*
- * Create a new \a exp on device \a obd for the uuid \a cluuid
- * @param exp New export handle
- * @param d Connect data: the caller sets the flags it supports; on
- * return, only the flags also understood by the obd remain set.
- */
-static inline int obd_connect(const struct lu_env *env,
- struct obd_export **exp, struct obd_device *obd,
- struct obd_uuid *cluuid,
- struct obd_connect_data *data,
- void *localdata)
-{
- int rc;
- __u64 ocf = data ? data->ocd_connect_flags : 0; /*
- * for post-condition
- * check
- */
-
- rc = obd_check_dev_active(obd);
- if (rc)
- return rc;
- OBD_CHECK_DT_OP(obd, connect, -EOPNOTSUPP);
- OBD_COUNTER_INCREMENT(obd, connect);
-
- rc = OBP(obd, connect)(env, exp, obd, cluuid, data, localdata);
- /* check that only subset is granted */
- LASSERT(ergo(data, (data->ocd_connect_flags & ocf) ==
- data->ocd_connect_flags));
- return rc;
-}
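
To make the post-condition concrete: the caller advertises the flags it supports in ocd_connect_flags, and after a successful connect only the subset the target also understands remains set. A minimal, hypothetical caller sketch (the flag choice and logging are illustrative, not from this header):

	struct obd_connect_data data = { };
	struct obd_export *exp = NULL;
	int rc;

	/* Advertise what this client supports. */
	data.ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_GRANT;

	rc = obd_connect(env, &exp, obd, cluuid, &data, NULL);
	if (rc == 0)
		/* data.ocd_connect_flags is now the granted subset. */
		CDEBUG(D_INFO, "granted flags %#llx\n",
		       data.ocd_connect_flags);
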
-
-static inline int obd_reconnect(const struct lu_env *env,
- struct obd_export *exp,
- struct obd_device *obd,
- struct obd_uuid *cluuid,
- struct obd_connect_data *d,
- void *localdata)
-{
- int rc;
- __u64 ocf = d ? d->ocd_connect_flags : 0; /* for post-condition check */
-
- rc = obd_check_dev_active(obd);
- if (rc)
- return rc;
- OBD_CHECK_DT_OP(obd, reconnect, 0);
- OBD_COUNTER_INCREMENT(obd, reconnect);
-
- rc = OBP(obd, reconnect)(env, exp, obd, cluuid, d, localdata);
- /* check that only subset is granted */
- LASSERT(ergo(d, (d->ocd_connect_flags & ocf) == d->ocd_connect_flags));
- return rc;
-}
-
-static inline int obd_disconnect(struct obd_export *exp)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, disconnect);
- EXP_COUNTER_INCREMENT(exp, disconnect);
-
- rc = OBP(exp->exp_obd, disconnect)(exp);
- return rc;
-}
-
-static inline int obd_fid_init(struct obd_device *obd, struct obd_export *exp,
- enum lu_cli_type type)
-{
- int rc;
-
- OBD_CHECK_DT_OP(obd, fid_init, 0);
- OBD_COUNTER_INCREMENT(obd, fid_init);
-
- rc = OBP(obd, fid_init)(obd, exp, type);
- return rc;
-}
-
-static inline int obd_fid_fini(struct obd_device *obd)
-{
- int rc;
-
- OBD_CHECK_DT_OP(obd, fid_fini, 0);
- OBD_COUNTER_INCREMENT(obd, fid_fini);
-
- rc = OBP(obd, fid_fini)(obd);
- return rc;
-}
-
-static inline int obd_fid_alloc(const struct lu_env *env,
- struct obd_export *exp,
- struct lu_fid *fid,
- struct md_op_data *op_data)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, fid_alloc);
- EXP_COUNTER_INCREMENT(exp, fid_alloc);
-
- rc = OBP(exp->exp_obd, fid_alloc)(env, exp, fid, op_data);
- return rc;
-}
-
-static inline int obd_pool_new(struct obd_device *obd, char *poolname)
-{
- int rc;
-
- OBD_CHECK_DT_OP(obd, pool_new, -EOPNOTSUPP);
- OBD_COUNTER_INCREMENT(obd, pool_new);
-
- rc = OBP(obd, pool_new)(obd, poolname);
- return rc;
-}
-
-static inline int obd_pool_del(struct obd_device *obd, char *poolname)
-{
- int rc;
-
- OBD_CHECK_DT_OP(obd, pool_del, -EOPNOTSUPP);
- OBD_COUNTER_INCREMENT(obd, pool_del);
-
- rc = OBP(obd, pool_del)(obd, poolname);
- return rc;
-}
-
-static inline int obd_pool_add(struct obd_device *obd,
- char *poolname,
- char *ostname)
-{
- int rc;
-
- OBD_CHECK_DT_OP(obd, pool_add, -EOPNOTSUPP);
- OBD_COUNTER_INCREMENT(obd, pool_add);
-
- rc = OBP(obd, pool_add)(obd, poolname, ostname);
- return rc;
-}
-
-static inline int obd_pool_rem(struct obd_device *obd,
- char *poolname,
- char *ostname)
-{
- int rc;
-
- OBD_CHECK_DT_OP(obd, pool_rem, -EOPNOTSUPP);
- OBD_COUNTER_INCREMENT(obd, pool_rem);
-
- rc = OBP(obd, pool_rem)(obd, poolname, ostname);
- return rc;
-}
-
-static inline void obd_getref(struct obd_device *obd)
-{
- if (OBT(obd) && OBP(obd, getref)) {
- OBD_COUNTER_INCREMENT(obd, getref);
- OBP(obd, getref)(obd);
- }
-}
-
-static inline void obd_putref(struct obd_device *obd)
-{
- if (OBT(obd) && OBP(obd, putref)) {
- OBD_COUNTER_INCREMENT(obd, putref);
- OBP(obd, putref)(obd);
- }
-}
-
-static inline int obd_init_export(struct obd_export *exp)
-{
- int rc = 0;
-
- if ((exp)->exp_obd && OBT((exp)->exp_obd) &&
- OBP((exp)->exp_obd, init_export))
- rc = OBP(exp->exp_obd, init_export)(exp);
- return rc;
-}
-
-static inline int obd_destroy_export(struct obd_export *exp)
-{
- if ((exp)->exp_obd && OBT((exp)->exp_obd) &&
- OBP((exp)->exp_obd, destroy_export))
- OBP(exp->exp_obd, destroy_export)(exp);
- return 0;
-}
-
-/*
- * @max_age is the oldest time, in jiffies, for which we accept cached data.
- * If the cache is older than @max_age we will get a new value from the
- * target. Use a value of "jiffies + HZ" to guarantee freshness.
- */
-static inline int obd_statfs_async(struct obd_export *exp,
- struct obd_info *oinfo,
- __u64 max_age,
- struct ptlrpc_request_set *rqset)
-{
- int rc = 0;
- struct obd_device *obd;
-
- if (!exp || !exp->exp_obd)
- return -EINVAL;
-
- obd = exp->exp_obd;
- OBD_CHECK_DT_OP(obd, statfs, -EOPNOTSUPP);
- OBD_COUNTER_INCREMENT(obd, statfs);
-
- CDEBUG(D_SUPER, "%s: osfs %p age %llu, max_age %llu\n",
- obd->obd_name, &obd->obd_osfs, obd->obd_osfs_age, max_age);
- if (time_before64(obd->obd_osfs_age, max_age)) {
- rc = OBP(obd, statfs_async)(exp, oinfo, max_age, rqset);
- } else {
- CDEBUG(D_SUPER,
- "%s: use %p cache blocks %llu/%llu objects %llu/%llu\n",
- obd->obd_name, &obd->obd_osfs,
- obd->obd_osfs.os_bavail, obd->obd_osfs.os_blocks,
- obd->obd_osfs.os_ffree, obd->obd_osfs.os_files);
- spin_lock(&obd->obd_osfs_lock);
- memcpy(oinfo->oi_osfs, &obd->obd_osfs, sizeof(*oinfo->oi_osfs));
- spin_unlock(&obd->obd_osfs_lock);
- oinfo->oi_flags |= OBD_STATFS_FROM_CACHE;
- if (oinfo->oi_cb_up)
- oinfo->oi_cb_up(oinfo, 0);
- }
- return rc;
-}
-
-static inline int obd_statfs_rqset(struct obd_export *exp,
- struct obd_statfs *osfs, __u64 max_age,
- __u32 flags)
-{
- struct ptlrpc_request_set *set = NULL;
- struct obd_info oinfo = {
- .oi_osfs = osfs,
- .oi_flags = flags,
- };
- int rc = 0;
-
- set = ptlrpc_prep_set();
- if (!set)
- return -ENOMEM;
-
- rc = obd_statfs_async(exp, &oinfo, max_age, set);
- if (rc == 0)
- rc = ptlrpc_set_wait(set);
- ptlrpc_set_destroy(set);
- return rc;
-}
-
-/*
- * @max_age is the oldest time, in jiffies, for which we accept cached data.
- * If the cache is older than @max_age we will get a new value from the
- * target. Use a value of "jiffies + HZ" to guarantee freshness.
- */
-static inline int obd_statfs(const struct lu_env *env, struct obd_export *exp,
- struct obd_statfs *osfs, __u64 max_age,
- __u32 flags)
-{
- int rc = 0;
- struct obd_device *obd = exp->exp_obd;
-
- if (!obd)
- return -EINVAL;
-
- OBD_CHECK_DT_OP(obd, statfs, -EOPNOTSUPP);
- OBD_COUNTER_INCREMENT(obd, statfs);
-
- CDEBUG(D_SUPER, "osfs %llu, max_age %llu\n",
- obd->obd_osfs_age, max_age);
- if (time_before64(obd->obd_osfs_age, max_age)) {
- rc = OBP(obd, statfs)(env, exp, osfs, max_age, flags);
- if (rc == 0) {
- spin_lock(&obd->obd_osfs_lock);
- memcpy(&obd->obd_osfs, osfs, sizeof(obd->obd_osfs));
- obd->obd_osfs_age = get_jiffies_64();
- spin_unlock(&obd->obd_osfs_lock);
- }
- } else {
- CDEBUG(D_SUPER,
- "%s: use %p cache blocks %llu/%llu objects %llu/%llu\n",
- obd->obd_name, &obd->obd_osfs,
- obd->obd_osfs.os_bavail, obd->obd_osfs.os_blocks,
- obd->obd_osfs.os_ffree, obd->obd_osfs.os_files);
- spin_lock(&obd->obd_osfs_lock);
- memcpy(osfs, &obd->obd_osfs, sizeof(*osfs));
- spin_unlock(&obd->obd_osfs_lock);
- }
- return rc;
-}
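
The max_age convention in the comment above is easiest to see from the caller's side. A hypothetical sketch (the helper name and the fresh/cached split are illustrative, not from this header); the same contract applies to obd_statfs_async():

	static int example_statfs(const struct lu_env *env,
				  struct obd_export *exp,
				  struct obd_statfs *osfs, bool want_fresh)
	{
		__u64 max_age;

		if (want_fresh)
			/* A future timestamp no cache can satisfy: forces an RPC. */
			max_age = get_jiffies_64() + HZ;
		else
			/* Accept any value cached within the last second. */
			max_age = get_jiffies_64() - HZ;

		return obd_statfs(env, exp, osfs, max_age, 0);
	}
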
-
-static inline int obd_preprw(const struct lu_env *env, int cmd,
- struct obd_export *exp, struct obdo *oa,
- int objcount, struct obd_ioobj *obj,
- struct niobuf_remote *remote, int *pages,
- struct niobuf_local *local)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, preprw);
- EXP_COUNTER_INCREMENT(exp, preprw);
-
- rc = OBP(exp->exp_obd, preprw)(env, cmd, exp, oa, objcount, obj, remote,
- pages, local);
- return rc;
-}
-
-static inline int obd_commitrw(const struct lu_env *env, int cmd,
- struct obd_export *exp, struct obdo *oa,
- int objcount, struct obd_ioobj *obj,
- struct niobuf_remote *rnb, int pages,
- struct niobuf_local *local, int rc)
-{
- EXP_CHECK_DT_OP(exp, commitrw);
- EXP_COUNTER_INCREMENT(exp, commitrw);
-
- rc = OBP(exp->exp_obd, commitrw)(env, cmd, exp, oa, objcount, obj,
- rnb, pages, local, rc);
- return rc;
-}
-
-static inline int obd_iocontrol(unsigned int cmd, struct obd_export *exp,
- int len, void *karg, void __user *uarg)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, iocontrol);
- EXP_COUNTER_INCREMENT(exp, iocontrol);
-
- rc = OBP(exp->exp_obd, iocontrol)(cmd, exp, len, karg, uarg);
- return rc;
-}
-
-static inline void obd_import_event(struct obd_device *obd,
- struct obd_import *imp,
- enum obd_import_event event)
-{
- if (!obd) {
- CERROR("NULL device\n");
- return;
- }
- if (obd->obd_set_up && OBP(obd, import_event)) {
- OBD_COUNTER_INCREMENT(obd, import_event);
- OBP(obd, import_event)(obd, imp, event);
- }
-}
-
-static inline int obd_notify(struct obd_device *obd,
- struct obd_device *watched,
- enum obd_notify_event ev,
- void *data)
-{
- int rc;
-
- rc = obd_check_dev(obd);
- if (rc)
- return rc;
-
- if (!obd->obd_set_up) {
- CDEBUG(D_HA, "obd %s not set up\n", obd->obd_name);
- return -EINVAL;
- }
-
- if (!OBP(obd, notify)) {
- CDEBUG(D_HA, "obd %s has no notify handler\n", obd->obd_name);
- return -ENOSYS;
- }
-
- OBD_COUNTER_INCREMENT(obd, notify);
- rc = OBP(obd, notify)(obd, watched, ev, data);
- return rc;
-}
-
-static inline int obd_notify_observer(struct obd_device *observer,
- struct obd_device *observed,
- enum obd_notify_event ev,
- void *data)
-{
- int rc1;
- int rc2;
-
- struct obd_notify_upcall *onu;
-
- if (observer->obd_observer)
- rc1 = obd_notify(observer->obd_observer, observed, ev, data);
- else
- rc1 = 0;
- /*
- * Also, call non-obd listener, if any
- */
- onu = &observer->obd_upcall;
- if (onu->onu_upcall)
- rc2 = onu->onu_upcall(observer, observed, ev,
- onu->onu_owner, NULL);
- else
- rc2 = 0;
-
- return rc1 ? rc1 : rc2;
-}
-
-static inline int obd_quotactl(struct obd_export *exp,
- struct obd_quotactl *oqctl)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, quotactl);
- EXP_COUNTER_INCREMENT(exp, quotactl);
-
- rc = OBP(exp->exp_obd, quotactl)(exp->exp_obd, exp, oqctl);
- return rc;
-}
-
-static inline int obd_health_check(const struct lu_env *env,
- struct obd_device *obd)
-{
- /*
- * Returns: 0 if healthy;
- * >0 if unhealthy, plus a reason code or flag (only reason == 1 is
- * supported right now; better reasons or flags will need to be
- * defined in the future);
- * <0 on error.
- */
- int rc;
-
- /* don't use EXP_CHECK_DT_OP, because NULL method is normal here */
- if (!obd || !OBT(obd)) {
- CERROR("cleaned up obd\n");
- return -EOPNOTSUPP;
- }
- if (!obd->obd_set_up || obd->obd_stopping)
- return 0;
- if (!OBP(obd, health_check))
- return 0;
-
- rc = OBP(obd, health_check)(env, obd);
- return rc;
-}
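
A sketch of how a caller might act on the tri-state result described above (the log messages are illustrative):

	int rc = obd_health_check(env, obd);

	if (rc < 0)
		CERROR("%s: health check error: rc = %d\n", obd->obd_name, rc);
	else if (rc > 0)
		CWARN("%s: unhealthy, reason %d\n", obd->obd_name, rc);
	/* rc == 0: healthy, nothing to do */
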
-
-static inline int obd_register_observer(struct obd_device *obd,
- struct obd_device *observer)
-{
- int rc;
-
- rc = obd_check_dev(obd);
- if (rc)
- return rc;
- down_write(&obd->obd_observer_link_sem);
- if (obd->obd_observer && observer) {
- up_write(&obd->obd_observer_link_sem);
- return -EALREADY;
- }
- obd->obd_observer = observer;
- up_write(&obd->obd_observer_link_sem);
- return 0;
-}
-
-/* metadata helpers */
-static inline int md_getstatus(struct obd_export *exp, struct lu_fid *fid)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, getstatus);
- EXP_MD_COUNTER_INCREMENT(exp, getstatus);
- rc = MDP(exp->exp_obd, getstatus)(exp, fid);
- return rc;
-}
-
-static inline int md_getattr(struct obd_export *exp, struct md_op_data *op_data,
- struct ptlrpc_request **request)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, getattr);
- EXP_MD_COUNTER_INCREMENT(exp, getattr);
- rc = MDP(exp->exp_obd, getattr)(exp, op_data, request);
- return rc;
-}
-
-static inline int md_null_inode(struct obd_export *exp,
- const struct lu_fid *fid)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, null_inode);
- EXP_MD_COUNTER_INCREMENT(exp, null_inode);
- rc = MDP(exp->exp_obd, null_inode)(exp, fid);
- return rc;
-}
-
-static inline int md_close(struct obd_export *exp, struct md_op_data *op_data,
- struct md_open_data *mod,
- struct ptlrpc_request **request)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, close);
- EXP_MD_COUNTER_INCREMENT(exp, close);
- rc = MDP(exp->exp_obd, close)(exp, op_data, mod, request);
- return rc;
-}
-
-static inline int md_create(struct obd_export *exp, struct md_op_data *op_data,
- const void *data, size_t datalen, umode_t mode,
- uid_t uid, gid_t gid, kernel_cap_t cap_effective,
- __u64 rdev, struct ptlrpc_request **request)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, create);
- EXP_MD_COUNTER_INCREMENT(exp, create);
- rc = MDP(exp->exp_obd, create)(exp, op_data, data, datalen, mode,
- uid, gid, cap_effective, rdev, request);
- return rc;
-}
-
-static inline int md_enqueue(struct obd_export *exp,
- struct ldlm_enqueue_info *einfo,
- const union ldlm_policy_data *policy,
- struct md_op_data *op_data,
- struct lustre_handle *lockh,
- __u64 extra_lock_flags)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, enqueue);
- EXP_MD_COUNTER_INCREMENT(exp, enqueue);
- rc = MDP(exp->exp_obd, enqueue)(exp, einfo, policy, op_data, lockh,
- extra_lock_flags);
- return rc;
-}
-
-static inline int md_getattr_name(struct obd_export *exp,
- struct md_op_data *op_data,
- struct ptlrpc_request **request)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, getattr_name);
- EXP_MD_COUNTER_INCREMENT(exp, getattr_name);
- rc = MDP(exp->exp_obd, getattr_name)(exp, op_data, request);
- return rc;
-}
-
-static inline int md_intent_lock(struct obd_export *exp,
- struct md_op_data *op_data,
- struct lookup_intent *it,
- struct ptlrpc_request **reqp,
- ldlm_blocking_callback cb_blocking,
- __u64 extra_lock_flags)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, intent_lock);
- EXP_MD_COUNTER_INCREMENT(exp, intent_lock);
- rc = MDP(exp->exp_obd, intent_lock)(exp, op_data, it, reqp,
- cb_blocking, extra_lock_flags);
- return rc;
-}
-
-static inline int md_link(struct obd_export *exp, struct md_op_data *op_data,
- struct ptlrpc_request **request)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, link);
- EXP_MD_COUNTER_INCREMENT(exp, link);
- rc = MDP(exp->exp_obd, link)(exp, op_data, request);
- return rc;
-}
-
-static inline int md_rename(struct obd_export *exp, struct md_op_data *op_data,
- const char *old, size_t oldlen, const char *new,
- size_t newlen, struct ptlrpc_request **request)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, rename);
- EXP_MD_COUNTER_INCREMENT(exp, rename);
- rc = MDP(exp->exp_obd, rename)(exp, op_data, old, oldlen, new,
- newlen, request);
- return rc;
-}
-
-static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data,
- void *ea, size_t ealen,
- struct ptlrpc_request **request)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, setattr);
- EXP_MD_COUNTER_INCREMENT(exp, setattr);
- rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen, request);
- return rc;
-}
-
-static inline int md_sync(struct obd_export *exp, const struct lu_fid *fid,
- struct ptlrpc_request **request)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, sync);
- EXP_MD_COUNTER_INCREMENT(exp, sync);
- rc = MDP(exp->exp_obd, sync)(exp, fid, request);
- return rc;
-}
-
-static inline int md_read_page(struct obd_export *exp,
- struct md_op_data *op_data,
- struct md_callback *cb_op,
- __u64 hash_offset,
- struct page **ppage)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, read_page);
- EXP_MD_COUNTER_INCREMENT(exp, read_page);
- rc = MDP(exp->exp_obd, read_page)(exp, op_data, cb_op, hash_offset,
- ppage);
- return rc;
-}
-
-static inline int md_unlink(struct obd_export *exp, struct md_op_data *op_data,
- struct ptlrpc_request **request)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, unlink);
- EXP_MD_COUNTER_INCREMENT(exp, unlink);
- rc = MDP(exp->exp_obd, unlink)(exp, op_data, request);
- return rc;
-}
-
-static inline int md_get_lustre_md(struct obd_export *exp,
- struct ptlrpc_request *req,
- struct obd_export *dt_exp,
- struct obd_export *md_exp,
- struct lustre_md *md)
-{
- EXP_CHECK_MD_OP(exp, get_lustre_md);
- EXP_MD_COUNTER_INCREMENT(exp, get_lustre_md);
- return MDP(exp->exp_obd, get_lustre_md)(exp, req, dt_exp, md_exp, md);
-}
-
-static inline int md_free_lustre_md(struct obd_export *exp,
- struct lustre_md *md)
-{
- EXP_CHECK_MD_OP(exp, free_lustre_md);
- EXP_MD_COUNTER_INCREMENT(exp, free_lustre_md);
- return MDP(exp->exp_obd, free_lustre_md)(exp, md);
-}
-
-static inline int md_merge_attr(struct obd_export *exp,
- const struct lmv_stripe_md *lsm,
- struct cl_attr *attr,
- ldlm_blocking_callback cb)
-{
- EXP_CHECK_MD_OP(exp, merge_attr);
- EXP_MD_COUNTER_INCREMENT(exp, merge_attr);
- return MDP(exp->exp_obd, merge_attr)(exp, lsm, attr, cb);
-}
-
-static inline int md_setxattr(struct obd_export *exp, const struct lu_fid *fid,
- u64 obd_md_valid, const char *name,
- const char *value, size_t value_size,
- unsigned int xattr_flags, u32 suppgid,
- struct ptlrpc_request **request)
-{
- EXP_CHECK_MD_OP(exp, setxattr);
- EXP_MD_COUNTER_INCREMENT(exp, setxattr);
- return MDP(exp->exp_obd, setxattr)(exp, fid, obd_md_valid, name,
- value, value_size, xattr_flags,
- suppgid, request);
-}
-
-static inline int md_getxattr(struct obd_export *exp, const struct lu_fid *fid,
- u64 obd_md_valid, const char *name,
- size_t buf_size, struct ptlrpc_request **req)
-{
- EXP_CHECK_MD_OP(exp, getxattr);
- EXP_MD_COUNTER_INCREMENT(exp, getxattr);
- return MDP(exp->exp_obd, getxattr)(exp, fid, obd_md_valid, name,
- buf_size, req);
-}
-
-static inline int md_set_open_replay_data(struct obd_export *exp,
- struct obd_client_handle *och,
- struct lookup_intent *it)
-{
- EXP_CHECK_MD_OP(exp, set_open_replay_data);
- EXP_MD_COUNTER_INCREMENT(exp, set_open_replay_data);
- return MDP(exp->exp_obd, set_open_replay_data)(exp, och, it);
-}
-
-static inline int md_clear_open_replay_data(struct obd_export *exp,
- struct obd_client_handle *och)
-{
- EXP_CHECK_MD_OP(exp, clear_open_replay_data);
- EXP_MD_COUNTER_INCREMENT(exp, clear_open_replay_data);
- return MDP(exp->exp_obd, clear_open_replay_data)(exp, och);
-}
-
-static inline int md_set_lock_data(struct obd_export *exp,
- const struct lustre_handle *lockh,
- void *data, __u64 *bits)
-{
- EXP_CHECK_MD_OP(exp, set_lock_data);
- EXP_MD_COUNTER_INCREMENT(exp, set_lock_data);
- return MDP(exp->exp_obd, set_lock_data)(exp, lockh, data, bits);
-}
-
-static inline int md_cancel_unused(struct obd_export *exp,
- const struct lu_fid *fid,
- union ldlm_policy_data *policy,
- enum ldlm_mode mode,
- enum ldlm_cancel_flags flags,
- void *opaque)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, cancel_unused);
- EXP_MD_COUNTER_INCREMENT(exp, cancel_unused);
-
- rc = MDP(exp->exp_obd, cancel_unused)(exp, fid, policy, mode,
- flags, opaque);
- return rc;
-}
-
-static inline enum ldlm_mode md_lock_match(struct obd_export *exp, __u64 flags,
- const struct lu_fid *fid,
- enum ldlm_type type,
- union ldlm_policy_data *policy,
- enum ldlm_mode mode,
- struct lustre_handle *lockh)
-{
- EXP_CHECK_MD_OP(exp, lock_match);
- EXP_MD_COUNTER_INCREMENT(exp, lock_match);
- return MDP(exp->exp_obd, lock_match)(exp, flags, fid, type,
- policy, mode, lockh);
-}
-
-static inline int md_init_ea_size(struct obd_export *exp, u32 easize,
- u32 def_asize)
-{
- EXP_CHECK_MD_OP(exp, init_ea_size);
- EXP_MD_COUNTER_INCREMENT(exp, init_ea_size);
- return MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize);
-}
-
-static inline int md_intent_getattr_async(struct obd_export *exp,
- struct md_enqueue_info *minfo)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, intent_getattr_async);
- EXP_MD_COUNTER_INCREMENT(exp, intent_getattr_async);
- rc = MDP(exp->exp_obd, intent_getattr_async)(exp, minfo);
- return rc;
-}
-
-static inline int md_revalidate_lock(struct obd_export *exp,
- struct lookup_intent *it,
- struct lu_fid *fid, __u64 *bits)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, revalidate_lock);
- EXP_MD_COUNTER_INCREMENT(exp, revalidate_lock);
- rc = MDP(exp->exp_obd, revalidate_lock)(exp, it, fid, bits);
- return rc;
-}
-
-static inline int md_get_fid_from_lsm(struct obd_export *exp,
- const struct lmv_stripe_md *lsm,
- const char *name, int namelen,
- struct lu_fid *fid)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, get_fid_from_lsm);
- EXP_MD_COUNTER_INCREMENT(exp, get_fid_from_lsm);
- rc = MDP(exp->exp_obd, get_fid_from_lsm)(exp, lsm, name, namelen, fid);
- return rc;
-}
-
-/*
- * Unpack an MD struct from disk to in-memory format.
- * Returns +ve size of unpacked MD (0 for free), or -ve error.
- *
- * If *plsm != NULL and lmm == NULL then *plsm will be freed.
- * If *plsm == NULL then it will be allocated.
- */
-static inline int md_unpackmd(struct obd_export *exp,
- struct lmv_stripe_md **plsm,
- const union lmv_mds_md *lmm, size_t lmm_size)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, unpackmd);
- EXP_MD_COUNTER_INCREMENT(exp, unpackmd);
- rc = MDP(exp->exp_obd, unpackmd)(exp, plsm, lmm, lmm_size);
- return rc;
-}
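
The allocate/free contract documented above reads more clearly as a usage sketch (error handling elided; illustrative only):

	struct lmv_stripe_md *lsm = NULL;
	int rc;

	/* *plsm == NULL on entry, so md_unpackmd() allocates it. */
	rc = md_unpackmd(exp, &lsm, lmm, lmm_size);
	if (rc < 0)
		return rc;

	/* ... use lsm ... */

	/* lmm == NULL with *plsm set: frees the previously unpacked md. */
	md_unpackmd(exp, &lsm, NULL, 0);
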
-
-/* OBD Metadata Support */
-
-int obd_init_caches(void);
-void obd_cleanup_caches(void);
-
-/* support routines */
-extern struct kmem_cache *obdo_cachep;
-
-typedef int (*register_lwp_cb)(void *data);
-
-struct lwp_register_item {
- struct obd_export **lri_exp;
- register_lwp_cb lri_cb_func;
- void *lri_cb_data;
- struct list_head lri_list;
- char lri_name[MTI_NAME_MAXLEN];
-};
-
-/*
- * I'm as embarrassed about this as you are.
- *
- * <shaver> // XXX do not look into _superhack with remaining eye
- * <shaver> // XXX if this were any uglier, I'd get my own show on MTV
- */
-extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
-
-/* obd_mount.c */
-int lustre_unregister_fs(void);
-int lustre_register_fs(void);
-int lustre_check_exclusion(struct super_block *sb, char *svname);
-
-/* sysctl.c */
-int obd_sysctl_init(void);
-
-/* uuid.c */
-typedef __u8 class_uuid_t[16];
-void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out);
-
-/* lustre_peer.c */
-int lustre_uuid_to_peer(const char *uuid, lnet_nid_t *peer_nid, int index);
-int class_add_uuid(const char *uuid, __u64 nid);
-int class_del_uuid(const char *uuid);
-int class_check_uuid(struct obd_uuid *uuid, __u64 nid);
-void class_init_uuidlist(void);
-void class_exit_uuidlist(void);
-
-/* class_obd.c */
-extern char obd_jobid_node[];
-extern struct miscdevice obd_psdev;
-extern spinlock_t obd_types_lock;
-int class_procfs_init(void);
-int class_procfs_clean(void);
-
-/* prng.c */
-#define ll_generate_random_uuid(uuid_out) \
- get_random_bytes(uuid_out, sizeof(class_uuid_t))
-
-/* statfs_pack.c */
-struct kstatfs;
-void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs);
-void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs);
-
-/* root squash info */
-struct rw_semaphore;
-struct root_squash_info {
- uid_t rsi_uid;
- gid_t rsi_gid;
- struct list_head rsi_nosquash_nids;
- struct rw_semaphore rsi_sem;
-};
-
-/* linux-module.c */
-int obd_ioctl_getdata(char **buf, int *len, void __user *arg);
-
-#endif /* __CLASS_OBD_H */
diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h
deleted file mode 100644
index 9e41633823f7..000000000000
--- a/drivers/staging/lustre/lustre/include/obd_support.h
+++ /dev/null
@@ -1,517 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef _OBD_SUPPORT
-#define _OBD_SUPPORT
-
-#include <linux/slab.h>
-#include <linux/sched/signal.h>
-
-#include <lustre_compat.h>
-#include <lprocfs_status.h>
-
-/* global variables */
-extern unsigned int obd_debug_peer_on_timeout;
-extern unsigned int obd_dump_on_timeout;
-extern unsigned int obd_dump_on_eviction;
-/* obd_timeout should only be used for recovery, not for
- * networking / disk / timings affected by load (use Adaptive Timeouts)
- */
-extern unsigned int obd_timeout; /* seconds */
-extern unsigned int obd_timeout_set;
-extern unsigned int at_min;
-extern unsigned int at_max;
-extern unsigned int at_history;
-extern int at_early_margin;
-extern int at_extra;
-extern unsigned long obd_max_dirty_pages;
-extern atomic_long_t obd_dirty_pages;
-extern atomic_long_t obd_dirty_transit_pages;
-extern char obd_jobid_var[];
-
-/* Timeout definitions */
-#define OBD_TIMEOUT_DEFAULT 100
-/* Time to wait for all clients to reconnect during recovery (hard limit) */
-#define OBD_RECOVERY_TIME_HARD (obd_timeout * 9)
-/* Time to wait for all clients to reconnect during recovery (soft limit) */
-/* Should be very conservative; must catch the first reconnect after reboot */
-#define OBD_RECOVERY_TIME_SOFT (obd_timeout * 3)
-/* Change recovery-small 26b time if you change this */
-#define PING_INTERVAL max(obd_timeout / 4, 1U)
-/* a bit more than maximal journal commit time in seconds */
-#define PING_INTERVAL_SHORT min(PING_INTERVAL, 7U)
-/* A client may skip one ping; we must wait at least 2.5 ping intervals. But for multiple
- * failover targets the client only pings one server at a time, and pings
- * can be lost on a loaded network. Since eviction has serious consequences,
- * and there's no urgent need to evict a client just because it's idle, we
- * should be very conservative here.
- */
-#define PING_EVICT_TIMEOUT (PING_INTERVAL * 6)
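
For example, with the default obd_timeout of 100 seconds (OBD_TIMEOUT_DEFAULT), PING_INTERVAL evaluates to max(100 / 4, 1) = 25 seconds, so PING_EVICT_TIMEOUT is 25 * 6 = 150 seconds: six ping intervals must elapse before eviction is considered.
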
-#define DISK_TIMEOUT 50 /* Beyond this we warn about disk speed */
-#define CONNECTION_SWITCH_MIN 5U /* Connection switching rate limiter */
-/* Max connect interval for nonresponsive servers; ~50s to avoid building up
- * connect requests in the LND queues, but within obd_timeout so we don't
- * miss the recovery window
- */
-#define CONNECTION_SWITCH_MAX min(50U, max(CONNECTION_SWITCH_MIN, obd_timeout))
-#define CONNECTION_SWITCH_INC 5 /* Connection timeout backoff */
-/* In general this should be low to have quick detection of a system
- * running on a backup server. (If it's too low, import_select_connection
- * will increase the timeout anyhow.)
- */
-#define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN, obd_timeout / 20)
-/* The max delay between connects is SWITCH_MAX + SWITCH_INC + INITIAL */
-#define RECONNECT_DELAY_MAX (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC + \
- INITIAL_CONNECT_TIMEOUT)
-/* The min time a target should wait for clients to reconnect in recovery */
-#define OBD_RECOVERY_TIME_MIN (2 * RECONNECT_DELAY_MAX)
-#define OBD_IR_FACTOR_MIN 1
-#define OBD_IR_FACTOR_MAX 10
-#define OBD_IR_FACTOR_DEFAULT (OBD_IR_FACTOR_MAX / 2)
-/* default timeout for the MGS to become IR_FULL */
-#define OBD_IR_MGS_TIMEOUT (4 * obd_timeout)
-#define LONG_UNLINK 300 /* Unlink should happen before now */
-
-/**
- * Grant shrink interval: if the client has been "idle" for more than this
- * interval, the ll_grant thread will return the requested grant space to
- * the filter
- */
-#define GRANT_SHRINK_INTERVAL 1200 /* 20 minutes */
-
-#define OBD_FAIL_MDS 0x100
-#define OBD_FAIL_MDS_HANDLE_UNPACK 0x101
-#define OBD_FAIL_MDS_GETATTR_NET 0x102
-#define OBD_FAIL_MDS_GETATTR_PACK 0x103
-#define OBD_FAIL_MDS_READPAGE_NET 0x104
-#define OBD_FAIL_MDS_READPAGE_PACK 0x105
-#define OBD_FAIL_MDS_SENDPAGE 0x106
-#define OBD_FAIL_MDS_REINT_NET 0x107
-#define OBD_FAIL_MDS_REINT_UNPACK 0x108
-#define OBD_FAIL_MDS_REINT_SETATTR 0x109
-#define OBD_FAIL_MDS_REINT_SETATTR_WRITE 0x10a
-#define OBD_FAIL_MDS_REINT_CREATE 0x10b
-#define OBD_FAIL_MDS_REINT_CREATE_WRITE 0x10c
-#define OBD_FAIL_MDS_REINT_UNLINK 0x10d
-#define OBD_FAIL_MDS_REINT_UNLINK_WRITE 0x10e
-#define OBD_FAIL_MDS_REINT_LINK 0x10f
-#define OBD_FAIL_MDS_REINT_LINK_WRITE 0x110
-#define OBD_FAIL_MDS_REINT_RENAME 0x111
-#define OBD_FAIL_MDS_REINT_RENAME_WRITE 0x112
-#define OBD_FAIL_MDS_OPEN_NET 0x113
-#define OBD_FAIL_MDS_OPEN_PACK 0x114
-#define OBD_FAIL_MDS_CLOSE_NET 0x115
-#define OBD_FAIL_MDS_CLOSE_PACK 0x116
-#define OBD_FAIL_MDS_CONNECT_NET 0x117
-#define OBD_FAIL_MDS_CONNECT_PACK 0x118
-#define OBD_FAIL_MDS_REINT_NET_REP 0x119
-#define OBD_FAIL_MDS_DISCONNECT_NET 0x11a
-#define OBD_FAIL_MDS_GETSTATUS_NET 0x11b
-#define OBD_FAIL_MDS_GETSTATUS_PACK 0x11c
-#define OBD_FAIL_MDS_STATFS_PACK 0x11d
-#define OBD_FAIL_MDS_STATFS_NET 0x11e
-#define OBD_FAIL_MDS_GETATTR_NAME_NET 0x11f
-#define OBD_FAIL_MDS_PIN_NET 0x120
-#define OBD_FAIL_MDS_UNPIN_NET 0x121
-#define OBD_FAIL_MDS_ALL_REPLY_NET 0x122
-#define OBD_FAIL_MDS_ALL_REQUEST_NET 0x123
-#define OBD_FAIL_MDS_SYNC_NET 0x124
-#define OBD_FAIL_MDS_SYNC_PACK 0x125
-/* OBD_FAIL_MDS_DONE_WRITING_NET 0x126 obsolete since 2.8.0 */
-/* OBD_FAIL_MDS_DONE_WRITING_PACK 0x127 obsolete since 2.8.0 */
-#define OBD_FAIL_MDS_ALLOC_OBDO 0x128
-#define OBD_FAIL_MDS_PAUSE_OPEN 0x129
-#define OBD_FAIL_MDS_STATFS_LCW_SLEEP 0x12a
-#define OBD_FAIL_MDS_OPEN_CREATE 0x12b
-#define OBD_FAIL_MDS_OST_SETATTR 0x12c
-/* OBD_FAIL_MDS_QUOTACHECK_NET 0x12d obsolete since 2.4 */
-#define OBD_FAIL_MDS_QUOTACTL_NET 0x12e
-#define OBD_FAIL_MDS_CLIENT_ADD 0x12f
-#define OBD_FAIL_MDS_GETXATTR_NET 0x130
-#define OBD_FAIL_MDS_GETXATTR_PACK 0x131
-#define OBD_FAIL_MDS_SETXATTR_NET 0x132
-#define OBD_FAIL_MDS_SETXATTR 0x133
-#define OBD_FAIL_MDS_SETXATTR_WRITE 0x134
-#define OBD_FAIL_MDS_FS_SETUP 0x135
-#define OBD_FAIL_MDS_RESEND 0x136
-#define OBD_FAIL_MDS_LLOG_CREATE_FAILED 0x137
-#define OBD_FAIL_MDS_LOV_SYNC_RACE 0x138
-#define OBD_FAIL_MDS_OSC_PRECREATE 0x139
-#define OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT 0x13a
-#define OBD_FAIL_MDS_CLOSE_NET_REP 0x13b
-#define OBD_FAIL_MDS_BLOCK_QUOTA_REQ 0x13c
-#define OBD_FAIL_MDS_DROP_QUOTA_REQ 0x13d
-#define OBD_FAIL_MDS_REMOVE_COMMON_EA 0x13e
-#define OBD_FAIL_MDS_ALLOW_COMMON_EA_SETTING 0x13f
-#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD 0x140
-#define OBD_FAIL_MDS_LOV_PREP_CREATE 0x141
-#define OBD_FAIL_MDS_REINT_DELAY 0x142
-#define OBD_FAIL_MDS_READLINK_EPROTO 0x143
-#define OBD_FAIL_MDS_OPEN_WAIT_CREATE 0x144
-#define OBD_FAIL_MDS_PDO_LOCK 0x145
-#define OBD_FAIL_MDS_PDO_LOCK2 0x146
-#define OBD_FAIL_MDS_OSC_CREATE_FAIL 0x147
-#define OBD_FAIL_MDS_NEGATIVE_POSITIVE 0x148
-#define OBD_FAIL_MDS_HSM_STATE_GET_NET 0x149
-#define OBD_FAIL_MDS_HSM_STATE_SET_NET 0x14a
-#define OBD_FAIL_MDS_HSM_PROGRESS_NET 0x14b
-#define OBD_FAIL_MDS_HSM_REQUEST_NET 0x14c
-#define OBD_FAIL_MDS_HSM_CT_REGISTER_NET 0x14d
-#define OBD_FAIL_MDS_HSM_CT_UNREGISTER_NET 0x14e
-#define OBD_FAIL_MDS_SWAP_LAYOUTS_NET 0x14f
-#define OBD_FAIL_MDS_HSM_ACTION_NET 0x150
-#define OBD_FAIL_MDS_CHANGELOG_INIT 0x151
-
-/* layout lock */
-#define OBD_FAIL_MDS_NO_LL_GETATTR 0x170
-#define OBD_FAIL_MDS_NO_LL_OPEN 0x171
-#define OBD_FAIL_MDS_LL_BLOCK 0x172
-
-/* CMD */
-#define OBD_FAIL_MDS_IS_SUBDIR_NET 0x180
-#define OBD_FAIL_MDS_IS_SUBDIR_PACK 0x181
-#define OBD_FAIL_MDS_SET_INFO_NET 0x182
-#define OBD_FAIL_MDS_WRITEPAGE_NET 0x183
-#define OBD_FAIL_MDS_WRITEPAGE_PACK 0x184
-#define OBD_FAIL_MDS_RECOVERY_ACCEPTS_GAPS 0x185
-#define OBD_FAIL_MDS_GET_INFO_NET 0x186
-#define OBD_FAIL_MDS_DQACQ_NET 0x187
-
-/* OI scrub */
-#define OBD_FAIL_OSD_SCRUB_DELAY 0x190
-#define OBD_FAIL_OSD_SCRUB_CRASH 0x191
-#define OBD_FAIL_OSD_SCRUB_FATAL 0x192
-#define OBD_FAIL_OSD_FID_MAPPING 0x193
-#define OBD_FAIL_OSD_LMA_INCOMPAT 0x194
-#define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
-
-#define OBD_FAIL_OST 0x200
-#define OBD_FAIL_OST_CONNECT_NET 0x201
-#define OBD_FAIL_OST_DISCONNECT_NET 0x202
-#define OBD_FAIL_OST_GET_INFO_NET 0x203
-#define OBD_FAIL_OST_CREATE_NET 0x204
-#define OBD_FAIL_OST_DESTROY_NET 0x205
-#define OBD_FAIL_OST_GETATTR_NET 0x206
-#define OBD_FAIL_OST_SETATTR_NET 0x207
-#define OBD_FAIL_OST_OPEN_NET 0x208
-#define OBD_FAIL_OST_CLOSE_NET 0x209
-#define OBD_FAIL_OST_BRW_NET 0x20a
-#define OBD_FAIL_OST_PUNCH_NET 0x20b
-#define OBD_FAIL_OST_STATFS_NET 0x20c
-#define OBD_FAIL_OST_HANDLE_UNPACK 0x20d
-#define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e
-#define OBD_FAIL_OST_BRW_READ_BULK 0x20f
-#define OBD_FAIL_OST_SYNC_NET 0x210
-#define OBD_FAIL_OST_ALL_REPLY_NET 0x211
-#define OBD_FAIL_OST_ALL_REQUEST_NET 0x212
-#define OBD_FAIL_OST_LDLM_REPLY_NET 0x213
-#define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214
-#define OBD_FAIL_OST_ENOSPC 0x215
-#define OBD_FAIL_OST_EROFS 0x216
-#define OBD_FAIL_OST_ENOENT 0x217
-/* OBD_FAIL_OST_QUOTACHECK_NET 0x218 obsolete since 2.4 */
-#define OBD_FAIL_OST_QUOTACTL_NET 0x219
-#define OBD_FAIL_OST_CHECKSUM_RECEIVE 0x21a
-#define OBD_FAIL_OST_CHECKSUM_SEND 0x21b
-#define OBD_FAIL_OST_BRW_SIZE 0x21c
-#define OBD_FAIL_OST_DROP_REQ 0x21d
-#define OBD_FAIL_OST_SETATTR_CREDITS 0x21e
-#define OBD_FAIL_OST_HOLD_WRITE_RPC 0x21f
-#define OBD_FAIL_OST_BRW_WRITE_BULK2 0x220
-#define OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT 0x221
-#define OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT 0x222
-#define OBD_FAIL_OST_PAUSE_CREATE 0x223
-#define OBD_FAIL_OST_BRW_PAUSE_PACK 0x224
-#define OBD_FAIL_OST_CONNECT_NET2 0x225
-#define OBD_FAIL_OST_NOMEM 0x226
-#define OBD_FAIL_OST_BRW_PAUSE_BULK2 0x227
-#define OBD_FAIL_OST_MAPBLK_ENOSPC 0x228
-#define OBD_FAIL_OST_ENOINO 0x229
-#define OBD_FAIL_OST_DQACQ_NET 0x230
-#define OBD_FAIL_OST_STATFS_EINPROGRESS 0x231
-#define OBD_FAIL_OST_SET_INFO_NET 0x232
-
-#define OBD_FAIL_LDLM 0x300
-#define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301
-#define OBD_FAIL_LDLM_ENQUEUE_NET 0x302
-#define OBD_FAIL_LDLM_CONVERT_NET 0x303
-#define OBD_FAIL_LDLM_CANCEL_NET 0x304
-#define OBD_FAIL_LDLM_BL_CALLBACK_NET 0x305
-#define OBD_FAIL_LDLM_CP_CALLBACK_NET 0x306
-#define OBD_FAIL_LDLM_GL_CALLBACK_NET 0x307
-#define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308
-#define OBD_FAIL_LDLM_ENQUEUE_INTENT_ERR 0x309
-#define OBD_FAIL_LDLM_CREATE_RESOURCE 0x30a
-#define OBD_FAIL_LDLM_ENQUEUE_BLOCKED 0x30b
-#define OBD_FAIL_LDLM_REPLY 0x30c
-#define OBD_FAIL_LDLM_RECOV_CLIENTS 0x30d
-#define OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT 0x30e
-#define OBD_FAIL_LDLM_GLIMPSE 0x30f
-#define OBD_FAIL_LDLM_CANCEL_RACE 0x310
-#define OBD_FAIL_LDLM_CANCEL_EVICT_RACE 0x311
-#define OBD_FAIL_LDLM_PAUSE_CANCEL 0x312
-#define OBD_FAIL_LDLM_CLOSE_THREAD 0x313
-#define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE 0x314
-#define OBD_FAIL_LDLM_CP_CB_WAIT 0x315
-#define OBD_FAIL_LDLM_OST_FAIL_RACE 0x316
-#define OBD_FAIL_LDLM_INTR_CP_AST 0x317
-#define OBD_FAIL_LDLM_CP_BL_RACE 0x318
-#define OBD_FAIL_LDLM_NEW_LOCK 0x319
-#define OBD_FAIL_LDLM_AGL_DELAY 0x31a
-#define OBD_FAIL_LDLM_AGL_NOLOCK 0x31b
-#define OBD_FAIL_LDLM_OST_LVB 0x31c
-#define OBD_FAIL_LDLM_ENQUEUE_HANG 0x31d
-#define OBD_FAIL_LDLM_PAUSE_CANCEL2 0x31f
-#define OBD_FAIL_LDLM_CP_CB_WAIT2 0x320
-#define OBD_FAIL_LDLM_CP_CB_WAIT3 0x321
-#define OBD_FAIL_LDLM_CP_CB_WAIT4 0x322
-#define OBD_FAIL_LDLM_CP_CB_WAIT5 0x323
-
-#define OBD_FAIL_LDLM_GRANT_CHECK 0x32a
-
-/* LOCKLESS IO */
-#define OBD_FAIL_LDLM_SET_CONTENTION 0x385
-
-#define OBD_FAIL_OSC 0x400
-#define OBD_FAIL_OSC_BRW_READ_BULK 0x401
-#define OBD_FAIL_OSC_BRW_WRITE_BULK 0x402
-#define OBD_FAIL_OSC_LOCK_BL_AST 0x403
-#define OBD_FAIL_OSC_LOCK_CP_AST 0x404
-#define OBD_FAIL_OSC_MATCH 0x405
-#define OBD_FAIL_OSC_BRW_PREP_REQ 0x406
-#define OBD_FAIL_OSC_SHUTDOWN 0x407
-#define OBD_FAIL_OSC_CHECKSUM_RECEIVE 0x408
-#define OBD_FAIL_OSC_CHECKSUM_SEND 0x409
-#define OBD_FAIL_OSC_BRW_PREP_REQ2 0x40a
-#define OBD_FAIL_OSC_CONNECT_CKSUM 0x40b
-#define OBD_FAIL_OSC_CKSUM_ADLER_ONLY 0x40c
-#define OBD_FAIL_OSC_DIO_PAUSE 0x40d
-#define OBD_FAIL_OSC_OBJECT_CONTENTION 0x40e
-#define OBD_FAIL_OSC_CP_CANCEL_RACE 0x40f
-#define OBD_FAIL_OSC_CP_ENQ_RACE 0x410
-#define OBD_FAIL_OSC_NO_GRANT 0x411
-#define OBD_FAIL_OSC_DELAY_SETTIME 0x412
-#define OBD_FAIL_OSC_DELAY_IO 0x414
-
-#define OBD_FAIL_PTLRPC 0x500
-#define OBD_FAIL_PTLRPC_ACK 0x501
-#define OBD_FAIL_PTLRPC_RQBD 0x502
-#define OBD_FAIL_PTLRPC_BULK_GET_NET 0x503
-#define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504
-#define OBD_FAIL_PTLRPC_DROP_RPC 0x505
-#define OBD_FAIL_PTLRPC_DELAY_SEND 0x506
-#define OBD_FAIL_PTLRPC_DELAY_RECOV 0x507
-#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB 0x508
-#define OBD_FAIL_PTLRPC_PAUSE_REQ 0x50a
-#define OBD_FAIL_PTLRPC_PAUSE_REP 0x50c
-#define OBD_FAIL_PTLRPC_IMP_DEACTIVE 0x50d
-#define OBD_FAIL_PTLRPC_DUMP_LOG 0x50e
-#define OBD_FAIL_PTLRPC_LONG_REPL_UNLINK 0x50f
-#define OBD_FAIL_PTLRPC_LONG_BULK_UNLINK 0x510
-#define OBD_FAIL_PTLRPC_HPREQ_TIMEOUT 0x511
-#define OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT 0x512
-#define OBD_FAIL_PTLRPC_DROP_REQ_OPC 0x513
-#define OBD_FAIL_PTLRPC_FINISH_REPLAY 0x514
-#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB2 0x515
-#define OBD_FAIL_PTLRPC_DELAY_IMP_FULL 0x516
-#define OBD_FAIL_PTLRPC_CANCEL_RESEND 0x517
-#define OBD_FAIL_PTLRPC_DROP_BULK 0x51a
-#define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK 0x51b
-#define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c
-
-#define OBD_FAIL_OBD_PING_NET 0x600
-#define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601
-#define OBD_FAIL_OBD_LOGD_NET 0x602
-/* OBD_FAIL_OBD_QC_CALLBACK_NET 0x603 obsolete since 2.4 */
-#define OBD_FAIL_OBD_DQACQ 0x604
-#define OBD_FAIL_OBD_LLOG_SETUP 0x605
-#define OBD_FAIL_OBD_LOG_CANCEL_REP 0x606
-#define OBD_FAIL_OBD_IDX_READ_NET 0x607
-#define OBD_FAIL_OBD_IDX_READ_BREAK 0x608
-#define OBD_FAIL_OBD_NO_LRU 0x609
-
-#define OBD_FAIL_TGT_REPLY_NET 0x700
-#define OBD_FAIL_TGT_CONN_RACE 0x701
-#define OBD_FAIL_TGT_FORCE_RECONNECT 0x702
-#define OBD_FAIL_TGT_DELAY_CONNECT 0x703
-#define OBD_FAIL_TGT_DELAY_RECONNECT 0x704
-#define OBD_FAIL_TGT_DELAY_PRECREATE 0x705
-#define OBD_FAIL_TGT_TOOMANY_THREADS 0x706
-#define OBD_FAIL_TGT_REPLAY_DROP 0x707
-#define OBD_FAIL_TGT_FAKE_EXP 0x708
-#define OBD_FAIL_TGT_REPLAY_DELAY 0x709
-#define OBD_FAIL_TGT_LAST_REPLAY 0x710
-#define OBD_FAIL_TGT_CLIENT_ADD 0x711
-#define OBD_FAIL_TGT_RCVG_FLAG 0x712
-#define OBD_FAIL_TGT_DELAY_CONDITIONAL 0x713
-
-#define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800
-#define OBD_FAIL_MDC_ENQUEUE_PAUSE 0x801
-#define OBD_FAIL_MDC_OLD_EXT_FLAGS 0x802
-#define OBD_FAIL_MDC_GETATTR_ENQUEUE 0x803
-#define OBD_FAIL_MDC_RPCS_SEM 0x804
-#define OBD_FAIL_MDC_LIGHTWEIGHT 0x805
-#define OBD_FAIL_MDC_CLOSE 0x806
-
-#define OBD_FAIL_MGS 0x900
-#define OBD_FAIL_MGS_ALL_REQUEST_NET 0x901
-#define OBD_FAIL_MGS_ALL_REPLY_NET 0x902
-#define OBD_FAIL_MGC_PAUSE_PROCESS_LOG 0x903
-#define OBD_FAIL_MGS_PAUSE_REQ 0x904
-#define OBD_FAIL_MGS_PAUSE_TARGET_REG 0x905
-#define OBD_FAIL_MGS_CONNECT_NET 0x906
-#define OBD_FAIL_MGS_DISCONNECT_NET 0x907
-#define OBD_FAIL_MGS_SET_INFO_NET 0x908
-#define OBD_FAIL_MGS_EXCEPTION_NET 0x909
-#define OBD_FAIL_MGS_TARGET_REG_NET 0x90a
-#define OBD_FAIL_MGS_TARGET_DEL_NET 0x90b
-#define OBD_FAIL_MGS_CONFIG_READ_NET 0x90c
-
-#define OBD_FAIL_QUOTA_DQACQ_NET 0xA01
-#define OBD_FAIL_QUOTA_EDQUOT 0xA02
-#define OBD_FAIL_QUOTA_DELAY_REINT 0xA03
-#define OBD_FAIL_QUOTA_RECOVERABLE_ERR 0xA04
-
-#define OBD_FAIL_LPROC_REMOVE 0xB00
-
-#define OBD_FAIL_SEQ 0x1000
-#define OBD_FAIL_SEQ_QUERY_NET 0x1001
-#define OBD_FAIL_SEQ_EXHAUST 0x1002
-
-#define OBD_FAIL_FLD 0x1100
-#define OBD_FAIL_FLD_QUERY_NET 0x1101
-#define OBD_FAIL_FLD_READ_NET 0x1102
-
-#define OBD_FAIL_SEC_CTX 0x1200
-#define OBD_FAIL_SEC_CTX_INIT_NET 0x1201
-#define OBD_FAIL_SEC_CTX_INIT_CONT_NET 0x1202
-#define OBD_FAIL_SEC_CTX_FINI_NET 0x1203
-#define OBD_FAIL_SEC_CTX_HDL_PAUSE 0x1204
-
-#define OBD_FAIL_LLOG 0x1300
-#define OBD_FAIL_LLOG_ORIGIN_CONNECT_NET 0x1301
-#define OBD_FAIL_LLOG_ORIGIN_HANDLE_CREATE_NET 0x1302
-#define OBD_FAIL_LLOG_ORIGIN_HANDLE_DESTROY_NET 0x1303
-#define OBD_FAIL_LLOG_ORIGIN_HANDLE_READ_HEADER_NET 0x1304
-#define OBD_FAIL_LLOG_ORIGIN_HANDLE_NEXT_BLOCK_NET 0x1305
-#define OBD_FAIL_LLOG_ORIGIN_HANDLE_PREV_BLOCK_NET 0x1306
-#define OBD_FAIL_LLOG_ORIGIN_HANDLE_WRITE_REC_NET 0x1307
-#define OBD_FAIL_LLOG_ORIGIN_HANDLE_CLOSE_NET 0x1308
-#define OBD_FAIL_LLOG_CATINFO_NET 0x1309
-#define OBD_FAIL_MDS_SYNC_CAPA_SL 0x1310
-#define OBD_FAIL_SEQ_ALLOC 0x1311
-
-#define OBD_FAIL_LLITE 0x1400
-#define OBD_FAIL_LLITE_FAULT_TRUNC_RACE 0x1401
-#define OBD_FAIL_LOCK_STATE_WAIT_INTR 0x1402
-#define OBD_FAIL_LOV_INIT 0x1403
-#define OBD_FAIL_GLIMPSE_DELAY 0x1404
-#define OBD_FAIL_LLITE_XATTR_ENOMEM 0x1405
-#define OBD_FAIL_MAKE_LOVEA_HOLE 0x1406
-#define OBD_FAIL_LLITE_LOST_LAYOUT 0x1407
-#define OBD_FAIL_GETATTR_DELAY 0x1409
-
-#define OBD_FAIL_FID_INDIR 0x1501
-#define OBD_FAIL_FID_INLMA 0x1502
-#define OBD_FAIL_FID_IGIF 0x1504
-#define OBD_FAIL_FID_LOOKUP 0x1505
-#define OBD_FAIL_FID_NOLMA 0x1506
-
-/* LFSCK */
-#define OBD_FAIL_LFSCK_DELAY1 0x1600
-#define OBD_FAIL_LFSCK_DELAY2 0x1601
-#define OBD_FAIL_LFSCK_DELAY3 0x1602
-#define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
-#define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
-#define OBD_FAIL_LFSCK_LINKEA_MORE2 0x1605
-#define OBD_FAIL_LFSCK_FATAL1 0x1608
-#define OBD_FAIL_LFSCK_FATAL2 0x1609
-#define OBD_FAIL_LFSCK_CRASH 0x160a
-#define OBD_FAIL_LFSCK_NO_AUTO 0x160b
-#define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c
-#define OBD_FAIL_LFSCK_INVALID_PFID 0x1619
-#define OBD_FAIL_LFSCK_BAD_NAME_HASH 0x1628
-
-/* UPDATE */
-#define OBD_FAIL_UPDATE_OBJ_NET 0x1700
-#define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
-
-/* LMV */
-#define OBD_FAIL_UNKNOWN_LMV_STRIPE 0x1901
-
-/* Assign references to moved code to reduce code changes */
-#define OBD_FAIL_PRECHECK(id) CFS_FAIL_PRECHECK(id)
-#define OBD_FAIL_CHECK(id) CFS_FAIL_CHECK(id)
-#define OBD_FAIL_CHECK_VALUE(id, value) CFS_FAIL_CHECK_VALUE(id, value)
-#define OBD_FAIL_CHECK_ORSET(id, value) CFS_FAIL_CHECK_ORSET(id, value)
-#define OBD_FAIL_CHECK_RESET(id, value) CFS_FAIL_CHECK_RESET(id, value)
-#define OBD_FAIL_RETURN(id, ret) CFS_FAIL_RETURN(id, ret)
-#define OBD_FAIL_TIMEOUT(id, secs) CFS_FAIL_TIMEOUT(id, secs)
-#define OBD_FAIL_TIMEOUT_MS(id, ms) CFS_FAIL_TIMEOUT_MS(id, ms)
-#define OBD_FAIL_TIMEOUT_ORSET(id, value, secs) CFS_FAIL_TIMEOUT_ORSET(id, value, secs)
-#define OBD_RACE(id) CFS_RACE(id)
-#define OBD_FAIL_ONCE CFS_FAIL_ONCE
-#define OBD_FAILED CFS_FAILED
-
-#ifdef CONFIG_DEBUG_SLAB
-#define POISON(ptr, c, s) do {} while (0)
-#define POISON_PTR(ptr) ((void)0)
-#else
-#define POISON(ptr, c, s) memset(ptr, c, s)
-#define POISON_PTR(ptr) ((ptr) = (void *)0xdeadbeef)
-#endif
-
-#ifdef POISON_BULK
-#define POISON_PAGE(page, val) do { \
- memset(kmap(page), val, PAGE_SIZE); \
- kunmap(page); \
-} while (0)
-#else
-#define POISON_PAGE(page, val) do { } while (0)
-#endif
-
-#define OBD_FREE_RCU(ptr, size, handle) \
-do { \
- struct portals_handle *__h = (handle); \
- \
- __h->h_cookie = (unsigned long)(ptr); \
- __h->h_size = (size); \
- call_rcu(&__h->h_rcu, class_handle_free_cb); \
- POISON_PTR(ptr); \
-} while (0)
-
-#define KEY_IS(str) \
- (keylen >= (sizeof(str) - 1) && \
- memcmp(key, str, (sizeof(str) - 1)) == 0)
-
-#endif
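For context on how the fail-injection aliases above are exercised, here is a
minimal usage sketch; the surrounding function is invented for illustration,
but the macros and fail-loc values are the ones defined in this header:

	/* Delay the connect path by 10 seconds when fail_loc 0x703 is armed,
	 * and fail the reply path outright when fail_loc 0x700 fires.
	 */
	static int target_connect_sketch(void)
	{
		OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_CONNECT, 10);

		if (OBD_FAIL_CHECK(OBD_FAIL_TGT_REPLY_NET))
			return -ENOTCONN;

		return 0;
	}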
diff --git a/drivers/staging/lustre/lustre/include/seq_range.h b/drivers/staging/lustre/lustre/include/seq_range.h
deleted file mode 100644
index 9450da728160..000000000000
--- a/drivers/staging/lustre/lustre/include/seq_range.h
+++ /dev/null
@@ -1,200 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2014, Intel Corporation.
- *
- * Copyright 2015 Cray Inc, all rights reserved.
- * Author: Ben Evans.
- *
- * Define lu_seq_range associated functions
- */
-
-#ifndef _SEQ_RANGE_H_
-#define _SEQ_RANGE_H_
-
-#include <uapi/linux/lustre/lustre_idl.h>
-
-/**
- * computes the sequence range type \a range
- */
-
-static inline unsigned int fld_range_type(const struct lu_seq_range *range)
-{
- return range->lsr_flags & LU_SEQ_RANGE_MASK;
-}
-
-/**
- * Is this sequence range an OST? \a range
- */
-
-static inline bool fld_range_is_ost(const struct lu_seq_range *range)
-{
- return fld_range_type(range) == LU_SEQ_RANGE_OST;
-}
-
-/**
- * Is this sequence range an MDT? \a range
- */
-
-static inline bool fld_range_is_mdt(const struct lu_seq_range *range)
-{
- return fld_range_type(range) == LU_SEQ_RANGE_MDT;
-}
-
-/**
- * ANY range is only used when the fld client sends a fld query request,
- * but it does not know whether the seq is an MDT or OST, so it will send the
- * request with ANY type, which means any seq type from the lookup can be
- * expected. \a range
- */
-static inline unsigned int fld_range_is_any(const struct lu_seq_range *range)
-{
- return fld_range_type(range) == LU_SEQ_RANGE_ANY;
-}
-
-/**
- * Set the type flags \a flags on range \a range
- */
-
-static inline void fld_range_set_type(struct lu_seq_range *range,
- unsigned int flags)
-{
- range->lsr_flags |= flags;
-}
-
-/**
- * Add MDT to range type \a range
- */
-
-static inline void fld_range_set_mdt(struct lu_seq_range *range)
-{
- fld_range_set_type(range, LU_SEQ_RANGE_MDT);
-}
-
-/**
- * Add OST to range type \a range
- */
-
-static inline void fld_range_set_ost(struct lu_seq_range *range)
-{
- fld_range_set_type(range, LU_SEQ_RANGE_OST);
-}
-
-/**
- * Add ANY to range type \a range
- */
-
-static inline void fld_range_set_any(struct lu_seq_range *range)
-{
- fld_range_set_type(range, LU_SEQ_RANGE_ANY);
-}
-
-/**
- * computes width of given sequence range \a range
- */
-
-static inline u64 lu_seq_range_space(const struct lu_seq_range *range)
-{
- return range->lsr_end - range->lsr_start;
-}
-
-/**
- * initialize range to zero \a range
- */
-
-static inline void lu_seq_range_init(struct lu_seq_range *range)
-{
- memset(range, 0, sizeof(*range));
-}
-
-/**
- * check if the given seq id \a seq is within the given range \a range
- */
-
-static inline bool lu_seq_range_within(const struct lu_seq_range *range,
- u64 seq)
-{
- return seq >= range->lsr_start && seq < range->lsr_end;
-}
-
-/**
- * Is the range sane? Is the end after the beginning? \a range
- */
-
-static inline bool lu_seq_range_is_sane(const struct lu_seq_range *range)
-{
- return range->lsr_end >= range->lsr_start;
-}
-
-/**
- * Is the range 0? \a range
- */
-
-static inline bool lu_seq_range_is_zero(const struct lu_seq_range *range)
-{
- return range->lsr_start == 0 && range->lsr_end == 0;
-}
-
-/**
- * Is the range out of space? \a range
- */
-
-static inline bool lu_seq_range_is_exhausted(const struct lu_seq_range *range)
-{
- return lu_seq_range_space(range) == 0;
-}
-
-/**
- * return 0 if two ranges have the same location, nonzero if they are
- * different \a r1 \a r2
- */
-
-static inline int lu_seq_range_compare_loc(const struct lu_seq_range *r1,
- const struct lu_seq_range *r2)
-{
- return r1->lsr_index != r2->lsr_index ||
- r1->lsr_flags != r2->lsr_flags;
-}
-
-#if !defined(__REQ_LAYOUT_USER__)
-/**
- * byte swap range structure \a range
- */
-
-void lustre_swab_lu_seq_range(struct lu_seq_range *range);
-#endif
-/**
- * printf string and argument list for sequence range
- */
-#define DRANGE "[%#16.16llx-%#16.16llx]:%x:%s"
-
-#define PRANGE(range) \
- (range)->lsr_start, \
- (range)->lsr_end, \
- (range)->lsr_index, \
- fld_range_is_mdt(range) ? "mdt" : "ost"
-
-#endif
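As a usage note, DRANGE and PRANGE are designed to be used as a pair, the
former supplying the format string and the latter the matching argument list.
A hedged sketch (the debug call site is invented):

	static void print_range_sketch(const struct lu_seq_range *range)
	{
		/* PRANGE() expands to four arguments matching DRANGE:
		 * start, end, index, and the "mdt"/"ost" tag.
		 */
		CDEBUG(D_INFO, "seq range " DRANGE "\n", PRANGE(range));
	}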
diff --git a/drivers/staging/lustre/lustre/ldlm/interval_tree.c b/drivers/staging/lustre/lustre/ldlm/interval_tree.c
deleted file mode 100644
index 8df7a4463c21..000000000000
--- a/drivers/staging/lustre/lustre/ldlm/interval_tree.c
+++ /dev/null
@@ -1,599 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ldlm/interval_tree.c
- *
- * Interval tree library used by ldlm extent lock code
- *
- * Author: Huang Wei <huangwei@clusterfs.com>
- * Author: Jay Xiong <jinshan.xiong@sun.com>
- */
-#include <lustre_dlm.h>
-#include <obd_support.h>
-#include <interval_tree.h>
-
-enum {
- INTERVAL_RED = 0,
- INTERVAL_BLACK = 1
-};
-
-static inline int node_is_left_child(struct interval_node *node)
-{
- return node == node->in_parent->in_left;
-}
-
-static inline int node_is_right_child(struct interval_node *node)
-{
- return node == node->in_parent->in_right;
-}
-
-static inline int node_is_red(struct interval_node *node)
-{
- return node->in_color == INTERVAL_RED;
-}
-
-static inline int node_is_black(struct interval_node *node)
-{
- return node->in_color == INTERVAL_BLACK;
-}
-
-static inline int extent_compare(struct interval_node_extent *e1,
- struct interval_node_extent *e2)
-{
- int rc;
-
- if (e1->start == e2->start) {
- if (e1->end < e2->end)
- rc = -1;
- else if (e1->end > e2->end)
- rc = 1;
- else
- rc = 0;
- } else {
- if (e1->start < e2->start)
- rc = -1;
- else
- rc = 1;
- }
- return rc;
-}
-
-static inline int extent_equal(struct interval_node_extent *e1,
- struct interval_node_extent *e2)
-{
- return (e1->start == e2->start) && (e1->end == e2->end);
-}
-
-static inline int extent_overlapped(struct interval_node_extent *e1,
- struct interval_node_extent *e2)
-{
- return (e1->start <= e2->end) && (e2->start <= e1->end);
-}
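Since extent endpoints are inclusive, extents that merely touch are counted
as overlapping. A small illustration with invented values:

	struct interval_node_extent a = { .start = 0, .end = 5 };
	struct interval_node_extent b = { .start = 5, .end = 10 };
	struct interval_node_extent c = { .start = 6, .end = 10 };

	/* a and b share offset 5, so extent_overlapped(&a, &b) is true;
	 * a ends before c begins, so extent_overlapped(&a, &c) is false.
	 */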
-
-static inline int node_equal(struct interval_node *n1, struct interval_node *n2)
-{
- return extent_equal(&n1->in_extent, &n2->in_extent);
-}
-
-static struct interval_node *interval_first(struct interval_node *node)
-{
- if (!node)
- return NULL;
- while (node->in_left)
- node = node->in_left;
- return node;
-}
-
-static struct interval_node *interval_last(struct interval_node *node)
-{
- if (!node)
- return NULL;
- while (node->in_right)
- node = node->in_right;
- return node;
-}
-
-static struct interval_node *interval_next(struct interval_node *node)
-{
- if (!node)
- return NULL;
- if (node->in_right)
- return interval_first(node->in_right);
- while (node->in_parent && node_is_right_child(node))
- node = node->in_parent;
- return node->in_parent;
-}
-
-static struct interval_node *interval_prev(struct interval_node *node)
-{
- if (!node)
- return NULL;
-
- if (node->in_left)
- return interval_last(node->in_left);
-
- while (node->in_parent && node_is_left_child(node))
- node = node->in_parent;
-
- return node->in_parent;
-}
-
-enum interval_iter interval_iterate_reverse(struct interval_node *root,
- interval_callback_t func,
- void *data)
-{
- enum interval_iter rc = INTERVAL_ITER_CONT;
- struct interval_node *node;
-
- for (node = interval_last(root); node; node = interval_prev(node)) {
- rc = func(node, data);
- if (rc == INTERVAL_ITER_STOP)
- break;
- }
-
- return rc;
-}
-EXPORT_SYMBOL(interval_iterate_reverse);
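Only the reverse iterator is exported; a forward-order counterpart would be
symmetric. The following is a hedged sketch, not code from the original file:

	static enum interval_iter interval_iterate_sketch(struct interval_node *root,
							  interval_callback_t func,
							  void *data)
	{
		enum interval_iter rc = INTERVAL_ITER_CONT;
		struct interval_node *node;

		/* Visit nodes in ascending extent order, stopping on request. */
		for (node = interval_first(root); node; node = interval_next(node)) {
			rc = func(node, data);
			if (rc == INTERVAL_ITER_STOP)
				break;
		}

		return rc;
	}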
-
-static void __rotate_change_maxhigh(struct interval_node *node,
- struct interval_node *rotate)
-{
- __u64 left_max, right_max;
-
- rotate->in_max_high = node->in_max_high;
- left_max = node->in_left ? node->in_left->in_max_high : 0;
- right_max = node->in_right ? node->in_right->in_max_high : 0;
- node->in_max_high = max(interval_high(node),
- max(left_max, right_max));
-}
-
-/* The left rotation "pivots" around the link from node to node->right, and
- * - node will be linked to node->right's left child, and
- * - node->right's left child will be linked to node's right child.
- */
-static void __rotate_left(struct interval_node *node,
- struct interval_node **root)
-{
- struct interval_node *right = node->in_right;
- struct interval_node *parent = node->in_parent;
-
- node->in_right = right->in_left;
- if (node->in_right)
- right->in_left->in_parent = node;
-
- right->in_left = node;
- right->in_parent = parent;
- if (parent) {
- if (node_is_left_child(node))
- parent->in_left = right;
- else
- parent->in_right = right;
- } else {
- *root = right;
- }
- node->in_parent = right;
-
- /* update max_high for node and right */
- __rotate_change_maxhigh(node, right);
-}
-
-/* The right rotation "pivots" around the link from node to node->left, and
- * - node will be linked to node->left's right child, and
- * - node->left's right child will be linked to node's left child.
- */
-static void __rotate_right(struct interval_node *node,
- struct interval_node **root)
-{
- struct interval_node *left = node->in_left;
- struct interval_node *parent = node->in_parent;
-
- node->in_left = left->in_right;
- if (node->in_left)
- left->in_right->in_parent = node;
- left->in_right = node;
-
- left->in_parent = parent;
- if (parent) {
- if (node_is_right_child(node))
- parent->in_right = left;
- else
- parent->in_left = left;
- } else {
- *root = left;
- }
- node->in_parent = left;
-
- /* update max_high for node and left */
- __rotate_change_maxhigh(node, left);
-}
-
-#define interval_swap(a, b) do { \
- struct interval_node *c = a; a = b; b = c; \
-} while (0)
-
-/*
- * Operations INSERT and DELETE, when run on a tree with n keys,
- * take O(log n) time. Because they modify the tree, the result
- * may violate the red-black properties. To restore these properties,
- * we must change the colors of some of the nodes in the tree
- * and also change the pointer structure.
- */
-static void interval_insert_color(struct interval_node *node,
- struct interval_node **root)
-{
- struct interval_node *parent, *gparent;
-
- while ((parent = node->in_parent) && node_is_red(parent)) {
- gparent = parent->in_parent;
- /* Parent is RED, so gparent must not be NULL */
- if (node_is_left_child(parent)) {
- struct interval_node *uncle;
-
- uncle = gparent->in_right;
- if (uncle && node_is_red(uncle)) {
- uncle->in_color = INTERVAL_BLACK;
- parent->in_color = INTERVAL_BLACK;
- gparent->in_color = INTERVAL_RED;
- node = gparent;
- continue;
- }
-
- if (parent->in_right == node) {
- __rotate_left(parent, root);
- interval_swap(node, parent);
- }
-
- parent->in_color = INTERVAL_BLACK;
- gparent->in_color = INTERVAL_RED;
- __rotate_right(gparent, root);
- } else {
- struct interval_node *uncle;
-
- uncle = gparent->in_left;
- if (uncle && node_is_red(uncle)) {
- uncle->in_color = INTERVAL_BLACK;
- parent->in_color = INTERVAL_BLACK;
- gparent->in_color = INTERVAL_RED;
- node = gparent;
- continue;
- }
-
- if (node_is_left_child(node)) {
- __rotate_right(parent, root);
- interval_swap(node, parent);
- }
-
- parent->in_color = INTERVAL_BLACK;
- gparent->in_color = INTERVAL_RED;
- __rotate_left(gparent, root);
- }
- }
-
- (*root)->in_color = INTERVAL_BLACK;
-}
-
-struct interval_node *interval_insert(struct interval_node *node,
- struct interval_node **root)
-{
- struct interval_node **p, *parent = NULL;
-
- LASSERT(!interval_is_intree(node));
- p = root;
- while (*p) {
- parent = *p;
- if (node_equal(parent, node))
- return parent;
-
- /* max_high field must be updated after each iteration */
- if (parent->in_max_high < interval_high(node))
- parent->in_max_high = interval_high(node);
-
- if (extent_compare(&node->in_extent, &parent->in_extent) < 0)
- p = &parent->in_left;
- else
- p = &parent->in_right;
- }
-
- /* link node into the tree */
- node->in_parent = parent;
- node->in_color = INTERVAL_RED;
- node->in_left = NULL;
- node->in_right = NULL;
- *p = node;
-
- interval_insert_color(node, root);
- node->in_intree = 1;
-
- return NULL;
-}
-EXPORT_SYMBOL(interval_insert);
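Note the return-value convention: interval_insert() returns NULL on a fresh
insertion but returns the existing node when an identical extent is already
in the tree, so callers must handle the duplicate case. A sketch (variable
names invented; compare ldlm_extent_add_lock() later in this diff):

	struct interval_node *found;

	found = interval_insert(&new_node, &root);
	if (found) {
		/* A node with the same [start, end] already exists: attach
		 * the new payload to 'found' and free the duplicate node.
		 */
	}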
-
-static inline int node_is_black_or_0(struct interval_node *node)
-{
- return !node || node_is_black(node);
-}
-
-static void interval_erase_color(struct interval_node *node,
- struct interval_node *parent,
- struct interval_node **root)
-{
- struct interval_node *tmp;
-
- while (node_is_black_or_0(node) && node != *root) {
- if (parent->in_left == node) {
- tmp = parent->in_right;
- if (node_is_red(tmp)) {
- tmp->in_color = INTERVAL_BLACK;
- parent->in_color = INTERVAL_RED;
- __rotate_left(parent, root);
- tmp = parent->in_right;
- }
- if (node_is_black_or_0(tmp->in_left) &&
- node_is_black_or_0(tmp->in_right)) {
- tmp->in_color = INTERVAL_RED;
- node = parent;
- parent = node->in_parent;
- } else {
- if (node_is_black_or_0(tmp->in_right)) {
- struct interval_node *o_left;
-
- o_left = tmp->in_left;
- if (o_left)
- o_left->in_color = INTERVAL_BLACK;
- tmp->in_color = INTERVAL_RED;
- __rotate_right(tmp, root);
- tmp = parent->in_right;
- }
- tmp->in_color = parent->in_color;
- parent->in_color = INTERVAL_BLACK;
- if (tmp->in_right)
- tmp->in_right->in_color = INTERVAL_BLACK;
- __rotate_left(parent, root);
- node = *root;
- break;
- }
- } else {
- tmp = parent->in_left;
- if (node_is_red(tmp)) {
- tmp->in_color = INTERVAL_BLACK;
- parent->in_color = INTERVAL_RED;
- __rotate_right(parent, root);
- tmp = parent->in_left;
- }
- if (node_is_black_or_0(tmp->in_left) &&
- node_is_black_or_0(tmp->in_right)) {
- tmp->in_color = INTERVAL_RED;
- node = parent;
- parent = node->in_parent;
- } else {
- if (node_is_black_or_0(tmp->in_left)) {
- struct interval_node *o_right;
-
- o_right = tmp->in_right;
- if (o_right)
- o_right->in_color = INTERVAL_BLACK;
- tmp->in_color = INTERVAL_RED;
- __rotate_left(tmp, root);
- tmp = parent->in_left;
- }
- tmp->in_color = parent->in_color;
- parent->in_color = INTERVAL_BLACK;
- if (tmp->in_left)
- tmp->in_left->in_color = INTERVAL_BLACK;
- __rotate_right(parent, root);
- node = *root;
- break;
- }
- }
- }
- if (node)
- node->in_color = INTERVAL_BLACK;
-}
-
-/*
- * If the @max_high value of @node has changed, this function traverses the
- * path from @node up to the root, updating max_high for the whole tree.
- */
-static void update_maxhigh(struct interval_node *node,
- __u64 old_maxhigh)
-{
- __u64 left_max, right_max;
-
- while (node) {
- left_max = node->in_left ? node->in_left->in_max_high : 0;
- right_max = node->in_right ? node->in_right->in_max_high : 0;
- node->in_max_high = max(interval_high(node),
- max(left_max, right_max));
-
- if (node->in_max_high >= old_maxhigh)
- break;
- node = node->in_parent;
- }
-}
-
-void interval_erase(struct interval_node *node,
- struct interval_node **root)
-{
- struct interval_node *child, *parent;
- int color;
-
- LASSERT(interval_is_intree(node));
- node->in_intree = 0;
- if (!node->in_left) {
- child = node->in_right;
- } else if (!node->in_right) {
- child = node->in_left;
- } else { /* Both left and right child are not NULL */
- struct interval_node *old = node;
-
- node = interval_next(node);
- child = node->in_right;
- parent = node->in_parent;
- color = node->in_color;
-
- if (child)
- child->in_parent = parent;
- if (parent == old)
- parent->in_right = child;
- else
- parent->in_left = child;
-
- node->in_color = old->in_color;
- node->in_right = old->in_right;
- node->in_left = old->in_left;
- node->in_parent = old->in_parent;
-
- if (old->in_parent) {
- if (node_is_left_child(old))
- old->in_parent->in_left = node;
- else
- old->in_parent->in_right = node;
- } else {
- *root = node;
- }
-
- old->in_left->in_parent = node;
- if (old->in_right)
- old->in_right->in_parent = node;
- update_maxhigh(child ? : parent, node->in_max_high);
- update_maxhigh(node, old->in_max_high);
- if (parent == old)
- parent = node;
- goto color;
- }
- parent = node->in_parent;
- color = node->in_color;
-
- if (child)
- child->in_parent = parent;
- if (parent) {
- if (node_is_left_child(node))
- parent->in_left = child;
- else
- parent->in_right = child;
- } else {
- *root = child;
- }
-
- update_maxhigh(child ? : parent, node->in_max_high);
-
-color:
- if (color == INTERVAL_BLACK)
- interval_erase_color(child, parent, root);
-}
-EXPORT_SYMBOL(interval_erase);
-
-static inline int interval_may_overlap(struct interval_node *node,
- struct interval_node_extent *ext)
-{
- return (ext->start <= node->in_max_high &&
- ext->end >= interval_low(node));
-}
-
-/*
- * This function finds all intervals that overlap interval ext,
- * and calls func on each resulting interval, one by one.
- * In Lustre, this function is used to find all conflicting locks in
- * the granted queue and add them to the AST work list.
- *
- * {
- * if (!node)
- * return 0;
- * if (ext->end < interval_low(node)) {
- * interval_search(node->in_left, ext, func, data);
- * } else if (interval_may_overlap(node, ext)) {
- * if (extent_overlapped(ext, &node->in_extent))
- * func(node, data);
- * interval_search(node->in_left, ext, func, data);
- * interval_search(node->in_right, ext, func, data);
- * }
- * return 0;
- * }
- *
- */
-enum interval_iter interval_search(struct interval_node *node,
- struct interval_node_extent *ext,
- interval_callback_t func,
- void *data)
-{
- enum interval_iter rc = INTERVAL_ITER_CONT;
- struct interval_node *parent;
-
- LASSERT(ext);
- LASSERT(func);
-
- while (node) {
- if (ext->end < interval_low(node)) {
- if (node->in_left) {
- node = node->in_left;
- continue;
- }
- } else if (interval_may_overlap(node, ext)) {
- if (extent_overlapped(ext, &node->in_extent)) {
- rc = func(node, data);
- if (rc == INTERVAL_ITER_STOP)
- break;
- }
-
- if (node->in_left) {
- node = node->in_left;
- continue;
- }
- if (node->in_right) {
- node = node->in_right;
- continue;
- }
- }
-
- parent = node->in_parent;
- while (parent) {
- if (node_is_left_child(node) &&
- parent->in_right) {
- /*
-				 * If we ever descended to the left, the parent
-				 * satisfied either ext->end < interval_low(parent)
-				 * or may_overlap(parent). In the former case we
-				 * need not go back up, so stop early and check
-				 * may_overlap(parent) after this loop.
- */
- node = parent->in_right;
- break;
- }
- node = parent;
- parent = parent->in_parent;
- }
- if (!parent || !interval_may_overlap(parent, ext))
- break;
- }
-
- return rc;
-}
-EXPORT_SYMBOL(interval_search);
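A sketch of a typical interval_search() call: the callback runs once per
overlapping node and may stop the walk early (the callback and its
bookkeeping struct are invented):

	struct overlap_count {
		int hits;
	};

	static enum interval_iter count_overlaps_cb(struct interval_node *node,
						    void *data)
	{
		struct overlap_count *oc = data;

		oc->hits++;	/* node's extent overlaps the query extent */
		return INTERVAL_ITER_CONT;
	}

	static int count_in_range(struct interval_node *root)
	{
		struct interval_node_extent query = { .start = 100, .end = 199 };
		struct overlap_count oc = { .hits = 0 };

		interval_search(root, &query, count_overlaps_cb, &oc);
		return oc.hits;
	}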
diff --git a/drivers/staging/lustre/lustre/ldlm/l_lock.c b/drivers/staging/lustre/lustre/ldlm/l_lock.c
deleted file mode 100644
index 296259aa51e6..000000000000
--- a/drivers/staging/lustre/lustre/ldlm/l_lock.c
+++ /dev/null
@@ -1,73 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LDLM
-
-#include <lustre_dlm.h>
-#include <lustre_lib.h>
-
-/**
- * Lock a lock and its resource.
- *
- * LDLM locking uses resource to serialize access to locks
- * but there is a case when we change resource of lock upon
- * enqueue reply. We rely on lock->l_resource = new_res
- * being an atomic operation.
- */
-struct ldlm_resource *lock_res_and_lock(struct ldlm_lock *lock)
- __acquires(&lock->l_lock)
- __acquires(&lock->l_resource->lr_lock)
-{
- spin_lock(&lock->l_lock);
-
- lock_res(lock->l_resource);
-
- ldlm_set_res_locked(lock);
- return lock->l_resource;
-}
-EXPORT_SYMBOL(lock_res_and_lock);
-
-/**
- * Unlock a lock and its resource previously locked with lock_res_and_lock
- */
-void unlock_res_and_lock(struct ldlm_lock *lock)
- __releases(&lock->l_resource->lr_lock)
- __releases(&lock->l_lock)
-{
-	/* on the server side, the resource of a lock doesn't change */
- ldlm_clear_res_locked(lock);
-
- unlock_res(lock->l_resource);
- spin_unlock(&lock->l_lock);
-}
-EXPORT_SYMBOL(unlock_res_and_lock);
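The two helpers are meant to bracket any section that inspects or mutates
lock state; a minimal usage sketch:

	struct ldlm_resource *res;

	res = lock_res_and_lock(lock);
	/* ... lock->l_resource is stable and both locks are held here ... */
	unlock_res_and_lock(lock);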
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
deleted file mode 100644
index 4da23ade2bb3..000000000000
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
+++ /dev/null
@@ -1,258 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2010, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ldlm/ldlm_extent.c
- *
- * Author: Peter Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-/**
- * This file contains implementation of EXTENT lock type
- *
- * EXTENT lock type is for locking a contiguous range of values, represented
- * by 64-bit starting and ending offsets (inclusive). There are several extent
- * lock modes, some of which may be mutually incompatible. Extent locks are
- * considered incompatible if their modes are incompatible and their extents
- * intersect. See the lock mode compatibility matrix in lustre_dlm.h.
- */
-
-#define DEBUG_SUBSYSTEM S_LDLM
-#include <lustre_dlm.h>
-#include <obd_support.h>
-#include <obd.h>
-#include <obd_class.h>
-#include <lustre_lib.h>
-#include "ldlm_internal.h"
-
-/* When a lock is cancelled by a client, the KMS may undergo change if this
- * is the "highest lock". This function returns the new KMS value.
- * Caller must hold lr_lock already.
- *
- * NB: A lock on [x,y] protects a KMS of up to y + 1 bytes!
- */
-__u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms)
-{
- struct ldlm_resource *res = lock->l_resource;
- struct ldlm_lock *lck;
- __u64 kms = 0;
-
-	/* Mark this lock first so that another thread running
-	 * ldlm_extent_shift_kms() just after we finish does not take the
-	 * cancelled lock into account in its own KMS calculation.
- */
- ldlm_set_kms_ignore(lock);
-
- list_for_each_entry(lck, &res->lr_granted, l_res_link) {
-
- if (ldlm_is_kms_ignore(lck))
- continue;
-
- if (lck->l_policy_data.l_extent.end >= old_kms)
- return old_kms;
-
- /* This extent _has_ to be smaller than old_kms (checked above)
- * so kms can only ever be smaller or the same as old_kms.
- */
- if (lck->l_policy_data.l_extent.end + 1 > kms)
- kms = lck->l_policy_data.l_extent.end + 1;
- }
- LASSERTF(kms <= old_kms, "kms %llu old_kms %llu\n", kms, old_kms);
-
- return kms;
-}
-EXPORT_SYMBOL(ldlm_extent_shift_kms);
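A worked example with invented extents: suppose the granted list holds locks
on [0, 4095] and [0, 8191], so old_kms is 8192. Cancelling the [0, 8191] lock
leaves [0, 4095] as the highest surviving extent, and the function returns
4095 + 1 = 4096. Cancelling [0, 4095] instead returns 8192 unchanged, because
the surviving [0, 8191] lock still protects bytes up to 8191 and kms is
recomputed as 8191 + 1.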
-
-struct kmem_cache *ldlm_interval_slab;
-
-/* interval tree, for LDLM_EXTENT. */
-static void ldlm_interval_attach(struct ldlm_interval *n, struct ldlm_lock *l)
-{
- LASSERT(!l->l_tree_node);
- LASSERT(l->l_resource->lr_type == LDLM_EXTENT);
-
- list_add_tail(&l->l_sl_policy, &n->li_group);
- l->l_tree_node = n;
-}
-
-struct ldlm_interval *ldlm_interval_alloc(struct ldlm_lock *lock)
-{
- struct ldlm_interval *node;
-
- LASSERT(lock->l_resource->lr_type == LDLM_EXTENT);
- node = kmem_cache_zalloc(ldlm_interval_slab, GFP_NOFS);
- if (!node)
- return NULL;
-
- INIT_LIST_HEAD(&node->li_group);
- ldlm_interval_attach(node, lock);
- return node;
-}
-
-void ldlm_interval_free(struct ldlm_interval *node)
-{
- if (node) {
- LASSERT(list_empty(&node->li_group));
- LASSERT(!interval_is_intree(&node->li_node));
- kmem_cache_free(ldlm_interval_slab, node);
- }
-}
-
-struct ldlm_interval *ldlm_interval_detach(struct ldlm_lock *l)
-{
- struct ldlm_interval *n = l->l_tree_node;
-
- if (!n)
- return NULL;
-
- LASSERT(!list_empty(&n->li_group));
- l->l_tree_node = NULL;
- list_del_init(&l->l_sl_policy);
-
- return list_empty(&n->li_group) ? n : NULL;
-}
-
-static inline int lock_mode_to_index(enum ldlm_mode mode)
-{
- int index;
-
- LASSERT(mode != 0);
- LASSERT(is_power_of_2(mode));
- for (index = -1; mode; index++)
- mode >>= 1;
- LASSERT(index < LCK_MODE_NUM);
- return index;
-}
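In effect lock_mode_to_index() computes ilog2() of a single-bit lock mode:
for a mode of 0x4 the loop shifts 0x4 -> 0x2 -> 0x1 -> 0 while index runs
-1 -> 0 -> 1 -> 2, so the function returns 2.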
-
-/** Add newly granted lock into interval tree for the resource. */
-void ldlm_extent_add_lock(struct ldlm_resource *res,
- struct ldlm_lock *lock)
-{
- struct interval_node *found, **root;
- struct ldlm_interval *node;
- struct ldlm_extent *extent;
- int idx, rc;
-
- LASSERT(lock->l_granted_mode == lock->l_req_mode);
-
- node = lock->l_tree_node;
- LASSERT(node);
- LASSERT(!interval_is_intree(&node->li_node));
-
- idx = lock_mode_to_index(lock->l_granted_mode);
- LASSERT(lock->l_granted_mode == 1 << idx);
- LASSERT(lock->l_granted_mode == res->lr_itree[idx].lit_mode);
-
- /* node extent initialize */
- extent = &lock->l_policy_data.l_extent;
- rc = interval_set(&node->li_node, extent->start, extent->end);
- LASSERT(!rc);
-
- root = &res->lr_itree[idx].lit_root;
- found = interval_insert(&node->li_node, root);
- if (found) { /* The policy group found. */
- struct ldlm_interval *tmp;
-
- tmp = ldlm_interval_detach(lock);
- ldlm_interval_free(tmp);
- ldlm_interval_attach(to_ldlm_interval(found), lock);
- }
- res->lr_itree[idx].lit_size++;
-
-	/* Even though we use an interval tree to manage extent locks, we also
-	 * add the locks to the granted list, for debugging purposes.
- */
- ldlm_resource_add_lock(res, &res->lr_granted, lock);
-
- if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_GRANT_CHECK)) {
- struct ldlm_lock *lck;
-
- list_for_each_entry_reverse(lck, &res->lr_granted,
- l_res_link) {
- if (lck == lock)
- continue;
- if (lockmode_compat(lck->l_granted_mode,
- lock->l_granted_mode))
- continue;
- if (ldlm_extent_overlap(&lck->l_req_extent,
- &lock->l_req_extent)) {
- CDEBUG(D_ERROR,
- "granting conflicting lock %p %p\n",
- lck, lock);
- ldlm_resource_dump(D_ERROR, res);
- LBUG();
- }
- }
- }
-}
-
-/** Remove cancelled lock from resource interval tree. */
-void ldlm_extent_unlink_lock(struct ldlm_lock *lock)
-{
- struct ldlm_resource *res = lock->l_resource;
- struct ldlm_interval *node = lock->l_tree_node;
- struct ldlm_interval_tree *tree;
- int idx;
-
- if (!node || !interval_is_intree(&node->li_node)) /* duplicate unlink */
- return;
-
- idx = lock_mode_to_index(lock->l_granted_mode);
- LASSERT(lock->l_granted_mode == 1 << idx);
- tree = &res->lr_itree[idx];
-
- LASSERT(tree->lit_root); /* assure the tree is not null */
-
- tree->lit_size--;
- node = ldlm_interval_detach(lock);
- if (node) {
- interval_erase(&node->li_node, &tree->lit_root);
- ldlm_interval_free(node);
- }
-}
-
-void ldlm_extent_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy,
- union ldlm_policy_data *lpolicy)
-{
- lpolicy->l_extent.start = wpolicy->l_extent.start;
- lpolicy->l_extent.end = wpolicy->l_extent.end;
- lpolicy->l_extent.gid = wpolicy->l_extent.gid;
-}
-
-void ldlm_extent_policy_local_to_wire(const union ldlm_policy_data *lpolicy,
- union ldlm_wire_policy_data *wpolicy)
-{
- memset(wpolicy, 0, sizeof(*wpolicy));
- wpolicy->l_extent.start = lpolicy->l_extent.start;
- wpolicy->l_extent.end = lpolicy->l_extent.end;
- wpolicy->l_extent.gid = lpolicy->l_extent.gid;
-}
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
deleted file mode 100644
index 94f3b1e49896..000000000000
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
+++ /dev/null
@@ -1,486 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003 Hewlett-Packard Development Company LP.
- * Developed under the sponsorship of the US Government under
- * Subcontract No. B514193
- *
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2010, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-/**
- * This file implements the POSIX (flock) lock type for Lustre.
- * Its policy properties are the start and end of the extent, and the PID.
- *
- * These locks go only through the MDS because POSIX semantics require,
- * e.g., that a lock may be only partially released, and as such split into
- * two parts, and that two adjacent locks from the same process may be
- * merged into a single wider lock.
- *
- * Lock modes are mapped like this:
- * PR and PW for READ and WRITE locks
- * NL to request a releasing of a portion of the lock
- *
- * These flock locks never timeout.
- */
-
-#define DEBUG_SUBSYSTEM S_LDLM
-
-#include <lustre_dlm.h>
-#include <obd_support.h>
-#include <obd_class.h>
-#include <lustre_lib.h>
-#include <linux/list.h>
-#include "ldlm_internal.h"
-
-static inline int
-ldlm_same_flock_owner(struct ldlm_lock *lock, struct ldlm_lock *new)
-{
- return((new->l_policy_data.l_flock.owner ==
- lock->l_policy_data.l_flock.owner) &&
- (new->l_export == lock->l_export));
-}
-
-static inline int
-ldlm_flocks_overlap(struct ldlm_lock *lock, struct ldlm_lock *new)
-{
- return((new->l_policy_data.l_flock.start <=
- lock->l_policy_data.l_flock.end) &&
- (new->l_policy_data.l_flock.end >=
- lock->l_policy_data.l_flock.start));
-}
-
-static inline void
-ldlm_flock_destroy(struct ldlm_lock *lock, enum ldlm_mode mode)
-{
- LDLM_DEBUG(lock, "%s(mode: %d)",
- __func__, mode);
-
- list_del_init(&lock->l_res_link);
-
- /* client side - set a flag to prevent sending a CANCEL */
- lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_CBPENDING;
-
- /* when reaching here, it is under lock_res_and_lock(). Thus,
- * need call the nolock version of ldlm_lock_decref_internal
- */
- ldlm_lock_decref_internal_nolock(lock, mode);
-
- ldlm_lock_destroy_nolock(lock);
-}
-
-/**
- * Process a granting attempt for a flock lock.
- * Must be called with the namespace lock held.
- *
- * This function looks for any conflicts for \a lock in the granted or
- * waiting queues. The lock is granted if no conflicts are found in
- * either queue.
- *
- * It is also responsible for splitting a lock if a portion of the lock
- * is released.
- *
- */
-static int ldlm_process_flock_lock(struct ldlm_lock *req)
-{
- struct ldlm_resource *res = req->l_resource;
- struct ldlm_namespace *ns = ldlm_res_to_ns(res);
- struct ldlm_lock *tmp;
- struct ldlm_lock *lock;
- struct ldlm_lock *new = req;
- struct ldlm_lock *new2 = NULL;
- enum ldlm_mode mode = req->l_req_mode;
- int added = (mode == LCK_NL);
-	int split = 0;
- const struct ldlm_callback_suite null_cbs = { };
-
- CDEBUG(D_DLMTRACE,
- "owner %llu pid %u mode %u start %llu end %llu\n",
- new->l_policy_data.l_flock.owner,
- new->l_policy_data.l_flock.pid, mode,
- req->l_policy_data.l_flock.start,
- req->l_policy_data.l_flock.end);
-
- /* No blocking ASTs are sent to the clients for
- * Posix file & record locks
- */
- req->l_blocking_ast = NULL;
-
-reprocess:
-	/* This loop determines where this process's locks start
- * in the resource lr_granted list.
- */
- list_for_each_entry(lock, &res->lr_granted, l_res_link)
- if (ldlm_same_flock_owner(lock, req))
- break;
-
- /* Scan the locks owned by this process to find the insertion point
- * (as locks are ordered), and to handle overlaps.
- * We may have to merge or split existing locks.
- */
- list_for_each_entry_safe_from(lock, tmp, &res->lr_granted, l_res_link) {
-
- if (!ldlm_same_flock_owner(lock, new))
- break;
-
- if (lock->l_granted_mode == mode) {
- /* If the modes are the same then we need to process
- * locks that overlap OR adjoin the new lock. The extra
- * logic condition is necessary to deal with arithmetic
- * overflow and underflow.
- */
- if ((new->l_policy_data.l_flock.start >
- (lock->l_policy_data.l_flock.end + 1)) &&
- (lock->l_policy_data.l_flock.end != OBD_OBJECT_EOF))
- continue;
-
- if ((new->l_policy_data.l_flock.end <
- (lock->l_policy_data.l_flock.start - 1)) &&
- (lock->l_policy_data.l_flock.start != 0))
- break;
-
- if (new->l_policy_data.l_flock.start <
- lock->l_policy_data.l_flock.start) {
- lock->l_policy_data.l_flock.start =
- new->l_policy_data.l_flock.start;
- } else {
- new->l_policy_data.l_flock.start =
- lock->l_policy_data.l_flock.start;
- }
-
- if (new->l_policy_data.l_flock.end >
- lock->l_policy_data.l_flock.end) {
- lock->l_policy_data.l_flock.end =
- new->l_policy_data.l_flock.end;
- } else {
- new->l_policy_data.l_flock.end =
- lock->l_policy_data.l_flock.end;
- }
-
- if (added) {
- ldlm_flock_destroy(lock, mode);
- } else {
- new = lock;
- added = 1;
- }
- continue;
- }
-
- if (new->l_policy_data.l_flock.start >
- lock->l_policy_data.l_flock.end)
- continue;
-
- if (new->l_policy_data.l_flock.end <
- lock->l_policy_data.l_flock.start)
- break;
-
- if (new->l_policy_data.l_flock.start <=
- lock->l_policy_data.l_flock.start) {
- if (new->l_policy_data.l_flock.end <
- lock->l_policy_data.l_flock.end) {
- lock->l_policy_data.l_flock.start =
- new->l_policy_data.l_flock.end + 1;
- break;
- }
- ldlm_flock_destroy(lock, lock->l_req_mode);
- continue;
- }
- if (new->l_policy_data.l_flock.end >=
- lock->l_policy_data.l_flock.end) {
- lock->l_policy_data.l_flock.end =
- new->l_policy_data.l_flock.start - 1;
- continue;
- }
-
- /* split the existing lock into two locks */
-
- /* if this is an F_UNLCK operation then we could avoid
- * allocating a new lock and use the req lock passed in
- * with the request but this would complicate the reply
- * processing since updates to req get reflected in the
- * reply. The client side replays the lock request so
- * it must see the original lock data in the reply.
- */
-
- /* XXX - if ldlm_lock_new() can sleep we should
- * release the lr_lock, allocate the new lock,
- * and restart processing this lock.
- */
- if (!new2) {
- unlock_res_and_lock(req);
- new2 = ldlm_lock_create(ns, &res->lr_name, LDLM_FLOCK,
- lock->l_granted_mode, &null_cbs,
- NULL, 0, LVB_T_NONE);
- lock_res_and_lock(req);
- if (IS_ERR(new2)) {
- ldlm_flock_destroy(req, lock->l_granted_mode);
- return LDLM_ITER_STOP;
- }
- goto reprocess;
- }
-
-		split = 1;
-
- new2->l_granted_mode = lock->l_granted_mode;
- new2->l_policy_data.l_flock.pid =
- new->l_policy_data.l_flock.pid;
- new2->l_policy_data.l_flock.owner =
- new->l_policy_data.l_flock.owner;
- new2->l_policy_data.l_flock.start =
- lock->l_policy_data.l_flock.start;
- new2->l_policy_data.l_flock.end =
- new->l_policy_data.l_flock.start - 1;
- lock->l_policy_data.l_flock.start =
- new->l_policy_data.l_flock.end + 1;
- new2->l_conn_export = lock->l_conn_export;
- if (lock->l_export)
- new2->l_export = class_export_lock_get(lock->l_export,
- new2);
- ldlm_lock_addref_internal_nolock(new2,
- lock->l_granted_mode);
-
- /* insert new2 at lock */
- ldlm_resource_add_lock(res, &lock->l_res_link, new2);
- LDLM_LOCK_RELEASE(new2);
- break;
- }
-
-	/* if new2 was created but never used, destroy it */
-	if (split == 0 && new2)
- ldlm_lock_destroy_nolock(new2);
-
- /* At this point we're granting the lock request. */
- req->l_granted_mode = req->l_req_mode;
-
- if (!added) {
- list_del_init(&req->l_res_link);
- /* insert new lock before "lock", which might be the
- * next lock for this owner, or might be the first
- * lock for the next owner, or might not be a lock at
- * all, but instead points at the head of the list
- */
- ldlm_resource_add_lock(res, &lock->l_res_link, req);
- }
-
- /* In case we're reprocessing the requested lock we can't destroy
- * it until after calling ldlm_add_ast_work_item() above so that laawi()
- * can bump the reference count on \a req. Otherwise \a req
- * could be freed before the completion AST can be sent.
- */
- if (added)
- ldlm_flock_destroy(req, mode);
-
- ldlm_resource_dump(D_INFO, res);
- return LDLM_ITER_CONTINUE;
-}
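A worked example of the split path, with invented offsets: if the process
holds a granted PW flock on [0, 99] and requests NL (unlock) on [40, 59],
none of the merge or endpoint cases apply. new2 is therefore allocated,
inherits the granted mode and the range [0, 39], the original lock is shrunk
to [60, 99], and new2 is inserted ahead of it in the granted list, leaving
two granted locks that bracket the released middle region.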
-
-/**
- * Flock completion callback function.
- *
- * \param lock [in,out]: A lock to be handled
- * \param flags [in]: flags
- * \param *data [in]: ldlm_work_cp_ast_lock() will use ldlm_cb_set_arg
- *
- * \retval 0 : success
- * \retval <0 : failure
- */
-int
-ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
-{
- struct file_lock *getlk = lock->l_ast_data;
- int rc = 0;
-
- OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT2, 4);
- if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_CP_CB_WAIT3)) {
- lock_res_and_lock(lock);
- lock->l_flags |= LDLM_FL_FAIL_LOC;
- unlock_res_and_lock(lock);
- OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT3, 4);
- }
- CDEBUG(D_DLMTRACE, "flags: 0x%llx data: %p getlk: %p\n",
- flags, data, getlk);
-
- LASSERT(flags != LDLM_FL_WAIT_NOREPROC);
-
- if (flags & LDLM_FL_FAILED)
- goto granted;
-
- if (!(flags & LDLM_FL_BLOCKED_MASK)) {
- if (!data)
- /* mds granted the lock in the reply */
- goto granted;
- /* CP AST RPC: lock get granted, wake it up */
- wake_up(&lock->l_waitq);
- return 0;
- }
-
- LDLM_DEBUG(lock,
- "client-side enqueue returned a blocked lock, sleeping");
-
- /* Go to sleep until the lock is granted. */
- rc = l_wait_event_abortable(lock->l_waitq, is_granted_or_cancelled(lock));
-
- if (rc) {
- lock_res_and_lock(lock);
-
- /* client side - set flag to prevent lock from being put on LRU list */
- ldlm_set_cbpending(lock);
- unlock_res_and_lock(lock);
-
- LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)",
- rc);
- return rc;
- }
-
-granted:
- OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10);
-
- if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_CP_CB_WAIT4)) {
- lock_res_and_lock(lock);
- /* DEADLOCK is always set with CBPENDING */
- lock->l_flags |= LDLM_FL_FLOCK_DEADLOCK | LDLM_FL_CBPENDING;
- unlock_res_and_lock(lock);
- OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT4, 4);
- }
- if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_CP_CB_WAIT5)) {
- lock_res_and_lock(lock);
- /* DEADLOCK is always set with CBPENDING */
- lock->l_flags |= LDLM_FL_FAIL_LOC |
- LDLM_FL_FLOCK_DEADLOCK | LDLM_FL_CBPENDING;
- unlock_res_and_lock(lock);
- OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT5, 4);
- }
-
- lock_res_and_lock(lock);
-
- /*
- * Protect against race where lock could have been just destroyed
- * due to overlap in ldlm_process_flock_lock().
- */
- if (ldlm_is_destroyed(lock)) {
- unlock_res_and_lock(lock);
- LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
- /*
- * An error is still to be returned, to propagate it up to
- * ldlm_cli_enqueue_fini() caller.
- */
- return -EIO;
- }
-
- /* ldlm_lock_enqueue() has already placed lock on the granted list. */
- ldlm_resource_unlink_lock(lock);
-
- /*
- * Import invalidation. We need to actually release the lock
- * references being held, so that it can go away. No point in
- * holding the lock even if app still believes it has it, since
- * server already dropped it anyway. Only for granted locks too.
- */
- /* Do the same for DEADLOCK'ed locks. */
- if (ldlm_is_failed(lock) || ldlm_is_flock_deadlock(lock)) {
- int mode;
-
- if (flags & LDLM_FL_TEST_LOCK)
- LASSERT(ldlm_is_test_lock(lock));
-
- if (ldlm_is_test_lock(lock) || ldlm_is_flock_deadlock(lock))
- mode = getlk->fl_type;
- else
- mode = lock->l_granted_mode;
-
- if (ldlm_is_flock_deadlock(lock)) {
- LDLM_DEBUG(lock,
- "client-side enqueue deadlock received");
- rc = -EDEADLK;
- }
- ldlm_flock_destroy(lock, mode);
- unlock_res_and_lock(lock);
-
- /* Need to wake up the waiter if we were evicted */
- wake_up(&lock->l_waitq);
-
- /*
- * An error is still to be returned, to propagate it up to
- * ldlm_cli_enqueue_fini() caller.
- */
- return rc ? : -EIO;
- }
-
- LDLM_DEBUG(lock, "client-side enqueue granted");
-
- if (flags & LDLM_FL_TEST_LOCK) {
- /* fcntl(F_GETLK) request */
- /* The old mode was saved in getlk->fl_type so that if the mode
- * in the lock changes we can decref the appropriate refcount.
- */
- LASSERT(ldlm_is_test_lock(lock));
- ldlm_flock_destroy(lock, getlk->fl_type);
- switch (lock->l_granted_mode) {
- case LCK_PR:
- getlk->fl_type = F_RDLCK;
- break;
- case LCK_PW:
- getlk->fl_type = F_WRLCK;
- break;
- default:
- getlk->fl_type = F_UNLCK;
- }
- getlk->fl_pid = -(pid_t)lock->l_policy_data.l_flock.pid;
- getlk->fl_start = (loff_t)lock->l_policy_data.l_flock.start;
- getlk->fl_end = (loff_t)lock->l_policy_data.l_flock.end;
- } else {
- /* We need to reprocess the lock to do merges or splits
- * with existing locks owned by this process.
- */
- ldlm_process_flock_lock(lock);
- }
- unlock_res_and_lock(lock);
- return rc;
-}
-EXPORT_SYMBOL(ldlm_flock_completion_ast);
-
-void ldlm_flock_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy,
- union ldlm_policy_data *lpolicy)
-{
- lpolicy->l_flock.start = wpolicy->l_flock.lfw_start;
- lpolicy->l_flock.end = wpolicy->l_flock.lfw_end;
- lpolicy->l_flock.pid = wpolicy->l_flock.lfw_pid;
- lpolicy->l_flock.owner = wpolicy->l_flock.lfw_owner;
-}
-
-void ldlm_flock_policy_local_to_wire(const union ldlm_policy_data *lpolicy,
- union ldlm_wire_policy_data *wpolicy)
-{
- memset(wpolicy, 0, sizeof(*wpolicy));
- wpolicy->l_flock.lfw_start = lpolicy->l_flock.start;
- wpolicy->l_flock.lfw_end = lpolicy->l_flock.end;
- wpolicy->l_flock.lfw_pid = lpolicy->l_flock.pid;
- wpolicy->l_flock.lfw_owner = lpolicy->l_flock.owner;
-}
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c b/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c
deleted file mode 100644
index 2926208cdfa1..000000000000
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c
+++ /dev/null
@@ -1,69 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ldlm/ldlm_inodebits.c
- *
- * Author: Peter Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-/**
- * This file contains implementation of IBITS lock type
- *
- * IBITS lock type contains a bit mask determining various properties of an
- * object. The meanings of specific bits are specific to the caller and are
- * opaque to LDLM code.
- *
- * Locks with intersecting bitmasks and conflicting lock modes (e.g. LCK_PW)
- * are considered conflicting. See the lock mode compatibility matrix
- * in lustre_dlm.h.
- */
-
-#define DEBUG_SUBSYSTEM S_LDLM
-
-#include <lustre_dlm.h>
-#include <obd_support.h>
-#include <lustre_lib.h>
-#include "ldlm_internal.h"
-
-void ldlm_ibits_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy,
- union ldlm_policy_data *lpolicy)
-{
- lpolicy->l_inodebits.bits = wpolicy->l_inodebits.bits;
-}
-
-void ldlm_ibits_policy_local_to_wire(const union ldlm_policy_data *lpolicy,
- union ldlm_wire_policy_data *wpolicy)
-{
- memset(wpolicy, 0, sizeof(*wpolicy));
- wpolicy->l_inodebits.bits = lpolicy->l_inodebits.bits;
-}
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
deleted file mode 100644
index bc33ca100620..000000000000
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
+++ /dev/null
@@ -1,342 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define MAX_STRING_SIZE 128
-
-extern int ldlm_srv_namespace_nr;
-extern int ldlm_cli_namespace_nr;
-extern struct mutex ldlm_srv_namespace_lock;
-extern struct list_head ldlm_srv_namespace_list;
-extern struct mutex ldlm_cli_namespace_lock;
-extern struct list_head ldlm_cli_active_namespace_list;
-
-static inline int ldlm_namespace_nr_read(enum ldlm_side client)
-{
- return client == LDLM_NAMESPACE_SERVER ?
- ldlm_srv_namespace_nr : ldlm_cli_namespace_nr;
-}
-
-static inline void ldlm_namespace_nr_inc(enum ldlm_side client)
-{
- if (client == LDLM_NAMESPACE_SERVER)
- ldlm_srv_namespace_nr++;
- else
- ldlm_cli_namespace_nr++;
-}
-
-static inline void ldlm_namespace_nr_dec(enum ldlm_side client)
-{
- if (client == LDLM_NAMESPACE_SERVER)
- ldlm_srv_namespace_nr--;
- else
- ldlm_cli_namespace_nr--;
-}
-
-static inline struct list_head *ldlm_namespace_list(enum ldlm_side client)
-{
- return client == LDLM_NAMESPACE_SERVER ?
- &ldlm_srv_namespace_list : &ldlm_cli_active_namespace_list;
-}
-
-static inline struct mutex *ldlm_namespace_lock(enum ldlm_side client)
-{
- return client == LDLM_NAMESPACE_SERVER ?
- &ldlm_srv_namespace_lock : &ldlm_cli_namespace_lock;
-}
-
-/* ns_bref is the number of resources in this namespace */
-static inline int ldlm_ns_empty(struct ldlm_namespace *ns)
-{
- return atomic_read(&ns->ns_bref) == 0;
-}
-
-void ldlm_namespace_move_to_active_locked(struct ldlm_namespace *ns,
- enum ldlm_side client);
-void ldlm_namespace_move_to_inactive_locked(struct ldlm_namespace *ns,
- enum ldlm_side client);
-struct ldlm_namespace *ldlm_namespace_first_locked(enum ldlm_side client);
-
-/* ldlm_request.c */
-/* LRU cancellation flags: they select which locks the cancel machinery targets. */
-enum {
- LDLM_LRU_FLAG_AGED = BIT(0), /* Cancel old non-LRU resize locks */
- LDLM_LRU_FLAG_PASSED = BIT(1), /* Cancel passed number of locks. */
- LDLM_LRU_FLAG_SHRINK = BIT(2), /* Cancel locks from shrinker. */
- LDLM_LRU_FLAG_LRUR = BIT(3), /* Cancel locks from lru resize. */
- LDLM_LRU_FLAG_NO_WAIT = BIT(4), /* Cancel locks w/o blocking (neither
-					      * sending nor waiting for any RPCs)
- */
- LDLM_LRU_FLAG_LRUR_NO_WAIT = BIT(5), /* LRUR + NO_WAIT */
-};
-
-int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr,
- enum ldlm_cancel_flags sync, int flags);
-int ldlm_cancel_lru_local(struct ldlm_namespace *ns,
- struct list_head *cancels, int count, int max,
- enum ldlm_cancel_flags cancel_flags, int flags);
-extern unsigned int ldlm_enqueue_min;
-extern unsigned int ldlm_cancel_unused_locks_before_replay;
-
-/* ldlm_lock.c */
-
-struct ldlm_cb_set_arg {
- struct ptlrpc_request_set *set;
- int type; /* LDLM_{CP,BL,GL}_CALLBACK */
- atomic_t restart;
- struct list_head *list;
- union ldlm_gl_desc *gl_desc; /* glimpse AST descriptor */
-};
-
-enum ldlm_desc_ast_t {
- LDLM_WORK_BL_AST,
- LDLM_WORK_CP_AST,
- LDLM_WORK_REVOKE_AST,
- LDLM_WORK_GL_AST
-};
-
-void ldlm_grant_lock(struct ldlm_lock *lock, struct list_head *work_list);
-int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
- enum req_location loc, void *data, int size);
-struct ldlm_lock *
-ldlm_lock_create(struct ldlm_namespace *ns, const struct ldlm_res_id *id,
- enum ldlm_type type, enum ldlm_mode mode,
- const struct ldlm_callback_suite *cbs,
- void *data, __u32 lvb_len, enum lvb_type lvb_type);
-enum ldlm_error ldlm_lock_enqueue(struct ldlm_namespace *ns,
- struct ldlm_lock **lock, void *cookie,
- __u64 *flags);
-void ldlm_lock_addref_internal(struct ldlm_lock *lock, enum ldlm_mode mode);
-void ldlm_lock_addref_internal_nolock(struct ldlm_lock *lock,
- enum ldlm_mode mode);
-void ldlm_lock_decref_internal(struct ldlm_lock *lock, enum ldlm_mode mode);
-void ldlm_lock_decref_internal_nolock(struct ldlm_lock *lock,
- enum ldlm_mode mode);
-int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list,
- enum ldlm_desc_ast_t ast_type);
-int ldlm_lock_remove_from_lru_check(struct ldlm_lock *lock, time_t last_use);
-#define ldlm_lock_remove_from_lru(lock) ldlm_lock_remove_from_lru_check(lock, 0)
-int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock);
-void ldlm_lock_destroy_nolock(struct ldlm_lock *lock);
-
-/* ldlm_lockd.c */
-int ldlm_bl_to_thread_lock(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld,
- struct ldlm_lock *lock);
-int ldlm_bl_to_thread_list(struct ldlm_namespace *ns,
- struct ldlm_lock_desc *ld,
- struct list_head *cancels, int count,
- enum ldlm_cancel_flags cancel_flags);
-int ldlm_bl_thread_wakeup(void);
-
-void ldlm_handle_bl_callback(struct ldlm_namespace *ns,
- struct ldlm_lock_desc *ld, struct ldlm_lock *lock);
-
-extern struct kmem_cache *ldlm_resource_slab;
-extern struct kset *ldlm_ns_kset;
-
-/* ldlm_lockd.c & ldlm_lock.c */
-extern struct kmem_cache *ldlm_lock_slab;
-
-/* ldlm_extent.c */
-void ldlm_extent_add_lock(struct ldlm_resource *res, struct ldlm_lock *lock);
-void ldlm_extent_unlink_lock(struct ldlm_lock *lock);
-
-/* l_lock.c */
-void l_check_ns_lock(struct ldlm_namespace *ns);
-void l_check_no_ns_lock(struct ldlm_namespace *ns);
-
-extern struct dentry *ldlm_svc_debugfs_dir;
-
-struct ldlm_state {
- struct ptlrpc_service *ldlm_cb_service;
- struct ptlrpc_service *ldlm_cancel_service;
- struct ptlrpc_client *ldlm_client;
- struct ptlrpc_connection *ldlm_server_conn;
- struct ldlm_bl_pool *ldlm_bl_pool;
-};
-
-/* ldlm_pool.c */
-__u64 ldlm_pool_get_slv(struct ldlm_pool *pl);
-void ldlm_pool_set_clv(struct ldlm_pool *pl, __u64 clv);
-__u32 ldlm_pool_get_lvf(struct ldlm_pool *pl);
-
-/* interval tree, for LDLM_EXTENT. */
-extern struct kmem_cache *ldlm_interval_slab; /* slab cache for ldlm_interval */
-struct ldlm_interval *ldlm_interval_detach(struct ldlm_lock *l);
-struct ldlm_interval *ldlm_interval_alloc(struct ldlm_lock *lock);
-void ldlm_interval_free(struct ldlm_interval *node);
-/* this function must be called with res lock held */
-static inline struct ldlm_extent *
-ldlm_interval_extent(struct ldlm_interval *node)
-{
- struct ldlm_lock *lock;
-
- LASSERT(!list_empty(&node->li_group));
-
- lock = list_entry(node->li_group.next, struct ldlm_lock, l_sl_policy);
- return &lock->l_policy_data.l_extent;
-}
-
-int ldlm_init(void);
-void ldlm_exit(void);
-
-enum ldlm_policy_res {
- LDLM_POLICY_CANCEL_LOCK,
- LDLM_POLICY_KEEP_LOCK,
- LDLM_POLICY_SKIP_LOCK
-};
-
-#define LDLM_POOL_SYSFS_PRINT_int(v) sprintf(buf, "%d\n", v)
-#define LDLM_POOL_SYSFS_SET_int(a, b) { a = b; }
-#define LDLM_POOL_SYSFS_PRINT_u64(v) sprintf(buf, "%lld\n", v)
-#define LDLM_POOL_SYSFS_SET_u64(a, b) { a = b; }
-#define LDLM_POOL_SYSFS_PRINT_atomic(v) sprintf(buf, "%d\n", atomic_read(&v))
-#define LDLM_POOL_SYSFS_SET_atomic(a, b) atomic_set(&a, b)
-
-#define LDLM_POOL_SYSFS_READER_SHOW(var, type) \
- static ssize_t var##_show(struct kobject *kobj, \
- struct attribute *attr, \
- char *buf) \
- { \
- struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, \
- pl_kobj); \
- type tmp; \
- \
- spin_lock(&pl->pl_lock); \
- tmp = pl->pl_##var; \
- spin_unlock(&pl->pl_lock); \
- \
- return LDLM_POOL_SYSFS_PRINT_##type(tmp); \
- } \
- struct __##var##__dummy_read {; } /* semicolon catcher */
-
-#define LDLM_POOL_SYSFS_WRITER_STORE(var, type) \
- static ssize_t var##_store(struct kobject *kobj, \
- struct attribute *attr, \
- const char *buffer, \
- size_t count) \
- { \
- struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, \
- pl_kobj); \
- unsigned long tmp; \
- int rc; \
- \
- rc = kstrtoul(buffer, 10, &tmp); \
- if (rc < 0) { \
- return rc; \
- } \
- \
- spin_lock(&pl->pl_lock); \
- LDLM_POOL_SYSFS_SET_##type(pl->pl_##var, tmp); \
- spin_unlock(&pl->pl_lock); \
- \
- return count; \
- } \
- struct __##var##__dummy_write {; } /* semicolon catcher */
-
-#define LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(var, type) \
- static ssize_t var##_show(struct kobject *kobj, \
- struct attribute *attr, \
- char *buf) \
- { \
- struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, \
- pl_kobj); \
- \
- return LDLM_POOL_SYSFS_PRINT_##type(pl->pl_##var); \
- } \
- struct __##var##__dummy_read {; } /* semicolon catcher */
-
-#define LDLM_POOL_SYSFS_WRITER_NOLOCK_STORE(var, type) \
- static ssize_t var##_store(struct kobject *kobj, \
- struct attribute *attr, \
- const char *buffer, \
- size_t count) \
- { \
- struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, \
- pl_kobj); \
- unsigned long tmp; \
- int rc; \
- \
- rc = kstrtoul(buffer, 10, &tmp); \
- if (rc < 0) { \
- return rc; \
- } \
- \
- LDLM_POOL_SYSFS_SET_##type(pl->pl_##var, tmp); \
- \
- return count; \
- } \
- struct __##var##__dummy_write {; } /* semicolon catcher */
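-
-/*
- * Usage sketch (hypothetical pairing; the actual call sites live in
- * ldlm_pool.c): each macro above expands to a complete sysfs handler,
- * so, for the atomic_t field pl_granted,
- *
- *	LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(granted, atomic);
- *	LDLM_POOL_SYSFS_WRITER_NOLOCK_STORE(granted, atomic);
- *
- * generate granted_show()/granted_store() that read and write
- * pl->pl_granted without taking pl_lock.
- */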
-
-static inline int is_granted_or_cancelled(struct ldlm_lock *lock)
-{
- int ret = 0;
-
- lock_res_and_lock(lock);
- if ((lock->l_req_mode == lock->l_granted_mode) &&
- !ldlm_is_cp_reqd(lock))
- ret = 1;
- else if (ldlm_is_failed(lock) || ldlm_is_cancel(lock))
- ret = 1;
- unlock_res_and_lock(lock);
-
- return ret;
-}
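-
-/*
- * is_granted_or_cancelled() is typically the wake-up condition while
- * waiting for an enqueue to complete; a sketch (the real waiter lives
- * in ldlm_completion_ast()):
- *
- *	wait_event_idle(lock->l_waitq, is_granted_or_cancelled(lock));
- */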
-
-typedef void (*ldlm_policy_wire_to_local_t)(const union ldlm_wire_policy_data *,
- union ldlm_policy_data *);
-
-typedef void (*ldlm_policy_local_to_wire_t)(const union ldlm_policy_data *,
- union ldlm_wire_policy_data *);
-
-void ldlm_plain_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy,
- union ldlm_policy_data *lpolicy);
-void ldlm_plain_policy_local_to_wire(const union ldlm_policy_data *lpolicy,
- union ldlm_wire_policy_data *wpolicy);
-void ldlm_ibits_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy,
- union ldlm_policy_data *lpolicy);
-void ldlm_ibits_policy_local_to_wire(const union ldlm_policy_data *lpolicy,
- union ldlm_wire_policy_data *wpolicy);
-void ldlm_extent_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy,
- union ldlm_policy_data *lpolicy);
-void ldlm_extent_policy_local_to_wire(const union ldlm_policy_data *lpolicy,
- union ldlm_wire_policy_data *wpolicy);
-void ldlm_flock_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy,
- union ldlm_policy_data *lpolicy);
-void ldlm_flock_policy_local_to_wire(const union ldlm_policy_data *lpolicy,
- union ldlm_wire_policy_data *wpolicy);
-
-static inline bool ldlm_res_eq(const struct ldlm_res_id *res0,
- const struct ldlm_res_id *res1)
-{
- return memcmp(res0, res1, sizeof(*res0)) == 0;
-}
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
deleted file mode 100644
index 0aa4f234a4f4..000000000000
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
+++ /dev/null
@@ -1,842 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2010, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-/**
- * This file deals with various client/target related logic including recovery.
- *
- * TODO: This code more logically belongs in the ptlrpc module than in ldlm and
- * should be moved.
- */
-
-#define DEBUG_SUBSYSTEM S_LDLM
-
-#include <obd.h>
-#include <obd_class.h>
-#include <lustre_dlm.h>
-#include <lustre_net.h>
-#include <lustre_sec.h>
-#include "ldlm_internal.h"
-
-/* @priority: If non-zero, move the selected connection to the list head.
- * @create: If zero, only search in existing connections.
- */
-static int import_set_conn(struct obd_import *imp, struct obd_uuid *uuid,
- int priority, int create)
-{
- struct ptlrpc_connection *ptlrpc_conn;
- struct obd_import_conn *imp_conn = NULL, *item;
- int rc = 0;
-
- if (!create && !priority) {
- CDEBUG(D_HA, "Nothing to do\n");
- return -EINVAL;
- }
-
- ptlrpc_conn = ptlrpc_uuid_to_connection(uuid);
- if (!ptlrpc_conn) {
- CDEBUG(D_HA, "can't find connection %s\n", uuid->uuid);
- return -ENOENT;
- }
-
- if (create) {
- imp_conn = kzalloc(sizeof(*imp_conn), GFP_NOFS);
- if (!imp_conn) {
- rc = -ENOMEM;
- goto out_put;
- }
- }
-
- spin_lock(&imp->imp_lock);
- list_for_each_entry(item, &imp->imp_conn_list, oic_item) {
- if (obd_uuid_equals(uuid, &item->oic_uuid)) {
- if (priority) {
- list_del(&item->oic_item);
- list_add(&item->oic_item,
- &imp->imp_conn_list);
- item->oic_last_attempt = 0;
- }
- CDEBUG(D_HA, "imp %p@%s: found existing conn %s%s\n",
- imp, imp->imp_obd->obd_name, uuid->uuid,
- (priority ? ", moved to head" : ""));
- spin_unlock(&imp->imp_lock);
- rc = 0;
- goto out_free;
- }
- }
- /* No existing import connection found for \a uuid. */
- if (create) {
- imp_conn->oic_conn = ptlrpc_conn;
- imp_conn->oic_uuid = *uuid;
- imp_conn->oic_last_attempt = 0;
- if (priority)
- list_add(&imp_conn->oic_item, &imp->imp_conn_list);
- else
- list_add_tail(&imp_conn->oic_item,
- &imp->imp_conn_list);
- CDEBUG(D_HA, "imp %p@%s: add connection %s at %s\n",
- imp, imp->imp_obd->obd_name, uuid->uuid,
- (priority ? "head" : "tail"));
- } else {
- spin_unlock(&imp->imp_lock);
- rc = -ENOENT;
- goto out_free;
- }
-
- spin_unlock(&imp->imp_lock);
- return 0;
-out_free:
- kfree(imp_conn);
-out_put:
- ptlrpc_connection_put(ptlrpc_conn);
- return rc;
-}
-
-int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid)
-{
- return import_set_conn(imp, uuid, 1, 0);
-}
-
-int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid,
- int priority)
-{
- return import_set_conn(imp, uuid, priority, 1);
-}
-EXPORT_SYMBOL(client_import_add_conn);
-
-int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid)
-{
- struct obd_import_conn *imp_conn;
- struct obd_export *dlmexp;
- int rc = -ENOENT;
-
- spin_lock(&imp->imp_lock);
- if (list_empty(&imp->imp_conn_list)) {
- LASSERT(!imp->imp_connection);
- goto out;
- }
-
- list_for_each_entry(imp_conn, &imp->imp_conn_list, oic_item) {
- if (!obd_uuid_equals(uuid, &imp_conn->oic_uuid))
- continue;
- LASSERT(imp_conn->oic_conn);
-
- if (imp_conn == imp->imp_conn_current) {
- LASSERT(imp_conn->oic_conn == imp->imp_connection);
-
- if (imp->imp_state != LUSTRE_IMP_CLOSED &&
- imp->imp_state != LUSTRE_IMP_DISCON) {
- CERROR("can't remove current connection\n");
- rc = -EBUSY;
- goto out;
- }
-
- ptlrpc_connection_put(imp->imp_connection);
- imp->imp_connection = NULL;
-
- dlmexp = class_conn2export(&imp->imp_dlm_handle);
- if (dlmexp && dlmexp->exp_connection) {
- LASSERT(dlmexp->exp_connection ==
- imp_conn->oic_conn);
- ptlrpc_connection_put(dlmexp->exp_connection);
- dlmexp->exp_connection = NULL;
- }
-
- if (dlmexp)
- class_export_put(dlmexp);
- }
-
- list_del(&imp_conn->oic_item);
- ptlrpc_connection_put(imp_conn->oic_conn);
- kfree(imp_conn);
- CDEBUG(D_HA, "imp %p@%s: remove connection %s\n",
- imp, imp->imp_obd->obd_name, uuid->uuid);
- rc = 0;
- break;
- }
-out:
- spin_unlock(&imp->imp_lock);
- if (rc == -ENOENT)
- CERROR("connection %s not found\n", uuid->uuid);
- return rc;
-}
-EXPORT_SYMBOL(client_import_del_conn);
-
-/**
- * Find a connection UUID by peer NID. \a peer is a server NID. This function
- * is used to find the connection UUID of \a imp that can reach \a peer.
- */
-int client_import_find_conn(struct obd_import *imp, lnet_nid_t peer,
- struct obd_uuid *uuid)
-{
- struct obd_import_conn *conn;
- int rc = -ENOENT;
-
- spin_lock(&imp->imp_lock);
- list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
- /* Check if conn UUID does have this peer NID. */
- if (class_check_uuid(&conn->oic_uuid, peer)) {
- *uuid = conn->oic_uuid;
- rc = 0;
- break;
- }
- }
- spin_unlock(&imp->imp_lock);
- return rc;
-}
-EXPORT_SYMBOL(client_import_find_conn);
-
-void client_destroy_import(struct obd_import *imp)
-{
- /* Drop security policy instance after all RPCs have finished/aborted
- * to let all busy contexts be released.
- */
- class_import_get(imp);
- class_destroy_import(imp);
- sptlrpc_import_sec_put(imp);
- class_import_put(imp);
-}
-EXPORT_SYMBOL(client_destroy_import);
-
-/* Configure an RPC client OBD device.
- *
- * lcfg parameters:
- * 1 - client UUID
- * 2 - server UUID
- * 3 - inactive-on-startup
- */
-int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
-{
- struct client_obd *cli = &obddev->u.cli;
- struct obd_import *imp;
- struct obd_uuid server_uuid;
- int rq_portal, rp_portal, connect_op;
- char *name = obddev->obd_type->typ_name;
- enum ldlm_ns_type ns_type = LDLM_NS_TYPE_UNKNOWN;
- int rc;
-
- /* In a more perfect world, we would hang a ptlrpc_client off of
- * obd_type and just use the values from there.
- */
- if (!strcmp(name, LUSTRE_OSC_NAME)) {
- rq_portal = OST_REQUEST_PORTAL;
- rp_portal = OSC_REPLY_PORTAL;
- connect_op = OST_CONNECT;
- cli->cl_sp_me = LUSTRE_SP_CLI;
- cli->cl_sp_to = LUSTRE_SP_OST;
- ns_type = LDLM_NS_TYPE_OSC;
- } else if (!strcmp(name, LUSTRE_MDC_NAME) ||
- !strcmp(name, LUSTRE_LWP_NAME)) {
- rq_portal = MDS_REQUEST_PORTAL;
- rp_portal = MDC_REPLY_PORTAL;
- connect_op = MDS_CONNECT;
- cli->cl_sp_me = LUSTRE_SP_CLI;
- cli->cl_sp_to = LUSTRE_SP_MDT;
- ns_type = LDLM_NS_TYPE_MDC;
- } else if (!strcmp(name, LUSTRE_MGC_NAME)) {
- rq_portal = MGS_REQUEST_PORTAL;
- rp_portal = MGC_REPLY_PORTAL;
- connect_op = MGS_CONNECT;
- cli->cl_sp_me = LUSTRE_SP_MGC;
- cli->cl_sp_to = LUSTRE_SP_MGS;
- cli->cl_flvr_mgc.sf_rpc = SPTLRPC_FLVR_INVALID;
- ns_type = LDLM_NS_TYPE_MGC;
- } else {
- CERROR("unknown client OBD type \"%s\", can't setup\n",
- name);
- return -EINVAL;
- }
-
- if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
- CERROR("requires a TARGET UUID\n");
- return -EINVAL;
- }
-
- if (LUSTRE_CFG_BUFLEN(lcfg, 1) > 37) {
- CERROR("client UUID must be less than 38 characters\n");
- return -EINVAL;
- }
-
- if (LUSTRE_CFG_BUFLEN(lcfg, 2) < 1) {
- CERROR("setup requires a SERVER UUID\n");
- return -EINVAL;
- }
-
- if (LUSTRE_CFG_BUFLEN(lcfg, 2) > 37) {
- CERROR("target UUID must be less than 38 characters\n");
- return -EINVAL;
- }
-
- init_rwsem(&cli->cl_sem);
- cli->cl_conn_count = 0;
- memcpy(server_uuid.uuid, lustre_cfg_buf(lcfg, 2),
- min_t(unsigned int, LUSTRE_CFG_BUFLEN(lcfg, 2),
- sizeof(server_uuid)));
-
- cli->cl_dirty_pages = 0;
- cli->cl_avail_grant = 0;
- /* FIXME: Should limit this for the sum of all cl_dirty_max_pages. */
- /*
- * cl_dirty_max_pages may be changed at connect time in
- * ptlrpc_connect_interpret().
- */
- client_adjust_max_dirty(cli);
- INIT_LIST_HEAD(&cli->cl_cache_waiters);
- INIT_LIST_HEAD(&cli->cl_loi_ready_list);
- INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list);
- INIT_LIST_HEAD(&cli->cl_loi_write_list);
- INIT_LIST_HEAD(&cli->cl_loi_read_list);
- spin_lock_init(&cli->cl_loi_list_lock);
- atomic_set(&cli->cl_pending_w_pages, 0);
- atomic_set(&cli->cl_pending_r_pages, 0);
- cli->cl_r_in_flight = 0;
- cli->cl_w_in_flight = 0;
-
- spin_lock_init(&cli->cl_read_rpc_hist.oh_lock);
- spin_lock_init(&cli->cl_write_rpc_hist.oh_lock);
- spin_lock_init(&cli->cl_read_page_hist.oh_lock);
- spin_lock_init(&cli->cl_write_page_hist.oh_lock);
- spin_lock_init(&cli->cl_read_offset_hist.oh_lock);
- spin_lock_init(&cli->cl_write_offset_hist.oh_lock);
-
- /* lru for osc. */
- INIT_LIST_HEAD(&cli->cl_lru_osc);
- atomic_set(&cli->cl_lru_shrinkers, 0);
- atomic_long_set(&cli->cl_lru_busy, 0);
- atomic_long_set(&cli->cl_lru_in_list, 0);
- INIT_LIST_HEAD(&cli->cl_lru_list);
- spin_lock_init(&cli->cl_lru_list_lock);
- atomic_long_set(&cli->cl_unstable_count, 0);
- INIT_LIST_HEAD(&cli->cl_shrink_list);
-
- init_waitqueue_head(&cli->cl_destroy_waitq);
- atomic_set(&cli->cl_destroy_in_flight, 0);
- /* Turn on checksumming by default. */
- cli->cl_checksum = 1;
- /*
- * The supported checksum types will be worked out at connect time
- * Set cl_chksum* to CRC32 for now to avoid returning screwed info
- * through procfs.
- */
- cli->cl_cksum_type = OBD_CKSUM_CRC32;
- cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
- atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS);
-
- /*
- * Set it to possible maximum size. It may be reduced by ocd_brw_size
- * from OFD after connecting.
- */
- cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES;
-
- /*
- * Set cl_chunkbits default value to PAGE_SHIFT;
- * it will be updated at OSC connection time.
- */
- cli->cl_chunkbits = PAGE_SHIFT;
-
- if (!strcmp(name, LUSTRE_MDC_NAME))
- cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_DEFAULT;
- else if (totalram_pages >> (20 - PAGE_SHIFT) <= 128 /* MB */)
- cli->cl_max_rpcs_in_flight = 2;
- else if (totalram_pages >> (20 - PAGE_SHIFT) <= 256 /* MB */)
- cli->cl_max_rpcs_in_flight = 3;
- else if (totalram_pages >> (20 - PAGE_SHIFT) <= 512 /* MB */)
- cli->cl_max_rpcs_in_flight = 4;
- else
- cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_DEFAULT;
-
- spin_lock_init(&cli->cl_mod_rpcs_lock);
- spin_lock_init(&cli->cl_mod_rpcs_hist.oh_lock);
- cli->cl_max_mod_rpcs_in_flight = 0;
- cli->cl_mod_rpcs_in_flight = 0;
- cli->cl_close_rpcs_in_flight = 0;
- init_waitqueue_head(&cli->cl_mod_rpcs_waitq);
- cli->cl_mod_tag_bitmap = NULL;
-
- if (connect_op == MDS_CONNECT) {
- cli->cl_max_mod_rpcs_in_flight = cli->cl_max_rpcs_in_flight - 1;
- cli->cl_mod_tag_bitmap = kcalloc(BITS_TO_LONGS(OBD_MAX_RIF_MAX),
- sizeof(long), GFP_NOFS);
- if (!cli->cl_mod_tag_bitmap) {
- rc = -ENOMEM;
- goto err;
- }
- }
-
- rc = ldlm_get_ref();
- if (rc) {
- CERROR("ldlm_get_ref failed: %d\n", rc);
- goto err;
- }
-
- ptlrpc_init_client(rq_portal, rp_portal, name,
- &obddev->obd_ldlm_client);
-
- imp = class_new_import(obddev);
- if (!imp) {
- rc = -ENOENT;
- goto err_ldlm;
- }
- imp->imp_client = &obddev->obd_ldlm_client;
- imp->imp_connect_op = connect_op;
- memcpy(cli->cl_target_uuid.uuid, lustre_cfg_buf(lcfg, 1),
- LUSTRE_CFG_BUFLEN(lcfg, 1));
- class_import_put(imp);
-
- rc = client_import_add_conn(imp, &server_uuid, 1);
- if (rc) {
- CERROR("can't add initial connection\n");
- goto err_import;
- }
-
- cli->cl_import = imp;
- /* cli->cl_max_mds_easize updated by mdc_init_ea_size() */
- cli->cl_max_mds_easize = sizeof(struct lov_mds_md_v3);
-
- if (LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) {
- if (!strcmp(lustre_cfg_string(lcfg, 3), "inactive")) {
- CDEBUG(D_HA, "marking %s %s->%s as inactive\n",
- name, obddev->obd_name,
- cli->cl_target_uuid.uuid);
- spin_lock(&imp->imp_lock);
- imp->imp_deactive = 1;
- spin_unlock(&imp->imp_lock);
- }
- }
-
- obddev->obd_namespace = ldlm_namespace_new(obddev, obddev->obd_name,
- LDLM_NAMESPACE_CLIENT,
- LDLM_NAMESPACE_GREEDY,
- ns_type);
- if (!obddev->obd_namespace) {
- CERROR("Unable to create client namespace - %s\n",
- obddev->obd_name);
- rc = -ENOMEM;
- goto err_import;
- }
-
- return rc;
-
-err_import:
- class_destroy_import(imp);
-err_ldlm:
- ldlm_put_ref();
-err:
- kfree(cli->cl_mod_tag_bitmap);
- cli->cl_mod_tag_bitmap = NULL;
- return rc;
-}
-EXPORT_SYMBOL(client_obd_setup);
-
-int client_obd_cleanup(struct obd_device *obddev)
-{
- struct client_obd *cli = &obddev->u.cli;
-
- ldlm_namespace_free_post(obddev->obd_namespace);
- obddev->obd_namespace = NULL;
-
- obd_cleanup_client_import(obddev);
- LASSERT(!obddev->u.cli.cl_import);
-
- ldlm_put_ref();
-
- kfree(cli->cl_mod_tag_bitmap);
- cli->cl_mod_tag_bitmap = NULL;
-
- return 0;
-}
-EXPORT_SYMBOL(client_obd_cleanup);
-
-/* ->o_connect() method for the client side (OSC, MDC and MGC) */
-int client_connect_import(const struct lu_env *env,
- struct obd_export **exp,
- struct obd_device *obd, struct obd_uuid *cluuid,
- struct obd_connect_data *data, void *localdata)
-{
- struct client_obd *cli = &obd->u.cli;
- struct obd_import *imp = cli->cl_import;
- struct obd_connect_data *ocd;
- struct lustre_handle conn = { 0 };
- bool is_mdc = false;
- int rc;
-
- *exp = NULL;
- down_write(&cli->cl_sem);
- if (cli->cl_conn_count > 0) {
- rc = -EALREADY;
- goto out_sem;
- }
-
- rc = class_connect(&conn, obd, cluuid);
- if (rc)
- goto out_sem;
-
- cli->cl_conn_count++;
- *exp = class_conn2export(&conn);
-
- LASSERT(obd->obd_namespace);
-
- imp->imp_dlm_handle = conn;
- rc = ptlrpc_init_import(imp);
- if (rc != 0)
- goto out_ldlm;
-
- ocd = &imp->imp_connect_data;
- if (data) {
- *ocd = *data;
- is_mdc = !strncmp(imp->imp_obd->obd_type->typ_name,
- LUSTRE_MDC_NAME, 3);
- if (is_mdc)
- data->ocd_connect_flags |= OBD_CONNECT_MULTIMODRPCS;
- imp->imp_connect_flags_orig = data->ocd_connect_flags;
- }
-
- rc = ptlrpc_connect_import(imp);
- if (rc != 0) {
- if (data && is_mdc)
- data->ocd_connect_flags &= ~OBD_CONNECT_MULTIMODRPCS;
- LASSERT(imp->imp_state == LUSTRE_IMP_DISCON);
- goto out_ldlm;
- }
- LASSERT(*exp && (*exp)->exp_connection);
-
- if (data) {
- LASSERTF((ocd->ocd_connect_flags & data->ocd_connect_flags) ==
- ocd->ocd_connect_flags, "old %#llx, new %#llx\n",
- data->ocd_connect_flags, ocd->ocd_connect_flags);
- data->ocd_connect_flags = ocd->ocd_connect_flags;
- /* clear the flag as it was not set and is not known
- * by upper layers
- */
- if (is_mdc)
- data->ocd_connect_flags &= ~OBD_CONNECT_MULTIMODRPCS;
- }
-
- ptlrpc_pinger_add_import(imp);
-
- if (rc) {
-out_ldlm:
- cli->cl_conn_count--;
- class_disconnect(*exp);
- *exp = NULL;
- }
-out_sem:
- up_write(&cli->cl_sem);
-
- return rc;
-}
-EXPORT_SYMBOL(client_connect_import);
-
-int client_disconnect_export(struct obd_export *exp)
-{
- struct obd_device *obd = class_exp2obd(exp);
- struct client_obd *cli;
- struct obd_import *imp;
- int rc = 0, err;
-
- if (!obd) {
- CERROR("invalid export for disconnect: exp %p cookie %#llx\n",
- exp, exp ? exp->exp_handle.h_cookie : -1);
- return -EINVAL;
- }
-
- cli = &obd->u.cli;
- imp = cli->cl_import;
-
- down_write(&cli->cl_sem);
- CDEBUG(D_INFO, "disconnect %s - %zu\n", obd->obd_name,
- cli->cl_conn_count);
-
- if (!cli->cl_conn_count) {
- CERROR("disconnecting disconnected device (%s)\n",
- obd->obd_name);
- rc = -EINVAL;
- goto out_disconnect;
- }
-
- cli->cl_conn_count--;
- if (cli->cl_conn_count) {
- rc = 0;
- goto out_disconnect;
- }
-
- /* Mark import deactivated now, so we don't try to reconnect if any
- * of the cleanup RPCs fails (e.g. LDLM cancel, etc.). We don't
- * fully deactivate the import, because that would drop all requests.
- */
- spin_lock(&imp->imp_lock);
- imp->imp_deactive = 1;
- spin_unlock(&imp->imp_lock);
-
- /* Some non-replayable imports (MDS's OSCs) are pinged, so just
- * delete it regardless. (It's safe to delete an import that was
- * never added.)
- */
- (void)ptlrpc_pinger_del_import(imp);
-
- if (obd->obd_namespace) {
- /* obd_force == local only */
- ldlm_cli_cancel_unused(obd->obd_namespace, NULL,
- obd->obd_force ? LCF_LOCAL : 0, NULL);
- ldlm_namespace_free_prior(obd->obd_namespace, imp,
- obd->obd_force);
- }
-
- /* There's no need to hold sem while disconnecting an import,
- * and it may actually cause deadlock in GSS.
- */
- up_write(&cli->cl_sem);
- rc = ptlrpc_disconnect_import(imp, 0);
- down_write(&cli->cl_sem);
-
- ptlrpc_invalidate_import(imp);
-
-out_disconnect:
- /* Use server style - class_disconnect should be always called for
- * o_disconnect.
- */
- err = class_disconnect(exp);
- if (!rc && err)
- rc = err;
-
- up_write(&cli->cl_sem);
-
- return rc;
-}
-EXPORT_SYMBOL(client_disconnect_export);
-
-/**
- * Packs current SLV and Limit into \a req.
- */
-int target_pack_pool_reply(struct ptlrpc_request *req)
-{
- struct obd_device *obd;
-
- /* Check that we still have all structures alive as this may
- * be some late RPC at shutdown time.
- */
- if (unlikely(!req->rq_export || !req->rq_export->exp_obd ||
- !exp_connect_lru_resize(req->rq_export))) {
- lustre_msg_set_slv(req->rq_repmsg, 0);
- lustre_msg_set_limit(req->rq_repmsg, 0);
- return 0;
- }
-
- /* OBD is alive here as export is alive, which we checked above. */
- obd = req->rq_export->exp_obd;
-
- read_lock(&obd->obd_pool_lock);
- lustre_msg_set_slv(req->rq_repmsg, obd->obd_pool_slv);
- lustre_msg_set_limit(req->rq_repmsg, obd->obd_pool_limit);
- read_unlock(&obd->obd_pool_lock);
-
- return 0;
-}
-EXPORT_SYMBOL(target_pack_pool_reply);
-
-static int
-target_send_reply_msg(struct ptlrpc_request *req, int rc, int fail_id)
-{
- if (OBD_FAIL_CHECK_ORSET(fail_id & ~OBD_FAIL_ONCE, OBD_FAIL_ONCE)) {
- DEBUG_REQ(D_ERROR, req, "dropping reply");
- return -ECOMM;
- }
-
- if (unlikely(rc)) {
- DEBUG_REQ(D_NET, req, "processing error (%d)", rc);
- req->rq_status = rc;
- return ptlrpc_send_error(req, 1);
- }
-
- DEBUG_REQ(D_NET, req, "sending reply");
- return ptlrpc_send_reply(req, PTLRPC_REPLY_MAYBE_DIFFICULT);
-}
-
-void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id)
-{
- struct ptlrpc_service_part *svcpt;
- int netrc;
- struct ptlrpc_reply_state *rs;
- struct obd_export *exp;
-
- if (req->rq_no_reply)
- return;
-
- svcpt = req->rq_rqbd->rqbd_svcpt;
- rs = req->rq_reply_state;
- if (!rs || !rs->rs_difficult) {
- /* no notifiers */
- target_send_reply_msg(req, rc, fail_id);
- return;
- }
-
- /* must be an export if locks saved */
- LASSERT(req->rq_export);
- /* req/reply consistent */
- LASSERT(rs->rs_svcpt == svcpt);
-
- /* "fresh" reply */
- LASSERT(!rs->rs_scheduled);
- LASSERT(!rs->rs_scheduled_ever);
- LASSERT(!rs->rs_handled);
- LASSERT(!rs->rs_on_net);
- LASSERT(!rs->rs_export);
- LASSERT(list_empty(&rs->rs_obd_list));
- LASSERT(list_empty(&rs->rs_exp_list));
-
- exp = class_export_get(req->rq_export);
-
- /* disable reply scheduling while I'm setting up */
- rs->rs_scheduled = 1;
- rs->rs_on_net = 1;
- rs->rs_xid = req->rq_xid;
- rs->rs_transno = req->rq_transno;
- rs->rs_export = exp;
- rs->rs_opc = lustre_msg_get_opc(req->rq_reqmsg);
-
- spin_lock(&exp->exp_uncommitted_replies_lock);
- CDEBUG(D_NET, "rs transno = %llu, last committed = %llu\n",
- rs->rs_transno, exp->exp_last_committed);
- if (rs->rs_transno > exp->exp_last_committed) {
- /* not committed already */
- list_add_tail(&rs->rs_obd_list,
- &exp->exp_uncommitted_replies);
- }
- spin_unlock(&exp->exp_uncommitted_replies_lock);
-
- spin_lock(&exp->exp_lock);
- list_add_tail(&rs->rs_exp_list, &exp->exp_outstanding_replies);
- spin_unlock(&exp->exp_lock);
-
- netrc = target_send_reply_msg(req, rc, fail_id);
-
- spin_lock(&svcpt->scp_rep_lock);
-
- atomic_inc(&svcpt->scp_nreps_difficult);
-
- if (netrc != 0) {
- /* Error sending: the reply is off the net. We also need a +1
- * reply ref until ptlrpc_handle_rs() is done with the reply
- * state (if the send had been successful, there would have been
- * a +1 ref for the net, which reply_out_callback leaves alone).
- */
- rs->rs_on_net = 0;
- ptlrpc_rs_addref(rs);
- }
-
- spin_lock(&rs->rs_lock);
- if (rs->rs_transno <= exp->exp_last_committed ||
- (!rs->rs_on_net && !rs->rs_no_ack) ||
- list_empty(&rs->rs_exp_list) || /* completed already */
- list_empty(&rs->rs_obd_list)) {
- CDEBUG(D_HA, "Schedule reply immediately\n");
- ptlrpc_dispatch_difficult_reply(rs);
- } else {
- list_add(&rs->rs_list, &svcpt->scp_rep_active);
- rs->rs_scheduled = 0; /* allow notifier to schedule */
- }
- spin_unlock(&rs->rs_lock);
- spin_unlock(&svcpt->scp_rep_lock);
-}
-EXPORT_SYMBOL(target_send_reply);
-
-enum ldlm_mode lck_compat_array[] = {
- [LCK_EX] = LCK_COMPAT_EX,
- [LCK_PW] = LCK_COMPAT_PW,
- [LCK_PR] = LCK_COMPAT_PR,
- [LCK_CW] = LCK_COMPAT_CW,
- [LCK_CR] = LCK_COMPAT_CR,
- [LCK_NL] = LCK_COMPAT_NL,
- [LCK_GROUP] = LCK_COMPAT_GROUP,
- [LCK_COS] = LCK_COMPAT_COS,
-};
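-
-/*
- * Compatibility sketch: index the table with the mode of an existing lock
- * to get the bitmask of request modes that can be granted alongside it,
- * roughly (the real helper is lockmode_compat() in lustre_dlm.h):
- *
- *	if (lck_compat_array[exist_mode] & new_mode)
- *		... the two modes can coexist ...
- */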
-
-/**
- * Rather arbitrary mapping from LDLM error codes to errno values. This should
- * not escape to the user level.
- */
-int ldlm_error2errno(enum ldlm_error error)
-{
- int result;
-
- switch (error) {
- case ELDLM_OK:
- case ELDLM_LOCK_MATCHED:
- result = 0;
- break;
- case ELDLM_LOCK_CHANGED:
- result = -ESTALE;
- break;
- case ELDLM_LOCK_ABORTED:
- result = -ENAVAIL;
- break;
- case ELDLM_LOCK_REPLACED:
- result = -ESRCH;
- break;
- case ELDLM_NO_LOCK_DATA:
- result = -ENOENT;
- break;
- case ELDLM_NAMESPACE_EXISTS:
- result = -EEXIST;
- break;
- case ELDLM_BAD_NAMESPACE:
- result = -EBADF;
- break;
- default:
- if (((int)error) < 0) /* cast to signed type */
- result = error; /* as enum ldlm_error can be unsigned */
- else {
- CERROR("Invalid DLM result code: %d\n", error);
- result = -EPROTO;
- }
- }
- return result;
-}
-EXPORT_SYMBOL(ldlm_error2errno);
-
-#if LUSTRE_TRACKS_LOCK_EXP_REFS
-void ldlm_dump_export_locks(struct obd_export *exp)
-{
- spin_lock(&exp->exp_locks_list_guard);
- if (!list_empty(&exp->exp_locks_list)) {
- struct ldlm_lock *lock;
-
- CERROR("dumping locks for export %p, ignore if the unmount doesn't hang\n",
- exp);
- list_for_each_entry(lock, &exp->exp_locks_list,
- l_exp_refs_link)
- LDLM_ERROR(lock, "lock:");
- }
- spin_unlock(&exp->exp_locks_list_guard);
-}
-#endif
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
deleted file mode 100644
index a644d133063b..000000000000
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
+++ /dev/null
@@ -1,2135 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2010, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ldlm/ldlm_lock.c
- *
- * Author: Peter Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LDLM
-
-#include <lustre_intent.h>
-#include <lustre_swab.h>
-#include <obd_class.h>
-#include "ldlm_internal.h"
-
-/* lock types */
-char *ldlm_lockname[] = {
- [0] = "--",
- [LCK_EX] = "EX",
- [LCK_PW] = "PW",
- [LCK_PR] = "PR",
- [LCK_CW] = "CW",
- [LCK_CR] = "CR",
- [LCK_NL] = "NL",
- [LCK_GROUP] = "GROUP",
- [LCK_COS] = "COS",
-};
-EXPORT_SYMBOL(ldlm_lockname);
-
-static char *ldlm_typename[] = {
- [LDLM_PLAIN] = "PLN",
- [LDLM_EXTENT] = "EXT",
- [LDLM_FLOCK] = "FLK",
- [LDLM_IBITS] = "IBT",
-};
-
-static ldlm_policy_wire_to_local_t ldlm_policy_wire_to_local[] = {
- [LDLM_PLAIN - LDLM_MIN_TYPE] = ldlm_plain_policy_wire_to_local,
- [LDLM_EXTENT - LDLM_MIN_TYPE] = ldlm_extent_policy_wire_to_local,
- [LDLM_FLOCK - LDLM_MIN_TYPE] = ldlm_flock_policy_wire_to_local,
- [LDLM_IBITS - LDLM_MIN_TYPE] = ldlm_ibits_policy_wire_to_local,
-};
-
-static ldlm_policy_local_to_wire_t ldlm_policy_local_to_wire[] = {
- [LDLM_PLAIN - LDLM_MIN_TYPE] = ldlm_plain_policy_local_to_wire,
- [LDLM_EXTENT - LDLM_MIN_TYPE] = ldlm_extent_policy_local_to_wire,
- [LDLM_FLOCK - LDLM_MIN_TYPE] = ldlm_flock_policy_local_to_wire,
- [LDLM_IBITS - LDLM_MIN_TYPE] = ldlm_ibits_policy_local_to_wire,
-};
-
-/**
- * Converts lock policy from local format to on the wire lock_desc format
- */
-static void ldlm_convert_policy_to_wire(enum ldlm_type type,
- const union ldlm_policy_data *lpolicy,
- union ldlm_wire_policy_data *wpolicy)
-{
- ldlm_policy_local_to_wire_t convert;
-
- convert = ldlm_policy_local_to_wire[type - LDLM_MIN_TYPE];
-
- convert(lpolicy, wpolicy);
-}
-
-/**
- * Converts lock policy from on the wire lock_desc format to local format
- */
-void ldlm_convert_policy_to_local(struct obd_export *exp, enum ldlm_type type,
- const union ldlm_wire_policy_data *wpolicy,
- union ldlm_policy_data *lpolicy)
-{
- ldlm_policy_wire_to_local_t convert;
-
- convert = ldlm_policy_wire_to_local[type - LDLM_MIN_TYPE];
-
- convert(wpolicy, lpolicy);
-}
-
-const char *ldlm_it2str(enum ldlm_intent_flags it)
-{
- switch (it) {
- case IT_OPEN:
- return "open";
- case IT_CREAT:
- return "creat";
- case (IT_OPEN | IT_CREAT):
- return "open|creat";
- case IT_READDIR:
- return "readdir";
- case IT_GETATTR:
- return "getattr";
- case IT_LOOKUP:
- return "lookup";
- case IT_UNLINK:
- return "unlink";
- case IT_GETXATTR:
- return "getxattr";
- case IT_LAYOUT:
- return "layout";
- default:
- CERROR("Unknown intent 0x%08x\n", it);
- return "UNKNOWN";
- }
-}
-EXPORT_SYMBOL(ldlm_it2str);
-
-/*
- * REFCOUNTED LOCK OBJECTS
- */
-
-/**
- * Get a reference on a lock.
- *
- * Lock refcounts, during creation:
- * - one special one for allocation, dec'd only once in destroy
- * - one for being a lock that's in-use
- * - one for the addref associated with a new lock
- */
-struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock)
-{
- atomic_inc(&lock->l_refc);
- return lock;
-}
-EXPORT_SYMBOL(ldlm_lock_get);
-
-/**
- * Release lock reference.
- *
- * Also frees the lock if it was last reference.
- */
-void ldlm_lock_put(struct ldlm_lock *lock)
-{
- LASSERT(lock->l_resource != LP_POISON);
- LASSERT(atomic_read(&lock->l_refc) > 0);
- if (atomic_dec_and_test(&lock->l_refc)) {
- struct ldlm_resource *res;
-
- LDLM_DEBUG(lock,
- "final lock_put on destroyed lock, freeing it.");
-
- res = lock->l_resource;
- LASSERT(ldlm_is_destroyed(lock));
- LASSERT(list_empty(&lock->l_res_link));
- LASSERT(list_empty(&lock->l_pending_chain));
-
- lprocfs_counter_decr(ldlm_res_to_ns(res)->ns_stats,
- LDLM_NSS_LOCKS);
- lu_ref_del(&res->lr_reference, "lock", lock);
- ldlm_resource_putref(res);
- lock->l_resource = NULL;
- if (lock->l_export) {
- class_export_lock_put(lock->l_export, lock);
- lock->l_export = NULL;
- }
-
- kfree(lock->l_lvb_data);
-
- ldlm_interval_free(ldlm_interval_detach(lock));
- lu_ref_fini(&lock->l_reference);
- OBD_FREE_RCU(lock, sizeof(*lock), &lock->l_handle);
- }
-}
-EXPORT_SYMBOL(ldlm_lock_put);
-
-/**
- * Removes LDLM lock \a lock from LRU. Assumes LRU is already locked.
- */
-int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock)
-{
- int rc = 0;
-
- if (!list_empty(&lock->l_lru)) {
- struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
-
- LASSERT(lock->l_resource->lr_type != LDLM_FLOCK);
- list_del_init(&lock->l_lru);
- LASSERT(ns->ns_nr_unused > 0);
- ns->ns_nr_unused--;
- rc = 1;
- }
- return rc;
-}
-
-/**
- * Removes LDLM lock \a lock from LRU. Obtains the LRU lock first.
- *
- * If \a last_use is non-zero, it will remove the lock from LRU only if
- * it matches lock's l_last_used.
- *
- * \retval 0 the lock was not removed: either it was not in the LRU list,
- * or \a last_use was set and did not match the lock's l_last_used.
- * \retval 1 the lock was in the LRU list and has been removed.
- */
-int ldlm_lock_remove_from_lru_check(struct ldlm_lock *lock, time_t last_use)
-{
- struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
- int rc = 0;
-
- spin_lock(&ns->ns_lock);
- if (last_use == 0 || last_use == lock->l_last_used)
- rc = ldlm_lock_remove_from_lru_nolock(lock);
- spin_unlock(&ns->ns_lock);
-
- return rc;
-}
-
-/**
- * Adds LDLM lock \a lock to namespace LRU. Assumes LRU is already locked.
- */
-static void ldlm_lock_add_to_lru_nolock(struct ldlm_lock *lock)
-{
- struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
-
- lock->l_last_used = jiffies;
- LASSERT(list_empty(&lock->l_lru));
- LASSERT(lock->l_resource->lr_type != LDLM_FLOCK);
- list_add_tail(&lock->l_lru, &ns->ns_unused_list);
- ldlm_clear_skipped(lock);
- LASSERT(ns->ns_nr_unused >= 0);
- ns->ns_nr_unused++;
-}
-
-/**
- * Adds LDLM lock \a lock to namespace LRU. Obtains necessary LRU locks
- * first.
- */
-static void ldlm_lock_add_to_lru(struct ldlm_lock *lock)
-{
- struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
-
- spin_lock(&ns->ns_lock);
- ldlm_lock_add_to_lru_nolock(lock);
- spin_unlock(&ns->ns_lock);
-}
-
-/**
- * Moves LDLM lock \a lock that is already in namespace LRU to the tail of
- * the LRU. Performs the necessary LRU locking.
- */
-static void ldlm_lock_touch_in_lru(struct ldlm_lock *lock)
-{
- struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
-
- spin_lock(&ns->ns_lock);
- if (!list_empty(&lock->l_lru)) {
- ldlm_lock_remove_from_lru_nolock(lock);
- ldlm_lock_add_to_lru_nolock(lock);
- }
- spin_unlock(&ns->ns_lock);
-}
-
-/**
- * Helper to destroy a locked lock.
- *
- * Used by ldlm_lock_destroy and ldlm_lock_destroy_nolock
- * Must be called with l_lock and lr_lock held.
- *
- * Does not actually free the lock data, but rather marks the lock as
- * destroyed by setting the LDLM_FL_DESTROYED flag. Destroys the
- * handle->lock association too, so that the lock can no longer be found,
- * and removes the lock from the LRU list. Actual lock freeing occurs when
- * the last lock reference goes away.
- *
- * Original comment (of some historical value):
- * This used to have a 'strict' flag, which recovery would use to mark an
- * in-use lock as needing-to-die. Lest I am ever tempted to put it back, I
- * shall explain why it's gone: with the new hash table scheme, once you call
- * ldlm_lock_destroy, you can never drop your final references on this lock.
- * Because it's not in the hash table anymore. -phil
- */
-static int ldlm_lock_destroy_internal(struct ldlm_lock *lock)
-{
- if (lock->l_readers || lock->l_writers) {
- LDLM_ERROR(lock, "lock still has references");
- LBUG();
- }
-
- if (!list_empty(&lock->l_res_link)) {
- LDLM_ERROR(lock, "lock still on resource");
- LBUG();
- }
-
- if (ldlm_is_destroyed(lock)) {
- LASSERT(list_empty(&lock->l_lru));
- return 0;
- }
- ldlm_set_destroyed(lock);
-
- ldlm_lock_remove_from_lru(lock);
- class_handle_unhash(&lock->l_handle);
-
- return 1;
-}
-
-/**
- * Destroys a LDLM lock \a lock. Performs necessary locking first.
- */
-static void ldlm_lock_destroy(struct ldlm_lock *lock)
-{
- int first;
-
- lock_res_and_lock(lock);
- first = ldlm_lock_destroy_internal(lock);
- unlock_res_and_lock(lock);
-
- /* drop reference from hashtable only for first destroy */
- if (first) {
- lu_ref_del(&lock->l_reference, "hash", lock);
- LDLM_LOCK_RELEASE(lock);
- }
-}
-
-/**
- * Destroys a LDLM lock \a lock that is already locked.
- */
-void ldlm_lock_destroy_nolock(struct ldlm_lock *lock)
-{
- int first;
-
- first = ldlm_lock_destroy_internal(lock);
- /* drop reference from hashtable only for first destroy */
- if (first) {
- lu_ref_del(&lock->l_reference, "hash", lock);
- LDLM_LOCK_RELEASE(lock);
- }
-}
-
-/* this is called by portals_handle2object with the handle lock taken */
-static void lock_handle_addref(void *lock)
-{
- LDLM_LOCK_GET((struct ldlm_lock *)lock);
-}
-
-static void lock_handle_free(void *lock, int size)
-{
- LASSERT(size == sizeof(struct ldlm_lock));
- kmem_cache_free(ldlm_lock_slab, lock);
-}
-
-static struct portals_handle_ops lock_handle_ops = {
- .hop_addref = lock_handle_addref,
- .hop_free = lock_handle_free,
-};
-
-/**
- * Allocate and initialize a new lock structure.
- *
- * Usage: pass in a resource on which you have done ldlm_resource_get();
- * the new lock will take over that refcount.
- *
- * Returns: lock with refcount 2 - one for the current caller and one for
- * the remote side.
- */
-static struct ldlm_lock *ldlm_lock_new(struct ldlm_resource *resource)
-{
- struct ldlm_lock *lock;
-
- LASSERT(resource);
-
- lock = kmem_cache_zalloc(ldlm_lock_slab, GFP_NOFS);
- if (!lock)
- return NULL;
-
- spin_lock_init(&lock->l_lock);
- lock->l_resource = resource;
- lu_ref_add(&resource->lr_reference, "lock", lock);
-
- atomic_set(&lock->l_refc, 2);
- INIT_LIST_HEAD(&lock->l_res_link);
- INIT_LIST_HEAD(&lock->l_lru);
- INIT_LIST_HEAD(&lock->l_pending_chain);
- INIT_LIST_HEAD(&lock->l_bl_ast);
- INIT_LIST_HEAD(&lock->l_cp_ast);
- INIT_LIST_HEAD(&lock->l_rk_ast);
- init_waitqueue_head(&lock->l_waitq);
- lock->l_blocking_lock = NULL;
- INIT_LIST_HEAD(&lock->l_sl_mode);
- INIT_LIST_HEAD(&lock->l_sl_policy);
-
- lprocfs_counter_incr(ldlm_res_to_ns(resource)->ns_stats,
- LDLM_NSS_LOCKS);
- INIT_LIST_HEAD(&lock->l_handle.h_link);
- class_handle_hash(&lock->l_handle, &lock_handle_ops);
-
- lu_ref_init(&lock->l_reference);
- lu_ref_add(&lock->l_reference, "hash", lock);
- lock->l_callback_timeout = 0;
-
-#if LUSTRE_TRACKS_LOCK_EXP_REFS
- INIT_LIST_HEAD(&lock->l_exp_refs_link);
- lock->l_exp_refs_nr = 0;
- lock->l_exp_refs_target = NULL;
-#endif
-
- return lock;
-}
-
-/**
- * Moves LDLM lock \a lock to another resource.
- * This is used on client when server returns some other lock than requested
- * (typically as a result of intent operation)
- */
-int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock,
- const struct ldlm_res_id *new_resid)
-{
- struct ldlm_resource *oldres = lock->l_resource;
- struct ldlm_resource *newres;
- int type;
-
- lock_res_and_lock(lock);
- if (memcmp(new_resid, &lock->l_resource->lr_name,
- sizeof(lock->l_resource->lr_name)) == 0) {
- /* Nothing to do */
- unlock_res_and_lock(lock);
- return 0;
- }
-
- LASSERT(new_resid->name[0] != 0);
-
- /* This function assumes that the lock isn't on any lists */
- LASSERT(list_empty(&lock->l_res_link));
-
- type = oldres->lr_type;
- unlock_res_and_lock(lock);
-
- newres = ldlm_resource_get(ns, NULL, new_resid, type, 1);
- if (IS_ERR(newres))
- return PTR_ERR(newres);
-
- lu_ref_add(&newres->lr_reference, "lock", lock);
- /*
- * To flip the lock from the old to the new resource, lock, oldres and
- * newres have to be locked. Resource spin-locks are nested within
- * lock->l_lock, and are taken in the memory address order to avoid
- * dead-locks.
- */
- spin_lock(&lock->l_lock);
- oldres = lock->l_resource;
- if (oldres < newres) {
- lock_res(oldres);
- lock_res_nested(newres, LRT_NEW);
- } else {
- lock_res(newres);
- lock_res_nested(oldres, LRT_NEW);
- }
- LASSERT(memcmp(new_resid, &oldres->lr_name,
- sizeof(oldres->lr_name)) != 0);
- lock->l_resource = newres;
- unlock_res(oldres);
- unlock_res_and_lock(lock);
-
- /* ...and the flowers are still standing! */
- lu_ref_del(&oldres->lr_reference, "lock", lock);
- ldlm_resource_putref(oldres);
-
- return 0;
-}
-
-/** \defgroup ldlm_handles LDLM HANDLES
- * Ways to get hold of locks without any addresses.
- * @{
- */
-
-/**
- * Fills in handle for LDLM lock \a lock into supplied \a lockh
- * Does not take any references.
- */
-void ldlm_lock2handle(const struct ldlm_lock *lock, struct lustre_handle *lockh)
-{
- lockh->cookie = lock->l_handle.h_cookie;
-}
-EXPORT_SYMBOL(ldlm_lock2handle);
-
-/**
- * Obtain a lock reference by handle.
- *
- * If \a flags is non-zero: atomically get the lock and set the flags.
- * Returns NULL if any of those flags is already set.
- */
-struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle,
- __u64 flags)
-{
- struct ldlm_lock *lock;
-
- LASSERT(handle);
-
- lock = class_handle2object(handle->cookie, NULL);
- if (!lock)
- return NULL;
-
- if (lock->l_export && lock->l_export->exp_failed) {
- CDEBUG(D_INFO, "lock export failed: lock %p, exp %p\n",
- lock, lock->l_export);
- LDLM_LOCK_PUT(lock);
- return NULL;
- }
-
- /* It's unlikely but possible that someone marked the lock as
- * destroyed after we did handle2object on it
- */
- if (flags == 0 && !ldlm_is_destroyed(lock)) {
- lu_ref_add(&lock->l_reference, "handle", current);
- return lock;
- }
-
- lock_res_and_lock(lock);
-
- LASSERT(lock->l_resource);
-
- lu_ref_add_atomic(&lock->l_reference, "handle", current);
- if (unlikely(ldlm_is_destroyed(lock))) {
- unlock_res_and_lock(lock);
- CDEBUG(D_INFO, "lock already destroyed: lock %p\n", lock);
- LDLM_LOCK_PUT(lock);
- return NULL;
- }
-
- if (flags) {
- if (lock->l_flags & flags) {
- unlock_res_and_lock(lock);
- LDLM_LOCK_PUT(lock);
- return NULL;
- }
-
- lock->l_flags |= flags;
- }
-
- unlock_res_and_lock(lock);
- return lock;
-}
-EXPORT_SYMBOL(__ldlm_handle2lock);
-/** @} ldlm_handles */
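-
-/*
- * Handle round trip, as a sketch: a lock is flattened into a cookie that
- * can travel in RPCs and later resolved back, taking an extra reference:
- *
- *	struct lustre_handle lockh;
- *
- *	ldlm_lock2handle(lock, &lockh);
- *	...
- *	lock = ldlm_handle2lock(&lockh);
- *	if (lock) {
- *		... use lock ...
- *		LDLM_LOCK_PUT(lock);
- *	}
- */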
-
-/**
- * Fill in "on the wire" representation for given LDLM lock into supplied
- * lock descriptor \a desc structure.
- */
-void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc)
-{
- ldlm_res2desc(lock->l_resource, &desc->l_resource);
- desc->l_req_mode = lock->l_req_mode;
- desc->l_granted_mode = lock->l_granted_mode;
- ldlm_convert_policy_to_wire(lock->l_resource->lr_type,
- &lock->l_policy_data,
- &desc->l_policy_data);
-}
-
-/**
- * Add a lock to list of conflicting locks to send AST to.
- *
- * Only add if we have not sent a blocking AST to the lock yet.
- */
-static void ldlm_add_bl_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
- struct list_head *work_list)
-{
- if (!ldlm_is_ast_sent(lock)) {
- LDLM_DEBUG(lock, "lock incompatible; sending blocking AST.");
- ldlm_set_ast_sent(lock);
- /* If the enqueuing client said so, tell the AST recipient to
- * discard dirty data, rather than writing back.
- */
- if (ldlm_is_ast_discard_data(new))
- ldlm_set_discard_data(lock);
- LASSERT(list_empty(&lock->l_bl_ast));
- list_add(&lock->l_bl_ast, work_list);
- LDLM_LOCK_GET(lock);
- LASSERT(!lock->l_blocking_lock);
- lock->l_blocking_lock = LDLM_LOCK_GET(new);
- }
-}
-
-/**
- * Add a lock to list of just granted locks to send completion AST to.
- */
-static void ldlm_add_cp_work_item(struct ldlm_lock *lock,
- struct list_head *work_list)
-{
- if (!ldlm_is_cp_reqd(lock)) {
- ldlm_set_cp_reqd(lock);
- LDLM_DEBUG(lock, "lock granted; sending completion AST.");
- LASSERT(list_empty(&lock->l_cp_ast));
- list_add(&lock->l_cp_ast, work_list);
- LDLM_LOCK_GET(lock);
- }
-}
-
-/**
- * Aggregator function to add AST work items into a list. Determines
- * what sort of AST work needs to be done and calls the proper
- * adding function.
- * Must be called with lr_lock held.
- */
-static void ldlm_add_ast_work_item(struct ldlm_lock *lock,
- struct ldlm_lock *new,
- struct list_head *work_list)
-{
- check_res_locked(lock->l_resource);
- if (new)
- ldlm_add_bl_work_item(lock, new, work_list);
- else
- ldlm_add_cp_work_item(lock, work_list);
-}
-
-/**
- * Add specified reader/writer reference to LDLM lock with handle \a lockh.
- * r/w reference type is determined by \a mode
- * Calls ldlm_lock_addref_internal.
- */
-void ldlm_lock_addref(const struct lustre_handle *lockh, enum ldlm_mode mode)
-{
- struct ldlm_lock *lock;
-
- lock = ldlm_handle2lock(lockh);
- LASSERTF(lock, "Non-existing lock: %llx\n", lockh->cookie);
- ldlm_lock_addref_internal(lock, mode);
- LDLM_LOCK_PUT(lock);
-}
-EXPORT_SYMBOL(ldlm_lock_addref);
-
-/**
- * Helper function.
- * Add specified reader/writer reference to LDLM lock \a lock.
- * r/w reference type is determined by \a mode
- * Removes lock from LRU if it is there.
- * Assumes the LDLM lock is already locked.
- */
-void ldlm_lock_addref_internal_nolock(struct ldlm_lock *lock,
- enum ldlm_mode mode)
-{
- ldlm_lock_remove_from_lru(lock);
- if (mode & (LCK_NL | LCK_CR | LCK_PR)) {
- lock->l_readers++;
- lu_ref_add_atomic(&lock->l_reference, "reader", lock);
- }
- if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP | LCK_COS)) {
- lock->l_writers++;
- lu_ref_add_atomic(&lock->l_reference, "writer", lock);
- }
- LDLM_LOCK_GET(lock);
- lu_ref_add_atomic(&lock->l_reference, "user", lock);
- LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]);
-}
-
-/**
- * Attempts to add a reader/writer reference to a lock with handle \a lockh.
- * Fails if the lock has been destroyed, or if it is marked
- * LDLM_FL_CBPENDING and no longer has any readers or writers.
- *
- * \retval 0 success, lock was addref-ed
- *
- * \retval -EAGAIN lock is being canceled.
- */
-int ldlm_lock_addref_try(const struct lustre_handle *lockh, enum ldlm_mode mode)
-{
- struct ldlm_lock *lock;
- int result;
-
- result = -EAGAIN;
- lock = ldlm_handle2lock(lockh);
- if (lock) {
- lock_res_and_lock(lock);
- if (lock->l_readers != 0 || lock->l_writers != 0 ||
- !ldlm_is_cbpending(lock)) {
- ldlm_lock_addref_internal_nolock(lock, mode);
- result = 0;
- }
- unlock_res_and_lock(lock);
- LDLM_LOCK_PUT(lock);
- }
- return result;
-}
-EXPORT_SYMBOL(ldlm_lock_addref_try);
-
-/**
- * Add specified reader/writer reference to LDLM lock \a lock.
- * Locks LDLM lock and calls ldlm_lock_addref_internal_nolock to do the work.
- * Only called for local locks.
- */
-void ldlm_lock_addref_internal(struct ldlm_lock *lock, enum ldlm_mode mode)
-{
- lock_res_and_lock(lock);
- ldlm_lock_addref_internal_nolock(lock, mode);
- unlock_res_and_lock(lock);
-}
-
-/**
- * Removes reader/writer reference for LDLM lock \a lock.
- * Assumes LDLM lock is already locked.
- * Only called in ldlm_flock_destroy and for local locks.
- * Does NOT add the lock to the LRU if no r/w references are left, to
- * accommodate flock locks that cannot be placed in the LRU.
- */
-void ldlm_lock_decref_internal_nolock(struct ldlm_lock *lock,
- enum ldlm_mode mode)
-{
- LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
- if (mode & (LCK_NL | LCK_CR | LCK_PR)) {
- LASSERT(lock->l_readers > 0);
- lu_ref_del(&lock->l_reference, "reader", lock);
- lock->l_readers--;
- }
- if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP | LCK_COS)) {
- LASSERT(lock->l_writers > 0);
- lu_ref_del(&lock->l_reference, "writer", lock);
- lock->l_writers--;
- }
-
- lu_ref_del(&lock->l_reference, "user", lock);
- LDLM_LOCK_RELEASE(lock); /* matches the LDLM_LOCK_GET() in addref */
-}
-
-/**
- * Removes reader/writer reference for LDLM lock \a lock.
- * Locks LDLM lock first.
- * If the lock is determined to be client lock on a client and r/w refcount
- * drops to zero and the lock is not blocked, the lock is added to LRU lock
- * on the namespace.
- * For blocked LDLM locks if r/w count drops to zero, blocking_ast is called.
- */
-void ldlm_lock_decref_internal(struct ldlm_lock *lock, enum ldlm_mode mode)
-{
- struct ldlm_namespace *ns;
-
- lock_res_and_lock(lock);
-
- ns = ldlm_lock_to_ns(lock);
-
- ldlm_lock_decref_internal_nolock(lock, mode);
-
- if ((ldlm_is_local(lock) || lock->l_req_mode == LCK_GROUP) &&
- !lock->l_readers && !lock->l_writers) {
- /* If this is a local lock on a server namespace and this was
- * the last reference, cancel the lock.
- *
- * Group locks are special:
- * They must not go in LRU, but they are not called back
- * like non-group locks, instead they are manually released.
- * They have an l_writers reference which they keep until
- * they are manually released, so we remove them when they have
- * no more reader or writer references. - LU-6368
- */
- ldlm_set_cbpending(lock);
- }
-
- if (!lock->l_readers && !lock->l_writers && ldlm_is_cbpending(lock)) {
- /* If we received a blocking AST and this was the last reference,
- * run the callback.
- */
- LDLM_DEBUG(lock, "final decref done on cbpending lock");
-
- LDLM_LOCK_GET(lock); /* dropped by bl thread */
- ldlm_lock_remove_from_lru(lock);
- unlock_res_and_lock(lock);
-
- if (ldlm_is_fail_loc(lock))
- OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
-
- if (ldlm_is_atomic_cb(lock) ||
- ldlm_bl_to_thread_lock(ns, NULL, lock) != 0)
- ldlm_handle_bl_callback(ns, NULL, lock);
- } else if (!lock->l_readers && !lock->l_writers &&
- !ldlm_is_no_lru(lock) && !ldlm_is_bl_ast(lock)) {
- LDLM_DEBUG(lock, "add lock into lru list");
-
- /* If this is a client-side namespace and this was the last
- * reference, put it on the LRU.
- */
- ldlm_lock_add_to_lru(lock);
- unlock_res_and_lock(lock);
-
- if (ldlm_is_fail_loc(lock))
- OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
-
- /* Call ldlm_cancel_lru() only if EARLY_CANCEL and LRU RESIZE
- * are not supported by the server, otherwise, it is done on
- * enqueue.
- */
- if (!exp_connect_cancelset(lock->l_conn_export) &&
- !ns_connect_lru_resize(ns))
- ldlm_cancel_lru(ns, 0, LCF_ASYNC, 0);
- } else {
- LDLM_DEBUG(lock, "do not add lock into lru list");
- unlock_res_and_lock(lock);
- }
-}
-
-/**
- * Decrease reader/writer refcount for LDLM lock with handle \a lockh
- */
-void ldlm_lock_decref(const struct lustre_handle *lockh, enum ldlm_mode mode)
-{
- struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
-
- LASSERTF(lock, "Non-existing lock: %#llx\n", lockh->cookie);
- ldlm_lock_decref_internal(lock, mode);
- LDLM_LOCK_PUT(lock);
-}
-EXPORT_SYMBOL(ldlm_lock_decref);
-
-/**
- * Decrease reader/writer refcount for LDLM lock with handle
- * \a lockh and mark it for subsequent cancellation once r/w refcount
- * drops to zero instead of putting into LRU.
- */
-void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh,
- enum ldlm_mode mode)
-{
- struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
-
- LASSERT(lock);
-
- LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
- lock_res_and_lock(lock);
- ldlm_set_cbpending(lock);
- unlock_res_and_lock(lock);
- ldlm_lock_decref_internal(lock, mode);
- LDLM_LOCK_PUT(lock);
-}
-EXPORT_SYMBOL(ldlm_lock_decref_and_cancel);
-
-struct sl_insert_point {
- struct list_head *res_link;
- struct list_head *mode_link;
- struct list_head *policy_link;
-};
-
-/**
- * Finds a position to insert the new lock into granted lock list.
- *
- * Used for locks eligible for skiplist optimization.
- *
- * Parameters:
- * queue [input]: the granted list the search acts on;
- * req [input]: the lock whose position is to be located;
- * prev [output]: positions within 3 lists at which to insert @req
- * Return Value:
- * filled @prev
- * NOTE: called by
- * - ldlm_grant_lock_with_skiplist
- */
-static void search_granted_lock(struct list_head *queue,
- struct ldlm_lock *req,
- struct sl_insert_point *prev)
-{
- struct ldlm_lock *lock, *mode_end, *policy_end;
-
- list_for_each_entry(lock, queue, l_res_link) {
- mode_end = list_prev_entry(lock, l_sl_mode);
-
- if (lock->l_req_mode != req->l_req_mode) {
- /* jump to last lock of mode group */
- lock = mode_end;
- continue;
- }
-
- /* suitable mode group is found */
- if (lock->l_resource->lr_type == LDLM_PLAIN) {
- /* insert point is last lock of the mode group */
- prev->res_link = &mode_end->l_res_link;
- prev->mode_link = &mode_end->l_sl_mode;
- prev->policy_link = &req->l_sl_policy;
- return;
- }
-
- if (lock->l_resource->lr_type == LDLM_IBITS) {
- for (;;) {
- policy_end =
- list_prev_entry(lock, l_sl_policy);
-
- if (lock->l_policy_data.l_inodebits.bits ==
- req->l_policy_data.l_inodebits.bits) {
- /* insert point is last lock of
- * the policy group
- */
- prev->res_link =
- &policy_end->l_res_link;
- prev->mode_link =
- &policy_end->l_sl_mode;
- prev->policy_link =
- &policy_end->l_sl_policy;
- return;
- }
-
- if (policy_end == mode_end)
- /* done with mode group */
- break;
-
- /* go to next policy group within mode group */
- lock = list_next_entry(policy_end, l_res_link);
- } /* loop over policy groups within the mode group */
-
- /* insert point is last lock of the mode group,
- * new policy group is started
- */
- prev->res_link = &mode_end->l_res_link;
- prev->mode_link = &mode_end->l_sl_mode;
- prev->policy_link = &req->l_sl_policy;
- return;
- }
-
- LDLM_ERROR(lock, "is not LDLM_PLAIN or LDLM_IBITS lock");
- LBUG();
- }
-
- /* insert point is last lock on the queue,
- * new mode group and new policy group are started
- */
- prev->res_link = queue->prev;
- prev->mode_link = &req->l_sl_mode;
- prev->policy_link = &req->l_sl_policy;
-}
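-
-/*
- * Shape of the granted list that the search above assumes, sketched for
- * orientation: locks of equal mode sit adjacent and form a "mode group";
- * within an IBITS mode group, locks with equal bits form a "policy
- * group". l_sl_mode and l_sl_policy link each group's first and last
- * lock, so the search hops over whole groups instead of visiting every
- * lock in the queue.
- */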
-
-/**
- * Add a lock into resource granted list after a position described by
- * \a prev.
- */
-static void ldlm_granted_list_add_lock(struct ldlm_lock *lock,
- struct sl_insert_point *prev)
-{
- struct ldlm_resource *res = lock->l_resource;
-
- check_res_locked(res);
-
- ldlm_resource_dump(D_INFO, res);
- LDLM_DEBUG(lock, "About to add lock:");
-
- if (ldlm_is_destroyed(lock)) {
- CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n");
- return;
- }
-
- LASSERT(list_empty(&lock->l_res_link));
- LASSERT(list_empty(&lock->l_sl_mode));
- LASSERT(list_empty(&lock->l_sl_policy));
-
- /*
- * lock->link == prev->link means the lock is the first in its group.
- * Don't re-add it to itself, to suppress kernel warnings.
- */
- if (&lock->l_res_link != prev->res_link)
- list_add(&lock->l_res_link, prev->res_link);
- if (&lock->l_sl_mode != prev->mode_link)
- list_add(&lock->l_sl_mode, prev->mode_link);
- if (&lock->l_sl_policy != prev->policy_link)
- list_add(&lock->l_sl_policy, prev->policy_link);
-}
-
-/**
- * Add a lock to granted list on a resource maintaining skiplist
- * correctness.
- */
-static void ldlm_grant_lock_with_skiplist(struct ldlm_lock *lock)
-{
- struct sl_insert_point prev;
-
- LASSERT(lock->l_req_mode == lock->l_granted_mode);
-
- search_granted_lock(&lock->l_resource->lr_granted, lock, &prev);
- ldlm_granted_list_add_lock(lock, &prev);
-}
-
-/**
- * Perform lock granting bookkeeping.
- *
- * Includes putting the lock into granted list and updating lock mode.
- * NOTE: called by
- * - ldlm_lock_enqueue
- * - ldlm_reprocess_queue
- * - ldlm_lock_convert
- *
- * must be called with lr_lock held
- */
-void ldlm_grant_lock(struct ldlm_lock *lock, struct list_head *work_list)
-{
- struct ldlm_resource *res = lock->l_resource;
-
- check_res_locked(res);
-
- lock->l_granted_mode = lock->l_req_mode;
-
- if (work_list && lock->l_completion_ast)
- ldlm_add_ast_work_item(lock, NULL, work_list);
-
- if (res->lr_type == LDLM_PLAIN || res->lr_type == LDLM_IBITS) {
- ldlm_grant_lock_with_skiplist(lock);
- } else if (res->lr_type == LDLM_EXTENT) {
- ldlm_extent_add_lock(res, lock);
- } else if (res->lr_type == LDLM_FLOCK) {
- /*
- * We should not add locks to granted list in
- * the following cases:
- * - this is an UNLOCK but not a real lock;
- * - this is a TEST lock;
- * - this is a F_CANCELLK lock (async flock has req_mode == 0)
- * - this is a deadlock (flock cannot be granted)
- */
- if (!lock->l_req_mode || lock->l_req_mode == LCK_NL ||
- ldlm_is_test_lock(lock) || ldlm_is_flock_deadlock(lock))
- return;
- ldlm_resource_add_lock(res, &res->lr_granted, lock);
- } else {
- LBUG();
- }
-
- ldlm_pool_add(&ldlm_res_to_ns(res)->ns_pool, lock);
-}
-
-/**
- * Describe the overlap between two locks. itree_overlap_cb data.
- */
-struct lock_match_data {
- struct ldlm_lock *lmd_old;
- struct ldlm_lock *lmd_lock;
- enum ldlm_mode *lmd_mode;
- union ldlm_policy_data *lmd_policy;
- __u64 lmd_flags;
- int lmd_unref;
-};
-
-/**
- * Check if the given @lock meets the criteria for a match.
- * A reference on the lock is taken if matched.
- *
- * \param lock test-against this lock
- * \param data parameters
- */
-static int lock_matches(struct ldlm_lock *lock, struct lock_match_data *data)
-{
- union ldlm_policy_data *lpol = &lock->l_policy_data;
- enum ldlm_mode match;
-
- if (lock == data->lmd_old)
- return INTERVAL_ITER_STOP;
-
- /*
- * Check if this lock can be matched.
- * Used by LU-2919(exclusive open) for open lease lock
- */
- if (ldlm_is_excl(lock))
- return INTERVAL_ITER_CONT;
-
- /*
- * llite sometimes wants to match locks that will be canceled
- * when their users drop, but we allow the match if the caller
- * passes in CBPENDING and the lock still has users. This is
- * generally only going to be used by children whose parents
- * already hold a lock, so forward progress can still happen.
- */
- if (ldlm_is_cbpending(lock) &&
- !(data->lmd_flags & LDLM_FL_CBPENDING))
- return INTERVAL_ITER_CONT;
-
- if (!data->lmd_unref && ldlm_is_cbpending(lock) &&
- !lock->l_readers && !lock->l_writers)
- return INTERVAL_ITER_CONT;
-
- if (!(lock->l_req_mode & *data->lmd_mode))
- return INTERVAL_ITER_CONT;
- match = lock->l_req_mode;
-
- switch (lock->l_resource->lr_type) {
- case LDLM_EXTENT:
- if (lpol->l_extent.start > data->lmd_policy->l_extent.start ||
- lpol->l_extent.end < data->lmd_policy->l_extent.end)
- return INTERVAL_ITER_CONT;
-
- if (unlikely(match == LCK_GROUP) &&
- data->lmd_policy->l_extent.gid != LDLM_GID_ANY &&
- lpol->l_extent.gid != data->lmd_policy->l_extent.gid)
- return INTERVAL_ITER_CONT;
- break;
- case LDLM_IBITS:
- /*
- * We match if the existing lock has the same or a wider set
- * of bits.
- */
- if ((lpol->l_inodebits.bits &
- data->lmd_policy->l_inodebits.bits) !=
- data->lmd_policy->l_inodebits.bits)
- return INTERVAL_ITER_CONT;
- break;
- default:
- break;
- }
- /*
- * Locks that are already going away only match when the caller
- * asked for unreferenced (lmd_unref) matches as well.
- */
- if (!data->lmd_unref && LDLM_HAVE_MASK(lock, GONE))
- return INTERVAL_ITER_CONT;
-
- if (!equi(data->lmd_flags & LDLM_FL_LOCAL_ONLY, ldlm_is_local(lock)))
- return INTERVAL_ITER_CONT;
-
- if (data->lmd_flags & LDLM_FL_TEST_LOCK) {
- LDLM_LOCK_GET(lock);
- ldlm_lock_touch_in_lru(lock);
- } else {
- ldlm_lock_addref_internal_nolock(lock, match);
- }
-
- *data->lmd_mode = match;
- data->lmd_lock = lock;
-
- return INTERVAL_ITER_STOP;
-}
-
-static enum interval_iter itree_overlap_cb(struct interval_node *in, void *args)
-{
- struct ldlm_interval *node = to_ldlm_interval(in);
- struct lock_match_data *data = args;
- struct ldlm_lock *lock;
- int rc;
-
- list_for_each_entry(lock, &node->li_group, l_sl_policy) {
- rc = lock_matches(lock, data);
- if (rc == INTERVAL_ITER_STOP)
- return INTERVAL_ITER_STOP;
- }
- return INTERVAL_ITER_CONT;
-}
-
-/**
- * Search for a lock with given parameters in interval trees.
- *
- * \param res search for a lock in this resource
- * \param data parameters
- *
- * \retval a referenced lock or NULL.
- */
-static struct ldlm_lock *search_itree(struct ldlm_resource *res,
- struct lock_match_data *data)
-{
- struct interval_node_extent ext = {
- .start = data->lmd_policy->l_extent.start,
- .end = data->lmd_policy->l_extent.end
- };
- int idx;
-
- for (idx = 0; idx < LCK_MODE_NUM; idx++) {
- struct ldlm_interval_tree *tree = &res->lr_itree[idx];
-
- if (!tree->lit_root)
- continue;
-
- if (!(tree->lit_mode & *data->lmd_mode))
- continue;
-
- interval_search(tree->lit_root, &ext,
- itree_overlap_cb, data);
- }
- return data->lmd_lock;
-}
-
-/**
- * Search for a lock with given properties in a queue.
- *
- * \param queue search for a lock in this queue
- * \param data parameters
- *
- * \retval a referenced lock or NULL.
- */
-static struct ldlm_lock *search_queue(struct list_head *queue,
- struct lock_match_data *data)
-{
- struct ldlm_lock *lock;
- int rc;
-
- list_for_each_entry(lock, queue, l_res_link) {
- rc = lock_matches(lock, data);
- if (rc == INTERVAL_ITER_STOP)
- return data->lmd_lock;
- }
- return NULL;
-}
-
-void ldlm_lock_fail_match_locked(struct ldlm_lock *lock)
-{
- if ((lock->l_flags & LDLM_FL_FAIL_NOTIFIED) == 0) {
- lock->l_flags |= LDLM_FL_FAIL_NOTIFIED;
- wake_up_all(&lock->l_waitq);
- }
-}
-
-/**
- * Mark lock as "matchable" by OST.
- *
- * Used to prevent certain races in LOV/OSC where the lock is granted, but LVB
- * is not yet valid.
- * Assumes LDLM lock is already locked.
- */
-void ldlm_lock_allow_match_locked(struct ldlm_lock *lock)
-{
- ldlm_set_lvb_ready(lock);
- wake_up_all(&lock->l_waitq);
-}
-EXPORT_SYMBOL(ldlm_lock_allow_match_locked);
-
-/**
- * Mark lock as "matchable" by OST.
- * Takes the lock's resource lock, then calls ldlm_lock_allow_match_locked().
- */
-void ldlm_lock_allow_match(struct ldlm_lock *lock)
-{
- lock_res_and_lock(lock);
- ldlm_lock_allow_match_locked(lock);
- unlock_res_and_lock(lock);
-}
-EXPORT_SYMBOL(ldlm_lock_allow_match);
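-
-/*
- * Illustrative sketch, not part of the original file: how an upper
- * layer (e.g. OSC) might publish a freshly received LVB and then let
- * waiters in ldlm_lock_match() proceed. Assumes the lock's LVB buffer
- * was allocated with l_lvb_len >= sizeof(*lvb).
- */
-static void example_publish_lvb(struct ldlm_lock *lock,
- const struct ost_lvb *lvb)
-{
- lock_res_and_lock(lock);
- memcpy(lock->l_lvb_data, lvb, sizeof(*lvb));
- /* flips LDLM_FL_LVB_READY and wakes matchers on l_waitq */
- ldlm_lock_allow_match_locked(lock);
- unlock_res_and_lock(lock);
-}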
-
-/**
- * Attempt to find a lock with specified properties.
- *
- * Typically returns a reference to matched lock unless LDLM_FL_TEST_LOCK is
- * set in \a flags
- *
- * Can be called in two ways:
- *
- * If 'ns' is NULL, then lockh describes an existing lock that we want to look
- * for a duplicate of.
- *
- * Otherwise, all of the fields must be filled in, to match against.
- *
- * If 'flags' contains LDLM_FL_LOCAL_ONLY, then only match local locks on the
- * server (i.e., connh is NULL).
- * If 'flags' contains LDLM_FL_BLOCK_GRANTED, then only locks on the granted
- * list will be considered.
- * If 'flags' contains LDLM_FL_CBPENDING, then locks that have been marked
- * to be canceled can still be matched as long as they still have reader
- * or writer references.
- * If 'flags' contains LDLM_FL_TEST_LOCK, then don't actually reference a lock,
- * just tell us if we would have matched.
- *
- * \retval 1 if it finds an already-existing lock that is compatible; in this
- * case, lockh is filled in with an addref()ed lock
- *
- * We also check the security context, and if that fails we simply return 0
- * (to keep caller code unchanged); the context failure will be discovered
- * by the caller sometime later.
- */
-enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
- const struct ldlm_res_id *res_id,
- enum ldlm_type type,
- union ldlm_policy_data *policy,
- enum ldlm_mode mode,
- struct lustre_handle *lockh, int unref)
-{
- struct lock_match_data data = {
- .lmd_old = NULL,
- .lmd_lock = NULL,
- .lmd_mode = &mode,
- .lmd_policy = policy,
- .lmd_flags = flags,
- .lmd_unref = unref,
- };
- struct ldlm_resource *res;
- struct ldlm_lock *lock;
- int rc = 0;
-
- if (!ns) {
- data.lmd_old = ldlm_handle2lock(lockh);
- LASSERT(data.lmd_old);
-
- ns = ldlm_lock_to_ns(data.lmd_old);
- res_id = &data.lmd_old->l_resource->lr_name;
- type = data.lmd_old->l_resource->lr_type;
- *data.lmd_mode = data.lmd_old->l_req_mode;
- }
-
- res = ldlm_resource_get(ns, NULL, res_id, type, 0);
- if (IS_ERR(res)) {
- LASSERT(!data.lmd_old);
- return 0;
- }
-
- LDLM_RESOURCE_ADDREF(res);
- lock_res(res);
-
- if (res->lr_type == LDLM_EXTENT)
- lock = search_itree(res, &data);
- else
- lock = search_queue(&res->lr_granted, &data);
- if (lock) {
- rc = 1;
- goto out;
- }
- if (flags & LDLM_FL_BLOCK_GRANTED) {
- rc = 0;
- goto out;
- }
- lock = search_queue(&res->lr_waiting, &data);
- if (lock) {
- rc = 1;
- goto out;
- }
-out:
- unlock_res(res);
- LDLM_RESOURCE_DELREF(res);
- ldlm_resource_putref(res);
-
- if (lock) {
- ldlm_lock2handle(lock, lockh);
- if ((flags & LDLM_FL_LVB_READY) && !ldlm_is_lvb_ready(lock)) {
- __u64 wait_flags = LDLM_FL_LVB_READY |
- LDLM_FL_DESTROYED | LDLM_FL_FAIL_NOTIFIED;
-
- if (lock->l_completion_ast) {
- int err = lock->l_completion_ast(lock,
- LDLM_FL_WAIT_NOREPROC,
- NULL);
- if (err) {
- if (flags & LDLM_FL_TEST_LOCK)
- LDLM_LOCK_RELEASE(lock);
- else
- ldlm_lock_decref_internal(lock,
- mode);
- rc = 0;
- goto out2;
- }
- }
-
- /* XXX FIXME see comment on CAN_MATCH in lustre_dlm.h */
- wait_event_idle_timeout(lock->l_waitq,
- lock->l_flags & wait_flags,
- obd_timeout * HZ);
- if (!ldlm_is_lvb_ready(lock)) {
- if (flags & LDLM_FL_TEST_LOCK)
- LDLM_LOCK_RELEASE(lock);
- else
- ldlm_lock_decref_internal(lock, mode);
- rc = 0;
- }
- }
- }
- out2:
- if (rc) {
- LDLM_DEBUG(lock, "matched (%llu %llu)",
- (type == LDLM_PLAIN || type == LDLM_IBITS) ?
- res_id->name[2] : policy->l_extent.start,
- (type == LDLM_PLAIN || type == LDLM_IBITS) ?
- res_id->name[3] : policy->l_extent.end);
-
- /* check user's security context */
- if (lock->l_conn_export &&
- sptlrpc_import_check_ctx(
- class_exp2cliimp(lock->l_conn_export))) {
- if (!(flags & LDLM_FL_TEST_LOCK))
- ldlm_lock_decref_internal(lock, mode);
- rc = 0;
- }
-
- if (flags & LDLM_FL_TEST_LOCK)
- LDLM_LOCK_RELEASE(lock);
-
- } else if (!(flags & LDLM_FL_TEST_LOCK)) { /* less verbose for test-only */
- LDLM_DEBUG_NOLOCK("not matched ns %p type %u mode %u res %llu/%llu (%llu %llu)",
- ns, type, mode, res_id->name[0],
- res_id->name[1],
- (type == LDLM_PLAIN || type == LDLM_IBITS) ?
- res_id->name[2] : policy->l_extent.start,
- (type == LDLM_PLAIN || type == LDLM_IBITS) ?
- res_id->name[3] : policy->l_extent.end);
- }
- if (data.lmd_old)
- LDLM_LOCK_PUT(data.lmd_old);
-
- return rc ? mode : 0;
-}
-EXPORT_SYMBOL(ldlm_lock_match);
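-
-/*
- * Illustrative sketch, not part of the original file: a client-side
- * lookup for an existing extent lock covering a whole object. The
- * LCK_PR | LCK_PW mode mask and the OBD_OBJECT_EOF end offset follow
- * common Lustre conventions but are assumptions here.
- */
-static enum ldlm_mode example_match_whole_file(struct ldlm_namespace *ns,
- const struct ldlm_res_id *res_id,
- struct lustre_handle *lockh)
-{
- union ldlm_policy_data policy = {
- .l_extent = { .start = 0, .end = OBD_OBJECT_EOF },
- };
-
- /* only consider granted locks whose LVB is already valid */
- return ldlm_lock_match(ns, LDLM_FL_BLOCK_GRANTED | LDLM_FL_LVB_READY,
- res_id, LDLM_EXTENT, &policy,
- LCK_PR | LCK_PW, lockh, 0);
-}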
-
-enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh,
- __u64 *bits)
-{
- struct ldlm_lock *lock;
- enum ldlm_mode mode = 0;
-
- lock = ldlm_handle2lock(lockh);
- if (lock) {
- lock_res_and_lock(lock);
- if (LDLM_HAVE_MASK(lock, GONE))
- goto out;
-
- if (ldlm_is_cbpending(lock) &&
- lock->l_readers == 0 && lock->l_writers == 0)
- goto out;
-
- if (bits)
- *bits = lock->l_policy_data.l_inodebits.bits;
- mode = lock->l_granted_mode;
- ldlm_lock_addref_internal_nolock(lock, mode);
- }
-
-out:
- if (lock) {
- unlock_res_and_lock(lock);
- LDLM_LOCK_PUT(lock);
- }
- return mode;
-}
-EXPORT_SYMBOL(ldlm_revalidate_lock_handle);
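-
-/*
- * Illustrative sketch, not part of the original file: revalidating a
- * cached handle before reuse. A non-zero return means the lock is
- * still usable and a reference of that mode was taken; the release
- * through ldlm_lock_decref() is the usual public path and is assumed
- * here.
- */
-static bool example_handle_still_valid(const struct lustre_handle *lockh)
-{
- __u64 bits = 0;
- enum ldlm_mode mode = ldlm_revalidate_lock_handle(lockh, &bits);
-
- if (mode == 0)
- return false;
- ldlm_lock_decref(lockh, mode); /* assumed release helper */
- return true;
-}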
-
-/** The caller must guarantee that the buffer is large enough. */
-int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
- enum req_location loc, void *data, int size)
-{
- void *lvb;
-
- LASSERT(data);
- LASSERT(size >= 0);
-
- switch (lock->l_lvb_type) {
- case LVB_T_OST:
- if (size == sizeof(struct ost_lvb)) {
- if (loc == RCL_CLIENT)
- lvb = req_capsule_client_swab_get(pill,
- &RMF_DLM_LVB,
- lustre_swab_ost_lvb);
- else
- lvb = req_capsule_server_swab_get(pill,
- &RMF_DLM_LVB,
- lustre_swab_ost_lvb);
- if (unlikely(!lvb)) {
- LDLM_ERROR(lock, "no LVB");
- return -EPROTO;
- }
-
- memcpy(data, lvb, size);
- } else if (size == sizeof(struct ost_lvb_v1)) {
- struct ost_lvb *olvb = data;
-
- if (loc == RCL_CLIENT)
- lvb = req_capsule_client_swab_get(pill,
- &RMF_DLM_LVB,
- lustre_swab_ost_lvb_v1);
- else
- lvb = req_capsule_server_sized_swab_get(pill,
- &RMF_DLM_LVB, size,
- lustre_swab_ost_lvb_v1);
- if (unlikely(!lvb)) {
- LDLM_ERROR(lock, "no LVB");
- return -EPROTO;
- }
-
- memcpy(data, lvb, size);
- olvb->lvb_mtime_ns = 0;
- olvb->lvb_atime_ns = 0;
- olvb->lvb_ctime_ns = 0;
- } else {
- LDLM_ERROR(lock, "Replied unexpected ost LVB size %d",
- size);
- return -EINVAL;
- }
- break;
- case LVB_T_LQUOTA:
- if (size == sizeof(struct lquota_lvb)) {
- if (loc == RCL_CLIENT)
- lvb = req_capsule_client_swab_get(pill,
- &RMF_DLM_LVB,
- lustre_swab_lquota_lvb);
- else
- lvb = req_capsule_server_swab_get(pill,
- &RMF_DLM_LVB,
- lustre_swab_lquota_lvb);
- if (unlikely(!lvb)) {
- LDLM_ERROR(lock, "no LVB");
- return -EPROTO;
- }
-
- memcpy(data, lvb, size);
- } else {
- LDLM_ERROR(lock,
- "Replied unexpected lquota LVB size %d",
- size);
- return -EINVAL;
- }
- break;
- case LVB_T_LAYOUT:
- if (size == 0)
- break;
-
- if (loc == RCL_CLIENT)
- lvb = req_capsule_client_get(pill, &RMF_DLM_LVB);
- else
- lvb = req_capsule_server_get(pill, &RMF_DLM_LVB);
- if (unlikely(!lvb)) {
- LDLM_ERROR(lock, "no LVB");
- return -EPROTO;
- }
-
- memcpy(data, lvb, size);
- break;
- default:
- LDLM_ERROR(lock, "Unknown LVB type: %d", lock->l_lvb_type);
- dump_stack();
- return -EINVAL;
- }
-
- return 0;
-}
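-
-/*
- * Illustrative, not part of the original file: the common client-side
- * call fills the lock's own LVB buffer from the reply capsule, as
- * ldlm_handle_cp_callback() in ldlm_lockd.c does.
- */
-static int example_fill_own_lvb(struct ldlm_lock *lock,
- struct ptlrpc_request *req, int lvb_len)
-{
- return ldlm_fill_lvb(lock, &req->rq_pill, RCL_CLIENT,
- lock->l_lvb_data, lvb_len);
-}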
-
-/**
- * Create and fill in new LDLM lock with specified properties.
- * Returns a referenced lock
- */
-struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
- const struct ldlm_res_id *res_id,
- enum ldlm_type type,
- enum ldlm_mode mode,
- const struct ldlm_callback_suite *cbs,
- void *data, __u32 lvb_len,
- enum lvb_type lvb_type)
-{
- struct ldlm_lock *lock;
- struct ldlm_resource *res;
- int rc;
-
- res = ldlm_resource_get(ns, NULL, res_id, type, 1);
- if (IS_ERR(res))
- return ERR_CAST(res);
-
- lock = ldlm_lock_new(res);
- if (!lock) {
- ldlm_resource_putref(res);
- return ERR_PTR(-ENOMEM);
- }
-
- lock->l_req_mode = mode;
- lock->l_ast_data = data;
- lock->l_pid = current->pid;
- if (cbs) {
- lock->l_blocking_ast = cbs->lcs_blocking;
- lock->l_completion_ast = cbs->lcs_completion;
- lock->l_glimpse_ast = cbs->lcs_glimpse;
- }
-
- lock->l_tree_node = NULL;
- /* if this is an extent lock, allocate the interval tree node */
- if (type == LDLM_EXTENT) {
- if (!ldlm_interval_alloc(lock)) {
- rc = -ENOMEM;
- goto out;
- }
- }
-
- if (lvb_len) {
- lock->l_lvb_len = lvb_len;
- lock->l_lvb_data = kzalloc(lvb_len, GFP_NOFS);
- if (!lock->l_lvb_data) {
- rc = -ENOMEM;
- goto out;
- }
- }
-
- lock->l_lvb_type = lvb_type;
- if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_NEW_LOCK)) {
- rc = -ENOENT;
- goto out;
- }
-
- return lock;
-
-out:
- ldlm_lock_destroy(lock);
- LDLM_LOCK_RELEASE(lock);
- return ERR_PTR(rc);
-}
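-
-/*
- * Illustrative sketch, not part of the original file: creating a local
- * IBITS lock with a blocking callback. The callback body is a
- * hypothetical no-op; LVB_T_NONE and LCK_CR are assumed values.
- */
-static int example_blocking_ast(struct ldlm_lock *lock,
- struct ldlm_lock_desc *desc,
- void *data, int flag)
-{
- return 0; /* hypothetical no-op blocking AST */
-}
-
-static struct ldlm_lock *example_create(struct ldlm_namespace *ns,
- const struct ldlm_res_id *res_id)
-{
- const struct ldlm_callback_suite cbs = {
- .lcs_blocking = example_blocking_ast,
- /* .lcs_completion and .lcs_glimpse left NULL for brevity */
- };
-
- return ldlm_lock_create(ns, res_id, LDLM_IBITS, LCK_CR, &cbs,
- NULL, 0, LVB_T_NONE);
-}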
-
-/**
- * Enqueue (request) a lock.
- * On the client this is called from ldlm_cli_enqueue_fini
- * after we already got an initial reply from the server with some status.
- *
- * Does not block. As a result of the enqueue, the lock is put on the
- * granted or waiting list.
- */
-enum ldlm_error ldlm_lock_enqueue(struct ldlm_namespace *ns,
- struct ldlm_lock **lockp,
- void *cookie, __u64 *flags)
-{
- struct ldlm_lock *lock = *lockp;
- struct ldlm_resource *res = lock->l_resource;
-
- lock_res_and_lock(lock);
- if (lock->l_req_mode == lock->l_granted_mode) {
- /* The server returned a blocked lock, but it was granted
- * before we got a chance to actually enqueue it. We don't
- * need to do anything else.
- */
- *flags &= ~LDLM_FL_BLOCKED_MASK;
- goto out;
- }
-
- ldlm_resource_unlink_lock(lock);
-
- /* Cannot happen unless on the server */
- if (res->lr_type == LDLM_EXTENT && !lock->l_tree_node)
- LBUG();
-
- /* Some flags from the enqueue want to make it into the AST, via the
- * lock's l_flags.
- */
- if (*flags & LDLM_FL_AST_DISCARD_DATA)
- ldlm_set_ast_discard_data(lock);
- if (*flags & LDLM_FL_TEST_LOCK)
- ldlm_set_test_lock(lock);
-
- /*
- * This distinction between local lock trees is very important; a client
- * namespace only has information about locks taken by that client, and
- * thus doesn't have enough information to decide for itself if it can
- * be granted (below). In this case, we do exactly what the server
- * tells us to do, as dictated by the 'flags'.
- */
- if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED))
- ldlm_resource_add_lock(res, &res->lr_waiting, lock);
- else
- ldlm_grant_lock(lock, NULL);
-
-out:
- unlock_res_and_lock(lock);
- return ELDLM_OK;
-}
-
-/**
- * Process a call to blocking AST callback for a lock in ast_work list
- */
-static int
-ldlm_work_bl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
-{
- struct ldlm_cb_set_arg *arg = opaq;
- struct ldlm_lock_desc d;
- int rc;
- struct ldlm_lock *lock;
-
- if (list_empty(arg->list))
- return -ENOENT;
-
- lock = list_first_entry(arg->list, struct ldlm_lock, l_bl_ast);
-
- /* nobody should touch l_bl_ast */
- lock_res_and_lock(lock);
- list_del_init(&lock->l_bl_ast);
-
- LASSERT(ldlm_is_ast_sent(lock));
- LASSERT(lock->l_bl_ast_run == 0);
- LASSERT(lock->l_blocking_lock);
- lock->l_bl_ast_run++;
- unlock_res_and_lock(lock);
-
- ldlm_lock2desc(lock->l_blocking_lock, &d);
-
- rc = lock->l_blocking_ast(lock, &d, (void *)arg, LDLM_CB_BLOCKING);
- LDLM_LOCK_RELEASE(lock->l_blocking_lock);
- lock->l_blocking_lock = NULL;
- LDLM_LOCK_RELEASE(lock);
-
- return rc;
-}
-
-/**
- * Process a call to completion AST callback for a lock in ast_work list
- */
-static int
-ldlm_work_cp_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
-{
- struct ldlm_cb_set_arg *arg = opaq;
- int rc = 0;
- struct ldlm_lock *lock;
- ldlm_completion_callback completion_callback;
-
- if (list_empty(arg->list))
- return -ENOENT;
-
- lock = list_first_entry(arg->list, struct ldlm_lock, l_cp_ast);
-
- /* It's possible to receive a completion AST before we've set
- * the l_completion_ast pointer: either because the AST arrived
- * before the reply, or simply because there's a small race
- * window between receiving the reply and finishing the local
- * enqueue. (bug 842)
- *
- * This can't happen with the blocking_ast, however, because we
- * will never call the local blocking_ast until we drop our
- * reader/writer reference, which we won't do until we get the
- * reply and finish enqueueing.
- */
-
- /* nobody should touch l_cp_ast */
- lock_res_and_lock(lock);
- list_del_init(&lock->l_cp_ast);
- LASSERT(ldlm_is_cp_reqd(lock));
- /* save l_completion_ast since it can be changed by
- * mds_intent_policy(), see bug 14225
- */
- completion_callback = lock->l_completion_ast;
- ldlm_clear_cp_reqd(lock);
- unlock_res_and_lock(lock);
-
- if (completion_callback)
- rc = completion_callback(lock, 0, (void *)arg);
- LDLM_LOCK_RELEASE(lock);
-
- return rc;
-}
-
-/**
- * Process a call to revocation AST callback for a lock in ast_work list
- */
-static int
-ldlm_work_revoke_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
-{
- struct ldlm_cb_set_arg *arg = opaq;
- struct ldlm_lock_desc desc;
- int rc;
- struct ldlm_lock *lock;
-
- if (list_empty(arg->list))
- return -ENOENT;
-
- lock = list_first_entry(arg->list, struct ldlm_lock, l_rk_ast);
- list_del_init(&lock->l_rk_ast);
-
- /* make the desc pretend the lock is exclusive */
- ldlm_lock2desc(lock, &desc);
- desc.l_req_mode = LCK_EX;
- desc.l_granted_mode = 0;
-
- rc = lock->l_blocking_ast(lock, &desc, (void *)arg, LDLM_CB_BLOCKING);
- LDLM_LOCK_RELEASE(lock);
-
- return rc;
-}
-
-/**
- * Process a call to glimpse AST callback for a lock in ast_work list
- */
-static int ldlm_work_gl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
-{
- struct ldlm_cb_set_arg *arg = opaq;
- struct ldlm_glimpse_work *gl_work;
- struct ldlm_lock *lock;
- int rc = 0;
-
- if (list_empty(arg->list))
- return -ENOENT;
-
- gl_work = list_first_entry(arg->list, struct ldlm_glimpse_work,
- gl_list);
- list_del_init(&gl_work->gl_list);
-
- lock = gl_work->gl_lock;
-
- /* transfer the glimpse descriptor to ldlm_cb_set_arg */
- arg->gl_desc = gl_work->gl_desc;
-
- /* invoke the actual glimpse callback */
- if (lock->l_glimpse_ast(lock, (void *)arg) == 0)
- rc = 1;
-
- LDLM_LOCK_RELEASE(lock);
-
- if ((gl_work->gl_flags & LDLM_GL_WORK_NOFREE) == 0)
- kfree(gl_work);
-
- return rc;
-}
-
-/**
- * Process list of locks in need of ASTs being sent.
- *
- * Used on the server to send multiple ASTs together instead of sending
- * them one by one.
- */
-int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list,
- enum ldlm_desc_ast_t ast_type)
-{
- struct ldlm_cb_set_arg *arg;
- set_producer_func work_ast_lock;
- int rc;
-
- if (list_empty(rpc_list))
- return 0;
-
- arg = kzalloc(sizeof(*arg), GFP_NOFS);
- if (!arg)
- return -ENOMEM;
-
- atomic_set(&arg->restart, 0);
- arg->list = rpc_list;
-
- switch (ast_type) {
- case LDLM_WORK_BL_AST:
- arg->type = LDLM_BL_CALLBACK;
- work_ast_lock = ldlm_work_bl_ast_lock;
- break;
- case LDLM_WORK_CP_AST:
- arg->type = LDLM_CP_CALLBACK;
- work_ast_lock = ldlm_work_cp_ast_lock;
- break;
- case LDLM_WORK_REVOKE_AST:
- arg->type = LDLM_BL_CALLBACK;
- work_ast_lock = ldlm_work_revoke_ast_lock;
- break;
- case LDLM_WORK_GL_AST:
- arg->type = LDLM_GL_CALLBACK;
- work_ast_lock = ldlm_work_gl_ast_lock;
- break;
- default:
- LBUG();
- }
-
- /* We create a ptlrpc request set with flow control extension.
- * This request set will use the work_ast_lock function to produce new
- * requests and will send a new request each time one completes, keeping
- * the number of requests in flight capped at ns_max_parallel_ast.
- */
- arg->set = ptlrpc_prep_fcset(ns->ns_max_parallel_ast ? : UINT_MAX,
- work_ast_lock, arg);
- if (!arg->set) {
- rc = -ENOMEM;
- goto out;
- }
-
- ptlrpc_set_wait(arg->set);
- ptlrpc_set_destroy(arg->set);
-
- rc = atomic_read(&arg->restart) ? -ERESTART : 0;
-out:
- kfree(arg);
- return rc;
-}
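-
-/*
- * Illustrative, not part of the original file: the granting path
- * batches completion ASTs on a local list and flushes them in one
- * call, mirroring what ldlm_handle_cp_callback() in ldlm_lockd.c does.
- */
-static void example_grant_and_flush(struct ldlm_namespace *ns,
- struct ldlm_lock *lock)
-{
- LIST_HEAD(rpc_list);
-
- lock_res_and_lock(lock);
- ldlm_grant_lock(lock, &rpc_list);
- unlock_res_and_lock(lock);
-
- ldlm_run_ast_work(ns, &rpc_list, LDLM_WORK_CP_AST);
-}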
-
-static bool is_bl_done(struct ldlm_lock *lock)
-{
- bool bl_done = true;
-
- if (!ldlm_is_bl_done(lock)) {
- lock_res_and_lock(lock);
- bl_done = ldlm_is_bl_done(lock);
- unlock_res_and_lock(lock);
- }
-
- return bl_done;
-}
-
-/**
- * Helper function to call blocking AST for LDLM lock \a lock in a
- * "cancelling" mode.
- */
-void ldlm_cancel_callback(struct ldlm_lock *lock)
-{
- check_res_locked(lock->l_resource);
- if (!ldlm_is_cancel(lock)) {
- ldlm_set_cancel(lock);
- if (lock->l_blocking_ast) {
- unlock_res_and_lock(lock);
- lock->l_blocking_ast(lock, NULL, lock->l_ast_data,
- LDLM_CB_CANCELING);
- lock_res_and_lock(lock);
- } else {
- LDLM_DEBUG(lock, "no blocking ast");
- }
- /* only canceller can set bl_done bit */
- ldlm_set_bl_done(lock);
- wake_up_all(&lock->l_waitq);
- } else if (!ldlm_is_bl_done(lock)) {
- /*
- * The lock is guaranteed to have been canceled by the time
- * this function returns.
- */
- unlock_res_and_lock(lock);
- wait_event_idle(lock->l_waitq, is_bl_done(lock));
- lock_res_and_lock(lock);
- }
-}
-
-/**
- * Remove skiplist-enabled LDLM lock \a req from the granted list.
- */
-void ldlm_unlink_lock_skiplist(struct ldlm_lock *req)
-{
- if (req->l_resource->lr_type != LDLM_PLAIN &&
- req->l_resource->lr_type != LDLM_IBITS)
- return;
-
- list_del_init(&req->l_sl_policy);
- list_del_init(&req->l_sl_mode);
-}
-
-/**
- * Attempts to cancel LDLM lock \a lock that has no reader/writer references.
- */
-void ldlm_lock_cancel(struct ldlm_lock *lock)
-{
- struct ldlm_resource *res;
- struct ldlm_namespace *ns;
-
- lock_res_and_lock(lock);
-
- res = lock->l_resource;
- ns = ldlm_res_to_ns(res);
-
- /* Please do not, no matter how tempting, remove this LBUG without
- * talking to me first. -phik
- */
- if (lock->l_readers || lock->l_writers) {
- LDLM_ERROR(lock, "lock still has references");
- LBUG();
- }
-
- /* Releases cancel callback. */
- ldlm_cancel_callback(lock);
-
- ldlm_resource_unlink_lock(lock);
- ldlm_lock_destroy_nolock(lock);
-
- if (lock->l_granted_mode == lock->l_req_mode)
- ldlm_pool_del(&ns->ns_pool, lock);
-
- /* Zero out lock->l_granted_mode so that we cannot be called again
- * for the same lock.
- */
- lock->l_granted_mode = LCK_MINMODE;
- unlock_res_and_lock(lock);
-}
-EXPORT_SYMBOL(ldlm_lock_cancel);
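-
-/*
- * Illustrative, not part of the original file: cancelling through a
- * handle. The caller must have dropped all reader/writer references
- * first, or ldlm_lock_cancel() will LBUG() as noted above.
- */
-static void example_cancel_by_handle(const struct lustre_handle *lockh)
-{
- struct ldlm_lock *lock = ldlm_handle2lock(lockh);
-
- if (!lock)
- return;
- ldlm_lock_cancel(lock);
- LDLM_LOCK_PUT(lock);
-}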
-
-/**
- * Set opaque data into the lock that only makes sense to upper layer.
- */
-int ldlm_lock_set_data(const struct lustre_handle *lockh, void *data)
-{
- struct ldlm_lock *lock = ldlm_handle2lock(lockh);
- int rc = -EINVAL;
-
- if (lock) {
- if (!lock->l_ast_data)
- lock->l_ast_data = data;
- if (lock->l_ast_data == data)
- rc = 0;
- LDLM_LOCK_PUT(lock);
- }
- return rc;
-}
-EXPORT_SYMBOL(ldlm_lock_set_data);
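-
-/*
- * Illustrative, not part of the original file: upper layers stash a
- * back-pointer to their own object exactly once; a second call with a
- * different pointer is rejected with -EINVAL. "my_obj" is a
- * hypothetical upper-layer object.
- */
-static int example_attach_ast_data(const struct lustre_handle *lockh,
- void *my_obj)
-{
- int rc = ldlm_lock_set_data(lockh, my_obj);
-
- if (rc == -EINVAL)
- CERROR("lock already carries different ast_data\n");
- return rc;
-}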
-
-struct export_cl_data {
- struct obd_export *ecl_exp;
- int ecl_loop;
-};
-
-/**
- * Print lock with lock handle \a lockh description into debug log.
- *
- * Used when printing all locks on a resource for debug purposes.
- */
-void ldlm_lock_dump_handle(int level, const struct lustre_handle *lockh)
-{
- struct ldlm_lock *lock;
-
- if (!((libcfs_debug | D_ERROR) & level))
- return;
-
- lock = ldlm_handle2lock(lockh);
- if (!lock)
- return;
-
- LDLM_DEBUG_LIMIT(level, lock, "###");
-
- LDLM_LOCK_PUT(lock);
-}
-EXPORT_SYMBOL(ldlm_lock_dump_handle);
-
-/**
- * Print lock information with custom message into debug log.
- * Helper function.
- */
-void _ldlm_lock_debug(struct ldlm_lock *lock,
- struct libcfs_debug_msg_data *msgdata,
- const char *fmt, ...)
-{
- va_list args;
- struct obd_export *exp = lock->l_export;
- struct ldlm_resource *resource = lock->l_resource;
- char *nid = "local";
-
- va_start(args, fmt);
-
- if (exp && exp->exp_connection) {
- nid = libcfs_nid2str(exp->exp_connection->c_peer.nid);
- } else if (exp && exp->exp_obd) {
- struct obd_import *imp = exp->exp_obd->u.cli.cl_import;
-
- nid = libcfs_nid2str(imp->imp_connection->c_peer.nid);
- }
-
- if (!resource) {
- libcfs_debug_vmsg2(msgdata, fmt, args,
- " ns: \?\? lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: \?\? rrc=\?\? type: \?\?\? flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lu lvb_type: %d\n",
- lock,
- lock->l_handle.h_cookie,
- atomic_read(&lock->l_refc),
- lock->l_readers, lock->l_writers,
- ldlm_lockname[lock->l_granted_mode],
- ldlm_lockname[lock->l_req_mode],
- lock->l_flags, nid,
- lock->l_remote_handle.cookie,
- exp ? atomic_read(&exp->exp_refcount) : -99,
- lock->l_pid, lock->l_callback_timeout,
- lock->l_lvb_type);
- va_end(args);
- return;
- }
-
- switch (resource->lr_type) {
- case LDLM_EXTENT:
- libcfs_debug_vmsg2(msgdata, fmt, args,
- " ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " rrc: %d type: %s [%llu->%llu] (req %llu->%llu) flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lu lvb_type: %d\n",
- ldlm_lock_to_ns_name(lock), lock,
- lock->l_handle.h_cookie,
- atomic_read(&lock->l_refc),
- lock->l_readers, lock->l_writers,
- ldlm_lockname[lock->l_granted_mode],
- ldlm_lockname[lock->l_req_mode],
- PLDLMRES(resource),
- atomic_read(&resource->lr_refcount),
- ldlm_typename[resource->lr_type],
- lock->l_policy_data.l_extent.start,
- lock->l_policy_data.l_extent.end,
- lock->l_req_extent.start,
- lock->l_req_extent.end,
- lock->l_flags, nid,
- lock->l_remote_handle.cookie,
- exp ? atomic_read(&exp->exp_refcount) : -99,
- lock->l_pid, lock->l_callback_timeout,
- lock->l_lvb_type);
- break;
-
- case LDLM_FLOCK:
- libcfs_debug_vmsg2(msgdata, fmt, args,
- " ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " rrc: %d type: %s pid: %d [%llu->%llu] flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lu\n",
- ldlm_lock_to_ns_name(lock), lock,
- lock->l_handle.h_cookie,
- atomic_read(&lock->l_refc),
- lock->l_readers, lock->l_writers,
- ldlm_lockname[lock->l_granted_mode],
- ldlm_lockname[lock->l_req_mode],
- PLDLMRES(resource),
- atomic_read(&resource->lr_refcount),
- ldlm_typename[resource->lr_type],
- lock->l_policy_data.l_flock.pid,
- lock->l_policy_data.l_flock.start,
- lock->l_policy_data.l_flock.end,
- lock->l_flags, nid,
- lock->l_remote_handle.cookie,
- exp ? atomic_read(&exp->exp_refcount) : -99,
- lock->l_pid, lock->l_callback_timeout);
- break;
-
- case LDLM_IBITS:
- libcfs_debug_vmsg2(msgdata, fmt, args,
- " ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " bits %#llx rrc: %d type: %s flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lu lvb_type: %d\n",
- ldlm_lock_to_ns_name(lock),
- lock, lock->l_handle.h_cookie,
- atomic_read(&lock->l_refc),
- lock->l_readers, lock->l_writers,
- ldlm_lockname[lock->l_granted_mode],
- ldlm_lockname[lock->l_req_mode],
- PLDLMRES(resource),
- lock->l_policy_data.l_inodebits.bits,
- atomic_read(&resource->lr_refcount),
- ldlm_typename[resource->lr_type],
- lock->l_flags, nid,
- lock->l_remote_handle.cookie,
- exp ? atomic_read(&exp->exp_refcount) : -99,
- lock->l_pid, lock->l_callback_timeout,
- lock->l_lvb_type);
- break;
-
- default:
- libcfs_debug_vmsg2(msgdata, fmt, args,
- " ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " rrc: %d type: %s flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lu lvb_type: %d\n",
- ldlm_lock_to_ns_name(lock),
- lock, lock->l_handle.h_cookie,
- atomic_read(&lock->l_refc),
- lock->l_readers, lock->l_writers,
- ldlm_lockname[lock->l_granted_mode],
- ldlm_lockname[lock->l_req_mode],
- PLDLMRES(resource),
- atomic_read(&resource->lr_refcount),
- ldlm_typename[resource->lr_type],
- lock->l_flags, nid,
- lock->l_remote_handle.cookie,
- exp ? atomic_read(&exp->exp_refcount) : -99,
- lock->l_pid, lock->l_callback_timeout,
- lock->l_lvb_type);
- break;
- }
- va_end(args);
-}
-EXPORT_SYMBOL(_ldlm_lock_debug);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
deleted file mode 100644
index 5963e90d0938..000000000000
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
+++ /dev/null
@@ -1,1163 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2010, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ldlm/ldlm_lockd.c
- *
- * Author: Peter Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LDLM
-
-#include <linux/kthread.h>
-#include <linux/sched/mm.h>
-#include <lustre_dlm.h>
-#include <obd_class.h>
-#include <linux/list.h>
-#include "ldlm_internal.h"
-
-static int ldlm_num_threads;
-module_param(ldlm_num_threads, int, 0444);
-MODULE_PARM_DESC(ldlm_num_threads, "number of DLM service threads to start");
-
-static char *ldlm_cpts;
-module_param(ldlm_cpts, charp, 0444);
-MODULE_PARM_DESC(ldlm_cpts, "CPU partitions ldlm threads should run on");
-
-static struct mutex ldlm_ref_mutex;
-static int ldlm_refcount;
-
-static struct kobject *ldlm_kobj;
-struct kset *ldlm_ns_kset;
-static struct kset *ldlm_svc_kset;
-
-struct ldlm_cb_async_args {
- struct ldlm_cb_set_arg *ca_set_arg;
- struct ldlm_lock *ca_lock;
-};
-
-/* LDLM state */
-
-static struct ldlm_state *ldlm_state;
-
-#define ELT_STOPPED 0
-#define ELT_READY 1
-#define ELT_TERMINATE 2
-
-struct ldlm_bl_pool {
- spinlock_t blp_lock;
-
- /*
- * blp_prio_list is used for callbacks that should be handled
- * as a priority. It is used for LDLM_FL_DISCARD_DATA requests.
- * see bug 13843
- */
- struct list_head blp_prio_list;
-
- /*
- * blp_list is used for all other callbacks which are likely
- * to take longer to process.
- */
- struct list_head blp_list;
-
- wait_queue_head_t blp_waitq;
- struct completion blp_comp;
- atomic_t blp_num_threads;
- atomic_t blp_busy_threads;
- int blp_min_threads;
- int blp_max_threads;
-};
-
-struct ldlm_bl_work_item {
- struct list_head blwi_entry;
- struct ldlm_namespace *blwi_ns;
- struct ldlm_lock_desc blwi_ld;
- struct ldlm_lock *blwi_lock;
- struct list_head blwi_head;
- int blwi_count;
- struct completion blwi_comp;
- enum ldlm_cancel_flags blwi_flags;
- int blwi_mem_pressure;
-};
-
-/**
- * Callback handler for receiving incoming blocking ASTs.
- *
- * This can only happen on the client side.
- */
-void ldlm_handle_bl_callback(struct ldlm_namespace *ns,
- struct ldlm_lock_desc *ld, struct ldlm_lock *lock)
-{
- int do_ast;
-
- LDLM_DEBUG(lock, "client blocking AST callback handler");
-
- lock_res_and_lock(lock);
- ldlm_set_cbpending(lock);
-
- if (ldlm_is_cancel_on_block(lock))
- ldlm_set_cancel(lock);
-
- do_ast = !lock->l_readers && !lock->l_writers;
- unlock_res_and_lock(lock);
-
- if (do_ast) {
- CDEBUG(D_DLMTRACE,
- "Lock %p already unused, calling callback (%p)\n", lock,
- lock->l_blocking_ast);
- if (lock->l_blocking_ast)
- lock->l_blocking_ast(lock, ld, lock->l_ast_data,
- LDLM_CB_BLOCKING);
- } else {
- CDEBUG(D_DLMTRACE,
- "Lock %p is referenced, will be cancelled later\n",
- lock);
- }
-
- LDLM_DEBUG(lock, "client blocking callback handler END");
- LDLM_LOCK_RELEASE(lock);
-}
-
-/**
- * Callback handler for receiving incoming completion ASTs.
- *
- * This only can happen on client side.
- */
-static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
- struct ldlm_namespace *ns,
- struct ldlm_request *dlm_req,
- struct ldlm_lock *lock)
-{
- int lvb_len;
- LIST_HEAD(ast_list);
- int rc = 0;
-
- LDLM_DEBUG(lock, "client completion callback handler START");
-
- if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE)) {
- int to = HZ;
-
- while (to > 0) {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(to);
- if (lock->l_granted_mode == lock->l_req_mode ||
- ldlm_is_destroyed(lock))
- break;
- }
- }
-
- lvb_len = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB, RCL_CLIENT);
- if (lvb_len < 0) {
- LDLM_ERROR(lock, "Fail to get lvb_len, rc = %d", lvb_len);
- rc = lvb_len;
- goto out;
- } else if (lvb_len > 0) {
- if (lock->l_lvb_len > 0) {
- /* for extent lock, lvb contains ost_lvb{}. */
- LASSERT(lock->l_lvb_data);
-
- if (unlikely(lock->l_lvb_len < lvb_len)) {
- LDLM_ERROR(lock,
- "Replied LVB is larger than expectation, expected = %d, replied = %d",
- lock->l_lvb_len, lvb_len);
- rc = -EINVAL;
- goto out;
- }
- } else if (ldlm_has_layout(lock)) { /* for layout lock, lvb has
- * variable length
- */
- void *lvb_data;
-
- lvb_data = kzalloc(lvb_len, GFP_NOFS);
- if (!lvb_data) {
- LDLM_ERROR(lock, "No memory: %d.\n", lvb_len);
- rc = -ENOMEM;
- goto out;
- }
-
- lock_res_and_lock(lock);
- LASSERT(!lock->l_lvb_data);
- lock->l_lvb_type = LVB_T_LAYOUT;
- lock->l_lvb_data = lvb_data;
- lock->l_lvb_len = lvb_len;
- unlock_res_and_lock(lock);
- }
- }
-
- lock_res_and_lock(lock);
- if (ldlm_is_destroyed(lock) ||
- lock->l_granted_mode == lock->l_req_mode) {
- /* bug 11300: the lock has already been granted */
- unlock_res_and_lock(lock);
- LDLM_DEBUG(lock, "Double grant race happened");
- rc = 0;
- goto out;
- }
-
- /* If we receive the completion AST before the actual enqueue returned,
- * then we might need to switch lock modes, resources, or extents.
- */
- if (dlm_req->lock_desc.l_granted_mode != lock->l_req_mode) {
- lock->l_req_mode = dlm_req->lock_desc.l_granted_mode;
- LDLM_DEBUG(lock, "completion AST, new lock mode");
- }
-
- if (lock->l_resource->lr_type != LDLM_PLAIN) {
- ldlm_convert_policy_to_local(req->rq_export,
- dlm_req->lock_desc.l_resource.lr_type,
- &dlm_req->lock_desc.l_policy_data,
- &lock->l_policy_data);
- LDLM_DEBUG(lock, "completion AST, new policy data");
- }
-
- ldlm_resource_unlink_lock(lock);
- if (memcmp(&dlm_req->lock_desc.l_resource.lr_name,
- &lock->l_resource->lr_name,
- sizeof(lock->l_resource->lr_name)) != 0) {
- unlock_res_and_lock(lock);
- rc = ldlm_lock_change_resource(ns, lock,
- &dlm_req->lock_desc.l_resource.lr_name);
- if (rc < 0) {
- LDLM_ERROR(lock, "Failed to allocate resource");
- goto out;
- }
- LDLM_DEBUG(lock, "completion AST, new resource");
- CERROR("change resource!\n");
- lock_res_and_lock(lock);
- }
-
- if (dlm_req->lock_flags & LDLM_FL_AST_SENT) {
- /* BL_AST locks are not needed in LRU.
- * Let ldlm_cancel_lru() be fast.
- */
- ldlm_lock_remove_from_lru(lock);
- lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST;
- LDLM_DEBUG(lock, "completion AST includes blocking AST");
- }
-
- if (lock->l_lvb_len > 0) {
- rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_CLIENT,
- lock->l_lvb_data, lvb_len);
- if (rc < 0) {
- unlock_res_and_lock(lock);
- goto out;
- }
- }
-
- ldlm_grant_lock(lock, &ast_list);
- unlock_res_and_lock(lock);
-
- LDLM_DEBUG(lock, "callback handler finished, about to run_ast_work");
-
- /* Let the enqueue path call osc_lock_upcall() and initialize l_ast_data */
- OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 2);
-
- ldlm_run_ast_work(ns, &ast_list, LDLM_WORK_CP_AST);
-
- LDLM_DEBUG_NOLOCK("client completion callback handler END (lock %p)",
- lock);
- goto out;
-
-out:
- if (rc < 0) {
- lock_res_and_lock(lock);
- ldlm_set_failed(lock);
- unlock_res_and_lock(lock);
- wake_up(&lock->l_waitq);
- }
- LDLM_LOCK_RELEASE(lock);
-}
-
-/**
- * Callback handler for receiving incoming glimpse ASTs.
- *
- * This can only happen on the client side. After handling the glimpse
- * AST we also consider dropping the lock here if it has been unused
- * locally for a long time.
- */
-static void ldlm_handle_gl_callback(struct ptlrpc_request *req,
- struct ldlm_namespace *ns,
- struct ldlm_request *dlm_req,
- struct ldlm_lock *lock)
-{
- int rc = -ENOSYS;
-
- LDLM_DEBUG(lock, "client glimpse AST callback handler");
-
- if (lock->l_glimpse_ast)
- rc = lock->l_glimpse_ast(lock, req);
-
- if (req->rq_repmsg) {
- ptlrpc_reply(req);
- } else {
- req->rq_status = rc;
- ptlrpc_error(req);
- }
-
- lock_res_and_lock(lock);
- if (lock->l_granted_mode == LCK_PW &&
- !lock->l_readers && !lock->l_writers &&
- time_after(jiffies,
- lock->l_last_used + 10 * HZ)) {
- unlock_res_and_lock(lock);
- if (ldlm_bl_to_thread_lock(ns, NULL, lock))
- ldlm_handle_bl_callback(ns, NULL, lock);
-
- return;
- }
- unlock_res_and_lock(lock);
- LDLM_LOCK_RELEASE(lock);
-}
-
-static int ldlm_callback_reply(struct ptlrpc_request *req, int rc)
-{
- if (req->rq_no_reply)
- return 0;
-
- req->rq_status = rc;
- if (!req->rq_packed_final) {
- rc = lustre_pack_reply(req, 1, NULL, NULL);
- if (rc)
- return rc;
- }
- return ptlrpc_reply(req);
-}
-
-static int __ldlm_bl_to_thread(struct ldlm_bl_work_item *blwi,
- enum ldlm_cancel_flags cancel_flags)
-{
- struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool;
-
- spin_lock(&blp->blp_lock);
- if (blwi->blwi_lock && ldlm_is_discard_data(blwi->blwi_lock)) {
- /* add LDLM_FL_DISCARD_DATA requests to the priority list */
- list_add_tail(&blwi->blwi_entry, &blp->blp_prio_list);
- } else {
- /* other blocking callbacks are added to the regular list */
- list_add_tail(&blwi->blwi_entry, &blp->blp_list);
- }
- spin_unlock(&blp->blp_lock);
-
- wake_up(&blp->blp_waitq);
-
- /* cannot check blwi->blwi_flags as blwi could already be freed in
- * LCF_ASYNC mode
- */
- if (!(cancel_flags & LCF_ASYNC))
- wait_for_completion(&blwi->blwi_comp);
-
- return 0;
-}
-
-static inline void init_blwi(struct ldlm_bl_work_item *blwi,
- struct ldlm_namespace *ns,
- struct ldlm_lock_desc *ld,
- struct list_head *cancels, int count,
- struct ldlm_lock *lock,
- enum ldlm_cancel_flags cancel_flags)
-{
- init_completion(&blwi->blwi_comp);
- INIT_LIST_HEAD(&blwi->blwi_head);
-
- if (current->flags & PF_MEMALLOC)
- blwi->blwi_mem_pressure = 1;
-
- blwi->blwi_ns = ns;
- blwi->blwi_flags = cancel_flags;
- if (ld)
- blwi->blwi_ld = *ld;
- if (count) {
- list_add(&blwi->blwi_head, cancels);
- list_del_init(cancels);
- blwi->blwi_count = count;
- } else {
- blwi->blwi_lock = lock;
- }
-}
-
-/**
- * Queues a list of locks \a cancels containing \a count locks
- * for later processing by a blocking thread. If \a count is zero,
- * then the lock referenced as \a lock is queued instead.
- *
- * The blocking thread will then call the ->l_blocking_ast callback on the
- * lock. If adding to the list fails, an error is returned and the caller
- * is expected to call ->l_blocking_ast itself.
- */
-static int ldlm_bl_to_thread(struct ldlm_namespace *ns,
- struct ldlm_lock_desc *ld,
- struct ldlm_lock *lock,
- struct list_head *cancels, int count,
- enum ldlm_cancel_flags cancel_flags)
-{
- if (cancels && count == 0)
- return 0;
-
- if (cancel_flags & LCF_ASYNC) {
- struct ldlm_bl_work_item *blwi;
-
- blwi = kzalloc(sizeof(*blwi), GFP_NOFS);
- if (!blwi)
- return -ENOMEM;
- init_blwi(blwi, ns, ld, cancels, count, lock, cancel_flags);
-
- return __ldlm_bl_to_thread(blwi, cancel_flags);
- } else {
- /* for a synchronous call, do minimal memory allocation, as it
- * could be triggered by the kernel shrinker
- */
- struct ldlm_bl_work_item blwi;
-
- memset(&blwi, 0, sizeof(blwi));
- init_blwi(&blwi, ns, ld, cancels, count, lock, cancel_flags);
- return __ldlm_bl_to_thread(&blwi, cancel_flags);
- }
-}
-
-int ldlm_bl_to_thread_lock(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld,
- struct ldlm_lock *lock)
-{
- return ldlm_bl_to_thread(ns, ld, lock, NULL, 0, LCF_ASYNC);
-}
-
-int ldlm_bl_to_thread_list(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld,
- struct list_head *cancels, int count,
- enum ldlm_cancel_flags cancel_flags)
-{
- return ldlm_bl_to_thread(ns, ld, NULL, cancels, count, cancel_flags);
-}
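-
-/*
- * Illustrative, not part of the original file: handing a batch of
- * locks already marked for cancellation to the blocking threads.
- * LCF_ASYNC returns without waiting for the threads to finish, as
- * described for __ldlm_bl_to_thread() above.
- */
-static int example_cancel_batch_async(struct ldlm_namespace *ns,
- struct list_head *cancels, int count)
-{
- return ldlm_bl_to_thread_list(ns, NULL, cancels, count, LCF_ASYNC);
-}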
-
-int ldlm_bl_thread_wakeup(void)
-{
- wake_up(&ldlm_state->ldlm_bl_pool->blp_waitq);
- return 0;
-}
-
-/* Setinfo coming from the server (e.g. MDT) to the client (e.g. MDC)! */
-static int ldlm_handle_setinfo(struct ptlrpc_request *req)
-{
- struct obd_device *obd = req->rq_export->exp_obd;
- char *key;
- void *val;
- int keylen, vallen;
- int rc = -ENOSYS;
-
- DEBUG_REQ(D_HSM, req, "%s: handle setinfo\n", obd->obd_name);
-
- req_capsule_set(&req->rq_pill, &RQF_OBD_SET_INFO);
-
- key = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY);
- if (!key) {
- DEBUG_REQ(D_IOCTL, req, "no set_info key");
- return -EFAULT;
- }
- keylen = req_capsule_get_size(&req->rq_pill, &RMF_SETINFO_KEY,
- RCL_CLIENT);
- val = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_VAL);
- if (!val) {
- DEBUG_REQ(D_IOCTL, req, "no set_info val");
- return -EFAULT;
- }
- vallen = req_capsule_get_size(&req->rq_pill, &RMF_SETINFO_VAL,
- RCL_CLIENT);
-
- /* We are responsible for swabbing contents of val */
-
- if (KEY_IS(KEY_HSM_COPYTOOL_SEND))
- /* Pass it on to mdc (the "export" in this case) */
- rc = obd_set_info_async(req->rq_svc_thread->t_env,
- req->rq_export,
- sizeof(KEY_HSM_COPYTOOL_SEND),
- KEY_HSM_COPYTOOL_SEND,
- vallen, val, NULL);
- else
- DEBUG_REQ(D_WARNING, req, "ignoring unknown key %s", key);
-
- return rc;
-}
-
-static inline void ldlm_callback_errmsg(struct ptlrpc_request *req,
- const char *msg, int rc,
- const struct lustre_handle *handle)
-{
- DEBUG_REQ((req->rq_no_reply || rc) ? D_WARNING : D_DLMTRACE, req,
- "%s: [nid %s] [rc %d] [lock %#llx]",
- msg, libcfs_id2str(req->rq_peer), rc,
- handle ? handle->cookie : 0);
- if (req->rq_no_reply)
- CWARN("No reply was sent, possibly due to bug 21636.\n");
- else if (rc)
- CWARN("Sending the reply failed, possibly due to bug 21636.\n");
-}
-
-/* TODO: handle requests in a similar way as MDT: see mdt_handle_common() */
-static int ldlm_callback_handler(struct ptlrpc_request *req)
-{
- struct ldlm_namespace *ns;
- struct ldlm_request *dlm_req;
- struct ldlm_lock *lock;
- int rc;
-
- /* Requests arrive in sender's byte order. The ptlrpc service
- * handler has already checked and, if necessary, byte-swapped the
- * incoming request message body, but I am responsible for the
- * message buffers.
- */
-
- /* do nothing for sec context finalize */
- if (lustre_msg_get_opc(req->rq_reqmsg) == SEC_CTX_FINI)
- return 0;
-
- req_capsule_init(&req->rq_pill, req, RCL_SERVER);
-
- if (!req->rq_export) {
- rc = ldlm_callback_reply(req, -ENOTCONN);
- ldlm_callback_errmsg(req, "Operate on unconnected server",
- rc, NULL);
- return 0;
- }
-
- LASSERT(req->rq_export->exp_obd);
-
- switch (lustre_msg_get_opc(req->rq_reqmsg)) {
- case LDLM_BL_CALLBACK:
- if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET)) {
- if (cfs_fail_err)
- ldlm_callback_reply(req, -(int)cfs_fail_err);
- return 0;
- }
- break;
- case LDLM_CP_CALLBACK:
- if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CP_CALLBACK_NET))
- return 0;
- break;
- case LDLM_GL_CALLBACK:
- if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_GL_CALLBACK_NET))
- return 0;
- break;
- case LDLM_SET_INFO:
- rc = ldlm_handle_setinfo(req);
- ldlm_callback_reply(req, rc);
- return 0;
- default:
- CERROR("unknown opcode %u\n",
- lustre_msg_get_opc(req->rq_reqmsg));
- ldlm_callback_reply(req, -EPROTO);
- return 0;
- }
-
- ns = req->rq_export->exp_obd->obd_namespace;
- LASSERT(ns);
-
- req_capsule_set(&req->rq_pill, &RQF_LDLM_CALLBACK);
-
- dlm_req = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
- if (!dlm_req) {
- rc = ldlm_callback_reply(req, -EPROTO);
- ldlm_callback_errmsg(req, "Operate without parameter", rc,
- NULL);
- return 0;
- }
-
- /* Force a known safe race, send a cancel to the server for a lock
- * which the server has already started a blocking callback on.
- */
- if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE) &&
- lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) {
- rc = ldlm_cli_cancel(&dlm_req->lock_handle[0], 0);
- if (rc < 0)
- CERROR("ldlm_cli_cancel: %d\n", rc);
- }
-
- lock = ldlm_handle2lock_long(&dlm_req->lock_handle[0], 0);
- if (!lock) {
- CDEBUG(D_DLMTRACE,
- "callback on lock %#llx - lock disappeared\n",
- dlm_req->lock_handle[0].cookie);
- rc = ldlm_callback_reply(req, -EINVAL);
- ldlm_callback_errmsg(req, "Operate with invalid parameter", rc,
- &dlm_req->lock_handle[0]);
- return 0;
- }
-
- if (ldlm_is_fail_loc(lock) &&
- lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK)
- OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
-
- /* Copy hints/flags (e.g. LDLM_FL_DISCARD_DATA) from AST. */
- lock_res_and_lock(lock);
- lock->l_flags |= ldlm_flags_from_wire(dlm_req->lock_flags &
- LDLM_FL_AST_MASK);
- if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) {
- /* If somebody cancels lock and cache is already dropped,
- * or lock is failed before cp_ast received on client,
- * we can tell the server we have no lock. Otherwise, we
- * should send cancel after dropping the cache.
- */
- if ((ldlm_is_canceling(lock) && ldlm_is_bl_done(lock)) ||
- ldlm_is_failed(lock)) {
- LDLM_DEBUG(lock,
- "callback on lock %#llx - lock disappeared",
- dlm_req->lock_handle[0].cookie);
- unlock_res_and_lock(lock);
- LDLM_LOCK_RELEASE(lock);
- rc = ldlm_callback_reply(req, -EINVAL);
- ldlm_callback_errmsg(req, "Operate on stale lock", rc,
- &dlm_req->lock_handle[0]);
- return 0;
- }
- /* BL_AST locks are not needed in LRU.
- * Let ldlm_cancel_lru() be fast.
- */
- ldlm_lock_remove_from_lru(lock);
- ldlm_set_bl_ast(lock);
- }
- unlock_res_and_lock(lock);
-
- /* We want the ost thread to get this reply so that it can respond
- * to ost requests (write cache writeback) that might be triggered
- * in the callback.
- *
- * But we'd also like to be able to indicate in the reply that we're
- * cancelling right now, because it's unused, or have an intent result
- * in the reply, so we might have to push the responsibility for sending
- * the reply down into the AST handlers, alas.
- */
-
- switch (lustre_msg_get_opc(req->rq_reqmsg)) {
- case LDLM_BL_CALLBACK:
- CDEBUG(D_INODE, "blocking ast\n");
- req_capsule_extend(&req->rq_pill, &RQF_LDLM_BL_CALLBACK);
- if (!ldlm_is_cancel_on_block(lock)) {
- rc = ldlm_callback_reply(req, 0);
- if (req->rq_no_reply || rc)
- ldlm_callback_errmsg(req, "Normal process", rc,
- &dlm_req->lock_handle[0]);
- }
- if (ldlm_bl_to_thread_lock(ns, &dlm_req->lock_desc, lock))
- ldlm_handle_bl_callback(ns, &dlm_req->lock_desc, lock);
- break;
- case LDLM_CP_CALLBACK:
- CDEBUG(D_INODE, "completion ast\n");
- req_capsule_extend(&req->rq_pill, &RQF_LDLM_CP_CALLBACK);
- ldlm_callback_reply(req, 0);
- ldlm_handle_cp_callback(req, ns, dlm_req, lock);
- break;
- case LDLM_GL_CALLBACK:
- CDEBUG(D_INODE, "glimpse ast\n");
- req_capsule_extend(&req->rq_pill, &RQF_LDLM_GL_CALLBACK);
- ldlm_handle_gl_callback(req, ns, dlm_req, lock);
- break;
- default:
- LBUG(); /* checked above */
- }
-
- return 0;
-}
-
-static int ldlm_bl_get_work(struct ldlm_bl_pool *blp,
- struct ldlm_bl_work_item **p_blwi,
- struct obd_export **p_exp)
-{
- int num_th = atomic_read(&blp->blp_num_threads);
- struct ldlm_bl_work_item *blwi = NULL;
- static unsigned int num_bl;
-
- spin_lock(&blp->blp_lock);
- /* take a request from blp_list at least once per blp_num_threads requests */
- if (!list_empty(&blp->blp_list) &&
- (list_empty(&blp->blp_prio_list) || num_bl == 0))
- blwi = list_first_entry(&blp->blp_list,
- struct ldlm_bl_work_item, blwi_entry);
- else if (!list_empty(&blp->blp_prio_list))
- blwi = list_first_entry(&blp->blp_prio_list,
- struct ldlm_bl_work_item,
- blwi_entry);
-
- if (blwi) {
- if (++num_bl >= num_th)
- num_bl = 0;
- list_del(&blwi->blwi_entry);
- }
- spin_unlock(&blp->blp_lock);
- *p_blwi = blwi;
-
- return (*p_blwi || *p_exp) ? 1 : 0;
-}
-
-/* This only contains temporary data until the thread starts */
-struct ldlm_bl_thread_data {
- struct ldlm_bl_pool *bltd_blp;
- struct completion bltd_comp;
- int bltd_num;
-};
-
-static int ldlm_bl_thread_main(void *arg);
-
-static int ldlm_bl_thread_start(struct ldlm_bl_pool *blp, bool check_busy)
-{
- struct ldlm_bl_thread_data bltd = { .bltd_blp = blp };
- struct task_struct *task;
-
- init_completion(&bltd.bltd_comp);
-
- bltd.bltd_num = atomic_inc_return(&blp->blp_num_threads);
- if (bltd.bltd_num >= blp->blp_max_threads) {
- atomic_dec(&blp->blp_num_threads);
- return 0;
- }
-
- LASSERTF(bltd.bltd_num > 0, "thread num:%d\n", bltd.bltd_num);
- if (check_busy &&
- atomic_read(&blp->blp_busy_threads) < (bltd.bltd_num - 1)) {
- atomic_dec(&blp->blp_num_threads);
- return 0;
- }
-
- task = kthread_run(ldlm_bl_thread_main, &bltd, "ldlm_bl_%02d",
- bltd.bltd_num);
- if (IS_ERR(task)) {
- CERROR("cannot start LDLM thread ldlm_bl_%02d: rc %ld\n",
- bltd.bltd_num, PTR_ERR(task));
- atomic_dec(&blp->blp_num_threads);
- return PTR_ERR(task);
- }
- wait_for_completion(&bltd.bltd_comp);
-
- return 0;
-}
-
-/* Not fatal if this races and we end up with a few too many threads */
-static int ldlm_bl_thread_need_create(struct ldlm_bl_pool *blp,
- struct ldlm_bl_work_item *blwi)
-{
- if (atomic_read(&blp->blp_num_threads) >= blp->blp_max_threads)
- return 0;
-
- if (atomic_read(&blp->blp_busy_threads) <
- atomic_read(&blp->blp_num_threads))
- return 0;
-
- if (blwi && (!blwi->blwi_ns || blwi->blwi_mem_pressure))
- return 0;
-
- return 1;
-}
-
-static int ldlm_bl_thread_blwi(struct ldlm_bl_pool *blp,
- struct ldlm_bl_work_item *blwi)
-{
- unsigned int flags = 0;
-
- if (!blwi->blwi_ns)
- /* added by ldlm_cleanup() */
- return LDLM_ITER_STOP;
-
- if (blwi->blwi_mem_pressure)
- flags = memalloc_noreclaim_save();
-
- OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_PAUSE_CANCEL2, 4);
-
- if (blwi->blwi_count) {
- int count;
-
- /*
- * In the special case where we cancel LRU locks
- * asynchronously, the list of locks is passed in here.
- * The locks are marked LDLM_FL_CANCELING but NOT
- * canceled locally yet.
- */
- count = ldlm_cli_cancel_list_local(&blwi->blwi_head,
- blwi->blwi_count,
- LCF_BL_AST);
- ldlm_cli_cancel_list(&blwi->blwi_head, count, NULL,
- blwi->blwi_flags);
- } else {
- ldlm_handle_bl_callback(blwi->blwi_ns, &blwi->blwi_ld,
- blwi->blwi_lock);
- }
- if (blwi->blwi_mem_pressure)
- memalloc_noreclaim_restore(flags);
-
- if (blwi->blwi_flags & LCF_ASYNC)
- kfree(blwi);
- else
- complete(&blwi->blwi_comp);
-
- return 0;
-}
-
-/**
- * Main blocking requests processing thread.
- *
- * Callers put locks into its queue by calling ldlm_bl_to_thread.
- * This thread in the end ends up doing actual call to ->l_blocking_ast
- * for queued locks.
- */
-static int ldlm_bl_thread_main(void *arg)
-{
- struct ldlm_bl_pool *blp;
- struct ldlm_bl_thread_data *bltd = arg;
-
- blp = bltd->bltd_blp;
-
- complete(&bltd->bltd_comp);
- /* cannot use bltd after this, it is only on caller's stack */
-
- while (1) {
- struct ldlm_bl_work_item *blwi = NULL;
- struct obd_export *exp = NULL;
- int rc;
-
- rc = ldlm_bl_get_work(blp, &blwi, &exp);
- if (!rc)
- wait_event_idle_exclusive(blp->blp_waitq,
- ldlm_bl_get_work(blp, &blwi,
- &exp));
- atomic_inc(&blp->blp_busy_threads);
-
- if (ldlm_bl_thread_need_create(blp, blwi))
- /* discard the return value, we tried */
- ldlm_bl_thread_start(blp, true);
-
- if (blwi)
- rc = ldlm_bl_thread_blwi(blp, blwi);
-
- atomic_dec(&blp->blp_busy_threads);
-
- if (rc == LDLM_ITER_STOP)
- break;
- }
-
- atomic_dec(&blp->blp_num_threads);
- complete(&blp->blp_comp);
- return 0;
-}
-
-static int ldlm_setup(void);
-static int ldlm_cleanup(void);
-
-int ldlm_get_ref(void)
-{
- int rc = 0;
-
- rc = ptlrpc_inc_ref();
- if (rc)
- return rc;
-
- mutex_lock(&ldlm_ref_mutex);
- if (++ldlm_refcount == 1) {
- rc = ldlm_setup();
- if (rc)
- ldlm_refcount--;
- }
- mutex_unlock(&ldlm_ref_mutex);
-
- if (rc)
- ptlrpc_dec_ref();
-
- return rc;
-}
-
-void ldlm_put_ref(void)
-{
- int rc = 0;
-
- mutex_lock(&ldlm_ref_mutex);
- if (ldlm_refcount == 1) {
- rc = ldlm_cleanup();
-
- if (rc)
- CERROR("ldlm_cleanup failed: %d\n", rc);
- else
- ldlm_refcount--;
- } else {
- ldlm_refcount--;
- }
- mutex_unlock(&ldlm_ref_mutex);
- if (!rc)
- ptlrpc_dec_ref();
-}
-
-static ssize_t cancel_unused_locks_before_replay_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- return sprintf(buf, "%d\n", ldlm_cancel_unused_locks_before_replay);
-}
-
-static ssize_t cancel_unused_locks_before_replay_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- int rc;
- unsigned long val;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- ldlm_cancel_unused_locks_before_replay = val;
-
- return count;
-}
-LUSTRE_RW_ATTR(cancel_unused_locks_before_replay);
-
-/* These are for root of /sys/fs/lustre/ldlm */
-static struct attribute *ldlm_attrs[] = {
- &lustre_attr_cancel_unused_locks_before_replay.attr,
- NULL,
-};
-
-static const struct attribute_group ldlm_attr_group = {
- .attrs = ldlm_attrs,
-};
-
-static int ldlm_setup(void)
-{
- static struct ptlrpc_service_conf conf;
- struct ldlm_bl_pool *blp = NULL;
- int rc = 0;
- int i;
-
- if (ldlm_state)
- return -EALREADY;
-
- ldlm_state = kzalloc(sizeof(*ldlm_state), GFP_NOFS);
- if (!ldlm_state)
- return -ENOMEM;
-
- ldlm_kobj = kobject_create_and_add("ldlm", lustre_kobj);
- if (!ldlm_kobj) {
- rc = -ENOMEM;
- goto out;
- }
-
- rc = sysfs_create_group(ldlm_kobj, &ldlm_attr_group);
- if (rc)
- goto out;
-
- ldlm_ns_kset = kset_create_and_add("namespaces", NULL, ldlm_kobj);
- if (!ldlm_ns_kset) {
- rc = -ENOMEM;
- goto out;
- }
-
- ldlm_svc_kset = kset_create_and_add("services", NULL, ldlm_kobj);
- if (!ldlm_svc_kset) {
- rc = -ENOMEM;
- goto out;
- }
-
- ldlm_debugfs_setup();
-
- conf = (typeof(conf)) {
- .psc_name = "ldlm_cbd",
- .psc_watchdog_factor = 2,
- .psc_buf = {
- .bc_nbufs = LDLM_CLIENT_NBUFS,
- .bc_buf_size = LDLM_BUFSIZE,
- .bc_req_max_size = LDLM_MAXREQSIZE,
- .bc_rep_max_size = LDLM_MAXREPSIZE,
- .bc_req_portal = LDLM_CB_REQUEST_PORTAL,
- .bc_rep_portal = LDLM_CB_REPLY_PORTAL,
- },
- .psc_thr = {
- .tc_thr_name = "ldlm_cb",
- .tc_thr_factor = LDLM_THR_FACTOR,
- .tc_nthrs_init = LDLM_NTHRS_INIT,
- .tc_nthrs_base = LDLM_NTHRS_BASE,
- .tc_nthrs_max = LDLM_NTHRS_MAX,
- .tc_nthrs_user = ldlm_num_threads,
- .tc_cpu_affinity = 1,
- .tc_ctx_tags = LCT_MD_THREAD | LCT_DT_THREAD,
- },
- .psc_cpt = {
- .cc_pattern = ldlm_cpts,
- },
- .psc_ops = {
- .so_req_handler = ldlm_callback_handler,
- },
- };
- ldlm_state->ldlm_cb_service =
- ptlrpc_register_service(&conf, ldlm_svc_kset,
- ldlm_svc_debugfs_dir);
- if (IS_ERR(ldlm_state->ldlm_cb_service)) {
- CERROR("failed to start service\n");
- rc = PTR_ERR(ldlm_state->ldlm_cb_service);
- ldlm_state->ldlm_cb_service = NULL;
- goto out;
- }
-
- blp = kzalloc(sizeof(*blp), GFP_NOFS);
- if (!blp) {
- rc = -ENOMEM;
- goto out;
- }
- ldlm_state->ldlm_bl_pool = blp;
-
- spin_lock_init(&blp->blp_lock);
- INIT_LIST_HEAD(&blp->blp_list);
- INIT_LIST_HEAD(&blp->blp_prio_list);
- init_waitqueue_head(&blp->blp_waitq);
- atomic_set(&blp->blp_num_threads, 0);
- atomic_set(&blp->blp_busy_threads, 0);
-
- if (ldlm_num_threads == 0) {
- blp->blp_min_threads = LDLM_NTHRS_INIT;
- blp->blp_max_threads = LDLM_NTHRS_MAX;
- } else {
- blp->blp_min_threads = min_t(int, LDLM_NTHRS_MAX,
- max_t(int, LDLM_NTHRS_INIT,
- ldlm_num_threads));
-
- blp->blp_max_threads = blp->blp_min_threads;
- }
-
- for (i = 0; i < blp->blp_min_threads; i++) {
- rc = ldlm_bl_thread_start(blp, false);
- if (rc < 0)
- goto out;
- }
-
- rc = ldlm_pools_init();
- if (rc) {
- CERROR("Failed to initialize LDLM pools: %d\n", rc);
- goto out;
- }
- return 0;
-
- out:
- ldlm_cleanup();
- return rc;
-}
-
-static int ldlm_cleanup(void)
-{
- if (!list_empty(ldlm_namespace_list(LDLM_NAMESPACE_SERVER)) ||
- !list_empty(ldlm_namespace_list(LDLM_NAMESPACE_CLIENT))) {
- CERROR("ldlm still has namespaces; clean these up first.\n");
- ldlm_dump_all_namespaces(LDLM_NAMESPACE_SERVER, D_DLMTRACE);
- ldlm_dump_all_namespaces(LDLM_NAMESPACE_CLIENT, D_DLMTRACE);
- return -EBUSY;
- }
-
- ldlm_pools_fini();
-
- if (ldlm_state->ldlm_bl_pool) {
- struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool;
-
- while (atomic_read(&blp->blp_num_threads) > 0) {
- struct ldlm_bl_work_item blwi = { .blwi_ns = NULL };
-
- init_completion(&blp->blp_comp);
-
- spin_lock(&blp->blp_lock);
- list_add_tail(&blwi.blwi_entry, &blp->blp_list);
- wake_up(&blp->blp_waitq);
- spin_unlock(&blp->blp_lock);
-
- wait_for_completion(&blp->blp_comp);
- }
-
- kfree(blp);
- }
-
- if (ldlm_state->ldlm_cb_service)
- ptlrpc_unregister_service(ldlm_state->ldlm_cb_service);
-
- if (ldlm_ns_kset)
- kset_unregister(ldlm_ns_kset);
- if (ldlm_svc_kset)
- kset_unregister(ldlm_svc_kset);
- if (ldlm_kobj) {
- sysfs_remove_group(ldlm_kobj, &ldlm_attr_group);
- kobject_put(ldlm_kobj);
- }
-
- ldlm_debugfs_cleanup();
-
- kfree(ldlm_state);
- ldlm_state = NULL;
-
- return 0;
-}
-
-int ldlm_init(void)
-{
- mutex_init(&ldlm_ref_mutex);
- mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_SERVER));
- mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_CLIENT));
- ldlm_resource_slab = kmem_cache_create("ldlm_resources",
- sizeof(struct ldlm_resource), 0,
- SLAB_HWCACHE_ALIGN, NULL);
- if (!ldlm_resource_slab)
- return -ENOMEM;
-
- ldlm_lock_slab = kmem_cache_create("ldlm_locks",
- sizeof(struct ldlm_lock), 0,
- SLAB_HWCACHE_ALIGN |
- SLAB_TYPESAFE_BY_RCU, NULL);
- if (!ldlm_lock_slab) {
- kmem_cache_destroy(ldlm_resource_slab);
- return -ENOMEM;
- }
-
- ldlm_interval_slab = kmem_cache_create("interval_node",
- sizeof(struct ldlm_interval),
- 0, SLAB_HWCACHE_ALIGN, NULL);
- if (!ldlm_interval_slab) {
- kmem_cache_destroy(ldlm_resource_slab);
- kmem_cache_destroy(ldlm_lock_slab);
- return -ENOMEM;
- }
-#if LUSTRE_TRACKS_LOCK_EXP_REFS
- class_export_dump_hook = ldlm_dump_export_locks;
-#endif
- return 0;
-}
-
-void ldlm_exit(void)
-{
- if (ldlm_refcount)
- CERROR("ldlm_refcount is %d in %s!\n", ldlm_refcount, __func__);
- kmem_cache_destroy(ldlm_resource_slab);
-	/* ldlm_lock_put() uses RCU to call ldlm_lock_free(), so call
-	 * synchronize_rcu() to wait for a grace period to elapse, giving
-	 * ldlm_lock_free() a chance to run.
-	 */
- synchronize_rcu();
- kmem_cache_destroy(ldlm_lock_slab);
- kmem_cache_destroy(ldlm_interval_slab);
-}
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c b/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c
deleted file mode 100644
index 33b5a3f96fcb..000000000000
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c
+++ /dev/null
@@ -1,68 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ldlm/ldlm_plain.c
- *
- * Author: Peter Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-/**
- * This file contains the implementation of the PLAIN lock type.
- *
- * PLAIN locks are the simplest form of LDLM locking, and are used when
- * there only needs to be a single lock on a resource. This avoids some
- * of the complexity of EXTENT and IBITS lock types, but doesn't allow
- * different "parts" of a resource to be locked concurrently. Example
- * use cases for PLAIN locks include locking of MGS configuration logs
- * and (as of Lustre 2.4) quota records.
- */
-
-#define DEBUG_SUBSYSTEM S_LDLM
-
-#include <lustre_dlm.h>
-#include <obd_support.h>
-#include <lustre_lib.h>
-
-#include "ldlm_internal.h"
-
-void ldlm_plain_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy,
- union ldlm_policy_data *lpolicy)
-{
- /* No policy for plain locks */
-}
-
-void ldlm_plain_policy_local_to_wire(const union ldlm_policy_data *lpolicy,
- union ldlm_wire_policy_data *wpolicy)
-{
- /* No policy for plain locks */
-}
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
deleted file mode 100644
index 36d14ee4e5b1..000000000000
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
+++ /dev/null
@@ -1,1013 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2010, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ldlm/ldlm_pool.c
- *
- * Author: Yury Umanets <umka@clusterfs.com>
- */
-
-/*
- * The idea of this code is rather simple. Every second, for each server
- * namespace, we have the SLV - server lock volume - which is calculated
- * from the current number of granted locks, the grant speed over the
- * past period, etc - that is, from the locking load. For simplicity,
- * this SLV number may be thought of as a flow definition. It is sent to
- * clients at every opportunity to let them know the current load
- * situation on the server. By default, at startup, the SLV on the
- * server is set to a maximum value, calculated as follows: allow one
- * client to hold all locks of limit ->pl_limit for 10h.
- *
- * Next, on clients, the number of cached locks is no longer limited
- * artificially in any way, as it was before. Instead, the client
- * calculates the CLV, that is, the client lock volume, for each lock
- * and compares it with the last SLV from the server. The CLV is
- * calculated as the number of locks in the LRU * lock live time in
- * seconds. If CLV > SLV, the lock is canceled.
- *
- * The client has the LVF, that is, the lock volume factor, which
- * regulates how sensitive the client should be to the last SLV from the
- * server. The higher the LVF, the more locks are canceled on the
- * client. Its default value is 1. Setting the LVF to 2 means that the
- * client will cancel locks twice as fast.
- *
- * Locks on a client are canceled more aggressively in these cases:
- * (1) the SLV is smaller, that is, the load is higher on the server;
- * (2) the client holds a lot of locks (the more locks a client holds,
- * the bigger the chance that some of them should be canceled);
- * (3) the client has old locks (taken some time ago).
- *
- * Thus, according to the flow paradigm that we use to better understand
- * the SLV, the CLV is the volume of a particle in the flow described by
- * the SLV. Accordingly, if the flow is getting thinner, more and more
- * particles fall outside of it, and as the particles are locks, they
- * should be canceled.
- *
- * The general idea of this belongs to Vitaly Fertman (vitaly@clusterfs.com).
- * Andreas Dilger (adilger@clusterfs.com) proposed a few nice ideas, such
- * as using the LVF, and many cleanups. The flow definition, which allows
- * an easier understanding of the logic, belongs to Nikita Danilov
- * (nikita@clusterfs.com), as do many cleanups and fixes. The design and
- * implementation are by Yury Umanets (umka@clusterfs.com).
- *
- * Glossary of terms used:
- *
- * pl_limit - Number of allowed locks in pool. Applies to server and
- * client side (tunable);
- *
- * pl_granted - Number of granted locks (calculated);
- * pl_grant_rate - Number of granted locks for last T (calculated);
- * pl_cancel_rate - Number of canceled locks for last T (calculated);
- * pl_grant_speed - Grant speed (GR - CR) for last T (calculated);
- * pl_grant_plan - Planned number of granted locks for next T (calculated);
- * pl_server_lock_volume - Current server lock volume (calculated);
- *
- * As can be seen from the list above, we have a few tunables which can
- * affect behavior significantly. They can all be modified via sysfs, and
- * together they allow a few pre-defined behavior policies to be
- * constructed. If none of the predefined policies suits the working
- * pattern in use, a new one may be "constructed" via the sysfs tunables.
- */
-
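
/* A minimal user-space sketch of the cancellation rule described above:
 * CLV = locks in LRU * lock age, scaled by the LVF, and the lock is
 * canceled once CLV exceeds the server's SLV. The function and numbers
 * are illustrative, not the kernel's.
 */
#include <stdio.h>
#include <stdint.h>

static int should_cancel(uint64_t lru_locks, uint64_t age_sec,
			 uint32_t lvf, uint64_t slv)
{
	uint64_t clv = lru_locks * age_sec * lvf; /* client lock volume */

	return clv > slv;
}

int main(void)
{
	/* 1000 cached locks, 60s old, SLV 100000: LVF=1 keeps, LVF=2 cancels */
	printf("%d %d\n", should_cancel(1000, 60, 1, 100000),
	       should_cancel(1000, 60, 2, 100000));
	return 0;
}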
-#define DEBUG_SUBSYSTEM S_LDLM
-
-#include <lustre_dlm.h>
-#include <cl_object.h>
-#include <obd_class.h>
-#include <obd_support.h>
-#include "ldlm_internal.h"
-
-/*
- * 50 ldlm locks for 1MB of RAM.
- */
-#define LDLM_POOL_HOST_L ((NUM_CACHEPAGES >> (20 - PAGE_SHIFT)) * 50)
-
-/*
- * Maximal possible grant step plan in %.
- */
-#define LDLM_POOL_MAX_GSP (30)
-
-/*
- * Minimal possible grant step plan in %.
- */
-#define LDLM_POOL_MIN_GSP (1)
-
-/*
- * This controls the speed of reaching LDLM_POOL_MAX_GSP
- * with increasing thread period.
- */
-#define LDLM_POOL_GSP_STEP_SHIFT (2)
-
-/*
- * LDLM_POOL_MAX_GSP% of all locks is the default grant plan (GP).
- */
-#define LDLM_POOL_GP(L) (((L) * LDLM_POOL_MAX_GSP) / 100)
-
-/*
- * Max age for locks on clients.
- */
-#define LDLM_POOL_MAX_AGE (36000)
-
-/*
- * The granularity of SLV calculation.
- */
-#define LDLM_POOL_SLV_SHIFT (10)
-
-static inline __u64 dru(__u64 val, __u32 shift, int round_up)
-{
- return (val + (round_up ? (1 << shift) - 1 : 0)) >> shift;
-}
-
-static inline __u64 ldlm_pool_slv_max(__u32 L)
-{
- /*
-	 * Allow one client to hold all locks for 10 hrs.
-	 * The formula is: limit * 10h / 1 client.
- */
- __u64 lim = (__u64)L * LDLM_POOL_MAX_AGE / 1;
- return lim;
-}
-
-static inline __u64 ldlm_pool_slv_min(__u32 L)
-{
- return 1;
-}
-
-enum {
- LDLM_POOL_FIRST_STAT = 0,
- LDLM_POOL_GRANTED_STAT = LDLM_POOL_FIRST_STAT,
- LDLM_POOL_GRANT_STAT,
- LDLM_POOL_CANCEL_STAT,
- LDLM_POOL_GRANT_RATE_STAT,
- LDLM_POOL_CANCEL_RATE_STAT,
- LDLM_POOL_GRANT_PLAN_STAT,
- LDLM_POOL_SLV_STAT,
- LDLM_POOL_SHRINK_REQTD_STAT,
- LDLM_POOL_SHRINK_FREED_STAT,
- LDLM_POOL_RECALC_STAT,
- LDLM_POOL_TIMING_STAT,
- LDLM_POOL_LAST_STAT
-};
-
-/**
- * Calculates the suggested grant_step in % of available locks for the
- * passed period \a t. This is later used in grant_plan calculations.
- */
-static inline int ldlm_pool_t2gsp(unsigned int t)
-{
-	/*
-	 * This yields a 1% grant step for anything below LDLM_POOL_GSP_STEP
-	 * and up to 30% for anything higher.
-	 *
-	 * How this affects execution:
-	 *
-	 * - for a thread period of 1s we will have a grant_step of 1%,
-	 * which is good for taking some load off the server and pushing
-	 * it out to clients. A 1% grant_step means that the server will
-	 * not allow clients to grab lots of locks in a short period of
-	 * time while keeping all their old locks cached. Clients will
-	 * always have to give some locks back if they want to take new
-	 * ones;
-	 *
-	 * - for a thread period of 10s (the default) we will have 23%,
-	 * which means that clients will have enough room to take new
-	 * locks without giving any back. All locks from this 23% which
-	 * were not taken by clients in the current period will contribute
-	 * to SLV growth. SLV growth means more locks cached on clients,
-	 * until the limit or the grant plan is reached.
-	 */
- return LDLM_POOL_MAX_GSP -
- ((LDLM_POOL_MAX_GSP - LDLM_POOL_MIN_GSP) >>
- (t >> LDLM_POOL_GSP_STEP_SHIFT));
-}
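
/* The formula above can be checked stand-alone; a minimal user-space
 * sketch, with the constants copied from this file:
 */
#include <stdio.h>

#define MAX_GSP		30	/* LDLM_POOL_MAX_GSP */
#define MIN_GSP		1	/* LDLM_POOL_MIN_GSP */
#define STEP_SHIFT	2	/* LDLM_POOL_GSP_STEP_SHIFT */

static int t2gsp(unsigned int t)
{
	return MAX_GSP - ((MAX_GSP - MIN_GSP) >> (t >> STEP_SHIFT));
}

int main(void)
{
	printf("t=1s  -> %d%%\n", t2gsp(1));	/* 1%, as the comment says  */
	printf("t=10s -> %d%%\n", t2gsp(10));	/* 23%, the default period  */
	return 0;
}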
-
-/**
- * Recalculates next stats on passed \a pl.
- *
- * \pre ->pl_lock is locked.
- */
-static void ldlm_pool_recalc_stats(struct ldlm_pool *pl)
-{
- int grant_plan = pl->pl_grant_plan;
- __u64 slv = pl->pl_server_lock_volume;
- int granted = atomic_read(&pl->pl_granted);
- int grant_rate = atomic_read(&pl->pl_grant_rate);
- int cancel_rate = atomic_read(&pl->pl_cancel_rate);
-
- lprocfs_counter_add(pl->pl_stats, LDLM_POOL_SLV_STAT,
- slv);
- lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANTED_STAT,
- granted);
- lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANT_RATE_STAT,
- grant_rate);
- lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANT_PLAN_STAT,
- grant_plan);
- lprocfs_counter_add(pl->pl_stats, LDLM_POOL_CANCEL_RATE_STAT,
- cancel_rate);
-}
-
-/**
- * Sets SLV and Limit from container_of(pl, struct ldlm_namespace,
- * ns_pool)->ns_obd to the passed \a pl.
- */
-static void ldlm_cli_pool_pop_slv(struct ldlm_pool *pl)
-{
- struct obd_device *obd;
-
- /*
- * Get new SLV and Limit from obd which is updated with coming
- * RPCs.
- */
- obd = container_of(pl, struct ldlm_namespace,
- ns_pool)->ns_obd;
- read_lock(&obd->obd_pool_lock);
- pl->pl_server_lock_volume = obd->obd_pool_slv;
- atomic_set(&pl->pl_limit, obd->obd_pool_limit);
- read_unlock(&obd->obd_pool_lock);
-}
-
-/**
- * Recalculates the client-side pool \a pl according to the current SLV and Limit.
- */
-static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
-{
- time64_t recalc_interval_sec;
- int ret;
-
- recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
- if (recalc_interval_sec < pl->pl_recalc_period)
- return 0;
-
- spin_lock(&pl->pl_lock);
- /*
- * Check if we need to recalc lists now.
- */
- recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
- if (recalc_interval_sec < pl->pl_recalc_period) {
- spin_unlock(&pl->pl_lock);
- return 0;
- }
-
- /*
- * Make sure that pool knows last SLV and Limit from obd.
- */
- ldlm_cli_pool_pop_slv(pl);
-
- spin_unlock(&pl->pl_lock);
-
- /*
- * Do not cancel locks in case lru resize is disabled for this ns.
- */
- if (!ns_connect_lru_resize(container_of(pl, struct ldlm_namespace,
- ns_pool))) {
- ret = 0;
- goto out;
- }
-
- /*
-	 * While canceling locks on the client we do not need to maintain
-	 * sharp timing; we only want to cancel locks asap according to the
-	 * new SLV. This may be called when the SLV has changed a lot, which
-	 * is why we do not take pl->pl_recalc_time into account here.
- */
- ret = ldlm_cancel_lru(container_of(pl, struct ldlm_namespace, ns_pool),
- 0, LCF_ASYNC, LDLM_LRU_FLAG_LRUR);
-
-out:
- spin_lock(&pl->pl_lock);
- /*
- * Time of LRU resizing might be longer than period,
- * so update after LRU resizing rather than before it.
- */
- pl->pl_recalc_time = ktime_get_real_seconds();
- lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT,
- recalc_interval_sec);
- spin_unlock(&pl->pl_lock);
- return ret;
-}
-
-/**
- * This function is the main entry point for memory pressure handling on
- * the client side. Its main goal is to cancel some number of locks on
- * the passed \a pl according to \a nr and \a gfp_mask.
- */
-static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
- int nr, gfp_t gfp_mask)
-{
- struct ldlm_namespace *ns;
- int unused;
-
- ns = container_of(pl, struct ldlm_namespace, ns_pool);
-
- /*
- * Do not cancel locks in case lru resize is disabled for this ns.
- */
- if (!ns_connect_lru_resize(ns))
- return 0;
-
- /*
- * Make sure that pool knows last SLV and Limit from obd.
- */
- ldlm_cli_pool_pop_slv(pl);
-
- spin_lock(&ns->ns_lock);
- unused = ns->ns_nr_unused;
- spin_unlock(&ns->ns_lock);
-
- if (nr == 0)
- return (unused / 100) * sysctl_vfs_cache_pressure;
- else
- return ldlm_cancel_lru(ns, nr, LCF_ASYNC, LDLM_LRU_FLAG_SHRINK);
-}
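
/* The nr == 0 branch above implements the shrinker "count" pass: it
 * reports roughly 1% of the unused LRU locks per unit of
 * vfs_cache_pressure. A stand-alone sketch with made-up numbers:
 */
#include <stdio.h>

int main(void)
{
	unsigned int unused = 5000;		/* locks in the namespace LRU */
	unsigned int vfs_cache_pressure = 100;	/* sysctl default            */

	/* same arithmetic as the nr == 0 return above */
	printf("%u freeable\n", (unused / 100) * vfs_cache_pressure); /* 5000 */
	return 0;
}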
-
-static const struct ldlm_pool_ops ldlm_cli_pool_ops = {
- .po_recalc = ldlm_cli_pool_recalc,
- .po_shrink = ldlm_cli_pool_shrink
-};
-
-/**
- * Pool recalc wrapper. Calls either the client or the server pool recalc
- * callback depending on which kind of pool \a pl is.
- */
-static int ldlm_pool_recalc(struct ldlm_pool *pl)
-{
- u32 recalc_interval_sec;
- int count;
-
- recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
- if (recalc_interval_sec > 0) {
- spin_lock(&pl->pl_lock);
- recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
-
- if (recalc_interval_sec > 0) {
- /*
- * Update pool statistics every 1s.
- */
- ldlm_pool_recalc_stats(pl);
-
- /*
- * Zero out all rates and speed for the last period.
- */
- atomic_set(&pl->pl_grant_rate, 0);
- atomic_set(&pl->pl_cancel_rate, 0);
- }
- spin_unlock(&pl->pl_lock);
- }
-
- if (pl->pl_ops->po_recalc) {
- count = pl->pl_ops->po_recalc(pl);
- lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT,
- count);
- }
-
- recalc_interval_sec = pl->pl_recalc_time - ktime_get_real_seconds() +
- pl->pl_recalc_period;
- if (recalc_interval_sec <= 0) {
- /* DEBUG: should be re-removed after LU-4536 is fixed */
- CDEBUG(D_DLMTRACE,
- "%s: Negative interval(%ld), too short period(%ld)\n",
- pl->pl_name, (long)recalc_interval_sec,
- (long)pl->pl_recalc_period);
-
- /* Prevent too frequent recalculation. */
- recalc_interval_sec = 1;
- }
-
- return recalc_interval_sec;
-}
-
-/*
- * Pool shrink wrapper. Calls either the client or the server pool shrink
- * callback depending on which kind of pool pl is. When nr == 0, just
- * return the number of freeable locks. Otherwise, return the number of
- * canceled locks.
- */
-static int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, gfp_t gfp_mask)
-{
- int cancel = 0;
-
- if (pl->pl_ops->po_shrink) {
- cancel = pl->pl_ops->po_shrink(pl, nr, gfp_mask);
- if (nr > 0) {
- lprocfs_counter_add(pl->pl_stats,
- LDLM_POOL_SHRINK_REQTD_STAT,
- nr);
- lprocfs_counter_add(pl->pl_stats,
- LDLM_POOL_SHRINK_FREED_STAT,
- cancel);
- CDEBUG(D_DLMTRACE,
- "%s: request to shrink %d locks, shrunk %d\n",
- pl->pl_name, nr, cancel);
- }
- }
- return cancel;
-}
-
-static int lprocfs_pool_state_seq_show(struct seq_file *m, void *unused)
-{
- int granted, grant_rate, cancel_rate;
- int grant_speed, lvf;
- struct ldlm_pool *pl = m->private;
- __u64 slv, clv;
- __u32 limit;
-
- spin_lock(&pl->pl_lock);
- slv = pl->pl_server_lock_volume;
- clv = pl->pl_client_lock_volume;
- limit = atomic_read(&pl->pl_limit);
- granted = atomic_read(&pl->pl_granted);
- grant_rate = atomic_read(&pl->pl_grant_rate);
- cancel_rate = atomic_read(&pl->pl_cancel_rate);
- grant_speed = grant_rate - cancel_rate;
- lvf = atomic_read(&pl->pl_lock_volume_factor);
- spin_unlock(&pl->pl_lock);
-
- seq_printf(m, "LDLM pool state (%s):\n"
- " SLV: %llu\n"
- " CLV: %llu\n"
- " LVF: %d\n",
- pl->pl_name, slv, clv, lvf);
-
- seq_printf(m, " GR: %d\n CR: %d\n GS: %d\n"
- " G: %d\n L: %d\n",
- grant_rate, cancel_rate, grant_speed,
- granted, limit);
-
- return 0;
-}
-
-LPROC_SEQ_FOPS_RO(lprocfs_pool_state);
-
-static ssize_t grant_speed_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool,
- pl_kobj);
-
- int grant_speed;
-
- spin_lock(&pl->pl_lock);
- /* serialize with ldlm_pool_recalc */
- grant_speed = atomic_read(&pl->pl_grant_rate) -
- atomic_read(&pl->pl_cancel_rate);
- spin_unlock(&pl->pl_lock);
- return sprintf(buf, "%d\n", grant_speed);
-}
-LUSTRE_RO_ATTR(grant_speed);
-
-LDLM_POOL_SYSFS_READER_SHOW(grant_plan, int);
-LUSTRE_RO_ATTR(grant_plan);
-
-LDLM_POOL_SYSFS_READER_SHOW(recalc_period, int);
-LDLM_POOL_SYSFS_WRITER_STORE(recalc_period, int);
-LUSTRE_RW_ATTR(recalc_period);
-
-LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(server_lock_volume, u64);
-LUSTRE_RO_ATTR(server_lock_volume);
-
-LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(limit, atomic);
-LDLM_POOL_SYSFS_WRITER_NOLOCK_STORE(limit, atomic);
-LUSTRE_RW_ATTR(limit);
-
-LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(granted, atomic);
-LUSTRE_RO_ATTR(granted);
-
-LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(cancel_rate, atomic);
-LUSTRE_RO_ATTR(cancel_rate);
-
-LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(grant_rate, atomic);
-LUSTRE_RO_ATTR(grant_rate);
-
-LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(lock_volume_factor, atomic);
-LDLM_POOL_SYSFS_WRITER_NOLOCK_STORE(lock_volume_factor, atomic);
-LUSTRE_RW_ATTR(lock_volume_factor);
-
-#define LDLM_POOL_ADD_VAR(name, var, ops) \
- do { \
- snprintf(var_name, MAX_STRING_SIZE, #name); \
- pool_vars[0].data = var; \
- pool_vars[0].fops = ops; \
- ldebugfs_add_vars(pl->pl_debugfs_entry, pool_vars, NULL);\
- } while (0)
-
-/* These are for pools in /sys/fs/lustre/ldlm/namespaces/.../pool */
-static struct attribute *ldlm_pl_attrs[] = {
- &lustre_attr_grant_speed.attr,
- &lustre_attr_grant_plan.attr,
- &lustre_attr_recalc_period.attr,
- &lustre_attr_server_lock_volume.attr,
- &lustre_attr_limit.attr,
- &lustre_attr_granted.attr,
- &lustre_attr_cancel_rate.attr,
- &lustre_attr_grant_rate.attr,
- &lustre_attr_lock_volume_factor.attr,
- NULL,
-};
-
-static void ldlm_pl_release(struct kobject *kobj)
-{
- struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool,
- pl_kobj);
- complete(&pl->pl_kobj_unregister);
-}
-
-static struct kobj_type ldlm_pl_ktype = {
- .default_attrs = ldlm_pl_attrs,
- .sysfs_ops = &lustre_sysfs_ops,
- .release = ldlm_pl_release,
-};
-
-static int ldlm_pool_sysfs_init(struct ldlm_pool *pl)
-{
- struct ldlm_namespace *ns = container_of(pl, struct ldlm_namespace,
- ns_pool);
- int err;
-
- init_completion(&pl->pl_kobj_unregister);
- err = kobject_init_and_add(&pl->pl_kobj, &ldlm_pl_ktype, &ns->ns_kobj,
- "pool");
-
- return err;
-}
-
-static int ldlm_pool_debugfs_init(struct ldlm_pool *pl)
-{
- struct ldlm_namespace *ns = container_of(pl, struct ldlm_namespace,
- ns_pool);
- struct dentry *debugfs_ns_parent;
- struct lprocfs_vars pool_vars[2];
- char *var_name = NULL;
- int rc = 0;
-
- var_name = kzalloc(MAX_STRING_SIZE + 1, GFP_NOFS);
- if (!var_name)
- return -ENOMEM;
-
- debugfs_ns_parent = ns->ns_debugfs_entry;
- if (IS_ERR_OR_NULL(debugfs_ns_parent)) {
- CERROR("%s: debugfs entry is not initialized\n",
- ldlm_ns_name(ns));
- rc = -EINVAL;
- goto out_free_name;
- }
- pl->pl_debugfs_entry = debugfs_create_dir("pool", debugfs_ns_parent);
-
- var_name[MAX_STRING_SIZE] = '\0';
- memset(pool_vars, 0, sizeof(pool_vars));
- pool_vars[0].name = var_name;
-
- LDLM_POOL_ADD_VAR(state, pl, &lprocfs_pool_state_fops);
-
- pl->pl_stats = lprocfs_alloc_stats(LDLM_POOL_LAST_STAT -
- LDLM_POOL_FIRST_STAT, 0);
- if (!pl->pl_stats) {
- rc = -ENOMEM;
- goto out_free_name;
- }
-
- lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANTED_STAT,
- LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
- "granted", "locks");
- lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_STAT,
- LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
- "grant", "locks");
- lprocfs_counter_init(pl->pl_stats, LDLM_POOL_CANCEL_STAT,
- LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
- "cancel", "locks");
- lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_RATE_STAT,
- LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
- "grant_rate", "locks/s");
- lprocfs_counter_init(pl->pl_stats, LDLM_POOL_CANCEL_RATE_STAT,
- LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
- "cancel_rate", "locks/s");
- lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_PLAN_STAT,
- LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
- "grant_plan", "locks/s");
- lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SLV_STAT,
- LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
- "slv", "slv");
- lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SHRINK_REQTD_STAT,
- LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
- "shrink_request", "locks");
- lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SHRINK_FREED_STAT,
- LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
- "shrink_freed", "locks");
- lprocfs_counter_init(pl->pl_stats, LDLM_POOL_RECALC_STAT,
- LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
- "recalc_freed", "locks");
- lprocfs_counter_init(pl->pl_stats, LDLM_POOL_TIMING_STAT,
- LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
- "recalc_timing", "sec");
- debugfs_create_file("stats", 0644, pl->pl_debugfs_entry, pl->pl_stats,
- &lprocfs_stats_seq_fops);
-
-out_free_name:
- kfree(var_name);
- return rc;
-}
-
-static void ldlm_pool_sysfs_fini(struct ldlm_pool *pl)
-{
- kobject_put(&pl->pl_kobj);
- wait_for_completion(&pl->pl_kobj_unregister);
-}
-
-static void ldlm_pool_debugfs_fini(struct ldlm_pool *pl)
-{
- if (pl->pl_stats) {
- lprocfs_free_stats(&pl->pl_stats);
- pl->pl_stats = NULL;
- }
- debugfs_remove_recursive(pl->pl_debugfs_entry);
-}
-
-int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns,
- int idx, enum ldlm_side client)
-{
- int rc;
-
- spin_lock_init(&pl->pl_lock);
- atomic_set(&pl->pl_granted, 0);
- pl->pl_recalc_time = ktime_get_real_seconds();
- atomic_set(&pl->pl_lock_volume_factor, 1);
-
- atomic_set(&pl->pl_grant_rate, 0);
- atomic_set(&pl->pl_cancel_rate, 0);
- pl->pl_grant_plan = LDLM_POOL_GP(LDLM_POOL_HOST_L);
-
- snprintf(pl->pl_name, sizeof(pl->pl_name), "ldlm-pool-%s-%d",
- ldlm_ns_name(ns), idx);
-
- atomic_set(&pl->pl_limit, 1);
- pl->pl_server_lock_volume = 0;
- pl->pl_ops = &ldlm_cli_pool_ops;
- pl->pl_recalc_period = LDLM_POOL_CLI_DEF_RECALC_PERIOD;
- pl->pl_client_lock_volume = 0;
- rc = ldlm_pool_debugfs_init(pl);
- if (rc)
- return rc;
-
- rc = ldlm_pool_sysfs_init(pl);
- if (rc)
- return rc;
-
- CDEBUG(D_DLMTRACE, "Lock pool %s is initialized\n", pl->pl_name);
-
- return rc;
-}
-
-void ldlm_pool_fini(struct ldlm_pool *pl)
-{
- ldlm_pool_sysfs_fini(pl);
- ldlm_pool_debugfs_fini(pl);
-
- /*
-	 * The pool should not be used after this point. We can't free it
-	 * here as it lives in struct ldlm_namespace, but we are still
-	 * interested in catching any abnormal use.
- */
- POISON(pl, 0x5a, sizeof(*pl));
-}
-
-/**
- * Add new taken ldlm lock \a lock into pool \a pl accounting.
- */
-void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock)
-{
- /*
-	 * FLOCK locks are special in the sense that they are almost never
-	 * cancelled; instead, a special kind of lock is used to drop them.
-	 * Also, there is no LRU for flock locks, so there is no point in
-	 * tracking them anyway.
- */
- if (lock->l_resource->lr_type == LDLM_FLOCK)
- return;
-
- atomic_inc(&pl->pl_granted);
- atomic_inc(&pl->pl_grant_rate);
- lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_GRANT_STAT);
- /*
-	 * Do not do pool recalc on the client side, as all locks which
-	 * may potentially be canceled have already been packed into the
-	 * enqueue/cancel RPC. Also, we do not want to run out of stack
-	 * with too-long call paths.
- */
-}
-
-/**
- * Remove ldlm lock \a lock from pool \a pl accounting.
- */
-void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock)
-{
- /*
- * Filter out FLOCK locks. Read above comment in ldlm_pool_add().
- */
- if (lock->l_resource->lr_type == LDLM_FLOCK)
- return;
-
- LASSERT(atomic_read(&pl->pl_granted) > 0);
- atomic_dec(&pl->pl_granted);
- atomic_inc(&pl->pl_cancel_rate);
-
- lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_CANCEL_STAT);
-}
-
-/**
- * Returns current \a pl SLV.
- *
- * \pre ->pl_lock is not locked.
- */
-__u64 ldlm_pool_get_slv(struct ldlm_pool *pl)
-{
- __u64 slv;
-
- spin_lock(&pl->pl_lock);
- slv = pl->pl_server_lock_volume;
- spin_unlock(&pl->pl_lock);
- return slv;
-}
-
-/**
- * Sets passed \a clv to \a pl.
- *
- * \pre ->pl_lock is not locked.
- */
-void ldlm_pool_set_clv(struct ldlm_pool *pl, __u64 clv)
-{
- spin_lock(&pl->pl_lock);
- pl->pl_client_lock_volume = clv;
- spin_unlock(&pl->pl_lock);
-}
-
-/**
- * Returns current LVF from \a pl.
- */
-__u32 ldlm_pool_get_lvf(struct ldlm_pool *pl)
-{
- return atomic_read(&pl->pl_lock_volume_factor);
-}
-
-static int ldlm_pool_granted(struct ldlm_pool *pl)
-{
- return atomic_read(&pl->pl_granted);
-}
-
-/*
- * Count locks from all namespaces (if possible). Returns the number of
- * cached locks.
- */
-static unsigned long ldlm_pools_count(enum ldlm_side client, gfp_t gfp_mask)
-{
- unsigned long total = 0;
- int nr_ns;
- struct ldlm_namespace *ns;
- struct ldlm_namespace *ns_old = NULL; /* loop detection */
-
- if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS))
- return 0;
-
- CDEBUG(D_DLMTRACE, "Request to count %s locks from all pools\n",
- client == LDLM_NAMESPACE_CLIENT ? "client" : "server");
-
- /*
- * Find out how many resources we may release.
- */
- for (nr_ns = ldlm_namespace_nr_read(client);
- nr_ns > 0; nr_ns--) {
- mutex_lock(ldlm_namespace_lock(client));
- if (list_empty(ldlm_namespace_list(client))) {
- mutex_unlock(ldlm_namespace_lock(client));
- return 0;
- }
- ns = ldlm_namespace_first_locked(client);
-
- if (ns == ns_old) {
- mutex_unlock(ldlm_namespace_lock(client));
- break;
- }
-
- if (ldlm_ns_empty(ns)) {
- ldlm_namespace_move_to_inactive_locked(ns, client);
- mutex_unlock(ldlm_namespace_lock(client));
- continue;
- }
-
- if (!ns_old)
- ns_old = ns;
-
- ldlm_namespace_get(ns);
- ldlm_namespace_move_to_active_locked(ns, client);
- mutex_unlock(ldlm_namespace_lock(client));
- total += ldlm_pool_shrink(&ns->ns_pool, 0, gfp_mask);
- ldlm_namespace_put(ns);
- }
-
- return total;
-}
-
-static unsigned long ldlm_pools_scan(enum ldlm_side client, int nr,
- gfp_t gfp_mask)
-{
- unsigned long freed = 0;
- int tmp, nr_ns;
- struct ldlm_namespace *ns;
-
- if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS))
- return -1;
-
- /*
- * Shrink at least ldlm_namespace_nr_read(client) namespaces.
- */
- for (tmp = nr_ns = ldlm_namespace_nr_read(client);
- tmp > 0; tmp--) {
- int cancel, nr_locks;
-
- /*
- * Do not call shrink under ldlm_namespace_lock(client)
- */
- mutex_lock(ldlm_namespace_lock(client));
- if (list_empty(ldlm_namespace_list(client))) {
- mutex_unlock(ldlm_namespace_lock(client));
- break;
- }
- ns = ldlm_namespace_first_locked(client);
- ldlm_namespace_get(ns);
- ldlm_namespace_move_to_active_locked(ns, client);
- mutex_unlock(ldlm_namespace_lock(client));
-
- nr_locks = ldlm_pool_granted(&ns->ns_pool);
- /*
-		 * We used to shrink proportionally, but with the new
-		 * shrinker API we lost the total number of freeable locks.
- */
- cancel = 1 + min_t(int, nr_locks, nr / nr_ns);
- freed += ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask);
- ldlm_namespace_put(ns);
- }
- /*
-	 * We only decrease the SLV in the server pools shrinker; return
-	 * SHRINK_STOP to the kernel to avoid a needless loop. LU-1128
- */
- return freed;
-}
-
-static unsigned long ldlm_pools_cli_count(struct shrinker *s,
- struct shrink_control *sc)
-{
- return ldlm_pools_count(LDLM_NAMESPACE_CLIENT, sc->gfp_mask);
-}
-
-static unsigned long ldlm_pools_cli_scan(struct shrinker *s,
- struct shrink_control *sc)
-{
- return ldlm_pools_scan(LDLM_NAMESPACE_CLIENT, sc->nr_to_scan,
- sc->gfp_mask);
-}
-
-static void ldlm_pools_recalc(struct work_struct *ws);
-static DECLARE_DELAYED_WORK(ldlm_recalc_pools, ldlm_pools_recalc);
-
-static void ldlm_pools_recalc(struct work_struct *ws)
-{
- enum ldlm_side client = LDLM_NAMESPACE_CLIENT;
- struct ldlm_namespace *ns;
- struct ldlm_namespace *ns_old = NULL;
- /* seconds of sleep if no active namespaces */
- int time = LDLM_POOL_CLI_DEF_RECALC_PERIOD;
- int nr;
-
- /*
- * Recalc at least ldlm_namespace_nr_read(client) namespaces.
- */
- for (nr = ldlm_namespace_nr_read(client); nr > 0; nr--) {
- int skip;
- /*
-		 * Lock the list, get the first @ns in the list, take a
-		 * reference, move it to the tail, unlock, and call pool
-		 * recalc. This way we avoid calling recalc under the @ns
-		 * lock, which is really good as it avoids a potential
-		 * deadlock on client nodes when canceling locks
-		 * synchronously.
- */
- mutex_lock(ldlm_namespace_lock(client));
- if (list_empty(ldlm_namespace_list(client))) {
- mutex_unlock(ldlm_namespace_lock(client));
- break;
- }
- ns = ldlm_namespace_first_locked(client);
-
- if (ns_old == ns) { /* Full pass complete */
- mutex_unlock(ldlm_namespace_lock(client));
- break;
- }
-
-		/* If we get an empty namespace, we need to move it back to
-		 * the inactive list.
- * The race with parallel resource creation is fine:
- * - If they do namespace_get before our check, we fail the
- * check and they move this item to the end of the list anyway
- * - If we do the check and then they do namespace_get, then
- * we move the namespace to inactive and they will move
- * it back to active (synchronised by the lock, so no clash
- * there).
- */
- if (ldlm_ns_empty(ns)) {
- ldlm_namespace_move_to_inactive_locked(ns, client);
- mutex_unlock(ldlm_namespace_lock(client));
- continue;
- }
-
- if (!ns_old)
- ns_old = ns;
-
- spin_lock(&ns->ns_lock);
- /*
-		 * Skip an ns which is being freed; we don't want to increase
-		 * its refcount again, not even temporarily. bz21519 & LU-499.
- */
- if (ns->ns_stopping) {
- skip = 1;
- } else {
- skip = 0;
- ldlm_namespace_get(ns);
- }
- spin_unlock(&ns->ns_lock);
-
- ldlm_namespace_move_to_active_locked(ns, client);
- mutex_unlock(ldlm_namespace_lock(client));
-
- /*
- * After setup is done - recalc the pool.
- */
- if (!skip) {
- int ttime = ldlm_pool_recalc(&ns->ns_pool);
-
- if (ttime < time)
- time = ttime;
-
- ldlm_namespace_put(ns);
- }
- }
-
- /* Wake up the blocking threads from time to time. */
- ldlm_bl_thread_wakeup();
-
- schedule_delayed_work(&ldlm_recalc_pools, time * HZ);
-}
-
-static int ldlm_pools_thread_start(void)
-{
- schedule_delayed_work(&ldlm_recalc_pools, 0);
-
- return 0;
-}
-
-static void ldlm_pools_thread_stop(void)
-{
- cancel_delayed_work_sync(&ldlm_recalc_pools);
-}
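
/* ldlm_pools_recalc() above uses the standard self-rearming
 * delayed-work pattern; stripped of the LDLM details, its generic
 * kernel form is (do_periodic_work() is hypothetical):
 *
 *	static void my_work_fn(struct work_struct *ws);
 *	static DECLARE_DELAYED_WORK(my_work, my_work_fn);
 *
 *	static void my_work_fn(struct work_struct *ws)
 *	{
 *		int delay = do_periodic_work();
 *
 *		schedule_delayed_work(&my_work, delay * HZ);
 *	}
 *
 * Start by scheduling with delay 0, as above; tear down with
 * cancel_delayed_work_sync() so no re-armed instance is left running.
 */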
-
-static struct shrinker ldlm_pools_cli_shrinker = {
- .count_objects = ldlm_pools_cli_count,
- .scan_objects = ldlm_pools_cli_scan,
- .seeks = DEFAULT_SEEKS,
-};
-
-int ldlm_pools_init(void)
-{
- int rc;
-
- rc = ldlm_pools_thread_start();
- if (!rc)
- rc = register_shrinker(&ldlm_pools_cli_shrinker);
-
- return rc;
-}
-
-void ldlm_pools_fini(void)
-{
- unregister_shrinker(&ldlm_pools_cli_shrinker);
-
- ldlm_pools_thread_stop();
-}
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
deleted file mode 100644
index cdc52eed6d85..000000000000
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
+++ /dev/null
@@ -1,2033 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2010, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-/**
- * This file contains Asynchronous System Trap (AST) handlers and related
- * LDLM request-processing routines.
- *
- * An AST is a callback issued on a lock when its state is changed. There are
- * several different types of ASTs (callbacks) registered for each lock:
- *
- * - completion AST: when a lock is enqueued by some process, but cannot be
- * granted immediately due to other conflicting locks on the same resource,
- * the completion AST is sent to notify the caller when the lock is
- * eventually granted
- *
- * - blocking AST: when a lock is granted to some process, if another process
- * enqueues a conflicting (blocking) lock on a resource, a blocking AST is
- * sent to notify the holder(s) of the lock(s) of the conflicting lock
- * request. The lock holder(s) must release their lock(s) on that resource in
- * a timely manner or be evicted by the server.
- *
- * - glimpse AST: this is used when a process wants information about a lock
- * (i.e. the lock value block (LVB)) but does not necessarily require holding
- * the lock. If the resource is locked, the lock holder(s) are sent glimpse
- * ASTs and the LVB is returned to the caller, and lock holder(s) may CANCEL
- * their lock(s) if they are idle. If the resource is not locked, the server
- * may grant the lock.
- */
-
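/* A minimal sketch of how the three AST types above are wired in by a
 * client: the ldlm_callback_suite used later in this file (see
 * ldlm_cli_enqueue()) carries one handler of each kind. The handler
 * names here are hypothetical.
 *
 *	static const struct ldlm_callback_suite my_cbs = {
 *		.lcs_completion = my_completion_ast,  // lock granted
 *		.lcs_blocking   = my_blocking_ast,    // conflicting enqueue
 *		.lcs_glimpse    = my_glimpse_ast,     // LVB requested
 *	};
 */
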
-#define DEBUG_SUBSYSTEM S_LDLM
-
-#include <lustre_errno.h>
-#include <lustre_dlm.h>
-#include <obd_class.h>
-#include <obd.h>
-#include <linux/libcfs/libcfs_hash.h>
-
-#include "ldlm_internal.h"
-
-unsigned int ldlm_enqueue_min = OBD_TIMEOUT_DEFAULT;
-module_param(ldlm_enqueue_min, uint, 0644);
-MODULE_PARM_DESC(ldlm_enqueue_min, "lock enqueue timeout minimum");
-
-/* On the client side, whether cached locks will be canceled before replay */
-unsigned int ldlm_cancel_unused_locks_before_replay = 1;
-
-struct ldlm_async_args {
- struct lustre_handle lock_handle;
-};
-
-/**
- * ldlm_request_bufsize
- *
- * @count: number of ldlm handles
- * @type: ldlm opcode
- *
- * If opcode=LDLM_ENQUEUE, 1 slot is already occupied,
- * LDLM_LOCKREQ_HANDLE -1 slots are available.
- * Otherwise, LDLM_LOCKREQ_HANDLE slots are available.
- *
- * Return: size of the request buffer
- */
-static int ldlm_request_bufsize(int count, int type)
-{
- int avail = LDLM_LOCKREQ_HANDLES;
-
- if (type == LDLM_ENQUEUE)
- avail -= LDLM_ENQUEUE_CANCEL_OFF;
-
- if (count > avail)
- avail = (count - avail) * sizeof(struct lustre_handle);
- else
- avail = 0;
-
- return sizeof(struct ldlm_request) + avail;
-}
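
/* The sizing rule above in stand-alone form: a fixed number of handle
 * slots is embedded in struct ldlm_request, ENQUEUE reserves one of
 * them, and only the overflow adds bytes. The slot counts and sizes
 * below are stand-ins, not the real kernel constants.
 */
#include <stdio.h>

static int extra_bytes(int count, int slots, int handle_size)
{
	return count > slots ? (count - slots) * handle_size : 0;
}

int main(void)
{
	/* assume 2 embedded slots and 8-byte handles; ENQUEUE keeps 1 slot */
	printf("%d\n", extra_bytes(1, 2 - 1, 8));	/* 0: fits in-line */
	printf("%d\n", extra_bytes(5, 2 - 1, 8));	/* 32: 4 overflow  */
	return 0;
}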
-
-static void ldlm_expired_completion_wait(struct ldlm_lock *lock, __u32 conn_cnt)
-{
- struct obd_import *imp;
- struct obd_device *obd;
-
- if (!lock->l_conn_export) {
- static unsigned long next_dump, last_dump;
-
- LDLM_ERROR(lock,
- "lock timed out (enqueued at %lld, %llds ago); not entering recovery in server code, just going back to sleep",
- (s64)lock->l_last_activity,
- (s64)(ktime_get_real_seconds() -
- lock->l_last_activity));
- if (time_after(jiffies, next_dump)) {
- last_dump = next_dump;
- next_dump = jiffies + 300 * HZ;
- ldlm_namespace_dump(D_DLMTRACE,
- ldlm_lock_to_ns(lock));
- if (last_dump == 0)
- libcfs_debug_dumplog();
- }
- return;
- }
-
- obd = lock->l_conn_export->exp_obd;
- imp = obd->u.cli.cl_import;
- ptlrpc_fail_import(imp, conn_cnt);
- LDLM_ERROR(lock,
- "lock timed out (enqueued at %lld, %llds ago), entering recovery for %s@%s",
- (s64)lock->l_last_activity,
- (s64)(ktime_get_real_seconds() - lock->l_last_activity),
- obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid);
-}
-
-/**
- * Calculate the Completion timeout (covering enqueue, BL AST, data flush,
- * lock cancel, and their replies). Used for lock completion timeout on the
- * client side.
- *
- * \param[in] lock	lock which is waiting for the completion callback
- *
- * \retval timeout in seconds to wait for the server reply
- */
-/* We use the same basis for both server-side and client-side functions
- * on a single node.
- */
-static unsigned int ldlm_cp_timeout(struct ldlm_lock *lock)
-{
- unsigned int timeout;
-
- if (AT_OFF)
- return obd_timeout;
-
- /*
-	 * Wait a long time for enqueue - the server may have to call back
-	 * a lock from another client. The server will evict the other
-	 * client if it doesn't respond reasonably, and then give us the lock.
- */
- timeout = at_get(ldlm_lock_to_ns_at(lock));
- return max(3 * timeout, ldlm_enqueue_min);
-}
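
/* With adaptive timeouts on, the completion timeout above is simply
 * max(3 * AT estimate, ldlm_enqueue_min). A stand-alone sketch with
 * illustrative numbers (the real values come from the AT state and the
 * module parameter):
 */
#include <stdio.h>

static unsigned int cp_timeout(unsigned int at_est, unsigned int enq_min)
{
	unsigned int t = 3 * at_est;

	return t > enq_min ? t : enq_min;
}

int main(void)
{
	printf("%u\n", cp_timeout(7, 100));	/* 100: the floor wins */
	printf("%u\n", cp_timeout(50, 100));	/* 150: 3 * AT wins    */
	return 0;
}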
-
-/**
- * Helper function for ldlm_completion_ast(), updating timings when lock is
- * actually granted.
- */
-static int ldlm_completion_tail(struct ldlm_lock *lock, void *data)
-{
- long delay;
- int result = 0;
-
- if (ldlm_is_destroyed(lock) || ldlm_is_failed(lock)) {
- LDLM_DEBUG(lock, "client-side enqueue: destroyed");
- result = -EIO;
- } else if (!data) {
- LDLM_DEBUG(lock, "client-side enqueue: granted");
- } else {
- /* Take into AT only CP RPC, not immediately granted locks */
- delay = ktime_get_real_seconds() - lock->l_last_activity;
- LDLM_DEBUG(lock, "client-side enqueue: granted after %lds",
- delay);
-
- /* Update our time estimate */
- at_measured(ldlm_lock_to_ns_at(lock), delay);
- }
- return result;
-}
-
-/**
- * Generic LDLM "completion" AST. This is called in several cases:
- *
- * - when a reply to an ENQUEUE RPC is received from the server
- * (ldlm_cli_enqueue_fini()). Lock might be granted or not granted at
- * this point (determined by flags);
- *
- * - when LDLM_CP_CALLBACK RPC comes to client to notify it that lock has
- * been granted;
- *
- * - when ldlm_lock_match(LDLM_FL_LVB_READY) is about to wait until lock
- * gets correct lvb;
- *
- * - to force all locks when resource is destroyed (cleanup_resource());
- *
- * - during lock conversion (not used currently).
- *
- * If the lock is not granted in the first case, this function waits until
- * the second or the penultimate case happens in some other thread.
- *
- */
-int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
-{
- /* XXX ALLOCATE - 160 bytes */
- struct obd_device *obd;
- struct obd_import *imp = NULL;
- __u32 timeout;
- __u32 conn_cnt = 0;
- int rc = 0;
-
- if (flags == LDLM_FL_WAIT_NOREPROC) {
- LDLM_DEBUG(lock, "client-side enqueue waiting on pending lock");
- goto noreproc;
- }
-
- if (!(flags & LDLM_FL_BLOCKED_MASK)) {
- wake_up(&lock->l_waitq);
- return 0;
- }
-
- LDLM_DEBUG(lock,
- "client-side enqueue returned a blocked lock, sleeping");
-
-noreproc:
-
- obd = class_exp2obd(lock->l_conn_export);
-
- /* if this is a local lock, then there is no import */
- if (obd)
- imp = obd->u.cli.cl_import;
-
- timeout = ldlm_cp_timeout(lock);
-
- lock->l_last_activity = ktime_get_real_seconds();
-
- if (imp) {
- spin_lock(&imp->imp_lock);
- conn_cnt = imp->imp_conn_cnt;
- spin_unlock(&imp->imp_lock);
- }
- if (OBD_FAIL_CHECK_RESET(OBD_FAIL_LDLM_INTR_CP_AST,
- OBD_FAIL_LDLM_CP_BL_RACE | OBD_FAIL_ONCE)) {
- ldlm_set_fail_loc(lock);
- rc = -EINTR;
- } else {
- /* Go to sleep until the lock is granted or canceled. */
- if (!ldlm_is_no_timeout(lock)) {
- /* Wait uninterruptible for a while first */
- rc = wait_event_idle_timeout(lock->l_waitq,
- is_granted_or_cancelled(lock),
- timeout * HZ);
- if (rc == 0)
- ldlm_expired_completion_wait(lock, conn_cnt);
- }
- /* Now wait abortable */
- if (rc == 0)
- rc = l_wait_event_abortable(lock->l_waitq,
- is_granted_or_cancelled(lock));
- else
- rc = 0;
- }
-
- if (rc) {
- LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)",
- rc);
- return rc;
- }
-
- return ldlm_completion_tail(lock, data);
-}
-EXPORT_SYMBOL(ldlm_completion_ast);
-
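/* The wait in ldlm_completion_ast() above is a two-phase pattern;
 * stripped of the LDLM details it looks like the sketch below
 * (report_expiry() is hypothetical):
 *
 *	// phase 1: bounded, not interruptible (an "idle" wait)
 *	rc = wait_event_idle_timeout(waitq, cond, timeout * HZ);
 *	if (rc == 0) {
 *		report_expiry();
 *		// phase 2: unbounded, abortable by fatal signals
 *		rc = l_wait_event_abortable(waitq, cond);
 *	}
 *
 * so a slow server is reported (and recovery kicked off) after the
 * adaptive timeout, while the caller keeps waiting until the lock is
 * granted, canceled, or the process is killed.
 */
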
-static void failed_lock_cleanup(struct ldlm_namespace *ns,
- struct ldlm_lock *lock, int mode)
-{
- int need_cancel = 0;
-
- /* Set a flag to prevent us from sending a CANCEL (bug 407) */
- lock_res_and_lock(lock);
- /* Check that lock is not granted or failed, we might race. */
- if ((lock->l_req_mode != lock->l_granted_mode) &&
- !ldlm_is_failed(lock)) {
-		/* Make sure that this lock will not be found by a raced
-		 * bl_ast, and that an -EINVAL reply is sent to the server anyway.
- * bug 17645
- */
- lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_FAILED |
- LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING;
- need_cancel = 1;
- }
- unlock_res_and_lock(lock);
-
- if (need_cancel)
- LDLM_DEBUG(lock,
- "setting FL_LOCAL_ONLY | LDLM_FL_FAILED | LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING");
- else
- LDLM_DEBUG(lock, "lock was granted or failed in race");
-
- /* XXX - HACK because we shouldn't call ldlm_lock_destroy()
- * from llite/file.c/ll_file_flock().
- */
-	/* This code makes up for the fact that we do not have a blocking
-	 * handler on the client for flock locks. As such, this is the place
-	 * where we must completely kill failed locks (those that were
-	 * interrupted and those that were waiting to be granted when the
-	 * server evicted us).
-	 */
- if (lock->l_resource->lr_type == LDLM_FLOCK) {
- lock_res_and_lock(lock);
- if (!ldlm_is_destroyed(lock)) {
- ldlm_resource_unlink_lock(lock);
- ldlm_lock_decref_internal_nolock(lock, mode);
- ldlm_lock_destroy_nolock(lock);
- }
- unlock_res_and_lock(lock);
- } else {
- ldlm_lock_decref_internal(lock, mode);
- }
-}
-
-/**
- * Finishing portion of client lock enqueue code.
- *
- * Called after receiving reply from server.
- */
-int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
- enum ldlm_type type, __u8 with_policy,
- enum ldlm_mode mode,
- __u64 *flags, void *lvb, __u32 lvb_len,
- const struct lustre_handle *lockh, int rc)
-{
- struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
- int is_replay = *flags & LDLM_FL_REPLAY;
- struct ldlm_lock *lock;
- struct ldlm_reply *reply;
- int cleanup_phase = 1;
-
- lock = ldlm_handle2lock(lockh);
- /* ldlm_cli_enqueue is holding a reference on this lock. */
- if (!lock) {
- LASSERT(type == LDLM_FLOCK);
- return -ENOLCK;
- }
-
- LASSERTF(ergo(lvb_len != 0, lvb_len == lock->l_lvb_len),
- "lvb_len = %d, l_lvb_len = %d\n", lvb_len, lock->l_lvb_len);
-
- if (rc != ELDLM_OK) {
- LASSERT(!is_replay);
- LDLM_DEBUG(lock, "client-side enqueue END (%s)",
- rc == ELDLM_LOCK_ABORTED ? "ABORTED" : "FAILED");
-
- if (rc != ELDLM_LOCK_ABORTED)
- goto cleanup;
- }
-
- /* Before we return, swab the reply */
- reply = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
- if (!reply) {
- rc = -EPROTO;
- goto cleanup;
- }
-
- if (lvb_len > 0) {
- int size = 0;
-
- size = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB,
- RCL_SERVER);
- if (size < 0) {
- LDLM_ERROR(lock, "Fail to get lvb_len, rc = %d", size);
- rc = size;
- goto cleanup;
- } else if (unlikely(size > lvb_len)) {
- LDLM_ERROR(lock,
- "Replied LVB is larger than expectation, expected = %d, replied = %d",
- lvb_len, size);
- rc = -EINVAL;
- goto cleanup;
- }
- lvb_len = size;
- }
-
- if (rc == ELDLM_LOCK_ABORTED) {
- if (lvb_len > 0 && lvb)
- rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER,
- lvb, lvb_len);
- if (rc == 0)
- rc = ELDLM_LOCK_ABORTED;
- goto cleanup;
- }
-
- /* lock enqueued on the server */
- cleanup_phase = 0;
-
- lock_res_and_lock(lock);
- lock->l_remote_handle = reply->lock_handle;
-
- *flags = ldlm_flags_from_wire(reply->lock_flags);
- lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags &
- LDLM_FL_INHERIT_MASK);
- unlock_res_and_lock(lock);
-
- CDEBUG(D_INFO, "local: %p, remote cookie: %#llx, flags: 0x%llx\n",
- lock, reply->lock_handle.cookie, *flags);
-
- /* If enqueue returned a blocked lock but the completion handler has
- * already run, then it fixed up the resource and we don't need to do it
- * again.
- */
- if ((*flags) & LDLM_FL_LOCK_CHANGED) {
- int newmode = reply->lock_desc.l_req_mode;
-
- LASSERT(!is_replay);
- if (newmode && newmode != lock->l_req_mode) {
- LDLM_DEBUG(lock, "server returned different mode %s",
- ldlm_lockname[newmode]);
- lock->l_req_mode = newmode;
- }
-
- if (!ldlm_res_eq(&reply->lock_desc.l_resource.lr_name,
- &lock->l_resource->lr_name)) {
- CDEBUG(D_INFO,
- "remote intent success, locking " DLDLMRES " instead of " DLDLMRES "\n",
- PLDLMRES(&reply->lock_desc.l_resource),
- PLDLMRES(lock->l_resource));
-
- rc = ldlm_lock_change_resource(ns, lock,
- &reply->lock_desc.l_resource.lr_name);
- if (rc || !lock->l_resource) {
- rc = -ENOMEM;
- goto cleanup;
- }
- LDLM_DEBUG(lock, "client-side enqueue, new resource");
- }
- if (with_policy)
- if (!(type == LDLM_IBITS &&
- !(exp_connect_flags(exp) & OBD_CONNECT_IBITS)))
-				/* We assume lock type cannot change on server */
- ldlm_convert_policy_to_local(exp,
- lock->l_resource->lr_type,
- &reply->lock_desc.l_policy_data,
- &lock->l_policy_data);
- if (type != LDLM_PLAIN)
- LDLM_DEBUG(lock,
- "client-side enqueue, new policy data");
- }
-
- if ((*flags) & LDLM_FL_AST_SENT) {
- lock_res_and_lock(lock);
- lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST;
- unlock_res_and_lock(lock);
- LDLM_DEBUG(lock, "enqueue reply includes blocking AST");
- }
-
- /* If the lock has already been granted by a completion AST, don't
- * clobber the LVB with an older one.
- */
- if (lvb_len > 0) {
- /* We must lock or a racing completion might update lvb without
- * letting us know and we'll clobber the correct value.
- * Cannot unlock after the check either, as that still leaves
- * a tiny window for completion to get in
- */
- lock_res_and_lock(lock);
- if (lock->l_req_mode != lock->l_granted_mode)
- rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER,
- lock->l_lvb_data, lvb_len);
- unlock_res_and_lock(lock);
- if (rc < 0) {
- cleanup_phase = 1;
- goto cleanup;
- }
- }
-
- if (!is_replay) {
- rc = ldlm_lock_enqueue(ns, &lock, NULL, flags);
- if (lock->l_completion_ast) {
- int err = lock->l_completion_ast(lock, *flags, NULL);
-
- if (!rc)
- rc = err;
- if (rc)
- cleanup_phase = 1;
- }
- }
-
- if (lvb_len > 0 && lvb) {
- /* Copy the LVB here, and not earlier, because the completion
- * AST (if any) can override what we got in the reply
- */
- memcpy(lvb, lock->l_lvb_data, lvb_len);
- }
-
- LDLM_DEBUG(lock, "client-side enqueue END");
-cleanup:
- if (cleanup_phase == 1 && rc)
- failed_lock_cleanup(ns, lock, mode);
- /* Put lock 2 times, the second reference is held by ldlm_cli_enqueue */
- LDLM_LOCK_PUT(lock);
- LDLM_LOCK_RELEASE(lock);
- return rc;
-}
-EXPORT_SYMBOL(ldlm_cli_enqueue_fini);
-
-/**
- * Estimate the number of lock handles that would fit into a request of
- * the given size. PAGE_SIZE-512 is to allow TCP/IP and LNET headers to
- * fit into a single page on the send/receive side. XXX: 512 should be
- * changed to a more adequate value.
- */
-static inline int ldlm_req_handles_avail(int req_size, int off)
-{
- int avail;
-
- avail = min_t(int, LDLM_MAXREQSIZE, PAGE_SIZE - 512) - req_size;
- if (likely(avail >= 0))
- avail /= (int)sizeof(struct lustre_handle);
- else
- avail = 0;
- avail += LDLM_LOCKREQ_HANDLES - off;
-
- return avail;
-}
-
-static inline int ldlm_capsule_handles_avail(struct req_capsule *pill,
- enum req_location loc,
- int off)
-{
- u32 size = req_capsule_msg_size(pill, loc);
-
- return ldlm_req_handles_avail(size, off);
-}
-
-static inline int ldlm_format_handles_avail(struct obd_import *imp,
- const struct req_format *fmt,
- enum req_location loc, int off)
-{
- u32 size = req_capsule_fmt_size(imp->imp_msg_magic, fmt, loc);
-
- return ldlm_req_handles_avail(size, off);
-}
-
-/**
- * Cancel LRU locks and pack them into the enqueue request. Also pack the
- * given \a count locks from \a cancels.
- *
- * This is to be called by functions preparing their own requests that
- * might contain lists of locks to cancel in addition to the actual
- * operation that needs to be performed.
- */
-int ldlm_prep_elc_req(struct obd_export *exp, struct ptlrpc_request *req,
- int version, int opc, int canceloff,
- struct list_head *cancels, int count)
-{
- struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
- struct req_capsule *pill = &req->rq_pill;
- struct ldlm_request *dlm = NULL;
- int flags, avail, to_free, pack = 0;
- LIST_HEAD(head);
- int rc;
-
- if (!cancels)
- cancels = &head;
- if (ns_connect_cancelset(ns)) {
- /* Estimate the amount of available space in the request. */
- req_capsule_filled_sizes(pill, RCL_CLIENT);
- avail = ldlm_capsule_handles_avail(pill, RCL_CLIENT, canceloff);
-
- flags = ns_connect_lru_resize(ns) ?
- LDLM_LRU_FLAG_LRUR_NO_WAIT : LDLM_LRU_FLAG_AGED;
- to_free = !ns_connect_lru_resize(ns) &&
- opc == LDLM_ENQUEUE ? 1 : 0;
-
- /* Cancel LRU locks here _only_ if the server supports
- * EARLY_CANCEL. Otherwise we have to send extra CANCEL
- * RPC, which will make us slower.
- */
- if (avail > count)
- count += ldlm_cancel_lru_local(ns, cancels, to_free,
- avail - count, 0, flags);
- if (avail > count)
- pack = count;
- else
- pack = avail;
- req_capsule_set_size(pill, &RMF_DLM_REQ, RCL_CLIENT,
- ldlm_request_bufsize(pack, opc));
- }
-
- rc = ptlrpc_request_pack(req, version, opc);
- if (rc) {
- ldlm_lock_list_put(cancels, l_bl_ast, count);
- return rc;
- }
-
- if (ns_connect_cancelset(ns)) {
- if (canceloff) {
- dlm = req_capsule_client_get(pill, &RMF_DLM_REQ);
- LASSERT(dlm);
-			/* Skip the first lock handle in ldlm_request_pack();
-			 * this method will increment @lock_count according
-			 * to the number of lock handles actually written to
-			 * the buffer.
- */
- dlm->lock_count = canceloff;
- }
- /* Pack into the request @pack lock handles. */
- ldlm_cli_cancel_list(cancels, pack, req, 0);
- /* Prepare and send separate cancel RPC for others. */
- ldlm_cli_cancel_list(cancels, count - pack, NULL, 0);
- } else {
- ldlm_lock_list_put(cancels, l_bl_ast, count);
- }
- return 0;
-}
-EXPORT_SYMBOL(ldlm_prep_elc_req);
-
-int ldlm_prep_enqueue_req(struct obd_export *exp, struct ptlrpc_request *req,
- struct list_head *cancels, int count)
-{
- return ldlm_prep_elc_req(exp, req, LUSTRE_DLM_VERSION, LDLM_ENQUEUE,
- LDLM_ENQUEUE_CANCEL_OFF, cancels, count);
-}
-EXPORT_SYMBOL(ldlm_prep_enqueue_req);
-
-static struct ptlrpc_request *ldlm_enqueue_pack(struct obd_export *exp,
- int lvb_len)
-{
- struct ptlrpc_request *req;
- int rc;
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LDLM_ENQUEUE);
- if (!req)
- return ERR_PTR(-ENOMEM);
-
- rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
- if (rc) {
- ptlrpc_request_free(req);
- return ERR_PTR(rc);
- }
-
- req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, lvb_len);
- ptlrpc_request_set_replen(req);
- return req;
-}
-
-/**
- * Client-side lock enqueue.
- *
- * If a request has some specific initialisation it is passed in \a reqp,
- * otherwise it is created in ldlm_cli_enqueue.
- *
- * Supports sync and async requests, pass \a async flag accordingly. If a
- * request was created in ldlm_cli_enqueue and it is the async request,
- * pass it to the caller in \a reqp.
- */
-int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
- struct ldlm_enqueue_info *einfo,
- const struct ldlm_res_id *res_id,
- union ldlm_policy_data const *policy, __u64 *flags,
- void *lvb, __u32 lvb_len, enum lvb_type lvb_type,
- struct lustre_handle *lockh, int async)
-{
- struct ldlm_namespace *ns;
- struct ldlm_lock *lock;
- struct ldlm_request *body;
- int is_replay = *flags & LDLM_FL_REPLAY;
- int req_passed_in = 1;
- int rc, err;
- struct ptlrpc_request *req;
-
- ns = exp->exp_obd->obd_namespace;
-
- /* If we're replaying this lock, just check some invariants.
-	 * If we're creating a new lock, get everything set up nicely.
- */
- if (is_replay) {
- lock = ldlm_handle2lock_long(lockh, 0);
- LASSERT(lock);
- LDLM_DEBUG(lock, "client-side enqueue START");
- LASSERT(exp == lock->l_conn_export);
- } else {
- const struct ldlm_callback_suite cbs = {
- .lcs_completion = einfo->ei_cb_cp,
- .lcs_blocking = einfo->ei_cb_bl,
- .lcs_glimpse = einfo->ei_cb_gl
- };
- lock = ldlm_lock_create(ns, res_id, einfo->ei_type,
- einfo->ei_mode, &cbs, einfo->ei_cbdata,
- lvb_len, lvb_type);
- if (IS_ERR(lock))
- return PTR_ERR(lock);
- /* for the local lock, add the reference */
- ldlm_lock_addref_internal(lock, einfo->ei_mode);
- ldlm_lock2handle(lock, lockh);
- if (policy)
- lock->l_policy_data = *policy;
-
- if (einfo->ei_type == LDLM_EXTENT) {
- /* extent lock without policy is a bug */
- if (!policy)
- LBUG();
-
- lock->l_req_extent = policy->l_extent;
- }
- LDLM_DEBUG(lock, "client-side enqueue START, flags %llx",
- *flags);
- }
-
- lock->l_conn_export = exp;
- lock->l_export = NULL;
- lock->l_blocking_ast = einfo->ei_cb_bl;
- lock->l_flags |= (*flags & (LDLM_FL_NO_LRU | LDLM_FL_EXCL));
- lock->l_last_activity = ktime_get_real_seconds();
-
- /* lock not sent to server yet */
- if (!reqp || !*reqp) {
- req = ldlm_enqueue_pack(exp, lvb_len);
- if (IS_ERR(req)) {
- failed_lock_cleanup(ns, lock, einfo->ei_mode);
- LDLM_LOCK_RELEASE(lock);
- return PTR_ERR(req);
- }
-
- req_passed_in = 0;
- if (reqp)
- *reqp = req;
- } else {
- int len;
-
- req = *reqp;
- len = req_capsule_get_size(&req->rq_pill, &RMF_DLM_REQ,
- RCL_CLIENT);
- LASSERTF(len >= sizeof(*body), "buflen[%d] = %d, not %d\n",
- DLM_LOCKREQ_OFF, len, (int)sizeof(*body));
- }
-
- /* Dump lock data into the request buffer */
- body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
- ldlm_lock2desc(lock, &body->lock_desc);
- body->lock_flags = ldlm_flags_to_wire(*flags);
- body->lock_handle[0] = *lockh;
-
- if (async) {
- LASSERT(reqp);
- return 0;
- }
-
- LDLM_DEBUG(lock, "sending request");
-
- rc = ptlrpc_queue_wait(req);
-
- err = ldlm_cli_enqueue_fini(exp, req, einfo->ei_type, policy ? 1 : 0,
- einfo->ei_mode, flags, lvb, lvb_len,
- lockh, rc);
-
- /* If ldlm_cli_enqueue_fini did not find the lock, we need to free
- * one reference that we took
- */
- if (err == -ENOLCK)
- LDLM_LOCK_RELEASE(lock);
- else
- rc = err;
-
- if (!req_passed_in && req) {
- ptlrpc_req_finished(req);
- if (reqp)
- *reqp = NULL;
- }
-
- return rc;
-}
-EXPORT_SYMBOL(ldlm_cli_enqueue);
-
-/**
- * Cancel locks locally.
- * \retval LDLM_FL_LOCAL_ONLY if there is no need for a CANCEL RPC to the server;
- * \retval LDLM_FL_BL_AST if there is a need for a separate CANCEL RPC;
- * \retval LDLM_FL_CANCELING otherwise.
- */
-static __u64 ldlm_cli_cancel_local(struct ldlm_lock *lock)
-{
- __u64 rc = LDLM_FL_LOCAL_ONLY;
-
- if (lock->l_conn_export) {
- bool local_only;
-
- LDLM_DEBUG(lock, "client-side cancel");
-		/* Set this flag to prevent others from getting new references */
- lock_res_and_lock(lock);
- ldlm_set_cbpending(lock);
- local_only = !!(lock->l_flags &
- (LDLM_FL_LOCAL_ONLY | LDLM_FL_CANCEL_ON_BLOCK));
- ldlm_cancel_callback(lock);
- rc = ldlm_is_bl_ast(lock) ? LDLM_FL_BL_AST : LDLM_FL_CANCELING;
- unlock_res_and_lock(lock);
-
- if (local_only) {
- CDEBUG(D_DLMTRACE,
- "not sending request (at caller's instruction)\n");
- rc = LDLM_FL_LOCAL_ONLY;
- }
- ldlm_lock_cancel(lock);
- } else {
- LDLM_ERROR(lock, "Trying to cancel local lock");
- LBUG();
- }
-
- return rc;
-}
-
-/**
- * Pack \a count locks in \a head into ldlm_request buffer of request \a req.
- */
-static void ldlm_cancel_pack(struct ptlrpc_request *req,
- struct list_head *head, int count)
-{
- struct ldlm_request *dlm;
- struct ldlm_lock *lock;
- int max, packed = 0;
-
- dlm = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
- LASSERT(dlm);
-
- /* Check the room in the request buffer. */
- max = req_capsule_get_size(&req->rq_pill, &RMF_DLM_REQ, RCL_CLIENT) -
- sizeof(struct ldlm_request);
- max /= sizeof(struct lustre_handle);
- max += LDLM_LOCKREQ_HANDLES;
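- /* Illustration: the ldlm_request struct already embeds
- * LDLM_LOCKREQ_HANDLES handle slots, so a buffer that is N handles
- * larger than sizeof(struct ldlm_request) yields
- * max == LDLM_LOCKREQ_HANDLES + N.
- */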
- LASSERT(max >= dlm->lock_count + count);
-
- /* XXX: it would be better to pack lock handles grouped by resource,
- * so that the server-side cancel would call filter_lvbo_update() less
- * frequently.
- */
- list_for_each_entry(lock, head, l_bl_ast) {
- if (!count--)
- break;
- LASSERT(lock->l_conn_export);
- /* Pack the lock handle to the given request buffer. */
- LDLM_DEBUG(lock, "packing");
- dlm->lock_handle[dlm->lock_count++] = lock->l_remote_handle;
- packed++;
- }
- CDEBUG(D_DLMTRACE, "%d locks packed\n", packed);
-}
-
-/**
- * Prepare and send a batched cancel RPC. It will include \a count lock
- * handles of locks given in \a cancels list.
- */
-static int ldlm_cli_cancel_req(struct obd_export *exp,
- struct list_head *cancels,
- int count, enum ldlm_cancel_flags flags)
-{
- struct ptlrpc_request *req = NULL;
- struct obd_import *imp;
- int free, sent = 0;
- int rc = 0;
-
- LASSERT(exp);
- LASSERT(count > 0);
-
- CFS_FAIL_TIMEOUT(OBD_FAIL_LDLM_PAUSE_CANCEL, cfs_fail_val);
-
- if (CFS_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_RACE))
- return count;
-
- free = ldlm_format_handles_avail(class_exp2cliimp(exp),
- &RQF_LDLM_CANCEL, RCL_CLIENT, 0);
- if (count > free)
- count = free;
-
- while (1) {
- imp = class_exp2cliimp(exp);
- if (!imp || imp->imp_invalid) {
- CDEBUG(D_DLMTRACE,
- "skipping cancel on invalid import %p\n", imp);
- return count;
- }
-
- req = ptlrpc_request_alloc(imp, &RQF_LDLM_CANCEL);
- if (!req) {
- rc = -ENOMEM;
- goto out;
- }
-
- req_capsule_filled_sizes(&req->rq_pill, RCL_CLIENT);
- req_capsule_set_size(&req->rq_pill, &RMF_DLM_REQ, RCL_CLIENT,
- ldlm_request_bufsize(count, LDLM_CANCEL));
-
- rc = ptlrpc_request_pack(req, LUSTRE_DLM_VERSION, LDLM_CANCEL);
- if (rc) {
- ptlrpc_request_free(req);
- goto out;
- }
-
- req->rq_request_portal = LDLM_CANCEL_REQUEST_PORTAL;
- req->rq_reply_portal = LDLM_CANCEL_REPLY_PORTAL;
- ptlrpc_at_set_req_timeout(req);
-
- ldlm_cancel_pack(req, cancels, count);
-
- ptlrpc_request_set_replen(req);
- if (flags & LCF_ASYNC) {
- ptlrpcd_add_req(req);
- sent = count;
- goto out;
- }
-
- rc = ptlrpc_queue_wait(req);
- if (rc == LUSTRE_ESTALE) {
- CDEBUG(D_DLMTRACE,
- "client/server (nid %s) out of sync -- not fatal\n",
- libcfs_nid2str(req->rq_import->
- imp_connection->c_peer.nid));
- rc = 0;
- } else if (rc == -ETIMEDOUT && /* check there was no reconnect */
- req->rq_import_generation == imp->imp_generation) {
- ptlrpc_req_finished(req);
- continue;
- } else if (rc != ELDLM_OK) {
- /* -ESHUTDOWN is common on umount */
- CDEBUG_LIMIT(rc == -ESHUTDOWN ? D_DLMTRACE : D_ERROR,
- "Got rc %d from cancel RPC: canceling anyway\n",
- rc);
- break;
- }
- sent = count;
- break;
- }
-
- ptlrpc_req_finished(req);
-out:
- return sent ? sent : rc;
-}
-
-static inline struct ldlm_pool *ldlm_imp2pl(struct obd_import *imp)
-{
- return &imp->imp_obd->obd_namespace->ns_pool;
-}
-
-/**
- * Update client's OBD pool related fields with new SLV and Limit from \a req.
- */
-int ldlm_cli_update_pool(struct ptlrpc_request *req)
-{
- struct obd_device *obd;
- __u64 new_slv;
- __u32 new_limit;
-
- if (unlikely(!req->rq_import || !req->rq_import->imp_obd ||
- !imp_connect_lru_resize(req->rq_import))) {
- /*
- * Do nothing for corner cases.
- */
- return 0;
- }
-
- /* In some cases the RPC may contain an SLV and limit zeroed out. This
- * is the case when the server does not support the LRU resize feature.
- * It is also possible in some recovery cases, when server-side requests
- * have no reference to the OBD export and thus access to the
- * server-side namespace is not possible.
- */
- if (lustre_msg_get_slv(req->rq_repmsg) == 0 ||
- lustre_msg_get_limit(req->rq_repmsg) == 0) {
- DEBUG_REQ(D_HA, req,
- "Zero SLV or Limit found (SLV: %llu, Limit: %u)",
- lustre_msg_get_slv(req->rq_repmsg),
- lustre_msg_get_limit(req->rq_repmsg));
- return 0;
- }
-
- new_limit = lustre_msg_get_limit(req->rq_repmsg);
- new_slv = lustre_msg_get_slv(req->rq_repmsg);
- obd = req->rq_import->imp_obd;
-
- /* Set new SLV and limit in OBD fields to make them accessible
- * to the pool thread. We do not access obd_namespace and pool
- * directly here as there is no reliable way to make sure that
- * they are still alive at cleanup time. Evil races are possible
- * which may cause Oops at that time.
- */
- write_lock(&obd->obd_pool_lock);
- obd->obd_pool_slv = new_slv;
- obd->obd_pool_limit = new_limit;
- write_unlock(&obd->obd_pool_lock);
-
- return 0;
-}
-
-/**
- * Client side lock cancel.
- *
- * Lock must not have any readers or writers by this time.
- */
-int ldlm_cli_cancel(const struct lustre_handle *lockh,
- enum ldlm_cancel_flags cancel_flags)
-{
- struct obd_export *exp;
- int avail, flags, count = 1;
- __u64 rc = 0;
- struct ldlm_namespace *ns;
- struct ldlm_lock *lock;
- LIST_HEAD(cancels);
-
- lock = ldlm_handle2lock_long(lockh, 0);
- if (!lock) {
- LDLM_DEBUG_NOLOCK("lock is already being destroyed");
- return 0;
- }
-
- lock_res_and_lock(lock);
- /* Lock is being canceled and the caller doesn't want to wait */
- if (ldlm_is_canceling(lock) && (cancel_flags & LCF_ASYNC)) {
- unlock_res_and_lock(lock);
- LDLM_LOCK_RELEASE(lock);
- return 0;
- }
-
- ldlm_set_canceling(lock);
- unlock_res_and_lock(lock);
-
- rc = ldlm_cli_cancel_local(lock);
- if (rc == LDLM_FL_LOCAL_ONLY || cancel_flags & LCF_LOCAL) {
- LDLM_LOCK_RELEASE(lock);
- return 0;
- }
- /* Even if the lock is marked as LDLM_FL_BL_AST, this is a LDLM_CANCEL
- * RPC which goes to canceld portal, so we can cancel other LRU locks
- * here and send them all as one LDLM_CANCEL RPC.
- */
- LASSERT(list_empty(&lock->l_bl_ast));
- list_add(&lock->l_bl_ast, &cancels);
-
- exp = lock->l_conn_export;
- if (exp_connect_cancelset(exp)) {
- avail = ldlm_format_handles_avail(class_exp2cliimp(exp),
- &RQF_LDLM_CANCEL,
- RCL_CLIENT, 0);
- LASSERT(avail > 0);
-
- ns = ldlm_lock_to_ns(lock);
- flags = ns_connect_lru_resize(ns) ?
- LDLM_LRU_FLAG_LRUR : LDLM_LRU_FLAG_AGED;
- count += ldlm_cancel_lru_local(ns, &cancels, 0, avail - 1,
- LCF_BL_AST, flags);
- }
- ldlm_cli_cancel_list(&cancels, count, NULL, cancel_flags);
- return 0;
-}
-EXPORT_SYMBOL(ldlm_cli_cancel);
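-
-/* Illustrative call-site sketch (assumed, not from the original file): once
- * the last reader/writer reference has been dropped, a caller converts the
- * lock to a handle and cancels it; unused LRU locks are batched into the
- * same CANCEL RPC by the code above:
- *
- *	struct lustre_handle lockh;
- *
- *	ldlm_lock2handle(lock, &lockh);
- *	rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
- */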
-
-/**
- * Locally cancel up to \a count locks in list \a cancels.
- * Return the number of cancelled locks.
- */
-int ldlm_cli_cancel_list_local(struct list_head *cancels, int count,
- enum ldlm_cancel_flags flags)
-{
- LIST_HEAD(head);
- struct ldlm_lock *lock, *next;
- int left = 0, bl_ast = 0;
- __u64 rc;
-
- left = count;
- list_for_each_entry_safe(lock, next, cancels, l_bl_ast) {
- if (left-- == 0)
- break;
-
- if (flags & LCF_LOCAL) {
- rc = LDLM_FL_LOCAL_ONLY;
- ldlm_lock_cancel(lock);
- } else {
- rc = ldlm_cli_cancel_local(lock);
- }
- /* Until we have compound requests and can send LDLM_CANCEL
- * requests batched with generic RPCs, we need to send cancels
- * with the LDLM_FL_BL_AST flag in a separate RPC from
- * the one being generated now.
- */
- if (!(flags & LCF_BL_AST) && (rc == LDLM_FL_BL_AST)) {
- LDLM_DEBUG(lock, "Cancel lock separately");
- list_del_init(&lock->l_bl_ast);
- list_add(&lock->l_bl_ast, &head);
- bl_ast++;
- continue;
- }
- if (rc == LDLM_FL_LOCAL_ONLY) {
- /* CANCEL RPC should not be sent to server. */
- list_del_init(&lock->l_bl_ast);
- LDLM_LOCK_RELEASE(lock);
- count--;
- }
- }
- if (bl_ast > 0) {
- count -= bl_ast;
- ldlm_cli_cancel_list(&head, bl_ast, NULL, 0);
- }
-
- return count;
-}
-
-/**
- * Cancel as many locks as possible without sending any RPCs (e.g. to write
- * back dirty data or to close a file) and without waiting for any in-flight
- * RPCs (e.g. readahead requests).
- */
-static enum ldlm_policy_res
-ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
- int unused, int added, int count)
-{
- enum ldlm_policy_res result = LDLM_POLICY_CANCEL_LOCK;
-
- /* don't check added & count since we want to process all locks
- * from the unused list.
- * It's fine not to take a lock to access lock->l_resource, since
- * the lock has already been granted so it won't change.
- */
- switch (lock->l_resource->lr_type) {
- case LDLM_EXTENT:
- case LDLM_IBITS:
- if (ns->ns_cancel && ns->ns_cancel(lock) != 0)
- break;
- /* fall through */
- default:
- result = LDLM_POLICY_SKIP_LOCK;
- lock_res_and_lock(lock);
- ldlm_set_skipped(lock);
- unlock_res_and_lock(lock);
- break;
- }
-
- return result;
-}
-
-/**
- * Callback function for the LRU-resize policy. Decides whether to keep
- * \a lock in LRU for the current LRU size \a unused, the number added in the
- * current scan \a added and the preferred number of locks to cancel \a count.
- *
- * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU and stop scanning
- *
- * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
- */
-static enum ldlm_policy_res ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
- struct ldlm_lock *lock,
- int unused, int added,
- int count)
-{
- unsigned long cur = jiffies;
- struct ldlm_pool *pl = &ns->ns_pool;
- __u64 slv, lvf, lv;
- unsigned long la;
-
- /* Stop LRU processing once we have added @count locks or have
- * checked all locks in the LRU.
- */
- if (count && added >= count)
- return LDLM_POLICY_KEEP_LOCK;
-
- /*
- * Regardless of the LV, it doesn't make sense to keep a lock
- * which has been unused for ns_max_age.
- */
- if (time_after(jiffies, lock->l_last_used + ns->ns_max_age))
- return LDLM_POLICY_CANCEL_LOCK;
-
- slv = ldlm_pool_get_slv(pl);
- lvf = ldlm_pool_get_lvf(pl);
- la = (cur - lock->l_last_used) / HZ;
- lv = lvf * la * unused;
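- /* Example with made-up numbers: lvf == 100, la == 10 seconds idle
- * and unused == 1000 LRU locks give lv == 1000000; the lock is
- * canceled below only if a non-zero server SLV is <= that volume.
- */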
-
- /* Inform pool about current CLV to see it via debugfs. */
- ldlm_pool_set_clv(pl, lv);
-
- /* Stop when no SLV has come from the server yet, or when lv is
- * smaller than the SLV.
- */
- if (slv == 0 || lv < slv)
- return LDLM_POLICY_KEEP_LOCK;
-
- return LDLM_POLICY_CANCEL_LOCK;
-}
-
-/**
- * Callback function for the debugfs-driven "passed" policy. Decides whether
- * to keep \a lock in LRU for the current LRU size \a unused, the number
- * added in the current scan \a added and the preferred number of locks to
- * cancel \a count.
- *
- * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU and stop scanning
- *
- * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
- */
-static enum ldlm_policy_res ldlm_cancel_passed_policy(struct ldlm_namespace *ns,
- struct ldlm_lock *lock,
- int unused, int added,
- int count)
-{
- /* Stop LRU processing once we have added @count locks or have
- * checked all locks in the LRU.
- */
- return (added >= count) ?
- LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
-}
-
-/**
- * Callback function for the aged policy. Decides whether to keep \a lock in
- * LRU for the current LRU size \a unused, the number added in the current
- * scan \a added and the preferred number of locks to cancel \a count.
- *
- * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU and stop scanning
- *
- * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
- */
-static enum ldlm_policy_res ldlm_cancel_aged_policy(struct ldlm_namespace *ns,
- struct ldlm_lock *lock,
- int unused, int added,
- int count)
-{
- if ((added >= count) &&
- time_before(jiffies, lock->l_last_used + ns->ns_max_age))
- return LDLM_POLICY_KEEP_LOCK;
-
- return LDLM_POLICY_CANCEL_LOCK;
-}
-
-static enum ldlm_policy_res
-ldlm_cancel_lrur_no_wait_policy(struct ldlm_namespace *ns,
- struct ldlm_lock *lock,
- int unused, int added,
- int count)
-{
- enum ldlm_policy_res result;
-
- result = ldlm_cancel_lrur_policy(ns, lock, unused, added, count);
- if (result == LDLM_POLICY_KEEP_LOCK)
- return result;
-
- return ldlm_cancel_no_wait_policy(ns, lock, unused, added, count);
-}
-
-/**
- * Callback function for the default policy. Decides whether to keep \a lock
- * in LRU for the current LRU size \a unused, the number added in the current
- * scan \a added and the preferred number of locks to cancel \a count.
- *
- * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU and stop scanning
- *
- * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
- */
-static enum ldlm_policy_res
-ldlm_cancel_default_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
- int unused, int added, int count)
-{
- /* Stop LRU processing once we have added count locks or have
- * checked all locks in the LRU.
- */
- return (added >= count) ?
- LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
-}
-
-typedef enum ldlm_policy_res (*ldlm_cancel_lru_policy_t)(
- struct ldlm_namespace *,
- struct ldlm_lock *, int,
- int, int);
-
-static ldlm_cancel_lru_policy_t
-ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags)
-{
- if (flags & LDLM_LRU_FLAG_NO_WAIT)
- return ldlm_cancel_no_wait_policy;
-
- if (ns_connect_lru_resize(ns)) {
- if (flags & LDLM_LRU_FLAG_SHRINK)
- /* We kill passed number of old locks. */
- return ldlm_cancel_passed_policy;
- else if (flags & LDLM_LRU_FLAG_LRUR)
- return ldlm_cancel_lrur_policy;
- else if (flags & LDLM_LRU_FLAG_PASSED)
- return ldlm_cancel_passed_policy;
- else if (flags & LDLM_LRU_FLAG_LRUR_NO_WAIT)
- return ldlm_cancel_lrur_no_wait_policy;
- } else {
- if (flags & LDLM_LRU_FLAG_AGED)
- return ldlm_cancel_aged_policy;
- }
-
- return ldlm_cancel_default_policy;
-}
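-
-/* Summary of the flag-to-policy mapping implemented above:
- *
- *	LDLM_LRU_FLAG_NO_WAIT			-> no_wait policy (always)
- *	lru_resize + SHRINK or PASSED		-> passed policy
- *	lru_resize + LRUR			-> lrur policy
- *	lru_resize + LRUR_NO_WAIT		-> lrur_no_wait policy
- *	no lru_resize + AGED			-> aged policy
- *	anything else				-> default policy
- */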
-
-/**
- * - Free space in LRU for \a count new locks,
- * redundant unused locks are canceled locally;
- * - also cancel locally unused aged locks;
- * - do not cancel more than \a max locks;
- * - GET the found locks and add them into the \a cancels list.
- *
- * A client lock can be added to the l_bl_ast list only when it is
- * marked LDLM_FL_CANCELING. Otherwise, somebody is already doing
- * CANCEL. There are the following use cases:
- * ldlm_cancel_resource_local(), ldlm_cancel_lru_local() and
- * ldlm_cli_cancel(), which check and set this flag properly. As any
- * attempt to cancel a lock relies on this flag, the l_bl_ast list is
- * accessed later without any special locking.
- *
- * Calling policies for enabled LRU resize:
- * ----------------------------------------
- * flags & LDLM_LRU_FLAG_LRUR - use LRU resize policy (SLV from server) to
- * cancel not more than \a count locks;
- *
- * flags & LDLM_LRU_FLAG_PASSED - cancel \a count number of old locks (located
- * at the beginning of LRU list);
- *
- * flags & LDLM_LRU_FLAG_SHRINK - cancel not more than \a count locks according
- * to memory pressure policy function;
- *
- * flags & LDLM_LRU_FLAG_AGED - cancel \a count locks according to
- * "aged policy".
- *
- * flags & LDLM_LRU_FLAG_NO_WAIT - cancel as many unused locks as possible
- * (typically before replaying locks) w/o
- * sending any RPCs or waiting for any
- * outstanding RPC to complete.
- */
-static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
- struct list_head *cancels, int count, int max,
- int flags)
-{
- ldlm_cancel_lru_policy_t pf;
- struct ldlm_lock *lock, *next;
- int added = 0, unused, remained;
- int no_wait = flags &
- (LDLM_LRU_FLAG_NO_WAIT | LDLM_LRU_FLAG_LRUR_NO_WAIT);
-
- spin_lock(&ns->ns_lock);
- unused = ns->ns_nr_unused;
- remained = unused;
-
- if (!ns_connect_lru_resize(ns))
- count += unused - ns->ns_max_unused;
-
- pf = ldlm_cancel_lru_policy(ns, flags);
- LASSERT(pf);
-
- while (!list_empty(&ns->ns_unused_list)) {
- enum ldlm_policy_res result;
- time_t last_use = 0;
-
- /* all unused locks */
- if (remained-- <= 0)
- break;
-
- /* For any flags, stop scanning if @max is reached. */
- if (max && added >= max)
- break;
-
- list_for_each_entry_safe(lock, next, &ns->ns_unused_list,
- l_lru) {
- /* No locks which got blocking requests. */
- LASSERT(!ldlm_is_bl_ast(lock));
-
- if (no_wait && ldlm_is_skipped(lock))
- /* already processed */
- continue;
-
- last_use = lock->l_last_used;
- if (last_use == jiffies)
- continue;
-
- /* Somebody is already doing CANCEL. No need for this
- * lock in LRU, do not traverse it again.
- */
- if (!ldlm_is_canceling(lock))
- break;
-
- ldlm_lock_remove_from_lru_nolock(lock);
- }
- if (&lock->l_lru == &ns->ns_unused_list)
- break;
-
- LDLM_LOCK_GET(lock);
- spin_unlock(&ns->ns_lock);
- lu_ref_add(&lock->l_reference, __func__, current);
-
- /* Pass the lock through the policy filter and see if it
- * should stay in LRU.
- *
- * Even for shrinker policy we stop scanning if
- * we find a lock that should stay in the cache.
- * We should take into account lock age anyway
- * as a new lock is a valuable resource even if
- * it has a low weight.
- *
- * That is, for shrinker policy we drop only
- * old locks, but additionally choose them by
- * their weight. Big extent locks will stay in
- * the cache.
- */
- result = pf(ns, lock, unused, added, count);
- if (result == LDLM_POLICY_KEEP_LOCK) {
- lu_ref_del(&lock->l_reference,
- __func__, current);
- LDLM_LOCK_RELEASE(lock);
- spin_lock(&ns->ns_lock);
- break;
- }
- if (result == LDLM_POLICY_SKIP_LOCK) {
- lu_ref_del(&lock->l_reference,
- __func__, current);
- LDLM_LOCK_RELEASE(lock);
- spin_lock(&ns->ns_lock);
- continue;
- }
-
- lock_res_and_lock(lock);
- /* Check flags again under the lock. */
- if (ldlm_is_canceling(lock) ||
- (ldlm_lock_remove_from_lru_check(lock, last_use) == 0)) {
- /* Another thread is removing lock from LRU, or
- * somebody is already doing CANCEL, or there
- * is a blocking request which will send cancel
- * by itself, or the lock is no longer unused or
- * the lock has been used since the pf() call and
- * pages could be put under it.
- */
- unlock_res_and_lock(lock);
- lu_ref_del(&lock->l_reference,
- __func__, current);
- LDLM_LOCK_RELEASE(lock);
- spin_lock(&ns->ns_lock);
- continue;
- }
- LASSERT(!lock->l_readers && !lock->l_writers);
-
- /* If we have chosen to cancel this lock voluntarily, we had
- * better send a cancel notification to the server, so that it
- * frees the appropriate state. This might lead to a race where,
- * while we are doing the cancel here, the server is also
- * silently cancelling this lock.
- */
- ldlm_clear_cancel_on_block(lock);
-
- /* Setting the CBPENDING flag is a little misleading,
- * but prevents an important race; namely, once
- * CBPENDING is set, the lock can accumulate no more
- * readers/writers. Since readers and writers are
- * already zero here, ldlm_lock_decref() won't see
- * this flag and call l_blocking_ast
- */
- lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING;
-
- /* We can't re-add to l_lru as it confuses the
- * refcounting in ldlm_lock_remove_from_lru() if an AST
- * arrives after we drop lr_lock below. We use l_bl_ast
- * and can't use l_pending_chain, as the latter is used on
- * both server and client, even though bug 5666 says it is
- * used only on the server.
- */
- LASSERT(list_empty(&lock->l_bl_ast));
- list_add(&lock->l_bl_ast, cancels);
- unlock_res_and_lock(lock);
- lu_ref_del(&lock->l_reference, __func__, current);
- spin_lock(&ns->ns_lock);
- added++;
- unused--;
- }
- spin_unlock(&ns->ns_lock);
- return added;
-}
-
-int ldlm_cancel_lru_local(struct ldlm_namespace *ns,
- struct list_head *cancels, int count, int max,
- enum ldlm_cancel_flags cancel_flags, int flags)
-{
- int added;
-
- added = ldlm_prepare_lru_list(ns, cancels, count, max, flags);
- if (added <= 0)
- return added;
- return ldlm_cli_cancel_list_local(cancels, added, cancel_flags);
-}
-
-/**
- * Cancel at least \a nr locks from given namespace LRU.
- *
- * When called with LCF_ASYNC the blocking callback will be handled
- * in a thread and this function will return after the thread has been
- * asked to call the callback. When called without LCF_ASYNC the
- * blocking callback will be performed in this function.
- */
-int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr,
- enum ldlm_cancel_flags cancel_flags,
- int flags)
-{
- LIST_HEAD(cancels);
- int count, rc;
-
- /* Just prepare the list of locks, do not actually cancel them yet.
- * Locks are cancelled later in a separate thread.
- */
- count = ldlm_prepare_lru_list(ns, &cancels, nr, 0, flags);
- rc = ldlm_bl_to_thread_list(ns, NULL, &cancels, count, cancel_flags);
- if (rc == 0)
- return count;
-
- return 0;
-}
-
-/**
- * Find and locally cancel all unused locks on the resource that match the
- * given policy and mode. GET the found locks and add them to the \a cancels
- * list.
- */
-int ldlm_cancel_resource_local(struct ldlm_resource *res,
- struct list_head *cancels,
- union ldlm_policy_data *policy,
- enum ldlm_mode mode, __u64 lock_flags,
- enum ldlm_cancel_flags cancel_flags,
- void *opaque)
-{
- struct ldlm_lock *lock;
- int count = 0;
-
- lock_res(res);
- list_for_each_entry(lock, &res->lr_granted, l_res_link) {
- if (opaque && lock->l_ast_data != opaque) {
- LDLM_ERROR(lock, "data %p doesn't match opaque %p",
- lock->l_ast_data, opaque);
- continue;
- }
-
- if (lock->l_readers || lock->l_writers)
- continue;
-
- /* If somebody is already doing CANCEL, or blocking AST came,
- * skip this lock.
- */
- if (ldlm_is_bl_ast(lock) || ldlm_is_canceling(lock))
- continue;
-
- if (lockmode_compat(lock->l_granted_mode, mode))
- continue;
-
- /* If policy is given and this is IBITS lock, add to list only
- * those locks that match by policy.
- */
- if (policy && (lock->l_resource->lr_type == LDLM_IBITS) &&
- !(lock->l_policy_data.l_inodebits.bits &
- policy->l_inodebits.bits))
- continue;
-
- /* See CBPENDING comment in ldlm_cancel_lru */
- lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING |
- lock_flags;
-
- LASSERT(list_empty(&lock->l_bl_ast));
- list_add(&lock->l_bl_ast, cancels);
- LDLM_LOCK_GET(lock);
- count++;
- }
- unlock_res(res);
-
- return ldlm_cli_cancel_list_local(cancels, count, cancel_flags);
-}
-EXPORT_SYMBOL(ldlm_cancel_resource_local);
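-
-/* Illustrative use (mirroring the namespace-wide helper below): passing
- * LCK_MINMODE and a NULL policy cancels every unused lock on the resource:
- *
- *	LIST_HEAD(cancels);
- *	int count;
- *
- *	count = ldlm_cancel_resource_local(res, &cancels, NULL, LCK_MINMODE,
- *					   0, LCF_ASYNC, NULL);
- */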
-
-/**
- * Cancel client-side locks from a list and send/prepare cancel RPCs to the
- * server.
- * If \a req is NULL, send CANCEL request to server with handles of locks
- * in the \a cancels. If EARLY_CANCEL is not supported, send CANCEL requests
- * separately per lock.
- * If \a req is not NULL, put handles of locks in \a cancels into the request
- * buffer at the offset \a off.
- * Destroy \a cancels at the end.
- */
-int ldlm_cli_cancel_list(struct list_head *cancels, int count,
- struct ptlrpc_request *req,
- enum ldlm_cancel_flags flags)
-{
- struct ldlm_lock *lock;
- int res = 0;
-
- if (list_empty(cancels) || count == 0)
- return 0;
-
- /* XXX: requests (both batched and not) could be sent in parallel.
- * Usually it is enough to have just 1 RPC, but it is possible that
- * there are too many locks to be cancelled in LRU or on a resource.
- * It would also speed up the case when the server does not support
- * the feature.
- */
- while (count > 0) {
- LASSERT(!list_empty(cancels));
- lock = list_first_entry(cancels, struct ldlm_lock, l_bl_ast);
- LASSERT(lock->l_conn_export);
-
- if (exp_connect_cancelset(lock->l_conn_export)) {
- res = count;
- if (req)
- ldlm_cancel_pack(req, cancels, count);
- else
- res = ldlm_cli_cancel_req(lock->l_conn_export,
- cancels, count,
- flags);
- } else {
- res = ldlm_cli_cancel_req(lock->l_conn_export,
- cancels, 1, flags);
- }
-
- if (res < 0) {
- CDEBUG_LIMIT(res == -ESHUTDOWN ? D_DLMTRACE : D_ERROR,
- "%s: %d\n", __func__, res);
- res = count;
- }
-
- count -= res;
- ldlm_lock_list_put(cancels, l_bl_ast, res);
- }
- LASSERT(count == 0);
- return 0;
-}
-EXPORT_SYMBOL(ldlm_cli_cancel_list);
-
-/**
- * Cancel all locks on a resource that have 0 readers/writers.
- *
- * If flags & LDLM_FL_LOCAL_ONLY, throw the locks away without trying
- * to notify the server.
- */
-int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
- const struct ldlm_res_id *res_id,
- union ldlm_policy_data *policy,
- enum ldlm_mode mode,
- enum ldlm_cancel_flags flags,
- void *opaque)
-{
- struct ldlm_resource *res;
- LIST_HEAD(cancels);
- int count;
- int rc;
-
- res = ldlm_resource_get(ns, NULL, res_id, 0, 0);
- if (IS_ERR(res)) {
- /* This is not a problem. */
- CDEBUG(D_INFO, "No resource %llu\n", res_id->name[0]);
- return 0;
- }
-
- LDLM_RESOURCE_ADDREF(res);
- count = ldlm_cancel_resource_local(res, &cancels, policy, mode,
- 0, flags | LCF_BL_AST, opaque);
- rc = ldlm_cli_cancel_list(&cancels, count, NULL, flags);
- if (rc != ELDLM_OK)
- CERROR("canceling unused lock " DLDLMRES ": rc = %d\n",
- PLDLMRES(res), rc);
-
- LDLM_RESOURCE_DELREF(res);
- ldlm_resource_putref(res);
- return 0;
-}
-EXPORT_SYMBOL(ldlm_cli_cancel_unused_resource);
-
-struct ldlm_cli_cancel_arg {
- int lc_flags;
- void *lc_opaque;
-};
-
-static int ldlm_cli_hash_cancel_unused(struct cfs_hash *hs,
- struct cfs_hash_bd *bd,
- struct hlist_node *hnode, void *arg)
-{
- struct ldlm_resource *res = cfs_hash_object(hs, hnode);
- struct ldlm_cli_cancel_arg *lc = arg;
-
- ldlm_cli_cancel_unused_resource(ldlm_res_to_ns(res), &res->lr_name,
- NULL, LCK_MINMODE,
- lc->lc_flags, lc->lc_opaque);
- /* must return 0 for hash iteration */
- return 0;
-}
-
-/**
- * Cancel all locks on a namespace (or a specific resource, if given)
- * that have 0 readers/writers.
- *
- * If flags & LCF_LOCAL, throw the locks away without trying
- * to notify the server.
- */
-int ldlm_cli_cancel_unused(struct ldlm_namespace *ns,
- const struct ldlm_res_id *res_id,
- enum ldlm_cancel_flags flags, void *opaque)
-{
- struct ldlm_cli_cancel_arg arg = {
- .lc_flags = flags,
- .lc_opaque = opaque,
- };
-
- if (!ns)
- return ELDLM_OK;
-
- if (res_id) {
- return ldlm_cli_cancel_unused_resource(ns, res_id, NULL,
- LCK_MINMODE, flags,
- opaque);
- } else {
- cfs_hash_for_each_nolock(ns->ns_rs_hash,
- ldlm_cli_hash_cancel_unused, &arg, 0);
- return ELDLM_OK;
- }
-}
-EXPORT_SYMBOL(ldlm_cli_cancel_unused);
-
-/* Lock iterators. */
-
-static int ldlm_resource_foreach(struct ldlm_resource *res,
- ldlm_iterator_t iter, void *closure)
-{
- struct ldlm_lock *tmp;
- struct ldlm_lock *lock;
- int rc = LDLM_ITER_CONTINUE;
-
- if (!res)
- return LDLM_ITER_CONTINUE;
-
- lock_res(res);
- list_for_each_entry_safe(lock, tmp, &res->lr_granted, l_res_link) {
- if (iter(lock, closure) == LDLM_ITER_STOP) {
- rc = LDLM_ITER_STOP;
- goto out;
- }
- }
-
- list_for_each_entry_safe(lock, tmp, &res->lr_waiting, l_res_link) {
- if (iter(lock, closure) == LDLM_ITER_STOP) {
- rc = LDLM_ITER_STOP;
- goto out;
- }
- }
- out:
- unlock_res(res);
- return rc;
-}
-
-struct iter_helper_data {
- ldlm_iterator_t iter;
- void *closure;
-};
-
-static int ldlm_iter_helper(struct ldlm_lock *lock, void *closure)
-{
- struct iter_helper_data *helper = closure;
-
- return helper->iter(lock, helper->closure);
-}
-
-static int ldlm_res_iter_helper(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode, void *arg)
-
-{
- struct ldlm_resource *res = cfs_hash_object(hs, hnode);
-
- return ldlm_resource_foreach(res, ldlm_iter_helper, arg) ==
- LDLM_ITER_STOP;
-}
-
-static void ldlm_namespace_foreach(struct ldlm_namespace *ns,
- ldlm_iterator_t iter, void *closure)
-
-{
- struct iter_helper_data helper = {
- .iter = iter,
- .closure = closure,
- };
-
- cfs_hash_for_each_nolock(ns->ns_rs_hash,
- ldlm_res_iter_helper, &helper, 0);
-}
-
-/* Non-blocking function to manipulate a lock whose cb_data is being put away.
- * return 0: no resource found
- * > 0: must be LDLM_ITER_STOP/LDLM_ITER_CONTINUE.
- * < 0: errors
- */
-int ldlm_resource_iterate(struct ldlm_namespace *ns,
- const struct ldlm_res_id *res_id,
- ldlm_iterator_t iter, void *data)
-{
- struct ldlm_resource *res;
- int rc;
-
- LASSERTF(ns, "must pass in namespace\n");
-
- res = ldlm_resource_get(ns, NULL, res_id, 0, 0);
- if (IS_ERR(res))
- return 0;
-
- LDLM_RESOURCE_ADDREF(res);
- rc = ldlm_resource_foreach(res, iter, data);
- LDLM_RESOURCE_DELREF(res);
- ldlm_resource_putref(res);
- return rc;
-}
-EXPORT_SYMBOL(ldlm_resource_iterate);
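-
-/* Sketch of an iterator callback (illustrative only; the callback name is
- * made up): count the locks on a resource via ldlm_resource_iterate():
- *
- *	static int count_locks_cb(struct ldlm_lock *lock, void *closure)
- *	{
- *		(*(int *)closure)++;
- *		return LDLM_ITER_CONTINUE;
- *	}
- */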
-
-/* Lock replay */
-
-static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure)
-{
- struct list_head *list = closure;
-
- /* we use l_pending_chain here, because it's unused on clients. */
- LASSERTF(list_empty(&lock->l_pending_chain),
- "lock %p next %p prev %p\n",
- lock, &lock->l_pending_chain.next,
- &lock->l_pending_chain.prev);
- /* Don't replay locks left after eviction (bug 9573), or locks being
- * actively cancelled (bug 17614). Get a reference on a lock so that
- * it does not disappear under us (e.g. due to cancel).
- */
- if (!(lock->l_flags & (LDLM_FL_FAILED | LDLM_FL_BL_DONE))) {
- list_add(&lock->l_pending_chain, list);
- LDLM_LOCK_GET(lock);
- }
-
- return LDLM_ITER_CONTINUE;
-}
-
-static int replay_lock_interpret(const struct lu_env *env,
- struct ptlrpc_request *req,
- struct ldlm_async_args *aa, int rc)
-{
- struct ldlm_lock *lock;
- struct ldlm_reply *reply;
- struct obd_export *exp;
-
- atomic_dec(&req->rq_import->imp_replay_inflight);
- if (rc != ELDLM_OK)
- goto out;
-
- reply = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
- if (!reply) {
- rc = -EPROTO;
- goto out;
- }
-
- lock = ldlm_handle2lock(&aa->lock_handle);
- if (!lock) {
- CERROR("received replay ack for unknown local cookie %#llx remote cookie %#llx from server %s id %s\n",
- aa->lock_handle.cookie, reply->lock_handle.cookie,
- req->rq_export->exp_client_uuid.uuid,
- libcfs_id2str(req->rq_peer));
- rc = -ESTALE;
- goto out;
- }
-
- /* Key change rehash lock in per-export hash with new key */
- exp = req->rq_export;
- lock->l_remote_handle = reply->lock_handle;
-
- LDLM_DEBUG(lock, "replayed lock:");
- ptlrpc_import_recovery_state_machine(req->rq_import);
- LDLM_LOCK_PUT(lock);
-out:
- if (rc != ELDLM_OK)
- ptlrpc_connect_import(req->rq_import);
-
- return rc;
-}
-
-static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
-{
- struct ptlrpc_request *req;
- struct ldlm_async_args *aa;
- struct ldlm_request *body;
- int flags;
-
- /* Bug 11974: Do not replay a lock which is actively being canceled */
- if (ldlm_is_bl_done(lock)) {
- LDLM_DEBUG(lock, "Not replaying canceled lock:");
- return 0;
- }
-
- /* If this is a reply-less callback lock, we cannot replay it, since the
- * server might have long since dropped it, while the notification of
- * that event was lost on the network (and the server may already have
- * granted a conflicting lock).
- */
- if (ldlm_is_cancel_on_block(lock)) {
- LDLM_DEBUG(lock, "Not replaying reply-less lock:");
- ldlm_lock_cancel(lock);
- return 0;
- }
-
- /*
- * If granted mode matches the requested mode, this lock is granted.
- *
- * If they differ, but we have a granted mode, then we were granted
- * one mode and now want another: ergo, converting.
- *
- * If we haven't been granted anything and are on a resource list,
- * then we're blocked/waiting.
- *
- * If we haven't been granted anything and we're NOT on a resource list,
- * then we haven't got a reply yet and don't have a known disposition.
- * This happens whenever a lock enqueue is the request that triggers
- * recovery.
- */
- if (lock->l_granted_mode == lock->l_req_mode)
- flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_GRANTED;
- else if (lock->l_granted_mode)
- flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_CONV;
- else if (!list_empty(&lock->l_res_link))
- flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_WAIT;
- else
- flags = LDLM_FL_REPLAY;
-
- req = ptlrpc_request_alloc_pack(imp, &RQF_LDLM_ENQUEUE,
- LUSTRE_DLM_VERSION, LDLM_ENQUEUE);
- if (!req)
- return -ENOMEM;
-
- /* We're part of recovery, so don't wait for it. */
- req->rq_send_state = LUSTRE_IMP_REPLAY_LOCKS;
-
- body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
- ldlm_lock2desc(lock, &body->lock_desc);
- body->lock_flags = ldlm_flags_to_wire(flags);
-
- ldlm_lock2handle(lock, &body->lock_handle[0]);
- if (lock->l_lvb_len > 0)
- req_capsule_extend(&req->rq_pill, &RQF_LDLM_ENQUEUE_LVB);
- req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER,
- lock->l_lvb_len);
- ptlrpc_request_set_replen(req);
- /* Notify the server we've replayed all requests.
- * Also, we mark the request to be put on a dedicated
- * queue to be processed after all request replays.
- * Bug 6063.
- */
- lustre_msg_set_flags(req->rq_reqmsg, MSG_REQ_REPLAY_DONE);
-
- LDLM_DEBUG(lock, "replaying lock:");
-
- atomic_inc(&req->rq_import->imp_replay_inflight);
- BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args));
- aa = ptlrpc_req_async_args(req);
- aa->lock_handle = body->lock_handle[0];
- req->rq_interpret_reply = (ptlrpc_interpterer_t)replay_lock_interpret;
- ptlrpcd_add_req(req);
-
- return 0;
-}
-
-/**
- * Cancel as many unused locks as possible before replay. Since we are
- * in recovery, we can't wait for any outstanding RPCs to send any RPC
- * to the server.
- *
- * Called only in recovery before replaying locks. There is no need to
- * replay locks that are unused. Since the clients may hold thousands of
- * cached unused locks, dropping the unused locks can greatly reduce the
- * load on the servers at recovery time.
- */
-static void ldlm_cancel_unused_locks_for_replay(struct ldlm_namespace *ns)
-{
- int canceled;
- LIST_HEAD(cancels);
-
- CDEBUG(D_DLMTRACE,
- "Dropping as many unused locks as possible before replay for namespace %s (%d)\n",
- ldlm_ns_name(ns), ns->ns_nr_unused);
-
- /* We don't need to care whether or not LRU resize is enabled
- * because the LDLM_LRU_FLAG_NO_WAIT policy doesn't use the
- * count parameter
- */
- canceled = ldlm_cancel_lru_local(ns, &cancels, ns->ns_nr_unused, 0,
- LCF_LOCAL, LDLM_LRU_FLAG_NO_WAIT);
-
- CDEBUG(D_DLMTRACE, "Canceled %d unused locks from namespace %s\n",
- canceled, ldlm_ns_name(ns));
-}
-
-int ldlm_replay_locks(struct obd_import *imp)
-{
- struct ldlm_namespace *ns = imp->imp_obd->obd_namespace;
- LIST_HEAD(list);
- struct ldlm_lock *lock, *next;
- int rc = 0;
-
- LASSERT(atomic_read(&imp->imp_replay_inflight) == 0);
-
- /* don't replay locks if import failed recovery */
- if (imp->imp_vbr_failed)
- return 0;
-
- /* ensure this doesn't fall to 0 before all have been queued */
- atomic_inc(&imp->imp_replay_inflight);
-
- if (ldlm_cancel_unused_locks_before_replay)
- ldlm_cancel_unused_locks_for_replay(ns);
-
- ldlm_namespace_foreach(ns, ldlm_chain_lock_for_replay, &list);
-
- list_for_each_entry_safe(lock, next, &list, l_pending_chain) {
- list_del_init(&lock->l_pending_chain);
- if (rc) {
- LDLM_LOCK_RELEASE(lock);
- continue; /* or try to do the rest? */
- }
- rc = replay_one_lock(imp, lock);
- LDLM_LOCK_RELEASE(lock);
- }
-
- atomic_dec(&imp->imp_replay_inflight);
-
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
deleted file mode 100644
index c93b019b8e37..000000000000
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
+++ /dev/null
@@ -1,1318 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2010, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ldlm/ldlm_resource.c
- *
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Peter Braam <braam@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LDLM
-#include <lustre_dlm.h>
-#include <lustre_fid.h>
-#include <obd_class.h>
-#include "ldlm_internal.h"
-#include <linux/libcfs/libcfs_hash.h>
-
-struct kmem_cache *ldlm_resource_slab, *ldlm_lock_slab;
-
-int ldlm_srv_namespace_nr;
-int ldlm_cli_namespace_nr;
-
-struct mutex ldlm_srv_namespace_lock;
-LIST_HEAD(ldlm_srv_namespace_list);
-
-struct mutex ldlm_cli_namespace_lock;
-/* Client namespaces that have active resources in them.
- * Once all resources go away, ldlm_poold moves such namespaces to the
- * inactive list.
- */
-LIST_HEAD(ldlm_cli_active_namespace_list);
-/* Client namespaces that don't have any locks in them */
-static LIST_HEAD(ldlm_cli_inactive_namespace_list);
-
-static struct dentry *ldlm_debugfs_dir;
-static struct dentry *ldlm_ns_debugfs_dir;
-struct dentry *ldlm_svc_debugfs_dir;
-
-/* During a debug dump, print only a limited number of granted locks per
- * resource to avoid a DDoS.
- */
-static unsigned int ldlm_dump_granted_max = 256;
-
-static ssize_t
-lprocfs_wr_dump_ns(struct file *file, const char __user *buffer,
- size_t count, loff_t *off)
-{
- ldlm_dump_all_namespaces(LDLM_NAMESPACE_SERVER, D_DLMTRACE);
- ldlm_dump_all_namespaces(LDLM_NAMESPACE_CLIENT, D_DLMTRACE);
- return count;
-}
-
-LPROC_SEQ_FOPS_WR_ONLY(ldlm, dump_ns);
-
-static int ldlm_rw_uint_seq_show(struct seq_file *m, void *v)
-{
- seq_printf(m, "%u\n", *(unsigned int *)m->private);
- return 0;
-}
-
-static ssize_t
-ldlm_rw_uint_seq_write(struct file *file, const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct seq_file *seq = file->private_data;
-
- if (count == 0)
- return 0;
- return kstrtouint_from_user(buffer, count, 0,
- (unsigned int *)seq->private);
-}
-
-LPROC_SEQ_FOPS(ldlm_rw_uint);
-
-static struct lprocfs_vars ldlm_debugfs_list[] = {
- { "dump_namespaces", &ldlm_dump_ns_fops, NULL, 0222 },
- { "dump_granted_max", &ldlm_rw_uint_fops, &ldlm_dump_granted_max },
- { NULL }
-};
-
-void ldlm_debugfs_setup(void)
-{
- ldlm_debugfs_dir = debugfs_create_dir(OBD_LDLM_DEVICENAME,
- debugfs_lustre_root);
-
- ldlm_ns_debugfs_dir = debugfs_create_dir("namespaces",
- ldlm_debugfs_dir);
-
- ldlm_svc_debugfs_dir = debugfs_create_dir("services", ldlm_debugfs_dir);
-
- ldebugfs_add_vars(ldlm_debugfs_dir, ldlm_debugfs_list, NULL);
-}
-
-void ldlm_debugfs_cleanup(void)
-{
- debugfs_remove_recursive(ldlm_svc_debugfs_dir);
- debugfs_remove_recursive(ldlm_ns_debugfs_dir);
- debugfs_remove_recursive(ldlm_debugfs_dir);
-}
-
-static ssize_t resource_count_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace,
- ns_kobj);
- __u64 res = 0;
- struct cfs_hash_bd bd;
- int i;
-
- /* result is not strictly consistent */
- cfs_hash_for_each_bucket(ns->ns_rs_hash, &bd, i)
- res += cfs_hash_bd_count_get(&bd);
- return sprintf(buf, "%lld\n", res);
-}
-LUSTRE_RO_ATTR(resource_count);
-
-static ssize_t lock_count_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace,
- ns_kobj);
- __u64 locks;
-
- locks = lprocfs_stats_collector(ns->ns_stats, LDLM_NSS_LOCKS,
- LPROCFS_FIELDS_FLAGS_SUM);
- return sprintf(buf, "%lld\n", locks);
-}
-LUSTRE_RO_ATTR(lock_count);
-
-static ssize_t lock_unused_count_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace,
- ns_kobj);
-
- return sprintf(buf, "%d\n", ns->ns_nr_unused);
-}
-LUSTRE_RO_ATTR(lock_unused_count);
-
-static ssize_t lru_size_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace,
- ns_kobj);
- __u32 *nr = &ns->ns_max_unused;
-
- if (ns_connect_lru_resize(ns))
- nr = &ns->ns_nr_unused;
- return sprintf(buf, "%u\n", *nr);
-}
-
-static ssize_t lru_size_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer, size_t count)
-{
- struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace,
- ns_kobj);
- unsigned long tmp;
- int lru_resize;
- int err;
-
- if (strncmp(buffer, "clear", 5) == 0) {
- CDEBUG(D_DLMTRACE,
- "dropping all unused locks from namespace %s\n",
- ldlm_ns_name(ns));
- if (ns_connect_lru_resize(ns)) {
- int canceled, unused = ns->ns_nr_unused;
-
- /* Try to cancel all @ns_nr_unused locks. */
- canceled = ldlm_cancel_lru(ns, unused, 0,
- LDLM_LRU_FLAG_PASSED);
- if (canceled < unused) {
- CDEBUG(D_DLMTRACE,
- "not all requested locks are canceled, requested: %d, canceled: %d\n",
- unused,
- canceled);
- return -EINVAL;
- }
- } else {
- tmp = ns->ns_max_unused;
- ns->ns_max_unused = 0;
- ldlm_cancel_lru(ns, 0, 0, LDLM_LRU_FLAG_PASSED);
- ns->ns_max_unused = tmp;
- }
- return count;
- }
-
- err = kstrtoul(buffer, 10, &tmp);
- if (err != 0) {
- CERROR("lru_size: invalid value written\n");
- return -EINVAL;
- }
- lru_resize = (tmp == 0);
-
- if (ns_connect_lru_resize(ns)) {
- if (!lru_resize)
- ns->ns_max_unused = (unsigned int)tmp;
-
- if (tmp > ns->ns_nr_unused)
- tmp = ns->ns_nr_unused;
- tmp = ns->ns_nr_unused - tmp;
-
- CDEBUG(D_DLMTRACE,
- "changing namespace %s unused locks from %u to %u\n",
- ldlm_ns_name(ns), ns->ns_nr_unused,
- (unsigned int)tmp);
- ldlm_cancel_lru(ns, tmp, LCF_ASYNC, LDLM_LRU_FLAG_PASSED);
-
- if (!lru_resize) {
- CDEBUG(D_DLMTRACE,
- "disable lru_resize for namespace %s\n",
- ldlm_ns_name(ns));
- ns->ns_connect_flags &= ~OBD_CONNECT_LRU_RESIZE;
- }
- } else {
- CDEBUG(D_DLMTRACE,
- "changing namespace %s max_unused from %u to %u\n",
- ldlm_ns_name(ns), ns->ns_max_unused,
- (unsigned int)tmp);
- ns->ns_max_unused = (unsigned int)tmp;
- ldlm_cancel_lru(ns, 0, LCF_ASYNC, LDLM_LRU_FLAG_PASSED);
-
- /* Make sure that LRU resize was originally supported before
- * turning it on here.
- */
- if (lru_resize &&
- (ns->ns_orig_connect_flags & OBD_CONNECT_LRU_RESIZE)) {
- CDEBUG(D_DLMTRACE,
- "enable lru_resize for namespace %s\n",
- ldlm_ns_name(ns));
- ns->ns_connect_flags |= OBD_CONNECT_LRU_RESIZE;
- }
- }
-
- return count;
-}
-LUSTRE_RW_ATTR(lru_size);
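-
-/* Usage note (paths per the sysfs comment below; behaviour as implemented
- * above): writing "clear" to
- * /sys/fs/lustre/ldlm/namespaces/<name>/lru_size drops all unused locks; a
- * non-zero value fixes the LRU size and disables LRU resize; 0 re-enables
- * LRU resize if the server originally supported it.
- */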
-
-static ssize_t lru_max_age_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace,
- ns_kobj);
-
- return sprintf(buf, "%u\n", ns->ns_max_age);
-}
-
-static ssize_t lru_max_age_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer, size_t count)
-{
- struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace,
- ns_kobj);
- unsigned long tmp;
- int err;
-
- err = kstrtoul(buffer, 10, &tmp);
- if (err != 0)
- return -EINVAL;
-
- ns->ns_max_age = tmp;
-
- return count;
-}
-LUSTRE_RW_ATTR(lru_max_age);
-
-static ssize_t early_lock_cancel_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace,
- ns_kobj);
-
- return sprintf(buf, "%d\n", ns_connect_cancelset(ns));
-}
-
-static ssize_t early_lock_cancel_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace,
- ns_kobj);
- unsigned long supp = -1;
- int rc;
-
- rc = kstrtoul(buffer, 10, &supp);
- if (rc < 0)
- return rc;
-
- if (supp == 0)
- ns->ns_connect_flags &= ~OBD_CONNECT_CANCELSET;
- else if (ns->ns_orig_connect_flags & OBD_CONNECT_CANCELSET)
- ns->ns_connect_flags |= OBD_CONNECT_CANCELSET;
- return count;
-}
-LUSTRE_RW_ATTR(early_lock_cancel);
-
-/* These are for namespaces in /sys/fs/lustre/ldlm/namespaces/ */
-static struct attribute *ldlm_ns_attrs[] = {
- &lustre_attr_resource_count.attr,
- &lustre_attr_lock_count.attr,
- &lustre_attr_lock_unused_count.attr,
- &lustre_attr_lru_size.attr,
- &lustre_attr_lru_max_age.attr,
- &lustre_attr_early_lock_cancel.attr,
- NULL,
-};
-
-static void ldlm_ns_release(struct kobject *kobj)
-{
- struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace,
- ns_kobj);
- complete(&ns->ns_kobj_unregister);
-}
-
-static struct kobj_type ldlm_ns_ktype = {
- .default_attrs = ldlm_ns_attrs,
- .sysfs_ops = &lustre_sysfs_ops,
- .release = ldlm_ns_release,
-};
-
-static void ldlm_namespace_debugfs_unregister(struct ldlm_namespace *ns)
-{
- debugfs_remove_recursive(ns->ns_debugfs_entry);
-
- if (ns->ns_stats)
- lprocfs_free_stats(&ns->ns_stats);
-}
-
-static void ldlm_namespace_sysfs_unregister(struct ldlm_namespace *ns)
-{
- kobject_put(&ns->ns_kobj);
- wait_for_completion(&ns->ns_kobj_unregister);
-}
-
-static int ldlm_namespace_sysfs_register(struct ldlm_namespace *ns)
-{
- int err;
-
- ns->ns_kobj.kset = ldlm_ns_kset;
- init_completion(&ns->ns_kobj_unregister);
- err = kobject_init_and_add(&ns->ns_kobj, &ldlm_ns_ktype, NULL,
- "%s", ldlm_ns_name(ns));
-
- ns->ns_stats = lprocfs_alloc_stats(LDLM_NSS_LAST, 0);
- if (!ns->ns_stats) {
- kobject_put(&ns->ns_kobj);
- return -ENOMEM;
- }
-
- lprocfs_counter_init(ns->ns_stats, LDLM_NSS_LOCKS,
- LPROCFS_CNTR_AVGMINMAX, "locks", "locks");
-
- return err;
-}
-
-static int ldlm_namespace_debugfs_register(struct ldlm_namespace *ns)
-{
- struct dentry *ns_entry;
-
- if (!IS_ERR_OR_NULL(ns->ns_debugfs_entry)) {
- ns_entry = ns->ns_debugfs_entry;
- } else {
- ns_entry = debugfs_create_dir(ldlm_ns_name(ns),
- ldlm_ns_debugfs_dir);
- if (!ns_entry)
- return -ENOMEM;
- ns->ns_debugfs_entry = ns_entry;
- }
-
- return 0;
-}
-
-#undef MAX_STRING_SIZE
-
-static struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res)
-{
- LASSERT(res);
- LASSERT(res != LP_POISON);
- atomic_inc(&res->lr_refcount);
- CDEBUG(D_INFO, "getref res: %p count: %d\n", res,
- atomic_read(&res->lr_refcount));
- return res;
-}
-
-static unsigned int ldlm_res_hop_hash(struct cfs_hash *hs,
- const void *key, unsigned int mask)
-{
- const struct ldlm_res_id *id = key;
- unsigned int val = 0;
- unsigned int i;
-
- for (i = 0; i < RES_NAME_SIZE; i++)
- val += id->name[i];
- return val & mask;
-}
-
-static unsigned int ldlm_res_hop_fid_hash(struct cfs_hash *hs,
- const void *key, unsigned int mask)
-{
- const struct ldlm_res_id *id = key;
- struct lu_fid fid;
- __u32 hash;
- __u32 val;
-
- fid.f_seq = id->name[LUSTRE_RES_ID_SEQ_OFF];
- fid.f_oid = (__u32)id->name[LUSTRE_RES_ID_VER_OID_OFF];
- fid.f_ver = (__u32)(id->name[LUSTRE_RES_ID_VER_OID_OFF] >> 32);
-
- hash = fid_flatten32(&fid);
- hash += (hash >> 4) + (hash << 12); /* mixing oid and seq */
- if (id->name[LUSTRE_RES_ID_HSH_OFF] != 0) {
- val = id->name[LUSTRE_RES_ID_HSH_OFF];
- hash += (val >> 5) + (val << 11);
- } else {
- val = fid_oid(&fid);
- }
- hash = hash_long(hash, hs->hs_bkt_bits);
- /* give me another random factor */
- hash -= hash_long((unsigned long)hs, val % 11 + 3);
-
- hash <<= hs->hs_cur_bits - hs->hs_bkt_bits;
- hash |= ldlm_res_hop_hash(hs, key, CFS_HASH_NBKT(hs) - 1);
-
- return hash & mask;
-}
-
-static void *ldlm_res_hop_key(struct hlist_node *hnode)
-{
- struct ldlm_resource *res;
-
- res = hlist_entry(hnode, struct ldlm_resource, lr_hash);
- return &res->lr_name;
-}
-
-static int ldlm_res_hop_keycmp(const void *key, struct hlist_node *hnode)
-{
- struct ldlm_resource *res;
-
- res = hlist_entry(hnode, struct ldlm_resource, lr_hash);
- return ldlm_res_eq((const struct ldlm_res_id *)key,
- (const struct ldlm_res_id *)&res->lr_name);
-}
-
-static void *ldlm_res_hop_object(struct hlist_node *hnode)
-{
- return hlist_entry(hnode, struct ldlm_resource, lr_hash);
-}
-
-static void ldlm_res_hop_get_locked(struct cfs_hash *hs,
- struct hlist_node *hnode)
-{
- struct ldlm_resource *res;
-
- res = hlist_entry(hnode, struct ldlm_resource, lr_hash);
- ldlm_resource_getref(res);
-}
-
-static void ldlm_res_hop_put(struct cfs_hash *hs, struct hlist_node *hnode)
-{
- struct ldlm_resource *res;
-
- res = hlist_entry(hnode, struct ldlm_resource, lr_hash);
- ldlm_resource_putref(res);
-}
-
-static struct cfs_hash_ops ldlm_ns_hash_ops = {
- .hs_hash = ldlm_res_hop_hash,
- .hs_key = ldlm_res_hop_key,
- .hs_keycmp = ldlm_res_hop_keycmp,
- .hs_keycpy = NULL,
- .hs_object = ldlm_res_hop_object,
- .hs_get = ldlm_res_hop_get_locked,
- .hs_put = ldlm_res_hop_put
-};
-
-static struct cfs_hash_ops ldlm_ns_fid_hash_ops = {
- .hs_hash = ldlm_res_hop_fid_hash,
- .hs_key = ldlm_res_hop_key,
- .hs_keycmp = ldlm_res_hop_keycmp,
- .hs_keycpy = NULL,
- .hs_object = ldlm_res_hop_object,
- .hs_get = ldlm_res_hop_get_locked,
- .hs_put = ldlm_res_hop_put
-};
-
-struct ldlm_ns_hash_def {
- enum ldlm_ns_type nsd_type;
- /** hash bucket bits */
- unsigned int nsd_bkt_bits;
- /** hash bits */
- unsigned int nsd_all_bits;
- /** hash operations */
- struct cfs_hash_ops *nsd_hops;
-};
-
-static struct ldlm_ns_hash_def ldlm_ns_hash_defs[] = {
- {
- .nsd_type = LDLM_NS_TYPE_MDC,
- .nsd_bkt_bits = 11,
- .nsd_all_bits = 16,
- .nsd_hops = &ldlm_ns_fid_hash_ops,
- },
- {
- .nsd_type = LDLM_NS_TYPE_MDT,
- .nsd_bkt_bits = 14,
- .nsd_all_bits = 21,
- .nsd_hops = &ldlm_ns_fid_hash_ops,
- },
- {
- .nsd_type = LDLM_NS_TYPE_OSC,
- .nsd_bkt_bits = 8,
- .nsd_all_bits = 12,
- .nsd_hops = &ldlm_ns_hash_ops,
- },
- {
- .nsd_type = LDLM_NS_TYPE_OST,
- .nsd_bkt_bits = 11,
- .nsd_all_bits = 17,
- .nsd_hops = &ldlm_ns_hash_ops,
- },
- {
- .nsd_type = LDLM_NS_TYPE_MGC,
- .nsd_bkt_bits = 4,
- .nsd_all_bits = 4,
- .nsd_hops = &ldlm_ns_hash_ops,
- },
- {
- .nsd_type = LDLM_NS_TYPE_MGT,
- .nsd_bkt_bits = 4,
- .nsd_all_bits = 4,
- .nsd_hops = &ldlm_ns_hash_ops,
- },
- {
- .nsd_type = LDLM_NS_TYPE_UNKNOWN,
- },
-};
-
-/** Register \a ns in the list of namespaces */
-static void ldlm_namespace_register(struct ldlm_namespace *ns,
- enum ldlm_side client)
-{
- mutex_lock(ldlm_namespace_lock(client));
- LASSERT(list_empty(&ns->ns_list_chain));
- list_add(&ns->ns_list_chain, &ldlm_cli_inactive_namespace_list);
- ldlm_namespace_nr_inc(client);
- mutex_unlock(ldlm_namespace_lock(client));
-}
-
-/**
- * Create and initialize new empty namespace.
- */
-struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obd, char *name,
- enum ldlm_side client,
- enum ldlm_appetite apt,
- enum ldlm_ns_type ns_type)
-{
- struct ldlm_namespace *ns = NULL;
- struct ldlm_ns_bucket *nsb;
- struct ldlm_ns_hash_def *nsd;
- struct cfs_hash_bd bd;
- int idx;
- int rc;
-
- LASSERT(obd);
-
- rc = ldlm_get_ref();
- if (rc) {
- CERROR("ldlm_get_ref failed: %d\n", rc);
- return NULL;
- }
-
- for (idx = 0;; idx++) {
- nsd = &ldlm_ns_hash_defs[idx];
- if (nsd->nsd_type == LDLM_NS_TYPE_UNKNOWN) {
- CERROR("Unknown type %d for ns %s\n", ns_type, name);
- goto out_ref;
- }
-
- if (nsd->nsd_type == ns_type)
- break;
- }
-
- ns = kzalloc(sizeof(*ns), GFP_NOFS);
- if (!ns)
- goto out_ref;
-
- ns->ns_rs_hash = cfs_hash_create(name,
- nsd->nsd_all_bits, nsd->nsd_all_bits,
- nsd->nsd_bkt_bits, sizeof(*nsb),
- CFS_HASH_MIN_THETA,
- CFS_HASH_MAX_THETA,
- nsd->nsd_hops,
- CFS_HASH_DEPTH |
- CFS_HASH_BIGNAME |
- CFS_HASH_SPIN_BKTLOCK |
- CFS_HASH_NO_ITEMREF);
- if (!ns->ns_rs_hash)
- goto out_ns;
-
- cfs_hash_for_each_bucket(ns->ns_rs_hash, &bd, idx) {
- nsb = cfs_hash_bd_extra_get(ns->ns_rs_hash, &bd);
- at_init(&nsb->nsb_at_estimate, ldlm_enqueue_min, 0);
- nsb->nsb_namespace = ns;
- }
-
- ns->ns_obd = obd;
- ns->ns_appetite = apt;
- ns->ns_client = client;
- ns->ns_name = kstrdup(name, GFP_KERNEL);
- if (!ns->ns_name)
- goto out_hash;
-
- INIT_LIST_HEAD(&ns->ns_list_chain);
- INIT_LIST_HEAD(&ns->ns_unused_list);
- spin_lock_init(&ns->ns_lock);
- atomic_set(&ns->ns_bref, 0);
- init_waitqueue_head(&ns->ns_waitq);
-
- ns->ns_max_parallel_ast = LDLM_DEFAULT_PARALLEL_AST_LIMIT;
- ns->ns_nr_unused = 0;
- ns->ns_max_unused = LDLM_DEFAULT_LRU_SIZE;
- ns->ns_max_age = LDLM_DEFAULT_MAX_ALIVE;
- ns->ns_orig_connect_flags = 0;
- ns->ns_connect_flags = 0;
- ns->ns_stopping = 0;
-
- rc = ldlm_namespace_sysfs_register(ns);
- if (rc != 0) {
- CERROR("Can't initialize ns sysfs, rc %d\n", rc);
- goto out_hash;
- }
-
- rc = ldlm_namespace_debugfs_register(ns);
- if (rc != 0) {
- CERROR("Can't initialize ns proc, rc %d\n", rc);
- goto out_sysfs;
- }
-
- idx = ldlm_namespace_nr_read(client);
- rc = ldlm_pool_init(&ns->ns_pool, ns, idx, client);
- if (rc) {
- CERROR("Can't initialize lock pool, rc %d\n", rc);
- goto out_proc;
- }
-
- ldlm_namespace_register(ns, client);
- return ns;
-out_proc:
- ldlm_namespace_debugfs_unregister(ns);
-out_sysfs:
- ldlm_namespace_sysfs_unregister(ns);
- ldlm_namespace_cleanup(ns, 0);
-out_hash:
- kfree(ns->ns_name);
- cfs_hash_putref(ns->ns_rs_hash);
-out_ns:
- kfree(ns);
-out_ref:
- ldlm_put_ref();
- return NULL;
-}
-EXPORT_SYMBOL(ldlm_namespace_new);
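-
-/* Illustrative creation call (hypothetical client-side values):
- *
- *	ns = ldlm_namespace_new(obd, "example-osc", LDLM_NAMESPACE_CLIENT,
- *				LDLM_NAMESPACE_MODEST, LDLM_NS_TYPE_OSC);
- *	if (!ns)
- *		rc = -ENOMEM;
- */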
-
-extern struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock);
-
-/**
- * Cancel and destroy all locks on a resource.
- *
- * If flags contains FL_LOCAL_ONLY, don't try to tell the server, just
- * clean up. This is currently only used for recovery, and we make
- * certain assumptions as a result--notably, that we shouldn't cancel
- * locks with refs.
- */
-static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
- __u64 flags)
-{
- int rc = 0;
- bool local_only = !!(flags & LDLM_FL_LOCAL_ONLY);
-
- do {
- struct ldlm_lock *lock = NULL, *tmp;
- struct lustre_handle lockh;
-
- /* First, look for a lock that has not yet been cleaned;
- * all cleaned locks are marked by the CLEANED flag.
- */
- lock_res(res);
- list_for_each_entry(tmp, q, l_res_link) {
- if (ldlm_is_cleaned(tmp))
- continue;
-
- lock = tmp;
- LDLM_LOCK_GET(lock);
- ldlm_set_cleaned(lock);
- break;
- }
-
- if (!lock) {
- unlock_res(res);
- break;
- }
-
- /* Set CBPENDING so nothing in the cancellation path
- * can match this lock.
- */
- ldlm_set_cbpending(lock);
- ldlm_set_failed(lock);
- lock->l_flags |= flags;
-
- /* ... without sending a CANCEL message for local_only. */
- if (local_only)
- ldlm_set_local_only(lock);
-
- if (local_only && (lock->l_readers || lock->l_writers)) {
- /* This is a little bit gross, but much better than the
- * alternative: pretend that we got a blocking AST from
- * the server, so that when the lock is decref'd, it
- * will go away ...
- */
- unlock_res(res);
- LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY");
- if (lock->l_flags & LDLM_FL_FAIL_LOC) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(4 * HZ);
- set_current_state(TASK_RUNNING);
- }
- if (lock->l_completion_ast)
- lock->l_completion_ast(lock, LDLM_FL_FAILED,
- NULL);
- LDLM_LOCK_RELEASE(lock);
- continue;
- }
-
- unlock_res(res);
- ldlm_lock2handle(lock, &lockh);
- rc = ldlm_cli_cancel(&lockh, LCF_LOCAL);
- if (rc)
- CERROR("ldlm_cli_cancel: %d\n", rc);
- LDLM_LOCK_RELEASE(lock);
- } while (1);
-}
-
-static int ldlm_resource_clean(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode, void *arg)
-{
- struct ldlm_resource *res = cfs_hash_object(hs, hnode);
- __u64 flags = *(__u64 *)arg;
-
- cleanup_resource(res, &res->lr_granted, flags);
- cleanup_resource(res, &res->lr_waiting, flags);
-
- return 0;
-}
-
-static int ldlm_resource_complain(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode, void *arg)
-{
- struct ldlm_resource *res = cfs_hash_object(hs, hnode);
-
- lock_res(res);
- CERROR("%s: namespace resource " DLDLMRES
- " (%p) refcount nonzero (%d) after lock cleanup; forcing cleanup.\n",
- ldlm_ns_name(ldlm_res_to_ns(res)), PLDLMRES(res), res,
- atomic_read(&res->lr_refcount) - 1);
-
- ldlm_resource_dump(D_ERROR, res);
- unlock_res(res);
- return 0;
-}
-
-/**
- * Cancel and destroy all locks in the namespace.
- *
- * Typically used during evictions, when the server has notified the client
- * that it was evicted and all of its state needs to be destroyed.
- * Also used during shutdown.
- */
-int ldlm_namespace_cleanup(struct ldlm_namespace *ns, __u64 flags)
-{
- if (!ns) {
- CDEBUG(D_INFO, "NULL ns, skipping cleanup\n");
- return ELDLM_OK;
- }
-
- cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_clean,
- &flags, 0);
- cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_complain,
- NULL, 0);
- return ELDLM_OK;
-}
-EXPORT_SYMBOL(ldlm_namespace_cleanup);
-
-/**
- * Attempts to free namespace.
- *
- * Only used when namespace goes away, like during an unmount.
- */
-static int __ldlm_namespace_free(struct ldlm_namespace *ns, int force)
-{
- /* At shutdown time, don't call the cancellation callback */
- ldlm_namespace_cleanup(ns, force ? LDLM_FL_LOCAL_ONLY : 0);
-
- if (atomic_read(&ns->ns_bref) > 0) {
- int rc;
-
- CDEBUG(D_DLMTRACE,
- "dlm namespace %s free waiting on refcount %d\n",
- ldlm_ns_name(ns), atomic_read(&ns->ns_bref));
-force_wait:
- if (force)
- rc = wait_event_idle_timeout(ns->ns_waitq,
- atomic_read(&ns->ns_bref) == 0,
- obd_timeout * HZ / 4) ? 0 : -ETIMEDOUT;
- else
- rc = l_wait_event_abortable(ns->ns_waitq,
- atomic_read(&ns->ns_bref) == 0);
-
- /* Forced cleanups should be able to reclaim all references,
- * so it's safe to wait forever... we can't leak locks...
- */
- if (force && rc == -ETIMEDOUT) {
- LCONSOLE_ERROR("Forced cleanup waiting for %s namespace with %d resources in use, (rc=%d)\n",
- ldlm_ns_name(ns),
- atomic_read(&ns->ns_bref), rc);
- goto force_wait;
- }
-
- if (atomic_read(&ns->ns_bref)) {
- LCONSOLE_ERROR("Cleanup waiting for %s namespace with %d resources in use, (rc=%d)\n",
- ldlm_ns_name(ns),
- atomic_read(&ns->ns_bref), rc);
- return ELDLM_NAMESPACE_EXISTS;
- }
- CDEBUG(D_DLMTRACE, "dlm namespace %s free done waiting\n",
- ldlm_ns_name(ns));
- }
-
- return ELDLM_OK;
-}
-
-/**
- * Performs various cleanups on the passed \a ns to make it drop its refc and
- * be ready for freeing. Waits for refc == 0.
- *
- * The following is done:
- * (0) Unregister \a ns from its list to make it inaccessible to potential
- * users such as the pools thread;
- * (1) Clear all locks in \a ns.
- */
-void ldlm_namespace_free_prior(struct ldlm_namespace *ns,
- struct obd_import *imp,
- int force)
-{
- int rc;
-
- if (!ns)
- return;
-
- spin_lock(&ns->ns_lock);
- ns->ns_stopping = 1;
- spin_unlock(&ns->ns_lock);
-
- /*
- * Can fail with -EINTR when force == 0, in which case we try harder.
- */
- rc = __ldlm_namespace_free(ns, force);
- if (rc != ELDLM_OK) {
- if (imp) {
- ptlrpc_disconnect_import(imp, 0);
- ptlrpc_invalidate_import(imp);
- }
-
- /*
- * With all requests dropped and the import inactive
- * we are guaranteed all references will be dropped.
- */
- rc = __ldlm_namespace_free(ns, 1);
- LASSERT(rc == 0);
- }
-}
-
-/** Unregister \a ns from the list of namespaces. */
-static void ldlm_namespace_unregister(struct ldlm_namespace *ns,
- enum ldlm_side client)
-{
- mutex_lock(ldlm_namespace_lock(client));
- LASSERT(!list_empty(&ns->ns_list_chain));
- /* Some asserts and possibly other parts of the code are still
- * using list_empty(&ns->ns_list_chain). This is why it is
- * important to use list_del_init() here.
- */
- list_del_init(&ns->ns_list_chain);
- ldlm_namespace_nr_dec(client);
- mutex_unlock(ldlm_namespace_lock(client));
-}
-
-/**
- * Frees memory structures related to \a ns. This is only done when
- * ldlm_namespace_free_prior() has successfully removed all resources
- * referencing \a ns and its refc == 0.
- */
-void ldlm_namespace_free_post(struct ldlm_namespace *ns)
-{
- if (!ns)
- return;
-
- /* Make sure that nobody can find this ns in its list. */
- ldlm_namespace_unregister(ns, ns->ns_client);
- /* Fini the pool _before_ the parent proc dir is removed. This is
- * important as ldlm_pool_fini() removes its own proc dir, which is a
- * child of @dir. Removing it after @dir may cause an oops.
- */
- ldlm_pool_fini(&ns->ns_pool);
-
- ldlm_namespace_debugfs_unregister(ns);
- ldlm_namespace_sysfs_unregister(ns);
- cfs_hash_putref(ns->ns_rs_hash);
- kfree(ns->ns_name);
- /* Namespace \a ns should not be on the list at this time; otherwise
- * this will cause issues related to the poold thread using a freed
- * \a ns.
- */
- LASSERT(list_empty(&ns->ns_list_chain));
- kfree(ns);
- ldlm_put_ref();
-}
-
-void ldlm_namespace_get(struct ldlm_namespace *ns)
-{
- atomic_inc(&ns->ns_bref);
-}
-
-/* This is only for callers that care about refcount */
-static int ldlm_namespace_get_return(struct ldlm_namespace *ns)
-{
- return atomic_inc_return(&ns->ns_bref);
-}
-
-void ldlm_namespace_put(struct ldlm_namespace *ns)
-{
- if (atomic_dec_and_lock(&ns->ns_bref, &ns->ns_lock)) {
- wake_up(&ns->ns_waitq);
- spin_unlock(&ns->ns_lock);
- }
-}
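-
-/*
- * ldlm_namespace_put() relies on atomic_dec_and_lock(): the spinlock is
- * taken only when the counter actually drops to zero, so the wake_up()
- * is serialized against any code that checks the counter under the same
- * lock. A minimal sketch of the idiom, with illustrative names:
- *
- *	// release side
- *	if (atomic_dec_and_lock(&obj->refs, &obj->lock)) {
- *		wake_up(&obj->waitq);
- *		spin_unlock(&obj->lock);
- *	}
- *
- *	// wait side (see __ldlm_namespace_free() above)
- *	wait_event(obj->waitq, atomic_read(&obj->refs) == 0);
- */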
-
-/** Should be called with ldlm_namespace_lock(client) taken. */
-void ldlm_namespace_move_to_active_locked(struct ldlm_namespace *ns,
- enum ldlm_side client)
-{
- LASSERT(!list_empty(&ns->ns_list_chain));
- LASSERT(mutex_is_locked(ldlm_namespace_lock(client)));
- list_move_tail(&ns->ns_list_chain, ldlm_namespace_list(client));
-}
-
-/** Should be called with ldlm_namespace_lock(client) taken. */
-void ldlm_namespace_move_to_inactive_locked(struct ldlm_namespace *ns,
- enum ldlm_side client)
-{
- LASSERT(!list_empty(&ns->ns_list_chain));
- LASSERT(mutex_is_locked(ldlm_namespace_lock(client)));
- list_move_tail(&ns->ns_list_chain, &ldlm_cli_inactive_namespace_list);
-}
-
-/** Should be called with ldlm_namespace_lock(client) taken. */
-struct ldlm_namespace *ldlm_namespace_first_locked(enum ldlm_side client)
-{
- LASSERT(mutex_is_locked(ldlm_namespace_lock(client)));
- LASSERT(!list_empty(ldlm_namespace_list(client)));
- return container_of(ldlm_namespace_list(client)->next,
- struct ldlm_namespace, ns_list_chain);
-}
-
-/** Create and initialize new resource. */
-static struct ldlm_resource *ldlm_resource_new(void)
-{
- struct ldlm_resource *res;
- int idx;
-
- res = kmem_cache_zalloc(ldlm_resource_slab, GFP_NOFS);
- if (!res)
- return NULL;
-
- INIT_LIST_HEAD(&res->lr_granted);
- INIT_LIST_HEAD(&res->lr_waiting);
-
- /* Initialize interval trees for each lock mode. */
- for (idx = 0; idx < LCK_MODE_NUM; idx++) {
- res->lr_itree[idx].lit_size = 0;
- res->lr_itree[idx].lit_mode = 1 << idx;
- res->lr_itree[idx].lit_root = NULL;
- }
-
- atomic_set(&res->lr_refcount, 1);
- spin_lock_init(&res->lr_lock);
- lu_ref_init(&res->lr_reference);
-
- /* The creator of the resource must unlock the mutex after LVB
- * initialization.
- */
- mutex_init(&res->lr_lvb_mutex);
- mutex_lock(&res->lr_lvb_mutex);
-
- return res;
-}
-
-/**
- * Return a reference to resource with given name, creating it if necessary.
- * Args: namespace with ns_lock unlocked
- * Locks: takes and releases NS hash-lock and res->lr_lock
- * Returns: referenced, unlocked ldlm_resource or NULL
- */
-struct ldlm_resource *
-ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
- const struct ldlm_res_id *name, enum ldlm_type type,
- int create)
-{
- struct hlist_node *hnode;
- struct ldlm_resource *res = NULL;
- struct cfs_hash_bd bd;
- __u64 version;
- int ns_refcount = 0;
- int rc;
-
- LASSERT(!parent);
- LASSERT(ns->ns_rs_hash);
- LASSERT(name->name[0] != 0);
-
- cfs_hash_bd_get_and_lock(ns->ns_rs_hash, (void *)name, &bd, 0);
- hnode = cfs_hash_bd_lookup_locked(ns->ns_rs_hash, &bd, (void *)name);
- if (hnode) {
- cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 0);
- goto lvbo_init;
- }
-
- version = cfs_hash_bd_version_get(&bd);
- cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 0);
-
- if (create == 0)
- return ERR_PTR(-ENOENT);
-
- LASSERTF(type >= LDLM_MIN_TYPE && type < LDLM_MAX_TYPE,
- "type: %d\n", type);
- res = ldlm_resource_new();
- if (!res)
- return ERR_PTR(-ENOMEM);
-
- res->lr_ns_bucket = cfs_hash_bd_extra_get(ns->ns_rs_hash, &bd);
- res->lr_name = *name;
- res->lr_type = type;
-
- cfs_hash_bd_lock(ns->ns_rs_hash, &bd, 1);
- hnode = (version == cfs_hash_bd_version_get(&bd)) ? NULL :
- cfs_hash_bd_lookup_locked(ns->ns_rs_hash, &bd, (void *)name);
-
- if (hnode) {
- /* Someone won the race and already added the resource. */
- cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 1);
- /* Clean lu_ref for failed resource. */
- lu_ref_fini(&res->lr_reference);
- /* We have taken lr_lvb_mutex. Drop it. */
- mutex_unlock(&res->lr_lvb_mutex);
- kmem_cache_free(ldlm_resource_slab, res);
-lvbo_init:
- res = hlist_entry(hnode, struct ldlm_resource, lr_hash);
- /* Synchronize with regard to resource creation. */
- if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
- mutex_lock(&res->lr_lvb_mutex);
- mutex_unlock(&res->lr_lvb_mutex);
- }
-
- if (unlikely(res->lr_lvb_len < 0)) {
- rc = res->lr_lvb_len;
- ldlm_resource_putref(res);
- res = ERR_PTR(rc);
- }
- return res;
- }
- /* We won! Let's add the resource. */
- cfs_hash_bd_add_locked(ns->ns_rs_hash, &bd, &res->lr_hash);
- if (cfs_hash_bd_count_get(&bd) == 1)
- ns_refcount = ldlm_namespace_get_return(ns);
-
- cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 1);
- if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
- OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CREATE_RESOURCE, 2);
- rc = ns->ns_lvbo->lvbo_init(res);
- if (rc < 0) {
- CERROR("%s: lvbo_init failed for resource %#llx:%#llx: rc = %d\n",
- ns->ns_obd->obd_name, name->name[0],
- name->name[1], rc);
- res->lr_lvb_len = rc;
- mutex_unlock(&res->lr_lvb_mutex);
- ldlm_resource_putref(res);
- return ERR_PTR(rc);
- }
- }
-
- /* We create resource with locked lr_lvb_mutex. */
- mutex_unlock(&res->lr_lvb_mutex);
-
- /* Let's see if we happened to be the very first resource in this
- * namespace. If so, and this is a client namespace, we need to move
- * the namespace into the active namespaces list to be patrolled by
- * the ldlm_poold.
- */
- if (ns_refcount == 1) {
- mutex_lock(ldlm_namespace_lock(LDLM_NAMESPACE_CLIENT));
- ldlm_namespace_move_to_active_locked(ns, LDLM_NAMESPACE_CLIENT);
- mutex_unlock(ldlm_namespace_lock(LDLM_NAMESPACE_CLIENT));
- }
-
- return res;
-}
-EXPORT_SYMBOL(ldlm_resource_get);
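-
-/*
- * ldlm_resource_get() above implements a race-free insert-if-absent on
- * the namespace hash without holding the bucket lock across the
- * allocation: it records the bucket version before dropping the read
- * lock, and on the write-locked re-entry repeats the lookup only if the
- * version has changed. Condensed control flow (names abbreviated):
- *
- *	read_lock(bd);
- *	obj = lookup(bd, key);
- *	ver = version(bd);
- *	read_unlock(bd);
- *	if (obj)
- *		return obj;		// fast path
- *	new = alloc();			// may sleep
- *	write_lock(bd);
- *	obj = (ver == version(bd)) ? NULL : lookup(bd, key);
- *	if (obj) {			// lost the race
- *		write_unlock(bd);
- *		free(new);
- *		return obj;
- *	}
- *	insert(bd, new);
- *	write_unlock(bd);
- *	return new;
- */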
-
-static void __ldlm_resource_putref_final(struct cfs_hash_bd *bd,
- struct ldlm_resource *res)
-{
- struct ldlm_ns_bucket *nsb = res->lr_ns_bucket;
- struct ldlm_namespace *ns = nsb->nsb_namespace;
-
- if (!list_empty(&res->lr_granted)) {
- ldlm_resource_dump(D_ERROR, res);
- LBUG();
- }
-
- if (!list_empty(&res->lr_waiting)) {
- ldlm_resource_dump(D_ERROR, res);
- LBUG();
- }
-
- cfs_hash_bd_del_locked(ns->ns_rs_hash,
- bd, &res->lr_hash);
- lu_ref_fini(&res->lr_reference);
- cfs_hash_bd_unlock(ns->ns_rs_hash, bd, 1);
- if (ns->ns_lvbo && ns->ns_lvbo->lvbo_free)
- ns->ns_lvbo->lvbo_free(res);
- if (cfs_hash_bd_count_get(bd) == 0)
- ldlm_namespace_put(ns);
- kmem_cache_free(ldlm_resource_slab, res);
-}
-
-void ldlm_resource_putref(struct ldlm_resource *res)
-{
- struct ldlm_namespace *ns = ldlm_res_to_ns(res);
- struct cfs_hash_bd bd;
-
- LASSERT_ATOMIC_GT_LT(&res->lr_refcount, 0, LI_POISON);
- CDEBUG(D_INFO, "putref res: %p count: %d\n",
- res, atomic_read(&res->lr_refcount) - 1);
-
- cfs_hash_bd_get(ns->ns_rs_hash, &res->lr_name, &bd);
- if (cfs_hash_bd_dec_and_lock(ns->ns_rs_hash, &bd, &res->lr_refcount))
- __ldlm_resource_putref_final(&bd, res);
-}
-EXPORT_SYMBOL(ldlm_resource_putref);
-
-/**
- * Add a lock to the specified lock list of a given resource.
- */
-void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head,
- struct ldlm_lock *lock)
-{
- check_res_locked(res);
-
- LDLM_DEBUG(lock, "About to add this lock:");
-
- if (ldlm_is_destroyed(lock)) {
- CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n");
- return;
- }
-
- LASSERT(list_empty(&lock->l_res_link));
-
- list_add_tail(&lock->l_res_link, head);
-}
-
-void ldlm_resource_unlink_lock(struct ldlm_lock *lock)
-{
- int type = lock->l_resource->lr_type;
-
- check_res_locked(lock->l_resource);
- if (type == LDLM_IBITS || type == LDLM_PLAIN)
- ldlm_unlink_lock_skiplist(lock);
- else if (type == LDLM_EXTENT)
- ldlm_extent_unlink_lock(lock);
- list_del_init(&lock->l_res_link);
-}
-EXPORT_SYMBOL(ldlm_resource_unlink_lock);
-
-void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc)
-{
- desc->lr_type = res->lr_type;
- desc->lr_name = res->lr_name;
-}
-
-/**
- * Print information about all locks in all namespaces on this node to debug
- * log.
- */
-void ldlm_dump_all_namespaces(enum ldlm_side client, int level)
-{
- struct ldlm_namespace *ns;
-
- if (!((libcfs_debug | D_ERROR) & level))
- return;
-
- mutex_lock(ldlm_namespace_lock(client));
-
- list_for_each_entry(ns, ldlm_namespace_list(client), ns_list_chain)
- ldlm_namespace_dump(level, ns);
-
- mutex_unlock(ldlm_namespace_lock(client));
-}
-
-static int ldlm_res_hash_dump(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode, void *arg)
-{
- struct ldlm_resource *res = cfs_hash_object(hs, hnode);
- int level = (int)(unsigned long)arg;
-
- lock_res(res);
- ldlm_resource_dump(level, res);
- unlock_res(res);
-
- return 0;
-}
-
-/**
- * Print information about all locks in this namespace on this node to debug
- * log.
- */
-void ldlm_namespace_dump(int level, struct ldlm_namespace *ns)
-{
- if (!((libcfs_debug | D_ERROR) & level))
- return;
-
- CDEBUG(level, "--- Namespace: %s (rc: %d, side: client)\n",
- ldlm_ns_name(ns), atomic_read(&ns->ns_bref));
-
- if (time_before(jiffies, ns->ns_next_dump))
- return;
-
- cfs_hash_for_each_nolock(ns->ns_rs_hash,
- ldlm_res_hash_dump,
- (void *)(unsigned long)level, 0);
- spin_lock(&ns->ns_lock);
- ns->ns_next_dump = jiffies + 10 * HZ;
- spin_unlock(&ns->ns_lock);
-}
-
-/**
- * Print information about all locks in this resource to debug log.
- */
-void ldlm_resource_dump(int level, struct ldlm_resource *res)
-{
- struct ldlm_lock *lock;
- unsigned int granted = 0;
-
- BUILD_BUG_ON(RES_NAME_SIZE != 4);
-
- if (!((libcfs_debug | D_ERROR) & level))
- return;
-
- CDEBUG(level, "--- Resource: " DLDLMRES " (%p) refcount = %d\n",
- PLDLMRES(res), res, atomic_read(&res->lr_refcount));
-
- if (!list_empty(&res->lr_granted)) {
- CDEBUG(level, "Granted locks (in reverse order):\n");
- list_for_each_entry_reverse(lock, &res->lr_granted,
- l_res_link) {
- LDLM_DEBUG_LIMIT(level, lock, "###");
- if (!(level & D_CANTMASK) &&
- ++granted > ldlm_dump_granted_max) {
- CDEBUG(level,
- "only dump %d granted locks to avoid DDOS.\n",
- granted);
- break;
- }
- }
- }
- if (!list_empty(&res->lr_waiting)) {
- CDEBUG(level, "Waiting locks:\n");
- list_for_each_entry(lock, &res->lr_waiting, l_res_link)
- LDLM_DEBUG_LIMIT(level, lock, "###");
- }
-}
-EXPORT_SYMBOL(ldlm_resource_dump);
diff --git a/drivers/staging/lustre/lustre/llite/Makefile b/drivers/staging/lustre/lustre/llite/Makefile
deleted file mode 100644
index 5200924182ae..000000000000
--- a/drivers/staging/lustre/lustre/llite/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LUSTRE_FS) += lustre.o
-lustre-y := dcache.o dir.o file.o llite_lib.o llite_nfs.o \
- rw.o rw26.o namei.o symlink.o llite_mmap.o range_lock.o \
- xattr.o xattr_cache.o xattr_security.o \
- super25.o statahead.o glimpse.o lcommon_cl.o lcommon_misc.o \
- vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o \
- lproc_llite.o
-
-lustre-$(CONFIG_FS_POSIX_ACL) += acl.o
diff --git a/drivers/staging/lustre/lustre/llite/acl.c b/drivers/staging/lustre/lustre/llite/acl.c
deleted file mode 100644
index 2ee9ff931236..000000000000
--- a/drivers/staging/lustre/lustre/llite/acl.c
+++ /dev/null
@@ -1,108 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/llite/acl.c
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include "llite_internal.h"
-
-struct posix_acl *ll_get_acl(struct inode *inode, int type)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct posix_acl *acl = NULL;
-
- spin_lock(&lli->lli_lock);
- /* VFS' acl_permission_check->check_acl will release the refcount */
- acl = posix_acl_dup(lli->lli_posix_acl);
- spin_unlock(&lli->lli_lock);
-
- return acl;
-}
-
-int ll_set_acl(struct inode *inode, struct posix_acl *acl, int type)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *req = NULL;
- const char *name = NULL;
- size_t value_size = 0;
- char *value = NULL;
- int rc = 0;
-
- switch (type) {
- case ACL_TYPE_ACCESS:
- name = XATTR_NAME_POSIX_ACL_ACCESS;
- if (acl)
- rc = posix_acl_update_mode(inode, &inode->i_mode, &acl);
- break;
-
- case ACL_TYPE_DEFAULT:
- name = XATTR_NAME_POSIX_ACL_DEFAULT;
- if (!S_ISDIR(inode->i_mode))
- rc = acl ? -EACCES : 0;
- break;
-
- default:
- rc = -EINVAL;
- break;
- }
- if (rc)
- return rc;
-
- if (acl) {
- value_size = posix_acl_xattr_size(acl->a_count);
- value = kmalloc(value_size, GFP_NOFS);
- if (!value) {
- rc = -ENOMEM;
- goto out;
- }
-
- rc = posix_acl_to_xattr(&init_user_ns, acl, value, value_size);
- if (rc < 0)
- goto out_value;
- }
-
- rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode),
- value ? OBD_MD_FLXATTR : OBD_MD_FLXATTRRM,
- name, value, value_size, 0, 0, &req);
-
- ptlrpc_req_finished(req);
-out_value:
- kfree(value);
-out:
- if (rc)
- forget_cached_acl(inode, type);
- else
- set_cached_acl(inode, type, acl);
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c
deleted file mode 100644
index 11b82c639bfe..000000000000
--- a/drivers/staging/lustre/lustre/llite/dcache.c
+++ /dev/null
@@ -1,300 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/quotaops.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_support.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <lustre_dlm.h>
-
-#include "llite_internal.h"
-
-static void free_dentry_data(struct rcu_head *head)
-{
- struct ll_dentry_data *lld;
-
- lld = container_of(head, struct ll_dentry_data, lld_rcu_head);
- kfree(lld);
-}
-
-/* should NOT be called with the dcache lock, see fs/dcache.c */
-static void ll_release(struct dentry *de)
-{
- struct ll_dentry_data *lld;
-
- LASSERT(de);
- lld = ll_d2d(de);
- if (lld->lld_it) {
- ll_intent_release(lld->lld_it);
- kfree(lld->lld_it);
- }
-
- de->d_fsdata = NULL;
- call_rcu(&lld->lld_rcu_head, free_dentry_data);
-}
-
-/* Compare if two dentries are the same. Don't match if the existing dentry
- * is marked invalid. Returns 1 if different, 0 if the same.
- *
- * This avoids a race where ll_lookup_it() instantiates a dentry, but we get
- * an AST before calling d_revalidate_it(). The dentry still exists (marked
- * INVALID) so d_lookup() matches it, but we have no lock on it (so
- * lock_match() fails) and we spin around real_lookup().
- *
- * This race doesn't apply to lookups in d_alloc_parallel(), and for
- * those we want to ensure that only one dentry with a given name is
- * in ll_lookup_nd() at a time. So allow invalid dentries to match
- * while d_in_lookup(). We will be called again when the lookup
- * completes, and can give a different answer then.
- */
-static int ll_dcompare(const struct dentry *dentry,
- unsigned int len, const char *str,
- const struct qstr *name)
-{
- if (len != name->len)
- return 1;
-
- if (memcmp(str, name->name, len))
- return 1;
-
- CDEBUG(D_DENTRY, "found name %.*s(%p) flags %#x refc %d\n",
- name->len, name->name, dentry, dentry->d_flags,
- d_count(dentry));
-
- /* mountpoint is always valid */
- if (d_mountpoint(dentry))
- return 0;
-
- /* ensure exclusion against parallel lookup of the same name */
- if (d_in_lookup((struct dentry *)dentry))
- return 0;
-
- if (d_lustre_invalid(dentry))
- return 1;
-
- return 0;
-}
-
-/**
- * Called when the last reference to a dentry is dropped and the dcache wants
- * to know whether or not it should cache it:
- * - return 1 to delete the dentry immediately
- * - return 0 to cache the dentry
- * Should NOT be called with the dcache lock, see fs/dcache.c
- */
-static int ll_ddelete(const struct dentry *de)
-{
- LASSERT(de);
-
- CDEBUG(D_DENTRY, "%s dentry %pd (%p, parent %p, inode %p) %s%s\n",
- d_lustre_invalid(de) ? "deleting" : "keeping",
- de, de, de->d_parent, d_inode(de),
- d_unhashed(de) ? "" : "hashed,",
- list_empty(&de->d_subdirs) ? "" : "subdirs");
-
- /* In kernels >= 2.6.38 the last refcount is decreased after this function. */
- LASSERT(d_count(de) == 1);
-
- if (d_lustre_invalid(de))
- return 1;
- return 0;
-}
-
-static int ll_d_init(struct dentry *de)
-{
- struct ll_dentry_data *lld = kzalloc(sizeof(*lld), GFP_KERNEL);
-
- if (unlikely(!lld))
- return -ENOMEM;
- lld->lld_invalid = 1;
- de->d_fsdata = lld;
- return 0;
-}
-
-void ll_intent_drop_lock(struct lookup_intent *it)
-{
- if (it->it_op && it->it_lock_mode) {
- struct lustre_handle handle;
-
- handle.cookie = it->it_lock_handle;
-
- CDEBUG(D_DLMTRACE,
- "releasing lock with cookie %#llx from it %p\n",
- handle.cookie, it);
- ldlm_lock_decref(&handle, it->it_lock_mode);
-
- /* bug 494: intent_release may be called multiple times from
- * this thread, and we don't want to double-decref this lock
- */
- it->it_lock_mode = 0;
- if (it->it_remote_lock_mode != 0) {
- handle.cookie = it->it_remote_lock_handle;
-
- CDEBUG(D_DLMTRACE,
- "releasing remote lock with cookie%#llx from it %p\n",
- handle.cookie, it);
- ldlm_lock_decref(&handle,
- it->it_remote_lock_mode);
- it->it_remote_lock_mode = 0;
- }
- }
-}
-
-void ll_intent_release(struct lookup_intent *it)
-{
- CDEBUG(D_INFO, "intent %p released\n", it);
- ll_intent_drop_lock(it);
- /* We are still holding an extra reference on the request; we need to free it */
- if (it_disposition(it, DISP_ENQ_OPEN_REF))
- ptlrpc_req_finished(it->it_request); /* ll_file_open */
-
- if (it_disposition(it, DISP_ENQ_CREATE_REF)) /* create rec */
- ptlrpc_req_finished(it->it_request);
-
- it->it_disposition = 0;
- it->it_request = NULL;
-}
-
-void ll_invalidate_aliases(struct inode *inode)
-{
- struct dentry *dentry;
-
- CDEBUG(D_INODE, "marking dentries for ino " DFID "(%p) invalid\n",
- PFID(ll_inode2fid(inode)), inode);
-
- spin_lock(&inode->i_lock);
- hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
- CDEBUG(D_DENTRY,
- "dentry in drop %pd (%p) parent %p inode %p flags %d\n",
- dentry, dentry, dentry->d_parent,
- d_inode(dentry), dentry->d_flags);
-
- d_lustre_invalidate(dentry, 0);
- }
- spin_unlock(&inode->i_lock);
-}
-
-int ll_revalidate_it_finish(struct ptlrpc_request *request,
- struct lookup_intent *it,
- struct inode *inode)
-{
- int rc = 0;
-
- if (!request)
- return 0;
-
- if (it_disposition(it, DISP_LOOKUP_NEG))
- return -ENOENT;
-
- rc = ll_prep_inode(&inode, request, NULL, it);
-
- return rc;
-}
-
-void ll_lookup_finish_locks(struct lookup_intent *it, struct inode *inode)
-{
- if (it->it_lock_mode && inode) {
- struct ll_sb_info *sbi = ll_i2sbi(inode);
-
- CDEBUG(D_DLMTRACE, "setting l_data to inode " DFID "(%p)\n",
- PFID(ll_inode2fid(inode)), inode);
- ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL);
- }
-
- /* drop lookup or getattr locks immediately */
- if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR) {
- /* On 2.6 there are situations where several lookups and
- * revalidations may be requested during a single operation.
- * Therefore, we don't release the intent here. -bzzz
- */
- ll_intent_drop_lock(it);
- }
-}
-
-static int ll_revalidate_dentry(struct dentry *dentry,
- unsigned int lookup_flags)
-{
- struct inode *dir = d_inode(dentry->d_parent);
-
- /* If this is an intermediate-component path lookup and we were able
- * to get to this dentry, then its lock has not been revoked and the
- * path component is valid.
- */
- if (lookup_flags & LOOKUP_PARENT)
- return 1;
-
- /* Symlink - always valid as long as the dentry was found */
- if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode))
- return 1;
-
- /*
- * The VFS warns us that this is the second go-around and the previous
- * operation failed (most likely open|creat), so this time
- * we had better talk to the server via the lookup path by name,
- * not by fid.
- */
- if (lookup_flags & LOOKUP_REVAL)
- return 0;
-
- if (!dentry_may_statahead(dir, dentry))
- return 1;
-
- if (lookup_flags & LOOKUP_RCU)
- return -ECHILD;
-
- ll_statahead(dir, &dentry, !d_inode(dentry));
- return 1;
-}
-
-/*
- * Always trust cached dentries. Update statahead window if necessary.
- */
-static int ll_revalidate_nd(struct dentry *dentry, unsigned int flags)
-{
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, flags=%u\n",
- dentry, flags);
-
- return ll_revalidate_dentry(dentry, flags);
-}
-
-const struct dentry_operations ll_d_ops = {
- .d_init = ll_d_init,
- .d_revalidate = ll_revalidate_nd,
- .d_release = ll_release,
- .d_delete = ll_ddelete,
- .d_compare = ll_dcompare,
-};
diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
deleted file mode 100644
index 688dddf3ca47..000000000000
--- a/drivers/staging/lustre/lustre/llite/dir.c
+++ /dev/null
@@ -1,1708 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/llite/dir.c
- *
- * Directory code for lustre client.
- */
-
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-#include <linux/mm.h>
-#include <linux/uaccess.h>
-#include <linux/buffer_head.h> /* for wait_on_buffer */
-#include <linux/pagevec.h>
-#include <linux/prefetch.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_support.h>
-#include <obd_class.h>
-#include <uapi/linux/lustre/lustre_ioctl.h>
-#include <lustre_lib.h>
-#include <lustre_dlm.h>
-#include <lustre_fid.h>
-#include <lustre_kernelcomm.h>
-#include <lustre_swab.h>
-
-#include "llite_internal.h"
-
-/*
- * (new) readdir implementation overview.
- *
- * The original Lustre readdir implementation cached an exact copy of the raw
- * directory pages on the client. These pages were indexed in the client page
- * cache by their logical offset in the directory file. This design, while
- * very simple and intuitive, had some inherent problems:
- *
- * . it implies that the byte offset of a directory entry serves as a
- * telldir(3)/seekdir(3) cookie, but that offset is not stable: in
- * ext3/htree, directory entries may move due to splits, and more
- * importantly,
- *
- * . it is incompatible with the design of split directories for cmd3,
- * which assumes that names are distributed across nodes based on their
- * hash, and so readdir should be done in hash order.
- *
- * The new readdir implementation reads directories in hash order, and uses
- * the hash of a file name as the telldir/seekdir cookie. This led to a
- * number of complications:
- *
- * . the hash is not unique, so it cannot be used to index cached directory
- * pages on the client (note that it requires a whole pageful of
- * hash-collided entries to cause two pages to have identical hashes);
- *
- * . the hash is not unique, so it cannot, strictly speaking, be used as an
- * entry cookie. ext3/htree has the same problem, and the Lustre
- * implementation mimics its solution: seekdir(hash) positions the directory
- * at the first entry with the given hash.
- *
- * Client side.
- *
- * 0. caching
- *
- * The client caches directory pages using the hash of the first entry as an
- * index. As noted above, the hash is not unique, so this solution doesn't
- * work as is: special processing is needed for "page hash chains" (i.e.,
- * sequences of pages filled with entries all having the same hash value).
- *
- * First, such chains have to be detected. To this end, the server returns to
- * the client the hash of the first entry on the page next to the one
- * returned. When the client detects that this hash is the same as the hash
- * of the first entry on the returned page, a page hash collision has to be
- * handled. Pages in the hash chain, except the first one, are termed
- * "overflow pages".
- *
- * The solution to the index uniqueness problem is not to cache overflow
- * pages. Instead, when a page hash collision is detected, all overflow pages
- * from the emerging chain are immediately requested from the server and
- * placed in a special data structure (struct ll_dir_chain). This data
- * structure is used by ll_readdir() to process entries from overflow pages.
- * When the readdir invocation finishes, the overflow pages are discarded. If
- * the page hash collision chain wasn't completely processed, the next call
- * to readdir will again detect the page hash collision, again read the
- * overflow pages in, process the next portion of entries, and again discard
- * the pages. This is not as wasteful as it looks, because, given a
- * reasonable hash, page hash collisions are extremely rare.
- *
- * 1. directory positioning
- *
- * When seekdir(hash) is called, original
- *
- *
- *
- *
- *
- *
- *
- *
- * Server.
- *
- * identification of and access to overflow pages
- *
- * page format
- *
- * Pages in the MDS_READPAGE RPC are packed in LU_PAGE_SIZE units; each page
- * contains a header, lu_dirpage, which describes the start/end hash and
- * whether this page is empty (contains no dir entry) or its hash collides
- * with the next page. After the client receives the reply, several pages
- * will be integrated into a dir page of PAGE_SIZE (if PAGE_SIZE is greater
- * than LU_PAGE_SIZE), and the lu_dirpage for this integrated page will be
- * adjusted. See lmv_adjust_dirpages().
- *
- */
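-/*
- * To make the overflow-page rule concrete: the client decides whether a
- * page may stay cached by looking at the collision flag the server set
- * in the page header (see the LDF_COLLIDE check in ll_dir_read() below).
- * Sketch, assuming the lu_dirpage fields used in this file:
- *
- *	dp = page_address(page);
- *	if (le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE)
- *		// the next page starts with the same hash: this page
- *		// must not stay cached, so the chain is re-read later
- *		ll_release_page(inode, page, true);
- *	else
- *		ll_release_page(inode, page, false);
- */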
-struct page *ll_get_dir_page(struct inode *dir, struct md_op_data *op_data,
- __u64 offset)
-{
- struct md_callback cb_op;
- struct page *page;
- int rc;
-
- cb_op.md_blocking_ast = ll_md_blocking_ast;
- rc = md_read_page(ll_i2mdexp(dir), op_data, &cb_op, offset, &page);
- if (rc)
- return ERR_PTR(rc);
-
- return page;
-}
-
-void ll_release_page(struct inode *inode, struct page *page, bool remove)
-{
- kunmap(page);
-
- /*
- * Always remove the page for a striped dir, because the page
- * is built temporarily in the LMV layer
- */
- if (inode && S_ISDIR(inode->i_mode) &&
- ll_i2info(inode)->lli_lsm_md) {
- __free_page(page);
- return;
- }
-
- if (remove) {
- lock_page(page);
- if (likely(page->mapping))
- truncate_complete_page(page->mapping, page);
- unlock_page(page);
- }
- put_page(page);
-}
-
-/**
- * Return the IF_* type for a given lu_dirent entry.
- * The IF_* flag should be converted to the particular OS file type in
- * the platform llite module.
- */
-static __u16 ll_dirent_type_get(struct lu_dirent *ent)
-{
- __u16 type = 0;
- struct luda_type *lt;
- int len = 0;
-
- if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) {
- const unsigned int align = sizeof(struct luda_type) - 1;
-
- len = le16_to_cpu(ent->lde_namelen);
- len = (len + align) & ~align;
- lt = (void *)ent->lde_name + len;
- type = IFTODT(le16_to_cpu(lt->lt_type));
- }
- return type;
-}
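-
-/*
- * The "(len + align) & ~align" step above rounds the name length up to
- * the next multiple of sizeof(struct luda_type), which is where the
- * type record is stored after the name. This only works because the
- * size is a power of two. Worked example, assuming
- * sizeof(struct luda_type) == 2 (a single __u16), so align == 1:
- *
- *	len = 5: (5 + 1) & ~1 = 6	// rounded up
- *	len = 6: (6 + 1) & ~1 = 6	// already aligned
- */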
-
-int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data,
- struct dir_context *ctx)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- __u64 pos = *ppos;
- int is_api32 = ll_need_32bit_api(sbi);
- int is_hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
- struct page *page;
- bool done = false;
- int rc = 0;
-
- page = ll_get_dir_page(inode, op_data, pos);
-
- while (rc == 0 && !done) {
- struct lu_dirpage *dp;
- struct lu_dirent *ent;
- __u64 hash;
- __u64 next;
-
- if (IS_ERR(page)) {
- rc = PTR_ERR(page);
- break;
- }
-
- hash = MDS_DIR_END_OFF;
- dp = page_address(page);
- for (ent = lu_dirent_start(dp); ent && !done;
- ent = lu_dirent_next(ent)) {
- __u16 type;
- int namelen;
- struct lu_fid fid;
- __u64 lhash;
- __u64 ino;
-
- hash = le64_to_cpu(ent->lde_hash);
- if (hash < pos)
- /*
- * Skip until we find target hash
- * value.
- */
- continue;
-
- namelen = le16_to_cpu(ent->lde_namelen);
- if (namelen == 0)
- /*
- * Skip dummy record.
- */
- continue;
-
- if (is_api32 && is_hash64)
- lhash = hash >> 32;
- else
- lhash = hash;
- fid_le_to_cpu(&fid, &ent->lde_fid);
- ino = cl_fid_build_ino(&fid, is_api32);
- type = ll_dirent_type_get(ent);
- ctx->pos = lhash;
- /* ll_nfs_get_name_filldir() will try to access
- * 'ent' through its 'lde_name', so the 'name'
- * parameter passed to 'ctx->actor()' must point
- * into 'ent'.
- */
- done = !dir_emit(ctx, ent->lde_name,
- namelen, ino, type);
- }
-
- if (done) {
- pos = hash;
- ll_release_page(inode, page, false);
- break;
- }
-
- next = le64_to_cpu(dp->ldp_hash_end);
- pos = next;
- if (pos == MDS_DIR_END_OFF) {
- /*
- * End of directory reached.
- */
- done = true;
- ll_release_page(inode, page, false);
- } else {
- /*
- * Normal case: continue to the next
- * page.
- */
- ll_release_page(inode, page,
- le32_to_cpu(dp->ldp_flags) &
- LDF_COLLIDE);
- next = pos;
- page = ll_get_dir_page(inode, op_data, pos);
- }
- }
-
- ctx->pos = pos;
- return rc;
-}
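-
-/*
- * Note how ll_dir_read() maps the 64-bit directory hash to a 32-bit
- * cookie when a 32-bit API caller meets a 64-bit-hash server: only the
- * top 32 bits are kept (lhash = hash >> 32 above), which coarsely
- * preserves the hash ordering under truncation. Illustrative values
- * only:
- *
- *	hash = 0x1234abcd00000000ULL  ->  lhash = 0x1234abcd
- *	hash = 0x1234abce00000000ULL  ->  lhash = 0x1234abce
- */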
-
-static int ll_readdir(struct file *filp, struct dir_context *ctx)
-{
- struct inode *inode = file_inode(filp);
- struct ll_file_data *lfd = LUSTRE_FPRIVATE(filp);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- __u64 pos = lfd ? lfd->lfd_pos : 0;
- int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
- int api32 = ll_need_32bit_api(sbi);
- struct md_op_data *op_data;
- int rc;
-
- CDEBUG(D_VFSTRACE,
- "VFS Op:inode=" DFID "(%p) pos/size %lu/%llu 32bit_api %d\n",
- PFID(ll_inode2fid(inode)), inode, (unsigned long)pos,
- i_size_read(inode), api32);
-
- if (pos == MDS_DIR_END_OFF) {
- /*
- * end-of-file.
- */
- rc = 0;
- goto out;
- }
-
- op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
- LUSTRE_OPC_ANY, inode);
- if (IS_ERR(op_data)) {
- rc = PTR_ERR(op_data);
- goto out;
- }
-
- if (unlikely(op_data->op_mea1)) {
- /*
- * This is only needed for a striped dir to fill in "..";
- * see lmv_read_page()
- */
- if (file_dentry(filp)->d_parent &&
- file_dentry(filp)->d_parent->d_inode) {
- __u64 ibits = MDS_INODELOCK_UPDATE;
- struct inode *parent;
-
- parent = file_dentry(filp)->d_parent->d_inode;
- if (ll_have_md_lock(parent, &ibits, LCK_MINMODE))
- op_data->op_fid3 = *ll_inode2fid(parent);
- }
-
- /*
- * If it cannot be found in the cache, look up ".." on the
- * master object
- */
- if (fid_is_zero(&op_data->op_fid3)) {
- rc = ll_dir_get_parent_fid(inode, &op_data->op_fid3);
- if (rc) {
- ll_finish_md_op_data(op_data);
- return rc;
- }
- }
- }
- op_data->op_max_pages = sbi->ll_md_brw_pages;
- ctx->pos = pos;
- rc = ll_dir_read(inode, &pos, op_data, ctx);
- pos = ctx->pos;
- if (lfd)
- lfd->lfd_pos = pos;
-
- if (pos == MDS_DIR_END_OFF) {
- if (api32)
- pos = LL_DIR_END_OFF_32BIT;
- else
- pos = LL_DIR_END_OFF;
- } else {
- if (api32 && hash64)
- pos >>= 32;
- }
- ctx->pos = pos;
- ll_finish_md_op_data(op_data);
-out:
- if (!rc)
- ll_stats_ops_tally(sbi, LPROC_LL_READDIR, 1);
-
- return rc;
-}
-
-static int ll_send_mgc_param(struct obd_export *mgc, char *string)
-{
- struct mgs_send_param *msp;
- int rc = 0;
-
- msp = kzalloc(sizeof(*msp), GFP_NOFS);
- if (!msp)
- return -ENOMEM;
-
- strlcpy(msp->mgs_param, string, sizeof(msp->mgs_param));
- rc = obd_set_info_async(NULL, mgc, sizeof(KEY_SET_INFO), KEY_SET_INFO,
- sizeof(struct mgs_send_param), msp, NULL);
- if (rc)
- CERROR("Failed to set parameter: %d\n", rc);
- kfree(msp);
-
- return rc;
-}
-
-/**
- * Create a striped directory with the specified stripes (@lump).
- *
- * \param[in] parent the parent of the directory.
- * \param[in] lump the specified stripes.
- * \param[in] dirname the name of the directory.
- * \param[in] mode the specified mode of the directory.
- *
- * \retval =0 if the striped directory is created successfully.
- * <0 if the creation fails.
- */
-static int ll_dir_setdirstripe(struct inode *parent, struct lmv_user_md *lump,
- const char *dirname, umode_t mode)
-{
- struct ptlrpc_request *request = NULL;
- struct md_op_data *op_data;
- struct ll_sb_info *sbi = ll_i2sbi(parent);
- struct inode *inode = NULL;
- struct dentry dentry;
- int err;
-
- if (unlikely(lump->lum_magic != LMV_USER_MAGIC))
- return -EINVAL;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p) name %s stripe_offset %d, stripe_count: %u\n",
- PFID(ll_inode2fid(parent)), parent, dirname,
- (int)lump->lum_stripe_offset, lump->lum_stripe_count);
-
- if (lump->lum_stripe_count > 1 &&
- !(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_DIR_STRIPE))
- return -EINVAL;
-
- if (lump->lum_magic != cpu_to_le32(LMV_USER_MAGIC))
- lustre_swab_lmv_user_md(lump);
-
- if (!IS_POSIXACL(parent) || !exp_connect_umask(ll_i2mdexp(parent)))
- mode &= ~current_umask();
- mode = (mode & (0777 | S_ISVTX)) | S_IFDIR;
- op_data = ll_prep_md_op_data(NULL, parent, NULL, dirname,
- strlen(dirname), mode, LUSTRE_OPC_MKDIR,
- lump);
- if (IS_ERR(op_data)) {
- err = PTR_ERR(op_data);
- goto err_exit;
- }
-
- op_data->op_cli_flags |= CLI_SET_MEA;
- err = md_create(sbi->ll_md_exp, op_data, lump, sizeof(*lump), mode,
- from_kuid(&init_user_ns, current_fsuid()),
- from_kgid(&init_user_ns, current_fsgid()),
- current_cap(), 0, &request);
- ll_finish_md_op_data(op_data);
- if (err)
- goto err_exit;
-
- err = ll_prep_inode(&inode, request, parent->i_sb, NULL);
- if (err)
- goto err_exit;
-
- memset(&dentry, 0, sizeof(dentry));
- dentry.d_inode = inode;
-
- err = ll_init_security(&dentry, inode, parent);
- iput(inode);
-
-err_exit:
- ptlrpc_req_finished(request);
- return err;
-}
-
-int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
- int set_default)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct md_op_data *op_data;
- struct ptlrpc_request *req = NULL;
- int rc = 0;
- struct lustre_sb_info *lsi = s2lsi(inode->i_sb);
- struct obd_device *mgc = lsi->lsi_mgc;
- int lum_size;
-
- if (lump) {
- /*
- * This is coming from userspace, so should be in
- * local endian. But the MDS would like it in little
- * endian, so we swab it before we send it.
- */
- switch (lump->lmm_magic) {
- case LOV_USER_MAGIC_V1: {
- if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1))
- lustre_swab_lov_user_md_v1(lump);
- lum_size = sizeof(struct lov_user_md_v1);
- break;
- }
- case LOV_USER_MAGIC_V3: {
- if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3))
- lustre_swab_lov_user_md_v3(
- (struct lov_user_md_v3 *)lump);
- lum_size = sizeof(struct lov_user_md_v3);
- break;
- }
- case LMV_USER_MAGIC: {
- if (lump->lmm_magic != cpu_to_le32(LMV_USER_MAGIC))
- lustre_swab_lmv_user_md(
- (struct lmv_user_md *)lump);
- lum_size = sizeof(struct lmv_user_md);
- break;
- }
- default: {
- CDEBUG(D_IOCTL,
- "bad userland LOV MAGIC: %#08x != %#08x nor %#08x\n",
- lump->lmm_magic, LOV_USER_MAGIC_V1,
- LOV_USER_MAGIC_V3);
- return -EINVAL;
- }
- }
- } else {
- lum_size = sizeof(struct lov_user_md_v1);
- }
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- /* swabbing is done in lov_setstripe() on server side */
- rc = md_setattr(sbi->ll_md_exp, op_data, lump, lum_size, &req);
- ll_finish_md_op_data(op_data);
- ptlrpc_req_finished(req);
- if (rc)
- return rc;
-
-#if OBD_OCD_VERSION(2, 13, 53, 0) > LUSTRE_VERSION_CODE
- /*
- * A 2.9 server stores the filesystem default stripe in the ROOT xattr,
- * and it is also stored in the system config for backward compatibility.
- *
- * In the following we use the fact that LOV_USER_MAGIC_V1 and
- * LOV_USER_MAGIC_V3 have the same initial fields so we do not
- * need to make the distinction between the 2 versions
- */
- if (set_default && mgc->u.cli.cl_mgc_mgsexp) {
- char *param = NULL;
- char *buf;
-
- param = kzalloc(MGS_PARAM_MAXLEN, GFP_NOFS);
- if (!param)
- return -ENOMEM;
-
- buf = param;
- /* Get fsname and assume devname to be -MDT0000. */
- ll_get_fsname(inode->i_sb, buf, MTI_NAME_MAXLEN);
- strcat(buf, "-MDT0000.lov");
- buf += strlen(buf);
-
- /* Set root stripesize */
- sprintf(buf, ".stripesize=%u",
- lump ? le32_to_cpu(lump->lmm_stripe_size) : 0);
- rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
- if (rc)
- goto end;
-
- /* Set root stripecount */
- sprintf(buf, ".stripecount=%hd",
- lump ? le16_to_cpu(lump->lmm_stripe_count) : 0);
- rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
- if (rc)
- goto end;
-
- /* Set root stripeoffset */
- sprintf(buf, ".stripeoffset=%hd",
- lump ? le16_to_cpu(lump->lmm_stripe_offset) :
- (typeof(lump->lmm_stripe_offset))(-1));
- rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
-
-end:
- kfree(param);
- }
-#endif
- return rc;
-}
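-
-/*
- * The swab checks in ll_dir_setstripe() use a common endianness idiom:
- * the wire format is little-endian, so comparing the host-endian magic
- * with cpu_to_le32(MAGIC) tells us whether a byte swap is needed. On a
- * little-endian host cpu_to_le32() is the identity, so the branch is
- * never taken; on a big-endian host it always is. Sketch:
- *
- *	if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1))
- *		lustre_swab_lov_user_md_v1(lump);	// big-endian host
- */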
-
-/**
- * This function is used to get the default LOV/LMV/default LMV stripe EA.
- * @valid indicates which stripe EA to retrieve:
- * OBD_MD_MEA LMV stripe EA
- * OBD_MD_DEFAULT_MEA default LMV stripe EA
- * otherwise default LOV EA.
- * Each call can only retrieve one stripe EA.
- **/
-int ll_dir_getstripe(struct inode *inode, void **plmm, int *plmm_size,
- struct ptlrpc_request **request, u64 valid)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct mdt_body *body;
- struct lov_mds_md *lmm = NULL;
- struct ptlrpc_request *req = NULL;
- int rc, lmmsize;
- struct md_op_data *op_data;
-
- rc = ll_get_max_mdsize(sbi, &lmmsize);
- if (rc)
- return rc;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
- 0, lmmsize, LUSTRE_OPC_ANY,
- NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
- rc = md_getattr(sbi->ll_md_exp, op_data, &req);
- ll_finish_md_op_data(op_data);
- if (rc < 0) {
- CDEBUG(D_INFO, "md_getattr failed on inode " DFID ": rc %d\n",
- PFID(ll_inode2fid(inode)), rc);
- goto out;
- }
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
-
- lmmsize = body->mbo_eadatasize;
-
- if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
- lmmsize == 0) {
- rc = -ENODATA;
- goto out;
- }
-
- lmm = req_capsule_server_sized_get(&req->rq_pill,
- &RMF_MDT_MD, lmmsize);
- LASSERT(lmm);
-
- /*
- * This is coming from the MDS, so is probably in
- * little endian. We convert it to host endian before
- * passing it to userspace.
- */
- /* We don't swab objects for directories */
- switch (le32_to_cpu(lmm->lmm_magic)) {
- case LOV_MAGIC_V1:
- if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC)
- lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
- break;
- case LOV_MAGIC_V3:
- if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC)
- lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
- break;
- case LMV_MAGIC_V1:
- if (cpu_to_le32(LMV_MAGIC) != LMV_MAGIC)
- lustre_swab_lmv_mds_md((union lmv_mds_md *)lmm);
- break;
- case LMV_USER_MAGIC:
- if (cpu_to_le32(LMV_USER_MAGIC) != LMV_USER_MAGIC)
- lustre_swab_lmv_user_md((struct lmv_user_md *)lmm);
- break;
- default:
- CERROR("unknown magic: %lX\n", (unsigned long)lmm->lmm_magic);
- rc = -EPROTO;
- }
-out:
- *plmm = lmm;
- *plmm_size = lmmsize;
- *request = req;
- return rc;
-}
-
-int ll_get_mdt_idx_by_fid(struct ll_sb_info *sbi, const struct lu_fid *fid)
-{
- struct md_op_data *op_data;
- int mdt_index, rc;
-
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- if (!op_data)
- return -ENOMEM;
-
- op_data->op_flags |= MF_GET_MDT_IDX;
- op_data->op_fid1 = *fid;
- rc = md_getattr(sbi->ll_md_exp, op_data, NULL);
- mdt_index = op_data->op_mds;
- kvfree(op_data);
- if (rc < 0)
- return rc;
-
- return mdt_index;
-}
-
-/*
- * Get MDT index for the inode.
- */
-int ll_get_mdt_idx(struct inode *inode)
-{
- return ll_get_mdt_idx_by_fid(ll_i2sbi(inode), ll_inode2fid(inode));
-}
-
-/**
- * Generic handler to do any pre-copy work.
- *
- * It sends an initial hsm_progress (with extent length == 0) to the
- * coordinator as a first indication that real work has started.
- *
- * Moreover, for an ARCHIVE request, it will sample the file data version and
- * store it in \a copy.
- *
- * \return 0 on success.
- */
-static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct hsm_progress_kernel hpk;
- int rc2, rc = 0;
-
- /* Forge a hsm_progress based on data from copy. */
- hpk.hpk_fid = copy->hc_hai.hai_fid;
- hpk.hpk_cookie = copy->hc_hai.hai_cookie;
- hpk.hpk_extent.offset = copy->hc_hai.hai_extent.offset;
- hpk.hpk_extent.length = 0;
- hpk.hpk_flags = 0;
- hpk.hpk_errval = 0;
- hpk.hpk_data_version = 0;
-
- /* For archive request, we need to read the current file version. */
- if (copy->hc_hai.hai_action == HSMA_ARCHIVE) {
- struct inode *inode;
- __u64 data_version = 0;
-
- /* Get inode for this fid */
- inode = search_inode_for_lustre(sb, &copy->hc_hai.hai_fid);
- if (IS_ERR(inode)) {
- hpk.hpk_flags |= HP_FLAG_RETRY;
- /* hpk_errval is >= 0 */
- hpk.hpk_errval = -PTR_ERR(inode);
- rc = PTR_ERR(inode);
- goto progress;
- }
-
- /* Read current file data version */
- rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH);
- iput(inode);
- if (rc != 0) {
- CDEBUG(D_HSM,
- "Could not read file data version of " DFID " (rc = %d). Archive request (%#llx) could not be done.\n",
- PFID(&copy->hc_hai.hai_fid), rc,
- copy->hc_hai.hai_cookie);
- hpk.hpk_flags |= HP_FLAG_RETRY;
- /* hpk_errval must be >= 0 */
- hpk.hpk_errval = -rc;
- goto progress;
- }
-
- /* Store in the hsm_copy for later copytool use.
- * Always modified even if no lsm.
- */
- copy->hc_data_version = data_version;
- }
-
-progress:
- /* On error, the request should be considered as completed */
- if (hpk.hpk_errval > 0)
- hpk.hpk_flags |= HP_FLAG_COMPLETED;
- rc2 = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
- &hpk, NULL);
-
- return rc ? rc : rc2;
-}
-
-/**
- * Generic handler to do any post-copy work.
- *
- * It will send the last hsm_progress update to the coordinator to inform it
- * that the copy is finished and whether it was successful or not.
- *
- * Moreover,
- * - for an ARCHIVE request, it will sample the file data version and compare
- * it with the version saved in ll_ioc_copy_start(). If they do not match,
- * the copy is considered to have failed.
- * - for a RESTORE request, it will sample the file data version and send it
- * to the coordinator, which is useful if the file was imported as 'released'.
- *
- * \return 0 on success.
- */
-static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct hsm_progress_kernel hpk;
- int rc2, rc = 0;
-
- /* If you modify the logic here, also check llapi_hsm_copy_end(). */
- /* Take care: copy->hc_hai.hai_action, len, gid and data are not
- * initialized if copy_end was called with copy == NULL.
- */
-
- /* Forge a hsm_progress based on data from copy. */
- hpk.hpk_fid = copy->hc_hai.hai_fid;
- hpk.hpk_cookie = copy->hc_hai.hai_cookie;
- hpk.hpk_extent = copy->hc_hai.hai_extent;
- hpk.hpk_flags = copy->hc_flags | HP_FLAG_COMPLETED;
- hpk.hpk_errval = copy->hc_errval;
- hpk.hpk_data_version = 0;
-
- /* For archive request, we need to check the file data was not changed.
- *
- * For restore request, we need to send the file data version, this is
- * useful when the file was created using hsm_import.
- */
- if (((copy->hc_hai.hai_action == HSMA_ARCHIVE) ||
- (copy->hc_hai.hai_action == HSMA_RESTORE)) &&
- (copy->hc_errval == 0)) {
- struct inode *inode;
- __u64 data_version = 0;
-
- /* Get lsm for this fid */
- inode = search_inode_for_lustre(sb, &copy->hc_hai.hai_fid);
- if (IS_ERR(inode)) {
- hpk.hpk_flags |= HP_FLAG_RETRY;
- /* hpk_errval must be >= 0 */
- hpk.hpk_errval = -PTR_ERR(inode);
- rc = PTR_ERR(inode);
- goto progress;
- }
-
- rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH);
- iput(inode);
- if (rc) {
- CDEBUG(D_HSM,
- "Could not read file data version. Request could not be confirmed.\n");
- if (hpk.hpk_errval == 0)
- hpk.hpk_errval = -rc;
- goto progress;
- }
-
- /* Store in the hsm_copy for later copytool use.
- * Always modified even if no lsm.
- */
- hpk.hpk_data_version = data_version;
-
- /* File could have been stripped during archiving, so we need
- * to check anyway.
- */
- if ((copy->hc_hai.hai_action == HSMA_ARCHIVE) &&
- (copy->hc_data_version != data_version)) {
- CDEBUG(D_HSM, "File data version mismatched. File content was changed during archiving. " DFID ", start:%#llx current:%#llx\n",
- PFID(&copy->hc_hai.hai_fid),
- copy->hc_data_version, data_version);
- /* File was changed, send error to cdt. Do not ask for
- * retry because if a file is modified frequently,
- * the cdt will loop on retried archive requests.
- * The policy engine will ask for a new archive later,
- * when the file has not been modified for some tunable
- * time.
- */
- hpk.hpk_flags &= ~HP_FLAG_RETRY;
- rc = -EBUSY;
- /* hpk_errval must be >= 0 */
- hpk.hpk_errval = -rc;
- }
- }
-
-progress:
- rc2 = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
- &hpk, NULL);
-
- return rc ? rc : rc2;
-}
-
-static int copy_and_ioctl(int cmd, struct obd_export *exp,
- const void __user *data, size_t size)
-{
- void *copy;
- int rc;
-
- copy = memdup_user(data, size);
- if (IS_ERR(copy))
- return PTR_ERR(copy);
-
- rc = obd_iocontrol(cmd, exp, size, copy, NULL);
- kfree(copy);
-
- return rc;
-}
-
-static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl)
-{
- int cmd = qctl->qc_cmd;
- int type = qctl->qc_type;
- int id = qctl->qc_id;
- int valid = qctl->qc_valid;
- int rc = 0;
-
- switch (cmd) {
- case Q_SETQUOTA:
- case Q_SETINFO:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- break;
- case Q_GETQUOTA:
- if (((type == USRQUOTA &&
- !uid_eq(current_euid(), make_kuid(&init_user_ns, id))) ||
- (type == GRPQUOTA &&
- !in_egroup_p(make_kgid(&init_user_ns, id)))) &&
- !capable(CAP_SYS_ADMIN))
- return -EPERM;
- break;
- case Q_GETINFO:
- break;
- default:
- CERROR("unsupported quotactl op: %#x\n", cmd);
- return -ENOTTY;
- }
-
- if (valid != QC_GENERAL) {
- if (cmd == Q_GETINFO)
- qctl->qc_cmd = Q_GETOINFO;
- else if (cmd == Q_GETQUOTA)
- qctl->qc_cmd = Q_GETOQUOTA;
- else
- return -EINVAL;
-
- switch (valid) {
- case QC_MDTIDX:
- rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_md_exp,
- sizeof(*qctl), qctl, NULL);
- break;
- case QC_OSTIDX:
- rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_dt_exp,
- sizeof(*qctl), qctl, NULL);
- break;
- case QC_UUID:
- rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_md_exp,
- sizeof(*qctl), qctl, NULL);
- if (rc == -EAGAIN)
- rc = obd_iocontrol(OBD_IOC_QUOTACTL,
- sbi->ll_dt_exp,
- sizeof(*qctl), qctl, NULL);
- break;
- default:
- rc = -EINVAL;
- break;
- }
-
- if (rc)
- return rc;
-
- qctl->qc_cmd = cmd;
- } else {
- struct obd_quotactl *oqctl;
-
- oqctl = kzalloc(sizeof(*oqctl), GFP_NOFS);
- if (!oqctl)
- return -ENOMEM;
-
- QCTL_COPY(oqctl, qctl);
- rc = obd_quotactl(sbi->ll_md_exp, oqctl);
- if (rc) {
- kfree(oqctl);
- return rc;
- }
- /* If QIF_SPACE is not set, client should collect the
- * space usage from OSSs by itself
- */
- if (cmd == Q_GETQUOTA &&
- !(oqctl->qc_dqblk.dqb_valid & QIF_SPACE) &&
- !oqctl->qc_dqblk.dqb_curspace) {
- struct obd_quotactl *oqctl_tmp;
-
- oqctl_tmp = kzalloc(sizeof(*oqctl_tmp), GFP_NOFS);
- if (!oqctl_tmp) {
- rc = -ENOMEM;
- goto out;
- }
-
- oqctl_tmp->qc_cmd = Q_GETOQUOTA;
- oqctl_tmp->qc_id = oqctl->qc_id;
- oqctl_tmp->qc_type = oqctl->qc_type;
-
- /* collect space usage from OSTs */
- oqctl_tmp->qc_dqblk.dqb_curspace = 0;
- rc = obd_quotactl(sbi->ll_dt_exp, oqctl_tmp);
- if (!rc || rc == -EREMOTEIO) {
- oqctl->qc_dqblk.dqb_curspace =
- oqctl_tmp->qc_dqblk.dqb_curspace;
- oqctl->qc_dqblk.dqb_valid |= QIF_SPACE;
- }
-
- /* collect space & inode usage from MDTs */
- oqctl_tmp->qc_dqblk.dqb_curspace = 0;
- oqctl_tmp->qc_dqblk.dqb_curinodes = 0;
- rc = obd_quotactl(sbi->ll_md_exp, oqctl_tmp);
- if (!rc || rc == -EREMOTEIO) {
- oqctl->qc_dqblk.dqb_curspace +=
- oqctl_tmp->qc_dqblk.dqb_curspace;
- oqctl->qc_dqblk.dqb_curinodes =
- oqctl_tmp->qc_dqblk.dqb_curinodes;
- oqctl->qc_dqblk.dqb_valid |= QIF_INODES;
- } else {
- oqctl->qc_dqblk.dqb_valid &= ~QIF_SPACE;
- }
-
- kfree(oqctl_tmp);
- }
-out:
- QCTL_COPY(qctl, oqctl);
- kfree(oqctl);
- }
-
- return rc;
-}
-
-/* This function tries to get a single name component
- * to send to the server. No actual path traversal is involved,
- * so we limit the length to NAME_MAX
- */
-static char *ll_getname(const char __user *filename)
-{
- int ret = 0, len;
- char *tmp;
-
- tmp = kzalloc(NAME_MAX + 1, GFP_KERNEL);
- if (!tmp)
- return ERR_PTR(-ENOMEM);
-
- len = strncpy_from_user(tmp, filename, NAME_MAX + 1);
- if (len < 0)
- ret = len;
- else if (len == 0)
- ret = -ENOENT;
- else if (len > NAME_MAX && tmp[NAME_MAX] != 0)
- ret = -ENAMETOOLONG;
-
- if (ret) {
- kfree(tmp);
- tmp = ERR_PTR(ret);
- }
- return tmp;
-}
-
-#define ll_putname(filename) kfree(filename)
-
-static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- struct inode *inode = file_inode(file);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct obd_ioctl_data *data;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), cmd=%#x\n",
- PFID(ll_inode2fid(inode)), inode, cmd);
-
- /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
- if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
- return -ENOTTY;
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
- switch (cmd) {
- case FSFILT_IOC_GETFLAGS:
- case FSFILT_IOC_SETFLAGS:
- return ll_iocontrol(inode, file, cmd, arg);
- case FSFILT_IOC_GETVERSION_OLD:
- case FSFILT_IOC_GETVERSION:
- return put_user(inode->i_generation, (int __user *)arg);
- /* We need to special case any other ioctls we want to handle,
- * to send them to the MDS/OST as appropriate and to properly
- * network encode the arg field.
- case FSFILT_IOC_SETVERSION_OLD:
- case FSFILT_IOC_SETVERSION:
- */
- case LL_IOC_GET_MDTIDX: {
- int mdtidx;
-
- mdtidx = ll_get_mdt_idx(inode);
- if (mdtidx < 0)
- return mdtidx;
-
- if (put_user((int)mdtidx, (int __user *)arg))
- return -EFAULT;
-
- return 0;
- }
- case IOC_MDC_LOOKUP: {
- int namelen, len = 0;
- char *buf = NULL;
- char *filename;
-
- rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg);
- if (rc)
- return rc;
- data = (void *)buf;
-
- filename = data->ioc_inlbuf1;
- namelen = strlen(filename);
-
- if (namelen < 1) {
- CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n");
- rc = -EINVAL;
- goto out_free;
- }
-
- rc = ll_get_fid_by_name(inode, filename, namelen, NULL, NULL);
- if (rc < 0) {
- CERROR("%s: lookup %.*s failed: rc = %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0), namelen,
- filename, rc);
- goto out_free;
- }
-out_free:
- kvfree(buf);
- return rc;
- }
- case LL_IOC_LMV_SETSTRIPE: {
- struct lmv_user_md *lum;
- char *buf = NULL;
- char *filename;
- int namelen = 0;
- int lumlen = 0;
- umode_t mode;
- int len;
- int rc;
-
- rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg);
- if (rc)
- return rc;
-
- data = (void *)buf;
- if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
- data->ioc_inllen1 == 0 || data->ioc_inllen2 == 0) {
- rc = -EINVAL;
- goto lmv_out_free;
- }
-
- filename = data->ioc_inlbuf1;
- namelen = data->ioc_inllen1;
-
- if (namelen < 1) {
- CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n");
- rc = -EINVAL;
- goto lmv_out_free;
- }
- lum = (struct lmv_user_md *)data->ioc_inlbuf2;
- lumlen = data->ioc_inllen2;
-
- if (lum->lum_magic != LMV_USER_MAGIC ||
- lumlen != sizeof(*lum)) {
- CERROR("%s: wrong lum magic %x or size %d: rc = %d\n",
- filename, lum->lum_magic, lumlen, -EINVAL);
- rc = -EINVAL;
- goto lmv_out_free;
- }
-
-#if OBD_OCD_VERSION(2, 9, 50, 0) > LUSTRE_VERSION_CODE
- mode = data->ioc_type != 0 ? data->ioc_type : 0777;
-#else
- mode = data->ioc_type;
-#endif
- rc = ll_dir_setdirstripe(inode, lum, filename, mode);
-lmv_out_free:
- kvfree(buf);
- return rc;
- }
- case LL_IOC_LMV_SET_DEFAULT_STRIPE: {
- struct lmv_user_md __user *ulump;
- struct lmv_user_md lum;
- int rc;
-
- ulump = (struct lmv_user_md __user *)arg;
- if (copy_from_user(&lum, ulump, sizeof(lum)))
- return -EFAULT;
-
- if (lum.lum_magic != LMV_USER_MAGIC)
- return -EINVAL;
-
- rc = ll_dir_setstripe(inode, (struct lov_user_md *)&lum, 0);
-
- return rc;
- }
- case LL_IOC_LOV_SETSTRIPE: {
- struct lov_user_md_v3 lumv3;
- struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
- struct lov_user_md_v1 __user *lumv1p = (void __user *)arg;
- struct lov_user_md_v3 __user *lumv3p = (void __user *)arg;
-
- int set_default = 0;
-
- LASSERT(sizeof(lumv3) == sizeof(*lumv3p));
- LASSERT(sizeof(lumv3.lmm_objects[0]) ==
- sizeof(lumv3p->lmm_objects[0]));
- /* first try with v1 which is smaller than v3 */
- if (copy_from_user(lumv1, lumv1p, sizeof(*lumv1)))
- return -EFAULT;
-
- if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
- if (copy_from_user(&lumv3, lumv3p, sizeof(lumv3)))
- return -EFAULT;
- if (lumv3.lmm_magic != LOV_USER_MAGIC_V3)
- return -EINVAL;
- }
-
- if (is_root_inode(inode))
- set_default = 1;
-
- /* in v1 and v3 cases lumv1 points to data */
- rc = ll_dir_setstripe(inode, lumv1, set_default);
-
- return rc;
- }
- case LL_IOC_LMV_GETSTRIPE: {
- struct lmv_user_md __user *ulmv;
- struct lmv_user_md lum;
- struct ptlrpc_request *request = NULL;
- struct lmv_user_md *tmp = NULL;
- union lmv_mds_md *lmm = NULL;
- u64 valid = 0;
- int max_stripe_count;
- int stripe_count;
- int mdt_index;
- int lum_size;
- int lmmsize;
- int rc;
- int i;
-
- ulmv = (struct lmv_user_md __user *)arg;
- if (copy_from_user(&lum, ulmv, sizeof(*ulmv)))
- return -EFAULT;
-
- max_stripe_count = lum.lum_stripe_count;
- /*
- * lum_magic indicates which stripe EA the ioctl would like to
- * get: LMV_MAGIC_V1 is for a normal LMV stripe, LMV_USER_MAGIC
- * is for the default LMV stripe.
- */
- if (lum.lum_magic == LMV_MAGIC_V1)
- valid |= OBD_MD_MEA;
- else if (lum.lum_magic == LMV_USER_MAGIC)
- valid |= OBD_MD_DEFAULT_MEA;
- else
- return -EINVAL;
-
- rc = ll_dir_getstripe(inode, (void **)&lmm, &lmmsize, &request,
- valid);
- if (rc)
- goto finish_req;
-
- /* Get default LMV EA */
- if (lum.lum_magic == LMV_USER_MAGIC) {
- if (lmmsize > sizeof(*ulmv)) {
- rc = -EINVAL;
- goto finish_req;
- }
-
- if (copy_to_user(ulmv, lmm, lmmsize))
- rc = -EFAULT;
-
- goto finish_req;
- }
-
- stripe_count = lmv_mds_md_stripe_count_get(lmm);
- if (max_stripe_count < stripe_count) {
- lum.lum_stripe_count = stripe_count;
- if (copy_to_user(ulmv, &lum, sizeof(lum))) {
- rc = -EFAULT;
- goto finish_req;
- }
- rc = -E2BIG;
- goto finish_req;
- }
-
- lum_size = lmv_user_md_size(stripe_count, LMV_MAGIC_V1);
- tmp = kzalloc(lum_size, GFP_NOFS);
- if (!tmp) {
- rc = -ENOMEM;
- goto finish_req;
- }
-
- mdt_index = ll_get_mdt_idx(inode);
- if (mdt_index < 0) {
- rc = mdt_index;
- goto out_tmp;
- }
- tmp->lum_magic = LMV_MAGIC_V1;
- tmp->lum_stripe_count = 0;
- tmp->lum_stripe_offset = mdt_index;
- for (i = 0; i < stripe_count; i++) {
- struct lu_fid fid;
-
- fid_le_to_cpu(&fid, &lmm->lmv_md_v1.lmv_stripe_fids[i]);
- mdt_index = ll_get_mdt_idx_by_fid(sbi, &fid);
- if (mdt_index < 0) {
- rc = mdt_index;
- goto out_tmp;
- }
- tmp->lum_objects[i].lum_mds = mdt_index;
- tmp->lum_objects[i].lum_fid = fid;
- tmp->lum_stripe_count++;
- }
-
- if (copy_to_user(ulmv, tmp, lum_size)) {
- rc = -EFAULT;
- goto out_tmp;
- }
-out_tmp:
- kfree(tmp);
-finish_req:
- ptlrpc_req_finished(request);
- return rc;
- }
-
- case LL_IOC_LOV_SWAP_LAYOUTS:
- return -EPERM;
- case IOC_OBD_STATFS:
- return ll_obd_statfs(inode, (void __user *)arg);
- case LL_IOC_LOV_GETSTRIPE:
- case LL_IOC_MDC_GETINFO:
- case IOC_MDC_GETFILEINFO:
- case IOC_MDC_GETFILESTRIPE: {
- struct ptlrpc_request *request = NULL;
- struct lov_user_md __user *lump;
- struct lov_mds_md *lmm = NULL;
- struct mdt_body *body;
- char *filename = NULL;
- int lmmsize;
-
- if (cmd == IOC_MDC_GETFILEINFO ||
- cmd == IOC_MDC_GETFILESTRIPE) {
- filename = ll_getname((const char __user *)arg);
- if (IS_ERR(filename))
- return PTR_ERR(filename);
-
- rc = ll_lov_getstripe_ea_info(inode, filename, &lmm,
- &lmmsize, &request);
- } else {
- rc = ll_dir_getstripe(inode, (void **)&lmm, &lmmsize,
- &request, 0);
- }
-
- if (request) {
- body = req_capsule_server_get(&request->rq_pill,
- &RMF_MDT_BODY);
- LASSERT(body);
- } else {
- goto out_req;
- }
-
- if (rc < 0) {
- if (rc == -ENODATA && (cmd == IOC_MDC_GETFILEINFO ||
- cmd == LL_IOC_MDC_GETINFO)) {
- rc = 0;
- goto skip_lmm;
- }
-
- goto out_req;
- }
-
- if (cmd == IOC_MDC_GETFILESTRIPE ||
- cmd == LL_IOC_LOV_GETSTRIPE) {
- lump = (struct lov_user_md __user *)arg;
- } else {
- struct lov_user_mds_data __user *lmdp;
-
- lmdp = (struct lov_user_mds_data __user *)arg;
- lump = &lmdp->lmd_lmm;
- }
- if (copy_to_user(lump, lmm, lmmsize)) {
- if (copy_to_user(lump, lmm, sizeof(*lump))) {
- rc = -EFAULT;
- goto out_req;
- }
- rc = -EOVERFLOW;
- }
-skip_lmm:
- if (cmd == IOC_MDC_GETFILEINFO || cmd == LL_IOC_MDC_GETINFO) {
- struct lov_user_mds_data __user *lmdp;
- lstat_t st = { 0 };
-
- st.st_dev = inode->i_sb->s_dev;
- st.st_mode = body->mbo_mode;
- st.st_nlink = body->mbo_nlink;
- st.st_uid = body->mbo_uid;
- st.st_gid = body->mbo_gid;
- st.st_rdev = body->mbo_rdev;
- st.st_size = body->mbo_size;
- st.st_blksize = PAGE_SIZE;
- st.st_blocks = body->mbo_blocks;
- st.st_atime = body->mbo_atime;
- st.st_mtime = body->mbo_mtime;
- st.st_ctime = body->mbo_ctime;
- st.st_ino = cl_fid_build_ino(&body->mbo_fid1,
- sbi->ll_flags &
- LL_SBI_32BIT_API);
-
- lmdp = (struct lov_user_mds_data __user *)arg;
- if (copy_to_user(&lmdp->lmd_st, &st, sizeof(st))) {
- rc = -EFAULT;
- goto out_req;
- }
- }
-
-out_req:
- ptlrpc_req_finished(request);
- if (filename)
- ll_putname(filename);
- return rc;
- }
- case OBD_IOC_QUOTACTL: {
- struct if_quotactl *qctl;
-
- qctl = kzalloc(sizeof(*qctl), GFP_NOFS);
- if (!qctl)
- return -ENOMEM;
-
- if (copy_from_user(qctl, (void __user *)arg, sizeof(*qctl))) {
- rc = -EFAULT;
- goto out_quotactl;
- }
-
- rc = quotactl_ioctl(sbi, qctl);
-
- if (rc == 0 && copy_to_user((void __user *)arg, qctl,
- sizeof(*qctl)))
- rc = -EFAULT;
-
-out_quotactl:
- kfree(qctl);
- return rc;
- }
- case OBD_IOC_GETDTNAME:
- case OBD_IOC_GETMDNAME:
- return ll_get_obd_name(inode, cmd, arg);
- case LL_IOC_FLUSHCTX:
- return ll_flush_ctx(inode);
- case LL_IOC_GETOBDCOUNT: {
- int count, vallen;
- struct obd_export *exp;
-
- if (copy_from_user(&count, (int __user *)arg, sizeof(int)))
- return -EFAULT;
-
- /* get ost count when count is zero, get mdt count otherwise */
- exp = count ? sbi->ll_md_exp : sbi->ll_dt_exp;
- vallen = sizeof(count);
- rc = obd_get_info(NULL, exp, sizeof(KEY_TGT_COUNT),
- KEY_TGT_COUNT, &vallen, &count);
- if (rc) {
- CERROR("get target count failed: %d\n", rc);
- return rc;
- }
-
- if (copy_to_user((int __user *)arg, &count, sizeof(int)))
- return -EFAULT;
-
- return 0;
- }
- case LL_IOC_PATH2FID:
- if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
- sizeof(struct lu_fid)))
- return -EFAULT;
- return 0;
- case LL_IOC_GET_CONNECT_FLAGS: {
- return obd_iocontrol(cmd, sbi->ll_md_exp, 0, NULL,
- (void __user *)arg);
- }
- case OBD_IOC_CHANGELOG_SEND:
- case OBD_IOC_CHANGELOG_CLEAR:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg,
- sizeof(struct ioc_changelog));
- return rc;
- case OBD_IOC_FID2PATH:
- return ll_fid2path(inode, (void __user *)arg);
- case LL_IOC_GETPARENT:
- return ll_getparent(file, (void __user *)arg);
- case LL_IOC_FID2MDTIDX: {
- struct obd_export *exp = ll_i2mdexp(inode);
- struct lu_fid fid;
- __u32 index;
-
- if (copy_from_user(&fid, (const struct lu_fid __user *)arg,
- sizeof(fid)))
- return -EFAULT;
-
- /* Call mdc_iocontrol */
- rc = obd_iocontrol(LL_IOC_FID2MDTIDX, exp, sizeof(fid), &fid,
- &index);
- if (rc)
- return rc;
-
- return index;
- }
- case LL_IOC_HSM_REQUEST: {
- struct hsm_user_request *hur;
- ssize_t totalsize;
-
- hur = memdup_user((void __user *)arg, sizeof(*hur));
- if (IS_ERR(hur))
- return PTR_ERR(hur);
-
- /* Compute the whole struct size */
- totalsize = hur_len(hur);
- kfree(hur);
- if (totalsize < 0)
- return -E2BIG;
-
- /* Final size will be more than double totalsize */
- if (totalsize >= MDS_MAXREQSIZE / 3)
- return -E2BIG;
-
- hur = kzalloc(totalsize, GFP_NOFS);
- if (!hur)
- return -ENOMEM;
-
- /* Copy the whole struct */
- if (copy_from_user(hur, (void __user *)arg, totalsize)) {
- kvfree(hur);
- return -EFAULT;
- }
-
- if (hur->hur_request.hr_action == HUA_RELEASE) {
- const struct lu_fid *fid;
- struct inode *f;
- int i;
-
- for (i = 0; i < hur->hur_request.hr_itemcount; i++) {
- fid = &hur->hur_user_item[i].hui_fid;
- f = search_inode_for_lustre(inode->i_sb, fid);
- if (IS_ERR(f)) {
- rc = PTR_ERR(f);
- break;
- }
-
- rc = ll_hsm_release(f);
- iput(f);
- if (rc != 0)
- break;
- }
- } else {
- rc = obd_iocontrol(cmd, ll_i2mdexp(inode), totalsize,
- hur, NULL);
- }
-
- kvfree(hur);
-
- return rc;
- }
- case LL_IOC_HSM_PROGRESS: {
- struct hsm_progress_kernel hpk;
- struct hsm_progress hp;
-
- if (copy_from_user(&hp, (void __user *)arg, sizeof(hp)))
- return -EFAULT;
-
- hpk.hpk_fid = hp.hp_fid;
- hpk.hpk_cookie = hp.hp_cookie;
- hpk.hpk_extent = hp.hp_extent;
- hpk.hpk_flags = hp.hp_flags;
- hpk.hpk_errval = hp.hp_errval;
- hpk.hpk_data_version = 0;
-
- /* The file may not exist in Lustre; all progress is
- * reported to the Lustre root.
- */
- rc = obd_iocontrol(cmd, sbi->ll_md_exp, sizeof(hpk), &hpk,
- NULL);
- return rc;
- }
- case LL_IOC_HSM_CT_START:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg,
- sizeof(struct lustre_kernelcomm));
- return rc;
-
- case LL_IOC_HSM_COPY_START: {
- struct hsm_copy *copy;
- int rc;
-
- copy = memdup_user((char __user *)arg, sizeof(*copy));
- if (IS_ERR(copy))
- return PTR_ERR(copy);
-
- rc = ll_ioc_copy_start(inode->i_sb, copy);
- if (copy_to_user((char __user *)arg, copy, sizeof(*copy)))
- rc = -EFAULT;
-
- kfree(copy);
- return rc;
- }
- case LL_IOC_HSM_COPY_END: {
- struct hsm_copy *copy;
- int rc;
-
- copy = memdup_user((char __user *)arg, sizeof(*copy));
- if (IS_ERR(copy))
- return PTR_ERR(copy);
-
- rc = ll_ioc_copy_end(inode->i_sb, copy);
- if (copy_to_user((char __user *)arg, copy, sizeof(*copy)))
- rc = -EFAULT;
-
- kfree(copy);
- return rc;
- }
- case LL_IOC_MIGRATE: {
- char *buf = NULL;
- const char *filename;
- int namelen = 0;
- int len;
- int rc;
- int mdtidx;
-
- rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg);
- if (rc < 0)
- return rc;
-
- data = (struct obd_ioctl_data *)buf;
- if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
- !data->ioc_inllen1 || !data->ioc_inllen2) {
- rc = -EINVAL;
- goto migrate_free;
- }
-
- filename = data->ioc_inlbuf1;
- namelen = data->ioc_inllen1;
- if (namelen < 1 || namelen != strlen(filename) + 1) {
- rc = -EINVAL;
- goto migrate_free;
- }
-
- if (data->ioc_inllen2 != sizeof(mdtidx)) {
- rc = -EINVAL;
- goto migrate_free;
- }
- mdtidx = *(int *)data->ioc_inlbuf2;
-
- rc = ll_migrate(inode, file, mdtidx, filename, namelen - 1);
-migrate_free:
- kvfree(buf);
-
- return rc;
- }
-
- default:
- return obd_iocontrol(cmd, sbi->ll_dt_exp, 0, NULL,
- (void __user *)arg);
- }
-}
-
-static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
-{
- struct inode *inode = file->f_mapping->host;
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- int api32 = ll_need_32bit_api(sbi);
- loff_t ret = -EINVAL;
-
- switch (origin) {
- case SEEK_SET:
- break;
- case SEEK_CUR:
- offset += file->f_pos;
- break;
- case SEEK_END:
- if (offset > 0)
- goto out;
- if (api32)
- offset += LL_DIR_END_OFF_32BIT;
- else
- offset += LL_DIR_END_OFF;
- break;
- default:
- goto out;
- }
-
- if (offset >= 0 &&
- ((api32 && offset <= LL_DIR_END_OFF_32BIT) ||
- (!api32 && offset <= LL_DIR_END_OFF))) {
- if (offset != file->f_pos) {
- if ((api32 && offset == LL_DIR_END_OFF_32BIT) ||
- (!api32 && offset == LL_DIR_END_OFF))
- fd->lfd_pos = MDS_DIR_END_OFF;
- else if (api32 && sbi->ll_flags & LL_SBI_64BIT_HASH)
- fd->lfd_pos = offset << 32;
- else
- fd->lfd_pos = offset;
- file->f_pos = offset;
- }
- ret = offset;
- }
-
-out:
- return ret;
-}
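-
-/*
- * A minimal sketch (hypothetical helper, not called anywhere) of the
- * offset mapping ll_dir_seek() applies above: directory positions are
- * hash cookies, so a 32-bit API position is promoted into the high 32
- * bits when the server uses 64-bit hashes, and the per-API
- * end-of-directory sentinels both map to the canonical MDS_DIR_END_OFF
- * cookie.
- */
-static inline loff_t ll_dir_pos_sketch(loff_t offset, int api32, bool hash64)
-{
-	if ((api32 && offset == LL_DIR_END_OFF_32BIT) ||
-	    (!api32 && offset == LL_DIR_END_OFF))
-		return MDS_DIR_END_OFF;	/* canonical end-of-dir cookie */
-	if (api32 && hash64)
-		return offset << 32;	/* widen 32-bit hash position */
-	return offset;
-}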
-
-static int ll_dir_open(struct inode *inode, struct file *file)
-{
- return ll_file_open(inode, file);
-}
-
-static int ll_dir_release(struct inode *inode, struct file *file)
-{
- return ll_file_release(inode, file);
-}
-
-const struct file_operations ll_dir_operations = {
- .llseek = ll_dir_seek,
- .open = ll_dir_open,
- .release = ll_dir_release,
- .read = generic_read_dir,
- .iterate_shared = ll_readdir,
- .unlocked_ioctl = ll_dir_ioctl,
- .fsync = ll_fsync,
-};
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
deleted file mode 100644
index 02295931883b..000000000000
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ /dev/null
@@ -1,3580 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/llite/file.c
- *
- * Author: Peter Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Andreas Dilger <adilger@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-#include <lustre_dlm.h>
-#include <linux/pagemap.h>
-#include <linux/file.h>
-#include <linux/sched.h>
-#include <linux/mount.h>
-#include <uapi/linux/lustre/lustre_fiemap.h>
-#include <uapi/linux/lustre/lustre_ioctl.h>
-#include <lustre_swab.h>
-
-#include <cl_object.h>
-#include "llite_internal.h"
-
-static int
-ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
-
-static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
- bool *lease_broken);
-
-static enum llioc_iter
-ll_iocontrol_call(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg, int *rcp);
-
-static struct ll_file_data *ll_file_data_get(void)
-{
- struct ll_file_data *fd;
-
- fd = kmem_cache_zalloc(ll_file_data_slab, GFP_NOFS);
- if (!fd)
- return NULL;
- fd->fd_write_failed = false;
- return fd;
-}
-
-static void ll_file_data_put(struct ll_file_data *fd)
-{
- if (fd)
- kmem_cache_free(ll_file_data_slab, fd);
-}
-
-/**
- * Packs all the attributes into @op_data for the CLOSE rpc.
- */
-static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
- struct obd_client_handle *och)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
-
- ll_prep_md_op_data(op_data, inode, NULL, NULL,
- 0, 0, LUSTRE_OPC_ANY, NULL);
-
- op_data->op_attr.ia_mode = inode->i_mode;
- op_data->op_attr.ia_atime = inode->i_atime;
- op_data->op_attr.ia_mtime = inode->i_mtime;
- op_data->op_attr.ia_ctime = inode->i_ctime;
- op_data->op_attr.ia_size = i_size_read(inode);
- op_data->op_attr.ia_valid |= ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
- ATTR_MTIME | ATTR_MTIME_SET |
- ATTR_CTIME | ATTR_CTIME_SET;
- op_data->op_attr_blocks = inode->i_blocks;
- op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
- op_data->op_handle = och->och_fh;
-
- /*
- * For HSM: if inode data has been modified, pack it so that
- * MDT can set data dirty flag in the archive.
- */
- if (och->och_flags & FMODE_WRITE &&
- test_and_clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags))
- op_data->op_bias |= MDS_DATA_MODIFIED;
-}
-
-/**
- * Perform a close, possibly with a bias.
- * The meaning of "data" depends on the value of "bias".
- *
- * If \a bias is MDS_HSM_RELEASE then \a data is a pointer to the data version.
- * If \a bias is MDS_CLOSE_LAYOUT_SWAP then \a data is a pointer to the inode to
- * swap layouts with.
- */
-static int ll_close_inode_openhandle(struct inode *inode,
- struct obd_client_handle *och,
- enum mds_op_bias bias,
- void *data)
-{
- const struct ll_inode_info *lli = ll_i2info(inode);
- struct obd_export *md_exp = ll_i2mdexp(inode);
- struct md_op_data *op_data;
- struct ptlrpc_request *req = NULL;
- int rc;
-
- if (!class_exp2obd(md_exp)) {
- CERROR("%s: invalid MDC connection handle closing " DFID "\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(&lli->lli_fid));
- rc = 0;
- goto out;
- }
-
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- /*
- * We leak the openhandle and request here on error, but there is not
- * much to be done in the OOM case since the app won't retry the close
- * on error either.
- */
- if (!op_data) {
- rc = -ENOMEM;
- goto out;
- }
-
- ll_prepare_close(inode, op_data, och);
- switch (bias) {
- case MDS_CLOSE_LAYOUT_SWAP:
- LASSERT(data);
- op_data->op_bias |= MDS_CLOSE_LAYOUT_SWAP;
- op_data->op_data_version = 0;
- op_data->op_lease_handle = och->och_lease_handle;
- op_data->op_fid2 = *ll_inode2fid(data);
- break;
-
- case MDS_HSM_RELEASE:
- LASSERT(data);
- op_data->op_bias |= MDS_HSM_RELEASE;
- op_data->op_data_version = *(__u64 *)data;
- op_data->op_lease_handle = och->och_lease_handle;
- op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
- break;
-
- default:
- LASSERT(!data);
- break;
- }
-
- rc = md_close(md_exp, op_data, och->och_mod, &req);
- if (rc && rc != -EINTR) {
- CERROR("%s: inode " DFID " mdc close failed: rc = %d\n",
- md_exp->exp_obd->obd_name, PFID(&lli->lli_fid), rc);
- }
-
- if (op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP) &&
- !rc) {
- struct mdt_body *body;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!(body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED))
- rc = -EBUSY;
- }
-
- ll_finish_md_op_data(op_data);
-
-out:
- md_clear_open_replay_data(md_exp, och);
- och->och_fh.cookie = DEAD_HANDLE_MAGIC;
- kfree(och);
-
- ptlrpc_req_finished(req);
- return rc;
-}
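-
-/*
- * Call-shape sketch for the bias/data pairing described above
- * (hypothetical snippets; they mirror the plain close, HSM release and
- * layout swap callers later in this file):
- *
- *	ll_close_inode_openhandle(inode, och, 0, NULL);
- *	ll_close_inode_openhandle(inode, och, MDS_HSM_RELEASE, &dv);
- *	ll_close_inode_openhandle(inode, och, MDS_CLOSE_LAYOUT_SWAP, inode2);
- */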
-
-int ll_md_real_close(struct inode *inode, fmode_t fmode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct obd_client_handle **och_p;
- struct obd_client_handle *och;
- __u64 *och_usecount;
- int rc = 0;
-
- if (fmode & FMODE_WRITE) {
- och_p = &lli->lli_mds_write_och;
- och_usecount = &lli->lli_open_fd_write_count;
- } else if (fmode & FMODE_EXEC) {
- och_p = &lli->lli_mds_exec_och;
- och_usecount = &lli->lli_open_fd_exec_count;
- } else {
- LASSERT(fmode & FMODE_READ);
- och_p = &lli->lli_mds_read_och;
- och_usecount = &lli->lli_open_fd_read_count;
- }
-
- mutex_lock(&lli->lli_och_mutex);
- if (*och_usecount > 0) {
- /* There are still users of this handle, so skip
- * freeing it.
- */
- mutex_unlock(&lli->lli_och_mutex);
- return 0;
- }
-
- och = *och_p;
- *och_p = NULL;
- mutex_unlock(&lli->lli_och_mutex);
-
- if (och) {
- /* There might be a race and this handle may already
- * be closed.
- */
- rc = ll_close_inode_openhandle(inode, och, 0, NULL);
- }
-
- return rc;
-}
-
-static int ll_md_close(struct inode *inode, struct file *file)
-{
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_inode_info *lli = ll_i2info(inode);
- int lockmode;
- __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
- struct lustre_handle lockh;
- union ldlm_policy_data policy = {
- .l_inodebits = { MDS_INODELOCK_OPEN }
- };
- int rc = 0;
-
- /* clear group lock, if present */
- if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
- ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);
-
- if (fd->fd_lease_och) {
- bool lease_broken;
-
- /* Usually the lease is not released when the
- * application crashes, so we need to release it here.
- */
- rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
- CDEBUG(rc ? D_ERROR : D_INODE,
- "Clean up lease " DFID " %d/%d\n",
- PFID(&lli->lli_fid), rc, lease_broken);
-
- fd->fd_lease_och = NULL;
- }
-
- if (fd->fd_och) {
- rc = ll_close_inode_openhandle(inode, fd->fd_och, 0, NULL);
- fd->fd_och = NULL;
- goto out;
- }
-
- /* Let's see if we have a good enough OPEN lock on the file
- * to let us skip talking to the MDS.
- */
-
- mutex_lock(&lli->lli_och_mutex);
- if (fd->fd_omode & FMODE_WRITE) {
- lockmode = LCK_CW;
- LASSERT(lli->lli_open_fd_write_count);
- lli->lli_open_fd_write_count--;
- } else if (fd->fd_omode & FMODE_EXEC) {
- lockmode = LCK_PR;
- LASSERT(lli->lli_open_fd_exec_count);
- lli->lli_open_fd_exec_count--;
- } else {
- lockmode = LCK_CR;
- LASSERT(lli->lli_open_fd_read_count);
- lli->lli_open_fd_read_count--;
- }
- mutex_unlock(&lli->lli_och_mutex);
-
- if (!md_lock_match(ll_i2mdexp(inode), flags, ll_inode2fid(inode),
- LDLM_IBITS, &policy, lockmode, &lockh))
- rc = ll_md_real_close(inode, fd->fd_omode);
-
-out:
- LUSTRE_FPRIVATE(file) = NULL;
- ll_file_data_put(fd);
-
- return rc;
-}
-
-/* While this returns an error code, the caller, fput(), ignores it, so we
- * need to make every effort to clean up all of our state here. Also,
- * applications rarely check close errors, and even if an error is returned
- * they will not retry the close call.
- */
-int ll_file_release(struct inode *inode, struct file *file)
-{
- struct ll_file_data *fd;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ll_inode_info *lli = ll_i2info(inode);
- int rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n",
- PFID(ll_inode2fid(inode)), inode);
-
- if (!is_root_inode(inode))
- ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
- fd = LUSTRE_FPRIVATE(file);
- LASSERT(fd);
-
- /* The last ref on @file may not be held by the owner pid of statahead,
- * because a parent and child process can share the same file handle.
- */
- if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
- ll_deauthorize_statahead(inode, fd);
-
- if (is_root_inode(inode)) {
- LUSTRE_FPRIVATE(file) = NULL;
- ll_file_data_put(fd);
- return 0;
- }
-
- if (!S_ISDIR(inode->i_mode)) {
- if (lli->lli_clob)
- lov_read_and_clear_async_rc(lli->lli_clob);
- lli->lli_async_rc = 0;
- }
-
- rc = ll_md_close(inode, file);
-
- if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
- libcfs_debug_dumplog();
-
- return rc;
-}
-
-static int ll_intent_file_open(struct dentry *de, void *lmm, int lmmsize,
- struct lookup_intent *itp)
-{
- struct inode *inode = d_inode(de);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct dentry *parent = de->d_parent;
- const char *name = NULL;
- struct md_op_data *op_data;
- struct ptlrpc_request *req = NULL;
- int len = 0, rc;
-
- LASSERT(parent);
- LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
-
- /*
- * If the server supports open-by-fid, or the file name is invalid,
- * don't pack the name in the open request.
- */
- if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
- lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
- name = de->d_name.name;
- len = de->d_name.len;
- }
-
- op_data = ll_prep_md_op_data(NULL, d_inode(parent), inode, name, len,
- O_RDWR, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
- op_data->op_data = lmm;
- op_data->op_data_size = lmmsize;
-
- rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
- &ll_md_blocking_ast, 0);
- ll_finish_md_op_data(op_data);
- if (rc == -ESTALE) {
- /* Keep a separate exit path here so we don't flood the
- * log with -ESTALE error messages.
- */
- if (!it_disposition(itp, DISP_OPEN_OPEN) ||
- it_open_error(DISP_OPEN_OPEN, itp))
- goto out;
- ll_release_openhandle(inode, itp);
- goto out;
- }
-
- if (it_disposition(itp, DISP_LOOKUP_NEG)) {
- rc = -ENOENT;
- goto out;
- }
-
- if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
- rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
- CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
- goto out;
- }
-
- rc = ll_prep_inode(&inode, req, NULL, itp);
- if (!rc && itp->it_lock_mode)
- ll_set_lock_data(sbi->ll_md_exp, inode, itp, NULL);
-
-out:
- ptlrpc_req_finished(req);
- ll_intent_drop_lock(itp);
-
- /*
- * We did open by fid, but by the time we got to the server,
- * the object disappeared. If this is a create, we cannot really
- * tell userspace that the file it was trying to create
- * does not exist. Instead let's return -ESTALE, and the VFS will
- * retry the create with LOOKUP_REVAL that we are going to catch
- * in ll_revalidate_dentry() and use lookup then.
- */
- if (rc == -ENOENT && itp->it_op & IT_CREAT)
- rc = -ESTALE;
-
- return rc;
-}
-
-static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
- struct obd_client_handle *och)
-{
- struct mdt_body *body;
-
- body = req_capsule_server_get(&it->it_request->rq_pill, &RMF_MDT_BODY);
- och->och_fh = body->mbo_handle;
- och->och_fid = body->mbo_fid1;
- och->och_lease_handle.cookie = it->it_lock_handle;
- och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
- och->och_flags = it->it_flags;
-
- return md_set_open_replay_data(md_exp, och, it);
-}
-
-static int ll_local_open(struct file *file, struct lookup_intent *it,
- struct ll_file_data *fd, struct obd_client_handle *och)
-{
- struct inode *inode = file_inode(file);
-
- LASSERT(!LUSTRE_FPRIVATE(file));
-
- LASSERT(fd);
-
- if (och) {
- int rc;
-
- rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
- if (rc != 0)
- return rc;
- }
-
- LUSTRE_FPRIVATE(file) = fd;
- ll_readahead_init(inode, &fd->fd_ras);
- fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
-
- /* ll_cl_context initialize */
- rwlock_init(&fd->fd_lock);
- INIT_LIST_HEAD(&fd->fd_lccs);
-
- return 0;
-}
-
-/* Open a file, and (for the very first open) create objects on the OSTs at
- * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
- * creation or open until ll_lov_setstripe() ioctl is called.
- *
- * If we already have the stripe MD locally then we don't request it in
- * md_open(), by passing a lmm_size = 0.
- *
- * It is up to the application to ensure no other processes open this file
- * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
- * used. We might be able to avoid races of that sort by getting lli_open_sem
- * before returning in the O_LOV_DELAY_CREATE case and dropping it here
- * or in ll_file_release(), but I'm not sure that is desirable/necessary.
- */
-int ll_file_open(struct inode *inode, struct file *file)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lookup_intent *it, oit = { .it_op = IT_OPEN,
- .it_flags = file->f_flags };
- struct obd_client_handle **och_p = NULL;
- __u64 *och_usecount = NULL;
- struct ll_file_data *fd;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), flags %o\n",
- PFID(ll_inode2fid(inode)), inode, file->f_flags);
-
- it = file->private_data; /* XXX: compat macro */
- file->private_data = NULL; /* prevent ll_local_open assertion */
-
- fd = ll_file_data_get();
- if (!fd) {
- rc = -ENOMEM;
- goto out_openerr;
- }
-
- fd->fd_file = file;
- if (S_ISDIR(inode->i_mode))
- ll_authorize_statahead(inode, fd);
-
- if (is_root_inode(inode)) {
- LUSTRE_FPRIVATE(file) = fd;
- return 0;
- }
-
- if (!it || !it->it_disposition) {
- /* Convert f_flags into an access mode. We cannot use file->f_mode,
- * because everything but the O_ACCMODE mask was stripped from
- * it.
- */
- if ((oit.it_flags + 1) & O_ACCMODE)
- oit.it_flags++;
- if (file->f_flags & O_TRUNC)
- oit.it_flags |= FMODE_WRITE;
-
- /* The kernel only calls f_op->open from dentry_open. filp_open
- * calls dentry_open after open_namei has checked permissions.
- * Only nfsd_open calls dentry_open directly without checking
- * permissions, which is why the code below is safe.
- */
- if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
- oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
-
- /* We do not want O_EXCL here, presumably we opened the file
- * already? XXX - NFS implications?
- */
- oit.it_flags &= ~O_EXCL;
-
- /* bug20584: if "it_flags" contains O_CREAT, the file will be
- * created if necessary, so "IT_CREAT" should be set to stay
- * consistent with it.
- */
- if (oit.it_flags & O_CREAT)
- oit.it_op |= IT_CREAT;
-
- it = &oit;
- }
-
-restart:
- /* Let's see if we have file open on MDS already. */
- if (it->it_flags & FMODE_WRITE) {
- och_p = &lli->lli_mds_write_och;
- och_usecount = &lli->lli_open_fd_write_count;
- } else if (it->it_flags & FMODE_EXEC) {
- och_p = &lli->lli_mds_exec_och;
- och_usecount = &lli->lli_open_fd_exec_count;
- } else {
- och_p = &lli->lli_mds_read_och;
- och_usecount = &lli->lli_open_fd_read_count;
- }
-
- mutex_lock(&lli->lli_och_mutex);
- if (*och_p) { /* Open handle is present */
- if (it_disposition(it, DISP_OPEN_OPEN)) {
- /* Well, there's an extra open request that we do not need;
- * let's close it. This will decref the request.
- */
- rc = it_open_error(DISP_OPEN_OPEN, it);
- if (rc) {
- mutex_unlock(&lli->lli_och_mutex);
- goto out_openerr;
- }
-
- ll_release_openhandle(inode, it);
- }
- (*och_usecount)++;
-
- rc = ll_local_open(file, it, fd, NULL);
- if (rc) {
- (*och_usecount)--;
- mutex_unlock(&lli->lli_och_mutex);
- goto out_openerr;
- }
- } else {
- LASSERT(*och_usecount == 0);
- if (!it->it_disposition) {
- /* We cannot just request the lock handle now: the new ELC
- * code means that one of the other OPEN locks for this file
- * could be cancelled, and since the blocking AST handler
- * would attempt to grab och_mutex as well, that would
- * result in a deadlock.
- */
- mutex_unlock(&lli->lli_och_mutex);
- /*
- * Normally called under two situations:
- * 1. NFS export.
- * 2. revalidate with IT_OPEN (revalidate doesn't
- * execute this intent any more).
- *
- * Always fetch MDS_OPEN_LOCK if this is not setstripe.
- *
- * Always specify MDS_OPEN_BY_FID because we don't want
- * to get a file with a different fid.
- */
- it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
- rc = ll_intent_file_open(file->f_path.dentry,
- NULL, 0, it);
- if (rc)
- goto out_openerr;
-
- goto restart;
- }
- *och_p = kzalloc(sizeof(struct obd_client_handle), GFP_NOFS);
- if (!*och_p) {
- rc = -ENOMEM;
- goto out_och_free;
- }
-
- (*och_usecount)++;
-
- /* md_intent_lock() didn't get a request ref if there was an
- * open error, so don't do cleanup on the request here
- * (bug 3430)
- */
- /* XXX (green): Should not we bail out on any error here, not
- * just open error?
- */
- rc = it_open_error(DISP_OPEN_OPEN, it);
- if (rc)
- goto out_och_free;
-
- LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
- "inode %p: disposition %x, status %d\n", inode,
- it_disposition(it, ~0), it->it_status);
-
- rc = ll_local_open(file, it, fd, *och_p);
- if (rc)
- goto out_och_free;
- }
- mutex_unlock(&lli->lli_och_mutex);
- fd = NULL;
-
- /* Must do this outside the lli_och_mutex lock to prevent a deadlock
- * where a different kind of OPEN lock for this same inode gets
- * cancelled by ldlm_cancel_lru.
- */
- if (!S_ISREG(inode->i_mode))
- goto out_och_free;
-
- cl_lov_delay_create_clear(&file->f_flags);
- goto out_och_free;
-
-out_och_free:
- if (rc) {
- if (och_p && *och_p) {
- kfree(*och_p);
- *och_p = NULL;
- (*och_usecount)--;
- }
- mutex_unlock(&lli->lli_och_mutex);
-
-out_openerr:
- if (lli->lli_opendir_key == fd)
- ll_deauthorize_statahead(inode, fd);
- if (fd)
- ll_file_data_put(fd);
- } else {
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
- }
-
- if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
- ptlrpc_req_finished(it->it_request);
- it_clear_disposition(it, DISP_ENQ_OPEN_REF);
- }
-
- return rc;
-}
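-
-/*
- * Worked example for the "(oit.it_flags + 1) & O_ACCMODE" conversion in
- * ll_file_open() above: O_RDONLY/O_WRONLY/O_RDWR are 0/1/2 while
- * FMODE_READ/FMODE_WRITE are 1/2, so adding one maps the open flags
- * onto fmode bits:
- *
- *	O_RDONLY (0) + 1 == FMODE_READ               (1)
- *	O_WRONLY (1) + 1 == FMODE_WRITE              (2)
- *	O_RDWR   (2) + 1 == FMODE_READ | FMODE_WRITE (3)
- */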
-
-static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
- struct ldlm_lock_desc *desc,
- void *data, int flag)
-{
- int rc;
- struct lustre_handle lockh;
-
- switch (flag) {
- case LDLM_CB_BLOCKING:
- ldlm_lock2handle(lock, &lockh);
- rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
- if (rc < 0) {
- CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
- return rc;
- }
- break;
- case LDLM_CB_CANCELING:
- /* do nothing */
- break;
- }
- return 0;
-}
-
-/**
- * Acquire a lease and open the file.
- */
-static struct obd_client_handle *
-ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
- __u64 open_flags)
-{
- struct lookup_intent it = { .it_op = IT_OPEN };
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct md_op_data *op_data;
- struct ptlrpc_request *req = NULL;
- struct lustre_handle old_handle = { 0 };
- struct obd_client_handle *och = NULL;
- int rc;
- int rc2;
-
- if (fmode != FMODE_WRITE && fmode != FMODE_READ)
- return ERR_PTR(-EINVAL);
-
- if (file) {
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct obd_client_handle **och_p;
- __u64 *och_usecount;
-
- if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
- return ERR_PTR(-EPERM);
-
- /* Get the openhandle of the file */
- rc = -EBUSY;
- mutex_lock(&lli->lli_och_mutex);
- if (fd->fd_lease_och) {
- mutex_unlock(&lli->lli_och_mutex);
- return ERR_PTR(rc);
- }
-
- if (!fd->fd_och) {
- if (file->f_mode & FMODE_WRITE) {
- LASSERT(lli->lli_mds_write_och);
- och_p = &lli->lli_mds_write_och;
- och_usecount = &lli->lli_open_fd_write_count;
- } else {
- LASSERT(lli->lli_mds_read_och);
- och_p = &lli->lli_mds_read_och;
- och_usecount = &lli->lli_open_fd_read_count;
- }
- if (*och_usecount == 1) {
- fd->fd_och = *och_p;
- *och_p = NULL;
- *och_usecount = 0;
- rc = 0;
- }
- }
- mutex_unlock(&lli->lli_och_mutex);
- if (rc < 0) /* more than 1 opener */
- return ERR_PTR(rc);
-
- LASSERT(fd->fd_och);
- old_handle = fd->fd_och->och_fh;
- }
-
- och = kzalloc(sizeof(*och), GFP_NOFS);
- if (!och)
- return ERR_PTR(-ENOMEM);
-
- op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data)) {
- rc = PTR_ERR(op_data);
- goto out;
- }
-
- /* To tell the MDT this openhandle is from the same owner */
- op_data->op_handle = old_handle;
-
- it.it_flags = fmode | open_flags;
- it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
- rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
- &ll_md_blocking_lease_ast,
- /* LDLM_FL_NO_LRU: do not put the lease lock into the LRU list,
- * otherwise it can be cancelled, which may mislead applications
- * into thinking the lease is broken;
- * LDLM_FL_EXCL: set this flag so the lock won't be matched by a
- * normal open in ll_md_blocking_ast(). Otherwise, since
- * ll_md_blocking_lease_ast doesn't deal with the openhandle, a
- * normal openhandle would be leaked.
- */
- LDLM_FL_NO_LRU | LDLM_FL_EXCL);
- ll_finish_md_op_data(op_data);
- ptlrpc_req_finished(req);
- if (rc < 0)
- goto out_release_it;
-
- if (it_disposition(&it, DISP_LOOKUP_NEG)) {
- rc = -ENOENT;
- goto out_release_it;
- }
-
- rc = it_open_error(DISP_OPEN_OPEN, &it);
- if (rc)
- goto out_release_it;
-
- LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
- ll_och_fill(sbi->ll_md_exp, &it, och);
-
- if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */ {
- rc = -EOPNOTSUPP;
- goto out_close;
- }
-
- /* lease already granted, handle the lease lock */
- ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
- if (it.it_lock_mode == 0 ||
- it.it_lock_bits != MDS_INODELOCK_OPEN) {
- /* open lock must return for lease */
- CERROR(DFID "lease granted but no open lock, %d/%llu.\n",
- PFID(ll_inode2fid(inode)), it.it_lock_mode,
- it.it_lock_bits);
- rc = -EPROTO;
- goto out_close;
- }
-
- ll_intent_release(&it);
- return och;
-
-out_close:
- /* Cancel open lock */
- if (it.it_lock_mode != 0) {
- ldlm_lock_decref_and_cancel(&och->och_lease_handle,
- it.it_lock_mode);
- it.it_lock_mode = 0;
- och->och_lease_handle.cookie = 0ULL;
- }
- rc2 = ll_close_inode_openhandle(inode, och, 0, NULL);
- if (rc2 < 0)
- CERROR("%s: error closing file " DFID ": %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(&ll_i2info(inode)->lli_fid), rc2);
- och = NULL; /* och has been freed in ll_close_inode_openhandle() */
-out_release_it:
- ll_intent_release(&it);
-out:
- kfree(och);
- return ERR_PTR(rc);
-}
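-
-/*
- * Lease lifecycle sketch (hypothetical caller): a lease taken with
- * ll_lease_open() must eventually be dropped with ll_lease_close(),
- * which also reports whether the server broke the lease in the
- * meantime; ll_hsm_release() below follows this pattern:
- *
- *	struct obd_client_handle *och;
- *	bool broken;
- *
- *	och = ll_lease_open(inode, NULL, FMODE_READ, 0);
- *	if (IS_ERR(och))
- *		return PTR_ERR(och);
- *	(... work under the lease ...)
- *	rc = ll_lease_close(och, inode, &broken);
- */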
-
-/**
- * Check whether a layout swap can be done between two inodes.
- *
- * \param[in] inode1 First inode to check
- * \param[in] inode2 Second inode to check
- *
- * \retval 0 on success, layout swap can be performed between both inodes
- * \retval negative error code if requirements are not met
- */
-static int ll_check_swap_layouts_validity(struct inode *inode1,
- struct inode *inode2)
-{
- if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
- return -EINVAL;
-
- if (inode_permission(inode1, MAY_WRITE) ||
- inode_permission(inode2, MAY_WRITE))
- return -EPERM;
-
- if (inode1->i_sb != inode2->i_sb)
- return -EXDEV;
-
- return 0;
-}
-
-static int ll_swap_layouts_close(struct obd_client_handle *och,
- struct inode *inode, struct inode *inode2)
-{
- const struct lu_fid *fid1 = ll_inode2fid(inode);
- const struct lu_fid *fid2;
- int rc;
-
- CDEBUG(D_INODE, "%s: biased close of file " DFID "\n",
- ll_get_fsname(inode->i_sb, NULL, 0), PFID(fid1));
-
- rc = ll_check_swap_layouts_validity(inode, inode2);
- if (rc < 0)
- goto out_free_och;
-
- /* We now know that inode2 is a lustre inode */
- fid2 = ll_inode2fid(inode2);
-
- rc = lu_fid_cmp(fid1, fid2);
- if (!rc) {
- rc = -EINVAL;
- goto out_free_och;
- }
-
- /*
- * Close the file and swap layouts between inode & inode2.
- * NB: lease lock handle is released in mdc_close_layout_swap_pack()
- * because we still need it to pack l_remote_handle to MDT.
- */
- rc = ll_close_inode_openhandle(inode, och, MDS_CLOSE_LAYOUT_SWAP,
- inode2);
-
- och = NULL; /* freed in ll_close_inode_openhandle() */
-
-out_free_och:
- kfree(och);
- return rc;
-}
-
-/**
- * Release the lease and close the file.
- * It also checks whether the lease has ever been broken.
- */
-static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
- bool *lease_broken)
-{
- struct ldlm_lock *lock;
- bool cancelled = true;
-
- lock = ldlm_handle2lock(&och->och_lease_handle);
- if (lock) {
- lock_res_and_lock(lock);
- cancelled = ldlm_is_cancel(lock);
- unlock_res_and_lock(lock);
- LDLM_LOCK_PUT(lock);
- }
-
- CDEBUG(D_INODE, "lease for " DFID " broken? %d\n",
- PFID(&ll_i2info(inode)->lli_fid), cancelled);
-
- if (!cancelled)
- ldlm_cli_cancel(&och->och_lease_handle, 0);
- if (lease_broken)
- *lease_broken = cancelled;
-
- return ll_close_inode_openhandle(inode, och, 0, NULL);
-}
-
-int ll_merge_attr(const struct lu_env *env, struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct cl_object *obj = lli->lli_clob;
- struct cl_attr *attr = vvp_env_thread_attr(env);
- s64 atime;
- s64 mtime;
- s64 ctime;
- int rc = 0;
-
- ll_inode_size_lock(inode);
-
- /* Merge the timestamps most recently obtained from the MDS with
- * the timestamps obtained from the OSTs.
- */
- LTIME_S(inode->i_atime) = lli->lli_atime;
- LTIME_S(inode->i_mtime) = lli->lli_mtime;
- LTIME_S(inode->i_ctime) = lli->lli_ctime;
-
- mtime = LTIME_S(inode->i_mtime);
- atime = LTIME_S(inode->i_atime);
- ctime = LTIME_S(inode->i_ctime);
-
- cl_object_attr_lock(obj);
- rc = cl_object_attr_get(env, obj, attr);
- cl_object_attr_unlock(obj);
-
- if (rc != 0)
- goto out_size_unlock;
-
- if (atime < attr->cat_atime)
- atime = attr->cat_atime;
-
- if (ctime < attr->cat_ctime)
- ctime = attr->cat_ctime;
-
- if (mtime < attr->cat_mtime)
- mtime = attr->cat_mtime;
-
- CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
- PFID(&lli->lli_fid), attr->cat_size);
-
- i_size_write(inode, attr->cat_size);
-
- inode->i_blocks = attr->cat_blocks;
-
- LTIME_S(inode->i_mtime) = mtime;
- LTIME_S(inode->i_atime) = atime;
- LTIME_S(inode->i_ctime) = ctime;
-
-out_size_unlock:
- ll_inode_size_unlock(inode);
-
- return rc;
-}
-
-static bool file_is_noatime(const struct file *file)
-{
- const struct vfsmount *mnt = file->f_path.mnt;
- const struct inode *inode = file_inode(file);
-
- /* Adapted from file_accessed() and touch_atime().*/
- if (file->f_flags & O_NOATIME)
- return true;
-
- if (inode->i_flags & S_NOATIME)
- return true;
-
- if (IS_NOATIME(inode))
- return true;
-
- if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
- return true;
-
- if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
- return true;
-
- if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode))
- return true;
-
- return false;
-}
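-
-/*
- * Userspace-side sketch (illustrative only) of the first check above:
- * opening with O_NOATIME suppresses atime updates for that descriptor,
- * so I/O through it takes the io->ci_noatime path in ll_io_init():
- *
- *	int fd = open("/mnt/lustre/f", O_RDONLY | O_NOATIME);
- */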
-
-static void ll_io_init(struct cl_io *io, const struct file *file, int write)
-{
- struct inode *inode = file_inode(file);
-
- io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
- if (write) {
- io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
- io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
- file->f_flags & O_DIRECT ||
- IS_SYNC(inode);
- }
- io->ci_obj = ll_i2info(inode)->lli_clob;
- io->ci_lockreq = CILR_MAYBE;
- if (ll_file_nolock(file)) {
- io->ci_lockreq = CILR_NEVER;
- io->ci_no_srvlock = 1;
- } else if (file->f_flags & O_APPEND) {
- io->ci_lockreq = CILR_MANDATORY;
- }
-
- io->ci_noatime = file_is_noatime(file);
-}
-
-static ssize_t
-ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
- struct file *file, enum cl_io_type iot,
- loff_t *ppos, size_t count)
-{
- struct ll_inode_info *lli = ll_i2info(file_inode(file));
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct vvp_io *vio = vvp_env_io(env);
- struct range_lock range;
- struct cl_io *io;
- ssize_t result = 0;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "file: %pD, type: %d ppos: %llu, count: %zu\n",
- file, iot, *ppos, count);
-
-restart:
- io = vvp_env_thread_io(env);
- ll_io_init(io, file, iot == CIT_WRITE);
-
- if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
- bool range_locked = false;
-
- if (file->f_flags & O_APPEND)
- range_lock_init(&range, 0, LUSTRE_EOF);
- else
- range_lock_init(&range, *ppos, *ppos + count - 1);
-
- vio->vui_fd = LUSTRE_FPRIVATE(file);
- vio->vui_iter = args->u.normal.via_iter;
- vio->vui_iocb = args->u.normal.via_iocb;
- /*
- * Direct IO reads must also take the range lock,
- * or multiple reads will try to work on the same pages.
- * See LU-6227 for details.
- */
- if (((iot == CIT_WRITE) ||
- (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
- !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
- CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n",
- range.rl_node.in_extent.start,
- range.rl_node.in_extent.end);
- rc = range_lock(&lli->lli_write_tree, &range);
- if (rc < 0)
- goto out;
-
- range_locked = true;
- }
- ll_cl_add(file, env, io);
- rc = cl_io_loop(env, io);
- ll_cl_remove(file, env);
- if (range_locked) {
- CDEBUG(D_VFSTRACE, "Range unlock [%llu, %llu]\n",
- range.rl_node.in_extent.start,
- range.rl_node.in_extent.end);
- range_unlock(&lli->lli_write_tree, &range);
- }
- } else {
- /* cl_io_rw_init() handled IO */
- rc = io->ci_result;
- }
-
- if (io->ci_nob > 0) {
- result = io->ci_nob;
- count -= io->ci_nob;
- *ppos = io->u.ci_wr.wr.crw_pos;
-
- /* prepare IO restart */
- if (count > 0)
- args->u.normal.via_iter = vio->vui_iter;
- }
-out:
- cl_io_fini(env, io);
-
- if ((!rc || rc == -ENODATA) && count > 0 && io->ci_need_restart) {
- CDEBUG(D_VFSTRACE,
- "%s: restart %s from %lld, count:%zu, result: %zd\n",
- file_dentry(file)->d_name.name,
- iot == CIT_READ ? "read" : "write",
- *ppos, count, result);
- goto restart;
- }
-
- if (iot == CIT_READ) {
- if (result >= 0)
- ll_stats_ops_tally(ll_i2sbi(file_inode(file)),
- LPROC_LL_READ_BYTES, result);
- } else if (iot == CIT_WRITE) {
- if (result >= 0) {
- ll_stats_ops_tally(ll_i2sbi(file_inode(file)),
- LPROC_LL_WRITE_BYTES, result);
- fd->fd_write_failed = false;
- } else if (!result && !rc) {
- rc = io->ci_result;
- if (rc < 0)
- fd->fd_write_failed = true;
- else
- fd->fd_write_failed = false;
- } else if (rc != -ERESTARTSYS) {
- fd->fd_write_failed = true;
- }
- }
- CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
-
- return result > 0 ? result : rc;
-}
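-
-/*
- * Range-choice sketch for the locking in ll_file_io_generic() above
- * (hypothetical values): an O_APPEND write locks the whole file, while
- * any other write, or an O_DIRECT read, locks exactly the byte span it
- * touches:
- *
- *	range_lock_init(&range, 0, LUSTRE_EOF);            (O_APPEND)
- *	range_lock_init(&range, *ppos, *ppos + count - 1); (otherwise)
- */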
-
-static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
-{
- struct lu_env *env;
- struct vvp_io_args *args;
- ssize_t result;
- u16 refcheck;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- args = ll_env_args(env);
- args->u.normal.via_iter = to;
- args->u.normal.via_iocb = iocb;
-
- result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
- &iocb->ki_pos, iov_iter_count(to));
- cl_env_put(env, &refcheck);
- return result;
-}
-
-/*
- * Write to a file (through the page cache).
- */
-static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
-{
- struct lu_env *env;
- struct vvp_io_args *args;
- ssize_t result;
- u16 refcheck;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- args = ll_env_args(env);
- args->u.normal.via_iter = from;
- args->u.normal.via_iocb = iocb;
-
- result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
- &iocb->ki_pos, iov_iter_count(from));
- cl_env_put(env, &refcheck);
- return result;
-}
-
-int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
- __u64 flags, struct lov_user_md *lum,
- int lum_size)
-{
- struct lookup_intent oit = {
- .it_op = IT_OPEN,
- .it_flags = flags | MDS_OPEN_BY_FID,
- };
- int rc = 0;
-
- ll_inode_size_lock(inode);
- rc = ll_intent_file_open(dentry, lum, lum_size, &oit);
- if (rc < 0)
- goto out_unlock;
-
- ll_release_openhandle(inode, &oit);
-
-out_unlock:
- ll_inode_size_unlock(inode);
- ll_intent_release(&oit);
- return rc;
-}
-
-int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
- struct lov_mds_md **lmmp, int *lmm_size,
- struct ptlrpc_request **request)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct mdt_body *body;
- struct lov_mds_md *lmm = NULL;
- struct ptlrpc_request *req = NULL;
- struct md_op_data *op_data;
- int rc, lmmsize;
-
- rc = ll_get_default_mdsize(sbi, &lmmsize);
- if (rc)
- return rc;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
- strlen(filename), lmmsize,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
- rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
- ll_finish_md_op_data(op_data);
- if (rc < 0) {
- CDEBUG(D_INFO, "md_getattr_name failed on %s: rc %d\n",
- filename, rc);
- goto out;
- }
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
-
- lmmsize = body->mbo_eadatasize;
-
- if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
- lmmsize == 0) {
- rc = -ENODATA;
- goto out;
- }
-
- lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
-
- if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
- (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
- rc = -EPROTO;
- goto out;
- }
-
- /*
- * This is coming from the MDS, so is probably in
- * little endian. We convert it to host endian before
- * passing it to userspace.
- */
- if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) {
- int stripe_count;
-
- stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
- if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
- stripe_count = 0;
-
- /* If this function is called for a directory, we should
- * avoid swabbing non-existent lsm objects.
- */
- if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
- lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
- if (S_ISREG(body->mbo_mode))
- lustre_swab_lov_user_md_objects(
- ((struct lov_user_md_v1 *)lmm)->lmm_objects,
- stripe_count);
- } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
- lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
- if (S_ISREG(body->mbo_mode))
- lustre_swab_lov_user_md_objects(
- ((struct lov_user_md_v3 *)lmm)->lmm_objects,
- stripe_count);
- }
- }
-
-out:
- *lmmp = lmm;
- *lmm_size = lmmsize;
- *request = req;
- return rc;
-}
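-
-/*
- * Worked check for the endianness handling above: on a little-endian
- * host cpu_to_le32(LOV_MAGIC) == LOV_MAGIC, so the reply is already in
- * host order and no conversion runs; on a big-endian host the two
- * differ, so the little-endian wire reply is swabbed before being
- * handed to userspace.
- */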
-
-static int ll_lov_setea(struct inode *inode, struct file *file,
- unsigned long arg)
-{
- __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
- struct lov_user_md *lump;
- int lum_size = sizeof(struct lov_user_md) +
- sizeof(struct lov_user_ost_data);
- int rc;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- lump = kzalloc(lum_size, GFP_NOFS);
- if (!lump)
- return -ENOMEM;
-
- if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) {
- kvfree(lump);
- return -EFAULT;
- }
-
- rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, lump,
- lum_size);
- cl_lov_delay_create_clear(&file->f_flags);
-
- kvfree(lump);
- return rc;
-}
-
-static int ll_file_getstripe(struct inode *inode,
- struct lov_user_md __user *lum)
-{
- struct lu_env *env;
- u16 refcheck;
- int rc;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
- cl_env_put(env, &refcheck);
- return rc;
-}
-
-static int ll_lov_setstripe(struct inode *inode, struct file *file,
- unsigned long arg)
-{
- struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
- struct lov_user_md *klum;
- int lum_size, rc;
- __u64 flags = FMODE_WRITE;
-
- rc = ll_copy_user_md(lum, &klum);
- if (rc < 0)
- return rc;
-
- lum_size = rc;
- rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, klum,
- lum_size);
- cl_lov_delay_create_clear(&file->f_flags);
- if (rc == 0) {
- __u32 gen;
-
- put_user(0, &lum->lmm_stripe_count);
-
- ll_layout_refresh(inode, &gen);
- rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
- }
-
- kfree(klum);
- return rc;
-}
-
-static int
-ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_grouplock grouplock;
- int rc;
-
- if (arg == 0) {
- CWARN("group id for group lock must not be 0\n");
- return -EINVAL;
- }
-
- if (ll_file_nolock(file))
- return -EOPNOTSUPP;
-
- spin_lock(&lli->lli_lock);
- if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
- CWARN("group lock already existed with gid %lu\n",
- fd->fd_grouplock.lg_gid);
- spin_unlock(&lli->lli_lock);
- return -EINVAL;
- }
- LASSERT(!fd->fd_grouplock.lg_lock);
- spin_unlock(&lli->lli_lock);
-
- rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
- arg, (file->f_flags & O_NONBLOCK), &grouplock);
- if (rc)
- return rc;
-
- spin_lock(&lli->lli_lock);
- if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
- spin_unlock(&lli->lli_lock);
- CERROR("another thread just won the race\n");
- cl_put_grouplock(&grouplock);
- return -EINVAL;
- }
-
- fd->fd_flags |= LL_FILE_GROUP_LOCKED;
- fd->fd_grouplock = grouplock;
- spin_unlock(&lli->lli_lock);
-
- CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
- return 0;
-}
-
-static int ll_put_grouplock(struct inode *inode, struct file *file,
- unsigned long arg)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_grouplock grouplock;
-
- spin_lock(&lli->lli_lock);
- if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
- spin_unlock(&lli->lli_lock);
- CWARN("no group lock held\n");
- return -EINVAL;
- }
- LASSERT(fd->fd_grouplock.lg_lock);
-
- if (fd->fd_grouplock.lg_gid != arg) {
- CWARN("group lock %lu doesn't match current id %lu\n",
- arg, fd->fd_grouplock.lg_gid);
- spin_unlock(&lli->lli_lock);
- return -EINVAL;
- }
-
- grouplock = fd->fd_grouplock;
- memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
- fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
- spin_unlock(&lli->lli_lock);
-
- cl_put_grouplock(&grouplock);
- CDEBUG(D_INFO, "group lock %lu released\n", arg);
- return 0;
-}
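-
-/*
- * Pairing sketch (hypothetical caller): group locks are taken and
- * dropped by gid around I/O that must exclude other group users, as
- * ll_swap_layouts() below does around its cache flush:
- *
- *	rc = ll_get_grouplock(inode, file, gid);
- *	if (rc < 0)
- *		return rc;
- *	(... grouped I/O ...)
- *	ll_put_grouplock(inode, file, gid);
- */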
-
-/**
- * Close inode open handle
- *
- * \param inode [in] inode in question
- * \param it [in,out] intent which contains open info and result
- *
- * \retval 0 success
- * \retval <0 failure
- */
-int ll_release_openhandle(struct inode *inode, struct lookup_intent *it)
-{
- struct obd_client_handle *och;
- int rc;
-
- LASSERT(inode);
-
- /* Root? Do nothing. */
- if (is_root_inode(inode))
- return 0;
-
- /* No open handle to close? Move away */
- if (!it_disposition(it, DISP_OPEN_OPEN))
- return 0;
-
- LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
-
- och = kzalloc(sizeof(*och), GFP_NOFS);
- if (!och) {
- rc = -ENOMEM;
- goto out;
- }
-
- ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
-
- rc = ll_close_inode_openhandle(inode, och, 0, NULL);
-out:
- /* this one is in place of ll_file_open */
- if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
- ptlrpc_req_finished(it->it_request);
- it_clear_disposition(it, DISP_ENQ_OPEN_REF);
- }
- return rc;
-}
-
-/**
- * Get the size of the inode for which the FIEMAP mapping is requested,
- * make the FIEMAP get_info call, and return the result.
- *
- * \param fiemap kernel buffer to hold the extents
- * \param num_bytes kernel buffer size
- */
-static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap,
- size_t num_bytes)
-{
- struct ll_fiemap_info_key fmkey = { .lfik_name = KEY_FIEMAP, };
- struct lu_env *env;
- u16 refcheck;
- int rc = 0;
-
- /* Checks for fiemap flags */
- if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
- fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
- return -EBADR;
- }
-
- /* Check for FIEMAP_FLAG_SYNC */
- if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
- rc = filemap_fdatawrite(inode->i_mapping);
- if (rc)
- return rc;
- }
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- if (i_size_read(inode) == 0) {
- rc = ll_glimpse_size(inode);
- if (rc)
- goto out;
- }
-
- fmkey.lfik_oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
- obdo_from_inode(&fmkey.lfik_oa, inode, OBD_MD_FLSIZE);
- obdo_set_parent_fid(&fmkey.lfik_oa, &ll_i2info(inode)->lli_fid);
-
- /* If filesize is 0, then there would be no objects for mapping */
- if (fmkey.lfik_oa.o_size == 0) {
- fiemap->fm_mapped_extents = 0;
- rc = 0;
- goto out;
- }
-
- memcpy(&fmkey.lfik_fiemap, fiemap, sizeof(*fiemap));
-
- rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob,
- &fmkey, fiemap, &num_bytes);
-out:
- cl_env_put(env, &refcheck);
- return rc;
-}
-
-int ll_fid2path(struct inode *inode, void __user *arg)
-{
- struct obd_export *exp = ll_i2mdexp(inode);
- const struct getinfo_fid2path __user *gfin = arg;
- struct getinfo_fid2path *gfout;
- u32 pathlen;
- size_t outsize;
- int rc;
-
- if (!capable(CAP_DAC_READ_SEARCH) &&
- !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
- return -EPERM;
-
- /* Only need to get the buflen */
- if (get_user(pathlen, &gfin->gf_pathlen))
- return -EFAULT;
-
- if (pathlen > PATH_MAX)
- return -EINVAL;
-
- outsize = sizeof(*gfout) + pathlen;
-
- gfout = kzalloc(outsize, GFP_NOFS);
- if (!gfout)
- return -ENOMEM;
-
- if (copy_from_user(gfout, arg, sizeof(*gfout))) {
- rc = -EFAULT;
- goto gf_free;
- }
-
- /* Call mdc_iocontrol */
- rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
- if (rc != 0)
- goto gf_free;
-
- if (copy_to_user(arg, gfout, outsize))
- rc = -EFAULT;
-
-gf_free:
- kfree(gfout);
- return rc;
-}
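-
-/*
- * Buffer-layout sketch for the FID2PATH exchange above (hypothetical
- * userspace caller): the resolved path is appended after the fixed
- * header, so the caller sizes the buffer as sizeof(*gf) + gf_pathlen:
- *
- *	struct getinfo_fid2path *gf;
- *	size_t len = sizeof(*gf) + PATH_MAX;
- *
- *	gf = malloc(len);
- *	gf->gf_fid = fid;
- *	gf->gf_pathlen = PATH_MAX;
- *	rc = ioctl(fd, OBD_IOC_FID2PATH, gf);
- */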
-
-/*
- * Read the data_version for an inode.
- *
- * This value is computed using the stripe object versions on the OSTs.
- * The version is computed using server-side locking.
- *
- * @param flags whether to sync on the OST side;
- *              0: no sync
- *              LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
- *              LL_DV_WR_FLUSH: drop all cached pages, LCK_PW on OSTs
- */
-int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
-{
- struct cl_object *obj = ll_i2info(inode)->lli_clob;
- struct lu_env *env;
- struct cl_io *io;
- u16 refcheck;
- int result;
-
- /* If no file object has been initialized, we consider its version to be 0. */
- if (!obj) {
- *data_version = 0;
- return 0;
- }
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- io = vvp_env_thread_io(env);
- io->ci_obj = obj;
- io->u.ci_data_version.dv_data_version = 0;
- io->u.ci_data_version.dv_flags = flags;
-
-restart:
- if (!cl_io_init(env, io, CIT_DATA_VERSION, io->ci_obj))
- result = cl_io_loop(env, io);
- else
- result = io->ci_result;
-
- *data_version = io->u.ci_data_version.dv_data_version;
-
- cl_io_fini(env, io);
-
- if (unlikely(io->ci_need_restart))
- goto restart;
-
- cl_env_put(env, &refcheck);
-
- return result;
-}
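-
-/*
- * Usage sketch (hypothetical caller): sample the version before and
- * after an operation to detect concurrent modification, which is how
- * ll_swap_layouts() below uses it:
- *
- *	__u64 dv0, dv1;
- *
- *	rc = ll_data_version(inode, &dv0, 0);
- *	(... operation ...)
- *	rc = ll_data_version(inode, &dv1, 0);
- *	if (dv0 != dv1)
- *		return -EAGAIN;	(data changed underneath)
- */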
-
-/*
- * Trigger a HSM release request for the provided inode.
- */
-int ll_hsm_release(struct inode *inode)
-{
- struct lu_env *env;
- struct obd_client_handle *och = NULL;
- __u64 data_version = 0;
- int rc;
- u16 refcheck;
-
- CDEBUG(D_INODE, "%s: Releasing file " DFID ".\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(&ll_i2info(inode)->lli_fid));
-
- och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
- if (IS_ERR(och)) {
- rc = PTR_ERR(och);
- goto out;
- }
-
- /* Grab latest data_version and [am]time values */
- rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
- if (rc != 0)
- goto out;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env)) {
- rc = PTR_ERR(env);
- goto out;
- }
-
- ll_merge_attr(env, inode);
- cl_env_put(env, &refcheck);
-
- /* Release the file.
- * NB: the lease lock handle is released in mdc_hsm_release_pack()
- * because we still need it to pack the l_remote_handle for the MDT.
- */
- rc = ll_close_inode_openhandle(inode, och, MDS_HSM_RELEASE,
- &data_version);
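- /* och is consumed by ll_close_inode_openhandle(); clear it so the
- * error path below does not close it a second time.
- */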
- och = NULL;
-
-out:
- if (och && !IS_ERR(och)) /* close the file */
- ll_lease_close(och, inode, NULL);
-
- return rc;
-}
-
-struct ll_swap_stack {
- u64 dv1;
- u64 dv2;
- struct inode *inode1;
- struct inode *inode2;
- bool check_dv1;
- bool check_dv2;
-};
-
-static int ll_swap_layouts(struct file *file1, struct file *file2,
- struct lustre_swap_layouts *lsl)
-{
- struct mdc_swap_layouts msl;
- struct md_op_data *op_data;
- __u32 gid;
- __u64 dv;
- struct ll_swap_stack *llss = NULL;
- int rc;
-
- llss = kzalloc(sizeof(*llss), GFP_NOFS);
- if (!llss)
- return -ENOMEM;
-
- llss->inode1 = file_inode(file1);
- llss->inode2 = file_inode(file2);
-
- rc = ll_check_swap_layouts_validity(llss->inode1, llss->inode2);
- if (rc < 0)
- goto free;
-
- /* we use two bools because they are easier to swap than two bits */
- if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
- llss->check_dv1 = true;
-
- if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
- llss->check_dv2 = true;
-
- /* we cannot use lsl->sl_dvX directly because we may swap them */
- llss->dv1 = lsl->sl_dv1;
- llss->dv2 = lsl->sl_dv2;
-
- rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
- if (!rc) /* same file, done! */
- goto free;
-
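- /* Order the two inodes by FID so that the locks below are always
- * taken in the same order, avoiding deadlock between concurrent swaps.
- */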
- if (rc < 0) { /* sequentialize it */
- swap(llss->inode1, llss->inode2);
- swap(file1, file2);
- swap(llss->dv1, llss->dv2);
- swap(llss->check_dv1, llss->check_dv2);
- }
-
- gid = lsl->sl_gid;
- if (gid != 0) { /* application asks to flush dirty cache */
- rc = ll_get_grouplock(llss->inode1, file1, gid);
- if (rc < 0)
- goto free;
-
- rc = ll_get_grouplock(llss->inode2, file2, gid);
- if (rc < 0) {
- ll_put_grouplock(llss->inode1, file1, gid);
- goto free;
- }
- }
-
- /* Final check: before swapping the layouts, verify that the data
- * version has not changed (if requested).
- */
- if (llss->check_dv1) {
- rc = ll_data_version(llss->inode1, &dv, 0);
- if (rc)
- goto putgl;
- if (dv != llss->dv1) {
- rc = -EAGAIN;
- goto putgl;
- }
- }
-
- if (llss->check_dv2) {
- rc = ll_data_version(llss->inode2, &dv, 0);
- if (rc)
- goto putgl;
- if (dv != llss->dv2) {
- rc = -EAGAIN;
- goto putgl;
- }
- }
-
- /* struct md_op_data carries the swap arguments to the MDT; only the
- * flags are missing, so we pass struct mdc_swap_layouts through
- * md_op_data->op_data.
- */
- /* Flags from user space would have to be converted before being sent
- * to the server; no flag is sent today, they are only used on the
- * client.
- */
- msl.msl_flags = 0;
- rc = -ENOMEM;
- op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
- 0, LUSTRE_OPC_ANY, &msl);
- if (IS_ERR(op_data)) {
- rc = PTR_ERR(op_data);
- goto free;
- }
-
- rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
- sizeof(*op_data), op_data, NULL);
- ll_finish_md_op_data(op_data);
-
-putgl:
- if (gid != 0) {
- ll_put_grouplock(llss->inode2, file2, gid);
- ll_put_grouplock(llss->inode1, file1, gid);
- }
-
-free:
- kfree(llss);
-
- return rc;
-}
-
-int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
-{
- struct md_op_data *op_data;
- int rc;
-
- /* Detect out-of-range masks */
- if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK)
- return -EINVAL;
-
- /* Non-root users are forbidden to set or clear flags which are
- * NOT defined in HSM_USER_MASK.
- */
- if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
- !capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- /* Detect an out-of-range archive id */
- if ((hss->hss_valid & HSS_ARCHIVE_ID) &&
- (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE))
- return -EINVAL;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, hss);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
- sizeof(*op_data), op_data, NULL);
-
- ll_finish_md_op_data(op_data);
-
- return rc;
-}
-
-static int ll_hsm_import(struct inode *inode, struct file *file,
- struct hsm_user_import *hui)
-{
- struct hsm_state_set *hss = NULL;
- struct iattr *attr = NULL;
- int rc;
-
- if (!S_ISREG(inode->i_mode))
- return -EINVAL;
-
- /* set HSM flags */
- hss = kzalloc(sizeof(*hss), GFP_NOFS);
- if (!hss)
- return -ENOMEM;
-
- hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
- hss->hss_archive_id = hui->hui_archive_id;
- hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
- rc = ll_hsm_state_set(inode, hss);
- if (rc != 0)
- goto free_hss;
-
- attr = kzalloc(sizeof(*attr), GFP_NOFS);
- if (!attr) {
- rc = -ENOMEM;
- goto free_hss;
- }
-
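- /* Stamp the inode with the mode, ownership, size and times of the
- * archived copy supplied by user space.
- */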
- attr->ia_mode = hui->hui_mode & 0777;
- attr->ia_mode |= S_IFREG;
- attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
- attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
- attr->ia_size = hui->hui_size;
- attr->ia_mtime.tv_sec = hui->hui_mtime;
- attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
- attr->ia_atime.tv_sec = hui->hui_atime;
- attr->ia_atime.tv_nsec = hui->hui_atime_ns;
-
- attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
- ATTR_UID | ATTR_GID |
- ATTR_MTIME | ATTR_MTIME_SET |
- ATTR_ATIME | ATTR_ATIME_SET;
-
- inode_lock(inode);
-
- rc = ll_setattr_raw(file->f_path.dentry, attr, true);
- if (rc == -ENODATA)
- rc = 0;
-
- inode_unlock(inode);
-
- kfree(attr);
-free_hss:
- kfree(hss);
- return rc;
-}
-
-static inline long ll_lease_type_from_fmode(fmode_t fmode)
-{
- return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
- ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
-}
-
-static long
-ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- struct inode *inode = file_inode(file);
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- int flags, rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p),cmd=%x\n",
- PFID(ll_inode2fid(inode)), inode, cmd);
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
-
- /* asm-ppc{,64} declares TCGETS, et al. as type 't' not 'T' */
- if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
- return -ENOTTY;
-
- switch (cmd) {
- case LL_IOC_GETFLAGS:
- /* Get the current value of the file flags */
- return put_user(fd->fd_flags, (int __user *)arg);
- case LL_IOC_SETFLAGS:
- case LL_IOC_CLRFLAGS:
- /* Set or clear specific file flags */
- /* XXX This probably needs checks to ensure the flags are
- * not abused, and to handle any flag side effects.
- */
- if (get_user(flags, (int __user *)arg))
- return -EFAULT;
-
- if (cmd == LL_IOC_SETFLAGS) {
- if ((flags & LL_FILE_IGNORE_LOCK) &&
- !(file->f_flags & O_DIRECT)) {
- CERROR("%s: unable to disable locking on non-O_DIRECT file\n",
- current->comm);
- return -EINVAL;
- }
-
- fd->fd_flags |= flags;
- } else {
- fd->fd_flags &= ~flags;
- }
- return 0;
- case LL_IOC_LOV_SETSTRIPE:
- return ll_lov_setstripe(inode, file, arg);
- case LL_IOC_LOV_SETEA:
- return ll_lov_setea(inode, file, arg);
- case LL_IOC_LOV_SWAP_LAYOUTS: {
- struct file *file2;
- struct lustre_swap_layouts lsl;
-
- if (copy_from_user(&lsl, (char __user *)arg,
- sizeof(struct lustre_swap_layouts)))
- return -EFAULT;
-
- if ((file->f_flags & O_ACCMODE) == O_RDONLY)
- return -EPERM;
-
- file2 = fget(lsl.sl_fd);
- if (!file2)
- return -EBADF;
-
- /* O_WRONLY or O_RDWR */
- if ((file2->f_flags & O_ACCMODE) == O_RDONLY) {
- rc = -EPERM;
- goto out;
- }
-
- if (lsl.sl_flags & SWAP_LAYOUTS_CLOSE) {
- struct obd_client_handle *och = NULL;
- struct ll_inode_info *lli;
- struct inode *inode2;
-
- if (lsl.sl_flags != SWAP_LAYOUTS_CLOSE) {
- rc = -EINVAL;
- goto out;
- }
-
- lli = ll_i2info(inode);
- mutex_lock(&lli->lli_och_mutex);
- if (fd->fd_lease_och) {
- och = fd->fd_lease_och;
- fd->fd_lease_och = NULL;
- }
- mutex_unlock(&lli->lli_och_mutex);
- if (!och) {
- rc = -ENOLCK;
- goto out;
- }
- inode2 = file_inode(file2);
- rc = ll_swap_layouts_close(och, inode, inode2);
- } else {
- rc = ll_swap_layouts(file, file2, &lsl);
- }
-out:
- fput(file2);
- return rc;
- }
- case LL_IOC_LOV_GETSTRIPE:
- return ll_file_getstripe(inode,
- (struct lov_user_md __user *)arg);
- case FSFILT_IOC_GETFLAGS:
- case FSFILT_IOC_SETFLAGS:
- return ll_iocontrol(inode, file, cmd, arg);
- case FSFILT_IOC_GETVERSION_OLD:
- case FSFILT_IOC_GETVERSION:
- return put_user(inode->i_generation, (int __user *)arg);
- case LL_IOC_GROUP_LOCK:
- return ll_get_grouplock(inode, file, arg);
- case LL_IOC_GROUP_UNLOCK:
- return ll_put_grouplock(inode, file, arg);
- case IOC_OBD_STATFS:
- return ll_obd_statfs(inode, (void __user *)arg);
-
- /* We need to special case any other ioctls we want to handle,
- * to send them to the MDS/OST as appropriate and to properly
- * network encode the arg field.
- case FSFILT_IOC_SETVERSION_OLD:
- case FSFILT_IOC_SETVERSION:
- */
- case LL_IOC_FLUSHCTX:
- return ll_flush_ctx(inode);
- case LL_IOC_PATH2FID: {
- if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
- sizeof(struct lu_fid)))
- return -EFAULT;
-
- return 0;
- }
- case LL_IOC_GETPARENT:
- return ll_getparent(file, (struct getparent __user *)arg);
- case OBD_IOC_FID2PATH:
- return ll_fid2path(inode, (void __user *)arg);
- case LL_IOC_DATA_VERSION: {
- struct ioc_data_version idv;
- int rc;
-
- if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
- return -EFAULT;
-
- idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
- rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
- if (rc == 0 && copy_to_user((char __user *)arg, &idv,
- sizeof(idv)))
- return -EFAULT;
-
- return rc;
- }
-
- case LL_IOC_GET_MDTIDX: {
- int mdtidx;
-
- mdtidx = ll_get_mdt_idx(inode);
- if (mdtidx < 0)
- return mdtidx;
-
- if (put_user(mdtidx, (int __user *)arg))
- return -EFAULT;
-
- return 0;
- }
- case OBD_IOC_GETDTNAME:
- case OBD_IOC_GETMDNAME:
- return ll_get_obd_name(inode, cmd, arg);
- case LL_IOC_HSM_STATE_GET: {
- struct md_op_data *op_data;
- struct hsm_user_state *hus;
- int rc;
-
- hus = kzalloc(sizeof(*hus), GFP_NOFS);
- if (!hus)
- return -ENOMEM;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, hus);
- if (IS_ERR(op_data)) {
- kfree(hus);
- return PTR_ERR(op_data);
- }
-
- rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
- op_data, NULL);
-
- if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
- rc = -EFAULT;
-
- ll_finish_md_op_data(op_data);
- kfree(hus);
- return rc;
- }
- case LL_IOC_HSM_STATE_SET: {
- struct hsm_state_set *hss;
- int rc;
-
- hss = memdup_user((char __user *)arg, sizeof(*hss));
- if (IS_ERR(hss))
- return PTR_ERR(hss);
-
- rc = ll_hsm_state_set(inode, hss);
-
- kfree(hss);
- return rc;
- }
- case LL_IOC_HSM_ACTION: {
- struct md_op_data *op_data;
- struct hsm_current_action *hca;
- int rc;
-
- hca = kzalloc(sizeof(*hca), GFP_NOFS);
- if (!hca)
- return -ENOMEM;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, hca);
- if (IS_ERR(op_data)) {
- kfree(hca);
- return PTR_ERR(op_data);
- }
-
- rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
- op_data, NULL);
-
- if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
- rc = -EFAULT;
-
- ll_finish_md_op_data(op_data);
- kfree(hca);
- return rc;
- }
- case LL_IOC_SET_LEASE: {
- struct ll_inode_info *lli = ll_i2info(inode);
- struct obd_client_handle *och = NULL;
- bool lease_broken;
- fmode_t fmode;
-
- switch (arg) {
- case LL_LEASE_WRLCK:
- if (!(file->f_mode & FMODE_WRITE))
- return -EPERM;
- fmode = FMODE_WRITE;
- break;
- case LL_LEASE_RDLCK:
- if (!(file->f_mode & FMODE_READ))
- return -EPERM;
- fmode = FMODE_READ;
- break;
- case LL_LEASE_UNLCK:
- mutex_lock(&lli->lli_och_mutex);
- if (fd->fd_lease_och) {
- och = fd->fd_lease_och;
- fd->fd_lease_och = NULL;
- }
- mutex_unlock(&lli->lli_och_mutex);
-
- if (!och)
- return -ENOLCK;
-
- fmode = och->och_flags;
- rc = ll_lease_close(och, inode, &lease_broken);
- if (rc < 0)
- return rc;
-
- if (lease_broken)
- fmode = 0;
-
- return ll_lease_type_from_fmode(fmode);
- default:
- return -EINVAL;
- }
-
- CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
-
- /* apply for lease */
- och = ll_lease_open(inode, file, fmode, 0);
- if (IS_ERR(och))
- return PTR_ERR(och);
-
- rc = 0;
- mutex_lock(&lli->lli_och_mutex);
- if (!fd->fd_lease_och) {
- fd->fd_lease_och = och;
- och = NULL;
- }
- mutex_unlock(&lli->lli_och_mutex);
- if (och) {
- /* A racing thread installed a lease first; this cannot happen while only exclusive leases are supported */
- ll_lease_close(och, inode, &lease_broken);
- rc = -EBUSY;
- }
- return rc;
- }
- case LL_IOC_GET_LEASE: {
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ldlm_lock *lock = NULL;
- fmode_t fmode = 0;
-
- mutex_lock(&lli->lli_och_mutex);
- if (fd->fd_lease_och) {
- struct obd_client_handle *och = fd->fd_lease_och;
-
- lock = ldlm_handle2lock(&och->och_lease_handle);
- if (lock) {
- lock_res_and_lock(lock);
- if (!ldlm_is_cancel(lock))
- fmode = och->och_flags;
- unlock_res_and_lock(lock);
- LDLM_LOCK_PUT(lock);
- }
- }
- mutex_unlock(&lli->lli_och_mutex);
- return ll_lease_type_from_fmode(fmode);
- }
- case LL_IOC_HSM_IMPORT: {
- struct hsm_user_import *hui;
-
- hui = memdup_user((void __user *)arg, sizeof(*hui));
- if (IS_ERR(hui))
- return PTR_ERR(hui);
-
- rc = ll_hsm_import(inode, file, hui);
-
- kfree(hui);
- return rc;
- }
- default: {
- int err;
-
- if (ll_iocontrol_call(inode, file, cmd, arg, &err) ==
- LLIOC_STOP)
- return err;
-
- return obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
- (void __user *)arg);
- }
- }
-}
-
-static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
-{
- struct inode *inode = file_inode(file);
- loff_t retval, eof = 0;
-
- retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
- (origin == SEEK_CUR) ? file->f_pos : 0);
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), to=%llu=%#llx(%d)\n",
- PFID(ll_inode2fid(inode)), inode, retval, retval, origin);
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
-
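- /* SEEK_END, SEEK_HOLE and SEEK_DATA need an up-to-date file size,
- * so glimpse it from the OSTs first.
- */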
- if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
- retval = ll_glimpse_size(inode);
- if (retval != 0)
- return retval;
- eof = i_size_read(inode);
- }
-
- return generic_file_llseek_size(file, offset, origin,
- ll_file_maxbytes(inode), eof);
-}
-
-static int ll_flush(struct file *file, fl_owner_t id)
-{
- struct inode *inode = file_inode(file);
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- int rc, err;
-
- LASSERT(!S_ISDIR(inode->i_mode));
-
- /* catch async errors that were recorded back when async writeback
- * failed for pages in this mapping.
- */
- rc = lli->lli_async_rc;
- lli->lli_async_rc = 0;
- if (lli->lli_clob) {
- err = lov_read_and_clear_async_rc(lli->lli_clob);
- if (!rc)
- rc = err;
- }
-
- /* The application has been told about write failure already.
- * Do not report failure again.
- */
- if (fd->fd_write_failed)
- return 0;
- return rc ? -EIO : 0;
-}
-
-/**
- * Called to make sure a portion of a file has been written out.
- * If @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to the OSTs.
- *
- * Return how many pages have been written.
- */
-int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
- enum cl_fsync_mode mode, int ignore_layout)
-{
- struct lu_env *env;
- struct cl_io *io;
- struct cl_fsync_io *fio;
- int result;
- u16 refcheck;
-
- if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
- mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
- return -EINVAL;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- io = vvp_env_thread_io(env);
- io->ci_obj = ll_i2info(inode)->lli_clob;
- io->ci_ignore_layout = ignore_layout;
-
- /* initialize parameters for sync */
- fio = &io->u.ci_fsync;
- fio->fi_start = start;
- fio->fi_end = end;
- fio->fi_fid = ll_inode2fid(inode);
- fio->fi_mode = mode;
- fio->fi_nr_written = 0;
-
- if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
- result = cl_io_loop(env, io);
- else
- result = io->ci_result;
- if (result == 0)
- result = fio->fi_nr_written;
- cl_io_fini(env, io);
- cl_env_put(env, &refcheck);
-
- return result;
-}
-
-int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
-{
- struct inode *inode = file_inode(file);
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ptlrpc_request *req;
- int rc, err;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n",
- PFID(ll_inode2fid(inode)), inode);
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
-
- rc = file_write_and_wait_range(file, start, end);
- inode_lock(inode);
-
- /* catch async errors that were recorded back when async writeback
- * failed for pages in this mapping.
- */
- if (!S_ISDIR(inode->i_mode)) {
- err = lli->lli_async_rc;
- lli->lli_async_rc = 0;
- if (rc == 0)
- rc = err;
- if (lli->lli_clob) {
- err = lov_read_and_clear_async_rc(lli->lli_clob);
- if (rc == 0)
- rc = err;
- }
- }
-
- err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), &req);
- if (!rc)
- rc = err;
- if (!err)
- ptlrpc_req_finished(req);
-
- if (S_ISREG(inode->i_mode)) {
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-
- err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
- if (rc == 0 && err < 0)
- rc = err;
- if (rc < 0)
- fd->fd_write_failed = true;
- else
- fd->fd_write_failed = false;
- }
-
- inode_unlock(inode);
- return rc;
-}
-
-static int
-ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
-{
- struct inode *inode = file_inode(file);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ldlm_enqueue_info einfo = {
- .ei_type = LDLM_FLOCK,
- .ei_cb_cp = ldlm_flock_completion_ast,
- .ei_cbdata = file_lock,
- };
- struct md_op_data *op_data;
- struct lustre_handle lockh = {0};
- union ldlm_policy_data flock = { { 0 } };
- int fl_type = file_lock->fl_type;
- __u64 flags = 0;
- int rc;
- int rc2 = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID " file_lock=%p\n",
- PFID(ll_inode2fid(inode)), file_lock);
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
-
- if (file_lock->fl_flags & FL_FLOCK)
- LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
- else if (!(file_lock->fl_flags & FL_POSIX))
- return -EINVAL;
-
- flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
- flock.l_flock.pid = file_lock->fl_pid;
- flock.l_flock.start = file_lock->fl_start;
- flock.l_flock.end = file_lock->fl_end;
-
- /* Somewhat ugly workaround for svc lockd.
- * lockd installs a custom fl_lmops->lm_compare_owner that checks that
- * the fl_owner is the same (which it presumably always is between
- * lockd processes on the local node) and then compares pids.
- * We therefore assign the pid to the owner field to make it all work;
- * conflict with normal locks is unlikely since the pid space and the
- * pointer space for current->files do not intersect.
- */
- if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
- flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
-
- switch (fl_type) {
- case F_RDLCK:
- einfo.ei_mode = LCK_PR;
- break;
- case F_UNLCK:
- /* An unlock request may or may not have any relation to
- * existing locks so we may not be able to pass a lock handle
- * via a normal ldlm_lock_cancel() request. The request may even
- * unlock a byte range in the middle of an existing lock. In
- * order to process an unlock request we need all of the same
- * information that is given with a normal read or write record
- * lock request. To avoid creating another ldlm unlock (cancel)
- * message we'll treat a LCK_NL flock request as an unlock.
- */
- einfo.ei_mode = LCK_NL;
- break;
- case F_WRLCK:
- einfo.ei_mode = LCK_PW;
- break;
- default:
- CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
- return -ENOTSUPP;
- }
-
- switch (cmd) {
- case F_SETLKW:
-#ifdef F_SETLKW64
- case F_SETLKW64:
-#endif
- flags = 0;
- break;
- case F_SETLK:
-#ifdef F_SETLK64
- case F_SETLK64:
-#endif
- flags = LDLM_FL_BLOCK_NOWAIT;
- break;
- case F_GETLK:
-#ifdef F_GETLK64
- case F_GETLK64:
-#endif
- flags = LDLM_FL_TEST_LOCK;
- break;
- default:
- CERROR("unknown fcntl lock command: %d\n", cmd);
- return -EINVAL;
- }
-
- /*
- * Save the old mode so that if the mode in the lock changes we
- * can decrement the appropriate reader or writer refcount.
- */
- file_lock->fl_type = einfo.ei_mode;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- CDEBUG(D_DLMTRACE, "inode=" DFID ", pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n",
- PFID(ll_inode2fid(inode)), flock.l_flock.pid, flags,
- einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
-
- rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, op_data, &lockh,
- flags);
-
- /* Restore the file lock type if not TEST lock. */
- if (!(flags & LDLM_FL_TEST_LOCK))
- file_lock->fl_type = fl_type;
-
- if ((rc == 0 || file_lock->fl_type == F_UNLCK) &&
- !(flags & LDLM_FL_TEST_LOCK))
- rc2 = locks_lock_file_wait(file, file_lock);
-
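- /* The server granted the lock, but the local bookkeeping failed:
- * undo the server lock by enqueueing LCK_NL (treated as an unlock)
- * and report the local error instead.
- */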
- if (rc2 && file_lock->fl_type != F_UNLCK) {
- einfo.ei_mode = LCK_NL;
- md_enqueue(sbi->ll_md_exp, &einfo, &flock, op_data,
- &lockh, flags);
- rc = rc2;
- }
-
- ll_finish_md_op_data(op_data);
-
- return rc;
-}
-
-int ll_get_fid_by_name(struct inode *parent, const char *name,
- int namelen, struct lu_fid *fid,
- struct inode **inode)
-{
- struct md_op_data *op_data = NULL;
- struct ptlrpc_request *req;
- struct mdt_body *body;
- int rc;
-
- op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
- rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
- ll_finish_md_op_data(op_data);
- if (rc < 0)
- return rc;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!body) {
- rc = -EFAULT;
- goto out_req;
- }
- if (fid)
- *fid = body->mbo_fid1;
-
- if (inode)
- rc = ll_prep_inode(inode, req, parent->i_sb, NULL);
-out_req:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
- const char *name, int namelen)
-{
- struct ptlrpc_request *request = NULL;
- struct obd_client_handle *och = NULL;
- struct inode *child_inode = NULL;
- struct dentry *dchild = NULL;
- struct md_op_data *op_data;
- struct mdt_body *body;
- u64 data_version = 0;
- struct qstr qstr;
- int rc;
-
- CDEBUG(D_VFSTRACE, "migrate %s under " DFID " to MDT%d\n",
- name, PFID(ll_inode2fid(parent)), mdtidx);
-
- op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
- 0, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- /* Get child FID first */
- qstr.hash = full_name_hash(parent, name, namelen);
- qstr.name = name;
- qstr.len = namelen;
- dchild = d_lookup(file_dentry(file), &qstr);
- if (dchild) {
- /* dchild may be negative; check d_inode before dereferencing it */
- if (dchild->d_inode) {
- child_inode = igrab(dchild->d_inode);
- op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
- }
- dput(dchild);
- }
-
- if (!child_inode) {
- rc = ll_get_fid_by_name(parent, name, namelen,
- &op_data->op_fid3, &child_inode);
- if (rc)
- goto out_free;
- }
-
- if (!child_inode) {
- rc = -EINVAL;
- goto out_free;
- }
-
- inode_lock(child_inode);
- op_data->op_fid3 = *ll_inode2fid(child_inode);
- if (!fid_is_sane(&op_data->op_fid3)) {
- CERROR("%s: migrate %s, but fid " DFID " is insane\n",
- ll_get_fsname(parent->i_sb, NULL, 0), name,
- PFID(&op_data->op_fid3));
- rc = -EINVAL;
- goto out_unlock;
- }
-
- rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
- if (rc < 0)
- goto out_unlock;
-
- if (rc == mdtidx) {
- CDEBUG(D_INFO, "%s: " DFID " is already on MDT%d.\n", name,
- PFID(&op_data->op_fid3), mdtidx);
- rc = 0;
- goto out_unlock;
- }
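- /* For regular files, take a write lease so the migration works on a
- * stable data version; retried below if the layout changes in the
- * meantime (-EAGAIN).
- */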
-again:
- if (S_ISREG(child_inode->i_mode)) {
- och = ll_lease_open(child_inode, NULL, FMODE_WRITE, 0);
- if (IS_ERR(och)) {
- rc = PTR_ERR(och);
- och = NULL;
- goto out_unlock;
- }
-
- rc = ll_data_version(child_inode, &data_version,
- LL_DV_WR_FLUSH);
- if (rc)
- goto out_close;
-
- op_data->op_handle = och->och_fh;
- op_data->op_data = och->och_mod;
- op_data->op_data_version = data_version;
- op_data->op_lease_handle = och->och_lease_handle;
- op_data->op_bias |= MDS_RENAME_MIGRATE;
- }
-
- op_data->op_mds = mdtidx;
- op_data->op_cli_flags = CLI_MIGRATE;
- rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
- namelen, name, namelen, &request);
- if (!rc) {
- LASSERT(request);
- ll_update_times(request, parent);
-
- body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
- LASSERT(body);
-
- /*
- * If the server released the layout lock, clean up the client och
- * here; otherwise release it at out_close.
- */
- if (och && body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) {
- obd_mod_put(och->och_mod);
- md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp,
- och);
- och->och_fh.cookie = DEAD_HANDLE_MAGIC;
- kfree(och);
- och = NULL;
- }
- }
-
- if (request) {
- ptlrpc_req_finished(request);
- request = NULL;
- }
-
- /* Try again if the file layout has changed. */
- if (rc == -EAGAIN && S_ISREG(child_inode->i_mode))
- goto again;
-
-out_close:
- if (och) /* close the file */
- ll_lease_close(och, child_inode, NULL);
- if (!rc)
- clear_nlink(child_inode);
-out_unlock:
- inode_unlock(child_inode);
- iput(child_inode);
-out_free:
- ll_finish_md_op_data(op_data);
- return rc;
-}
-
-static int
-ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
-{
- return -ENOSYS;
-}
-
-/**
- * Test whether locks matching @bits and @l_req_mode are acquired:
- * - the bits may be spread across different locks,
- * - bits that are found are cleared from *bits,
- * - bits that are not found are kept in *bits.
- * \param inode [IN] inode to check
- * \param bits [IN] searched lock bits
- * \param l_req_mode [IN] searched lock mode
- * \retval boolean, true iff all bits are found
- */
-int ll_have_md_lock(struct inode *inode, __u64 *bits,
- enum ldlm_mode l_req_mode)
-{
- struct lustre_handle lockh;
- union ldlm_policy_data policy;
- enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ?
- (LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode;
- struct lu_fid *fid;
- __u64 flags;
- int i;
-
- if (!inode)
- return 0;
-
- fid = &ll_i2info(inode)->lli_fid;
- CDEBUG(D_INFO, "trying to match res " DFID " mode %s\n", PFID(fid),
- ldlm_lockname[mode]);
-
- flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
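- /* Scan the requested bits one at a time; every matched lock clears
- * the bits it covers from *bits.
- */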
- for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
- policy.l_inodebits.bits = *bits & (1 << i);
- if (policy.l_inodebits.bits == 0)
- continue;
-
- if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
- &policy, mode, &lockh)) {
- struct ldlm_lock *lock;
-
- lock = ldlm_handle2lock(&lockh);
- if (lock) {
- *bits &=
- ~(lock->l_policy_data.l_inodebits.bits);
- LDLM_LOCK_PUT(lock);
- } else {
- *bits &= ~policy.l_inodebits.bits;
- }
- }
- }
- return *bits == 0;
-}
-
-enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits,
- struct lustre_handle *lockh, __u64 flags,
- enum ldlm_mode mode)
-{
- union ldlm_policy_data policy = { .l_inodebits = { bits } };
- struct lu_fid *fid;
-
- fid = &ll_i2info(inode)->lli_fid;
- CDEBUG(D_INFO, "trying to match res " DFID "\n", PFID(fid));
-
- return md_lock_match(ll_i2mdexp(inode), flags | LDLM_FL_BLOCK_GRANTED,
- fid, LDLM_IBITS, &policy, mode, lockh);
-}
-
-static int ll_inode_revalidate_fini(struct inode *inode, int rc)
-{
- /* Already unlinked. Just update nlink and return success */
- if (rc == -ENOENT) {
- clear_nlink(inode);
- /* If it is a striped directory with a bad stripe, revalidate
- * the dentry again instead of returning an error.
- */
- if (S_ISDIR(inode->i_mode) && ll_i2info(inode)->lli_lsm_md)
- return 0;
-
- /* This path cannot be hit for regular files except in the
- * case of obscure races, so there is no need to validate the size.
- */
- if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
- return 0;
- } else if (rc != 0) {
- CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
- "%s: revalidate FID " DFID " error: rc = %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), rc);
- }
-
- return rc;
-}
-
-static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
-{
- struct inode *inode = d_inode(dentry);
- struct ptlrpc_request *req = NULL;
- struct obd_export *exp;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p),name=%pd\n",
- PFID(ll_inode2fid(inode)), inode, dentry);
-
- exp = ll_i2mdexp(inode);
-
- /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPCs.
- * But in the CMD case it caused some lock issues; this should be
- * fixed with the new CMD ibits lock. See bug 12718.
- */
- if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
- struct lookup_intent oit = { .it_op = IT_GETATTR };
- struct md_op_data *op_data;
-
- if (ibits == MDS_INODELOCK_LOOKUP)
- oit.it_op = IT_LOOKUP;
-
- /* Call getattr by fid, so do not provide name at all. */
- op_data = ll_prep_md_op_data(NULL, inode,
- inode, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- rc = md_intent_lock(exp, op_data, &oit, &req,
- &ll_md_blocking_ast, 0);
- ll_finish_md_op_data(op_data);
- if (rc < 0) {
- rc = ll_inode_revalidate_fini(inode, rc);
- goto out;
- }
-
- rc = ll_revalidate_it_finish(req, &oit, inode);
- if (rc != 0) {
- ll_intent_release(&oit);
- goto out;
- }
-
- /* Unlinked? Unhash the dentry, so it is not picked up later by
- * do_lookup() -> ll_revalidate_it(). We cannot use d_drop
- * here, in order to preserve get_cwd functionality on 2.6.
- * Bug 10503
- */
- if (!d_inode(dentry)->i_nlink) {
- spin_lock(&inode->i_lock);
- d_lustre_invalidate(dentry, 0);
- spin_unlock(&inode->i_lock);
- }
-
- ll_lookup_finish_locks(&oit, inode);
- } else if (!ll_have_md_lock(d_inode(dentry), &ibits, LCK_MINMODE)) {
- struct ll_sb_info *sbi = ll_i2sbi(d_inode(dentry));
- u64 valid = OBD_MD_FLGETATTR;
- struct md_op_data *op_data;
- int ealen = 0;
-
- if (S_ISREG(inode->i_mode)) {
- rc = ll_get_default_mdsize(sbi, &ealen);
- if (rc)
- return rc;
- valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
- }
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
- 0, ealen, LUSTRE_OPC_ANY,
- NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_valid = valid;
- rc = md_getattr(sbi->ll_md_exp, op_data, &req);
- ll_finish_md_op_data(op_data);
- if (rc)
- return ll_inode_revalidate_fini(inode, rc);
-
- rc = ll_prep_inode(&inode, req, NULL, NULL);
- }
-out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static int ll_merge_md_attr(struct inode *inode)
-{
- struct cl_attr attr = { 0 };
- int rc;
-
- LASSERT(ll_i2info(inode)->lli_lsm_md);
- rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
- &attr, ll_md_blocking_ast);
- if (rc)
- return rc;
-
- set_nlink(inode, attr.cat_nlink);
- inode->i_blocks = attr.cat_blocks;
- i_size_write(inode, attr.cat_size);
-
- ll_i2info(inode)->lli_atime = attr.cat_atime;
- ll_i2info(inode)->lli_mtime = attr.cat_mtime;
- ll_i2info(inode)->lli_ctime = attr.cat_ctime;
-
- return 0;
-}
-
-static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
-{
- struct inode *inode = d_inode(dentry);
- int rc;
-
- rc = __ll_inode_revalidate(dentry, ibits);
- if (rc != 0)
- return rc;
-
- /* if object isn't regular file, don't validate size */
- if (!S_ISREG(inode->i_mode)) {
- if (S_ISDIR(inode->i_mode) &&
- ll_i2info(inode)->lli_lsm_md) {
- rc = ll_merge_md_attr(inode);
- if (rc)
- return rc;
- }
-
- LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
- LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
- LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
- } else {
- struct ll_inode_info *lli = ll_i2info(inode);
-
- /* In case of restore, the MDT has the right size and has
- * already sent it back without granting the layout lock;
- * the inode is up to date, so a glimpse is useless.
- * Also, a glimpse needs the layout, and during a running
- * restore the MDT holds the layout lock, so the glimpse would
- * block until the end of the restore (getattr will block).
- */
- if (!test_bit(LLIF_FILE_RESTORING, &lli->lli_flags))
- rc = ll_glimpse_size(inode);
- }
- return rc;
-}
-
-int ll_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
-{
- struct inode *inode = d_inode(path->dentry);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ll_inode_info *lli = ll_i2info(inode);
- int res;
-
- res = ll_inode_revalidate(path->dentry,
- MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP);
- ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
-
- if (res)
- return res;
-
- OBD_FAIL_TIMEOUT(OBD_FAIL_GETATTR_DELAY, 30);
-
- stat->dev = inode->i_sb->s_dev;
- if (ll_need_32bit_api(sbi))
- stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
- else
- stat->ino = inode->i_ino;
- stat->mode = inode->i_mode;
- stat->uid = inode->i_uid;
- stat->gid = inode->i_gid;
- stat->rdev = inode->i_rdev;
- stat->atime = inode->i_atime;
- stat->mtime = inode->i_mtime;
- stat->ctime = inode->i_ctime;
- stat->blksize = 1 << inode->i_blkbits;
-
- stat->nlink = inode->i_nlink;
- stat->size = i_size_read(inode);
- stat->blocks = inode->i_blocks;
-
- return 0;
-}
-
-static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
- __u64 start, __u64 len)
-{
- int rc;
- size_t num_bytes;
- struct fiemap *fiemap;
- unsigned int extent_count = fieinfo->fi_extents_max;
-
- num_bytes = sizeof(*fiemap) + (extent_count *
- sizeof(struct fiemap_extent));
- fiemap = kvzalloc(num_bytes, GFP_KERNEL);
- if (!fiemap)
- return -ENOMEM;
-
- fiemap->fm_flags = fieinfo->fi_flags;
- fiemap->fm_extent_count = fieinfo->fi_extents_max;
- fiemap->fm_start = start;
- fiemap->fm_length = len;
-
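- /* The first extent is used as input: it carries the end offset and
- * device of a previous fiemap call when a mapping is continued.
- */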
- if (extent_count > 0 &&
- copy_from_user(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
- sizeof(struct fiemap_extent))) {
- rc = -EFAULT;
- goto out;
- }
-
- rc = ll_do_fiemap(inode, fiemap, num_bytes);
-
- fieinfo->fi_flags = fiemap->fm_flags;
- fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
- if (extent_count > 0 &&
- copy_to_user(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
- fiemap->fm_mapped_extents *
- sizeof(struct fiemap_extent))) {
- rc = -EFAULT;
- goto out;
- }
-out:
- kvfree(fiemap);
- return rc;
-}
-
-int ll_inode_permission(struct inode *inode, int mask)
-{
- struct ll_sb_info *sbi;
- struct root_squash_info *squash;
- const struct cred *old_cred = NULL;
- struct cred *cred = NULL;
- bool squash_id = false;
- int rc = 0;
-
- if (mask & MAY_NOT_BLOCK)
- return -ECHILD;
-
- /* As the root inode is NOT validated during lookup, we need to
- * do it here, before the permission check.
- */
-
- if (is_root_inode(inode)) {
- rc = __ll_inode_revalidate(inode->i_sb->s_root,
- MDS_INODELOCK_LOOKUP);
- if (rc)
- return rc;
- }
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), inode mode %x mask %o\n",
- PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
-
- /* squash fsuid/fsgid if needed */
- sbi = ll_i2sbi(inode);
- squash = &sbi->ll_squash;
- if (unlikely(squash->rsi_uid &&
- uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
- !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
- squash_id = true;
- }
-
- if (squash_id) {
- CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
- __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
- squash->rsi_uid, squash->rsi_gid);
-
- /*
- * update current process's credentials
- * and FS capability
- */
- cred = prepare_creds();
- if (!cred)
- return -ENOMEM;
-
- cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
- cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
- cred->cap_effective = cap_drop_nfsd_set(cred->cap_effective);
- cred->cap_effective = cap_drop_fs_set(cred->cap_effective);
-
- old_cred = override_creds(cred);
- }
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
- rc = generic_permission(inode, mask);
-
- /* restore current process's credentials and FS capability */
- if (squash_id) {
- revert_creds(old_cred);
- put_cred(cred);
- }
-
- return rc;
-}
-
-/* -o localflock - only provides locally consistent flock locks */
-const struct file_operations ll_file_operations = {
- .read_iter = ll_file_read_iter,
- .write_iter = ll_file_write_iter,
- .unlocked_ioctl = ll_file_ioctl,
- .open = ll_file_open,
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
- .splice_read = generic_file_splice_read,
- .fsync = ll_fsync,
- .flush = ll_flush
-};
-
-const struct file_operations ll_file_operations_flock = {
- .read_iter = ll_file_read_iter,
- .write_iter = ll_file_write_iter,
- .unlocked_ioctl = ll_file_ioctl,
- .open = ll_file_open,
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
- .splice_read = generic_file_splice_read,
- .fsync = ll_fsync,
- .flush = ll_flush,
- .flock = ll_file_flock,
- .lock = ll_file_flock
-};
-
-/* These are for -o noflock - to return ENOSYS on flock calls */
-const struct file_operations ll_file_operations_noflock = {
- .read_iter = ll_file_read_iter,
- .write_iter = ll_file_write_iter,
- .unlocked_ioctl = ll_file_ioctl,
- .open = ll_file_open,
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
- .splice_read = generic_file_splice_read,
- .fsync = ll_fsync,
- .flush = ll_flush,
- .flock = ll_file_noflock,
- .lock = ll_file_noflock
-};
-
-const struct inode_operations ll_file_inode_operations = {
- .setattr = ll_setattr,
- .getattr = ll_getattr,
- .permission = ll_inode_permission,
- .listxattr = ll_listxattr,
- .fiemap = ll_fiemap,
- .get_acl = ll_get_acl,
-};
-
-/* dynamic ioctl number support routines */
-static struct llioc_ctl_data {
- struct rw_semaphore ioc_sem;
- struct list_head ioc_head;
-} llioc = {
- __RWSEM_INITIALIZER(llioc.ioc_sem),
- LIST_HEAD_INIT(llioc.ioc_head)
-};
-
-struct llioc_data {
- struct list_head iocd_list;
- unsigned int iocd_size;
- llioc_callback_t iocd_cb;
- unsigned int iocd_count;
- unsigned int iocd_cmd[0];
-};
-
-void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
-{
- unsigned int size;
- struct llioc_data *in_data = NULL;
-
- if (!cb || !cmd || count > LLIOC_MAX_CMD || count < 0)
- return NULL;
-
- size = sizeof(*in_data) + count * sizeof(unsigned int);
- in_data = kzalloc(size, GFP_NOFS);
- if (!in_data)
- return NULL;
-
- in_data->iocd_size = size;
- in_data->iocd_cb = cb;
- in_data->iocd_count = count;
- memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
-
- down_write(&llioc.ioc_sem);
- list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
- up_write(&llioc.ioc_sem);
-
- return in_data;
-}
-EXPORT_SYMBOL(ll_iocontrol_register);
-
-void ll_iocontrol_unregister(void *magic)
-{
- struct llioc_data *tmp;
-
- if (!magic)
- return;
-
- down_write(&llioc.ioc_sem);
- list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
- if (tmp == magic) {
- list_del(&tmp->iocd_list);
- up_write(&llioc.ioc_sem);
-
- kfree(tmp);
- return;
- }
- }
- up_write(&llioc.ioc_sem);
-
- CWARN("didn't find iocontrol register block with magic: %p\n", magic);
-}
-EXPORT_SYMBOL(ll_iocontrol_unregister);
-
-static enum llioc_iter
-ll_iocontrol_call(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg, int *rcp)
-{
- enum llioc_iter ret = LLIOC_CONT;
- struct llioc_data *data;
- int rc = -EINVAL, i;
-
- down_read(&llioc.ioc_sem);
- list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
- for (i = 0; i < data->iocd_count; i++) {
- if (cmd != data->iocd_cmd[i])
- continue;
-
- ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
- break;
- }
-
- if (ret == LLIOC_STOP)
- break;
- }
- up_read(&llioc.ioc_sem);
-
- if (rcp)
- *rcp = rc;
- return ret;
-}
-
-int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct cl_object *obj = lli->lli_clob;
- struct lu_env *env;
- int rc;
- u16 refcheck;
-
- if (!obj)
- return 0;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- rc = cl_conf_set(env, obj, conf);
- if (rc < 0)
- goto out;
-
- if (conf->coc_opc == OBJECT_CONF_SET) {
- struct ldlm_lock *lock = conf->coc_lock;
- struct cl_layout cl = {
- .cl_layout_gen = 0,
- };
-
- LASSERT(lock);
- LASSERT(ldlm_has_layout(lock));
-
- /* The lock may only be allowed to match after the layout has
- * been applied to the inode; otherwise a stale layout could be
- * seen. Applying the layout must happen before dropping
- * the intent lock.
- */
- ldlm_lock_allow_match(lock);
-
- rc = cl_object_layout_get(env, obj, &cl);
- if (rc < 0)
- goto out;
-
- CDEBUG(D_VFSTRACE, DFID ": layout version change: %u -> %u\n",
- PFID(&lli->lli_fid), ll_layout_version_get(lli),
- cl.cl_layout_gen);
- ll_layout_version_set(lli, cl.cl_layout_gen);
- }
-out:
- cl_env_put(env, &refcheck);
- return rc;
-}
-
-/* Fetch layout from MDT with getxattr request, if it's not ready yet */
-static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *req;
- struct mdt_body *body;
- void *lvbdata;
- void *lmm;
- int lmmsize;
- int rc;
-
- CDEBUG(D_INODE, DFID " LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
- PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
- lock->l_lvb_data, lock->l_lvb_len);
-
- if (lock->l_lvb_data && ldlm_is_lvb_ready(lock))
- return 0;
-
- /* If the layout lock was granted right away, the layout is returned
- * in the DLM_LVB of the DLM reply; otherwise, if the lock was ever
- * blocked and then granted via a completion AST, we have to fetch the
- * layout here. Note that we cannot use the LVB buffer in the
- * completion AST because it is not large enough.
- */
- rc = ll_get_default_mdsize(sbi, &lmmsize);
- if (rc == 0)
- rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode),
- OBD_MD_FLXATTR, XATTR_NAME_LOV, lmmsize, &req);
- if (rc < 0)
- return rc;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!body) {
- rc = -EPROTO;
- goto out;
- }
-
- lmmsize = body->mbo_eadatasize;
- if (lmmsize == 0) { /* empty layout */
- rc = 0;
- goto out;
- }
-
- lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
- if (!lmm) {
- rc = -EFAULT;
- goto out;
- }
-
- lvbdata = kvzalloc(lmmsize, GFP_NOFS);
- if (!lvbdata) {
- rc = -ENOMEM;
- goto out;
- }
-
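- /* Install the fetched layout as the lock's LVB, replacing any stale
- * copy, while holding the resource lock.
- */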
- memcpy(lvbdata, lmm, lmmsize);
- lock_res_and_lock(lock);
- if (lock->l_lvb_data)
- kvfree(lock->l_lvb_data);
-
- lock->l_lvb_data = lvbdata;
- lock->l_lvb_len = lmmsize;
- unlock_res_and_lock(lock);
-
-out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-/**
- * Apply the layout to the inode. The layout lock is held on entry and is
- * released by this function.
- */
-static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
- struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ldlm_lock *lock;
- struct cl_object_conf conf;
- int rc = 0;
- bool lvb_ready;
- bool wait_layout = false;
-
- LASSERT(lustre_handle_is_used(lockh));
-
- lock = ldlm_handle2lock(lockh);
- LASSERT(lock);
- LASSERT(ldlm_has_layout(lock));
-
- LDLM_DEBUG(lock, "File " DFID "(%p) being reconfigured",
- PFID(&lli->lli_fid), inode);
-
- /* in case this is a cached lock, reinstate it with the new inode */
- md_set_lock_data(sbi->ll_md_exp, lockh, inode, NULL);
-
- lock_res_and_lock(lock);
- lvb_ready = ldlm_is_lvb_ready(lock);
- unlock_res_and_lock(lock);
- /* Checking lvb_ready is racy, but that is okay; the worst case is
- * that multiple processes configure the file at the same time.
- */
- if (lvb_ready) {
- rc = 0;
- goto out;
- }
-
- rc = ll_layout_fetch(inode, lock);
- if (rc < 0)
- goto out;
-
- /* For a layout lock, the lmm is returned in the lock's LVB.
- * lvb_data is immutable while the lock is held, so it is safe to
- * access it without the resource lock.
- *
- * Set the layout on the file. This is unlikely to fail, since the
- * old layout has surely been eliminated.
- */
- memset(&conf, 0, sizeof(conf));
- conf.coc_opc = OBJECT_CONF_SET;
- conf.coc_inode = inode;
- conf.coc_lock = lock;
- conf.u.coc_layout.lb_buf = lock->l_lvb_data;
- conf.u.coc_layout.lb_len = lock->l_lvb_len;
- rc = ll_layout_conf(inode, &conf);
-
- /* refresh layout failed, need to wait */
- wait_layout = rc == -EBUSY;
-
-out:
- LDLM_LOCK_PUT(lock);
- ldlm_lock_decref(lockh, mode);
-
- /* Wait for IO to complete if the old layout is still in use. */
- if (wait_layout) {
- CDEBUG(D_INODE, "%s: " DFID "(%p) wait for layout reconf\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(&lli->lli_fid), inode);
-
- memset(&conf, 0, sizeof(conf));
- conf.coc_opc = OBJECT_CONF_WAIT;
- conf.coc_inode = inode;
- rc = ll_layout_conf(inode, &conf);
- if (rc == 0)
- rc = -EAGAIN;
-
- CDEBUG(D_INODE,
- "%s: file=" DFID " waiting layout return: %d.\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(&lli->lli_fid), rc);
- }
- return rc;
-}
-
-static int ll_layout_refresh_locked(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct md_op_data *op_data;
- struct lookup_intent it;
- struct lustre_handle lockh;
- enum ldlm_mode mode;
- struct ptlrpc_request *req;
- int rc;
-
-again:
- /* The layout lock is usually cached on the local side, so try to
- * match it before grabbing the layout lock mutex.
- */
- mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
- LCK_CR | LCK_CW | LCK_PR | LCK_PW);
- if (mode != 0) { /* hit cached lock */
- rc = ll_layout_lock_set(&lockh, mode, inode);
- if (rc == -EAGAIN)
- goto again;
- return rc;
- }
-
- op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
- 0, 0, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- /* have to enqueue one */
- memset(&it, 0, sizeof(it));
- it.it_op = IT_LAYOUT;
-
- LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file " DFID "(%p)",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(&lli->lli_fid), inode);
-
- rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
- &ll_md_blocking_ast, 0);
- ptlrpc_req_finished(it.it_request);
- it.it_request = NULL;
-
- ll_finish_md_op_data(op_data);
-
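- /* Take over the granted lock mode: clearing it_lock_mode prevents
- * ll_intent_drop_lock() from releasing the reference that is still
- * needed for ll_layout_lock_set() below.
- */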
- mode = it.it_lock_mode;
- it.it_lock_mode = 0;
- ll_intent_drop_lock(&it);
-
- if (rc == 0) {
- /* set lock data in case this is a new lock */
- ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
- lockh.cookie = it.it_lock_handle;
- rc = ll_layout_lock_set(&lockh, mode, inode);
- if (rc == -EAGAIN)
- goto again;
- }
-
- return rc;
-}
-
-/**
- * Check whether a LAYOUT lock exists on the client side, and enqueue
- * one if there is none in the cache.
- *
- * This function does not hold the layout lock, so the lock may be revoked
- * any time after this function returns. Any operation that depends on the
- * layout should be redone in that case.
- *
- * This function should be called before lov_io_init() to get an up-to-date
- * layout version; the caller should save the version number, and after IO
- * is finished call this function again to verify that the layout was not
- * changed during the IO.
- */
-int ll_layout_refresh(struct inode *inode, __u32 *gen)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- int rc;
-
- *gen = ll_layout_version_get(lli);
- if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != CL_LAYOUT_GEN_NONE)
- return 0;
-
- /* sanity checks */
- LASSERT(fid_is_sane(ll_inode2fid(inode)));
- LASSERT(S_ISREG(inode->i_mode));
-
- /* take layout lock mutex to enqueue layout lock exclusively. */
- mutex_lock(&lli->lli_layout_mutex);
-
- rc = ll_layout_refresh_locked(inode);
- if (rc < 0)
- goto out;
-
- *gen = ll_layout_version_get(lli);
-out:
- mutex_unlock(&lli->lli_layout_mutex);
-
- return rc;
-}
-
-/**
- * Send a restore request to the MDT.
- */
-int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
-{
- struct hsm_user_request *hur;
- int len, rc;
-
- len = sizeof(struct hsm_user_request) +
- sizeof(struct hsm_user_item);
- hur = kzalloc(len, GFP_NOFS);
- if (!hur)
- return -ENOMEM;
-
- hur->hur_request.hr_action = HUA_RESTORE;
- hur->hur_request.hr_archive_id = 0;
- hur->hur_request.hr_flags = 0;
- memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
- sizeof(hur->hur_user_item[0].hui_fid));
- hur->hur_user_item[0].hui_extent.offset = offset;
- hur->hur_user_item[0].hui_extent.length = length;
- hur->hur_request.hr_itemcount = 1;
- rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,
- len, hur, NULL);
- kfree(hur);
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/llite/glimpse.c b/drivers/staging/lustre/lustre/llite/glimpse.c
deleted file mode 100644
index ce0d51767da3..000000000000
--- a/drivers/staging/lustre/lustre/llite/glimpse.c
+++ /dev/null
@@ -1,205 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * glimpse code shared between vvp and liblustre (and other Lustre clients in
- * the future).
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Oleg Drokin <oleg.drokin@sun.com>
- */
-
-#include <obd_class.h>
-#include <obd_support.h>
-#include <obd.h>
-
-#include <lustre_dlm.h>
-#include <lustre_mdc.h>
-#include <linux/pagemap.h>
-#include <linux/file.h>
-
-#include <cl_object.h>
-#include "llite_internal.h"
-
-static const struct cl_lock_descr whole_file = {
- .cld_start = 0,
- .cld_end = CL_PAGE_EOF,
- .cld_mode = CLM_READ
-};
-
-/*
- * Check whether the file has possibly unwritten pages.
- *
- * \retval 1 file is mmapped or has dirty pages
- * 0 otherwise
- */
-blkcnt_t dirty_cnt(struct inode *inode)
-{
- blkcnt_t cnt = 0;
- struct vvp_object *vob = cl_inode2vvp(inode);
- void *results[1];
-
- if (inode->i_mapping)
- cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->i_pages,
- results, 0, 1,
- PAGECACHE_TAG_DIRTY);
- if (cnt == 0 && atomic_read(&vob->vob_mmap_cnt) > 0)
- cnt = 1;
-
- return (cnt > 0) ? 1 : 0;
-}
-
-int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
- struct inode *inode, struct cl_object *clob, int agl)
-{
- const struct lu_fid *fid = lu_object_fid(&clob->co_lu);
- struct cl_lock *lock = vvp_env_lock(env);
- struct cl_lock_descr *descr = &lock->cll_descr;
- int result = 0;
-
- CDEBUG(D_DLMTRACE, "Glimpsing inode " DFID "\n", PFID(fid));
-
- /* NOTE: this looks like a DLM lock request, but it may
- * not be one. Due to the CEF_ASYNC flag (translated
- * to LDLM_FL_HAS_INTENT by osc), this is a
- * glimpse request that won't revoke any
- * conflicting DLM locks held. Instead,
- * ll_glimpse_callback() will be called on each
- * client holding a DLM lock against this file,
- * and the resulting size will be returned for each
- * stripe. A DLM lock on [0, EOF] is acquired only
- * if there were no conflicting locks. If there
- * were conflicting locks, enqueuing or waiting
- * fails with -ENAVAIL, but valid inode
- * attributes are returned anyway.
- */
- *descr = whole_file;
- descr->cld_obj = clob;
- descr->cld_mode = CLM_READ;
- descr->cld_enq_flags = CEF_ASYNC | CEF_MUST;
- if (agl)
- descr->cld_enq_flags |= CEF_AGL;
- /*
- * CEF_ASYNC is used because glimpse sub-locks cannot
- * deadlock (because they never conflict with other
- * locks) and, hence, can be enqueued out-of-order.
- *
- * CEF_MUST protects glimpse lock from conversion into
- * a lockless mode.
- */
- result = cl_lock_request(env, io, lock);
- if (result < 0)
- return result;
-
- if (!agl) {
- ll_merge_attr(env, inode);
- if (i_size_read(inode) > 0 && !inode->i_blocks) {
- /*
- * LU-417: Add dirty pages block count
- * lest i_blocks reports 0, some "cp" or
- * "tar" may think it's a completely
- * sparse file and skip it.
- */
- inode->i_blocks = dirty_cnt(inode);
- }
- }
-
- cl_lock_release(env, lock);
-
- return result;
-}
-
-static int cl_io_get(struct inode *inode, struct lu_env **envout,
- struct cl_io **ioout, u16 *refcheck)
-{
- struct lu_env *env;
- struct cl_io *io;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct cl_object *clob = lli->lli_clob;
- int result;
-
- if (S_ISREG(inode->i_mode)) {
- env = cl_env_get(refcheck);
- if (!IS_ERR(env)) {
- io = vvp_env_thread_io(env);
- io->ci_obj = clob;
- *envout = env;
- *ioout = io;
- result = 1;
- } else {
- result = PTR_ERR(env);
- }
- } else {
- result = 0;
- }
- return result;
-}
-
-int cl_glimpse_size0(struct inode *inode, int agl)
-{
- /*
- * We don't need ast_flags argument to cl_glimpse_size(), because
- * osc_lock_enqueue() takes care of the possible deadlock that said
- * argument was introduced to avoid.
- */
- /*
- * XXX but note that ll_file_seek() passes LDLM_FL_BLOCK_NOWAIT to
- * cl_glimpse_size(), which doesn't make sense: glimpse locks are not
- * blocking anyway.
- */
- struct lu_env *env = NULL;
- struct cl_io *io = NULL;
- int result;
- u16 refcheck;
-
- result = cl_io_get(inode, &env, &io, &refcheck);
- if (result > 0) {
-again:
- io->ci_verify_layout = 1;
- result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
- if (result > 0)
- /*
- * nothing to do for this io. This currently happens
- * when stripe sub-objects are not yet created.
- */
- result = io->ci_result;
- else if (result == 0)
- result = cl_glimpse_lock(env, io, inode, io->ci_obj,
- agl);
-
- OBD_FAIL_TIMEOUT(OBD_FAIL_GLIMPSE_DELAY, 2);
- cl_io_fini(env, io);
- if (unlikely(io->ci_need_restart))
- goto again;
- cl_env_put(env, &refcheck);
- }
- return result;
-}
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
deleted file mode 100644
index d7ea39ce0cb2..000000000000
--- a/drivers/staging/lustre/lustre/llite/lcommon_cl.c
+++ /dev/null
@@ -1,292 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * cl code shared between vvp and liblustre (and other Lustre clients in the
- * future).
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/quotaops.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
-#include <linux/rbtree.h>
-
-#include <obd.h>
-#include <obd_support.h>
-#include <lustre_fid.h>
-#include <lustre_dlm.h>
-#include <lustre_mdc.h>
-#include <cl_object.h>
-
-#include "llite_internal.h"
-
-/*
- * ccc_ prefix stands for "Common Client Code".
- */
-
-/*****************************************************************************
- *
- * Vvp device and device type functions.
- *
- */
-
-/**
- * An `emergency' environment used by cl_inode_fini() when cl_env_get()
- * fails. Access to this environment is serialized by cl_inode_fini_guard
- * mutex.
- */
-struct lu_env *cl_inode_fini_env;
-u16 cl_inode_fini_refcheck;
-
-/**
- * A mutex serializing calls to cl_inode_fini() under extreme memory
- * pressure, when environments cannot be allocated.
- */
-static DEFINE_MUTEX(cl_inode_fini_guard);
-
-int cl_setattr_ost(struct cl_object *obj, const struct iattr *attr,
- unsigned int attr_flags)
-{
- struct lu_env *env;
- struct cl_io *io;
- int result;
- u16 refcheck;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- io = vvp_env_thread_io(env);
- io->ci_obj = obj;
- io->ci_verify_layout = 1;
-
- io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime);
- io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime);
- io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime);
- io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size;
- io->u.ci_setattr.sa_attr_flags = attr_flags;
- io->u.ci_setattr.sa_valid = attr->ia_valid;
- io->u.ci_setattr.sa_parent_fid = lu_object_fid(&obj->co_lu);
-
-again:
- if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) {
- struct vvp_io *vio = vvp_env_io(env);
-
- if (attr->ia_valid & ATTR_FILE)
- /* populate the file descriptor for ftruncate to honor
- * group lock - see LU-787
- */
- vio->vui_fd = LUSTRE_FPRIVATE(attr->ia_file);
-
- result = cl_io_loop(env, io);
- } else {
- result = io->ci_result;
- }
- cl_io_fini(env, io);
- if (unlikely(io->ci_need_restart))
- goto again;
-
- cl_env_put(env, &refcheck);
- return result;
-}
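-
-/*
- * Caller sketch (hypothetical truncate path; new_size is an assumed
- * variable, not from this file):
- *
- *	struct iattr attr = {
- *		.ia_valid = ATTR_SIZE,
- *		.ia_size  = new_size,
- *	};
- *	rc = cl_setattr_ost(ll_i2info(inode)->lli_clob, &attr, 0);
- *
- * Note how the "again:" loop above restarts the whole io when the layout
- * changes underneath it (ci_need_restart).
- */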
-
-/**
- * Initialize or update CLIO structures for regular files when new
- * meta-data arrives from the server.
- *
- * \param inode regular file inode
- * \param md new file metadata from MDS
- * - allocates cl_object if necessary,
- * - updates the layout if the object was already allocated.
- */
-int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
-{
- struct lu_env *env;
- struct ll_inode_info *lli;
- struct cl_object *clob;
- struct lu_site *site;
- struct lu_fid *fid;
- struct cl_object_conf conf = {
- .coc_inode = inode,
- .u = {
- .coc_layout = md->layout,
- }
- };
- int result = 0;
- u16 refcheck;
-
- LASSERT(md->body->mbo_valid & OBD_MD_FLID);
- LASSERT(S_ISREG(inode->i_mode));
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- site = ll_i2sbi(inode)->ll_site;
- lli = ll_i2info(inode);
- fid = &lli->lli_fid;
- LASSERT(fid_is_sane(fid));
-
- if (!lli->lli_clob) {
-		/* clob is a slave of the inode; an empty lli_clob means this
-		 * is a new inode and there is no clob in the cache with the
-		 * given fid, so it is unnecessary to perform
-		 * lookup-alloc-lookup-insert, just alloc and insert directly.
-		 */
- LASSERT(inode->i_state & I_NEW);
- conf.coc_lu.loc_flags = LOC_F_NEW;
- clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev),
- fid, &conf);
- if (!IS_ERR(clob)) {
- /*
- * No locking is necessary, as new inode is
- * locked by I_NEW bit.
- */
- lli->lli_clob = clob;
- lu_object_ref_add(&clob->co_lu, "inode", inode);
- } else {
- result = PTR_ERR(clob);
- }
- } else {
- result = cl_conf_set(env, lli->lli_clob, &conf);
- }
-
- cl_env_put(env, &refcheck);
-
- if (result != 0)
- CERROR("Failure to initialize cl object " DFID ": %d\n",
- PFID(fid), result);
- return result;
-}
-
-/**
- * Wait for others to drop their references to the object first, then drop
- * the last one, which destroys the object immediately.
- * Must be called after cl_object_kill() against this object.
- *
- * The reason we want to do this is: destroying the top object will wait
- * for sub-objects to be destroyed first, so we can't let the bottom layer
- * (e.g. from ASTs) initiate destruction of the top object, which may
- * deadlock. See bz22520.
- */
-static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
-{
- struct lu_object_header *header = obj->co_lu.lo_header;
- wait_queue_entry_t waiter;
-
- if (unlikely(atomic_read(&header->loh_ref) != 1)) {
- struct lu_site *site = obj->co_lu.lo_dev->ld_site;
- wait_queue_head_t *wq;
-
- wq = lu_site_wq_from_fid(site, &header->loh_fid);
-
- init_waitqueue_entry(&waiter, current);
- add_wait_queue(wq, &waiter);
-
- while (1) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (atomic_read(&header->loh_ref) == 1)
- break;
- schedule();
- }
-
- set_current_state(TASK_RUNNING);
- remove_wait_queue(wq, &waiter);
- }
-
- cl_object_put(env, obj);
-}
-
-void cl_inode_fini(struct inode *inode)
-{
- struct lu_env *env;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct cl_object *clob = lli->lli_clob;
- u16 refcheck;
- int emergency;
-
- if (clob) {
- env = cl_env_get(&refcheck);
- emergency = IS_ERR(env);
- if (emergency) {
- mutex_lock(&cl_inode_fini_guard);
- LASSERT(cl_inode_fini_env);
- env = cl_inode_fini_env;
- }
-		/*
-		 * The cl_object cache is a slave to the inode cache (which,
-		 * in turn, is a slave to the dentry cache); don't keep a
-		 * cl_object in memory when its master is evicted.
-		 */
- cl_object_kill(env, clob);
- lu_object_ref_del(&clob->co_lu, "inode", inode);
- cl_object_put_last(env, clob);
- lli->lli_clob = NULL;
- if (emergency)
- mutex_unlock(&cl_inode_fini_guard);
- else
- cl_env_put(env, &refcheck);
- }
-}
-
-/**
- * build inode number from passed @fid
- */
-__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32)
-{
- if (BITS_PER_LONG == 32 || api32)
- return fid_flatten32(fid);
- else
- return fid_flatten(fid);
-}
-
-/**
- * build inode generation from passed @fid. If our FID overflows the 32-bit
- * inode number then return a non-zero generation to distinguish them.
- */
-__u32 cl_fid_build_gen(const struct lu_fid *fid)
-{
- __u32 gen;
-
- if (fid_is_igif(fid)) {
- gen = lu_igif_gen(fid);
- return gen;
- }
-
- gen = fid_flatten(fid) >> 32;
- return gen;
-}
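-
-/*
- * Usage sketch (hypothetical stat path; "stat" and "api32" are assumed
- * caller variables):
- *
- *	stat->ino        = cl_fid_build_ino(fid, api32);
- *	stat->generation = cl_fid_build_gen(fid);
- *
- * The pair stays unique even when the 32-bit ino overflows, because for
- * non-IGIF fids the generation carries the high 32 bits of the flattened
- * FID.
- */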
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_misc.c b/drivers/staging/lustre/lustre/llite/lcommon_misc.c
deleted file mode 100644
index a246b955306e..000000000000
--- a/drivers/staging/lustre/lustre/llite/lcommon_misc.c
+++ /dev/null
@@ -1,186 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * cl code shared between vvp and liblustre (and other Lustre clients in the
- * future).
- *
- */
-#define DEBUG_SUBSYSTEM S_LLITE
-#include <obd_class.h>
-#include <obd_support.h>
-#include <obd.h>
-#include <cl_object.h>
-
-#include "llite_internal.h"
-
-/* Initialize the default and maximum LOV EA and cookie sizes. This allows
- * us to make MDS RPCs with large enough reply buffers to hold the
- * maximum-sized (= maximum striped) EA and cookie without having to
- * calculate this (via a call into the LOV + OSCs) each time we make an RPC.
- */
-int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
-{
- u32 val_size, max_easize, def_easize;
- int rc;
-
- val_size = sizeof(max_easize);
- rc = obd_get_info(NULL, dt_exp, sizeof(KEY_MAX_EASIZE), KEY_MAX_EASIZE,
- &val_size, &max_easize);
- if (rc)
- return rc;
-
- val_size = sizeof(def_easize);
- rc = obd_get_info(NULL, dt_exp, sizeof(KEY_DEFAULT_EASIZE),
- KEY_DEFAULT_EASIZE, &val_size, &def_easize);
- if (rc)
- return rc;
-
-	/*
-	 * The default cookie size is 0 because, since 2.4, the server
-	 * doesn't send llog cookies to the client.
-	 */
- CDEBUG(D_HA, "updating def/max_easize: %d/%d\n",
- def_easize, max_easize);
-
- rc = md_init_ea_size(md_exp, max_easize, def_easize);
- return rc;
-}
-
-/**
- * This function is used as an upcall-callback, hooked by liblustre and
- * llite clients into the obd_notify() listener chain to handle
- * notifications about changes of import connect_flags. See
- * llu_fsswop_mount() and lustre_common_fill_super().
- */
-int cl_ocd_update(struct obd_device *host,
- struct obd_device *watched,
- enum obd_notify_event ev, void *owner, void *data)
-{
- struct lustre_client_ocd *lco;
- struct client_obd *cli;
- __u64 flags;
- int result;
-
- if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME) &&
- watched->obd_set_up && !watched->obd_stopping) {
- cli = &watched->u.cli;
- lco = owner;
- flags = cli->cl_import->imp_connect_data.ocd_connect_flags;
- CDEBUG(D_SUPER, "Changing connect_flags: %#llx -> %#llx\n",
- lco->lco_flags, flags);
- mutex_lock(&lco->lco_lock);
- lco->lco_flags &= flags;
- /* for each osc event update ea size */
- if (lco->lco_dt_exp)
- cl_init_ea_size(lco->lco_md_exp, lco->lco_dt_exp);
-
- mutex_unlock(&lco->lco_lock);
- result = 0;
- } else {
- CERROR("unexpected notification from %s %s (setup:%d,stopping:%d)!\n",
- watched->obd_type->typ_name,
- watched->obd_name, watched->obd_set_up,
- watched->obd_stopping);
- result = -EINVAL;
- }
- return result;
-}
-
-#define GROUPLOCK_SCOPE "grouplock"
-
-int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
- struct ll_grouplock *cg)
-{
- struct lu_env *env;
- struct cl_io *io;
- struct cl_lock *lock;
- struct cl_lock_descr *descr;
- __u32 enqflags;
- u16 refcheck;
- int rc;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- io = vvp_env_thread_io(env);
- io->ci_obj = obj;
-
- rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
- if (rc != 0) {
- cl_io_fini(env, io);
- cl_env_put(env, &refcheck);
- /* Does not make sense to take GL for released layout */
- if (rc > 0)
- rc = -ENOTSUPP;
- return rc;
- }
-
- lock = vvp_env_lock(env);
- descr = &lock->cll_descr;
- descr->cld_obj = obj;
- descr->cld_start = 0;
- descr->cld_end = CL_PAGE_EOF;
- descr->cld_gid = gid;
- descr->cld_mode = CLM_GROUP;
-
- enqflags = CEF_MUST | (nonblock ? CEF_NONBLOCK : 0);
- descr->cld_enq_flags = enqflags;
-
- rc = cl_lock_request(env, io, lock);
- if (rc < 0) {
- cl_io_fini(env, io);
- cl_env_put(env, &refcheck);
- return rc;
- }
-
- cg->lg_env = env;
- cg->lg_io = io;
- cg->lg_lock = lock;
- cg->lg_gid = gid;
-
- return 0;
-}
-
-void cl_put_grouplock(struct ll_grouplock *cg)
-{
- struct lu_env *env = cg->lg_env;
- struct cl_io *io = cg->lg_io;
- struct cl_lock *lock = cg->lg_lock;
-
- LASSERT(cg->lg_env);
- LASSERT(cg->lg_gid);
-
- cl_lock_release(env, lock);
- cl_io_fini(env, io);
- cl_env_put(env, NULL);
-}
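-
-/*
- * Pairing sketch (hypothetical ioctl-handler use; error handling trimmed):
- *
- *	struct ll_grouplock gl;
- *	int rc = cl_get_grouplock(obj, gid, nonblock, &gl);
- *
- *	if (rc == 0) {
- *		... group-locked I/O over [0, CL_PAGE_EOF] ...
- *		cl_put_grouplock(&gl);
- *	}
- *
- * The env/io/lock triple stays pinned inside the ll_grouplock until the
- * matching cl_put_grouplock() call.
- */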
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
deleted file mode 100644
index c08a6e14b6d7..000000000000
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ /dev/null
@@ -1,1344 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef LLITE_INTERNAL_H
-#define LLITE_INTERNAL_H
-#include <lustre_debug.h>
-#include <uapi/linux/lustre/lustre_ver.h>
-#include <lustre_disk.h> /* for s2sbi */
-#include <lustre_linkea.h>
-
-/* for struct cl_lock_descr and struct cl_io */
-#include <lustre_patchless_compat.h>
-#include <lustre_compat.h>
-#include <cl_object.h>
-#include <lustre_lmv.h>
-#include <lustre_mdc.h>
-#include <lustre_intent.h>
-#include <linux/compat.h>
-#include <linux/namei.h>
-#include <linux/xattr.h>
-#include <linux/posix_acl_xattr.h>
-#include "vvp_internal.h"
-#include "range_lock.h"
-
-#ifndef FMODE_EXEC
-#define FMODE_EXEC 0
-#endif
-
-#ifndef VM_FAULT_RETRY
-#define VM_FAULT_RETRY 0
-#endif
-
-/** Only used on client-side for indicating the tail of dir hash/offset. */
-#define LL_DIR_END_OFF 0x7fffffffffffffffULL
-#define LL_DIR_END_OFF_32BIT 0x7fffffffUL
-
-/* 4UL * 1024 * 1024 */
-#define LL_MAX_BLKSIZE_BITS 22
-
-#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
-#define LUSTRE_FPRIVATE(file) ((file)->private_data)
-
-struct ll_dentry_data {
- struct lookup_intent *lld_it;
- unsigned int lld_sa_generation;
- unsigned int lld_invalid:1;
- unsigned int lld_nfs_dentry:1;
- struct rcu_head lld_rcu_head;
-};
-
-#define ll_d2d(de) ((struct ll_dentry_data *)((de)->d_fsdata))
-
-#define LLI_INODE_MAGIC 0x111d0de5
-#define LLI_INODE_DEAD 0xdeadd00d
-
-struct ll_getname_data {
- struct dir_context ctx;
- char *lgd_name; /* points to buffer with NAME_MAX+1 size */
- struct lu_fid lgd_fid; /* target fid we are looking for */
- int lgd_found; /* inode matched? */
-};
-
-struct ll_grouplock {
- struct lu_env *lg_env;
- struct cl_io *lg_io;
- struct cl_lock *lg_lock;
- unsigned long lg_gid;
-};
-
-enum ll_file_flags {
- /* File data is modified. */
- LLIF_DATA_MODIFIED = 0,
- /* File is being restored */
- LLIF_FILE_RESTORING = 1,
- /* Xattr cache is attached to the file */
- LLIF_XATTR_CACHE = 2,
-};
-
-struct ll_inode_info {
- __u32 lli_inode_magic;
-
- spinlock_t lli_lock;
- unsigned long lli_flags;
- struct posix_acl *lli_posix_acl;
-
- /* identifying fields for both metadata and data stacks. */
- struct lu_fid lli_fid;
- /* master inode fid for stripe directory */
- struct lu_fid lli_pfid;
-
- /* We need all three because every inode may be opened in different
- * modes
- */
- struct obd_client_handle *lli_mds_read_och;
- struct obd_client_handle *lli_mds_write_och;
- struct obd_client_handle *lli_mds_exec_och;
- __u64 lli_open_fd_read_count;
- __u64 lli_open_fd_write_count;
- __u64 lli_open_fd_exec_count;
- /* Protects access to och pointers and their usage counters */
- struct mutex lli_och_mutex;
-
- struct inode lli_vfs_inode;
-
- /* the most recent timestamps obtained from mds */
- s64 lli_atime;
- s64 lli_mtime;
- s64 lli_ctime;
- spinlock_t lli_agl_lock;
-
-	/* Try to keep the d::member and f::member aligned. Before using
-	 * these members, make clear whether it is a directory or not.
-	 */
- union {
- /* for directory */
- struct {
- /* serialize normal readdir and statahead-readdir. */
- struct mutex lli_readdir_mutex;
-
- /* metadata statahead */
-			/* since parent and child threads can share the same
-			 * @file struct, "opendir_key" is the token used at
-			 * dir close to decide who should clean up the dir
-			 * readahead when the parent exits before the child.
-			 */
- void *lli_opendir_key;
- struct ll_statahead_info *lli_sai;
- /* protect statahead stuff. */
- spinlock_t lli_sa_lock;
-			/* "opendir_pid" is the token used at
-			 * lookup/revalidate to identify the owner of the
-			 * dir statahead.
-			 */
- pid_t lli_opendir_pid;
-			/* stat will try to access statahead entries or start
-			 * statahead if this flag is set. The flag is set upon
-			 * dir open and cleared when the dir is closed, when
-			 * the statahead hit ratio is too low, or when
-			 * starting the statahead thread fails.
-			 */
- unsigned int lli_sa_enabled:1;
- /* generation for statahead */
- unsigned int lli_sa_generation;
- /* directory stripe information */
- struct lmv_stripe_md *lli_lsm_md;
- /* default directory stripe offset. This is extracted
- * from the "dmv" xattr in order to decide which MDT to
- * create a subdirectory on. The MDS itself fetches
- * "dmv" and gets the rest of the default layout itself
- * (count, hash, etc).
- */
- __u32 lli_def_stripe_offset;
- };
-
- /* for non-directory */
- struct {
- struct mutex lli_size_mutex;
- char *lli_symlink_name;
- /*
- * struct rw_semaphore {
- * signed long count; // align d.d_def_acl
- * spinlock_t wait_lock; // align d.d_sa_lock
- * struct list_head wait_list;
- * }
- */
- struct rw_semaphore lli_trunc_sem;
- struct range_lock_tree lli_write_tree;
-
- struct rw_semaphore lli_glimpse_sem;
- unsigned long lli_glimpse_time;
- struct list_head lli_agl_list;
- __u64 lli_agl_index;
-
- /* for writepage() only to communicate to fsync */
- int lli_async_rc;
-
-			/*
-			 * whenever a process tries to read/write the file,
-			 * its jobid will be saved here, and it'll be packed
-			 * into the write RPC when flushed later.
-			 *
-			 * so the read/write statistics for a jobid will not
-			 * be accurate if the file is shared by different
-			 * jobs.
-			 */
- char lli_jobid[LUSTRE_JOBID_SIZE];
- };
- };
-
-	/* XXX: Although the following frequently used members may only be
-	 * meaningful for non-directory objects, it would waste time to check
-	 * whether the object is a directory before using them. On the other
-	 * hand, since sizeof(f) > sizeof(d) currently, moving those members
-	 * into u.f cannot reduce the "ll_inode_info" size anyway. So keep
-	 * them outside.
-	 *
-	 * In the future, if more members are added only for directories,
-	 * some of the following members can be moved into u.f.
-	 */
- struct cl_object *lli_clob;
-
- /* mutex to request for layout lock exclusively. */
- struct mutex lli_layout_mutex;
- /* Layout version, protected by lli_layout_lock */
- __u32 lli_layout_gen;
- spinlock_t lli_layout_lock;
-
- struct rw_semaphore lli_xattrs_list_rwsem;
- struct mutex lli_xattrs_enq_lock;
- struct list_head lli_xattrs;/* ll_xattr_entry->xe_list */
-};
-
-static inline __u32 ll_layout_version_get(struct ll_inode_info *lli)
-{
- __u32 gen;
-
- spin_lock(&lli->lli_layout_lock);
- gen = lli->lli_layout_gen;
- spin_unlock(&lli->lli_layout_lock);
-
- return gen;
-}
-
-static inline void ll_layout_version_set(struct ll_inode_info *lli, __u32 gen)
-{
- spin_lock(&lli->lli_layout_lock);
- lli->lli_layout_gen = gen;
- spin_unlock(&lli->lli_layout_lock);
-}
-
-int ll_xattr_cache_destroy(struct inode *inode);
-
-int ll_xattr_cache_get(struct inode *inode, const char *name,
- char *buffer, size_t size, __u64 valid);
-
-int ll_init_security(struct dentry *dentry, struct inode *inode,
- struct inode *dir);
-
-/*
- * Locking to guarantee consistency of non-atomic updates to long long i_size,
- * consistency between file size and KMS.
- *
- * Implemented by ->lli_size_mutex and ->lsm_lock, nested in that order.
- */
-
-void ll_inode_size_lock(struct inode *inode);
-void ll_inode_size_unlock(struct inode *inode);
-
-/* FIXME: replace the name of this with LL_I to conform to kernel stuff */
-/* static inline struct ll_inode_info *LL_I(struct inode *inode) */
-static inline struct ll_inode_info *ll_i2info(struct inode *inode)
-{
- return container_of(inode, struct ll_inode_info, lli_vfs_inode);
-}
-
-/* default to about 64M of readahead on a given system. */
-#define SBI_DEFAULT_READAHEAD_MAX (64UL << (20 - PAGE_SHIFT))
-
-/* default to read-ahead full files smaller than 2MB on the second read */
-#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - PAGE_SHIFT))
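-
-/*
- * Worked example, assuming 4 KiB pages (PAGE_SHIFT == 12):
- * SBI_DEFAULT_READAHEAD_MAX = 64UL << 8 = 16384 pages = 64 MiB, and
- * SBI_DEFAULT_READAHEAD_WHOLE_MAX = 2UL << 8 = 512 pages = 2 MiB.
- */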
-
-enum ra_stat {
- RA_STAT_HIT = 0,
- RA_STAT_MISS,
- RA_STAT_DISTANT_READPAGE,
- RA_STAT_MISS_IN_WINDOW,
- RA_STAT_FAILED_GRAB_PAGE,
- RA_STAT_FAILED_MATCH,
- RA_STAT_DISCARDED,
- RA_STAT_ZERO_LEN,
- RA_STAT_ZERO_WINDOW,
- RA_STAT_EOF,
- RA_STAT_MAX_IN_FLIGHT,
- RA_STAT_WRONG_GRAB_PAGE,
- RA_STAT_FAILED_REACH_END,
- _NR_RA_STAT,
-};
-
-struct ll_ra_info {
- atomic_t ra_cur_pages;
- unsigned long ra_max_pages;
- unsigned long ra_max_pages_per_file;
- unsigned long ra_max_read_ahead_whole_pages;
-};
-
-/* ra_io_arg is filled at the beginning of ll_readahead under ras_lock;
- * the following ll_read_ahead_pages then reads RA pages according to
- * this arg. All items in this structure are counted in units of page
- * index.
- */
-struct ra_io_arg {
- unsigned long ria_start; /* start offset of read-ahead*/
- unsigned long ria_end; /* end offset of read-ahead*/
- unsigned long ria_reserved; /* reserved pages for read-ahead */
- unsigned long ria_end_min; /* minimum end to cover current read */
- bool ria_eof; /* reach end of file */
-	/* If a stride read pattern is detected, ria_stoff is where the
-	 * stride read starts. Note: for normal read-ahead this value is
-	 * meaningless and will not be accessed.
-	 */
- pgoff_t ria_stoff;
-	/* ria_length and ria_pages are the stride length and the pages per
-	 * stride in stride I/O mode. They are also used to check whether
-	 * read-ahead pages belong to a stride I/O read-ahead.
-	 */
- unsigned long ria_length;
- unsigned long ria_pages;
-};
-
-/* LL_HIST_MAX=32 causes an overflow */
-#define LL_HIST_MAX 28
-#define LL_HIST_START 12 /* buckets start at 2^12 = 4k */
-#define LL_PROCESS_HIST_MAX 10
-struct per_process_info {
- pid_t pid;
- struct obd_histogram pp_r_hist;
- struct obd_histogram pp_w_hist;
-};
-
-/* pp_extents[LL_PROCESS_HIST_MAX] will hold the combined process info */
-struct ll_rw_extents_info {
- struct per_process_info pp_extents[LL_PROCESS_HIST_MAX + 1];
-};
-
-#define LL_OFFSET_HIST_MAX 100
-struct ll_rw_process_info {
- pid_t rw_pid;
- int rw_op;
- loff_t rw_range_start;
- loff_t rw_range_end;
- loff_t rw_last_file_pos;
- loff_t rw_offset;
- size_t rw_smallest_extent;
- size_t rw_largest_extent;
- struct ll_file_data *rw_last_file;
-};
-
-enum stats_track_type {
- STATS_TRACK_ALL = 0, /* track all processes */
- STATS_TRACK_PID, /* track process with this pid */
- STATS_TRACK_PPID, /* track processes with this ppid */
- STATS_TRACK_GID, /* track processes with this gid */
- STATS_TRACK_LAST,
-};
-
-/* flags for sbi->ll_flags */
-#define LL_SBI_NOLCK 0x01 /* DLM locking disabled (directio-only) */
-#define LL_SBI_CHECKSUM 0x02 /* checksum each page as it's written */
-#define LL_SBI_FLOCK 0x04
-#define LL_SBI_USER_XATTR 0x08 /* support user xattr */
-#define LL_SBI_ACL 0x10 /* support ACL */
-/* LL_SBI_RMT_CLIENT 0x40 remote client */
-#define LL_SBI_MDS_CAPA 0x80 /* support mds capa, obsolete */
-#define LL_SBI_OSS_CAPA 0x100 /* support oss capa, obsolete */
-#define LL_SBI_LOCALFLOCK 0x200 /* Local flocks support by kernel */
-#define LL_SBI_LRU_RESIZE 0x400 /* lru resize support */
-#define LL_SBI_LAZYSTATFS 0x800 /* lazystatfs mount option */
-/* LL_SBI_SOM_PREVIEW 0x1000 SOM preview mount option, obsolete */
-#define LL_SBI_32BIT_API 0x2000 /* generate 32 bit inodes. */
-#define LL_SBI_64BIT_HASH 0x4000 /* support 64-bits dir hash/offset */
-#define LL_SBI_AGL_ENABLED 0x8000 /* enable agl */
-#define LL_SBI_VERBOSE 0x10000 /* verbose mount/umount */
-#define LL_SBI_LAYOUT_LOCK 0x20000 /* layout lock support */
-#define LL_SBI_USER_FID2PATH 0x40000 /* allow fid2path by unprivileged users */
-#define LL_SBI_XATTR_CACHE 0x80000 /* support for xattr cache */
-#define LL_SBI_NOROOTSQUASH 0x100000 /* do not apply root squash */
-#define LL_SBI_ALWAYS_PING    0x200000 /* always ping even if server
-					* has suppress_pings set
-					*/
-
-#define LL_SBI_FLAGS { \
- "nolck", \
- "checksum", \
- "flock", \
- "user_xattr", \
- "acl", \
- "???", \
- "???", \
- "mds_capa", \
- "oss_capa", \
- "flock", \
- "lru_resize", \
- "lazy_statfs", \
- "som", \
- "32bit_api", \
- "64bit_hash", \
- "agl", \
- "verbose", \
- "layout", \
- "user_fid2path",\
- "xattr_cache", \
- "norootsquash", \
- "always_ping", \
-}
-
-/*
- * This is embedded into llite super-blocks to keep track of connect
- * flags (capabilities) supported by all imports a given mount is
- * connected to.
- */
-struct lustre_client_ocd {
-	/*
-	 * This is the conjunction of connect_flags across all imports
-	 * (LOVs) this mount is connected to. This field is updated by
-	 * cl_ocd_update() under ->lco_lock.
-	 */
- __u64 lco_flags;
- struct mutex lco_lock;
- struct obd_export *lco_md_exp;
- struct obd_export *lco_dt_exp;
-};
-
-struct ll_sb_info {
-	/* this protects pglist and ra_info. It isn't safe to grab it
-	 * from interrupt contexts
-	 */
- spinlock_t ll_lock;
- spinlock_t ll_pp_extent_lock; /* pp_extent entry*/
- spinlock_t ll_process_lock; /* ll_rw_process_info */
- struct obd_uuid ll_sb_uuid;
- struct obd_export *ll_md_exp;
- struct obd_export *ll_dt_exp;
- struct dentry *ll_debugfs_entry;
- struct lu_fid ll_root_fid; /* root object fid */
-
- int ll_flags;
- unsigned int ll_umounting:1,
- ll_xattr_cache_enabled:1,
- ll_client_common_fill_super_succeeded:1;
-
- struct lustre_client_ocd ll_lco;
-
- struct lprocfs_stats *ll_stats; /* lprocfs stats counter */
-
- /*
- * Used to track "unstable" pages on a client, and maintain a
- * LRU list of clean pages. An "unstable" page is defined as
- * any page which is sent to a server as part of a bulk request,
- * but is uncommitted to stable storage.
- */
- struct cl_client_cache *ll_cache;
-
- struct lprocfs_stats *ll_ra_stats;
-
- struct ll_ra_info ll_ra_info;
- unsigned int ll_namelen;
- const struct file_operations *ll_fop;
-
- unsigned int ll_md_brw_pages; /* readdir pages per RPC */
-
- struct lu_site *ll_site;
- struct cl_device *ll_cl;
- /* Statistics */
- struct ll_rw_extents_info ll_rw_extents_info;
- int ll_extent_process_count;
- struct ll_rw_process_info ll_rw_process_info[LL_PROCESS_HIST_MAX];
- unsigned int ll_offset_process_count;
- struct ll_rw_process_info ll_rw_offset_info[LL_OFFSET_HIST_MAX];
- unsigned int ll_rw_offset_entry_count;
- int ll_stats_track_id;
- enum stats_track_type ll_stats_track_type;
- int ll_rw_stats_on;
-
- /* metadata stat-ahead */
- unsigned int ll_sa_max; /* max statahead RPCs */
- atomic_t ll_sa_total; /* statahead thread started
- * count
- */
- atomic_t ll_sa_wrong; /* statahead thread stopped for
- * low hit ratio
- */
- atomic_t ll_sa_running; /* running statahead thread
- * count
- */
- atomic_t ll_agl_total; /* AGL thread started count */
-
- dev_t ll_sdev_orig; /* save s_dev before assign for
- * clustered nfs
- */
- /* root squash */
- struct root_squash_info ll_squash;
- struct path ll_mnt;
-
- __kernel_fsid_t ll_fsid;
- struct kobject ll_kobj; /* sysfs object */
- struct super_block *ll_sb; /* struct super_block (for sysfs code)*/
- struct completion ll_kobj_unregister;
-};
-
-/*
- * per file-descriptor read-ahead data.
- */
-struct ll_readahead_state {
- spinlock_t ras_lock;
- /*
- * index of the last page that read(2) needed and that wasn't in the
- * cache. Used by ras_update() to detect seeks.
- *
- * XXX nikita: if access seeks into cached region, Lustre doesn't see
- * this.
- */
- unsigned long ras_last_readpage;
-	/*
-	 * number of pages read after the last read-ahead window reset. As
-	 * the window is reset on each seek, this is effectively the number
-	 * of consecutive accesses. Maybe ->ras_accessed_in_window is a
-	 * better name.
-	 *
-	 * XXX nikita: the window is also reset (by ras_update()) when Lustre
-	 * believes that memory pressure evicts read-ahead pages. In that
-	 * case, it probably doesn't make sense to expand the window to
-	 * PTLRPC_MAX_BRW_PAGES on the third access.
-	 */
- unsigned long ras_consecutive_pages;
-	/*
-	 * number of read requests after the last read-ahead window reset.
-	 * As the window is reset on each seek, this is effectively the
-	 * number of consecutive read requests and is used to trigger
-	 * read-ahead.
-	 */
- unsigned long ras_consecutive_requests;
- /*
- * Parameters of current read-ahead window. Handled by
- * ras_update(). On the initial access to the file or after a seek,
- * window is reset to 0. After 3 consecutive accesses, window is
- * expanded to PTLRPC_MAX_BRW_PAGES. Afterwards, window is enlarged by
- * PTLRPC_MAX_BRW_PAGES chunks up to ->ra_max_pages.
- */
- unsigned long ras_window_start, ras_window_len;
- /*
- * Optimal RPC size. It decides how many pages will be sent
- * for each read-ahead.
- */
- unsigned long ras_rpc_size;
-	/*
-	 * Where the next read-ahead should start. This lies within the
-	 * read-ahead window, which is read in pieces rather than at once
-	 * because: 1. Lustre limits the total number of pages under
-	 * read-ahead by ->ra_max_pages (see ll_ra_count_get()); 2. the
-	 * client cannot read pages not covered by a DLM lock.
-	 */
- unsigned long ras_next_readahead;
- /*
- * Total number of ll_file_read requests issued, reads originating
- * due to mmap are not counted in this total. This value is used to
- * trigger full file read-ahead after multiple reads to a small file.
- */
- unsigned long ras_requests;
-	/*
-	 * Page index with respect to the current request; this value
-	 * will not be accurate when dealing with reads issued via mmap.
-	 */
- unsigned long ras_request_index;
- /*
- * The following 3 items are used for detecting the stride I/O
- * mode.
- * In stride I/O mode,
- * ...............|-----data-----|****gap*****|--------|******|....
- * offset |-stride_pages-|-stride_gap-|
- * ras_stride_offset = offset;
- * ras_stride_length = stride_pages + stride_gap;
- * ras_stride_pages = stride_pages;
- * Note: all these three items are counted by pages.
- */
- unsigned long ras_stride_length;
- unsigned long ras_stride_pages;
- pgoff_t ras_stride_offset;
-	/*
-	 * number of consecutive stride requests; similar to
-	 * ras_consecutive_requests, but used in stride I/O mode.
-	 * Note: stride read-ahead is only enabled after more than 2
-	 * consecutive stride requests are detected.
-	 */
- unsigned long ras_consecutive_stride_requests;
-};
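-
-/*
- * Worked stride example (values assumed): a reader touching 4 pages of
- * data followed by a 12-page gap, repeatedly, yields
- *	ras_stride_pages  = 4
- *	ras_stride_length = 4 + 12 = 16
- * so read-ahead fetches 4 pages out of every 16-page stride starting
- * at ras_stride_offset.
- */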
-
-extern struct kmem_cache *ll_file_data_slab;
-struct lustre_handle;
-struct ll_file_data {
- struct ll_readahead_state fd_ras;
- struct ll_grouplock fd_grouplock;
- __u64 lfd_pos;
- __u32 fd_flags;
- fmode_t fd_omode;
-	/* open handle if a lease exists for this file.
-	 * Borrows lli->lli_och_mutex to protect assignment
-	 */
- struct obd_client_handle *fd_lease_och;
- struct obd_client_handle *fd_och;
- struct file *fd_file;
-	/* Indicates whether a failure needs to be reported on close.
-	 * true: failure is already known, don't report it again.
-	 * false: failure is unknown and should be reported.
-	 */
- bool fd_write_failed;
- rwlock_t fd_lock; /* protect lcc list */
- struct list_head fd_lccs; /* list of ll_cl_context */
-};
-
-extern struct dentry *llite_root;
-extern struct kset *llite_kset;
-
-static inline struct inode *ll_info2i(struct ll_inode_info *lli)
-{
- return &lli->lli_vfs_inode;
-}
-
-__u32 ll_i2suppgid(struct inode *i);
-void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2);
-
-static inline int ll_need_32bit_api(struct ll_sb_info *sbi)
-{
-#if BITS_PER_LONG == 32
- return 1;
-#elif defined(CONFIG_COMPAT)
- return unlikely(in_compat_syscall() ||
- (sbi->ll_flags & LL_SBI_32BIT_API));
-#else
- return unlikely(sbi->ll_flags & LL_SBI_32BIT_API);
-#endif
-}
-
-void ll_ras_enter(struct file *f);
-
-/* llite/lcommon_misc.c */
-int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp);
-int cl_ocd_update(struct obd_device *host,
- struct obd_device *watched,
- enum obd_notify_event ev, void *owner, void *data);
-int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
- struct ll_grouplock *cg);
-void cl_put_grouplock(struct ll_grouplock *cg);
-
-/* llite/lproc_llite.c */
-int ldebugfs_register_mountpoint(struct dentry *parent,
- struct super_block *sb, char *osc, char *mdc);
-void ldebugfs_unregister_mountpoint(struct ll_sb_info *sbi);
-void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count);
-void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars);
-void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
- struct ll_file_data *file, loff_t pos,
- size_t count, int rw);
-
-enum {
- LPROC_LL_DIRTY_HITS,
- LPROC_LL_DIRTY_MISSES,
- LPROC_LL_READ_BYTES,
- LPROC_LL_WRITE_BYTES,
- LPROC_LL_BRW_READ,
- LPROC_LL_BRW_WRITE,
- LPROC_LL_IOCTL,
- LPROC_LL_OPEN,
- LPROC_LL_RELEASE,
- LPROC_LL_MAP,
- LPROC_LL_LLSEEK,
- LPROC_LL_FSYNC,
- LPROC_LL_READDIR,
- LPROC_LL_SETATTR,
- LPROC_LL_TRUNC,
- LPROC_LL_FLOCK,
- LPROC_LL_GETATTR,
- LPROC_LL_CREATE,
- LPROC_LL_LINK,
- LPROC_LL_UNLINK,
- LPROC_LL_SYMLINK,
- LPROC_LL_MKDIR,
- LPROC_LL_RMDIR,
- LPROC_LL_MKNOD,
- LPROC_LL_RENAME,
- LPROC_LL_STAFS,
- LPROC_LL_ALLOC_INODE,
- LPROC_LL_SETXATTR,
- LPROC_LL_GETXATTR,
- LPROC_LL_GETXATTR_HITS,
- LPROC_LL_LISTXATTR,
- LPROC_LL_REMOVEXATTR,
- LPROC_LL_INODE_PERM,
- LPROC_LL_FILE_OPCODES
-};
-
-/* llite/dir.c */
-extern const struct file_operations ll_dir_operations;
-extern const struct inode_operations ll_dir_inode_operations;
-int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data,
- struct dir_context *ctx);
-int ll_get_mdt_idx(struct inode *inode);
-int ll_get_mdt_idx_by_fid(struct ll_sb_info *sbi, const struct lu_fid *fid);
-struct page *ll_get_dir_page(struct inode *dir, struct md_op_data *op_data,
- __u64 offset);
-void ll_release_page(struct inode *inode, struct page *page, bool remove);
-
-/* llite/namei.c */
-extern const struct inode_operations ll_special_inode_operations;
-
-struct inode *ll_iget(struct super_block *sb, ino_t hash,
- struct lustre_md *lic);
-int ll_test_inode_by_fid(struct inode *inode, void *opaque);
-int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
- void *data, int flag);
-struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de);
-void ll_update_times(struct ptlrpc_request *request, struct inode *inode);
-
-/* llite/rw.c */
-int ll_writepage(struct page *page, struct writeback_control *wbc);
-int ll_writepages(struct address_space *mapping, struct writeback_control *wbc);
-int ll_readpage(struct file *file, struct page *page);
-void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
-int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io);
-struct ll_cl_context *ll_cl_find(struct file *file);
-void ll_cl_add(struct file *file, const struct lu_env *env, struct cl_io *io);
-void ll_cl_remove(struct file *file, const struct lu_env *env);
-
-extern const struct address_space_operations ll_aops;
-
-/* llite/file.c */
-extern const struct file_operations ll_file_operations;
-extern const struct file_operations ll_file_operations_flock;
-extern const struct file_operations ll_file_operations_noflock;
-extern const struct inode_operations ll_file_inode_operations;
-int ll_have_md_lock(struct inode *inode, __u64 *bits,
- enum ldlm_mode l_req_mode);
-enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits,
- struct lustre_handle *lockh, __u64 flags,
- enum ldlm_mode mode);
-int ll_file_open(struct inode *inode, struct file *file);
-int ll_file_release(struct inode *inode, struct file *file);
-int ll_release_openhandle(struct inode *inode, struct lookup_intent *it);
-int ll_md_real_close(struct inode *inode, fmode_t fmode);
-int ll_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags);
-#ifdef CONFIG_FS_POSIX_ACL
-struct posix_acl *ll_get_acl(struct inode *inode, int type);
-int ll_set_acl(struct inode *inode, struct posix_acl *acl, int type);
-#else
-#define ll_get_acl NULL
-#define ll_set_acl NULL
-#endif /* CONFIG_FS_POSIX_ACL */
-
-int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
- const char *name, int namelen);
-int ll_get_fid_by_name(struct inode *parent, const char *name,
- int namelen, struct lu_fid *fid, struct inode **inode);
-int ll_inode_permission(struct inode *inode, int mask);
-
-int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
- __u64 flags, struct lov_user_md *lum,
- int lum_size);
-int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
- struct lov_mds_md **lmm, int *lmm_size,
- struct ptlrpc_request **request);
-int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
- int set_default);
-int ll_dir_getstripe(struct inode *inode, void **lmmp, int *lmm_size,
- struct ptlrpc_request **request, u64 valid);
-int ll_fsync(struct file *file, loff_t start, loff_t end, int data);
-int ll_merge_attr(const struct lu_env *env, struct inode *inode);
-int ll_fid2path(struct inode *inode, void __user *arg);
-int ll_data_version(struct inode *inode, __u64 *data_version, int flags);
-int ll_hsm_release(struct inode *inode);
-int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss);
-
-/* llite/dcache.c */
-
-extern const struct dentry_operations ll_d_ops;
-void ll_intent_drop_lock(struct lookup_intent *it);
-void ll_intent_release(struct lookup_intent *it);
-void ll_invalidate_aliases(struct inode *inode);
-void ll_lookup_finish_locks(struct lookup_intent *it, struct inode *inode);
-int ll_revalidate_it_finish(struct ptlrpc_request *request,
- struct lookup_intent *it, struct inode *inode);
-
-/* llite/llite_lib.c */
-extern struct super_operations lustre_super_operations;
-
-void ll_lli_init(struct ll_inode_info *lli);
-int ll_fill_super(struct super_block *sb);
-void ll_put_super(struct super_block *sb);
-void ll_kill_super(struct super_block *sb);
-struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock);
-void ll_dir_clear_lsm_md(struct inode *inode);
-void ll_clear_inode(struct inode *inode);
-int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import);
-int ll_setattr(struct dentry *de, struct iattr *attr);
-int ll_statfs(struct dentry *de, struct kstatfs *sfs);
-int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
- __u64 max_age, __u32 flags);
-int ll_update_inode(struct inode *inode, struct lustre_md *md);
-int ll_read_inode2(struct inode *inode, void *opaque);
-void ll_delete_inode(struct inode *inode);
-int ll_iocontrol(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg);
-int ll_flush_ctx(struct inode *inode);
-void ll_umount_begin(struct super_block *sb);
-int ll_remount_fs(struct super_block *sb, int *flags, char *data);
-int ll_show_options(struct seq_file *seq, struct dentry *dentry);
-void ll_dirty_page_discard_warn(struct page *page, int ioret);
-int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
- struct super_block *sb, struct lookup_intent *it);
-int ll_obd_statfs(struct inode *inode, void __user *arg);
-int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize);
-int ll_get_default_mdsize(struct ll_sb_info *sbi, int *default_mdsize);
-int ll_set_default_mdsize(struct ll_sb_info *sbi, int default_mdsize);
-int ll_process_config(struct lustre_cfg *lcfg);
-
-enum {
- LUSTRE_OPC_MKDIR = 0,
- LUSTRE_OPC_SYMLINK = 1,
- LUSTRE_OPC_MKNOD = 2,
- LUSTRE_OPC_CREATE = 3,
- LUSTRE_OPC_ANY = 5,
-};
-
-struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
- struct inode *i1, struct inode *i2,
- const char *name, size_t namelen,
- u32 mode, __u32 opc, void *data);
-void ll_finish_md_op_data(struct md_op_data *op_data);
-int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg);
-char *ll_get_fsname(struct super_block *sb, char *buf, int buflen);
-void ll_compute_rootsquash_state(struct ll_sb_info *sbi);
-void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req);
-ssize_t ll_copy_user_md(const struct lov_user_md __user *md,
- struct lov_user_md **kbuf);
-
-/* Compute expected user md size when passing in a md from user space */
-static inline ssize_t ll_lov_user_md_size(const struct lov_user_md *lum)
-{
- switch (lum->lmm_magic) {
- case LOV_USER_MAGIC_V1:
- return sizeof(struct lov_user_md_v1);
- case LOV_USER_MAGIC_V3:
- return sizeof(struct lov_user_md_v3);
- case LOV_USER_MAGIC_SPECIFIC:
- if (lum->lmm_stripe_count > LOV_MAX_STRIPE_COUNT)
- return -EINVAL;
-
- return lov_user_md_size(lum->lmm_stripe_count,
- LOV_USER_MAGIC_SPECIFIC);
- }
- return -EINVAL;
-}
-
-/* llite/llite_nfs.c */
-extern const struct export_operations lustre_export_operations;
-__u32 get_uuid2int(const char *name, int len);
-void get_uuid2fsid(const char *name, int len, __kernel_fsid_t *fsid);
-struct inode *search_inode_for_lustre(struct super_block *sb,
- const struct lu_fid *fid);
-int ll_dir_get_parent_fid(struct inode *dir, struct lu_fid *parent_fid);
-
-/* llite/symlink.c */
-extern const struct inode_operations ll_fast_symlink_inode_operations;
-
-/**
- * IO arguments for various VFS I/O interfaces.
- */
-struct vvp_io_args {
- /** normal/splice */
- union {
- struct {
- struct kiocb *via_iocb;
- struct iov_iter *via_iter;
- } normal;
- } u;
-};
-
-struct ll_cl_context {
- struct list_head lcc_list;
- void *lcc_cookie;
- const struct lu_env *lcc_env;
- struct cl_io *lcc_io;
- struct cl_page *lcc_page;
-};
-
-struct ll_thread_info {
- struct vvp_io_args lti_args;
- struct ra_io_arg lti_ria;
- struct ll_cl_context lti_io_ctx;
-};
-
-extern struct lu_context_key ll_thread_key;
-static inline struct ll_thread_info *ll_env_info(const struct lu_env *env)
-{
- struct ll_thread_info *lti;
-
- lti = lu_context_key_get(&env->le_ctx, &ll_thread_key);
- LASSERT(lti);
- return lti;
-}
-
-static inline struct vvp_io_args *ll_env_args(const struct lu_env *env)
-{
- return &ll_env_info(env)->lti_args;
-}
-
-/* llite/llite_mmap.c */
-
-int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last);
-int ll_file_mmap(struct file *file, struct vm_area_struct *vma);
-void policy_from_vma(union ldlm_policy_data *policy, struct vm_area_struct *vma,
- unsigned long addr, size_t count);
-struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
- size_t count);
-
-static inline void ll_invalidate_page(struct page *vmpage)
-{
- struct address_space *mapping = vmpage->mapping;
- loff_t offset = vmpage->index << PAGE_SHIFT;
-
- LASSERT(PageLocked(vmpage));
- if (!mapping)
- return;
-
- /*
- * truncate_complete_page() calls
- * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete().
- */
- ll_teardown_mmaps(mapping, offset, offset + PAGE_SIZE);
- truncate_complete_page(mapping, vmpage);
-}
-
-#define ll_s2sbi(sb) (s2lsi(sb)->lsi_llsbi)
-
-/* don't need an addref as the sb_info should be holding one */
-static inline struct obd_export *ll_s2dtexp(struct super_block *sb)
-{
- return ll_s2sbi(sb)->ll_dt_exp;
-}
-
-/* don't need an addref as the sb_info should be holding one */
-static inline struct obd_export *ll_s2mdexp(struct super_block *sb)
-{
- return ll_s2sbi(sb)->ll_md_exp;
-}
-
-static inline struct client_obd *sbi2mdc(struct ll_sb_info *sbi)
-{
- struct obd_device *obd = sbi->ll_md_exp->exp_obd;
-
- if (!obd)
- LBUG();
- return &obd->u.cli;
-}
-
-/* FIXME: replace the name of this with LL_SB to conform to kernel stuff */
-static inline struct ll_sb_info *ll_i2sbi(struct inode *inode)
-{
- return ll_s2sbi(inode->i_sb);
-}
-
-static inline struct obd_export *ll_i2dtexp(struct inode *inode)
-{
- return ll_s2dtexp(inode->i_sb);
-}
-
-static inline struct obd_export *ll_i2mdexp(struct inode *inode)
-{
- return ll_s2mdexp(inode->i_sb);
-}
-
-static inline struct lu_fid *ll_inode2fid(struct inode *inode)
-{
- struct lu_fid *fid;
-
- LASSERT(inode);
- fid = &ll_i2info(inode)->lli_fid;
-
- return fid;
-}
-
-static inline loff_t ll_file_maxbytes(struct inode *inode)
-{
- struct cl_object *obj = ll_i2info(inode)->lli_clob;
-
- if (!obj)
- return MAX_LFS_FILESIZE;
-
- return min_t(loff_t, cl_object_maxbytes(obj), MAX_LFS_FILESIZE);
-}
-
-/* llite/xattr.c */
-extern const struct xattr_handler *ll_xattr_handlers[];
-
-#define XATTR_USER_T 1
-#define XATTR_TRUSTED_T 2
-#define XATTR_SECURITY_T 3
-#define XATTR_ACL_ACCESS_T 4
-#define XATTR_ACL_DEFAULT_T 5
-#define XATTR_LUSTRE_T 6
-#define XATTR_OTHER_T 7
-
-ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size);
-int ll_xattr_list(struct inode *inode, const char *name, int type,
- void *buffer, size_t size, __u64 valid);
-const struct xattr_handler *get_xattr_type(const char *name);
-
-/**
- * Common IO arguments for various VFS I/O interfaces.
- */
-int cl_sb_init(struct super_block *sb);
-int cl_sb_fini(struct super_block *sb);
-
-enum ras_update_flags {
- LL_RAS_HIT = 0x1,
- LL_RAS_MMAP = 0x2
-};
-void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len);
-void ll_ra_stats_inc(struct inode *inode, enum ra_stat which);
-
-/* statahead.c */
-#define LL_SA_RPC_MIN 2
-#define LL_SA_RPC_DEF 32
-#define LL_SA_RPC_MAX 8192
-
-#define LL_SA_CACHE_BIT 5
-#define LL_SA_CACHE_SIZE (1 << LL_SA_CACHE_BIT)
-#define LL_SA_CACHE_MASK (LL_SA_CACHE_SIZE - 1)
-
-/* per inode struct, for dir only */
-struct ll_statahead_info {
- struct dentry *sai_dentry;
-	atomic_t		sai_refcount;   /* hold a refcount while
-						 * accessing this struct
-						 */
- unsigned int sai_max; /* max ahead of lookup */
- __u64 sai_sent; /* stat requests sent count */
- __u64 sai_replied; /* stat requests which received
- * reply
- */
- __u64 sai_index; /* index of statahead entry */
-	__u64			sai_index_wait; /* index of the entry the
-						 * caller is waiting for
-						 */
- __u64 sai_hit; /* hit count */
-	__u64			sai_miss;       /* miss count:
-						 * for the "ls -al" case it
-						 * includes hidden dentry
-						 * misses; for the "ls -l"
-						 * case it does not.
-						 * "sai_miss_hidden" is used
-						 * for the latter case.
-						 */
- unsigned int sai_consecutive_miss; /* consecutive miss */
- unsigned int sai_miss_hidden;/* "ls -al", but first dentry
- * is not a hidden one
- */
- unsigned int sai_skip_hidden;/* skipped hidden dentry count */
- unsigned int sai_ls_all:1, /* "ls -al", do stat-ahead for
- * hidden entries
- */
- sai_agl_valid:1,/* AGL is valid for the dir */
- sai_in_readpage:1;/* statahead in readdir() */
- wait_queue_head_t sai_waitq; /* stat-ahead wait queue */
- struct task_struct *sai_task; /* stat-ahead thread */
- struct task_struct *sai_agl_task; /* AGL thread */
- struct list_head sai_interim_entries; /* entries which got async
- * stat reply, but not
- * instantiated
- */
- struct list_head sai_entries; /* completed entries */
- struct list_head sai_agls; /* AGLs to be sent */
- struct list_head sai_cache[LL_SA_CACHE_SIZE];
- spinlock_t sai_cache_lock[LL_SA_CACHE_SIZE];
- atomic_t sai_cache_count; /* entry count in cache */
-};
-
-int ll_statahead(struct inode *dir, struct dentry **dentry, bool unplug);
-void ll_authorize_statahead(struct inode *dir, void *key);
-void ll_deauthorize_statahead(struct inode *dir, void *key);
-
-blkcnt_t dirty_cnt(struct inode *inode);
-
-int cl_glimpse_size0(struct inode *inode, int agl);
-int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
- struct inode *inode, struct cl_object *clob, int agl);
-
-static inline int cl_glimpse_size(struct inode *inode)
-{
- return cl_glimpse_size0(inode, 0);
-}
-
-static inline int cl_agl(struct inode *inode)
-{
- return cl_glimpse_size0(inode, 1);
-}
-
-static inline int ll_glimpse_size(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- int rc;
-
- down_read(&lli->lli_glimpse_sem);
- rc = cl_glimpse_size(inode);
- lli->lli_glimpse_time = jiffies;
- up_read(&lli->lli_glimpse_sem);
- return rc;
-}
-
-/*
- * A dentry may use statahead when statahead is enabled, the current process
- * has opened the parent directory, and this dentry hasn't accessed the
- * statahead cache before.
- */
-static inline bool
-dentry_may_statahead(struct inode *dir, struct dentry *dentry)
-{
- struct ll_inode_info *lli;
- struct ll_dentry_data *ldd;
-
- if (ll_i2sbi(dir)->ll_sa_max == 0)
- return false;
-
- lli = ll_i2info(dir);
-
-	/*
-	 * statahead is not allowed for this dir; there are three possible
-	 * causes:
-	 * 1. dir is not opened.
-	 * 2. statahead hit ratio is too low.
-	 * 3. a previous stat failed to start the statahead thread.
-	 */
- if (!lli->lli_sa_enabled)
- return false;
-
- /* not the same process, don't statahead */
- if (lli->lli_opendir_pid != current->pid)
- return false;
-
-	/*
-	 * When stating a dentry, the kernel may trigger 'revalidate' or
-	 * 'lookup' multiple times, e.g. for 'getattr', 'getxattr', etc.
-	 * For a patchless client, the lookup intent is not accurate, which
-	 * may misguide statahead. For example:
-	 * the 'revalidate' calls for 'getattr' and 'getxattr' of a dentry
-	 * carry the same intent -- IT_GETATTR, while one dentry should
-	 * access the statahead cache only once, otherwise the statahead
-	 * window gets messed up.
-	 * The solution is as follows:
-	 * assign 'lld_sa_generation' from 'lli_sa_generation' when a dentry
-	 * is processed with IT_GETATTR for the first time; subsequent
-	 * IT_GETATTR lookups then bypass the statahead cache by checking
-	 * 'lld_sa_generation == lli->lli_sa_generation'.
-	 */
- ldd = ll_d2d(dentry);
- if (ldd->lld_sa_generation == lli->lli_sa_generation)
- return false;
-
- return true;
-}
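-
-/*
- * Generation example (values assumed): on the first IT_GETATTR for a
- * dentry, lld_sa_generation (say 0) differs from lli_sa_generation
- * (say 3), so this returns true and the statahead cache is consulted;
- * once the statahead code copies the generation over, the follow-up
- * 'getxattr' revalidate sees equal generations and returns false here,
- * leaving the statahead window untouched.
- */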
-
-/* llite ioctl register support routine */
-enum llioc_iter {
- LLIOC_CONT = 0,
- LLIOC_STOP
-};
-
-#define LLIOC_MAX_CMD 256
-
-/*
- * Rules to write a callback function:
- *
- * Parameters:
- * @magic: The dynamic ioctl call routine will feed this value with the
- *	pointer returned by ll_iocontrol_register. Callback functions
- *	should use this data to check for a potential collision of ioctl
- *	commands. If a collision is found, the callback function should
- *	return LLIOC_CONT.
- * @rcp: The result of the ioctl command.
- *
- * Return values:
- *	If @magic matches the pointer returned by ll_iocontrol_register,
- *	the callback should return LLIOC_STOP; return LLIOC_CONT otherwise.
- */
-typedef enum llioc_iter (*llioc_callback_t)(struct inode *inode,
- struct file *file, unsigned int cmd, unsigned long arg,
- void *magic, int *rcp);
-
-/* export functions */
-/* Register an ioctl block dynamically for a regular file.
- *
- * @cmd: the array of ioctl commands
- * @count: number of commands in @cmd
- * @cb: callback function; it will be called if an ioctl command is found
- *	to belong to the command list @cmd.
- *
- * Return value:
- *	A magic pointer is returned on success;
- *	otherwise, NULL is returned.
- */
-void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd);
-void ll_iocontrol_unregister(void *magic);
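-
-/*
- * Registration sketch (hypothetical caller; MYDRV_IOC_*, mydrv_ioctl_cb
- * and do_mydrv_cmd are assumed names, not part of llite):
- *
- *	static unsigned int mydrv_cmds[] = { MYDRV_IOC_GET, MYDRV_IOC_SET };
- *	void *magic = ll_iocontrol_register(mydrv_ioctl_cb, 2, mydrv_cmds);
- *
- * and, per the rules above, inside mydrv_ioctl_cb():
- *
- *	if (magic != registered_magic)
- *		return LLIOC_CONT;	(cmd collision, not ours)
- *	*rcp = do_mydrv_cmd(cmd, arg);
- *	return LLIOC_STOP;
- *
- * ll_iocontrol_unregister(magic) removes the block again.
- */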
-
-int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
- enum cl_fsync_mode mode, int ignore_layout);
-
-/** direct write pages */
-struct ll_dio_pages {
-	/** page array to be written. We don't support
-	 * partial pages except the last one.
-	 */
- struct page **ldp_pages;
- /* offset of each page */
- loff_t *ldp_offsets;
-	/** if ldp_offsets is NULL, the pages are written
-	 * sequentially, and this is the file offset of the
-	 * first page.
-	 */
- loff_t ldp_start_offset;
- /** how many bytes are to be written. */
- size_t ldp_size;
- /** # of pages in the array. */
- int ldp_nr;
-};
-
-ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
- int rw, struct inode *inode,
- struct ll_dio_pages *pv);
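-
-/*
- * Filling sketch for a sequential direct write (illustrative only; pos,
- * count, pages and npages are assumed caller variables):
- *
- *	struct ll_dio_pages pv = {
- *		.ldp_pages        = pages,
- *		.ldp_offsets      = NULL,	(sequential, so per-page
- *						 offsets are unused)
- *		.ldp_start_offset = pos,
- *		.ldp_size         = count,
- *		.ldp_nr           = npages,
- *	};
- *	bytes = ll_direct_rw_pages(env, io, WRITE, inode, &pv);
- */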
-
-static inline int ll_file_nolock(const struct file *file)
-{
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct inode *inode = file_inode(file);
-
- return ((fd->fd_flags & LL_FILE_IGNORE_LOCK) ||
- (ll_i2sbi(inode)->ll_flags & LL_SBI_NOLCK));
-}
-
-static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
- struct lookup_intent *it, __u64 *bits)
-{
- if (!it->it_lock_set) {
- struct lustre_handle handle;
-
-		/* If this inode is a remote object, it will get two
-		 * separate locks in different namespaces: the master MDT,
-		 * where the name entry is, will grant a LOOKUP lock; the
-		 * remote MDT, where the object is, will grant an
-		 * UPDATE|PERM lock. The inode will be attached to both the
-		 * LOOKUP and the PERM locks, so revoking either lock will
-		 * cause the dcache to be cleared.
-		 */
- if (it->it_remote_lock_mode) {
- handle.cookie = it->it_remote_lock_handle;
- CDEBUG(D_DLMTRACE, "setting l_data to inode " DFID "%p for remote lock %#llx\n",
- PFID(ll_inode2fid(inode)), inode,
- handle.cookie);
- md_set_lock_data(exp, &handle, inode, NULL);
- }
-
- handle.cookie = it->it_lock_handle;
-
- CDEBUG(D_DLMTRACE,
- "setting l_data to inode " DFID "%p for lock %#llx\n",
- PFID(ll_inode2fid(inode)), inode, handle.cookie);
-
- md_set_lock_data(exp, &handle, inode, &it->it_lock_bits);
- it->it_lock_set = 1;
- }
-
- if (bits)
- *bits = it->it_lock_bits;
-}
-
-static inline int d_lustre_invalid(const struct dentry *dentry)
-{
- return ll_d2d(dentry)->lld_invalid;
-}
-
-/*
- * Mark the dentry INVALID. If the dentry refcount is zero (the normal case
- * for ll_md_blocking_ast), unhash the dentry and let the dcache reclaim it
- * later; otherwise the dput() of the last refcount will unhash the dentry
- * and kill it.
- */
-static inline void d_lustre_invalidate(struct dentry *dentry, int nested)
-{
- CDEBUG(D_DENTRY,
- "invalidate dentry %pd (%p) parent %p inode %p refc %d\n",
- dentry, dentry,
- dentry->d_parent, d_inode(dentry), d_count(dentry));
-
- spin_lock_nested(&dentry->d_lock,
- nested ? DENTRY_D_LOCK_NESTED : DENTRY_D_LOCK_NORMAL);
- ll_d2d(dentry)->lld_invalid = 1;
- if (d_count(dentry) == 0)
- __d_drop(dentry);
- spin_unlock(&dentry->d_lock);
-}
-
-static inline void d_lustre_revalidate(struct dentry *dentry)
-{
- spin_lock(&dentry->d_lock);
- LASSERT(ll_d2d(dentry));
- ll_d2d(dentry)->lld_invalid = 0;
- spin_unlock(&dentry->d_lock);
-}
-
-int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf);
-int ll_layout_refresh(struct inode *inode, __u32 *gen);
-int ll_layout_restore(struct inode *inode, loff_t start, __u64 length);
-
-int ll_xattr_init(void);
-void ll_xattr_fini(void);
-
-int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, enum cl_req_type crt);
-
-int ll_getparent(struct file *file, struct getparent __user *arg);
-
-/* lcommon_cl.c */
-int cl_setattr_ost(struct cl_object *obj, const struct iattr *attr,
- unsigned int attr_flags);
-
-extern struct lu_env *cl_inode_fini_env;
-extern u16 cl_inode_fini_refcheck;
-
-int cl_file_inode_init(struct inode *inode, struct lustre_md *md);
-void cl_inode_fini(struct inode *inode);
-
-__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32);
-__u32 cl_fid_build_gen(const struct lu_fid *fid);
-
-#endif /* LLITE_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
deleted file mode 100644
index 36066c839160..000000000000
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ /dev/null
@@ -1,2668 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/llite/llite_lib.c
- *
- * Lustre Light Super operations
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/module.h>
-#include <linux/statfs.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/random.h>
-
-#include <uapi/linux/lustre/lustre_ioctl.h>
-#include <lustre_ha.h>
-#include <lustre_dlm.h>
-#include <lprocfs_status.h>
-#include <lustre_disk.h>
-#include <uapi/linux/lustre/lustre_param.h>
-#include <lustre_log.h>
-#include <cl_object.h>
-#include <obd_cksum.h>
-#include "llite_internal.h"
-
-struct kmem_cache *ll_file_data_slab;
-struct dentry *llite_root;
-struct kset *llite_kset;
-
-#ifndef log2
-#define log2(n) ffz(~(n))
-#endif
-
-static struct ll_sb_info *ll_init_sbi(struct super_block *sb)
-{
- struct ll_sb_info *sbi = NULL;
- unsigned long pages;
- unsigned long lru_page_max;
- struct sysinfo si;
- class_uuid_t uuid;
- int i;
-
- sbi = kzalloc(sizeof(*sbi), GFP_NOFS);
- if (!sbi)
- return NULL;
-
- spin_lock_init(&sbi->ll_lock);
- mutex_init(&sbi->ll_lco.lco_lock);
- spin_lock_init(&sbi->ll_pp_extent_lock);
- spin_lock_init(&sbi->ll_process_lock);
- sbi->ll_rw_stats_on = 0;
-
- si_meminfo(&si);
- pages = si.totalram - si.totalhigh;
- lru_page_max = pages / 2;
-
- sbi->ll_cache = cl_cache_init(lru_page_max);
- if (!sbi->ll_cache) {
- kfree(sbi);
- return NULL;
- }
-
- sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32,
- SBI_DEFAULT_READAHEAD_MAX);
- sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file;
- sbi->ll_ra_info.ra_max_read_ahead_whole_pages =
- SBI_DEFAULT_READAHEAD_WHOLE_MAX;
-
- ll_generate_random_uuid(uuid);
- class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
- CDEBUG(D_CONFIG, "generated uuid: %s\n", sbi->ll_sb_uuid.uuid);
-
- sbi->ll_flags |= LL_SBI_VERBOSE;
- sbi->ll_flags |= LL_SBI_CHECKSUM;
-
- sbi->ll_flags |= LL_SBI_LRU_RESIZE;
- sbi->ll_flags |= LL_SBI_LAZYSTATFS;
-
- for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
- spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
- pp_r_hist.oh_lock);
- spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
- pp_w_hist.oh_lock);
- }
-
- /* metadata statahead is enabled by default */
- sbi->ll_sa_max = LL_SA_RPC_DEF;
- atomic_set(&sbi->ll_sa_total, 0);
- atomic_set(&sbi->ll_sa_wrong, 0);
- atomic_set(&sbi->ll_sa_running, 0);
- atomic_set(&sbi->ll_agl_total, 0);
- sbi->ll_flags |= LL_SBI_AGL_ENABLED;
-
- /* root squash */
- sbi->ll_squash.rsi_uid = 0;
- sbi->ll_squash.rsi_gid = 0;
- INIT_LIST_HEAD(&sbi->ll_squash.rsi_nosquash_nids);
- init_rwsem(&sbi->ll_squash.rsi_sem);
-
- sbi->ll_sb = sb;
-
- return sbi;
-}
-
-static void ll_free_sbi(struct super_block *sb)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
-
- if (sbi->ll_cache) {
- if (!list_empty(&sbi->ll_squash.rsi_nosquash_nids))
- cfs_free_nidlist(&sbi->ll_squash.rsi_nosquash_nids);
- cl_cache_decref(sbi->ll_cache);
- sbi->ll_cache = NULL;
- }
-
- kfree(sbi);
-}
-
-static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
-{
- struct inode *root = NULL;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct obd_device *obd;
- struct obd_statfs *osfs = NULL;
- struct ptlrpc_request *request = NULL;
- struct obd_connect_data *data = NULL;
- struct obd_uuid *uuid;
- struct md_op_data *op_data;
- struct lustre_md lmd;
- u64 valid;
- int size, err, checksum;
-
- obd = class_name2obd(md);
- if (!obd) {
- CERROR("MD %s: not setup or attached\n", md);
- return -EINVAL;
- }
-
- data = kzalloc(sizeof(*data), GFP_NOFS);
- if (!data)
- return -ENOMEM;
-
- osfs = kzalloc(sizeof(*osfs), GFP_NOFS);
- if (!osfs) {
- kfree(data);
- return -ENOMEM;
- }
-
- /* indicate the features supported by this client */
- data->ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH |
- OBD_CONNECT_ATTRFID |
- OBD_CONNECT_VERSION | OBD_CONNECT_BRW_SIZE |
- OBD_CONNECT_CANCELSET | OBD_CONNECT_FID |
- OBD_CONNECT_AT | OBD_CONNECT_LOV_V3 |
- OBD_CONNECT_VBR | OBD_CONNECT_FULL20 |
- OBD_CONNECT_64BITHASH |
- OBD_CONNECT_EINPROGRESS |
- OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
- OBD_CONNECT_LAYOUTLOCK |
- OBD_CONNECT_PINGLESS |
- OBD_CONNECT_MAX_EASIZE |
- OBD_CONNECT_FLOCK_DEAD |
- OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK |
- OBD_CONNECT_OPEN_BY_FID |
- OBD_CONNECT_DIR_STRIPE |
- OBD_CONNECT_BULK_MBITS;
-
- if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
- data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
-#ifdef CONFIG_FS_POSIX_ACL
- data->ocd_connect_flags |= OBD_CONNECT_ACL | OBD_CONNECT_UMASK |
- OBD_CONNECT_LARGE_ACL;
-#endif
-
- if (OBD_FAIL_CHECK(OBD_FAIL_MDC_LIGHTWEIGHT))
- /* flag the mdc connection as lightweight; only used for
- * testing purposes, use with care
- */
- data->ocd_connect_flags |= OBD_CONNECT_LIGHTWEIGHT;
-
- data->ocd_ibits_known = MDS_INODELOCK_FULL;
- data->ocd_version = LUSTRE_VERSION_CODE;
-
- if (sb_rdonly(sb))
- data->ocd_connect_flags |= OBD_CONNECT_RDONLY;
- if (sbi->ll_flags & LL_SBI_USER_XATTR)
- data->ocd_connect_flags |= OBD_CONNECT_XATTR;
-
- if (sbi->ll_flags & LL_SBI_FLOCK)
- sbi->ll_fop = &ll_file_operations_flock;
- else if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
- sbi->ll_fop = &ll_file_operations;
- else
- sbi->ll_fop = &ll_file_operations_noflock;
-
- /* always ping even if the server has suppress_pings set */
- if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
- data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;
-
- data->ocd_brw_size = MD_MAX_BRW_SIZE;
-
- err = obd_connect(NULL, &sbi->ll_md_exp, obd, &sbi->ll_sb_uuid,
- data, NULL);
- if (err == -EBUSY) {
- LCONSOLE_ERROR_MSG(0x14f,
- "An MDT (md %s) is performing recovery, of which this client is not a part. Please wait for recovery to complete, abort, or time out.\n",
- md);
- goto out;
- }
-
- if (err) {
- CERROR("cannot connect to %s: rc = %d\n", md, err);
- goto out;
- }
-
- sbi->ll_md_exp->exp_connect_data = *data;
-
- err = obd_fid_init(sbi->ll_md_exp->exp_obd, sbi->ll_md_exp,
- LUSTRE_SEQ_METADATA);
- if (err) {
- CERROR("%s: Can't init metadata layer FID infrastructure, rc = %d\n",
- sbi->ll_md_exp->exp_obd->obd_name, err);
- goto out_md;
- }
-
- /* For mount, we only need fs info from MDT0; in DNE this also
- * ensures that the client can be mounted as long as MDT0 is
- * available
- */
- err = obd_statfs(NULL, sbi->ll_md_exp, osfs,
- get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ,
- OBD_STATFS_FOR_MDT0);
- if (err)
- goto out_md_fid;
-
- /* This needs to be after statfs to ensure connect has finished.
- * Note that "data" does NOT contain the valid connect reply.
- * If connecting to a 1.8 server there will be no LMV device, so
- * we can access the MDC export directly and exp_connect_flags will
- * be non-zero, but if accessing an upgraded 2.1 server it will
- * have the correct flags filled in.
- * XXX: fill in the LMV exp_connect_flags from MDC(s).
- */
- valid = exp_connect_flags(sbi->ll_md_exp) & CLIENT_CONNECT_MDT_REQD;
- if (exp_connect_flags(sbi->ll_md_exp) != 0 &&
- valid != CLIENT_CONNECT_MDT_REQD) {
- char *buf;
-
- buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
- if (!buf) {
- err = -ENOMEM;
- goto out_md_fid;
- }
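- /* the XOR leaves exactly the required flags the server lacks */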
- obd_connect_flags2str(buf, PAGE_SIZE,
- valid ^ CLIENT_CONNECT_MDT_REQD, ",");
- LCONSOLE_ERROR_MSG(0x170,
- "Server %s does not support feature(s) needed for correct operation of this client (%s). Please upgrade server or downgrade client.\n",
- sbi->ll_md_exp->exp_obd->obd_name, buf);
- kfree(buf);
- err = -EPROTO;
- goto out_md_fid;
- }
-
- size = sizeof(*data);
- err = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_CONN_DATA),
- KEY_CONN_DATA, &size, data);
- if (err) {
- CERROR("%s: Get connect data failed: rc = %d\n",
- sbi->ll_md_exp->exp_obd->obd_name, err);
- goto out_md_fid;
- }
-
- LASSERT(osfs->os_bsize);
- sb->s_blocksize = osfs->os_bsize;
- sb->s_blocksize_bits = log2(osfs->os_bsize);
- sb->s_magic = LL_SUPER_MAGIC;
- sb->s_maxbytes = MAX_LFS_FILESIZE;
- sbi->ll_namelen = osfs->os_namelen;
- sbi->ll_mnt.mnt = current->fs->root.mnt;
-
- if ((sbi->ll_flags & LL_SBI_USER_XATTR) &&
- !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) {
- LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n");
- sbi->ll_flags &= ~LL_SBI_USER_XATTR;
- }
-
- if (data->ocd_connect_flags & OBD_CONNECT_ACL) {
- sb->s_flags |= SB_POSIXACL;
- sbi->ll_flags |= LL_SBI_ACL;
- } else {
- LCONSOLE_INFO("client wants to enable acl, but mdt not!\n");
- sb->s_flags &= ~SB_POSIXACL;
- sbi->ll_flags &= ~LL_SBI_ACL;
- }
-
- if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH)
- sbi->ll_flags |= LL_SBI_64BIT_HASH;
-
- if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
- sbi->ll_md_brw_pages = data->ocd_brw_size >> PAGE_SHIFT;
- else
- sbi->ll_md_brw_pages = 1;
-
- if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK)
- sbi->ll_flags |= LL_SBI_LAYOUT_LOCK;
-
- if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) {
- if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) {
- LCONSOLE_INFO(
- "%s: disabling xattr cache due to unknown maximum xattr size.\n",
- dt);
- } else {
- sbi->ll_flags |= LL_SBI_XATTR_CACHE;
- sbi->ll_xattr_cache_enabled = 1;
- }
- }
-
- obd = class_name2obd(dt);
- if (!obd) {
- CERROR("DT %s: not setup or attached\n", dt);
- err = -ENODEV;
- goto out_md_fid;
- }
-
- data->ocd_connect_flags = OBD_CONNECT_GRANT | OBD_CONNECT_VERSION |
- OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE |
- OBD_CONNECT_CANCELSET | OBD_CONNECT_FID |
- OBD_CONNECT_SRVLOCK | OBD_CONNECT_TRUNCLOCK|
- OBD_CONNECT_AT | OBD_CONNECT_OSS_CAPA |
- OBD_CONNECT_VBR | OBD_CONNECT_FULL20 |
- OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES |
- OBD_CONNECT_EINPROGRESS |
- OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
- OBD_CONNECT_LAYOUTLOCK |
- OBD_CONNECT_PINGLESS | OBD_CONNECT_LFSCK |
- OBD_CONNECT_BULK_MBITS;
-
- if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) {
- /* OBD_CONNECT_CKSUM should always be set, even if checksums are
- * disabled by default, because they can still be enabled on the
- * fly via /sys. As a consequence, we still need to come to an
- * agreement on the supported algorithms at connect time
- */
- data->ocd_connect_flags |= OBD_CONNECT_CKSUM;
-
- if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY))
- data->ocd_cksum_types = OBD_CKSUM_ADLER;
- else
- data->ocd_cksum_types = cksum_types_supported_client();
- }
-
- data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
-
- /* always ping even if the server has suppress_pings set */
- if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
- data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;
-
- CDEBUG(D_RPCTRACE,
- "ocd_connect_flags: %#llx ocd_version: %d ocd_grant: %d\n",
- data->ocd_connect_flags,
- data->ocd_version, data->ocd_grant);
-
- obd->obd_upcall.onu_owner = &sbi->ll_lco;
- obd->obd_upcall.onu_upcall = cl_ocd_update;
-
- data->ocd_brw_size = DT_MAX_BRW_SIZE;
-
- err = obd_connect(NULL, &sbi->ll_dt_exp, obd, &sbi->ll_sb_uuid, data,
- NULL);
- if (err == -EBUSY) {
- LCONSOLE_ERROR_MSG(0x150,
- "An OST (dt %s) is performing recovery, of which this client is not a part. Please wait for recovery to complete, abort, or time out.\n",
- dt);
- goto out_md_fid;
- } else if (err) {
- CERROR("%s: Cannot connect to %s: rc = %d\n",
- sbi->ll_dt_exp->exp_obd->obd_name, dt, err);
- goto out_md_fid;
- }
-
- sbi->ll_dt_exp->exp_connect_data = *data;
-
- err = obd_fid_init(sbi->ll_dt_exp->exp_obd, sbi->ll_dt_exp,
- LUSTRE_SEQ_METADATA);
- if (err) {
- CERROR("%s: Can't init data layer FID infrastructure, rc = %d\n",
- sbi->ll_dt_exp->exp_obd->obd_name, err);
- goto out_dt;
- }
-
- mutex_lock(&sbi->ll_lco.lco_lock);
- sbi->ll_lco.lco_flags = data->ocd_connect_flags;
- sbi->ll_lco.lco_md_exp = sbi->ll_md_exp;
- sbi->ll_lco.lco_dt_exp = sbi->ll_dt_exp;
- mutex_unlock(&sbi->ll_lco.lco_lock);
-
- fid_zero(&sbi->ll_root_fid);
- err = md_getstatus(sbi->ll_md_exp, &sbi->ll_root_fid);
- if (err) {
- CERROR("cannot mds_connect: rc = %d\n", err);
- goto out_lock_cn_cb;
- }
- if (!fid_is_sane(&sbi->ll_root_fid)) {
- CERROR("%s: Invalid root fid " DFID " during mount\n",
- sbi->ll_md_exp->exp_obd->obd_name,
- PFID(&sbi->ll_root_fid));
- err = -EINVAL;
- goto out_lock_cn_cb;
- }
- CDEBUG(D_SUPER, "rootfid " DFID "\n", PFID(&sbi->ll_root_fid));
-
- sb->s_op = &lustre_super_operations;
- sb->s_xattr = ll_xattr_handlers;
-#if THREAD_SIZE >= 8192 /*b=17630*/
- sb->s_export_op = &lustre_export_operations;
-#endif
-
- /* make root inode
- * XXX: move this to after cbd setup?
- */
- valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMODEASIZE;
- if (sbi->ll_flags & LL_SBI_ACL)
- valid |= OBD_MD_FLACL;
-
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- if (!op_data) {
- err = -ENOMEM;
- goto out_lock_cn_cb;
- }
-
- op_data->op_fid1 = sbi->ll_root_fid;
- op_data->op_mode = 0;
- op_data->op_valid = valid;
-
- err = md_getattr(sbi->ll_md_exp, op_data, &request);
- kfree(op_data);
- if (err) {
- CERROR("%s: md_getattr failed for root: rc = %d\n",
- sbi->ll_md_exp->exp_obd->obd_name, err);
- goto out_lock_cn_cb;
- }
-
- err = md_get_lustre_md(sbi->ll_md_exp, request, sbi->ll_dt_exp,
- sbi->ll_md_exp, &lmd);
- if (err) {
- CERROR("failed to understand root inode md: rc = %d\n", err);
- ptlrpc_req_finished(request);
- goto out_lock_cn_cb;
- }
-
- LASSERT(fid_is_sane(&sbi->ll_root_fid));
- root = ll_iget(sb, cl_fid_build_ino(&sbi->ll_root_fid,
- sbi->ll_flags & LL_SBI_32BIT_API),
- &lmd);
- md_free_lustre_md(sbi->ll_md_exp, &lmd);
- ptlrpc_req_finished(request);
-
- if (IS_ERR(root)) {
-#ifdef CONFIG_FS_POSIX_ACL
- if (lmd.posix_acl) {
- posix_acl_release(lmd.posix_acl);
- lmd.posix_acl = NULL;
- }
-#endif
- err = -EBADF;
- CERROR("lustre_lite: bad iget4 for root\n");
- goto out_root;
- }
-
- checksum = sbi->ll_flags & LL_SBI_CHECKSUM;
- err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM),
- KEY_CHECKSUM, sizeof(checksum), &checksum,
- NULL);
- if (err) {
- CERROR("%s: Set checksum failed: rc = %d\n",
- sbi->ll_dt_exp->exp_obd->obd_name, err);
- goto out_root;
- }
- cl_sb_init(sb);
-
- err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CACHE_SET),
- KEY_CACHE_SET, sizeof(*sbi->ll_cache),
- sbi->ll_cache, NULL);
- if (err) {
- CERROR("%s: Set cache_set failed: rc = %d\n",
- sbi->ll_dt_exp->exp_obd->obd_name, err);
- goto out_root;
- }
-
- sb->s_root = d_make_root(root);
- if (!sb->s_root) {
- CERROR("%s: can't make root dentry\n",
- ll_get_fsname(sb, NULL, 0));
- err = -ENOMEM;
- goto out_lock_cn_cb;
- }
-
- sbi->ll_sdev_orig = sb->s_dev;
-
- /* We set sb->s_dev equal on all lustre clients in order to support
- * NFS export clustering. NFSD requires that the FSID be the same
- * on all clients.
- */
- /* s_dev is also used in lt_compare() to compare two fs, but that is
- * only a node-local comparison.
- */
- uuid = obd_get_uuid(sbi->ll_md_exp);
- if (uuid) {
- sb->s_dev = get_uuid2int(uuid->uuid, strlen(uuid->uuid));
- get_uuid2fsid(uuid->uuid, strlen(uuid->uuid), &sbi->ll_fsid);
- }
-
- kfree(data);
- kfree(osfs);
-
- if (llite_root) {
- err = ldebugfs_register_mountpoint(llite_root, sb, dt, md);
- if (err < 0) {
- CERROR("%s: could not register mount in debugfs: "
- "rc = %d\n", ll_get_fsname(sb, NULL, 0), err);
- err = 0;
- }
- }
-
- return err;
-out_root:
- iput(root);
-out_lock_cn_cb:
- obd_fid_fini(sbi->ll_dt_exp->exp_obd);
-out_dt:
- obd_disconnect(sbi->ll_dt_exp);
- sbi->ll_dt_exp = NULL;
-out_md_fid:
- obd_fid_fini(sbi->ll_md_exp->exp_obd);
-out_md:
- obd_disconnect(sbi->ll_md_exp);
- sbi->ll_md_exp = NULL;
-out:
- kfree(data);
- kfree(osfs);
- return err;
-}
-
-int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize)
-{
- int size, rc;
-
- size = sizeof(*lmmsize);
- rc = obd_get_info(NULL, sbi->ll_dt_exp, sizeof(KEY_MAX_EASIZE),
- KEY_MAX_EASIZE, &size, lmmsize);
- if (rc) {
- CERROR("%s: cannot get max LOV EA size: rc = %d\n",
- sbi->ll_dt_exp->exp_obd->obd_name, rc);
- return rc;
- }
-
- size = sizeof(int);
- rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE),
- KEY_MAX_EASIZE, &size, lmmsize);
- if (rc)
- CERROR("Get max mdsize error rc %d\n", rc);
-
- return rc;
-}
-
-/**
- * Get the value of the default_easize parameter.
- *
- * \see client_obd::cl_default_mds_easize
- *
- * \param[in] sbi superblock info for this filesystem
- * \param[out] lmmsize pointer to storage location for value
- *
- * \retval 0 on success
- * \retval negative negated errno on failure
- */
-int ll_get_default_mdsize(struct ll_sb_info *sbi, int *lmmsize)
-{
- int size, rc;
-
- size = sizeof(int);
- rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_DEFAULT_EASIZE),
- KEY_DEFAULT_EASIZE, &size, lmmsize);
- if (rc)
- CERROR("Get default mdsize error rc %d\n", rc);
-
- return rc;
-}
-
-/**
- * Set the default_easize parameter to the given value.
- *
- * \see client_obd::cl_default_mds_easize
- *
- * \param[in] sbi superblock info for this filesystem
- * \param[in] lmmsize the size to set
- *
- * \retval 0 on success
- * \retval negative negated errno on failure
- */
-int ll_set_default_mdsize(struct ll_sb_info *sbi, int lmmsize)
-{
- if (lmmsize < sizeof(struct lov_mds_md) ||
- lmmsize > OBD_MAX_DEFAULT_EA_SIZE)
- return -EINVAL;
-
- return obd_set_info_async(NULL, sbi->ll_md_exp,
- sizeof(KEY_DEFAULT_EASIZE),
- KEY_DEFAULT_EASIZE,
- sizeof(int), &lmmsize, NULL);
-}
-
-static void client_common_put_super(struct super_block *sb)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
-
- cl_sb_fini(sb);
-
- obd_fid_fini(sbi->ll_dt_exp->exp_obd);
- obd_disconnect(sbi->ll_dt_exp);
- sbi->ll_dt_exp = NULL;
-
- ldebugfs_unregister_mountpoint(sbi);
-
- obd_fid_fini(sbi->ll_md_exp->exp_obd);
- obd_disconnect(sbi->ll_md_exp);
- sbi->ll_md_exp = NULL;
-}
-
-void ll_kill_super(struct super_block *sb)
-{
- struct ll_sb_info *sbi;
-
- /* sb not initialized? */
- if (!(sb->s_flags & SB_ACTIVE))
- return;
-
- sbi = ll_s2sbi(sb);
- /* we need to restore s_dev (changed for clustered NFS) before
- * put_super, because newer kernels cache s_dev, and changing sb->s_dev
- * in put_super does not affect the real device removal
- */
- if (sbi) {
- sb->s_dev = sbi->ll_sdev_orig;
- sbi->ll_umounting = 1;
-
- /* wait for running statahead threads to quit */
- while (atomic_read(&sbi->ll_sa_running) > 0) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC >> 3));
- }
- }
-}
-
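-/* Return @fl if the mount-option string @data begins with @opt, else 0. */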
-static inline int ll_set_opt(const char *opt, char *data, int fl)
-{
- if (strncmp(opt, data, strlen(opt)) != 0)
- return 0;
- else
- return fl;
-}
-
-/* non-client-specific mount options are parsed in lmd_parse */
-static int ll_options(char *options, int *flags)
-{
- int tmp;
- char *s1 = options, *s2;
-
- if (!options)
- return 0;
-
- CDEBUG(D_CONFIG, "Parsing opts %s\n", options);
-
- while (*s1) {
- CDEBUG(D_SUPER, "next opt=%s\n", s1);
- tmp = ll_set_opt("nolock", s1, LL_SBI_NOLCK);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("flock", s1, LL_SBI_FLOCK);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("localflock", s1, LL_SBI_LOCALFLOCK);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("noflock", s1,
- LL_SBI_FLOCK | LL_SBI_LOCALFLOCK);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
- tmp = ll_set_opt("user_xattr", s1, LL_SBI_USER_XATTR);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("nouser_xattr", s1, LL_SBI_USER_XATTR);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
- tmp = ll_set_opt("context", s1, 1);
- if (tmp)
- goto next;
- tmp = ll_set_opt("fscontext", s1, 1);
- if (tmp)
- goto next;
- tmp = ll_set_opt("defcontext", s1, 1);
- if (tmp)
- goto next;
- tmp = ll_set_opt("rootcontext", s1, 1);
- if (tmp)
- goto next;
- tmp = ll_set_opt("user_fid2path", s1, LL_SBI_USER_FID2PATH);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("nouser_fid2path", s1, LL_SBI_USER_FID2PATH);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
-
- tmp = ll_set_opt("checksum", s1, LL_SBI_CHECKSUM);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("nochecksum", s1, LL_SBI_CHECKSUM);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
- tmp = ll_set_opt("lruresize", s1, LL_SBI_LRU_RESIZE);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("nolruresize", s1, LL_SBI_LRU_RESIZE);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
- tmp = ll_set_opt("lazystatfs", s1, LL_SBI_LAZYSTATFS);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("nolazystatfs", s1, LL_SBI_LAZYSTATFS);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
- tmp = ll_set_opt("32bitapi", s1, LL_SBI_32BIT_API);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("verbose", s1, LL_SBI_VERBOSE);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("noverbose", s1, LL_SBI_VERBOSE);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
- tmp = ll_set_opt("always_ping", s1, LL_SBI_ALWAYS_PING);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n",
- s1);
- return -EINVAL;
-
-next:
- /* Find next opt */
- s2 = strchr(s1, ',');
- if (!s2)
- break;
- s1 = s2 + 1;
- }
- return 0;
-}
-
-void ll_lli_init(struct ll_inode_info *lli)
-{
- lli->lli_inode_magic = LLI_INODE_MAGIC;
- lli->lli_flags = 0;
- spin_lock_init(&lli->lli_lock);
- lli->lli_posix_acl = NULL;
- /* Do not set lli_fid, it has been initialized already. */
- fid_zero(&lli->lli_pfid);
- lli->lli_mds_read_och = NULL;
- lli->lli_mds_write_och = NULL;
- lli->lli_mds_exec_och = NULL;
- lli->lli_open_fd_read_count = 0;
- lli->lli_open_fd_write_count = 0;
- lli->lli_open_fd_exec_count = 0;
- mutex_init(&lli->lli_och_mutex);
- spin_lock_init(&lli->lli_agl_lock);
- spin_lock_init(&lli->lli_layout_lock);
- ll_layout_version_set(lli, CL_LAYOUT_GEN_NONE);
- lli->lli_clob = NULL;
-
- init_rwsem(&lli->lli_xattrs_list_rwsem);
- mutex_init(&lli->lli_xattrs_enq_lock);
-
- LASSERT(lli->lli_vfs_inode.i_mode != 0);
- if (S_ISDIR(lli->lli_vfs_inode.i_mode)) {
- mutex_init(&lli->lli_readdir_mutex);
- lli->lli_opendir_key = NULL;
- lli->lli_sai = NULL;
- spin_lock_init(&lli->lli_sa_lock);
- lli->lli_opendir_pid = 0;
- lli->lli_sa_enabled = 0;
- lli->lli_def_stripe_offset = -1;
- } else {
- mutex_init(&lli->lli_size_mutex);
- lli->lli_symlink_name = NULL;
- init_rwsem(&lli->lli_trunc_sem);
- range_lock_tree_init(&lli->lli_write_tree);
- init_rwsem(&lli->lli_glimpse_sem);
- lli->lli_glimpse_time = 0;
- INIT_LIST_HEAD(&lli->lli_agl_list);
- lli->lli_agl_index = 0;
- lli->lli_async_rc = 0;
- }
- mutex_init(&lli->lli_layout_mutex);
-}
-
-int ll_fill_super(struct super_block *sb)
-{
- struct lustre_profile *lprof = NULL;
- struct lustre_sb_info *lsi = s2lsi(sb);
- struct ll_sb_info *sbi;
- char *dt = NULL, *md = NULL;
- char *profilenm = get_profile_name(sb);
- struct config_llog_instance *cfg;
- int err;
- static atomic_t ll_bdi_num = ATOMIC_INIT(0);
-
- CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
-
- err = ptlrpc_inc_ref();
- if (err)
- return err;
-
- cfg = kzalloc(sizeof(*cfg), GFP_NOFS);
- if (!cfg) {
- err = -ENOMEM;
- goto out_put;
- }
-
- try_module_get(THIS_MODULE);
-
- /* client additional sb info */
- sbi = ll_init_sbi(sb);
- lsi->lsi_llsbi = sbi;
- if (!sbi) {
- module_put(THIS_MODULE);
- kfree(cfg);
- err = -ENOMEM;
- goto out_put;
- }
-
- err = ll_options(lsi->lsi_lmd->lmd_opts, &sbi->ll_flags);
- if (err)
- goto out_free;
-
- err = super_setup_bdi_name(sb, "lustre-%d",
- atomic_inc_return(&ll_bdi_num));
- if (err)
- goto out_free;
-
- /* kernels >= 2.6.38 store dentry operations in sb->s_d_op. */
- sb->s_d_op = &ll_d_ops;
-
- /* Generate a string unique to this super, in case some joker tries
- * to mount the same fs at two mount points.
- * Use the address of the super itself.
- */
- cfg->cfg_instance = sb;
- cfg->cfg_uuid = lsi->lsi_llsbi->ll_sb_uuid;
- cfg->cfg_callback = class_config_llog_handler;
- /* set up client obds */
- err = lustre_process_log(sb, profilenm, cfg);
- if (err < 0)
- goto out_free;
-
- /* Profile set with LCFG_MOUNTOPT so we can find our mdc and osc obds */
- lprof = class_get_profile(profilenm);
- if (!lprof) {
- LCONSOLE_ERROR_MSG(0x156,
- "The client profile '%s' could not be read from the MGS. Does that filesystem exist?\n",
- profilenm);
- err = -EINVAL;
- goto out_free;
- }
- CDEBUG(D_CONFIG, "Found profile %s: mdc=%s osc=%s\n", profilenm,
- lprof->lp_md, lprof->lp_dt);
-
- dt = kasprintf(GFP_NOFS, "%s-%p", lprof->lp_dt, cfg->cfg_instance);
- if (!dt) {
- err = -ENOMEM;
- goto out_free;
- }
-
- md = kasprintf(GFP_NOFS, "%s-%p", lprof->lp_md, cfg->cfg_instance);
- if (!md) {
- err = -ENOMEM;
- goto out_free;
- }
-
- /* connections, registrations, sb setup */
- err = client_common_fill_super(sb, md, dt);
- if (!err)
- sbi->ll_client_common_fill_super_succeeded = 1;
-
-out_free:
- kfree(md);
- kfree(dt);
- if (lprof)
- class_put_profile(lprof);
- if (err)
- ll_put_super(sb);
- else if (sbi->ll_flags & LL_SBI_VERBOSE)
- LCONSOLE_WARN("Mounted %s\n", profilenm);
-
- kfree(cfg);
-out_put:
- if (err)
- ptlrpc_dec_ref();
- return err;
-} /* ll_fill_super */
-
-void ll_put_super(struct super_block *sb)
-{
- struct config_llog_instance cfg, params_cfg;
- struct obd_device *obd;
- struct lustre_sb_info *lsi = s2lsi(sb);
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- char *profilenm = get_profile_name(sb);
- int next, force = 1, rc = 0;
- long ccc_count;
-
- CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
-
- cfg.cfg_instance = sb;
- lustre_end_log(sb, profilenm, &cfg);
-
- params_cfg.cfg_instance = sb;
- lustre_end_log(sb, PARAMS_FILENAME, &params_cfg);
-
- if (sbi->ll_md_exp) {
- obd = class_exp2obd(sbi->ll_md_exp);
- if (obd)
- force = obd->obd_force;
- }
-
- /* Wait for unstable pages to be committed to stable storage */
- if (!force)
- rc = l_wait_event_abortable(sbi->ll_cache->ccc_unstable_waitq,
- !atomic_long_read(&sbi->ll_cache->ccc_unstable_nr));
-
- ccc_count = atomic_long_read(&sbi->ll_cache->ccc_unstable_nr);
- if (!force && rc != -ERESTARTSYS)
- LASSERTF(!ccc_count, "count: %li\n", ccc_count);
-
- /* We need to set force before the lov_disconnect in
- * lustre_common_put_super, since l_d cleans up osc's as well.
- */
- if (force) {
- next = 0;
- while ((obd = class_devices_in_group(&sbi->ll_sb_uuid,
- &next)) != NULL) {
- obd->obd_force = force;
- }
- }
-
- if (sbi->ll_client_common_fill_super_succeeded) {
- /* Only if client_common_fill_super succeeded */
- client_common_put_super(sb);
- }
-
- next = 0;
- while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)))
- class_manual_cleanup(obd);
-
- if (sbi->ll_flags & LL_SBI_VERBOSE)
- LCONSOLE_WARN("Unmounted %s\n", profilenm ? profilenm : "");
-
- if (profilenm)
- class_del_profile(profilenm);
-
- ll_free_sbi(sb);
- lsi->lsi_llsbi = NULL;
-
- lustre_common_put_super(sb);
-
- cl_env_cache_purge(~0);
-
- module_put(THIS_MODULE);
-
- ptlrpc_dec_ref();
-} /* ll_put_super */
-
-struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock)
-{
- struct inode *inode = NULL;
-
- /* NOTE: we depend on atomic igrab() -bzzz */
- lock_res_and_lock(lock);
- if (lock->l_resource->lr_lvb_inode) {
- struct ll_inode_info *lli;
-
- lli = ll_i2info(lock->l_resource->lr_lvb_inode);
- if (lli->lli_inode_magic == LLI_INODE_MAGIC) {
- inode = igrab(lock->l_resource->lr_lvb_inode);
- } else {
- inode = lock->l_resource->lr_lvb_inode;
- LDLM_DEBUG_LIMIT(inode->i_state & I_FREEING ? D_INFO :
- D_WARNING, lock,
- "lr_lvb_inode %p is bogus: magic %08x",
- lock->l_resource->lr_lvb_inode,
- lli->lli_inode_magic);
- inode = NULL;
- }
- }
- unlock_res_and_lock(lock);
- return inode;
-}
-
-void ll_dir_clear_lsm_md(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
-
- LASSERT(S_ISDIR(inode->i_mode));
-
- if (lli->lli_lsm_md) {
- lmv_free_memmd(lli->lli_lsm_md);
- lli->lli_lsm_md = NULL;
- }
-}
-
-static struct inode *ll_iget_anon_dir(struct super_block *sb,
- const struct lu_fid *fid,
- struct lustre_md *md)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct mdt_body *body = md->body;
- struct inode *inode;
- ino_t ino;
-
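- /* derive the inode number from the FID; kept within 32 bits
- * when the 32bitapi mount flag is set
- */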
- ino = cl_fid_build_ino(fid, sbi->ll_flags & LL_SBI_32BIT_API);
- inode = iget_locked(sb, ino);
- if (!inode) {
- CERROR("%s: failed get simple inode " DFID ": rc = -ENOENT\n",
- ll_get_fsname(sb, NULL, 0), PFID(fid));
- return ERR_PTR(-ENOENT);
- }
-
- if (inode->i_state & I_NEW) {
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lmv_stripe_md *lsm = md->lmv;
-
- inode->i_mode = (inode->i_mode & ~S_IFMT) |
- (body->mbo_mode & S_IFMT);
- LASSERTF(S_ISDIR(inode->i_mode), "Not slave inode " DFID "\n",
- PFID(fid));
-
- LTIME_S(inode->i_mtime) = 0;
- LTIME_S(inode->i_atime) = 0;
- LTIME_S(inode->i_ctime) = 0;
- inode->i_rdev = 0;
-
- inode->i_op = &ll_dir_inode_operations;
- inode->i_fop = &ll_dir_operations;
- lli->lli_fid = *fid;
- ll_lli_init(lli);
-
- LASSERT(lsm);
- /* master object FID */
- lli->lli_pfid = body->mbo_fid1;
- CDEBUG(D_INODE, "lli %p slave " DFID " master " DFID "\n",
- lli, PFID(fid), PFID(&lli->lli_pfid));
- unlock_new_inode(inode);
- }
-
- return inode;
-}
-
-static int ll_init_lsm_md(struct inode *inode, struct lustre_md *md)
-{
- struct lmv_stripe_md *lsm = md->lmv;
- struct lu_fid *fid;
- int i;
-
- LASSERT(lsm);
- /*
- * XXX sigh, this lsm_root initialization should be in the
- * LMV layer, but it needs ll_iget, so we put it here
- * for now.
- */
- for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
- fid = &lsm->lsm_md_oinfo[i].lmo_fid;
- LASSERT(!lsm->lsm_md_oinfo[i].lmo_root);
- /* Unfortunately ll_iget will call ll_update_inode,
- * where the initialization of the slave inode is slightly
- * different, so it resets lsm_md to NULL to avoid
- * initializing the lsm for the slave inode.
- */
- /* For a migrating inode, the master stripe and master object
- * will be the same, so we only need to assign this inode
- */
- if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION && !i)
- lsm->lsm_md_oinfo[i].lmo_root = inode;
- else
- lsm->lsm_md_oinfo[i].lmo_root =
- ll_iget_anon_dir(inode->i_sb, fid, md);
- if (IS_ERR(lsm->lsm_md_oinfo[i].lmo_root)) {
- int rc = PTR_ERR(lsm->lsm_md_oinfo[i].lmo_root);
-
- lsm->lsm_md_oinfo[i].lmo_root = NULL;
- return rc;
- }
- }
-
- return 0;
-}
-
-static inline int lsm_md_eq(const struct lmv_stripe_md *lsm_md1,
- const struct lmv_stripe_md *lsm_md2)
-{
- return lsm_md1->lsm_md_magic == lsm_md2->lsm_md_magic &&
- lsm_md1->lsm_md_stripe_count == lsm_md2->lsm_md_stripe_count &&
- lsm_md1->lsm_md_master_mdt_index ==
- lsm_md2->lsm_md_master_mdt_index &&
- lsm_md1->lsm_md_hash_type == lsm_md2->lsm_md_hash_type &&
- lsm_md1->lsm_md_layout_version ==
- lsm_md2->lsm_md_layout_version &&
- !strcmp(lsm_md1->lsm_md_pool_name,
- lsm_md2->lsm_md_pool_name);
-}
-
-static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lmv_stripe_md *lsm = md->lmv;
- int rc;
-
- LASSERT(S_ISDIR(inode->i_mode));
- CDEBUG(D_INODE, "update lsm %p of " DFID "\n", lli->lli_lsm_md,
- PFID(ll_inode2fid(inode)));
-
- /* no striping information from the request. */
- if (!lsm) {
- if (!lli->lli_lsm_md) {
- return 0;
- } else if (lli->lli_lsm_md->lsm_md_hash_type &
- LMV_HASH_FLAG_MIGRATION) {
- /*
- * migration is done, the temporary MIGRATE layout has
- * been removed
- */
- CDEBUG(D_INODE, DFID " finish migration.\n",
- PFID(ll_inode2fid(inode)));
- lmv_free_memmd(lli->lli_lsm_md);
- lli->lli_lsm_md = NULL;
- return 0;
- }
- /*
- * The lustre_md from req does not include stripeEA,
- * see ll_md_setattr
- */
- return 0;
- }
-
- /* set the directory layout */
- if (!lli->lli_lsm_md) {
- struct cl_attr *attr;
-
- rc = ll_init_lsm_md(inode, md);
- if (rc)
- return rc;
-
- /*
- * set lsm_md to NULL so that the subsequent freeing of
- * lustre_md will not free this lsm
- */
- md->lmv = NULL;
- lli->lli_lsm_md = lsm;
-
- attr = kzalloc(sizeof(*attr), GFP_NOFS);
- if (!attr)
- return -ENOMEM;
-
- /* validate the lsm */
- rc = md_merge_attr(ll_i2mdexp(inode), lsm, attr,
- ll_md_blocking_ast);
- if (rc) {
- kfree(attr);
- return rc;
- }
-
- if (md->body->mbo_valid & OBD_MD_FLNLINK)
- md->body->mbo_nlink = attr->cat_nlink;
- if (md->body->mbo_valid & OBD_MD_FLSIZE)
- md->body->mbo_size = attr->cat_size;
- if (md->body->mbo_valid & OBD_MD_FLATIME)
- md->body->mbo_atime = attr->cat_atime;
- if (md->body->mbo_valid & OBD_MD_FLCTIME)
- md->body->mbo_ctime = attr->cat_ctime;
- if (md->body->mbo_valid & OBD_MD_FLMTIME)
- md->body->mbo_mtime = attr->cat_mtime;
-
- kfree(attr);
-
- CDEBUG(D_INODE, "Set lsm %p magic %x to " DFID "\n", lsm,
- lsm->lsm_md_magic, PFID(ll_inode2fid(inode)));
- return 0;
- }
-
- /* Compare the old and new stripe information */
- if (!lsm_md_eq(lli->lli_lsm_md, lsm)) {
- struct lmv_stripe_md *old_lsm = lli->lli_lsm_md;
- int idx;
-
- CERROR("%s: inode " DFID "(%p)'s lmv layout mismatch (%p)/(%p) magic:0x%x/0x%x stripe count: %d/%d master_mdt: %d/%d hash_type:0x%x/0x%x layout: 0x%x/0x%x pool:%s/%s\n",
- ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid),
- inode, lsm, old_lsm,
- lsm->lsm_md_magic, old_lsm->lsm_md_magic,
- lsm->lsm_md_stripe_count,
- old_lsm->lsm_md_stripe_count,
- lsm->lsm_md_master_mdt_index,
- old_lsm->lsm_md_master_mdt_index,
- lsm->lsm_md_hash_type, old_lsm->lsm_md_hash_type,
- lsm->lsm_md_layout_version,
- old_lsm->lsm_md_layout_version,
- lsm->lsm_md_pool_name,
- old_lsm->lsm_md_pool_name);
-
- for (idx = 0; idx < old_lsm->lsm_md_stripe_count; idx++) {
- CERROR("%s: sub FIDs in old lsm idx %d, old: " DFID "\n",
- ll_get_fsname(inode->i_sb, NULL, 0), idx,
- PFID(&old_lsm->lsm_md_oinfo[idx].lmo_fid));
- }
-
- for (idx = 0; idx < lsm->lsm_md_stripe_count; idx++) {
- CERROR("%s: sub FIDs in new lsm idx %d, new: " DFID "\n",
- ll_get_fsname(inode->i_sb, NULL, 0), idx,
- PFID(&lsm->lsm_md_oinfo[idx].lmo_fid));
- }
-
- return -EIO;
- }
-
- return 0;
-}
-
-void ll_clear_inode(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n",
- PFID(ll_inode2fid(inode)), inode);
-
- if (S_ISDIR(inode->i_mode)) {
- /* these should have been cleared in ll_file_release */
- LASSERT(!lli->lli_opendir_key);
- LASSERT(!lli->lli_sai);
- LASSERT(lli->lli_opendir_pid == 0);
- }
-
- md_null_inode(sbi->ll_md_exp, ll_inode2fid(inode));
-
- LASSERT(!lli->lli_open_fd_write_count);
- LASSERT(!lli->lli_open_fd_read_count);
- LASSERT(!lli->lli_open_fd_exec_count);
-
- if (lli->lli_mds_write_och)
- ll_md_real_close(inode, FMODE_WRITE);
- if (lli->lli_mds_exec_och)
- ll_md_real_close(inode, FMODE_EXEC);
- if (lli->lli_mds_read_och)
- ll_md_real_close(inode, FMODE_READ);
-
- if (S_ISLNK(inode->i_mode)) {
- kfree(lli->lli_symlink_name);
- lli->lli_symlink_name = NULL;
- }
-
- ll_xattr_cache_destroy(inode);
-
-#ifdef CONFIG_FS_POSIX_ACL
- forget_all_cached_acls(inode);
- if (lli->lli_posix_acl) {
- posix_acl_release(lli->lli_posix_acl);
- lli->lli_posix_acl = NULL;
- }
-#endif
- lli->lli_inode_magic = LLI_INODE_DEAD;
-
- if (S_ISDIR(inode->i_mode))
- ll_dir_clear_lsm_md(inode);
- if (S_ISREG(inode->i_mode) && !is_bad_inode(inode))
- LASSERT(list_empty(&lli->lli_agl_list));
-
- /*
- * XXX This has to be done before lsm is freed below, because
- * cl_object still uses inode lsm.
- */
- cl_inode_fini(inode);
-}
-
-#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
-
-static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data)
-{
- struct lustre_md md;
- struct inode *inode = d_inode(dentry);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *request = NULL;
- int rc, ia_valid;
-
- op_data = ll_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, &request);
- if (rc) {
- ptlrpc_req_finished(request);
- if (rc == -ENOENT) {
- clear_nlink(inode);
- /* Unlinked special device node? Or just a race?
- * Pretend we did everything.
- */
- if (!S_ISREG(inode->i_mode) &&
- !S_ISDIR(inode->i_mode)) {
- ia_valid = op_data->op_attr.ia_valid;
- op_data->op_attr.ia_valid &= ~TIMES_SET_FLAGS;
- rc = simple_setattr(dentry, &op_data->op_attr);
- op_data->op_attr.ia_valid = ia_valid;
- }
- } else if (rc != -EPERM && rc != -EACCES && rc != -ETXTBSY) {
- CERROR("md_setattr fails: rc = %d\n", rc);
- }
- return rc;
- }
-
- rc = md_get_lustre_md(sbi->ll_md_exp, request, sbi->ll_dt_exp,
- sbi->ll_md_exp, &md);
- if (rc) {
- ptlrpc_req_finished(request);
- return rc;
- }
-
- ia_valid = op_data->op_attr.ia_valid;
- /* inode size will be set in cl_setattr_ost; we can't do it now since
- * the dirty cache is not cleared yet.
- */
- op_data->op_attr.ia_valid &= ~(TIMES_SET_FLAGS | ATTR_SIZE);
- if (S_ISREG(inode->i_mode))
- inode_lock(inode);
- rc = simple_setattr(dentry, &op_data->op_attr);
- if (S_ISREG(inode->i_mode))
- inode_unlock(inode);
- op_data->op_attr.ia_valid = ia_valid;
-
- rc = ll_update_inode(inode, &md);
- ptlrpc_req_finished(request);
-
- return rc;
-}
-
-/* If this inode has objects allocated to it (lsm != NULL), then the OST
- * object(s) determine the file size and mtime. Otherwise, the MDS will
- * keep these values until such a time that objects are allocated for it.
- * We do the MDS operations first, as it is checking permissions for us.
- * We don't do the MDS RPC if there is nothing that we want to store there,
- * otherwise there is no harm in updating mtime/atime on the MDS if we are
- * going to do an RPC anyway.
- *
- * If we are doing a truncate, we will send the mtime and ctime updates
- * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
- * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
- * at the same time.
- *
- * In the case of HSM import, we only set the attr on the MDS.
- */
-int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
-{
- struct inode *inode = d_inode(dentry);
- struct ll_inode_info *lli = ll_i2info(inode);
- struct md_op_data *op_data = NULL;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "%s: setattr inode " DFID "(%p) from %llu to %llu, valid %x, hsm_import %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), inode,
- i_size_read(inode), attr->ia_size, attr->ia_valid, hsm_import);
-
- if (attr->ia_valid & ATTR_SIZE) {
- /* Check new size against VFS/VM file size limit and rlimit */
- rc = inode_newsize_ok(inode, attr->ia_size);
- if (rc)
- return rc;
-
- /* The maximum Lustre file size is variable, based on the
- * OST maximum object size and number of stripes. This
- * needs another check in addition to the VFS check above.
- */
- if (attr->ia_size > ll_file_maxbytes(inode)) {
- CDEBUG(D_INODE, "file " DFID " too large %llu > %llu\n",
- PFID(&lli->lli_fid), attr->ia_size,
- ll_file_maxbytes(inode));
- return -EFBIG;
- }
-
- attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
- }
-
- /* POSIX: check before ATTR_*TIME_SET set (from setattr_prepare) */
- if (attr->ia_valid & TIMES_SET_FLAGS) {
- if ((!uid_eq(current_fsuid(), inode->i_uid)) &&
- !capable(CAP_FOWNER))
- return -EPERM;
- }
-
- /* We mark all of the fields "set" so MDS/OST does not re-set them */
- if (attr->ia_valid & ATTR_CTIME) {
- attr->ia_ctime = current_time(inode);
- attr->ia_valid |= ATTR_CTIME_SET;
- }
- if (!(attr->ia_valid & ATTR_ATIME_SET) &&
- (attr->ia_valid & ATTR_ATIME)) {
- attr->ia_atime = current_time(inode);
- attr->ia_valid |= ATTR_ATIME_SET;
- }
- if (!(attr->ia_valid & ATTR_MTIME_SET) &&
- (attr->ia_valid & ATTR_MTIME)) {
- attr->ia_mtime = current_time(inode);
- attr->ia_valid |= ATTR_MTIME_SET;
- }
-
- if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
- CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %llu\n",
- LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
- (s64)ktime_get_real_seconds());
-
- if (S_ISREG(inode->i_mode))
- inode_unlock(inode);
-
- /*
- * We always do an MDS RPC, even if we're only changing the size;
- * only the MDS knows whether truncate() should fail with -ETXTBUSY
- */
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- if (!op_data) {
- rc = -ENOMEM;
- goto out;
- }
-
- if (!hsm_import && attr->ia_valid & ATTR_SIZE) {
- /*
- * If we are changing file size, file content is
- * modified, flag it.
- */
- attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
- op_data->op_bias |= MDS_DATA_MODIFIED;
- clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags);
- }
-
- op_data->op_attr = *attr;
-
- rc = ll_md_setattr(dentry, op_data);
- if (rc)
- goto out;
-
- if (!S_ISREG(inode->i_mode) || hsm_import) {
- rc = 0;
- goto out;
- }
-
- if (attr->ia_valid & (ATTR_SIZE |
- ATTR_ATIME | ATTR_ATIME_SET |
- ATTR_MTIME | ATTR_MTIME_SET)) {
- /* For truncate and utimes that send attributes to the OSTs,
- * setting mtime/atime to the past is performed under a PW [0:EOF]
- * extent lock (new_size:EOF for truncate). It may seem
- * excessive to send mtime/atime updates to OSTs when not
- * setting times to the past, but it is necessary due to possible
- * time de-synchronization between the MDT inode and OST objects
- */
- rc = cl_setattr_ost(ll_i2info(inode)->lli_clob, attr, 0);
- }
-
- /*
- * If the file was restored, we need to set the dirty flag.
- *
- * We've already sent MDS_DATA_MODIFIED flag in
- * ll_md_setattr() for truncate. However, the MDT refuses to
- * set the HS_DIRTY flag on released files, so we have to set
- * it again if the file has been restored. Please check how
- * LLIF_DATA_MODIFIED is set in vvp_io_setattr_fini().
- *
- * Please notice that if the file is not released, the previous
- * MDS_DATA_MODIFIED has taken effect and usually
- * LLIF_DATA_MODIFIED is not set (see vvp_io_setattr_fini()).
- * This way we can save an RPC for common open + trunc
- * operation.
- */
- if (test_and_clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags)) {
- struct hsm_state_set hss = {
- .hss_valid = HSS_SETMASK,
- .hss_setmask = HS_DIRTY,
- };
- int rc2;
-
- rc2 = ll_hsm_state_set(inode, &hss);
- /*
- * truncate and write can happen at the same time, so the
- * file can be marked modified even though it was not
- * restored from the released state; ll_hsm_state_set() is
- * then not applicable to the file, and rc2 < 0 is normal
- * in this case.
- */
- if (rc2 < 0)
- CDEBUG(D_INFO, DFID "HSM set dirty failed: rc2 = %d\n",
- PFID(ll_inode2fid(inode)), rc2);
- }
-
-out:
- if (op_data)
- ll_finish_md_op_data(op_data);
-
- if (S_ISREG(inode->i_mode)) {
- inode_lock(inode);
- if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
- inode_dio_wait(inode);
- }
-
- ll_stats_ops_tally(ll_i2sbi(inode), (attr->ia_valid & ATTR_SIZE) ?
- LPROC_LL_TRUNC : LPROC_LL_SETATTR, 1);
-
- return rc;
-}
-
-int ll_setattr(struct dentry *de, struct iattr *attr)
-{
- int mode = d_inode(de)->i_mode;
-
- if ((attr->ia_valid & (ATTR_CTIME | ATTR_SIZE | ATTR_MODE)) ==
- (ATTR_CTIME | ATTR_SIZE | ATTR_MODE))
- attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
-
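- /* 0010 is S_IXGRP; setgid only needs killing when group-execute
- * is also set
- */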
- if (((attr->ia_valid & (ATTR_MODE | ATTR_FORCE | ATTR_SIZE)) ==
- (ATTR_SIZE | ATTR_MODE)) &&
- (((mode & S_ISUID) && !(attr->ia_mode & S_ISUID)) ||
- (((mode & (S_ISGID | 0010)) == (S_ISGID | 0010)) &&
- !(attr->ia_mode & S_ISGID))))
- attr->ia_valid |= ATTR_FORCE;
-
- if ((attr->ia_valid & ATTR_MODE) &&
- (mode & S_ISUID) &&
- !(attr->ia_mode & S_ISUID) &&
- !(attr->ia_valid & ATTR_KILL_SUID))
- attr->ia_valid |= ATTR_KILL_SUID;
-
- if ((attr->ia_valid & ATTR_MODE) &&
- ((mode & (S_ISGID | 0010)) == (S_ISGID | 0010)) &&
- !(attr->ia_mode & S_ISGID) &&
- !(attr->ia_valid & ATTR_KILL_SGID))
- attr->ia_valid |= ATTR_KILL_SGID;
-
- return ll_setattr_raw(de, attr, false);
-}
-
-int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
- __u64 max_age, __u32 flags)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct obd_statfs obd_osfs;
- int rc;
-
- rc = obd_statfs(NULL, sbi->ll_md_exp, osfs, max_age, flags);
- if (rc) {
- CERROR("md_statfs fails: rc = %d\n", rc);
- return rc;
- }
-
- osfs->os_type = sb->s_magic;
-
- CDEBUG(D_SUPER, "MDC blocks %llu/%llu objects %llu/%llu\n",
- osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,
- osfs->os_files);
-
- if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
- flags |= OBD_STATFS_NODELAY;
-
- rc = obd_statfs_rqset(sbi->ll_dt_exp, &obd_osfs, max_age, flags);
- if (rc) {
- CERROR("obd_statfs fails: rc = %d\n", rc);
- return rc;
- }
-
- CDEBUG(D_SUPER, "OSC blocks %llu/%llu objects %llu/%llu\n",
- obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
- obd_osfs.os_files);
-
- osfs->os_bsize = obd_osfs.os_bsize;
- osfs->os_blocks = obd_osfs.os_blocks;
- osfs->os_bfree = obd_osfs.os_bfree;
- osfs->os_bavail = obd_osfs.os_bavail;
-
- /* If we don't have as many objects free on the OST as inodes
- * on the MDS, we reduce the total number of inodes to
- * compensate, so that the "inodes in use" number is correct.
- */
- if (obd_osfs.os_ffree < osfs->os_ffree) {
- osfs->os_files = (osfs->os_files - osfs->os_ffree) +
- obd_osfs.os_ffree;
- osfs->os_ffree = obd_osfs.os_ffree;
- }
-
- return rc;
-}
-
-int ll_statfs(struct dentry *de, struct kstatfs *sfs)
-{
- struct super_block *sb = de->d_sb;
- struct obd_statfs osfs;
- int rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op: at %llu jiffies\n", get_jiffies_64());
- ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_STAFS, 1);
-
- /* Some amount of caching on the client is allowed */
- rc = ll_statfs_internal(sb, &osfs,
- get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ,
- 0);
- if (rc)
- return rc;
-
- statfs_unpack(sfs, &osfs);
-
- /* We need to downshift for all 32-bit kernels, because we can't
- * tell if the kernel is being called via sys_statfs64() or not.
- * Stop before overflowing f_bsize - in which case it is better
- * to just risk EOVERFLOW if caller is using old sys_statfs().
- */
- if (sizeof(long) < 8) {
- while (osfs.os_blocks > ~0UL && sfs->f_bsize < 0x40000000) {
- sfs->f_bsize <<= 1;
-
- osfs.os_blocks >>= 1;
- osfs.os_bfree >>= 1;
- osfs.os_bavail >>= 1;
- }
- }
-
- sfs->f_blocks = osfs.os_blocks;
- sfs->f_bfree = osfs.os_bfree;
- sfs->f_bavail = osfs.os_bavail;
- sfs->f_fsid = ll_s2sbi(sb)->ll_fsid;
- return 0;
-}
-
-void ll_inode_size_lock(struct inode *inode)
-{
- struct ll_inode_info *lli;
-
- LASSERT(!S_ISDIR(inode->i_mode));
-
- lli = ll_i2info(inode);
- mutex_lock(&lli->lli_size_mutex);
-}
-
-void ll_inode_size_unlock(struct inode *inode)
-{
- struct ll_inode_info *lli;
-
- lli = ll_i2info(inode);
- mutex_unlock(&lli->lli_size_mutex);
-}
-
-int ll_update_inode(struct inode *inode, struct lustre_md *md)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct mdt_body *body = md->body;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
-
- if (body->mbo_valid & OBD_MD_FLEASIZE)
- cl_file_inode_init(inode, md);
-
- if (S_ISDIR(inode->i_mode)) {
- int rc;
-
- rc = ll_update_lsm_md(inode, md);
- if (rc)
- return rc;
- }
-
-#ifdef CONFIG_FS_POSIX_ACL
- if (body->mbo_valid & OBD_MD_FLACL) {
- spin_lock(&lli->lli_lock);
- if (lli->lli_posix_acl)
- posix_acl_release(lli->lli_posix_acl);
- lli->lli_posix_acl = md->posix_acl;
- spin_unlock(&lli->lli_lock);
- }
-#endif
- inode->i_ino = cl_fid_build_ino(&body->mbo_fid1,
- sbi->ll_flags & LL_SBI_32BIT_API);
- inode->i_generation = cl_fid_build_gen(&body->mbo_fid1);
-
- if (body->mbo_valid & OBD_MD_FLATIME) {
- if (body->mbo_atime > LTIME_S(inode->i_atime))
- LTIME_S(inode->i_atime) = body->mbo_atime;
- lli->lli_atime = body->mbo_atime;
- }
- if (body->mbo_valid & OBD_MD_FLMTIME) {
- if (body->mbo_mtime > LTIME_S(inode->i_mtime)) {
- CDEBUG(D_INODE,
- "setting ino %lu mtime from %lu to %llu\n",
- inode->i_ino, LTIME_S(inode->i_mtime),
- body->mbo_mtime);
- LTIME_S(inode->i_mtime) = body->mbo_mtime;
- }
- lli->lli_mtime = body->mbo_mtime;
- }
- if (body->mbo_valid & OBD_MD_FLCTIME) {
- if (body->mbo_ctime > LTIME_S(inode->i_ctime))
- LTIME_S(inode->i_ctime) = body->mbo_ctime;
- lli->lli_ctime = body->mbo_ctime;
- }
- if (body->mbo_valid & OBD_MD_FLMODE)
- inode->i_mode = (inode->i_mode & S_IFMT) |
- (body->mbo_mode & ~S_IFMT);
- if (body->mbo_valid & OBD_MD_FLTYPE)
- inode->i_mode = (inode->i_mode & ~S_IFMT) |
- (body->mbo_mode & S_IFMT);
- LASSERT(inode->i_mode != 0);
- if (S_ISREG(inode->i_mode))
- inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1,
- LL_MAX_BLKSIZE_BITS);
- else
- inode->i_blkbits = inode->i_sb->s_blocksize_bits;
- if (body->mbo_valid & OBD_MD_FLUID)
- inode->i_uid = make_kuid(&init_user_ns, body->mbo_uid);
- if (body->mbo_valid & OBD_MD_FLGID)
- inode->i_gid = make_kgid(&init_user_ns, body->mbo_gid);
- if (body->mbo_valid & OBD_MD_FLFLAGS)
- inode->i_flags = ll_ext_to_inode_flags(body->mbo_flags);
- if (body->mbo_valid & OBD_MD_FLNLINK)
- set_nlink(inode, body->mbo_nlink);
- if (body->mbo_valid & OBD_MD_FLRDEV)
- inode->i_rdev = old_decode_dev(body->mbo_rdev);
-
- if (body->mbo_valid & OBD_MD_FLID) {
- /* FID shouldn't be changed! */
- if (fid_is_sane(&lli->lli_fid)) {
- LASSERTF(lu_fid_eq(&lli->lli_fid, &body->mbo_fid1),
- "Trying to change FID " DFID " to the " DFID ", inode " DFID "(%p)\n",
- PFID(&lli->lli_fid), PFID(&body->mbo_fid1),
- PFID(ll_inode2fid(inode)), inode);
- } else {
- lli->lli_fid = body->mbo_fid1;
- }
- }
-
- LASSERT(fid_seq(&lli->lli_fid) != 0);
-
- if (body->mbo_valid & OBD_MD_FLSIZE) {
- i_size_write(inode, body->mbo_size);
-
- CDEBUG(D_VFSTRACE, "inode=" DFID ", updating i_size %llu\n",
- PFID(ll_inode2fid(inode)),
- (unsigned long long)body->mbo_size);
-
- if (body->mbo_valid & OBD_MD_FLBLOCKS)
- inode->i_blocks = body->mbo_blocks;
- }
-
- if (body->mbo_valid & OBD_MD_TSTATE) {
- if (body->mbo_t_state & MS_RESTORE)
- set_bit(LLIF_FILE_RESTORING, &lli->lli_flags);
- }
-
- return 0;
-}
-
-int ll_read_inode2(struct inode *inode, void *opaque)
-{
- struct lustre_md *md = opaque;
- struct ll_inode_info *lli = ll_i2info(inode);
- int rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n",
- PFID(&lli->lli_fid), inode);
-
- /* Core attributes from the MDS first. This is a new inode, and
- * the VFS doesn't zero times in the core inode so we have to do
- * it ourselves. They will be overwritten by either MDS or OST
- * attributes - we just need to make sure they aren't newer.
- */
- LTIME_S(inode->i_mtime) = 0;
- LTIME_S(inode->i_atime) = 0;
- LTIME_S(inode->i_ctime) = 0;
- inode->i_rdev = 0;
- rc = ll_update_inode(inode, md);
- if (rc)
- return rc;
-
- /* OIDEBUG(inode); */
-
- if (S_ISREG(inode->i_mode)) {
- struct ll_sb_info *sbi = ll_i2sbi(inode);
-
- inode->i_op = &ll_file_inode_operations;
- inode->i_fop = sbi->ll_fop;
- inode->i_mapping->a_ops = (struct address_space_operations *)&ll_aops;
- } else if (S_ISDIR(inode->i_mode)) {
- inode->i_op = &ll_dir_inode_operations;
- inode->i_fop = &ll_dir_operations;
- } else if (S_ISLNK(inode->i_mode)) {
- inode->i_op = &ll_fast_symlink_inode_operations;
- } else {
- inode->i_op = &ll_special_inode_operations;
-
- init_special_inode(inode, inode->i_mode,
- inode->i_rdev);
- }
-
- return 0;
-}
-
-void ll_delete_inode(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
-
- if (S_ISREG(inode->i_mode) && lli->lli_clob)
- /* discard all dirty pages before truncating them, required by
- * osc_extent implementation at LU-1030.
- */
- cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
- CL_FSYNC_LOCAL, 1);
-
- truncate_inode_pages_final(&inode->i_data);
-
- LASSERTF(!inode->i_data.nrpages,
- "inode=" DFID "(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n",
- PFID(ll_inode2fid(inode)), inode, inode->i_data.nrpages);
-
- ll_clear_inode(inode);
- clear_inode(inode);
-}
-
-int ll_iocontrol(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *req = NULL;
- int rc, flags = 0;
-
- switch (cmd) {
- case FSFILT_IOC_GETFLAGS: {
- struct mdt_body *body;
- struct md_op_data *op_data;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
- 0, 0, LUSTRE_OPC_ANY,
- NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_valid = OBD_MD_FLFLAGS;
- rc = md_getattr(sbi->ll_md_exp, op_data, &req);
- ll_finish_md_op_data(op_data);
- if (rc) {
- CERROR("%s: failure inode " DFID ": rc = %d\n",
- sbi->ll_md_exp->exp_obd->obd_name,
- PFID(ll_inode2fid(inode)), rc);
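- /* normalize to a negative errno regardless of the sign of rc */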
- return -abs(rc);
- }
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
-
- flags = body->mbo_flags;
-
- ptlrpc_req_finished(req);
-
- return put_user(flags, (int __user *)arg);
- }
- case FSFILT_IOC_SETFLAGS: {
- struct md_op_data *op_data;
- struct cl_object *obj;
- struct iattr *attr;
-
- if (get_user(flags, (int __user *)arg))
- return -EFAULT;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_attr_flags = flags;
- op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
- rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, &req);
- ll_finish_md_op_data(op_data);
- ptlrpc_req_finished(req);
- if (rc)
- return rc;
-
- inode->i_flags = ll_ext_to_inode_flags(flags);
-
- obj = ll_i2info(inode)->lli_clob;
- if (!obj)
- return 0;
-
- attr = kzalloc(sizeof(*attr), GFP_NOFS);
- if (!attr)
- return -ENOMEM;
-
- attr->ia_valid = ATTR_ATTR_FLAG;
- rc = cl_setattr_ost(obj, attr, flags);
- kfree(attr);
- return rc;
- }
- default:
- return -ENOSYS;
- }
-
- return 0;
-}
-
-int ll_flush_ctx(struct inode *inode)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
-
- CDEBUG(D_SEC, "flush context for user %d\n",
- from_kuid(&init_user_ns, current_uid()));
-
- obd_set_info_async(NULL, sbi->ll_md_exp,
- sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX,
- 0, NULL, NULL);
- obd_set_info_async(NULL, sbi->ll_dt_exp,
- sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX,
- 0, NULL, NULL);
- return 0;
-}
-
-/* umount -f client means force down, don't save state */
-void ll_umount_begin(struct super_block *sb)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct obd_device *obd;
- struct obd_ioctl_data *ioc_data;
- int cnt = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb,
- sb->s_count, atomic_read(&sb->s_active));
-
- obd = class_exp2obd(sbi->ll_md_exp);
- if (!obd) {
- CERROR("Invalid MDC connection handle %#llx\n",
- sbi->ll_md_exp->exp_handle.h_cookie);
- return;
- }
- obd->obd_force = 1;
-
- obd = class_exp2obd(sbi->ll_dt_exp);
- if (!obd) {
- CERROR("Invalid LOV connection handle %#llx\n",
- sbi->ll_dt_exp->exp_handle.h_cookie);
- return;
- }
- obd->obd_force = 1;
-
- ioc_data = kzalloc(sizeof(*ioc_data), GFP_NOFS);
- if (ioc_data) {
- obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_md_exp,
- sizeof(*ioc_data), ioc_data, NULL);
-
- obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_dt_exp,
- sizeof(*ioc_data), ioc_data, NULL);
-
- kfree(ioc_data);
- }
-
- /* Really, we'd like to wait until there are no requests outstanding,
- * and then continue. For now, we just periodically check for the vfs
- * to decrement mnt_cnt and hope to finish within 10 sec.
- */
- while (cnt < 10 && !may_umount(sbi->ll_mnt.mnt)) {
- schedule_timeout_uninterruptible(HZ);
- cnt++;
- }
-
- schedule();
-}
-
-int ll_remount_fs(struct super_block *sb, int *flags, char *data)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- char *profilenm = get_profile_name(sb);
- int err;
- __u32 read_only;
-
- if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
- read_only = *flags & SB_RDONLY;
- err = obd_set_info_async(NULL, sbi->ll_md_exp,
- sizeof(KEY_READ_ONLY),
- KEY_READ_ONLY, sizeof(read_only),
- &read_only, NULL);
- if (err) {
- LCONSOLE_WARN("Failed to remount %s %s (%d)\n",
- profilenm, read_only ?
- "read-only" : "read-write", err);
- return err;
- }
-
- if (read_only)
- sb->s_flags |= SB_RDONLY;
- else
- sb->s_flags &= ~SB_RDONLY;
-
- if (sbi->ll_flags & LL_SBI_VERBOSE)
- LCONSOLE_WARN("Remounted %s %s\n", profilenm,
- read_only ? "read-only" : "read-write");
- }
- return 0;
-}
-
-/**
- * Cleanup the open handle that is cached on MDT-side.
- *
- * For the open case, the client-side open handling thread may hit an
- * error after the MDT has granted the open. In that case, the client
- * should send a close RPC to the MDT as cleanup; otherwise, the open
- * handle is leaked on the MDT until the client unmounts or is evicted.
- *
- * Furthermore, if someone unlinks the file, then because the open
- * handle holds a reference on that file/object, it will block
- * subsequent threads that want to locate the object via FID.
- *
- * \param[in] sb super block for this file-system
- * \param[in] open_req pointer to the original open request
- */
-void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req)
-{
- struct mdt_body *body;
- struct md_op_data *op_data;
- struct ptlrpc_request *close_req = NULL;
- struct obd_export *exp = ll_s2sbi(sb)->ll_md_exp;
-
- body = req_capsule_server_get(&open_req->rq_pill, &RMF_MDT_BODY);
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- if (!op_data)
- return;
-
- op_data->op_fid1 = body->mbo_fid1;
- op_data->op_handle = body->mbo_handle;
- op_data->op_mod_time = get_seconds();
- md_close(exp, op_data, NULL, &close_req);
- ptlrpc_req_finished(close_req);
- ll_finish_md_op_data(op_data);
-}
-
-int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
- struct super_block *sb, struct lookup_intent *it)
-{
- struct ll_sb_info *sbi = NULL;
- struct lustre_md md = { NULL };
- int rc;
-
- LASSERT(*inode || sb);
- sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode);
- rc = md_get_lustre_md(sbi->ll_md_exp, req, sbi->ll_dt_exp,
- sbi->ll_md_exp, &md);
- if (rc)
- goto cleanup;
-
- if (*inode) {
- rc = ll_update_inode(*inode, &md);
- if (rc)
- goto out;
- } else {
- LASSERT(sb);
-
- /*
- * At this point the server returns the same fid that the client
- * generated at create time, so using ->fid1 is okay here.
- */
- if (!fid_is_sane(&md.body->mbo_fid1)) {
- CERROR("%s: Fid is insane " DFID "\n",
- ll_get_fsname(sb, NULL, 0),
- PFID(&md.body->mbo_fid1));
- rc = -EINVAL;
- goto out;
- }
-
- *inode = ll_iget(sb, cl_fid_build_ino(&md.body->mbo_fid1,
- sbi->ll_flags & LL_SBI_32BIT_API),
- &md);
- if (IS_ERR(*inode)) {
-#ifdef CONFIG_FS_POSIX_ACL
- if (md.posix_acl) {
- posix_acl_release(md.posix_acl);
- md.posix_acl = NULL;
- }
-#endif
- rc = PTR_ERR(*inode);
- CERROR("new_inode -fatal: rc %d\n", rc);
- goto out;
- }
- }
-
- /* Handle a piggybacked layout lock.
- * A layout lock can be piggybacked on getattr and open requests.
- * The lsm can be applied to the inode only if it comes with a layout
- * lock; otherwise a correct layout may be overwritten, for example:
- * 1. proc1: mdt returns an lsm but does not grant the layout lock
- * 2. the layout is changed by another client
- * 3. proc2: refreshes the layout, layout lock granted
- * 4. proc1: applies the now-stale layout
- */
- if (it && it->it_lock_mode != 0) {
- struct lustre_handle lockh;
- struct ldlm_lock *lock;
-
- lockh.cookie = it->it_lock_handle;
- lock = ldlm_handle2lock(&lockh);
- LASSERT(lock);
- if (ldlm_has_layout(lock)) {
- struct cl_object_conf conf;
-
- memset(&conf, 0, sizeof(conf));
- conf.coc_opc = OBJECT_CONF_SET;
- conf.coc_inode = *inode;
- conf.coc_lock = lock;
- conf.u.coc_layout = md.layout;
- (void)ll_layout_conf(*inode, &conf);
- }
- LDLM_LOCK_PUT(lock);
- }
-
-out:
- md_free_lustre_md(sbi->ll_md_exp, &md);
-cleanup:
- if (rc != 0 && it && it->it_op & IT_OPEN)
- ll_open_cleanup(sb ? sb : (*inode)->i_sb, req);
-
- return rc;
-}
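-
-/*
- * Typical call pattern (cf. search_inode_for_lustre() in llite_nfs.c):
- * instantiate an inode from a getattr reply:
- *
- *   struct inode *inode = NULL;
- *
- *   rc = md_getattr(sbi->ll_md_exp, op_data, &req);
- *   if (!rc)
- *     rc = ll_prep_inode(&inode, req, sb, NULL);
- *   ptlrpc_req_finished(req);
- */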
-
-int ll_obd_statfs(struct inode *inode, void __user *arg)
-{
- struct ll_sb_info *sbi = NULL;
- struct obd_export *exp;
- char *buf = NULL;
- struct obd_ioctl_data *data = NULL;
- __u32 type;
- int len = 0, rc;
-
- if (!inode) {
- rc = -EINVAL;
- goto out_statfs;
- }
-
- sbi = ll_i2sbi(inode);
- if (!sbi) {
- rc = -EINVAL;
- goto out_statfs;
- }
-
- rc = obd_ioctl_getdata(&buf, &len, arg);
- if (rc)
- goto out_statfs;
-
- data = (void *)buf;
- if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
- !data->ioc_pbuf1 || !data->ioc_pbuf2) {
- rc = -EINVAL;
- goto out_statfs;
- }
-
- if (data->ioc_inllen1 != sizeof(__u32) ||
- data->ioc_inllen2 != sizeof(__u32) ||
- data->ioc_plen1 != sizeof(struct obd_statfs) ||
- data->ioc_plen2 != sizeof(struct obd_uuid)) {
- rc = -EINVAL;
- goto out_statfs;
- }
-
- memcpy(&type, data->ioc_inlbuf1, sizeof(__u32));
- if (type & LL_STATFS_LMV) {
- exp = sbi->ll_md_exp;
- } else if (type & LL_STATFS_LOV) {
- exp = sbi->ll_dt_exp;
- } else {
- rc = -ENODEV;
- goto out_statfs;
- }
-
- rc = obd_iocontrol(IOC_OBD_STATFS, exp, len, buf, NULL);
- if (rc)
- goto out_statfs;
-out_statfs:
- kvfree(buf);
- return rc;
-}
-
-int ll_process_config(struct lustre_cfg *lcfg)
-{
- char *ptr;
- void *sb;
- struct lprocfs_static_vars lvars;
- unsigned long x;
- int rc = 0;
-
- lprocfs_llite_init_vars(&lvars);
-
- /* The instance name contains the sb: lustre-client-aacfe000 */
- ptr = strrchr(lustre_cfg_string(lcfg, 0), '-');
- if (!ptr || !*(++ptr))
- return -EINVAL;
- rc = kstrtoul(ptr, 16, &x);
- if (rc != 0)
- return -EINVAL;
- sb = (void *)x;
- /* This better be a real Lustre superblock! */
- LASSERT(s2lsi((struct super_block *)sb)->lsi_lmd->lmd_magic ==
- LMD_MAGIC);
-
- /* Note we have not called client_common_fill_super yet, so
- * proc fns must be able to handle that!
- */
- rc = class_process_proc_param(PARAM_LLITE, lvars.obd_vars,
- lcfg, sb);
- if (rc > 0)
- rc = 0;
- return rc;
-}
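-
-/*
- * Worked example: for the instance name "lustre-client-aacfe000",
- * strrchr() leaves ptr pointing at "aacfe000", and kstrtoul(ptr, 16, &x)
- * recovers the super_block pointer 0xaacfe000 that was embedded in the
- * instance name when the client was set up.
- */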
-
-/* This function prepares the md_op_data hint for passing down to the MD stack. */
-struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
- struct inode *i1, struct inode *i2,
- const char *name, size_t namelen,
- u32 mode, __u32 opc, void *data)
-{
- if (!name) {
- /* Do not reuse namelen for something else. */
- if (namelen)
- return ERR_PTR(-EINVAL);
- } else {
- if (namelen > ll_i2sbi(i1)->ll_namelen)
- return ERR_PTR(-ENAMETOOLONG);
-
- if (!lu_name_is_valid_2(name, namelen))
- return ERR_PTR(-EINVAL);
- }
-
- if (!op_data)
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
-
- if (!op_data)
- return ERR_PTR(-ENOMEM);
-
- ll_i2gids(op_data->op_suppgids, i1, i2);
- op_data->op_fid1 = *ll_inode2fid(i1);
- op_data->op_default_stripe_offset = -1;
- if (S_ISDIR(i1->i_mode)) {
- op_data->op_mea1 = ll_i2info(i1)->lli_lsm_md;
- if (opc == LUSTRE_OPC_MKDIR)
- op_data->op_default_stripe_offset =
- ll_i2info(i1)->lli_def_stripe_offset;
- }
-
- if (i2) {
- op_data->op_fid2 = *ll_inode2fid(i2);
- if (S_ISDIR(i2->i_mode))
- op_data->op_mea2 = ll_i2info(i2)->lli_lsm_md;
- } else {
- fid_zero(&op_data->op_fid2);
- }
-
- if (ll_i2sbi(i1)->ll_flags & LL_SBI_64BIT_HASH)
- op_data->op_cli_flags |= CLI_HASH64;
-
- if (ll_need_32bit_api(ll_i2sbi(i1)))
- op_data->op_cli_flags |= CLI_API32;
-
- op_data->op_name = name;
- op_data->op_namelen = namelen;
- op_data->op_mode = mode;
- op_data->op_mod_time = ktime_get_real_seconds();
- op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid());
- op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
- op_data->op_cap = current_cap();
- if ((opc == LUSTRE_OPC_CREATE) && name &&
- filename_is_volatile(name, namelen, &op_data->op_mds))
- op_data->op_bias |= MDS_CREATE_VOLATILE;
- else
- op_data->op_mds = 0;
- op_data->op_data = data;
-
- return op_data;
-}
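-
-/*
- * Usage sketch (cf. ll_get_name() in llite_nfs.c): a directory scan
- * needs neither a name nor a second inode:
- *
- *   op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
- *                                LUSTRE_OPC_ANY, dir);
- *   if (IS_ERR(op_data))
- *     return PTR_ERR(op_data);
- *   ...
- *   ll_finish_md_op_data(op_data);
- */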
-
-void ll_finish_md_op_data(struct md_op_data *op_data)
-{
- kfree(op_data);
-}
-
-int ll_show_options(struct seq_file *seq, struct dentry *dentry)
-{
- struct ll_sb_info *sbi;
-
- LASSERT(seq && dentry);
- sbi = ll_s2sbi(dentry->d_sb);
-
- if (sbi->ll_flags & LL_SBI_NOLCK)
- seq_puts(seq, ",nolock");
-
- if (sbi->ll_flags & LL_SBI_FLOCK)
- seq_puts(seq, ",flock");
-
- if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
- seq_puts(seq, ",localflock");
-
- if (sbi->ll_flags & LL_SBI_USER_XATTR)
- seq_puts(seq, ",user_xattr");
-
- if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
- seq_puts(seq, ",lazystatfs");
-
- if (sbi->ll_flags & LL_SBI_USER_FID2PATH)
- seq_puts(seq, ",user_fid2path");
-
- if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
- seq_puts(seq, ",always_ping");
-
- return 0;
-}
-
-/**
- * Get obd name by cmd, and copy out to user space
- */
-int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct obd_device *obd;
-
- if (cmd == OBD_IOC_GETDTNAME)
- obd = class_exp2obd(sbi->ll_dt_exp);
- else if (cmd == OBD_IOC_GETMDNAME)
- obd = class_exp2obd(sbi->ll_md_exp);
- else
- return -EINVAL;
-
- if (!obd)
- return -ENOENT;
-
- if (copy_to_user((void __user *)arg, obd->obd_name,
- strlen(obd->obd_name) + 1))
- return -EFAULT;
-
- return 0;
-}
-
-/**
- * Get the Lustre filesystem name for \a sb. If \a buf is provided
- * (non-NULL), the fsname is returned in this buffer; otherwise, a static
- * buffer is used to store the fsname and returned to the caller.
- */
-char *ll_get_fsname(struct super_block *sb, char *buf, int buflen)
-{
- static char fsname_static[MTI_NAME_MAXLEN];
- struct lustre_sb_info *lsi = s2lsi(sb);
- char *ptr;
- int len;
-
- if (!buf) {
- /* The caller wants to use the static buffer and does not
- * care about races; this is usually the error reporting
- * path.
- */
- buf = fsname_static;
- buflen = sizeof(fsname_static);
- }
-
- len = strlen(lsi->lsi_lmd->lmd_profile);
- ptr = strrchr(lsi->lsi_lmd->lmd_profile, '-');
- if (ptr && (strcmp(ptr, "-client") == 0))
- len -= 7;
-
- if (unlikely(len >= buflen))
- len = buflen - 1;
- strncpy(buf, lsi->lsi_lmd->lmd_profile, len);
- buf[len] = '\0';
-
- return buf;
-}
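-
-/*
- * Worked example: for the profile "lustre-client" the trailing
- * "-client" (7 characters) is stripped and "lustre" is returned; a
- * profile without that suffix is copied verbatim, truncated to
- * buflen - 1 bytes if necessary.
- */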
-
-void ll_dirty_page_discard_warn(struct page *page, int ioret)
-{
- char *buf, *path = NULL;
- struct dentry *dentry = NULL;
- struct vvp_object *obj = cl_inode2vvp(page->mapping->host);
-
- /* This can be called inside a spinlock, so use GFP_ATOMIC. */
- buf = (char *)__get_free_page(GFP_ATOMIC);
- if (buf) {
- dentry = d_find_alias(page->mapping->host);
- if (dentry)
- path = dentry_path_raw(dentry, buf, PAGE_SIZE);
- }
-
- CDEBUG(D_WARNING,
- "%s: dirty page discard: %s/fid: " DFID "/%s may get corrupted (rc %d)\n",
- ll_get_fsname(page->mapping->host->i_sb, NULL, 0),
- s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev,
- PFID(&obj->vob_header.coh_lu.loh_fid),
- (path && !IS_ERR(path)) ? path : "", ioret);
-
- if (dentry)
- dput(dentry);
-
- if (buf)
- free_page((unsigned long)buf);
-}
-
-ssize_t ll_copy_user_md(const struct lov_user_md __user *md,
- struct lov_user_md **kbuf)
-{
- struct lov_user_md lum;
- ssize_t lum_size;
-
- if (copy_from_user(&lum, md, sizeof(lum))) {
- lum_size = -EFAULT;
- goto no_kbuf;
- }
-
- lum_size = ll_lov_user_md_size(&lum);
- if (lum_size < 0)
- goto no_kbuf;
-
- *kbuf = kzalloc(lum_size, GFP_NOFS);
- if (!*kbuf) {
- lum_size = -ENOMEM;
- goto no_kbuf;
- }
-
- if (copy_from_user(*kbuf, md, lum_size) != 0) {
- kfree(*kbuf);
- *kbuf = NULL;
- lum_size = -EFAULT;
- }
-no_kbuf:
- return lum_size;
-}
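-
-/*
- * Caller pattern (sketch; @ulum is a hypothetical user pointer): the
- * returned size doubles as the error code, and the caller owns the
- * buffer on success:
- *
- *   struct lov_user_md *klum = NULL;
- *   ssize_t size = ll_copy_user_md(ulum, &klum);
- *
- *   if (size < 0)
- *     return size;
- *   ...
- *   kfree(klum);
- */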
-
-/*
- * Compute the llite root squash state after a change of the root squash
- * configuration or the addition/removal of an LNet NID.
- */
-void ll_compute_rootsquash_state(struct ll_sb_info *sbi)
-{
- struct root_squash_info *squash = &sbi->ll_squash;
- struct lnet_process_id id;
- bool matched;
- int i;
-
- /* Update norootsquash flag */
- down_write(&squash->rsi_sem);
- if (list_empty(&squash->rsi_nosquash_nids)) {
- sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH;
- } else {
- /*
- * Do not apply root squash if any one of our NIDs is
- * in the nosquash_nids list.
- */
- matched = false;
- i = 0;
-
- while (LNetGetId(i++, &id) != -ENOENT) {
- if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
- continue;
- if (cfs_match_nid(id.nid, &squash->rsi_nosquash_nids)) {
- matched = true;
- break;
- }
- }
- if (matched)
- sbi->ll_flags |= LL_SBI_NOROOTSQUASH;
- else
- sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH;
- }
- up_write(&squash->rsi_sem);
-}
-
-/**
- * Parse linkea content to extract information about a given hardlink
- *
- * \param[in] ldata - Initialized linkea data
- * \param[in] linkno - Link identifier
- * \param[out] parent_fid - The entry's parent FID
- * \param[out] ln - Entry name destination buffer
- *
- * \retval 0 on success
- * \retval Appropriate negative error code on failure
- */
-static int ll_linkea_decode(struct linkea_data *ldata, unsigned int linkno,
- struct lu_fid *parent_fid, struct lu_name *ln)
-{
- unsigned int idx;
- int rc;
-
- rc = linkea_init_with_rec(ldata);
- if (rc < 0)
- return rc;
-
- if (linkno >= ldata->ld_leh->leh_reccount)
- /* beyond last link */
- return -ENODATA;
-
- linkea_first_entry(ldata);
- for (idx = 0; ldata->ld_lee; idx++) {
- linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, ln,
- parent_fid);
- if (idx == linkno)
- break;
-
- linkea_next_entry(ldata);
- }
-
- if (idx < linkno)
- return -ENODATA;
-
- return 0;
-}
-
-/**
- * Get parent FID and name of an identified link. Operation is performed for
- * a given link number, letting the caller iterate over linkno to list one or
- * all links of an entry.
- *
- * \param[in] file - File descriptor against which to perform the operation
- * \param[in,out] arg - User-filled structure containing the linkno to operate
- * on and the available size. It is eventually filled
- * with the requested information or left untouched on
- * error
- *
- * \retval - 0 on success
- * \retval - Appropriate negative error code on failure
- */
-int ll_getparent(struct file *file, struct getparent __user *arg)
-{
- struct inode *inode = file_inode(file);
- struct linkea_data *ldata;
- struct lu_fid parent_fid;
- struct lu_buf buf = {
- .lb_buf = NULL,
- .lb_len = 0
- };
- struct lu_name ln;
- u32 name_size;
- u32 linkno;
- int rc;
-
- if (!capable(CAP_DAC_READ_SEARCH) &&
- !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
- return -EPERM;
-
- if (get_user(name_size, &arg->gp_name_size))
- return -EFAULT;
-
- if (get_user(linkno, &arg->gp_linkno))
- return -EFAULT;
-
- if (name_size > PATH_MAX)
- return -EINVAL;
-
- ldata = kzalloc(sizeof(*ldata), GFP_NOFS);
- if (!ldata)
- return -ENOMEM;
-
- rc = linkea_data_new(ldata, &buf);
- if (rc < 0)
- goto ldata_free;
-
- rc = ll_xattr_list(inode, XATTR_NAME_LINK, XATTR_TRUSTED_T, buf.lb_buf,
- buf.lb_len, OBD_MD_FLXATTR);
- if (rc < 0)
- goto lb_free;
-
- rc = ll_linkea_decode(ldata, linkno, &parent_fid, &ln);
- if (rc < 0)
- goto lb_free;
-
- if (ln.ln_namelen >= name_size) {
- rc = -EOVERFLOW;
- goto lb_free;
- }
-
- if (copy_to_user(&arg->gp_fid, &parent_fid, sizeof(arg->gp_fid))) {
- rc = -EFAULT;
- goto lb_free;
- }
-
- if (copy_to_user(&arg->gp_name, ln.ln_name, ln.ln_namelen)) {
- rc = -EFAULT;
- goto lb_free;
- }
-
- if (put_user('\0', arg->gp_name + ln.ln_namelen)) {
- rc = -EFAULT;
- goto lb_free;
- }
-
-lb_free:
- kvfree(buf.lb_buf);
-ldata_free:
- kfree(ldata);
- return rc;
-}
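-
-/*
- * User-space sketch (hypothetical wrapper; struct getparent and the
- * LL_IOC_GETPARENT request are declared in lustre_user.h): iterate
- * over linkno until the call fails to list every hard link:
- *
- *   struct getparent *gp = malloc(sizeof(*gp) + NAME_MAX + 1);
- *
- *   gp->gp_name_size = NAME_MAX + 1;
- *   for (gp->gp_linkno = 0; !ioctl(fd, LL_IOC_GETPARENT, gp);
- *        gp->gp_linkno++)
- *     printf("link %u: %s\n", gp->gp_linkno, gp->gp_name);
- */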
diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c
deleted file mode 100644
index d7fb5533f707..000000000000
--- a/drivers/staging/lustre/lustre/llite/llite_mmap.c
+++ /dev/null
@@ -1,480 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include "llite_internal.h"
-
-static const struct vm_operations_struct ll_file_vm_ops;
-
-void policy_from_vma(union ldlm_policy_data *policy,
- struct vm_area_struct *vma, unsigned long addr,
- size_t count)
-{
- policy->l_extent.start = ((addr - vma->vm_start) & PAGE_MASK) +
- (vma->vm_pgoff << PAGE_SHIFT);
- policy->l_extent.end = (policy->l_extent.start + count - 1) |
- ~PAGE_MASK;
-}
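-
-/*
- * Worked example (4 KiB pages): with vm_pgoff == 2,
- * addr - vma->vm_start == 0x1234 and count == 100,
- * l_extent.start = 0x1000 + 0x2000 = 0x3000 and
- * l_extent.end = (0x3000 + 99) | 0xfff = 0x3fff, i.e. the extent is
- * expanded to cover whole pages.
- */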
-
-struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
- size_t count)
-{
- struct vm_area_struct *vma, *ret = NULL;
-
- /* mmap_sem must have been held by caller. */
- LASSERT(!down_write_trylock(&mm->mmap_sem));
-
- for (vma = find_vma(mm, addr);
- vma && vma->vm_start < (addr + count); vma = vma->vm_next) {
- if (vma->vm_ops && vma->vm_ops == &ll_file_vm_ops &&
- vma->vm_flags & VM_SHARED) {
- ret = vma;
- break;
- }
- }
- return ret;
-}
-
-/**
- * API-independent part of page fault initialization.
- * \param env - corresponding lu_env for processing
- * \param vma - virtual memory area addressed by the page fault
- * \param index - page index corresponding to the fault
- * \param ra_flags - vma readahead flags
- *
- * \return error codes from cl_io_init.
- */
-static struct cl_io *
-ll_fault_io_init(struct lu_env *env, struct vm_area_struct *vma,
- pgoff_t index, unsigned long *ra_flags)
-{
- struct file *file = vma->vm_file;
- struct inode *inode = file_inode(file);
- struct cl_io *io;
- struct cl_fault_io *fio;
- int rc;
-
- if (ll_file_nolock(file))
- return ERR_PTR(-EOPNOTSUPP);
-
-restart:
- io = vvp_env_thread_io(env);
- io->ci_obj = ll_i2info(inode)->lli_clob;
- LASSERT(io->ci_obj);
-
- fio = &io->u.ci_fault;
- fio->ft_index = index;
- fio->ft_executable = vma->vm_flags & VM_EXEC;
-
- /*
- * Disable VM_SEQ_READ and use VM_RAND_READ to make sure that
- * the kernel will not read pages not covered by LDLM locks in
- * filemap_nopage(). We do our own readahead in ll_readpage().
- */
- if (ra_flags)
- *ra_flags = vma->vm_flags & (VM_RAND_READ | VM_SEQ_READ);
- vma->vm_flags &= ~VM_SEQ_READ;
- vma->vm_flags |= VM_RAND_READ;
-
- CDEBUG(D_MMAP, "vm_flags: %lx (%lu %d)\n", vma->vm_flags,
- fio->ft_index, fio->ft_executable);
-
- rc = cl_io_init(env, io, CIT_FAULT, io->ci_obj);
- if (rc == 0) {
- struct vvp_io *vio = vvp_env_io(env);
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-
- LASSERT(vio->vui_cl.cis_io == io);
-
- /* The mmap lock must be MANDATORY because it has to cache pages. */
- io->ci_lockreq = CILR_MANDATORY;
- vio->vui_fd = fd;
- } else {
- LASSERT(rc < 0);
- cl_io_fini(env, io);
- if (io->ci_need_restart)
- goto restart;
-
- io = ERR_PTR(rc);
- }
-
- return io;
-}
-
-/* Code shared by the page_mkwrite implementations for RHEL5 and RHEL6 */
-static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
- bool *retry)
-{
- struct lu_env *env;
- struct cl_io *io;
- struct vvp_io *vio;
- int result;
- u16 refcheck;
- sigset_t old, new;
- struct inode *inode;
- struct ll_inode_info *lli;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- io = ll_fault_io_init(env, vma, vmpage->index, NULL);
- if (IS_ERR(io)) {
- result = PTR_ERR(io);
- goto out;
- }
-
- result = io->ci_result;
- if (result < 0)
- goto out_io;
-
- io->u.ci_fault.ft_mkwrite = 1;
- io->u.ci_fault.ft_writable = 1;
-
- vio = vvp_env_io(env);
- vio->u.fault.ft_vma = vma;
- vio->u.fault.ft_vmpage = vmpage;
-
- siginitsetinv(&new, sigmask(SIGKILL) | sigmask(SIGTERM));
- sigprocmask(SIG_BLOCK, &new, &old);
-
- inode = vvp_object_inode(io->ci_obj);
- lli = ll_i2info(inode);
-
- result = cl_io_loop(env, io);
-
- sigprocmask(SIG_SETMASK, &old, NULL);
-
- if (result == 0) {
- struct inode *inode = file_inode(vma->vm_file);
- struct ll_inode_info *lli = ll_i2info(inode);
-
- lock_page(vmpage);
- if (!vmpage->mapping) {
- unlock_page(vmpage);
-
- /* The page was truncated and the lock was cancelled; return
- * -ENODATA so that VM_FAULT_NOPAGE will be returned
- * to handle_mm_fault().
- */
- if (result == 0)
- result = -ENODATA;
- } else if (!PageDirty(vmpage)) {
- /* Race: the page was cleaned by ptlrpcd after it was
- * unlocked, so it has to be added to the dirty cache
- * again; otherwise this soon-to-be-dirty page will not
- * consume any grant and, even worse, will break the RPC
- * checksum if it is currently being transferred.
- */
- unlock_page(vmpage);
-
- CDEBUG(D_MMAP,
- "Race on page_mkwrite %p/%lu, page has been written out, retry.\n",
- vmpage, vmpage->index);
-
- *retry = true;
- result = -EAGAIN;
- }
-
- if (!result)
- set_bit(LLIF_DATA_MODIFIED, &lli->lli_flags);
- }
-
-out_io:
- cl_io_fini(env, io);
-out:
- cl_env_put(env, &refcheck);
- CDEBUG(D_MMAP, "%s mkwrite with %d\n", current->comm, result);
- LASSERT(ergo(result == 0, PageLocked(vmpage)));
-
- return result;
-}
-
-static inline int to_fault_error(int result)
-{
- switch (result) {
- case 0:
- result = VM_FAULT_LOCKED;
- break;
- case -EFAULT:
- result = VM_FAULT_NOPAGE;
- break;
- case -ENOMEM:
- result = VM_FAULT_OOM;
- break;
- default:
- result = VM_FAULT_SIGBUS;
- break;
- }
- return result;
-}
-
-/**
- * Lustre implementation of the vm_operations_struct::fault() method,
- * called by the VM to service a page fault (both in kernel and user space).
- *
- * \param vma - virtual memory area struct related to the page fault
- * \param vmf - structure describing the fault type and address
- *
- * \return allocated and filled _locked_ page for the address
- * \retval VM_FAULT_ERROR on general error
- * \retval NOPAGE_OOM when there is no memory to allocate a new page
- */
-static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct lu_env *env;
- struct cl_io *io;
- struct vvp_io *vio = NULL;
- struct page *vmpage;
- unsigned long ra_flags;
- int result = 0;
- int fault_ret = 0;
- u16 refcheck;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- io = ll_fault_io_init(env, vma, vmf->pgoff, &ra_flags);
- if (IS_ERR(io)) {
- result = to_fault_error(PTR_ERR(io));
- goto out;
- }
-
- result = io->ci_result;
- if (result == 0) {
- vio = vvp_env_io(env);
- vio->u.fault.ft_vma = vma;
- vio->u.fault.ft_vmpage = NULL;
- vio->u.fault.ft_vmf = vmf;
- vio->u.fault.ft_flags = 0;
- vio->u.fault.ft_flags_valid = false;
-
- /* May call ll_readpage() */
- ll_cl_add(vma->vm_file, env, io);
-
- result = cl_io_loop(env, io);
-
- ll_cl_remove(vma->vm_file, env);
-
- /* ft_flags are only valid if we reached
- * the call to filemap_fault
- */
- if (vio->u.fault.ft_flags_valid)
- fault_ret = vio->u.fault.ft_flags;
-
- vmpage = vio->u.fault.ft_vmpage;
- if (result != 0 && vmpage) {
- put_page(vmpage);
- vmf->page = NULL;
- }
- }
- cl_io_fini(env, io);
-
- vma->vm_flags |= ra_flags;
-
-out:
- cl_env_put(env, &refcheck);
- if (result != 0 && !(fault_ret & VM_FAULT_RETRY))
- fault_ret |= to_fault_error(result);
-
- CDEBUG(D_MMAP, "%s fault %d/%d\n", current->comm, fault_ret, result);
- return fault_ret;
-}
-
-static int ll_fault(struct vm_fault *vmf)
-{
- int count = 0;
- bool printed = false;
- int result;
- sigset_t old, new;
-
- /* Only SIGKILL and SIGTERM are allowed for fault/nopage/mkwrite
- * so that the process can be killed by the admin, but other
- * signals will not cause a segfault.
- */
- siginitsetinv(&new, sigmask(SIGKILL) | sigmask(SIGTERM));
- sigprocmask(SIG_BLOCK, &new, &old);
-
-restart:
- result = ll_fault0(vmf->vma, vmf);
- LASSERT(!(result & VM_FAULT_LOCKED));
- if (result == 0) {
- struct page *vmpage = vmf->page;
-
- /* check if this page has been truncated */
- lock_page(vmpage);
- if (unlikely(!vmpage->mapping)) { /* unlucky */
- unlock_page(vmpage);
- put_page(vmpage);
- vmf->page = NULL;
-
- if (!printed && ++count > 16) {
- CWARN("the page is under heavy contention, maybe your app(%s) needs revising :-)\n",
- current->comm);
- printed = true;
- }
-
- goto restart;
- }
-
- result = VM_FAULT_LOCKED;
- }
- sigprocmask(SIG_SETMASK, &old, NULL);
- return result;
-}
-
-static int ll_page_mkwrite(struct vm_fault *vmf)
-{
- struct vm_area_struct *vma = vmf->vma;
- int count = 0;
- bool printed = false;
- bool retry;
- int result;
-
- file_update_time(vma->vm_file);
- do {
- retry = false;
- result = ll_page_mkwrite0(vma, vmf->page, &retry);
-
- if (!printed && ++count > 16) {
- const struct dentry *de = vma->vm_file->f_path.dentry;
-
- CWARN("app(%s): the page %lu of file " DFID " is under heavy contention\n",
- current->comm, vmf->pgoff,
- PFID(ll_inode2fid(de->d_inode)));
- printed = true;
- }
- } while (retry);
-
- switch (result) {
- case 0:
- LASSERT(PageLocked(vmf->page));
- result = VM_FAULT_LOCKED;
- break;
- case -ENODATA:
- case -EAGAIN:
- case -EFAULT:
- result = VM_FAULT_NOPAGE;
- break;
- case -ENOMEM:
- result = VM_FAULT_OOM;
- break;
- default:
- result = VM_FAULT_SIGBUS;
- break;
- }
-
- return result;
-}
-
-/**
- * To avoid cancelling the locks that cover an mmapped region under lock
- * cache pressure, we track the mapped vma count in vvp_object::vob_mmap_cnt.
- */
-static void ll_vm_open(struct vm_area_struct *vma)
-{
- struct inode *inode = file_inode(vma->vm_file);
- struct vvp_object *vob = cl_inode2vvp(inode);
-
- LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0);
- atomic_inc(&vob->vob_mmap_cnt);
-}
-
-/**
- * Dual to ll_vm_open().
- */
-static void ll_vm_close(struct vm_area_struct *vma)
-{
- struct inode *inode = file_inode(vma->vm_file);
- struct vvp_object *vob = cl_inode2vvp(inode);
-
- atomic_dec(&vob->vob_mmap_cnt);
- LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0);
-}
-
-/* XXX put nice comment here. talk about __free_pte -> dirty pages and
- * nopage's reference passing to the pte
- */
-int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last)
-{
- int rc = -ENOENT;
-
- LASSERTF(last > first, "last %llu first %llu\n", last, first);
- if (mapping_mapped(mapping)) {
- rc = 0;
- unmap_mapping_range(mapping, first + PAGE_SIZE - 1,
- last - first + 1, 0);
- }
-
- return rc;
-}
-
-static const struct vm_operations_struct ll_file_vm_ops = {
- .fault = ll_fault,
- .page_mkwrite = ll_page_mkwrite,
- .open = ll_vm_open,
- .close = ll_vm_close,
-};
-
-int ll_file_mmap(struct file *file, struct vm_area_struct *vma)
-{
- struct inode *inode = file_inode(file);
- int rc;
-
- if (ll_file_nolock(file))
- return -EOPNOTSUPP;
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_MAP, 1);
- rc = generic_file_mmap(file, vma);
- if (rc == 0) {
- vma->vm_ops = &ll_file_vm_ops;
- vma->vm_ops->open(vma);
- /* update the inode's size and mtime */
- rc = ll_glimpse_size(inode);
- }
-
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c
deleted file mode 100644
index 14172688d55f..000000000000
--- a/drivers/staging/lustre/lustre/llite/llite_nfs.c
+++ /dev/null
@@ -1,375 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/lustre/llite/llite_nfs.c
- *
- * NFS export of Lustre Light File System
- *
- * Author: Yury Umanets <umka@clusterfs.com>
- * Author: Huang Hua <huanghua@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-#include "llite_internal.h"
-#include <linux/exportfs.h>
-
-__u32 get_uuid2int(const char *name, int len)
-{
- __u32 key0 = 0x12a3fe2d, key1 = 0x37abe8f9;
-
- while (len--) {
- __u32 key = key1 + (key0 ^ (*name++ * 7152373));
-
- if (key & 0x80000000)
- key -= 0x7fffffff;
- key1 = key0;
- key0 = key;
- }
- return (key0 << 1);
-}
-
-void get_uuid2fsid(const char *name, int len, __kernel_fsid_t *fsid)
-{
- __u64 key = 0, key0 = 0x12a3fe2d, key1 = 0x37abe8f9;
-
- while (len--) {
- key = key1 + (key0 ^ (*name++ * 7152373));
- if (key & 0x8000000000000000ULL)
- key -= 0x7fffffffffffffffULL;
- key1 = key0;
- key0 = key;
- }
-
- fsid->val[0] = key;
- fsid->val[1] = key >> 32;
-}
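-
-/*
- * Usage sketch (assumed caller shape): hashing a client UUID yields a
- * stable f_fsid for statfs(2):
- *
- *   __kernel_fsid_t fsid;
- *
- *   get_uuid2fsid(uuid->uuid, strlen(uuid->uuid), &fsid);
- */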
-
-struct inode *search_inode_for_lustre(struct super_block *sb,
- const struct lu_fid *fid)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct ptlrpc_request *req = NULL;
- struct inode *inode = NULL;
- int eadatalen = 0;
- unsigned long hash = cl_fid_build_ino(fid,
- ll_need_32bit_api(sbi));
- struct md_op_data *op_data;
- int rc;
-
- CDEBUG(D_INFO, "searching inode for:(%lu," DFID ")\n", hash, PFID(fid));
-
- inode = ilookup5(sb, hash, ll_test_inode_by_fid, (void *)fid);
- if (inode)
- return inode;
-
- rc = ll_get_default_mdsize(sbi, &eadatalen);
- if (rc)
- return ERR_PTR(rc);
-
- /* Because the inode is NULL, ll_prep_md_op_data() cannot
- * be used here, so we allocate op_data ourselves.
- */
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- if (!op_data)
- return ERR_PTR(-ENOMEM);
-
- op_data->op_fid1 = *fid;
- op_data->op_mode = eadatalen;
- op_data->op_valid = OBD_MD_FLEASIZE;
-
- /* mds_fid2dentry ignores f_type */
- rc = md_getattr(sbi->ll_md_exp, op_data, &req);
- kfree(op_data);
- if (rc) {
- CDEBUG(D_INFO, "can't get object attrs, fid " DFID ", rc %d\n",
- PFID(fid), rc);
- return ERR_PTR(rc);
- }
- rc = ll_prep_inode(&inode, req, sb, NULL);
- ptlrpc_req_finished(req);
- if (rc)
- return ERR_PTR(rc);
-
- return inode;
-}
-
-struct lustre_nfs_fid {
- struct lu_fid lnf_child;
- struct lu_fid lnf_parent;
-};
-
-static struct dentry *
-ll_iget_for_nfs(struct super_block *sb,
- struct lu_fid *fid, struct lu_fid *parent)
-{
- struct inode *inode;
- struct dentry *result;
-
- if (!fid_is_sane(fid))
- return ERR_PTR(-ESTALE);
-
- CDEBUG(D_INFO, "Get dentry for fid: " DFID "\n", PFID(fid));
-
- inode = search_inode_for_lustre(sb, fid);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
-
- if (is_bad_inode(inode)) {
- /* We did not find the right inode. */
- iput(inode);
- return ERR_PTR(-ESTALE);
- }
-
- result = d_obtain_alias(inode);
- if (IS_ERR(result)) {
- iput(inode);
- return result;
- }
-
- /*
- * In case d_obtain_alias() found a disconnected dentry, always update
- * lli_pfid so that a later operation (normally open) has the parent
- * FID, which may be used by the MDS to create data.
- */
- if (parent) {
- struct ll_inode_info *lli = ll_i2info(inode);
-
- spin_lock(&lli->lli_lock);
- lli->lli_pfid = *parent;
- spin_unlock(&lli->lli_lock);
- }
-
- /*
- * Need to signal to ll_intent_file_open() that we came from NFS,
- * so the opencache needs to be enabled for this dentry. N.B. the
- * alias was already obtained above; d_obtain_alias() consumes an
- * inode reference, so it must not be called a second time here.
- */
- ll_d2d(result)->lld_nfs_dentry = 1;
-
- return result;
-}
-
-/**
- * \a connectable - whether nfsd will connect the dentry itself or this
- * should be done by Lustre
- *
- * The return value is file handle type:
- * 1 -- contains child file handle;
- * 2 -- contains child file handle and parent file handle;
- * 255 -- error.
- */
-static int ll_encode_fh(struct inode *inode, __u32 *fh, int *plen,
- struct inode *parent)
-{
- int fileid_len = sizeof(struct lustre_nfs_fid) / 4;
- struct lustre_nfs_fid *nfs_fid = (void *)fh;
-
- CDEBUG(D_INFO, "%s: encoding for (" DFID ") maxlen=%d minlen=%d\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), *plen, fileid_len);
-
- if (*plen < fileid_len) {
- *plen = fileid_len;
- return FILEID_INVALID;
- }
-
- nfs_fid->lnf_child = *ll_inode2fid(inode);
- if (parent)
- nfs_fid->lnf_parent = *ll_inode2fid(parent);
- else
- fid_zero(&nfs_fid->lnf_parent);
- *plen = fileid_len;
-
- return FILEID_LUSTRE;
-}
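-
-/*
- * Size check example: struct lustre_nfs_fid carries two 16-byte
- * struct lu_fid members, so fileid_len is 32 / 4 = 8 32-bit words; a
- * caller offering a smaller *plen gets FILEID_INVALID and the required
- * length written back.
- */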
-
-static int ll_nfs_get_name_filldir(struct dir_context *ctx, const char *name,
- int namelen, loff_t hash, u64 ino,
- unsigned int type)
-{
- /* It is a hack to access lde_fid for comparison with lgd_fid,
- * so the input 'name' must be part of the 'lu_dirent'.
- */
- struct lu_dirent *lde = container_of((void *)name, struct lu_dirent,
- lde_name);
- struct ll_getname_data *lgd =
- container_of(ctx, struct ll_getname_data, ctx);
- struct lu_fid fid;
-
- fid_le_to_cpu(&fid, &lde->lde_fid);
- if (lu_fid_eq(&fid, &lgd->lgd_fid)) {
- memcpy(lgd->lgd_name, name, namelen);
- lgd->lgd_name[namelen] = 0;
- lgd->lgd_found = 1;
- }
- return lgd->lgd_found;
-}
-
-static int ll_get_name(struct dentry *dentry, char *name,
- struct dentry *child)
-{
- struct inode *dir = d_inode(dentry);
- int rc;
- struct ll_getname_data lgd = {
- .lgd_name = name,
- .lgd_fid = ll_i2info(d_inode(child))->lli_fid,
- .ctx.actor = ll_nfs_get_name_filldir,
- };
- struct md_op_data *op_data;
- __u64 pos = 0;
-
- if (!dir || !S_ISDIR(dir->i_mode)) {
- rc = -ENOTDIR;
- goto out;
- }
-
- if (!dir->i_fop) {
- rc = -EINVAL;
- goto out;
- }
-
- op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
- LUSTRE_OPC_ANY, dir);
- if (IS_ERR(op_data)) {
- rc = PTR_ERR(op_data);
- goto out;
- }
-
- op_data->op_max_pages = ll_i2sbi(dir)->ll_md_brw_pages;
- inode_lock(dir);
- rc = ll_dir_read(dir, &pos, op_data, &lgd.ctx);
- inode_unlock(dir);
- ll_finish_md_op_data(op_data);
- if (!rc && !lgd.lgd_found)
- rc = -ENOENT;
-out:
- return rc;
-}
-
-static struct dentry *ll_fh_to_dentry(struct super_block *sb, struct fid *fid,
- int fh_len, int fh_type)
-{
- struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;
-
- if (fh_type != FILEID_LUSTRE)
- return ERR_PTR(-EPROTO);
-
- return ll_iget_for_nfs(sb, &nfs_fid->lnf_child, &nfs_fid->lnf_parent);
-}
-
-static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid,
- int fh_len, int fh_type)
-{
- struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;
-
- if (fh_type != FILEID_LUSTRE)
- return ERR_PTR(-EPROTO);
-
- return ll_iget_for_nfs(sb, &nfs_fid->lnf_parent, NULL);
-}
-
-int ll_dir_get_parent_fid(struct inode *dir, struct lu_fid *parent_fid)
-{
- struct ptlrpc_request *req = NULL;
- struct ll_sb_info *sbi;
- struct mdt_body *body;
- static const char dotdot[] = "..";
- struct md_op_data *op_data;
- int rc;
- int lmmsize;
-
- LASSERT(dir && S_ISDIR(dir->i_mode));
-
- sbi = ll_s2sbi(dir->i_sb);
-
- CDEBUG(D_INFO, "%s: getting parent for (" DFID ")\n",
- ll_get_fsname(dir->i_sb, NULL, 0),
- PFID(ll_inode2fid(dir)));
-
- rc = ll_get_default_mdsize(sbi, &lmmsize);
- if (rc != 0)
- return rc;
-
- op_data = ll_prep_md_op_data(NULL, dir, NULL, dotdot,
- strlen(dotdot), lmmsize,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
- ll_finish_md_op_data(op_data);
- if (rc) {
- CERROR("%s: failure inode " DFID " get parent: rc = %d\n",
- ll_get_fsname(dir->i_sb, NULL, 0),
- PFID(ll_inode2fid(dir)), rc);
- return rc;
- }
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- /*
- * LU-3952: the MDT may have lost the FID of its parent; we should not
- * crash the NFS server, and ll_iget_for_nfs() will handle the error.
- */
- if (body->mbo_valid & OBD_MD_FLID) {
- CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n",
- PFID(ll_inode2fid(dir)), PFID(&body->mbo_fid1));
- *parent_fid = body->mbo_fid1;
- }
-
- ptlrpc_req_finished(req);
- return 0;
-}
-
-static struct dentry *ll_get_parent(struct dentry *dchild)
-{
- struct lu_fid parent_fid = { 0 };
- struct dentry *dentry;
- int rc;
-
- rc = ll_dir_get_parent_fid(dchild->d_inode, &parent_fid);
- if (rc)
- return ERR_PTR(rc);
-
- dentry = ll_iget_for_nfs(dchild->d_inode->i_sb, &parent_fid, NULL);
-
- return dentry;
-}
-
-const struct export_operations lustre_export_operations = {
- .get_parent = ll_get_parent,
- .encode_fh = ll_encode_fh,
- .get_name = ll_get_name,
- .fh_to_dentry = ll_fh_to_dentry,
- .fh_to_parent = ll_fh_to_parent,
-};
diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c
deleted file mode 100644
index 49bf1b7ee311..000000000000
--- a/drivers/staging/lustre/lustre/llite/lproc_llite.c
+++ /dev/null
@@ -1,1659 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <lprocfs_status.h>
-#include <linux/seq_file.h>
-#include <obd_support.h>
-
-#include "llite_internal.h"
-#include "vvp_internal.h"
-
-/* debugfs llite mount point registration */
-static const struct file_operations ll_rw_extents_stats_fops;
-static const struct file_operations ll_rw_extents_stats_pp_fops;
-static const struct file_operations ll_rw_offset_stats_fops;
-
-static ssize_t blocksize_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- struct obd_statfs osfs;
- int rc;
-
- rc = ll_statfs_internal(sbi->ll_sb, &osfs,
- get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ,
- OBD_STATFS_NODELAY);
- if (!rc)
- return sprintf(buf, "%u\n", osfs.os_bsize);
-
- return rc;
-}
-LUSTRE_RO_ATTR(blocksize);
-
-static ssize_t kbytestotal_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- struct obd_statfs osfs;
- int rc;
-
- rc = ll_statfs_internal(sbi->ll_sb, &osfs,
- get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ,
- OBD_STATFS_NODELAY);
- if (!rc) {
- __u32 blk_size = osfs.os_bsize >> 10;
- __u64 result = osfs.os_blocks;
-
- while (blk_size >>= 1)
- result <<= 1;
-
- rc = sprintf(buf, "%llu\n", result);
- }
-
- return rc;
-}
-LUSTRE_RO_ATTR(kbytestotal);
-
-static ssize_t kbytesfree_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- struct obd_statfs osfs;
- int rc;
-
- rc = ll_statfs_internal(sbi->ll_sb, &osfs,
- get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ,
- OBD_STATFS_NODELAY);
- if (!rc) {
- __u32 blk_size = osfs.os_bsize >> 10;
- __u64 result = osfs.os_bfree;
-
- while (blk_size >>= 1)
- result <<= 1;
-
- rc = sprintf(buf, "%llu\n", result);
- }
-
- return rc;
-}
-LUSTRE_RO_ATTR(kbytesfree);
-
-static ssize_t kbytesavail_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- struct obd_statfs osfs;
- int rc;
-
- rc = ll_statfs_internal(sbi->ll_sb, &osfs,
- get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ,
- OBD_STATFS_NODELAY);
- if (!rc) {
- __u32 blk_size = osfs.os_bsize >> 10;
- __u64 result = osfs.os_bavail;
-
- while (blk_size >>= 1)
- result <<= 1;
-
- rc = sprintf(buf, "%llu\n", result);
- }
-
- return rc;
-}
-LUSTRE_RO_ATTR(kbytesavail);
-
-static ssize_t filestotal_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- struct obd_statfs osfs;
- int rc;
-
- rc = ll_statfs_internal(sbi->ll_sb, &osfs,
- get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ,
- OBD_STATFS_NODELAY);
- if (!rc)
- return sprintf(buf, "%llu\n", osfs.os_files);
-
- return rc;
-}
-LUSTRE_RO_ATTR(filestotal);
-
-static ssize_t filesfree_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- struct obd_statfs osfs;
- int rc;
-
- rc = ll_statfs_internal(sbi->ll_sb, &osfs,
- get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ,
- OBD_STATFS_NODELAY);
- if (!rc)
- return sprintf(buf, "%llu\n", osfs.os_ffree);
-
- return rc;
-}
-LUSTRE_RO_ATTR(filesfree);
-
-static ssize_t client_type_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- return sprintf(buf, "local client\n");
-}
-LUSTRE_RO_ATTR(client_type);
-
-static ssize_t fstype_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%s\n", sbi->ll_sb->s_type->name);
-}
-LUSTRE_RO_ATTR(fstype);
-
-static ssize_t uuid_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%s\n", sbi->ll_sb_uuid.uuid);
-}
-LUSTRE_RO_ATTR(uuid);
-
-static int ll_site_stats_seq_show(struct seq_file *m, void *v)
-{
- struct super_block *sb = m->private;
-
- /*
- * See description of statistical counters in struct cl_site, and
- * struct lu_site.
- */
- return cl_site_stats_print(lu2cl_site(ll_s2sbi(sb)->ll_site), m);
-}
-
-LPROC_SEQ_FOPS_RO(ll_site_stats);
-
-static ssize_t max_read_ahead_mb_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- long pages_number;
- int mult;
-
- spin_lock(&sbi->ll_lock);
- pages_number = sbi->ll_ra_info.ra_max_pages;
- spin_unlock(&sbi->ll_lock);
-
- mult = 1 << (20 - PAGE_SHIFT);
- return lprocfs_read_frac_helper(buf, PAGE_SIZE, pages_number, mult);
-}
-
-static ssize_t max_read_ahead_mb_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long pages_number;
-
- rc = kstrtoul(buffer, 10, &pages_number);
- if (rc)
- return rc;
-
- pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */
-
- if (pages_number > totalram_pages / 2) {
- CERROR("can't set file readahead more than %lu MB\n",
- totalram_pages >> (20 - PAGE_SHIFT + 1)); /*1/2 of RAM*/
- return -ERANGE;
- }
-
- spin_lock(&sbi->ll_lock);
- sbi->ll_ra_info.ra_max_pages = pages_number;
- spin_unlock(&sbi->ll_lock);
-
- return count;
-}
-LUSTRE_RW_ATTR(max_read_ahead_mb);
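-
-/*
- * Worked example (4 KiB pages): writing "64" stores
- * 64 << (20 - 12) = 16384 pages in ra_max_pages; anything above half
- * of totalram_pages is rejected with -ERANGE.
- */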
-
-static ssize_t max_read_ahead_per_file_mb_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- long pages_number;
- int mult;
-
- spin_lock(&sbi->ll_lock);
- pages_number = sbi->ll_ra_info.ra_max_pages_per_file;
- spin_unlock(&sbi->ll_lock);
-
- mult = 1 << (20 - PAGE_SHIFT);
- return lprocfs_read_frac_helper(buf, PAGE_SIZE, pages_number, mult);
-}
-
-static ssize_t max_read_ahead_per_file_mb_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long pages_number;
-
- rc = kstrtoul(buffer, 10, &pages_number);
- if (rc)
- return rc;
-
- if (pages_number > sbi->ll_ra_info.ra_max_pages) {
- CERROR("can't set file readahead more than max_read_ahead_mb %lu MB\n",
- sbi->ll_ra_info.ra_max_pages);
- return -ERANGE;
- }
-
- spin_lock(&sbi->ll_lock);
- sbi->ll_ra_info.ra_max_pages_per_file = pages_number;
- spin_unlock(&sbi->ll_lock);
-
- return count;
-}
-LUSTRE_RW_ATTR(max_read_ahead_per_file_mb);
-
-static ssize_t max_read_ahead_whole_mb_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- long pages_number;
- int mult;
-
- spin_lock(&sbi->ll_lock);
- pages_number = sbi->ll_ra_info.ra_max_read_ahead_whole_pages;
- spin_unlock(&sbi->ll_lock);
-
- mult = 1 << (20 - PAGE_SHIFT);
- return lprocfs_read_frac_helper(buf, PAGE_SIZE, pages_number, mult);
-}
-
-static ssize_t max_read_ahead_whole_mb_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long pages_number;
-
- rc = kstrtoul(buffer, 10, &pages_number);
- if (rc)
- return rc;
-
- /* Cap this at the current max readahead window size, the readahead
- * algorithm does this anyway so it's pointless to set it larger.
- */
- if (pages_number > sbi->ll_ra_info.ra_max_pages_per_file) {
- CERROR("can't set max_read_ahead_whole_mb more than max_read_ahead_per_file_mb: %lu\n",
- sbi->ll_ra_info.ra_max_pages_per_file >> (20 - PAGE_SHIFT));
- return -ERANGE;
- }
-
- spin_lock(&sbi->ll_lock);
- sbi->ll_ra_info.ra_max_read_ahead_whole_pages = pages_number;
- spin_unlock(&sbi->ll_lock);
-
- return count;
-}
-LUSTRE_RW_ATTR(max_read_ahead_whole_mb);
-
-static int ll_max_cached_mb_seq_show(struct seq_file *m, void *v)
-{
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct cl_client_cache *cache = sbi->ll_cache;
- int shift = 20 - PAGE_SHIFT;
- long max_cached_mb;
- long unused_mb;
-
- max_cached_mb = cache->ccc_lru_max >> shift;
- unused_mb = atomic_long_read(&cache->ccc_lru_left) >> shift;
- seq_printf(m,
- "users: %d\n"
- "max_cached_mb: %ld\n"
- "used_mb: %ld\n"
- "unused_mb: %ld\n"
- "reclaim_count: %u\n",
- atomic_read(&cache->ccc_users),
- max_cached_mb,
- max_cached_mb - unused_mb,
- unused_mb,
- cache->ccc_lru_shrinkers);
- return 0;
-}
-
-static ssize_t ll_max_cached_mb_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct super_block *sb = ((struct seq_file *)file->private_data)->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct cl_client_cache *cache = sbi->ll_cache;
- struct lu_env *env;
- long diff = 0;
- long nrpages = 0;
- u16 refcheck;
- long pages_number;
- int mult;
- long rc;
- u64 val;
- char kernbuf[128];
-
- if (count >= sizeof(kernbuf))
- return -EINVAL;
-
- if (copy_from_user(kernbuf, buffer, count))
- return -EFAULT;
- kernbuf[count] = 0;
-
- mult = 1 << (20 - PAGE_SHIFT);
- buffer += lprocfs_find_named_value(kernbuf, "max_cached_mb:", &count) -
- kernbuf;
- rc = lprocfs_write_frac_u64_helper(buffer, count, &val, mult);
- if (rc)
- return rc;
-
- if (val > LONG_MAX)
- return -ERANGE;
- pages_number = (long)val;
-
- if (pages_number < 0 || pages_number > totalram_pages) {
- CERROR("%s: can't set max cache more than %lu MB\n",
- ll_get_fsname(sb, NULL, 0),
- totalram_pages >> (20 - PAGE_SHIFT));
- return -ERANGE;
- }
-
- spin_lock(&sbi->ll_lock);
- diff = pages_number - cache->ccc_lru_max;
- spin_unlock(&sbi->ll_lock);
-
- /* easy - add more LRU slots. */
- if (diff >= 0) {
- atomic_long_add(diff, &cache->ccc_lru_left);
- rc = 0;
- goto out;
- }
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- diff = -diff;
- while (diff > 0) {
- long tmp;
-
- /* reduce LRU budget from free slots. */
- do {
- long ov, nv;
-
- ov = atomic_long_read(&cache->ccc_lru_left);
- if (ov == 0)
- break;
-
- nv = ov > diff ? ov - diff : 0;
- rc = atomic_long_cmpxchg(&cache->ccc_lru_left, ov, nv);
- if (likely(ov == rc)) {
- diff -= ov - nv;
- nrpages += ov - nv;
- break;
- }
- } while (1);
-
- if (diff <= 0)
- break;
-
- if (!sbi->ll_dt_exp) { /* being initialized */
- rc = 0;
- goto out;
- }
-
- /* difficult - have to ask OSCs to drop LRU slots. */
- tmp = diff << 1;
- rc = obd_set_info_async(env, sbi->ll_dt_exp,
- sizeof(KEY_CACHE_LRU_SHRINK),
- KEY_CACHE_LRU_SHRINK,
- sizeof(tmp), &tmp, NULL);
- if (rc < 0)
- break;
- }
- cl_env_put(env, &refcheck);
-
-out:
- if (rc >= 0) {
- spin_lock(&sbi->ll_lock);
- cache->ccc_lru_max = pages_number;
- spin_unlock(&sbi->ll_lock);
- rc = count;
- } else {
- atomic_long_add(nrpages, &cache->ccc_lru_left);
- }
- return rc;
-}
-
-LPROC_SEQ_FOPS(ll_max_cached_mb);
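-
-/*
- * Input sketch (path illustrative): the write side accepts the same
- * "max_cached_mb: N" label that the read side prints:
- *
- *   echo "max_cached_mb: 512" > <debugfs>/llite/<fsname>-<id>/max_cached_mb
- *
- * Shrinking below current usage first drains free LRU slots and then
- * asks the OSCs to drop slots via KEY_CACHE_LRU_SHRINK.
- */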
-
-static ssize_t checksum_pages_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%u\n", (sbi->ll_flags & LL_SBI_CHECKSUM) ? 1 : 0);
-}
-
-static ssize_t checksum_pages_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long val;
-
- if (!sbi->ll_dt_exp)
- /* Not set up yet */
- return -EAGAIN;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
- if (val)
- sbi->ll_flags |= LL_SBI_CHECKSUM;
- else
- sbi->ll_flags &= ~LL_SBI_CHECKSUM;
-
- rc = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM),
- KEY_CHECKSUM, sizeof(val), &val, NULL);
- if (rc)
- CWARN("Failed to set OSC checksum flags: %d\n", rc);
-
- return count;
-}
-LUSTRE_RW_ATTR(checksum_pages);
-
-static ssize_t ll_rd_track_id(struct kobject *kobj, char *buf,
- enum stats_track_type type)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- if (sbi->ll_stats_track_type == type)
- return sprintf(buf, "%d\n", sbi->ll_stats_track_id);
- else if (sbi->ll_stats_track_type == STATS_TRACK_ALL)
- return sprintf(buf, "0 (all)\n");
- else
- return sprintf(buf, "untracked\n");
-}
-
-static ssize_t ll_wr_track_id(struct kobject *kobj, const char *buffer,
- size_t count,
- enum stats_track_type type)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long pid;
-
- rc = kstrtoul(buffer, 10, &pid);
- if (rc)
- return rc;
- sbi->ll_stats_track_id = pid;
- if (pid == 0)
- sbi->ll_stats_track_type = STATS_TRACK_ALL;
- else
- sbi->ll_stats_track_type = type;
- lprocfs_clear_stats(sbi->ll_stats);
- return count;
-}
-
-static ssize_t stats_track_pid_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- return ll_rd_track_id(kobj, buf, STATS_TRACK_PID);
-}
-
-static ssize_t stats_track_pid_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- return ll_wr_track_id(kobj, buffer, count, STATS_TRACK_PID);
-}
-LUSTRE_RW_ATTR(stats_track_pid);
-
-static ssize_t stats_track_ppid_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- return ll_rd_track_id(kobj, buf, STATS_TRACK_PPID);
-}
-
-static ssize_t stats_track_ppid_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- return ll_wr_track_id(kobj, buffer, count, STATS_TRACK_PPID);
-}
-LUSTRE_RW_ATTR(stats_track_ppid);
-
-static ssize_t stats_track_gid_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- return ll_rd_track_id(kobj, buf, STATS_TRACK_GID);
-}
-
-static ssize_t stats_track_gid_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- return ll_wr_track_id(kobj, buffer, count, STATS_TRACK_GID);
-}
-LUSTRE_RW_ATTR(stats_track_gid);
-
-static ssize_t statahead_max_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%u\n", sbi->ll_sa_max);
-}
-
-static ssize_t statahead_max_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long val;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- if (val <= LL_SA_RPC_MAX)
- sbi->ll_sa_max = val;
- else
- CERROR("Bad statahead_max value %lu. Valid values are in the range [0, %d]\n",
- val, LL_SA_RPC_MAX);
-
- return count;
-}
-LUSTRE_RW_ATTR(statahead_max);
-
-static ssize_t statahead_agl_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%u\n", sbi->ll_flags & LL_SBI_AGL_ENABLED ? 1 : 0);
-}
-
-static ssize_t statahead_agl_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long val;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- if (val)
- sbi->ll_flags |= LL_SBI_AGL_ENABLED;
- else
- sbi->ll_flags &= ~LL_SBI_AGL_ENABLED;
-
- return count;
-}
-LUSTRE_RW_ATTR(statahead_agl);
-
-static int ll_statahead_stats_seq_show(struct seq_file *m, void *v)
-{
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
-
- seq_printf(m,
- "statahead total: %u\n"
- "statahead wrong: %u\n"
- "agl total: %u\n",
- atomic_read(&sbi->ll_sa_total),
- atomic_read(&sbi->ll_sa_wrong),
- atomic_read(&sbi->ll_agl_total));
- return 0;
-}
-
-LPROC_SEQ_FOPS_RO(ll_statahead_stats);
-
-static ssize_t lazystatfs_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%u\n", sbi->ll_flags & LL_SBI_LAZYSTATFS ? 1 : 0);
-}
-
-static ssize_t lazystatfs_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long val;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- if (val)
- sbi->ll_flags |= LL_SBI_LAZYSTATFS;
- else
- sbi->ll_flags &= ~LL_SBI_LAZYSTATFS;
-
- return count;
-}
-LUSTRE_RW_ATTR(lazystatfs);
-
-static ssize_t max_easize_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- unsigned int ealen;
- int rc;
-
- rc = ll_get_max_mdsize(sbi, &ealen);
- if (rc)
- return rc;
-
- return sprintf(buf, "%u\n", ealen);
-}
-LUSTRE_RO_ATTR(max_easize);
-
-/**
- * Get default_easize.
- *
- * \see client_obd::cl_default_mds_easize
- *
- * \param[in] kobj kernel object for sysfs tree
- * \param[in] attr attribute of this kernel object
- * \param[in] buf buffer to write data into
- *
- * \retval positive \a count on success
- * \retval negative negated errno on failure
- */
-static ssize_t default_easize_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- unsigned int ealen;
- int rc;
-
- rc = ll_get_default_mdsize(sbi, &ealen);
- if (rc)
- return rc;
-
- return sprintf(buf, "%u\n", ealen);
-}
-
-/**
- * Set default_easize.
- *
- * Range checking on the passed value is handled by
- * ll_set_default_mdsize().
- *
- * \see client_obd::cl_default_mds_easize
- *
- * \param[in] kobj kernel object for sysfs tree
- * \param[in] attr attribute of this kernel object
- * \param[in] buffer string passed from user space
- * \param[in] count \a buffer length
- *
- * \retval positive \a count on success
- * \retval negative negated errno on failure
- */
-static ssize_t default_easize_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- unsigned long val;
- int rc;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- rc = ll_set_default_mdsize(sbi, val);
- if (rc)
- return rc;
-
- return count;
-}
-LUSTRE_RW_ATTR(default_easize);
-
-static int ll_sbi_flags_seq_show(struct seq_file *m, void *v)
-{
- const char *str[] = LL_SBI_FLAGS;
- struct super_block *sb = m->private;
- int flags = ll_s2sbi(sb)->ll_flags;
- int i = 0;
-
- while (flags != 0) {
- if (ARRAY_SIZE(str) <= i) {
- CERROR("%s: Revise array LL_SBI_FLAGS to match sbi flags please.\n",
- ll_get_fsname(sb, NULL, 0));
- return -EINVAL;
- }
-
- if (flags & 0x1)
- seq_printf(m, "%s ", str[i]);
- flags >>= 1;
- ++i;
- }
- seq_puts(m, "\b\n");
- return 0;
-}
-
-LPROC_SEQ_FOPS_RO(ll_sbi_flags);
-
-static ssize_t xattr_cache_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%u\n", sbi->ll_xattr_cache_enabled);
-}
-
-static ssize_t xattr_cache_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long val;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- if (val != 0 && val != 1)
- return -ERANGE;
-
- if (val == 1 && !(sbi->ll_flags & LL_SBI_XATTR_CACHE))
- return -ENOTSUPP;
-
- sbi->ll_xattr_cache_enabled = val;
-
- return count;
-}
-LUSTRE_RW_ATTR(xattr_cache);
-
-static int ll_unstable_stats_seq_show(struct seq_file *m, void *v)
-{
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct cl_client_cache *cache = sbi->ll_cache;
- long pages;
- int mb;
-
- pages = atomic_long_read(&cache->ccc_unstable_nr);
- mb = (pages * PAGE_SIZE) >> 20;
-
- seq_printf(m,
- "unstable_check: %8d\n"
- "unstable_pages: %12ld\n"
- "unstable_mb: %8d\n",
- cache->ccc_unstable_check, pages, mb);
-
- return 0;
-}
-
-static ssize_t ll_unstable_stats_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct super_block *sb = ((struct seq_file *)file->private_data)->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- char kernbuf[128];
- int val, rc;
-
- if (!count)
- return 0;
- if (count >= sizeof(kernbuf))
- return -EINVAL;
-
- if (copy_from_user(kernbuf, buffer, count))
- return -EFAULT;
- kernbuf[count] = 0;
-
- buffer += lprocfs_find_named_value(kernbuf, "unstable_check:", &count) -
- kernbuf;
- rc = lprocfs_write_helper(buffer, count, &val);
- if (rc < 0)
- return rc;
-
- /* borrow lru lock to set the value */
- spin_lock(&sbi->ll_cache->ccc_lru_lock);
- sbi->ll_cache->ccc_unstable_check = !!val;
- spin_unlock(&sbi->ll_cache->ccc_lru_lock);
-
- return count;
-}
-LPROC_SEQ_FOPS(ll_unstable_stats);
-
-static int ll_root_squash_seq_show(struct seq_file *m, void *v)
-{
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct root_squash_info *squash = &sbi->ll_squash;
-
- seq_printf(m, "%u:%u\n", squash->rsi_uid, squash->rsi_gid);
- return 0;
-}
-
-static ssize_t ll_root_squash_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct seq_file *m = file->private_data;
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct root_squash_info *squash = &sbi->ll_squash;
-
- return lprocfs_wr_root_squash(buffer, count, squash,
- ll_get_fsname(sb, NULL, 0));
-}
-LPROC_SEQ_FOPS(ll_root_squash);
-
-static int ll_nosquash_nids_seq_show(struct seq_file *m, void *v)
-{
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct root_squash_info *squash = &sbi->ll_squash;
- int len;
-
- down_read(&squash->rsi_sem);
- if (!list_empty(&squash->rsi_nosquash_nids)) {
- len = cfs_print_nidlist(m->buf + m->count, m->size - m->count,
- &squash->rsi_nosquash_nids);
- m->count += len;
- seq_puts(m, "\n");
- } else {
- seq_puts(m, "NONE\n");
- }
- up_read(&squash->rsi_sem);
-
- return 0;
-}
-
-static ssize_t ll_nosquash_nids_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct seq_file *m = file->private_data;
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct root_squash_info *squash = &sbi->ll_squash;
- int rc;
-
- rc = lprocfs_wr_nosquash_nids(buffer, count, squash,
- ll_get_fsname(sb, NULL, 0));
- if (rc < 0)
- return rc;
-
- ll_compute_rootsquash_state(sbi);
-
- return rc;
-}
-
-LPROC_SEQ_FOPS(ll_nosquash_nids);
-
-static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
- /* { "mntpt_path", ll_rd_path, 0, 0 }, */
- { "site", &ll_site_stats_fops, NULL, 0 },
- /* { "filegroups", lprocfs_rd_filegroups, 0, 0 }, */
- { "max_cached_mb", &ll_max_cached_mb_fops, NULL },
- { "statahead_stats", &ll_statahead_stats_fops, NULL, 0 },
- { "unstable_stats", &ll_unstable_stats_fops, NULL },
- { "sbi_flags", &ll_sbi_flags_fops, NULL, 0 },
- { .name = "root_squash",
- .fops = &ll_root_squash_fops },
- { .name = "nosquash_nids",
- .fops = &ll_nosquash_nids_fops },
- { NULL }
-};
-
-#define MAX_STRING_SIZE 128
-
-static struct attribute *llite_attrs[] = {
- &lustre_attr_blocksize.attr,
- &lustre_attr_kbytestotal.attr,
- &lustre_attr_kbytesfree.attr,
- &lustre_attr_kbytesavail.attr,
- &lustre_attr_filestotal.attr,
- &lustre_attr_filesfree.attr,
- &lustre_attr_client_type.attr,
- &lustre_attr_fstype.attr,
- &lustre_attr_uuid.attr,
- &lustre_attr_max_read_ahead_mb.attr,
- &lustre_attr_max_read_ahead_per_file_mb.attr,
- &lustre_attr_max_read_ahead_whole_mb.attr,
- &lustre_attr_checksum_pages.attr,
- &lustre_attr_stats_track_pid.attr,
- &lustre_attr_stats_track_ppid.attr,
- &lustre_attr_stats_track_gid.attr,
- &lustre_attr_statahead_max.attr,
- &lustre_attr_statahead_agl.attr,
- &lustre_attr_lazystatfs.attr,
- &lustre_attr_max_easize.attr,
- &lustre_attr_default_easize.attr,
- &lustre_attr_xattr_cache.attr,
- NULL,
-};
-
-static void llite_sb_release(struct kobject *kobj)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- complete(&sbi->ll_kobj_unregister);
-}
-
-static struct kobj_type llite_ktype = {
- .default_attrs = llite_attrs,
- .sysfs_ops = &lustre_sysfs_ops,
- .release = llite_sb_release,
-};
-
-static const struct llite_file_opcode {
- __u32 opcode;
- __u32 type;
- const char *opname;
-} llite_opcode_table[LPROC_LL_FILE_OPCODES] = {
- /* file operation */
- { LPROC_LL_DIRTY_HITS, LPROCFS_TYPE_REGS, "dirty_pages_hits" },
- { LPROC_LL_DIRTY_MISSES, LPROCFS_TYPE_REGS, "dirty_pages_misses" },
- { LPROC_LL_READ_BYTES, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
- "read_bytes" },
- { LPROC_LL_WRITE_BYTES, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
- "write_bytes" },
- { LPROC_LL_BRW_READ, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_PAGES,
- "brw_read" },
- { LPROC_LL_BRW_WRITE, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_PAGES,
- "brw_write" },
- { LPROC_LL_IOCTL, LPROCFS_TYPE_REGS, "ioctl" },
- { LPROC_LL_OPEN, LPROCFS_TYPE_REGS, "open" },
- { LPROC_LL_RELEASE, LPROCFS_TYPE_REGS, "close" },
- { LPROC_LL_MAP, LPROCFS_TYPE_REGS, "mmap" },
- { LPROC_LL_LLSEEK, LPROCFS_TYPE_REGS, "seek" },
- { LPROC_LL_FSYNC, LPROCFS_TYPE_REGS, "fsync" },
- { LPROC_LL_READDIR, LPROCFS_TYPE_REGS, "readdir" },
- /* inode operation */
- { LPROC_LL_SETATTR, LPROCFS_TYPE_REGS, "setattr" },
- { LPROC_LL_TRUNC, LPROCFS_TYPE_REGS, "truncate" },
- { LPROC_LL_FLOCK, LPROCFS_TYPE_REGS, "flock" },
- { LPROC_LL_GETATTR, LPROCFS_TYPE_REGS, "getattr" },
- /* dir inode operation */
- { LPROC_LL_CREATE, LPROCFS_TYPE_REGS, "create" },
- { LPROC_LL_LINK, LPROCFS_TYPE_REGS, "link" },
- { LPROC_LL_UNLINK, LPROCFS_TYPE_REGS, "unlink" },
- { LPROC_LL_SYMLINK, LPROCFS_TYPE_REGS, "symlink" },
- { LPROC_LL_MKDIR, LPROCFS_TYPE_REGS, "mkdir" },
- { LPROC_LL_RMDIR, LPROCFS_TYPE_REGS, "rmdir" },
- { LPROC_LL_MKNOD, LPROCFS_TYPE_REGS, "mknod" },
- { LPROC_LL_RENAME, LPROCFS_TYPE_REGS, "rename" },
- /* special inode operation */
- { LPROC_LL_STAFS, LPROCFS_TYPE_REGS, "statfs" },
- { LPROC_LL_ALLOC_INODE, LPROCFS_TYPE_REGS, "alloc_inode" },
- { LPROC_LL_SETXATTR, LPROCFS_TYPE_REGS, "setxattr" },
- { LPROC_LL_GETXATTR, LPROCFS_TYPE_REGS, "getxattr" },
- { LPROC_LL_GETXATTR_HITS, LPROCFS_TYPE_REGS, "getxattr_hits" },
- { LPROC_LL_LISTXATTR, LPROCFS_TYPE_REGS, "listxattr" },
- { LPROC_LL_REMOVEXATTR, LPROCFS_TYPE_REGS, "removexattr" },
- { LPROC_LL_INODE_PERM, LPROCFS_TYPE_REGS, "inode_permission" },
-};
-
-void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count)
-{
- if (!sbi->ll_stats)
- return;
- if (sbi->ll_stats_track_type == STATS_TRACK_ALL)
- lprocfs_counter_add(sbi->ll_stats, op, count);
- else if (sbi->ll_stats_track_type == STATS_TRACK_PID &&
- sbi->ll_stats_track_id == current->pid)
- lprocfs_counter_add(sbi->ll_stats, op, count);
- else if (sbi->ll_stats_track_type == STATS_TRACK_PPID &&
- sbi->ll_stats_track_id == current->real_parent->pid)
- lprocfs_counter_add(sbi->ll_stats, op, count);
- else if (sbi->ll_stats_track_type == STATS_TRACK_GID &&
- sbi->ll_stats_track_id ==
- from_kgid(&init_user_ns, current_gid()))
- lprocfs_counter_add(sbi->ll_stats, op, count);
-}
-EXPORT_SYMBOL(ll_stats_ops_tally);
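
ll_stats_ops_tally() filters by the configured tracking mode before
bumping a counter: track everything, one PID, one parent PID, or one
GID. The same filter restated as a small user-space predicate (the enum
values are illustrative, not the driver's STATS_TRACK_* definitions):

	#include <stdbool.h>
	#include <stdio.h>

	enum stats_track { TRACK_ALL, TRACK_PID, TRACK_PPID, TRACK_GID };

	/* sketch of the check guarding lprocfs_counter_add() */
	static bool should_tally(enum stats_track type, int id,
				 int pid, int ppid, int gid)
	{
		return type == TRACK_ALL ||
		       (type == TRACK_PID && id == pid) ||
		       (type == TRACK_PPID && id == ppid) ||
		       (type == TRACK_GID && id == gid);
	}

	int main(void)
	{
		printf("%d\n", should_tally(TRACK_PID, 42, 42, 1, 0)); /* 1 */
		printf("%d\n", should_tally(TRACK_PID, 42, 43, 1, 0)); /* 0 */
		return 0;
	}
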
-
-static const char *ra_stat_string[] = {
- [RA_STAT_HIT] = "hits",
- [RA_STAT_MISS] = "misses",
- [RA_STAT_DISTANT_READPAGE] = "readpage not consecutive",
- [RA_STAT_MISS_IN_WINDOW] = "miss inside window",
- [RA_STAT_FAILED_GRAB_PAGE] = "failed grab_cache_page",
- [RA_STAT_FAILED_MATCH] = "failed lock match",
- [RA_STAT_DISCARDED] = "read but discarded",
- [RA_STAT_ZERO_LEN] = "zero length file",
- [RA_STAT_ZERO_WINDOW] = "zero size window",
- [RA_STAT_EOF] = "read-ahead to EOF",
- [RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue",
- [RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page",
- [RA_STAT_FAILED_REACH_END] = "failed to reach end"
-};
-
-int ldebugfs_register_mountpoint(struct dentry *parent,
- struct super_block *sb, char *osc, char *mdc)
-{
- struct lustre_sb_info *lsi = s2lsi(sb);
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct obd_device *obd;
- struct dentry *dir;
- char name[MAX_STRING_SIZE + 1], *ptr;
- int err, id, len;
-
- name[MAX_STRING_SIZE] = '\0';
-
- LASSERT(sbi);
- LASSERT(mdc);
- LASSERT(osc);
-
- /* Get fsname */
- len = strlen(lsi->lsi_lmd->lmd_profile);
- ptr = strrchr(lsi->lsi_lmd->lmd_profile, '-');
- if (ptr && (strcmp(ptr, "-client") == 0))
- len -= 7;
-
- /* Mount info */
- snprintf(name, MAX_STRING_SIZE, "%.*s-%p", len,
- lsi->lsi_lmd->lmd_profile, sb);
-
- dir = debugfs_create_dir(name, parent);
- sbi->ll_debugfs_entry = dir;
-
- debugfs_create_file("dump_page_cache", 0444, dir, sbi,
- &vvp_dump_pgcache_file_ops);
- debugfs_create_file("extents_stats", 0644, dir, sbi,
- &ll_rw_extents_stats_fops);
- debugfs_create_file("extents_stats_per_process", 0644,
- dir, sbi, &ll_rw_extents_stats_pp_fops);
- debugfs_create_file("offset_stats", 0644, dir, sbi,
- &ll_rw_offset_stats_fops);
-
- /* File operations stats */
- sbi->ll_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES,
- LPROCFS_STATS_FLAG_NONE);
- if (!sbi->ll_stats) {
- err = -ENOMEM;
- goto out;
- }
- /* do counter init */
- for (id = 0; id < LPROC_LL_FILE_OPCODES; id++) {
- __u32 type = llite_opcode_table[id].type;
- void *ptr = NULL;
-
- if (type & LPROCFS_TYPE_REGS)
- ptr = "regs";
- else if (type & LPROCFS_TYPE_BYTES)
- ptr = "bytes";
- else if (type & LPROCFS_TYPE_PAGES)
- ptr = "pages";
- lprocfs_counter_init(sbi->ll_stats,
- llite_opcode_table[id].opcode,
- (type & LPROCFS_CNTR_AVGMINMAX),
- llite_opcode_table[id].opname, ptr);
- }
-
- debugfs_create_file("stats", 0644, sbi->ll_debugfs_entry, sbi->ll_stats,
- &lprocfs_stats_seq_fops);
-
- sbi->ll_ra_stats = lprocfs_alloc_stats(ARRAY_SIZE(ra_stat_string),
- LPROCFS_STATS_FLAG_NONE);
- if (!sbi->ll_ra_stats) {
- err = -ENOMEM;
- goto out;
- }
-
- for (id = 0; id < ARRAY_SIZE(ra_stat_string); id++)
- lprocfs_counter_init(sbi->ll_ra_stats, id, 0,
- ra_stat_string[id], "pages");
-
- debugfs_create_file("stats", 0644, sbi->ll_debugfs_entry,
- sbi->ll_ra_stats, &lprocfs_stats_seq_fops);
-
- ldebugfs_add_vars(sbi->ll_debugfs_entry, lprocfs_llite_obd_vars, sb);
-
- sbi->ll_kobj.kset = llite_kset;
- init_completion(&sbi->ll_kobj_unregister);
- err = kobject_init_and_add(&sbi->ll_kobj, &llite_ktype, NULL,
- "%s", name);
- if (err)
- goto out;
-
- /* MDC info */
- obd = class_name2obd(mdc);
-
- err = sysfs_create_link(&sbi->ll_kobj, &obd->obd_kobj,
- obd->obd_type->typ_name);
- if (err)
- goto out;
-
- /* OSC */
- obd = class_name2obd(osc);
-
- err = sysfs_create_link(&sbi->ll_kobj, &obd->obd_kobj,
- obd->obd_type->typ_name);
-out:
- if (err) {
- debugfs_remove_recursive(sbi->ll_debugfs_entry);
- lprocfs_free_stats(&sbi->ll_ra_stats);
- lprocfs_free_stats(&sbi->ll_stats);
- }
- return err;
-}
-
-void ldebugfs_unregister_mountpoint(struct ll_sb_info *sbi)
-{
- debugfs_remove_recursive(sbi->ll_debugfs_entry);
- kobject_put(&sbi->ll_kobj);
- wait_for_completion(&sbi->ll_kobj_unregister);
- lprocfs_free_stats(&sbi->ll_ra_stats);
- lprocfs_free_stats(&sbi->ll_stats);
-}
-
-#undef MAX_STRING_SIZE
-
-#define pct(a, b) (b ? a * 100 / b : 0)
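
Note that pct() expands its arguments unparenthesized. Every call site
below passes plain variables, so it behaves, but a compound argument
would mis-associate: pct(r + w, tot) expands to (tot ? r + w * 100 / tot
: 0). A fully parenthesized variant (a suggestion, not what the driver
shipped) would be:

	#define pct(a, b) ((b) ? (a) * 100 / (b) : 0)
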
-
-static void ll_display_extents_info(struct ll_rw_extents_info *io_extents,
- struct seq_file *seq, int which)
-{
- unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum;
- unsigned long start, end, r, w;
- char *unitp = "KMGTPEZY";
- int i, units = 10;
- struct per_process_info *pp_info = &io_extents->pp_extents[which];
-
- read_cum = 0;
- write_cum = 0;
- start = 0;
-
- for (i = 0; i < LL_HIST_MAX; i++) {
- read_tot += pp_info->pp_r_hist.oh_buckets[i];
- write_tot += pp_info->pp_w_hist.oh_buckets[i];
- }
-
- for (i = 0; i < LL_HIST_MAX; i++) {
- r = pp_info->pp_r_hist.oh_buckets[i];
- w = pp_info->pp_w_hist.oh_buckets[i];
- read_cum += r;
- write_cum += w;
- end = 1 << (i + LL_HIST_START - units);
- seq_printf(seq, "%4lu%c - %4lu%c%c: %14lu %4lu %4lu | %14lu %4lu %4lu\n",
- start, *unitp, end, *unitp,
- (i == LL_HIST_MAX - 1) ? '+' : ' ',
- r, pct(r, read_tot), pct(read_cum, read_tot),
- w, pct(w, write_tot), pct(write_cum, write_tot));
- start = end;
- if (start == 1024) {
- start = 1;
- units += 10;
- unitp++;
- }
- if (read_cum == read_tot && write_cum == write_tot)
- break;
- }
-}
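
The bucket labels above are powers of two, with the unit letter rolled
over each time start reaches 1024 in the current unit (4K ... 512K, then
1M, 2M, and so on). A standalone sketch of the label arithmetic,
assuming LL_HIST_START = 12 and LL_HIST_MAX = 32 to match the driver's
llite_internal.h:

	#include <stdio.h>

	#define LL_HIST_MAX	32	/* assumed driver values */
	#define LL_HIST_START	12	/* smallest bucket: 1 << 12 = 4K */

	int main(void)
	{
		const char *unitp = "KMGTPEZY";
		unsigned long start = 0, end;
		int units = 10, i;

		for (i = 0; i < LL_HIST_MAX; i++) {
			end = 1UL << (i + LL_HIST_START - units);
			printf("%4lu%c - %4lu%c%c\n", start, *unitp,
			       end, *unitp,
			       i == LL_HIST_MAX - 1 ? '+' : ' ');
			start = end;
			if (start == 1024) { /* 1024K -> 1M, 1024M -> 1G */
				start = 1;
				units += 10;
				unitp++;
			}
		}
		return 0;
	}
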
-
-static int ll_rw_extents_stats_pp_seq_show(struct seq_file *seq, void *v)
-{
- struct timespec64 now;
- struct ll_sb_info *sbi = seq->private;
- struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
- int k;
-
- ktime_get_real_ts64(&now);
-
- if (!sbi->ll_rw_stats_on) {
- seq_printf(seq, "disabled\n"
- "write anything in this file to activate, then 0 or \"[D/d]isabled\" to deactivate\n");
- return 0;
- }
- seq_printf(seq, "snapshot_time: %llu.%09lu (secs.usecs)\n",
- (s64)now.tv_sec, (unsigned long)now.tv_nsec);
- seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write");
- seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n",
- "extents", "calls", "%", "cum%",
- "calls", "%", "cum%");
- spin_lock(&sbi->ll_pp_extent_lock);
- for (k = 0; k < LL_PROCESS_HIST_MAX; k++) {
- if (io_extents->pp_extents[k].pid != 0) {
- seq_printf(seq, "\nPID: %d\n",
- io_extents->pp_extents[k].pid);
- ll_display_extents_info(io_extents, seq, k);
- }
- }
- spin_unlock(&sbi->ll_pp_extent_lock);
- return 0;
-}
-
-static ssize_t ll_rw_extents_stats_pp_seq_write(struct file *file,
- const char __user *buf,
- size_t len,
- loff_t *off)
-{
- struct seq_file *seq = file->private_data;
- struct ll_sb_info *sbi = seq->private;
- struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
- int i;
- int value = 1, rc = 0;
-
- if (len == 0)
- return -EINVAL;
-
- rc = lprocfs_write_helper(buf, len, &value);
- if (rc < 0 && len < 16) {
- char kernbuf[16];
-
- if (copy_from_user(kernbuf, buf, len))
- return -EFAULT;
- kernbuf[len] = 0;
-
- if (kernbuf[len - 1] == '\n')
- kernbuf[len - 1] = 0;
-
- if (strcmp(kernbuf, "disabled") == 0 ||
- strcmp(kernbuf, "Disabled") == 0)
- value = 0;
- }
-
- if (value == 0)
- sbi->ll_rw_stats_on = 0;
- else
- sbi->ll_rw_stats_on = 1;
-
- spin_lock(&sbi->ll_pp_extent_lock);
- for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
- io_extents->pp_extents[i].pid = 0;
- lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist);
- lprocfs_oh_clear(&io_extents->pp_extents[i].pp_w_hist);
- }
- spin_unlock(&sbi->ll_pp_extent_lock);
- return len;
-}
-
-LPROC_SEQ_FOPS(ll_rw_extents_stats_pp);
-
-static int ll_rw_extents_stats_seq_show(struct seq_file *seq, void *v)
-{
- struct timespec64 now;
- struct ll_sb_info *sbi = seq->private;
- struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
-
- ktime_get_real_ts64(&now);
-
- if (!sbi->ll_rw_stats_on) {
- seq_printf(seq, "disabled\n"
- "write anything in this file to activate, then 0 or \"[D/d]isabled\" to deactivate\n");
- return 0;
- }
- seq_printf(seq, "snapshot_time: %llu.%09lu (secs.usecs)\n",
- (u64)now.tv_sec, (unsigned long)now.tv_nsec);
-
- seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write");
- seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n",
- "extents", "calls", "%", "cum%",
- "calls", "%", "cum%");
- spin_lock(&sbi->ll_lock);
- ll_display_extents_info(io_extents, seq, LL_PROCESS_HIST_MAX);
- spin_unlock(&sbi->ll_lock);
-
- return 0;
-}
-
-static ssize_t ll_rw_extents_stats_seq_write(struct file *file,
- const char __user *buf,
- size_t len, loff_t *off)
-{
- struct seq_file *seq = file->private_data;
- struct ll_sb_info *sbi = seq->private;
- struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
- int i;
- int value = 1, rc = 0;
-
- if (len == 0)
- return -EINVAL;
-
- rc = lprocfs_write_helper(buf, len, &value);
- if (rc < 0 && len < 16) {
- char kernbuf[16];
-
- if (copy_from_user(kernbuf, buf, len))
- return -EFAULT;
- kernbuf[len] = 0;
-
- if (kernbuf[len - 1] == '\n')
- kernbuf[len - 1] = 0;
-
- if (strcmp(kernbuf, "disabled") == 0 ||
- strcmp(kernbuf, "Disabled") == 0)
- value = 0;
- }
-
- if (value == 0)
- sbi->ll_rw_stats_on = 0;
- else
- sbi->ll_rw_stats_on = 1;
-
- spin_lock(&sbi->ll_pp_extent_lock);
- for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
- io_extents->pp_extents[i].pid = 0;
- lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist);
- lprocfs_oh_clear(&io_extents->pp_extents[i].pp_w_hist);
- }
- spin_unlock(&sbi->ll_pp_extent_lock);
-
- return len;
-}
-
-LPROC_SEQ_FOPS(ll_rw_extents_stats);
-
-void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
- struct ll_file_data *file, loff_t pos,
- size_t count, int rw)
-{
- int i, cur = -1;
- struct ll_rw_process_info *process;
- struct ll_rw_process_info *offset;
- int *off_count = &sbi->ll_rw_offset_entry_count;
- int *process_count = &sbi->ll_offset_process_count;
- struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
-
- if (!sbi->ll_rw_stats_on)
- return;
- process = sbi->ll_rw_process_info;
- offset = sbi->ll_rw_offset_info;
-
- spin_lock(&sbi->ll_pp_extent_lock);
- /* Extent statistics */
- for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
- if (io_extents->pp_extents[i].pid == pid) {
- cur = i;
- break;
- }
- }
-
- if (cur == -1) {
- /* new process */
- sbi->ll_extent_process_count =
- (sbi->ll_extent_process_count + 1) % LL_PROCESS_HIST_MAX;
- cur = sbi->ll_extent_process_count;
- io_extents->pp_extents[cur].pid = pid;
- lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_r_hist);
- lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_w_hist);
- }
-
- for (i = 0; (count >= (1 << LL_HIST_START << i)) &&
- (i < (LL_HIST_MAX - 1)); i++)
- ;
- if (rw == 0) {
- io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++;
- io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++;
- } else {
- io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++;
- io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++;
- }
- spin_unlock(&sbi->ll_pp_extent_lock);
-
- spin_lock(&sbi->ll_process_lock);
- /* Offset statistics */
- for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
- if (process[i].rw_pid == pid) {
- if (process[i].rw_last_file != file) {
- process[i].rw_range_start = pos;
- process[i].rw_last_file_pos = pos + count;
- process[i].rw_smallest_extent = count;
- process[i].rw_largest_extent = count;
- process[i].rw_offset = 0;
- process[i].rw_last_file = file;
- spin_unlock(&sbi->ll_process_lock);
- return;
- }
- if (process[i].rw_last_file_pos != pos) {
- *off_count =
- (*off_count + 1) % LL_OFFSET_HIST_MAX;
- offset[*off_count].rw_op = process[i].rw_op;
- offset[*off_count].rw_pid = pid;
- offset[*off_count].rw_range_start =
- process[i].rw_range_start;
- offset[*off_count].rw_range_end =
- process[i].rw_last_file_pos;
- offset[*off_count].rw_smallest_extent =
- process[i].rw_smallest_extent;
- offset[*off_count].rw_largest_extent =
- process[i].rw_largest_extent;
- offset[*off_count].rw_offset =
- process[i].rw_offset;
- process[i].rw_op = rw;
- process[i].rw_range_start = pos;
- process[i].rw_smallest_extent = count;
- process[i].rw_largest_extent = count;
- process[i].rw_offset = pos -
- process[i].rw_last_file_pos;
- }
- if (process[i].rw_smallest_extent > count)
- process[i].rw_smallest_extent = count;
- if (process[i].rw_largest_extent < count)
- process[i].rw_largest_extent = count;
- process[i].rw_last_file_pos = pos + count;
- spin_unlock(&sbi->ll_process_lock);
- return;
- }
- }
- *process_count = (*process_count + 1) % LL_PROCESS_HIST_MAX;
- process[*process_count].rw_pid = pid;
- process[*process_count].rw_op = rw;
- process[*process_count].rw_range_start = pos;
- process[*process_count].rw_last_file_pos = pos + count;
- process[*process_count].rw_smallest_extent = count;
- process[*process_count].rw_largest_extent = count;
- process[*process_count].rw_offset = 0;
- process[*process_count].rw_last_file = file;
- spin_unlock(&sbi->ll_process_lock);
-}
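
The empty-bodied for loop in ll_rw_stats_tally() selects the histogram
slot: the smallest i with count < 2^(LL_HIST_START + i), capped at
LL_HIST_MAX - 1. A standalone check that this equals the closed form
clamp(ilog2(count) - LL_HIST_START + 1, 0, LL_HIST_MAX - 1), using the
same assumed constants as the earlier sketch:

	#include <assert.h>
	#include <stdio.h>

	#define LL_HIST_MAX	32	/* assumed driver values */
	#define LL_HIST_START	12

	/* the driver's loop, with 1UL to avoid 32-bit shift overflow */
	static int bucket_loop(unsigned long count)
	{
		int i;

		for (i = 0; count >= (1UL << LL_HIST_START << i) &&
			    i < LL_HIST_MAX - 1; i++)
			;
		return i;
	}

	static int ilog2_ul(unsigned long v)	/* floor(log2(v)), v > 0 */
	{
		int r = 0;

		while (v >>= 1)
			r++;
		return r;
	}

	static int bucket_closed(unsigned long count)
	{
		int i = count ? ilog2_ul(count) - LL_HIST_START + 1 : 0;

		if (i < 0)
			i = 0;
		if (i > LL_HIST_MAX - 1)
			i = LL_HIST_MAX - 1;
		return i;
	}

	int main(void)
	{
		unsigned long c;

		for (c = 1; c < (1UL << 40); c = c * 2 + 1)
			assert(bucket_loop(c) == bucket_closed(c));
		printf("4096-byte I/O -> bucket %d\n", bucket_loop(4096));
		return 0;
	}
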
-
-static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v)
-{
- struct timespec64 now;
- struct ll_sb_info *sbi = seq->private;
- struct ll_rw_process_info *offset = sbi->ll_rw_offset_info;
- struct ll_rw_process_info *process = sbi->ll_rw_process_info;
- int i;
-
- ktime_get_real_ts64(&now);
-
- if (!sbi->ll_rw_stats_on) {
- seq_printf(seq, "disabled\n"
- "write anything in this file to activate, then 0 or \"[D/d]isabled\" to deactivate\n");
- return 0;
- }
- spin_lock(&sbi->ll_process_lock);
-
- seq_printf(seq, "snapshot_time: %llu.%09lu (secs.usecs)\n",
- (s64)now.tv_sec, (unsigned long)now.tv_nsec);
- seq_printf(seq, "%3s %10s %14s %14s %17s %17s %14s\n",
- "R/W", "PID", "RANGE START", "RANGE END",
- "SMALLEST EXTENT", "LARGEST EXTENT", "OFFSET");
- /* We stored the discontiguous offsets here; print them first */
- for (i = 0; i < LL_OFFSET_HIST_MAX; i++) {
- if (offset[i].rw_pid != 0)
- seq_printf(seq,
- "%3c %10d %14Lu %14Lu %17lu %17lu %14Lu",
- offset[i].rw_op == READ ? 'R' : 'W',
- offset[i].rw_pid,
- offset[i].rw_range_start,
- offset[i].rw_range_end,
- (unsigned long)offset[i].rw_smallest_extent,
- (unsigned long)offset[i].rw_largest_extent,
- offset[i].rw_offset);
- }
- /* Then print the current offsets for each process */
- for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
- if (process[i].rw_pid != 0)
- seq_printf(seq,
- "%3c %10d %14Lu %14Lu %17lu %17lu %14Lu",
- process[i].rw_op == READ ? 'R' : 'W',
- process[i].rw_pid,
- process[i].rw_range_start,
- process[i].rw_last_file_pos,
- (unsigned long)process[i].rw_smallest_extent,
- (unsigned long)process[i].rw_largest_extent,
- process[i].rw_offset);
- }
- spin_unlock(&sbi->ll_process_lock);
-
- return 0;
-}
-
-static ssize_t ll_rw_offset_stats_seq_write(struct file *file,
- const char __user *buf,
- size_t len, loff_t *off)
-{
- struct seq_file *seq = file->private_data;
- struct ll_sb_info *sbi = seq->private;
- struct ll_rw_process_info *process_info = sbi->ll_rw_process_info;
- struct ll_rw_process_info *offset_info = sbi->ll_rw_offset_info;
- int value = 1, rc = 0;
-
- if (len == 0)
- return -EINVAL;
-
- rc = lprocfs_write_helper(buf, len, &value);
-
- if (rc < 0 && len < 16) {
- char kernbuf[16];
-
- if (copy_from_user(kernbuf, buf, len))
- return -EFAULT;
- kernbuf[len] = 0;
-
- if (kernbuf[len - 1] == '\n')
- kernbuf[len - 1] = 0;
-
- if (strcmp(kernbuf, "disabled") == 0 ||
- strcmp(kernbuf, "Disabled") == 0)
- value = 0;
- }
-
- if (value == 0)
- sbi->ll_rw_stats_on = 0;
- else
- sbi->ll_rw_stats_on = 1;
-
- spin_lock(&sbi->ll_process_lock);
- sbi->ll_offset_process_count = 0;
- sbi->ll_rw_offset_entry_count = 0;
- memset(process_info, 0, sizeof(struct ll_rw_process_info) *
- LL_PROCESS_HIST_MAX);
- memset(offset_info, 0, sizeof(struct ll_rw_process_info) *
- LL_OFFSET_HIST_MAX);
- spin_unlock(&sbi->ll_process_lock);
-
- return len;
-}
-
-LPROC_SEQ_FOPS(ll_rw_offset_stats);
-
-void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars)
-{
- lvars->obd_vars = lprocfs_llite_obd_vars;
-}
diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
deleted file mode 100644
index d5f6d20afe8c..000000000000
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ /dev/null
@@ -1,1207 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/quotaops.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
-#include <linux/security.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_support.h>
-#include <lustre_fid.h>
-#include <lustre_dlm.h>
-#include "llite_internal.h"
-
-static int ll_create_it(struct inode *dir, struct dentry *dentry,
- struct lookup_intent *it);
-
-/* called from iget5_locked->find_inode() under inode_hash_lock spinlock */
-static int ll_test_inode(struct inode *inode, void *opaque)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lustre_md *md = opaque;
-
- if (unlikely(!(md->body->mbo_valid & OBD_MD_FLID))) {
- CERROR("MDS body missing FID\n");
- return 0;
- }
-
- if (!lu_fid_eq(&lli->lli_fid, &md->body->mbo_fid1))
- return 0;
-
- return 1;
-}
-
-static int ll_set_inode(struct inode *inode, void *opaque)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct mdt_body *body = ((struct lustre_md *)opaque)->body;
-
- if (unlikely(!(body->mbo_valid & OBD_MD_FLID))) {
- CERROR("MDS body missing FID\n");
- return -EINVAL;
- }
-
- lli->lli_fid = body->mbo_fid1;
- if (unlikely(!(body->mbo_valid & OBD_MD_FLTYPE))) {
- CERROR("Can not initialize inode " DFID
- " without object type: valid = %#llx\n",
- PFID(&lli->lli_fid), body->mbo_valid);
- return -EINVAL;
- }
-
- inode->i_mode = (inode->i_mode & ~S_IFMT) | (body->mbo_mode & S_IFMT);
- if (unlikely(inode->i_mode == 0)) {
- CERROR("Invalid inode " DFID " type\n", PFID(&lli->lli_fid));
- return -EINVAL;
- }
-
- ll_lli_init(lli);
-
- return 0;
-}
-
-/**
- * Get an inode by inode number (@hash), which was already instantiated
- * by the intent lookup.
- */
-struct inode *ll_iget(struct super_block *sb, ino_t hash,
- struct lustre_md *md)
-{
- struct inode *inode;
- int rc = 0;
-
- LASSERT(hash != 0);
- inode = iget5_locked(sb, hash, ll_test_inode, ll_set_inode, md);
- if (!inode)
- return ERR_PTR(-ENOMEM);
-
- if (inode->i_state & I_NEW) {
- rc = ll_read_inode2(inode, md);
- if (!rc && S_ISREG(inode->i_mode) &&
- !ll_i2info(inode)->lli_clob)
- rc = cl_file_inode_init(inode, md);
-
- if (rc) {
- /*
- * Let's clear directory lsm here, otherwise
- * make_bad_inode() will reset the inode mode
- * to regular, then ll_clear_inode will not
- * be able to clear lsm_md
- */
- if (S_ISDIR(inode->i_mode))
- ll_dir_clear_lsm_md(inode);
- make_bad_inode(inode);
- unlock_new_inode(inode);
- iput(inode);
- inode = ERR_PTR(rc);
- } else {
- unlock_new_inode(inode);
- }
- } else if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
- rc = ll_update_inode(inode, md);
- CDEBUG(D_VFSTRACE, "got inode: " DFID "(%p): rc = %d\n",
- PFID(&md->body->mbo_fid1), inode, rc);
- if (rc) {
- if (S_ISDIR(inode->i_mode))
- ll_dir_clear_lsm_md(inode);
- iput(inode);
- inode = ERR_PTR(rc);
- }
- }
- return inode;
-}
-
-static void ll_invalidate_negative_children(struct inode *dir)
-{
- struct dentry *dentry, *tmp_subdir;
-
- spin_lock(&dir->i_lock);
- hlist_for_each_entry(dentry, &dir->i_dentry, d_u.d_alias) {
- spin_lock(&dentry->d_lock);
- if (!list_empty(&dentry->d_subdirs)) {
- struct dentry *child;
-
- list_for_each_entry_safe(child, tmp_subdir,
- &dentry->d_subdirs,
- d_child) {
- if (d_really_is_negative(child))
- d_lustre_invalidate(child, 1);
- }
- }
- spin_unlock(&dentry->d_lock);
- }
- spin_unlock(&dir->i_lock);
-}
-
-int ll_test_inode_by_fid(struct inode *inode, void *opaque)
-{
- return lu_fid_eq(&ll_i2info(inode)->lli_fid, opaque);
-}
-
-int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
- void *data, int flag)
-{
- struct lustre_handle lockh;
- int rc;
-
- switch (flag) {
- case LDLM_CB_BLOCKING:
- ldlm_lock2handle(lock, &lockh);
- rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
- if (rc < 0) {
- CDEBUG(D_INODE, "ldlm_cli_cancel: rc = %d\n", rc);
- return rc;
- }
- break;
- case LDLM_CB_CANCELING: {
- struct inode *inode = ll_inode_from_resource_lock(lock);
- __u64 bits = lock->l_policy_data.l_inodebits.bits;
-
- /* Inode is set to lock->l_resource->lr_lvb_inode
- * for mdc - bug 24555
- */
- LASSERT(!lock->l_ast_data);
-
- if (!inode)
- break;
-
- /* Invalidate all dentries associated with this inode */
- LASSERT(ldlm_is_canceling(lock));
-
- if (!fid_res_name_eq(ll_inode2fid(inode),
- &lock->l_resource->lr_name)) {
- LDLM_ERROR(lock,
- "data mismatch with object " DFID "(%p)",
- PFID(ll_inode2fid(inode)), inode);
- LBUG();
- }
-
- if (bits & MDS_INODELOCK_XATTR) {
- if (S_ISDIR(inode->i_mode))
- ll_i2info(inode)->lli_def_stripe_offset = -1;
- ll_xattr_cache_destroy(inode);
- bits &= ~MDS_INODELOCK_XATTR;
- }
-
- /* For OPEN locks we differentiate between lock modes
- * LCK_CR, LCK_CW, LCK_PR - bug 22891
- */
- if (bits & MDS_INODELOCK_OPEN)
- ll_have_md_lock(inode, &bits, lock->l_req_mode);
-
- if (bits & MDS_INODELOCK_OPEN) {
- fmode_t fmode;
-
- switch (lock->l_req_mode) {
- case LCK_CW:
- fmode = FMODE_WRITE;
- break;
- case LCK_PR:
- fmode = FMODE_EXEC;
- break;
- case LCK_CR:
- fmode = FMODE_READ;
- break;
- default:
- LDLM_ERROR(lock, "bad lock mode for OPEN lock");
- LBUG();
- }
-
- ll_md_real_close(inode, fmode);
- }
-
- if (bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE |
- MDS_INODELOCK_LAYOUT | MDS_INODELOCK_PERM))
- ll_have_md_lock(inode, &bits, LCK_MINMODE);
-
- if (bits & MDS_INODELOCK_LAYOUT) {
- struct cl_object_conf conf = {
- .coc_opc = OBJECT_CONF_INVALIDATE,
- .coc_inode = inode,
- };
-
- rc = ll_layout_conf(inode, &conf);
- if (rc < 0)
- CDEBUG(D_INODE, "cannot invalidate layout of "
- DFID ": rc = %d\n",
- PFID(ll_inode2fid(inode)), rc);
- }
-
- if (bits & MDS_INODELOCK_UPDATE) {
- struct ll_inode_info *lli = ll_i2info(inode);
-
- spin_lock(&lli->lli_lock);
- LTIME_S(inode->i_mtime) = 0;
- LTIME_S(inode->i_atime) = 0;
- LTIME_S(inode->i_ctime) = 0;
- spin_unlock(&lli->lli_lock);
- }
-
- if ((bits & MDS_INODELOCK_UPDATE) && S_ISDIR(inode->i_mode)) {
- struct ll_inode_info *lli = ll_i2info(inode);
-
- CDEBUG(D_INODE, "invalidating inode " DFID " lli = %p, pfid = " DFID "\n",
- PFID(ll_inode2fid(inode)), lli,
- PFID(&lli->lli_pfid));
-
- truncate_inode_pages(inode->i_mapping, 0);
-
- if (unlikely(!fid_is_zero(&lli->lli_pfid))) {
- struct inode *master_inode = NULL;
- unsigned long hash;
-
- /*
- * This is a slave inode; since all of the child
- * dentries are connected to the master inode, we
- * have to invalidate the negative children on the
- * master inode.
- */
- CDEBUG(D_INODE,
- "Invalidate s" DFID " m" DFID "\n",
- PFID(ll_inode2fid(inode)),
- PFID(&lli->lli_pfid));
-
- hash = cl_fid_build_ino(&lli->lli_pfid,
- ll_need_32bit_api(ll_i2sbi(inode)));
- /*
- * Do not look up the inode with ilookup5;
- * otherwise it will deadlock:
- *
- * 1. Client1 sends a chmod req to MDT0, which
- * enqueues the master and all of its slaves'
- * locks (mdt_attr_set() ->
- * mdt_lock_slaves()). After getting the master
- * and stripe0 locks, it sends the enqueue req
- * (for stripe1) to MDT1, where MDT1 finds the
- * lock has been granted to client2. MDT1 then
- * sends a blocking ast to client2.
- *
- * 2. At the same time, client2 tries to unlink
- * the striped dir (rm -rf striped_dir). During
- * lookup it holds the master inode of the
- * striped directory, whose inode state is NEW,
- * then tries to revalidate all of its slaves
- * (ll_prep_inode() -> ll_iget() ->
- * ll_read_inode2() -> ll_update_inode()), and
- * is blocked on the server side because of 1.
- *
- * 3. Client2 then gets the blocking_ast req and
- * cancels the lock, but would block in
- * ilookup5() because the master inode state is
- * NEW.
- */
- master_inode = ilookup5_nowait(inode->i_sb,
- hash,
- ll_test_inode_by_fid,
- (void *)&lli->lli_pfid);
- if (master_inode) {
- ll_invalidate_negative_children(master_inode);
- iput(master_inode);
- }
- } else {
- ll_invalidate_negative_children(inode);
- }
- }
-
- if ((bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_PERM)) &&
- inode->i_sb->s_root &&
- !is_root_inode(inode))
- ll_invalidate_aliases(inode);
-
- iput(inode);
- break;
- }
- default:
- LBUG();
- }
-
- return 0;
-}
-
-__u32 ll_i2suppgid(struct inode *i)
-{
- if (in_group_p(i->i_gid))
- return (__u32)from_kgid(&init_user_ns, i->i_gid);
- else
- return (__u32)(-1);
-}
-
-/* Pack the required supplementary groups into the supplied groups array.
- * If we don't need to use the groups from the target inode(s) then we
- * instead pack one or more groups from the user's supplementary group
- * array in case it might be useful. Not needed if doing an MDS-side upcall.
- */
-void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2)
-{
- LASSERT(i1);
-
- suppgids[0] = ll_i2suppgid(i1);
-
- if (i2)
- suppgids[1] = ll_i2suppgid(i2);
- else
- suppgids[1] = -1;
-}
-
-/*
- * Try to reuse unhashed or invalidated dentries.
- * This is very similar to d_exact_alias(), and any changes in one should be
- * considered for inclusion in the other. The differences are that we don't
- * need an unhashed alias, and we don't want d_compare to be used for
- * comparison.
- */
-static struct dentry *ll_find_alias(struct inode *inode, struct dentry *dentry)
-{
- struct dentry *alias;
-
- if (hlist_empty(&inode->i_dentry))
- return NULL;
-
- spin_lock(&inode->i_lock);
- hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
- LASSERT(alias != dentry);
- /*
- * Don't need alias->d_lock here, because aliases with
- * d_parent == entry->d_parent are not subject to name or
- * parent changes, because the parent inode i_mutex is held.
- */
-
- if (alias->d_parent != dentry->d_parent)
- continue;
- if (alias->d_name.hash != dentry->d_name.hash)
- continue;
- if (alias->d_name.len != dentry->d_name.len ||
- memcmp(alias->d_name.name, dentry->d_name.name,
- dentry->d_name.len) != 0)
- continue;
- spin_lock(&alias->d_lock);
- dget_dlock(alias);
- spin_unlock(&alias->d_lock);
- spin_unlock(&inode->i_lock);
- return alias;
- }
- spin_unlock(&inode->i_lock);
-
- return NULL;
-}
-
-/*
- * Similar to d_splice_alias(), but lustre treats invalid alias
- * similar to DCACHE_DISCONNECTED, and tries to use it anyway.
- */
-struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de)
-{
- if (inode && !S_ISDIR(inode->i_mode)) {
- struct dentry *new = ll_find_alias(inode, de);
-
- if (new) {
- d_move(new, de);
- iput(inode);
- CDEBUG(D_DENTRY,
- "Reuse dentry %p inode %p refc %d flags %#x\n",
- new, d_inode(new), d_count(new), new->d_flags);
- return new;
- }
- d_add(de, inode);
- } else {
- struct dentry *new = d_splice_alias(inode, de);
-
- if (IS_ERR(new))
- CDEBUG(D_DENTRY,
- "splice inode %p as %pd gives error %lu\n",
- inode, de, PTR_ERR(new));
- if (new)
- de = new;
- }
- if (!IS_ERR(de))
- CDEBUG(D_DENTRY, "Add dentry %p inode %p refc %d flags %#x\n",
- de, d_inode(de), d_count(de), de->d_flags);
- return de;
-}
-
-static int ll_lookup_it_finish(struct ptlrpc_request *request,
- struct lookup_intent *it,
- struct inode *parent, struct dentry **de)
-{
- struct inode *inode = NULL;
- __u64 bits = 0;
- int rc = 0;
- struct dentry *alias;
-
- /* NB 1 request reference will be taken away by ll_intent_lock()
- * when I return
- */
- CDEBUG(D_DENTRY, "it %p it_disposition %x\n", it,
- it->it_disposition);
- if (!it_disposition(it, DISP_LOOKUP_NEG)) {
- rc = ll_prep_inode(&inode, request, (*de)->d_sb, it);
- if (rc)
- return rc;
-
- ll_set_lock_data(ll_i2sbi(parent)->ll_md_exp, inode, it, &bits);
-
- /* We used to query real size from OSTs here, but actually
- * this is not needed. For stat() calls size would be updated
- * from subsequent do_revalidate()->ll_inode_revalidate_it() in
- * 2.4 and
- * vfs_getattr_it->ll_getattr()->ll_inode_revalidate_it() in 2.6
- * Everybody else who needs correct file size would call
- * ll_glimpse_size or some equivalent themselves anyway.
- * Also see bug 7198.
- */
- }
-
- alias = ll_splice_alias(inode, *de);
- if (IS_ERR(alias)) {
- rc = PTR_ERR(alias);
- goto out;
- }
- *de = alias;
-
- if (!it_disposition(it, DISP_LOOKUP_NEG)) {
- /* We have the "lookup" lock, so unhide dentry */
- if (bits & MDS_INODELOCK_LOOKUP)
- d_lustre_revalidate(*de);
- } else if (!it_disposition(it, DISP_OPEN_CREATE)) {
- /* If file created on server, don't depend on parent UPDATE
- * lock to unhide it. It is left hidden and next lookup can
- * find it in ll_splice_alias.
- */
- /* Check that parent has UPDATE lock. */
- struct lookup_intent parent_it = {
- .it_op = IT_GETATTR,
- .it_lock_handle = 0 };
- struct lu_fid fid = ll_i2info(parent)->lli_fid;
-
- /* If it is striped directory, get the real stripe parent */
- if (unlikely(ll_i2info(parent)->lli_lsm_md)) {
- rc = md_get_fid_from_lsm(ll_i2mdexp(parent),
- ll_i2info(parent)->lli_lsm_md,
- (*de)->d_name.name,
- (*de)->d_name.len, &fid);
- if (rc)
- return rc;
- }
-
- if (md_revalidate_lock(ll_i2mdexp(parent), &parent_it, &fid,
- NULL)) {
- d_lustre_revalidate(*de);
- ll_intent_release(&parent_it);
- }
- }
-
-out:
- if (rc != 0 && it->it_op & IT_OPEN)
- ll_open_cleanup((*de)->d_sb, request);
-
- return rc;
-}
-
-static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
- struct lookup_intent *it)
-{
- struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
- struct dentry *save = dentry, *retval;
- struct ptlrpc_request *req = NULL;
- struct md_op_data *op_data = NULL;
- struct inode *inode;
- __u32 opc;
- int rc;
-
- if (dentry->d_name.len > ll_i2sbi(parent)->ll_namelen)
- return ERR_PTR(-ENAMETOOLONG);
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p),intent=%s\n",
- dentry, PFID(ll_inode2fid(parent)), parent, LL_IT2STR(it));
-
- if (d_mountpoint(dentry))
- CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it));
-
- if (!it || it->it_op == IT_GETXATTR)
- it = &lookup_it;
-
- if (it->it_op == IT_GETATTR && dentry_may_statahead(parent, dentry)) {
- rc = ll_statahead(parent, &dentry, 0);
- if (rc == 1) {
- if (dentry == save)
- retval = NULL;
- else
- retval = dentry;
- goto out;
- }
- }
-
- if (it->it_op & IT_OPEN && it->it_flags & FMODE_WRITE && sb_rdonly(dentry->d_sb))
- return ERR_PTR(-EROFS);
-
- if (it->it_op & IT_CREAT)
- opc = LUSTRE_OPC_CREATE;
- else
- opc = LUSTRE_OPC_ANY;
-
- op_data = ll_prep_md_op_data(NULL, parent, NULL, dentry->d_name.name,
- dentry->d_name.len, 0, opc, NULL);
- if (IS_ERR(op_data))
- return (void *)op_data;
-
- /* enforce umask if acl disabled or MDS doesn't support umask */
- if (!IS_POSIXACL(parent) || !exp_connect_umask(ll_i2mdexp(parent)))
- it->it_create_mode &= ~current_umask();
-
- rc = md_intent_lock(ll_i2mdexp(parent), op_data, it, &req,
- &ll_md_blocking_ast, 0);
- /*
- * If the MDS allows the client to chgrp (CFS_SETGRP_PERM), but the
- * client does not know which suppgid should be sent to the MDS, or
- * someone else changed the target file's GID after this RPC was sent
- * to the MDS with the suppgid as the original GID, then we should
- * try again with the right suppgid.
- */
- if (rc == -EACCES && it->it_op & IT_OPEN &&
- it_disposition(it, DISP_OPEN_DENY)) {
- struct mdt_body *body;
-
- LASSERT(req);
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (op_data->op_suppgids[0] == body->mbo_gid ||
- op_data->op_suppgids[1] == body->mbo_gid ||
- !in_group_p(make_kgid(&init_user_ns, body->mbo_gid))) {
- retval = ERR_PTR(-EACCES);
- goto out;
- }
-
- fid_zero(&op_data->op_fid2);
- op_data->op_suppgids[1] = body->mbo_gid;
- ptlrpc_req_finished(req);
- req = NULL;
- ll_intent_release(it);
- rc = md_intent_lock(ll_i2mdexp(parent), op_data, it, &req,
- ll_md_blocking_ast, 0);
- }
-
- if (rc < 0) {
- retval = ERR_PTR(rc);
- goto out;
- }
-
- rc = ll_lookup_it_finish(req, it, parent, &dentry);
- if (rc != 0) {
- ll_intent_release(it);
- retval = ERR_PTR(rc);
- goto out;
- }
-
- inode = d_inode(dentry);
- if ((it->it_op & IT_OPEN) && inode &&
- !S_ISREG(inode->i_mode) &&
- !S_ISDIR(inode->i_mode)) {
- ll_release_openhandle(inode, it);
- }
- ll_lookup_finish_locks(it, inode);
-
- if (dentry == save)
- retval = NULL;
- else
- retval = dentry;
-out:
- if (op_data && !IS_ERR(op_data))
- ll_finish_md_op_data(op_data);
-
- ptlrpc_req_finished(req);
- return retval;
-}
-
-static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry,
- unsigned int flags)
-{
- struct lookup_intent *itp, it = { .it_op = IT_GETATTR };
- struct dentry *de;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p),flags=%u\n",
- dentry, PFID(ll_inode2fid(parent)), parent, flags);
-
- /* Optimize away (CREATE && !OPEN) and let .create handle the race,
- * but only if we have write permission there; otherwise we need
- * to proceed with the lookup. LU-4185
- */
- if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN) &&
- (inode_permission(parent, MAY_WRITE | MAY_EXEC) == 0))
- return NULL;
-
- if (flags & (LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE))
- itp = NULL;
- else
- itp = &it;
- de = ll_lookup_it(parent, dentry, itp);
-
- if (itp)
- ll_intent_release(itp);
-
- return de;
-}
-
-/*
- * For cached negative dentry and new dentry, handle lookup/create/open
- * together.
- */
-static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
- struct file *file, unsigned int open_flags,
- umode_t mode, int *opened)
-{
- struct lookup_intent *it;
- struct dentry *de;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p),file %p,open_flags %x,mode %x opened %d\n",
- dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode,
- *opened);
-
- /* Only negative dentries enter here */
- LASSERT(!d_inode(dentry));
-
- if (!d_in_lookup(dentry)) {
- /* For a valid negative dentry that just passed revalidation,
- * there's little point in trying to open it server-side,
- * even though there's a minuscule chance it might succeed.
- * Either way it's a valid race to just return -ENOENT here.
- */
- if (!(open_flags & O_CREAT))
- return -ENOENT;
-
- /* Otherwise we just unhash it to be rehashed afresh via
- * lookup if necessary
- */
- d_drop(dentry);
- }
-
- it = kzalloc(sizeof(*it), GFP_NOFS);
- if (!it)
- return -ENOMEM;
-
- it->it_op = IT_OPEN;
- if (open_flags & O_CREAT)
- it->it_op |= IT_CREAT;
- it->it_create_mode = (mode & S_IALLUGO) | S_IFREG;
- it->it_flags = (open_flags & ~O_ACCMODE) | OPEN_FMODE(open_flags);
- it->it_flags &= ~MDS_OPEN_FL_INTERNAL;
-
- /* Dentry added to dcache tree in ll_lookup_it */
- de = ll_lookup_it(dir, dentry, it);
- if (IS_ERR(de))
- rc = PTR_ERR(de);
- else if (de)
- dentry = de;
-
- if (!rc) {
- if (it_disposition(it, DISP_OPEN_CREATE)) {
- /* Dentry instantiated in ll_create_it. */
- rc = ll_create_it(dir, dentry, it);
- if (rc) {
- /* We dget in ll_splice_alias. */
- if (de)
- dput(de);
- goto out_release;
- }
-
- *opened |= FILE_CREATED;
- }
- if (d_really_is_positive(dentry) &&
- it_disposition(it, DISP_OPEN_OPEN)) {
- /* Open dentry. */
- if (S_ISFIFO(d_inode(dentry)->i_mode)) {
- /* We cannot call open here as it might
- * deadlock. This case is unreachable in
- * practice because of OBD_CONNECT_NODEVOH.
- */
- rc = finish_no_open(file, de);
- } else {
- file->private_data = it;
- rc = finish_open(file, dentry, NULL, opened);
- /* We dget in ll_splice_alias. finish_open takes
- * care of dget for fd open.
- */
- if (de)
- dput(de);
- }
- } else {
- rc = finish_no_open(file, de);
- }
- }
-
-out_release:
- ll_intent_release(it);
- kfree(it);
-
- return rc;
-}
-
-/* We depend on "mode" being set with the proper file type/umask by now */
-static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it)
-{
- struct inode *inode = NULL;
- struct ptlrpc_request *request = NULL;
- struct ll_sb_info *sbi = ll_i2sbi(dir);
- int rc;
-
- LASSERT(it && it->it_disposition);
-
- LASSERT(it_disposition(it, DISP_ENQ_CREATE_REF));
- request = it->it_request;
- it_clear_disposition(it, DISP_ENQ_CREATE_REF);
- rc = ll_prep_inode(&inode, request, dir->i_sb, it);
- if (rc) {
- inode = ERR_PTR(rc);
- goto out;
- }
-
- LASSERT(hlist_empty(&inode->i_dentry));
-
- /* We asked for a lock on the directory, but were granted a
- * lock on the inode. Since we finally have an inode pointer,
- * stuff it in the lock.
- */
- CDEBUG(D_DLMTRACE, "setting l_ast_data to inode " DFID "(%p)\n",
- PFID(ll_inode2fid(dir)), inode);
- ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL);
- out:
- ptlrpc_req_finished(request);
- return inode;
-}
-
-/*
- * By the time this is called, we already have created the directory cache
- * entry for the new file, but it is so far negative - it has no inode.
- *
- * We defer creating the OBD object(s) until open, to keep the intent and
- * non-intent code paths similar, and also because we do not have the MDS
- * inode number before calling ll_create_node() (which is needed for LOV),
- * so we would need to do yet another RPC to the MDS to store the LOV EA
- * data on the MDS. If needed, we would pass the PACKED lmm as data and
- * lmm_size in datalen (the MDS still has code which will handle that).
- *
- * If the create succeeds, we fill in the inode information
- * with d_instantiate().
- */
-static int ll_create_it(struct inode *dir, struct dentry *dentry,
- struct lookup_intent *it)
-{
- struct inode *inode;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p), intent=%s\n",
- dentry, PFID(ll_inode2fid(dir)), dir, LL_IT2STR(it));
-
- rc = it_open_error(DISP_OPEN_CREATE, it);
- if (rc)
- return rc;
-
- inode = ll_create_node(dir, it);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
-
- d_instantiate(dentry, inode);
-
- return ll_init_security(dentry, inode, dir);
-}
-
-void ll_update_times(struct ptlrpc_request *request, struct inode *inode)
-{
- struct mdt_body *body = req_capsule_server_get(&request->rq_pill,
- &RMF_MDT_BODY);
-
- LASSERT(body);
- if (body->mbo_valid & OBD_MD_FLMTIME &&
- body->mbo_mtime > LTIME_S(inode->i_mtime)) {
- CDEBUG(D_INODE, "setting fid " DFID " mtime from %lu to %llu\n",
- PFID(ll_inode2fid(inode)), LTIME_S(inode->i_mtime),
- body->mbo_mtime);
- LTIME_S(inode->i_mtime) = body->mbo_mtime;
- }
- if (body->mbo_valid & OBD_MD_FLCTIME &&
- body->mbo_ctime > LTIME_S(inode->i_ctime))
- LTIME_S(inode->i_ctime) = body->mbo_ctime;
-}
-
-static int ll_new_node(struct inode *dir, struct dentry *dentry,
- const char *tgt, umode_t mode, int rdev,
- __u32 opc)
-{
- struct ptlrpc_request *request = NULL;
- struct md_op_data *op_data;
- struct inode *inode = NULL;
- struct ll_sb_info *sbi = ll_i2sbi(dir);
- int tgt_len = 0;
- int err;
-
- if (unlikely(tgt))
- tgt_len = strlen(tgt) + 1;
-again:
- op_data = ll_prep_md_op_data(NULL, dir, NULL,
- dentry->d_name.name,
- dentry->d_name.len,
- 0, opc, NULL);
- if (IS_ERR(op_data)) {
- err = PTR_ERR(op_data);
- goto err_exit;
- }
-
- err = md_create(sbi->ll_md_exp, op_data, tgt, tgt_len, mode,
- from_kuid(&init_user_ns, current_fsuid()),
- from_kgid(&init_user_ns, current_fsgid()),
- current_cap(), rdev, &request);
- ll_finish_md_op_data(op_data);
- if (err < 0 && err != -EREMOTE)
- goto err_exit;
-
- /*
- * If the client doesn't know where to create a subdirectory (or
- * in case of a race that sends the RPC to the wrong MDS), the
- * MDS will return -EREMOTE and the client will fetch the layout
- * of the directory, then create the directory on the right MDT.
- */
- if (unlikely(err == -EREMOTE)) {
- struct ll_inode_info *lli = ll_i2info(dir);
- struct lmv_user_md *lum;
- int lumsize, err2;
-
- ptlrpc_req_finished(request);
- request = NULL;
-
- err2 = ll_dir_getstripe(dir, (void **)&lum, &lumsize, &request,
- OBD_MD_DEFAULT_MEA);
- if (!err2) {
- /* Update stripe_offset and retry */
- lli->lli_def_stripe_offset = lum->lum_stripe_offset;
- } else if (err2 == -ENODATA &&
- lli->lli_def_stripe_offset != -1) {
- /*
- * If there is no default stripe EA on the MDT, but the
- * client has a default stripe, it probably means the
- * default stripe EA has just been deleted.
- */
- lli->lli_def_stripe_offset = -1;
- } else {
- goto err_exit;
- }
-
- ptlrpc_req_finished(request);
- request = NULL;
- goto again;
- }
-
- ll_update_times(request, dir);
-
- err = ll_prep_inode(&inode, request, dir->i_sb, NULL);
- if (err)
- goto err_exit;
-
- d_instantiate(dentry, inode);
-
- err = ll_init_security(dentry, inode, dir);
-err_exit:
- if (request)
- ptlrpc_req_finished(request);
-
- return err;
-}
-
-static int ll_mknod(struct inode *dir, struct dentry *dchild,
- umode_t mode, dev_t rdev)
-{
- int err;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p) mode %o dev %x\n",
- dchild, PFID(ll_inode2fid(dir)), dir, mode,
- old_encode_dev(rdev));
-
- if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir)))
- mode &= ~current_umask();
-
- switch (mode & S_IFMT) {
- case 0:
- mode |= S_IFREG;
- /* for mode = 0 case */
- /* fall through */
- case S_IFREG:
- case S_IFCHR:
- case S_IFBLK:
- case S_IFIFO:
- case S_IFSOCK:
- err = ll_new_node(dir, dchild, NULL, mode,
- old_encode_dev(rdev),
- LUSTRE_OPC_MKNOD);
- break;
- case S_IFDIR:
- err = -EPERM;
- break;
- default:
- err = -EINVAL;
- }
-
- if (!err)
- ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKNOD, 1);
-
- return err;
-}
-
-/*
- * Plain create. Intent create is handled in atomic_open.
- */
-static int ll_create_nd(struct inode *dir, struct dentry *dentry,
- umode_t mode, bool want_excl)
-{
- int rc;
-
- CDEBUG(D_VFSTRACE,
- "VFS Op:name=%pd, dir=" DFID "(%p), flags=%u, excl=%d\n",
- dentry, PFID(ll_inode2fid(dir)), dir, mode, want_excl);
-
- rc = ll_mknod(dir, dentry, mode, 0);
-
- ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_CREATE, 1);
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, unhashed %d\n",
- dentry, d_unhashed(dentry));
-
- return rc;
-}
-
-static int ll_unlink(struct inode *dir, struct dentry *dchild)
-{
- struct ptlrpc_request *request = NULL;
- struct md_op_data *op_data;
- int rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n",
- dchild, dir->i_ino, dir->i_generation, dir);
-
- op_data = ll_prep_md_op_data(NULL, dir, NULL,
- dchild->d_name.name,
- dchild->d_name.len,
- 0, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- if (dchild->d_inode)
- op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
-
- op_data->op_fid2 = op_data->op_fid3;
- rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
- ll_finish_md_op_data(op_data);
- if (rc)
- goto out;
-
- ll_update_times(request, dir);
- ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_UNLINK, 1);
-
- out:
- ptlrpc_req_finished(request);
- return rc;
-}
-
-static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
-{
- int err;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir" DFID "(%p)\n",
- dentry, PFID(ll_inode2fid(dir)), dir);
-
- if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir)))
- mode &= ~current_umask();
- mode = (mode & (0777 | S_ISVTX)) | S_IFDIR;
-
- err = ll_new_node(dir, dentry, NULL, mode, 0, LUSTRE_OPC_MKDIR);
- if (!err)
- ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKDIR, 1);
-
- return err;
-}
-
-static int ll_rmdir(struct inode *dir, struct dentry *dchild)
-{
- struct ptlrpc_request *request = NULL;
- struct md_op_data *op_data;
- int rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p)\n",
- dchild, PFID(ll_inode2fid(dir)), dir);
-
- op_data = ll_prep_md_op_data(NULL, dir, NULL,
- dchild->d_name.name,
- dchild->d_name.len,
- S_IFDIR, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- if (dchild->d_inode)
- op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
-
- op_data->op_fid2 = op_data->op_fid3;
- rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
- ll_finish_md_op_data(op_data);
- if (rc == 0) {
- ll_update_times(request, dir);
- ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_RMDIR, 1);
- }
-
- ptlrpc_req_finished(request);
- return rc;
-}
-
-static int ll_symlink(struct inode *dir, struct dentry *dentry,
- const char *oldname)
-{
- int err;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p),target=%.*s\n",
- dentry, PFID(ll_inode2fid(dir)), dir, 3000, oldname);
-
- err = ll_new_node(dir, dentry, oldname, S_IFLNK | 0777,
- 0, LUSTRE_OPC_SYMLINK);
-
- if (!err)
- ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_SYMLINK, 1);
-
- return err;
-}
-
-static int ll_link(struct dentry *old_dentry, struct inode *dir,
- struct dentry *new_dentry)
-{
- struct inode *src = d_inode(old_dentry);
- struct ll_sb_info *sbi = ll_i2sbi(dir);
- struct ptlrpc_request *request = NULL;
- struct md_op_data *op_data;
- int err;
-
- CDEBUG(D_VFSTRACE,
- "VFS Op: inode=" DFID "(%p), dir=" DFID "(%p), target=%pd\n",
- PFID(ll_inode2fid(src)), src, PFID(ll_inode2fid(dir)), dir,
- new_dentry);
-
- op_data = ll_prep_md_op_data(NULL, src, dir, new_dentry->d_name.name,
- new_dentry->d_name.len,
- 0, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- err = md_link(sbi->ll_md_exp, op_data, &request);
- ll_finish_md_op_data(op_data);
- if (err)
- goto out;
-
- ll_update_times(request, dir);
- ll_stats_ops_tally(sbi, LPROC_LL_LINK, 1);
-out:
- ptlrpc_req_finished(request);
- return err;
-}
-
-static int ll_rename(struct inode *src, struct dentry *src_dchild,
- struct inode *tgt, struct dentry *tgt_dchild,
- unsigned int flags)
-{
- struct ptlrpc_request *request = NULL;
- struct ll_sb_info *sbi = ll_i2sbi(src);
- struct md_op_data *op_data;
- int err;
-
- if (flags)
- return -EINVAL;
-
- CDEBUG(D_VFSTRACE,
- "VFS Op:oldname=%pd, src_dir=" DFID "(%p), newname=%pd, tgt_dir=" DFID "(%p)\n",
- src_dchild, PFID(ll_inode2fid(src)), src,
- tgt_dchild, PFID(ll_inode2fid(tgt)), tgt);
-
- op_data = ll_prep_md_op_data(NULL, src, tgt, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- if (src_dchild->d_inode)
- op_data->op_fid3 = *ll_inode2fid(src_dchild->d_inode);
- if (tgt_dchild->d_inode)
- op_data->op_fid4 = *ll_inode2fid(tgt_dchild->d_inode);
-
- err = md_rename(sbi->ll_md_exp, op_data,
- src_dchild->d_name.name,
- src_dchild->d_name.len,
- tgt_dchild->d_name.name,
- tgt_dchild->d_name.len, &request);
- ll_finish_md_op_data(op_data);
- if (!err) {
- ll_update_times(request, src);
- ll_update_times(request, tgt);
- ll_stats_ops_tally(sbi, LPROC_LL_RENAME, 1);
- }
-
- ptlrpc_req_finished(request);
- if (!err)
- d_move(src_dchild, tgt_dchild);
- return err;
-}
-
-const struct inode_operations ll_dir_inode_operations = {
- .mknod = ll_mknod,
- .atomic_open = ll_atomic_open,
- .lookup = ll_lookup_nd,
- .create = ll_create_nd,
- /* We need all these non-raw things for NFSD, so we do not have to patch it. */
- .unlink = ll_unlink,
- .mkdir = ll_mkdir,
- .rmdir = ll_rmdir,
- .symlink = ll_symlink,
- .link = ll_link,
- .rename = ll_rename,
- .setattr = ll_setattr,
- .getattr = ll_getattr,
- .permission = ll_inode_permission,
- .listxattr = ll_listxattr,
- .get_acl = ll_get_acl,
-};
-
-const struct inode_operations ll_special_inode_operations = {
- .setattr = ll_setattr,
- .getattr = ll_getattr,
- .permission = ll_inode_permission,
- .listxattr = ll_listxattr,
- .get_acl = ll_get_acl,
-};
diff --git a/drivers/staging/lustre/lustre/llite/range_lock.c b/drivers/staging/lustre/lustre/llite/range_lock.c
deleted file mode 100644
index 008a8874118d..000000000000
--- a/drivers/staging/lustre/lustre/llite/range_lock.c
+++ /dev/null
@@ -1,241 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * A range lock allows multiple threads to write a single shared file,
- * provided each thread writes to a non-overlapping portion of the
- * file.
- *
- * Refer to the possible upstream kernel version of range lock by
- * Jan Kara <jack@suse.cz>: https://lkml.org/lkml/2013/1/31/480
- *
- * This file could later be replaced by the upstream kernel version.
- */
-/*
- * Author: Prakash Surya <surya1@llnl.gov>
- * Author: Bobi Jam <bobijam.xu@intel.com>
- */
-#include "range_lock.h"
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <linux/libcfs/libcfs.h>
-
-/**
- * Initialize a range lock tree
- *
- * \param tree [in] an empty range lock tree
- *
- * Pre: Caller should have allocated the range lock tree.
- * Post: The range lock tree is ready to function.
- */
-void range_lock_tree_init(struct range_lock_tree *tree)
-{
- tree->rlt_root = NULL;
- tree->rlt_sequence = 0;
- spin_lock_init(&tree->rlt_lock);
-}
-
-/**
- * Initialize a range lock node
- *
- * \param lock [in] an empty range lock node
- * \param start [in] start of the covering region
- * \param end [in] end of the covering region
- *
- * Pre: Caller should have allocated the range lock node.
- * Post: The range lock node is meant to cover [start, end] region
- */
-int range_lock_init(struct range_lock *lock, __u64 start, __u64 end)
-{
- int rc;
-
- memset(&lock->rl_node, 0, sizeof(lock->rl_node));
- if (end != LUSTRE_EOF)
- end >>= PAGE_SHIFT;
- rc = interval_set(&lock->rl_node, start >> PAGE_SHIFT, end);
- if (rc)
- return rc;
-
- INIT_LIST_HEAD(&lock->rl_next_lock);
- lock->rl_task = NULL;
- lock->rl_lock_count = 0;
- lock->rl_blocking_ranges = 0;
- lock->rl_sequence = 0;
- return rc;
-}
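
A hedged sketch of the intended calling sequence (kernel context only;
in the llite code the tree is embedded per-inode and writers take a
range over their byte span before a shared-file write):

	struct range_lock_tree tree;
	struct range_lock range;
	int rc;

	range_lock_tree_init(&tree);

	/* cover bytes [pos, pos + count - 1]; shifted to page indexes inside */
	rc = range_lock_init(&range, pos, pos + count - 1);
	if (!rc) {
		rc = range_lock(&tree, &range);	/* sleeps while overlapped */
		if (!rc) {
			/* ... perform the non-overlapping write ... */
			range_unlock(&tree, &range);
		}
	}
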
-
-static inline struct range_lock *next_lock(struct range_lock *lock)
-{
- return list_entry(lock->rl_next_lock.next, typeof(*lock), rl_next_lock);
-}
-
-/**
- * Helper function of range_unlock()
- *
- * \param node [in] a range lock found overlapped during interval node
- * search
- * \param arg [in] the range lock to be tested
- *
- * \retval INTERVAL_ITER_CONT continue the search with the next
- *                            overlapping range node
- * \retval INTERVAL_ITER_STOP stop the search
- */
-static enum interval_iter range_unlock_cb(struct interval_node *node, void *arg)
-{
- struct range_lock *lock = arg;
- struct range_lock *overlap = node2rangelock(node);
- struct range_lock *iter;
-
- list_for_each_entry(iter, &overlap->rl_next_lock, rl_next_lock) {
- if (iter->rl_sequence > lock->rl_sequence) {
- --iter->rl_blocking_ranges;
- LASSERT(iter->rl_blocking_ranges > 0);
- }
- }
- if (overlap->rl_sequence > lock->rl_sequence) {
- --overlap->rl_blocking_ranges;
- if (overlap->rl_blocking_ranges == 0)
- wake_up_process(overlap->rl_task);
- }
- return INTERVAL_ITER_CONT;
-}
-
-/**
- * Unlock a range lock, wake up locks blocked by this lock.
- *
- * \param tree [in] range lock tree
- * \param lock [in] range lock to be deleted
- *
- * If this lock has been granted, release it; if not, just delete it from
- * the tree or the same-region lock list. Wake up those locks blocked only
- * by this lock through range_unlock_cb().
- */
-void range_unlock(struct range_lock_tree *tree, struct range_lock *lock)
-{
- spin_lock(&tree->rlt_lock);
- if (!list_empty(&lock->rl_next_lock)) {
- struct range_lock *next;
-
- if (interval_is_intree(&lock->rl_node)) { /* first lock */
- /* Insert the next same range lock into the tree */
- next = next_lock(lock);
- next->rl_lock_count = lock->rl_lock_count - 1;
- interval_erase(&lock->rl_node, &tree->rlt_root);
- interval_insert(&next->rl_node, &tree->rlt_root);
- } else {
- /* find the first lock in tree */
- list_for_each_entry(next, &lock->rl_next_lock,
- rl_next_lock) {
- if (!interval_is_intree(&next->rl_node))
- continue;
-
- LASSERT(next->rl_lock_count > 0);
- next->rl_lock_count--;
- break;
- }
- }
- list_del_init(&lock->rl_next_lock);
- } else {
- LASSERT(interval_is_intree(&lock->rl_node));
- interval_erase(&lock->rl_node, &tree->rlt_root);
- }
-
- interval_search(tree->rlt_root, &lock->rl_node.in_extent,
- range_unlock_cb, lock);
- spin_unlock(&tree->rlt_lock);
-}
-
-/**
- * Helper function of range_lock()
- *
- * \param node [in] a range lock found overlapped during interval node
- * search
- * \param arg [in] the range lock to be tested
- *
- * \retval INTERVAL_ITER_CONT continue the search with the next
- *                            overlapping range node
- * \retval INTERVAL_ITER_STOP stop the search
- */
-static enum interval_iter range_lock_cb(struct interval_node *node, void *arg)
-{
- struct range_lock *lock = arg;
- struct range_lock *overlap = node2rangelock(node);
-
- lock->rl_blocking_ranges += overlap->rl_lock_count + 1;
- return INTERVAL_ITER_CONT;
-}
-
-/**
- * Lock a region
- *
- * \param tree [in] range lock tree
- * \param lock [in] range lock node containing the region span
- *
- * \retval 0   the range lock was acquired
- * \retval <0  error code if the range lock could not be acquired
- *
- * If an overlapping range lock exists, the new lock will wait and
- * retry; if it later finds that it is not the chosen one to wake up,
- * it waits again.
- */
-int range_lock(struct range_lock_tree *tree, struct range_lock *lock)
-{
- struct interval_node *node;
- int rc = 0;
-
- spin_lock(&tree->rlt_lock);
- /*
- * We need to check for all conflicting intervals
- * already in the tree.
- */
- interval_search(tree->rlt_root, &lock->rl_node.in_extent,
- range_lock_cb, lock);
- /*
- * Insert to the tree if I am unique, otherwise I've been linked to
- * the rl_next_lock of another lock which has the same range as mine
- * in range_lock_cb().
- */
- node = interval_insert(&lock->rl_node, &tree->rlt_root);
- if (node) {
- struct range_lock *tmp = node2rangelock(node);
-
- list_add_tail(&lock->rl_next_lock, &tmp->rl_next_lock);
- tmp->rl_lock_count++;
- }
- lock->rl_sequence = ++tree->rlt_sequence;
-
- while (lock->rl_blocking_ranges > 0) {
- lock->rl_task = current;
- __set_current_state(TASK_INTERRUPTIBLE);
- spin_unlock(&tree->rlt_lock);
- schedule();
-
- if (signal_pending(current)) {
- range_unlock(tree, lock);
- rc = -EINTR;
- goto out;
- }
- spin_lock(&tree->rlt_lock);
- }
- spin_unlock(&tree->rlt_lock);
-out:
- return rc;
-}
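
For reference, a minimal usage sketch of the API this file implemented (caller-side setup is hypothetical; error handling abbreviated):

	struct range_lock_tree tree;
	struct range_lock lock;
	int rc;

	range_lock_tree_init(&tree);

	/* byte range [0, 4095]; range_lock_init() converts bytes to pages */
	rc = range_lock_init(&lock, 0, 4095);
	if (rc == 0) {
		rc = range_lock(&tree, &lock);	/* may sleep; -EINTR on signal */
		if (rc == 0) {
			/* ... write to the locked region ... */
			range_unlock(&tree, &lock);
		}
	}
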
diff --git a/drivers/staging/lustre/lustre/llite/range_lock.h b/drivers/staging/lustre/lustre/llite/range_lock.h
deleted file mode 100644
index 9ebac09160f2..000000000000
--- a/drivers/staging/lustre/lustre/llite/range_lock.h
+++ /dev/null
@@ -1,83 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Range locks allow multiple threads to write to a single shared
- * file, provided each thread writes to a non-overlapping portion of the
- * file.
- *
- * Refer to the possible upstream kernel version of range lock by
- * Jan Kara <jack@suse.cz>: https://lkml.org/lkml/2013/1/31/480
- *
- * This file could later be replaced by the upstream kernel version.
- */
-/*
- * Author: Prakash Surya <surya1@llnl.gov>
- * Author: Bobi Jam <bobijam.xu@intel.com>
- */
-#ifndef _RANGE_LOCK_H
-#define _RANGE_LOCK_H
-
-#include <linux/spinlock.h>
-#include <interval_tree.h>
-
-struct range_lock {
- struct interval_node rl_node;
- /**
- * Process to enqueue this lock.
- */
- struct task_struct *rl_task;
- /**
- * List of locks with the same range.
- */
- struct list_head rl_next_lock;
- /**
- * Number of locks in the list rl_next_lock
- */
- unsigned int rl_lock_count;
- /**
- * Number of ranges which are blocking acquisition of the lock
- */
- unsigned int rl_blocking_ranges;
- /**
- * Sequence number of range lock. This number is used to determine
- * the order in which locks were queued; this is required for range_cancel().
- */
- __u64 rl_sequence;
-};
-
-static inline struct range_lock *node2rangelock(const struct interval_node *n)
-{
- return container_of(n, struct range_lock, rl_node);
-}
-
-struct range_lock_tree {
- struct interval_node *rlt_root;
- spinlock_t rlt_lock; /* protect range lock tree */
- __u64 rlt_sequence;
-};
-
-void range_lock_tree_init(struct range_lock_tree *tree);
-int range_lock_init(struct range_lock *lock, __u64 start, __u64 end);
-int range_lock(struct range_lock_tree *tree, struct range_lock *lock);
-void range_unlock(struct range_lock_tree *tree, struct range_lock *lock);
-#endif
diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
deleted file mode 100644
index 3e008ce7275d..000000000000
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ /dev/null
@@ -1,1214 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/llite/rw.c
- *
- * Lustre Lite I/O page cache routines shared by different kernel revs
- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/writeback.h>
-#include <linux/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-/* current_is_kswapd() */
-#include <linux/swap.h>
-#include <linux/bvec.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_cksum.h>
-#include "llite_internal.h"
-
-static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
-
-/**
- * Get readahead pages from the filesystem readahead pool of the client for a
- * thread.
- *
- * \param sbi superblock for filesystem readahead state ll_ra_info
- * \param ria per-thread readahead state
- * \param pages number of pages requested for readahead for the thread.
- *
- * WARNING: This algorithm is used to reduce contention on sbi->ll_lock.
- * It should work well if ra_max_pages is much greater than a single
- * file's read-ahead window, and there are not too many threads contending
- * for these readahead pages.
- *
- * TODO: There may be a 'global sync problem' if many threads are trying
- * to get an ra budget that is larger than the remaining readahead pages
- * and reach here at exactly the same time. They will compute \a ret to
- * consume the remaining pages, but will fail at atomic_add_return() and
- * get a zero ra window, although there is still ra space remaining. - Jay
- */
-static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
- struct ra_io_arg *ria,
- unsigned long pages, unsigned long min)
-{
- struct ll_ra_info *ra = &sbi->ll_ra_info;
- long ret;
-
- /* If fewer than 1M of read-ahead pages are left, do not do read-ahead;
- * otherwise it will form small read RPCs (< 1M), which hurt server
- * performance a lot.
- */
- ret = min(ra->ra_max_pages - atomic_read(&ra->ra_cur_pages), pages);
- if (ret < 0 || ret < min_t(long, PTLRPC_MAX_BRW_PAGES, pages)) {
- ret = 0;
- goto out;
- }
-
- if (atomic_add_return(ret, &ra->ra_cur_pages) > ra->ra_max_pages) {
- atomic_sub(ret, &ra->ra_cur_pages);
- ret = 0;
- }
-
-out:
- if (ret < min) {
- /* override ra limit for maximum performance */
- atomic_add(min - ret, &ra->ra_cur_pages);
- ret = min;
- }
- return ret;
-}
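
ll_ra_count_get()/ll_ra_count_put() form an optimistic atomic budget: take what looks available, then roll back if atomic_add_return() shows the pool was oversubscribed. A standalone sketch of the same pattern, with hypothetical names that are not part of this driver:

	static atomic_t budget_used = ATOMIC_INIT(0);
	#define BUDGET_MAX 1024

	static long budget_reserve(long want)
	{
		long got = min_t(long, BUDGET_MAX - atomic_read(&budget_used), want);

		if (got <= 0)
			return 0;
		if (atomic_add_return(got, &budget_used) > BUDGET_MAX) {
			atomic_sub(got, &budget_used);	/* lost a race; roll back */
			return 0;
		}
		return got;
	}

	static void budget_release(long got)
	{
		atomic_sub(got, &budget_used);
	}
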
-
-void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len)
-{
- struct ll_ra_info *ra = &sbi->ll_ra_info;
-
- atomic_sub(len, &ra->ra_cur_pages);
-}
-
-static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which)
-{
- LASSERTF(which < _NR_RA_STAT, "which: %u\n", which);
- lprocfs_counter_incr(sbi->ll_ra_stats, which);
-}
-
-void ll_ra_stats_inc(struct inode *inode, enum ra_stat which)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
-
- ll_ra_stats_inc_sbi(sbi, which);
-}
-
-#define RAS_CDEBUG(ras) \
- CDEBUG(D_READA, \
- "lrp %lu cr %lu cp %lu ws %lu wl %lu nra %lu rpc %lu " \
- "r %lu ri %lu csr %lu sf %lu sp %lu sl %lu\n", \
- ras->ras_last_readpage, ras->ras_consecutive_requests, \
- ras->ras_consecutive_pages, ras->ras_window_start, \
- ras->ras_window_len, ras->ras_next_readahead, \
- ras->ras_rpc_size, \
- ras->ras_requests, ras->ras_request_index, \
- ras->ras_consecutive_stride_requests, ras->ras_stride_offset, \
- ras->ras_stride_pages, ras->ras_stride_length)
-
-static int index_in_window(unsigned long index, unsigned long point,
- unsigned long before, unsigned long after)
-{
- unsigned long start = point - before, end = point + after;
-
- if (start > point)
- start = 0;
- if (end < point)
- end = ~0;
-
- return start <= index && index <= end;
-}
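
For example, index_in_window(10, 12, 8, 8) tests 10 against the window [4, 20] and returns true; the two clamps above guard against wrap-around when point - before underflows or point + after overflows.
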
-
-void ll_ras_enter(struct file *f)
-{
- struct ll_file_data *fd = LUSTRE_FPRIVATE(f);
- struct ll_readahead_state *ras = &fd->fd_ras;
-
- spin_lock(&ras->ras_lock);
- ras->ras_requests++;
- ras->ras_request_index = 0;
- ras->ras_consecutive_requests++;
- spin_unlock(&ras->ras_lock);
-}
-
-/**
- * Initiates read-ahead of a page with the given index.
- *
- * \retval +ve: page was already uptodate, so it was skipped
- * rather than added;
- * \retval -ve: page wasn't added to \a queue due to an error;
- * \retval 0: page was added into \a queue for read-ahead.
- */
-static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue, pgoff_t index)
-{
- enum ra_stat which = _NR_RA_STAT; /* keep gcc happy */
- struct cl_object *clob = io->ci_obj;
- struct inode *inode = vvp_object_inode(clob);
- const char *msg = NULL;
- struct cl_page *page;
- struct vvp_page *vpg;
- struct page *vmpage;
- int rc = 0;
-
- vmpage = grab_cache_page_nowait(inode->i_mapping, index);
- if (!vmpage) {
- which = RA_STAT_FAILED_GRAB_PAGE;
- msg = "g_c_p_n failed";
- rc = -EBUSY;
- goto out;
- }
-
- /* Check if vmpage was truncated or reclaimed */
- if (vmpage->mapping != inode->i_mapping) {
- which = RA_STAT_WRONG_GRAB_PAGE;
- msg = "g_c_p_n returned invalid page";
- rc = -EBUSY;
- goto out;
- }
-
- page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
- if (IS_ERR(page)) {
- which = RA_STAT_FAILED_GRAB_PAGE;
- msg = "cl_page_find failed";
- rc = PTR_ERR(page);
- goto out;
- }
-
- lu_ref_add(&page->cp_reference, "ra", current);
- cl_page_assume(env, io, page);
- vpg = cl2vvp_page(cl_object_page_slice(clob, page));
- if (!vpg->vpg_defer_uptodate && !PageUptodate(vmpage)) {
- vpg->vpg_defer_uptodate = 1;
- vpg->vpg_ra_used = 0;
- cl_page_list_add(queue, page);
- } else {
- /* skip completed pages */
- cl_page_unassume(env, io, page);
- /* This page is already uptodate, returning a positive number
- * to tell the callers about this
- */
- rc = 1;
- }
-
- lu_ref_del(&page->cp_reference, "ra", current);
- cl_page_put(env, page);
-out:
- if (vmpage) {
- if (rc)
- unlock_page(vmpage);
- put_page(vmpage);
- }
- if (msg) {
- ll_ra_stats_inc(inode, which);
- CDEBUG(D_READA, "%s\n", msg);
- }
- return rc;
-}
-
-#define RIA_DEBUG(ria) \
- CDEBUG(D_READA, "rs %lu re %lu ro %lu rl %lu rp %lu\n", \
- ria->ria_start, ria->ria_end, ria->ria_stoff, ria->ria_length,\
- ria->ria_pages)
-
-static inline int stride_io_mode(struct ll_readahead_state *ras)
-{
- return ras->ras_consecutive_stride_requests > 1;
-}
-
-/* The function calculates how many pages will be read in
- * [off, off + length], in such stride IO area,
- * stride_offset = st_off, stride_length = st_len,
- * stride_pages = st_pgs
- *
- * |------------------|*****|------------------|*****|------------|*****|....
- * st_off
- * |--- st_pgs ---|
- * |----- st_len -----|
- *
- * How many pages it should read in such pattern
- * |-------------------------------------------------------------|
- * off
- * |<------ length ------->|
- *
- * = |<----->| + |-------------------------------------| + |---|
- * start_left st_pgs * i end_left
- */
-static unsigned long
-stride_pg_count(pgoff_t st_off, unsigned long st_len, unsigned long st_pgs,
- unsigned long off, unsigned long length)
-{
- __u64 start = off > st_off ? off - st_off : 0;
- __u64 end = off + length > st_off ? off + length - st_off : 0;
- unsigned long start_left = 0;
- unsigned long end_left = 0;
- unsigned long pg_count;
-
- if (st_len == 0 || length == 0 || end == 0)
- return length;
-
- start_left = do_div(start, st_len);
- if (start_left < st_pgs)
- start_left = st_pgs - start_left;
- else
- start_left = 0;
-
- end_left = do_div(end, st_len);
- if (end_left > st_pgs)
- end_left = st_pgs;
-
- CDEBUG(D_READA, "start %llu, end %llu start_left %lu end_left %lu\n",
- start, end, start_left, end_left);
-
- if (start == end)
- pg_count = end_left - (st_pgs - start_left);
- else
- pg_count = start_left + st_pgs * (end - start - 1) + end_left;
-
- CDEBUG(D_READA,
- "st_off %lu, st_len %lu st_pgs %lu off %lu length %lu pgcount %lu\n",
- st_off, st_len, st_pgs, off, length, pg_count);
-
- return pg_count;
-}
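
As a worked example of stride_pg_count() (illustrative values): with st_off = 0, st_len = 16, st_pgs = 4, off = 2 and length = 40, the stride windows cover pages 0-3, 16-19, 32-35, ...; the range [2, 41] intersects pages 2-3, 16-19 and 32-35, so start_left = 2, end_left = 4, start = 0, end = 2, and pg_count = 2 + 4 * (2 - 0 - 1) + 4 = 10.
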
-
-static int ria_page_count(struct ra_io_arg *ria)
-{
- __u64 length = ria->ria_end >= ria->ria_start ?
- ria->ria_end - ria->ria_start + 1 : 0;
-
- return stride_pg_count(ria->ria_stoff, ria->ria_length,
- ria->ria_pages, ria->ria_start,
- length);
-}
-
-static unsigned long ras_align(struct ll_readahead_state *ras,
- unsigned long index,
- unsigned long *remainder)
-{
- unsigned long rem = index % ras->ras_rpc_size;
-
- if (remainder)
- *remainder = rem;
- return index - rem;
-}
-
-/* Check whether the index is in the defined read-ahead window */
-static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria)
-{
- /* If ria_length == ria_pages, it means non-stride I/O mode;
- * idx should always be inside the read-ahead window in this case.
- * For stride I/O mode, just check whether idx is inside
- * the ria_pages.
- */
- return ria->ria_length == 0 || ria->ria_length == ria->ria_pages ||
- (idx >= ria->ria_stoff && (idx - ria->ria_stoff) %
- ria->ria_length < ria->ria_pages);
-}
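
For instance, with ria_stoff = 0, ria_length = 16 and ria_pages = 4 (illustrative values), idx = 18 gives (18 - 0) % 16 = 2 < 4 and is inside the window, while idx = 22 gives 6 and falls in the stride gap.
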
-
-static unsigned long
-ll_read_ahead_pages(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue, struct ll_readahead_state *ras,
- struct ra_io_arg *ria)
-{
- struct cl_read_ahead ra = { 0 };
- unsigned long ra_end = 0;
- bool stride_ria;
- pgoff_t page_idx;
- int rc;
-
- LASSERT(ria);
- RIA_DEBUG(ria);
-
- stride_ria = ria->ria_length > ria->ria_pages && ria->ria_pages > 0;
- for (page_idx = ria->ria_start;
- page_idx <= ria->ria_end && ria->ria_reserved > 0; page_idx++) {
- if (ras_inside_ra_window(page_idx, ria)) {
- if (!ra.cra_end || ra.cra_end < page_idx) {
- unsigned long end;
-
- cl_read_ahead_release(env, &ra);
-
- rc = cl_io_read_ahead(env, io, page_idx, &ra);
- if (rc < 0)
- break;
-
- CDEBUG(D_READA, "idx: %lu, ra: %lu, rpc: %lu\n",
- page_idx, ra.cra_end, ra.cra_rpc_size);
- LASSERTF(ra.cra_end >= page_idx,
- "object: %p, indcies %lu / %lu\n",
- io->ci_obj, ra.cra_end, page_idx);
- /*
- * update read ahead RPC size.
- * NB: it's racy but doesn't matter
- */
- if (ras->ras_rpc_size > ra.cra_rpc_size &&
- ra.cra_rpc_size > 0)
- ras->ras_rpc_size = ra.cra_rpc_size;
- /* trim it to align with optimal RPC size */
- end = ras_align(ras, ria->ria_end + 1, NULL);
- if (end > 0 && !ria->ria_eof)
- ria->ria_end = end - 1;
- if (ria->ria_end < ria->ria_end_min)
- ria->ria_end = ria->ria_end_min;
- if (ria->ria_end > ra.cra_end)
- ria->ria_end = ra.cra_end;
- }
-
- /* If the page is inside the read-ahead window */
- rc = ll_read_ahead_page(env, io, queue, page_idx);
- if (rc < 0)
- break;
-
- ra_end = page_idx;
- if (!rc)
- ria->ria_reserved--;
- } else if (stride_ria) {
- /* If it is not in the read-ahead window but we are in
- * stride read-ahead mode, check whether it should skip
- * the stride gap
- */
- pgoff_t offset;
- /* FIXME: This assertion is only valid for forward
- * read-ahead; it will be fixed when backward
- * read-ahead is implemented
- */
- LASSERTF(page_idx >= ria->ria_stoff,
- "Invalid page_idx %lu rs %lu re %lu ro %lu rl %lu rp %lu\n",
- page_idx,
- ria->ria_start, ria->ria_end, ria->ria_stoff,
- ria->ria_length, ria->ria_pages);
- offset = page_idx - ria->ria_stoff;
- offset = offset % (ria->ria_length);
- if (offset > ria->ria_pages) {
- page_idx += ria->ria_length - offset;
- CDEBUG(D_READA, "i %lu skip %lu\n", page_idx,
- ria->ria_length - offset);
- continue;
- }
- }
- }
- cl_read_ahead_release(env, &ra);
-
- return ra_end;
-}
-
-static int ll_readahead(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue,
- struct ll_readahead_state *ras, bool hit)
-{
- struct vvp_io *vio = vvp_env_io(env);
- struct ll_thread_info *lti = ll_env_info(env);
- struct cl_attr *attr = vvp_env_thread_attr(env);
- unsigned long len, mlen = 0;
- pgoff_t ra_end, start = 0, end = 0;
- struct inode *inode;
- struct ra_io_arg *ria = &lti->lti_ria;
- struct cl_object *clob;
- int ret = 0;
- __u64 kms;
-
- clob = io->ci_obj;
- inode = vvp_object_inode(clob);
-
- memset(ria, 0, sizeof(*ria));
-
- cl_object_attr_lock(clob);
- ret = cl_object_attr_get(env, clob, attr);
- cl_object_attr_unlock(clob);
-
- if (ret != 0)
- return ret;
- kms = attr->cat_kms;
- if (kms == 0) {
- ll_ra_stats_inc(inode, RA_STAT_ZERO_LEN);
- return 0;
- }
-
- spin_lock(&ras->ras_lock);
-
- /**
- * Note: another thread might roll back ras_next_readahead
- * if it cannot get the full size of prepared pages; see the
- * end of this function. For stride read-ahead, we need to
- * make sure the offset is no less than ras_stride_offset,
- * so that stride read-ahead can work correctly.
- */
- if (stride_io_mode(ras))
- start = max(ras->ras_next_readahead, ras->ras_stride_offset);
- else
- start = ras->ras_next_readahead;
-
- if (ras->ras_window_len > 0)
- end = ras->ras_window_start + ras->ras_window_len - 1;
-
- /* Enlarge the RA window to encompass the full read */
- if (vio->vui_ra_valid &&
- end < vio->vui_ra_start + vio->vui_ra_count - 1)
- end = vio->vui_ra_start + vio->vui_ra_count - 1;
-
- if (end) {
- unsigned long end_index;
-
- /* Truncate RA window to end of file */
- end_index = (unsigned long)((kms - 1) >> PAGE_SHIFT);
- if (end_index <= end) {
- end = end_index;
- ria->ria_eof = true;
- }
-
- ras->ras_next_readahead = max(end, end + 1);
- RAS_CDEBUG(ras);
- }
- ria->ria_start = start;
- ria->ria_end = end;
- /* If stride I/O mode is detected, get the stride window */
- if (stride_io_mode(ras)) {
- ria->ria_stoff = ras->ras_stride_offset;
- ria->ria_length = ras->ras_stride_length;
- ria->ria_pages = ras->ras_stride_pages;
- }
- spin_unlock(&ras->ras_lock);
-
- if (end == 0) {
- ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
- return 0;
- }
- len = ria_page_count(ria);
- if (len == 0) {
- ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
- return 0;
- }
-
- CDEBUG(D_READA, DFID ": ria: %lu/%lu, bead: %lu/%lu, hit: %d\n",
- PFID(lu_object_fid(&clob->co_lu)),
- ria->ria_start, ria->ria_end,
- vio->vui_ra_valid ? vio->vui_ra_start : 0,
- vio->vui_ra_valid ? vio->vui_ra_count : 0,
- hit);
-
- /* at least to extend the readahead window to cover current read */
- if (!hit && vio->vui_ra_valid &&
- vio->vui_ra_start + vio->vui_ra_count > ria->ria_start) {
- unsigned long remainder;
-
- /* to the end of current read window. */
- mlen = vio->vui_ra_start + vio->vui_ra_count - ria->ria_start;
- /* trim to RPC boundary */
- ras_align(ras, ria->ria_start, &remainder);
- mlen = min(mlen, ras->ras_rpc_size - remainder);
- ria->ria_end_min = ria->ria_start + mlen;
- }
-
- ria->ria_reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen);
- if (ria->ria_reserved < len)
- ll_ra_stats_inc(inode, RA_STAT_MAX_IN_FLIGHT);
-
- CDEBUG(D_READA, "reserved pages %lu/%lu/%lu, ra_cur %d, ra_max %lu\n",
- ria->ria_reserved, len, mlen,
- atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages),
- ll_i2sbi(inode)->ll_ra_info.ra_max_pages);
-
- ra_end = ll_read_ahead_pages(env, io, queue, ras, ria);
-
- if (ria->ria_reserved)
- ll_ra_count_put(ll_i2sbi(inode), ria->ria_reserved);
-
- if (ra_end == end && ra_end == (kms >> PAGE_SHIFT))
- ll_ra_stats_inc(inode, RA_STAT_EOF);
-
- /* If we didn't get to the end of the region we reserved from
- * the ras, we need to go back and update the ras so that the
- * next read-ahead tries from where we left off. We only do so
- * if the region we failed to issue read-ahead on is still ahead
- * of the app and behind the next index to start read-ahead from.
- */
- CDEBUG(D_READA, "ra_end = %lu end = %lu stride end = %lu pages = %d\n",
- ra_end, end, ria->ria_end, ret);
-
- if (ra_end > 0 && ra_end != end) {
- ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END);
- spin_lock(&ras->ras_lock);
- if (ra_end <= ras->ras_next_readahead &&
- index_in_window(ra_end, ras->ras_window_start, 0,
- ras->ras_window_len)) {
- ras->ras_next_readahead = ra_end + 1;
- RAS_CDEBUG(ras);
- }
- spin_unlock(&ras->ras_lock);
- }
-
- return ret;
-}
-
-static void ras_set_start(struct inode *inode, struct ll_readahead_state *ras,
- unsigned long index)
-{
- ras->ras_window_start = ras_align(ras, index, NULL);
-}
-
-/* called with the ras_lock held or from places where it doesn't matter */
-static void ras_reset(struct inode *inode, struct ll_readahead_state *ras,
- unsigned long index)
-{
- ras->ras_last_readpage = index;
- ras->ras_consecutive_requests = 0;
- ras->ras_consecutive_pages = 0;
- ras->ras_window_len = 0;
- ras_set_start(inode, ras, index);
- ras->ras_next_readahead = max(ras->ras_window_start, index + 1);
-
- RAS_CDEBUG(ras);
-}
-
-/* called with the ras_lock held or from places where it doesn't matter */
-static void ras_stride_reset(struct ll_readahead_state *ras)
-{
- ras->ras_consecutive_stride_requests = 0;
- ras->ras_stride_length = 0;
- ras->ras_stride_pages = 0;
- RAS_CDEBUG(ras);
-}
-
-void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
-{
- spin_lock_init(&ras->ras_lock);
- ras->ras_rpc_size = PTLRPC_MAX_BRW_PAGES;
- ras_reset(inode, ras, 0);
- ras->ras_requests = 0;
-}
-
-/*
- * Check whether the read request is in the stride window.
- * If it is in the stride window, return 1, otherwise return 0.
- */
-static int index_in_stride_window(struct ll_readahead_state *ras,
- unsigned long index)
-{
- unsigned long stride_gap;
-
- if (ras->ras_stride_length == 0 || ras->ras_stride_pages == 0 ||
- ras->ras_stride_pages == ras->ras_stride_length)
- return 0;
-
- stride_gap = index - ras->ras_last_readpage - 1;
-
- /* If it is contiguous read */
- if (stride_gap == 0)
- return ras->ras_consecutive_pages + 1 <= ras->ras_stride_pages;
-
- /* Otherwise check the stride by itself */
- return (ras->ras_stride_length - ras->ras_stride_pages) == stride_gap &&
- ras->ras_consecutive_pages == ras->ras_stride_pages;
-}
-
-static void ras_update_stride_detector(struct ll_readahead_state *ras,
- unsigned long index)
-{
- unsigned long stride_gap = index - ras->ras_last_readpage - 1;
-
- if ((stride_gap != 0 || ras->ras_consecutive_stride_requests == 0) &&
- !stride_io_mode(ras)) {
- ras->ras_stride_pages = ras->ras_consecutive_pages;
- ras->ras_stride_length = ras->ras_consecutive_pages +
- stride_gap;
- }
- LASSERT(ras->ras_request_index == 0);
- LASSERT(ras->ras_consecutive_stride_requests == 0);
-
- if (index <= ras->ras_last_readpage) {
- /* Reset stride window for forward read */
- ras_stride_reset(ras);
- return;
- }
-
- ras->ras_stride_pages = ras->ras_consecutive_pages;
- ras->ras_stride_length = stride_gap + ras->ras_consecutive_pages;
-
- RAS_CDEBUG(ras);
-}
-
-/* The stride read-ahead window will be increased by inc_len according to
- * the stride I/O pattern
- */
-static void ras_stride_increase_window(struct ll_readahead_state *ras,
- struct ll_ra_info *ra,
- unsigned long inc_len)
-{
- unsigned long left, step, window_len;
- unsigned long stride_len;
-
- LASSERT(ras->ras_stride_length > 0);
- LASSERTF(ras->ras_window_start + ras->ras_window_len >=
- ras->ras_stride_offset,
- "window_start %lu, window_len %lu stride_offset %lu\n",
- ras->ras_window_start,
- ras->ras_window_len, ras->ras_stride_offset);
-
- stride_len = ras->ras_window_start + ras->ras_window_len -
- ras->ras_stride_offset;
-
- left = stride_len % ras->ras_stride_length;
- window_len = ras->ras_window_len - left;
-
- if (left < ras->ras_stride_pages)
- left += inc_len;
- else
- left = ras->ras_stride_pages + inc_len;
-
- LASSERT(ras->ras_stride_pages != 0);
-
- step = left / ras->ras_stride_pages;
- left %= ras->ras_stride_pages;
-
- window_len += step * ras->ras_stride_length + left;
-
- if (stride_pg_count(ras->ras_stride_offset, ras->ras_stride_length,
- ras->ras_stride_pages, ras->ras_stride_offset,
- window_len) <= ra->ra_max_pages_per_file)
- ras->ras_window_len = window_len;
-
- RAS_CDEBUG(ras);
-}
-
-static void ras_increase_window(struct inode *inode,
- struct ll_readahead_state *ras,
- struct ll_ra_info *ra)
-{
- /* The stretch of the ra-window should be aligned with the max rpc_size,
- * but the current clio architecture does not support retrieving such
- * information from the lower layer. FIXME later
- */
- if (stride_io_mode(ras)) {
- ras_stride_increase_window(ras, ra, ras->ras_rpc_size);
- } else {
- unsigned long wlen;
-
- wlen = min(ras->ras_window_len + ras->ras_rpc_size,
- ra->ra_max_pages_per_file);
- ras->ras_window_len = ras_align(ras, wlen, NULL);
- }
-}
-
-static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
- struct ll_readahead_state *ras, unsigned long index,
- enum ras_update_flags flags)
-{
- struct ll_ra_info *ra = &sbi->ll_ra_info;
- int zero = 0, stride_detect = 0, ra_miss = 0;
- bool hit = flags & LL_RAS_HIT;
-
- spin_lock(&ras->ras_lock);
-
- if (!hit)
- CDEBUG(D_READA, DFID " pages at %lu miss.\n",
- PFID(ll_inode2fid(inode)), index);
-
- ll_ra_stats_inc_sbi(sbi, hit ? RA_STAT_HIT : RA_STAT_MISS);
-
- /* Reset the read-ahead window in two cases. First, when the app seeks
- * or reads to some other part of the file. Second, if we get a
- * read-ahead miss on a page we think we've previously issued. This can
- * be a symptom of there being so many read-ahead pages that the VM
- * reclaims them before we get to them.
- */
- if (!index_in_window(index, ras->ras_last_readpage, 8, 8)) {
- zero = 1;
- ll_ra_stats_inc_sbi(sbi, RA_STAT_DISTANT_READPAGE);
- } else if (!hit && ras->ras_window_len &&
- index < ras->ras_next_readahead &&
- index_in_window(index, ras->ras_window_start, 0,
- ras->ras_window_len)) {
- ra_miss = 1;
- ll_ra_stats_inc_sbi(sbi, RA_STAT_MISS_IN_WINDOW);
- }
-
- /* On the second access to a file smaller than the tunable
- * ra_max_read_ahead_whole_pages, trigger RA on all pages in the
- * file, up to ra_max_pages_per_file. This is simply a best effort
- * and only occurs once per open file. Normal RA behavior resumes
- * for subsequent IO. The mmap case does not increment
- * ras_requests and thus can never trigger this behavior.
- */
- if (ras->ras_requests >= 2 && !ras->ras_request_index) {
- __u64 kms_pages;
-
- kms_pages = (i_size_read(inode) + PAGE_SIZE - 1) >>
- PAGE_SHIFT;
-
- CDEBUG(D_READA, "kmsp %llu mwp %lu mp %lu\n", kms_pages,
- ra->ra_max_read_ahead_whole_pages,
- ra->ra_max_pages_per_file);
-
- if (kms_pages &&
- kms_pages <= ra->ra_max_read_ahead_whole_pages) {
- ras->ras_window_start = 0;
- ras->ras_next_readahead = index + 1;
- ras->ras_window_len = min(ra->ra_max_pages_per_file,
- ra->ra_max_read_ahead_whole_pages);
- goto out_unlock;
- }
- }
- if (zero) {
- /* Check whether it is in stride I/O mode */
- if (!index_in_stride_window(ras, index)) {
- if (ras->ras_consecutive_stride_requests == 0 &&
- ras->ras_request_index == 0) {
- ras_update_stride_detector(ras, index);
- ras->ras_consecutive_stride_requests++;
- } else {
- ras_stride_reset(ras);
- }
- ras_reset(inode, ras, index);
- ras->ras_consecutive_pages++;
- goto out_unlock;
- } else {
- ras->ras_consecutive_pages = 0;
- ras->ras_consecutive_requests = 0;
- if (++ras->ras_consecutive_stride_requests > 1)
- stride_detect = 1;
- RAS_CDEBUG(ras);
- }
- } else {
- if (ra_miss) {
- if (index_in_stride_window(ras, index) &&
- stride_io_mode(ras)) {
- if (index != ras->ras_last_readpage + 1)
- ras->ras_consecutive_pages = 0;
- ras_reset(inode, ras, index);
-
- /* If stride read-ahead hits a cache miss, the stride
- * detector will not be reset, to avoid the
- * overhead of redetecting read-ahead mode,
- * on the condition that the stride window
- * still intersects with the normal sequential
- * read-ahead window.
- */
- if (ras->ras_window_start <
- ras->ras_stride_offset)
- ras_stride_reset(ras);
- RAS_CDEBUG(ras);
- } else {
- /* Reset both stride window and normal RA
- * window
- */
- ras_reset(inode, ras, index);
- ras->ras_consecutive_pages++;
- ras_stride_reset(ras);
- goto out_unlock;
- }
- } else if (stride_io_mode(ras)) {
- /* If this is a contiguous read while currently in stride
- * I/O mode, check whether the stride step is still valid;
- * if invalid, reset the stride read-ahead window
- */
- if (!index_in_stride_window(ras, index)) {
- /* Shrink stride read-ahead window to be zero */
- ras_stride_reset(ras);
- ras->ras_window_len = 0;
- ras->ras_next_readahead = index;
- }
- }
- }
- ras->ras_consecutive_pages++;
- ras->ras_last_readpage = index;
- ras_set_start(inode, ras, index);
-
- if (stride_io_mode(ras)) {
- /* Since stride readahead is sensitive to the read-ahead
- * offset, we use the original offset here
- * instead of ras_window_start, which is RPC-aligned
- */
- ras->ras_next_readahead = max(index, ras->ras_next_readahead);
- ras->ras_window_start = max(ras->ras_stride_offset,
- ras->ras_window_start);
- } else {
- if (ras->ras_next_readahead < ras->ras_window_start)
- ras->ras_next_readahead = ras->ras_window_start;
- if (!hit)
- ras->ras_next_readahead = index + 1;
- }
- RAS_CDEBUG(ras);
-
- /* Trigger RA in the mmap case where ras_consecutive_requests
- * is not incremented and thus can't be used to trigger RA
- */
- if (ras->ras_consecutive_pages >= 4 && flags & LL_RAS_MMAP) {
- ras_increase_window(inode, ras, ra);
- /*
- * reset consecutive pages so that the readahead window can
- * grow gradually.
- */
- ras->ras_consecutive_pages = 0;
- goto out_unlock;
- }
-
- /* Initially reset the stride window offset to next_readahead */
- if (ras->ras_consecutive_stride_requests == 2 && stride_detect) {
- /**
- * Once stride IO mode is detected, next_readahead should be
- * reset to make sure next_readahead > stride offset
- */
- ras->ras_next_readahead = max(index, ras->ras_next_readahead);
- ras->ras_stride_offset = index;
- ras->ras_window_start = max(index, ras->ras_window_start);
- }
-
- /* The initial ras_window_len is set to the request size. To avoid
- * uselessly reading and discarding pages for random IO the window is
- * only increased once per consecutive request received.
- */
- if ((ras->ras_consecutive_requests > 1 || stride_detect) &&
- !ras->ras_request_index)
- ras_increase_window(inode, ras, ra);
-out_unlock:
- RAS_CDEBUG(ras);
- ras->ras_request_index++;
- spin_unlock(&ras->ras_lock);
-}
-
-int ll_writepage(struct page *vmpage, struct writeback_control *wbc)
-{
- struct inode *inode = vmpage->mapping->host;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lu_env *env;
- struct cl_io *io;
- struct cl_page *page;
- struct cl_object *clob;
- bool redirtied = false;
- bool unlocked = false;
- int result;
- u16 refcheck;
-
- LASSERT(PageLocked(vmpage));
- LASSERT(!PageWriteback(vmpage));
-
- LASSERT(ll_i2dtexp(inode));
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env)) {
- result = PTR_ERR(env);
- goto out;
- }
-
- clob = ll_i2info(inode)->lli_clob;
- LASSERT(clob);
-
- io = vvp_env_thread_io(env);
- io->ci_obj = clob;
- io->ci_ignore_layout = 1;
- result = cl_io_init(env, io, CIT_MISC, clob);
- if (result == 0) {
- page = cl_page_find(env, clob, vmpage->index,
- vmpage, CPT_CACHEABLE);
- if (!IS_ERR(page)) {
- lu_ref_add(&page->cp_reference, "writepage",
- current);
- cl_page_assume(env, io, page);
- result = cl_page_flush(env, io, page);
- if (result != 0) {
- /*
- * Re-dirty page on error so it retries write,
- * but not in case when IO has actually
- * occurred and completed with an error.
- */
- if (!PageError(vmpage)) {
- redirty_page_for_writepage(wbc, vmpage);
- result = 0;
- redirtied = true;
- }
- }
- cl_page_disown(env, io, page);
- unlocked = true;
- lu_ref_del(&page->cp_reference,
- "writepage", current);
- cl_page_put(env, page);
- } else {
- result = PTR_ERR(page);
- }
- }
- cl_io_fini(env, io);
-
- if (redirtied && wbc->sync_mode == WB_SYNC_ALL) {
- loff_t offset = cl_offset(clob, vmpage->index);
-
- /* Flushing the page failed because the extent is being written
- * out. Wait for the write of the extent to finish, to avoid
- * breaking the kernel, which assumes ->writepage should mark
- * PageWriteback or clean the page.
- */
- result = cl_sync_file_range(inode, offset,
- offset + PAGE_SIZE - 1,
- CL_FSYNC_LOCAL, 1);
- if (result > 0) {
- /* Actually we may have written more than one page.
- * Subtract this page because the caller will count
- * it.
- */
- wbc->nr_to_write -= result - 1;
- result = 0;
- }
- }
-
- cl_env_put(env, &refcheck);
- goto out;
-
-out:
- if (result < 0) {
- if (!lli->lli_async_rc)
- lli->lli_async_rc = result;
- SetPageError(vmpage);
- if (!unlocked)
- unlock_page(vmpage);
- }
- return result;
-}
-
-int ll_writepages(struct address_space *mapping, struct writeback_control *wbc)
-{
- struct inode *inode = mapping->host;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- loff_t start;
- loff_t end;
- enum cl_fsync_mode mode;
- int range_whole = 0;
- int result;
- int ignore_layout = 0;
-
- if (wbc->range_cyclic) {
- start = mapping->writeback_index << PAGE_SHIFT;
- end = OBD_OBJECT_EOF;
- } else {
- start = wbc->range_start;
- end = wbc->range_end;
- if (end == LLONG_MAX) {
- end = OBD_OBJECT_EOF;
- range_whole = start == 0;
- }
- }
-
- mode = CL_FSYNC_NONE;
- if (wbc->sync_mode == WB_SYNC_ALL)
- mode = CL_FSYNC_LOCAL;
-
- if (sbi->ll_umounting)
- /* if the mountpoint is being umounted, all pages have to be
- * evicted to avoid hitting LBUG when truncate_inode_pages()
- * is called later on.
- */
- ignore_layout = 1;
-
- if (!ll_i2info(inode)->lli_clob)
- return 0;
-
- result = cl_sync_file_range(inode, start, end, mode, ignore_layout);
- if (result > 0) {
- wbc->nr_to_write -= result;
- result = 0;
- }
-
- if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) {
- if (end == OBD_OBJECT_EOF)
- mapping->writeback_index = 0;
- else
- mapping->writeback_index = (end >> PAGE_SHIFT) + 1;
- }
- return result;
-}
-
-struct ll_cl_context *ll_cl_find(struct file *file)
-{
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_cl_context *lcc;
- struct ll_cl_context *found = NULL;
-
- read_lock(&fd->fd_lock);
- list_for_each_entry(lcc, &fd->fd_lccs, lcc_list) {
- if (lcc->lcc_cookie == current) {
- found = lcc;
- break;
- }
- }
- read_unlock(&fd->fd_lock);
-
- return found;
-}
-
-void ll_cl_add(struct file *file, const struct lu_env *env, struct cl_io *io)
-{
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_cl_context *lcc = &ll_env_info(env)->lti_io_ctx;
-
- memset(lcc, 0, sizeof(*lcc));
- INIT_LIST_HEAD(&lcc->lcc_list);
- lcc->lcc_cookie = current;
- lcc->lcc_env = env;
- lcc->lcc_io = io;
-
- write_lock(&fd->fd_lock);
- list_add(&lcc->lcc_list, &fd->fd_lccs);
- write_unlock(&fd->fd_lock);
-}
-
-void ll_cl_remove(struct file *file, const struct lu_env *env)
-{
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_cl_context *lcc = &ll_env_info(env)->lti_io_ctx;
-
- write_lock(&fd->fd_lock);
- list_del_init(&lcc->lcc_list);
- write_unlock(&fd->fd_lock);
-}
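
These three helpers implement a per-file, per-task context registry: the outer I/O path publishes its env/io pair on the file before calling into the generic VFS path, and ll_readpage() later recovers that pair by matching lcc_cookie against current. A sketch of the calling convention implied by this file (the generic read call in the middle is illustrative):

	ll_cl_add(file, env, io);	/* publish env/io for this task */
	/* generic_file_read_iter() -> ll_readpage() -> ll_cl_find(file) */
	ll_cl_remove(file, env);	/* unpublish when the call returns */
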
-
-static int ll_io_read_page(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page)
-{
- struct inode *inode = vvp_object_inode(page->cp_obj);
- struct ll_file_data *fd = vvp_env_io(env)->vui_fd;
- struct ll_readahead_state *ras = &fd->fd_ras;
- struct cl_2queue *queue = &io->ci_queue;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct vvp_page *vpg;
- bool uptodate;
- int rc = 0;
-
- vpg = cl2vvp_page(cl_object_page_slice(page->cp_obj, page));
- uptodate = vpg->vpg_defer_uptodate;
-
- if (sbi->ll_ra_info.ra_max_pages_per_file > 0 &&
- sbi->ll_ra_info.ra_max_pages > 0) {
- struct vvp_io *vio = vvp_env_io(env);
- enum ras_update_flags flags = 0;
-
- if (uptodate)
- flags |= LL_RAS_HIT;
- if (!vio->vui_ra_valid)
- flags |= LL_RAS_MMAP;
- ras_update(sbi, inode, ras, vvp_index(vpg), flags);
- }
-
- cl_2queue_init(queue);
- if (uptodate) {
- vpg->vpg_ra_used = 1;
- cl_page_export(env, page, 1);
- cl_page_disown(env, io, page);
- } else {
- cl_page_list_add(&queue->c2_qin, page);
- }
-
- if (sbi->ll_ra_info.ra_max_pages_per_file > 0 &&
- sbi->ll_ra_info.ra_max_pages > 0) {
- int rc2;
-
- rc2 = ll_readahead(env, io, &queue->c2_qin, ras,
- uptodate);
- CDEBUG(D_READA, DFID "%d pages read ahead at %lu\n",
- PFID(ll_inode2fid(inode)), rc2, vvp_index(vpg));
- }
-
- if (queue->c2_qin.pl_nr > 0)
- rc = cl_io_submit_rw(env, io, CRT_READ, queue);
-
- /*
- * Unlock unsent pages in case of error.
- */
- cl_page_list_disown(env, io, &queue->c2_qin);
- cl_2queue_fini(env, queue);
-
- return rc;
-}
-
-int ll_readpage(struct file *file, struct page *vmpage)
-{
- struct cl_object *clob = ll_i2info(file_inode(file))->lli_clob;
- struct ll_cl_context *lcc;
- const struct lu_env *env;
- struct cl_io *io;
- struct cl_page *page;
- int result;
-
- lcc = ll_cl_find(file);
- if (!lcc) {
- unlock_page(vmpage);
- return -EIO;
- }
-
- env = lcc->lcc_env;
- io = lcc->lcc_io;
- LASSERT(io->ci_state == CIS_IO_GOING);
- page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
- if (!IS_ERR(page)) {
- LASSERT(page->cp_type == CPT_CACHEABLE);
- if (likely(!PageUptodate(vmpage))) {
- cl_page_assume(env, io, page);
- result = ll_io_read_page(env, io, page);
- } else {
- /* Page from a non-object file. */
- unlock_page(vmpage);
- result = 0;
- }
- cl_page_put(env, page);
- } else {
- unlock_page(vmpage);
- result = PTR_ERR(page);
- }
- return result;
-}
-
-int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, enum cl_req_type crt)
-{
- struct cl_2queue *queue;
- int result;
-
- LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
-
- queue = &io->ci_queue;
- cl_2queue_init_page(queue, page);
-
- result = cl_io_submit_sync(env, io, crt, queue, 0);
- LASSERT(cl_page_is_owned(page, io));
-
- if (crt == CRT_READ)
- /*
- * in CRT_WRITE case page is left locked even in case of
- * error.
- */
- cl_page_list_disown(env, io, &queue->c2_qin);
- cl_2queue_fini(env, queue);
-
- return result;
-}
diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
deleted file mode 100644
index 722e5ea1af5f..000000000000
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ /dev/null
@@ -1,641 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/lustre/llite/rw26.c
- *
- * Lustre Lite I/O page cache routines for the 2.5/2.6 kernel version
- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/uaccess.h>
-
-#include <linux/migrate.h>
-#include <linux/fs.h>
-#include <linux/buffer_head.h>
-#include <linux/mpage.h>
-#include <linux/writeback.h>
-#include <linux/pagemap.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include "llite_internal.h"
-
-/**
- * Implements Linux VM address_space::invalidatepage() method. This method is
- * called when the page is truncate from a file, either as a result of
- * explicit truncate, or when inode is removed from memory (as a result of
- * final iput(), umount, or memory pressure induced icache shrinking).
- *
- * [0, offset] bytes of the page remain valid (this is for a case of not-page
- * aligned truncate). Lustre leaves partially truncated page in the cache,
- * relying on struct inode::i_size to limit further accesses.
- */
-static void ll_invalidatepage(struct page *vmpage, unsigned int offset,
- unsigned int length)
-{
- struct inode *inode;
- struct lu_env *env;
- struct cl_page *page;
- struct cl_object *obj;
-
- LASSERT(PageLocked(vmpage));
- LASSERT(!PageWriteback(vmpage));
-
- /*
- * It is safe not to check anything in invalidatepage/releasepage
- * below, because they are run with the page locked and all our I/O
- * happens with the page locked too
- */
- if (offset == 0 && length == PAGE_SIZE) {
- /* See the comment in ll_releasepage() */
- env = cl_env_percpu_get();
- LASSERT(!IS_ERR(env));
- inode = vmpage->mapping->host;
- obj = ll_i2info(inode)->lli_clob;
- if (obj) {
- page = cl_vmpage_page(vmpage, obj);
- if (page) {
- cl_page_delete(env, page);
- cl_page_put(env, page);
- }
- } else {
- LASSERT(vmpage->private == 0);
- }
- cl_env_percpu_put(env);
- }
-}
-
-static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)
-{
- struct lu_env *env;
- struct cl_object *obj;
- struct cl_page *page;
- struct address_space *mapping;
- int result = 0;
-
- LASSERT(PageLocked(vmpage));
- if (PageWriteback(vmpage) || PageDirty(vmpage))
- return 0;
-
- mapping = vmpage->mapping;
- if (!mapping)
- return 1;
-
- obj = ll_i2info(mapping->host)->lli_clob;
- if (!obj)
- return 1;
-
- /* 1 for caller, 1 for cl_page and 1 for page cache */
- if (page_count(vmpage) > 3)
- return 0;
-
- page = cl_vmpage_page(vmpage, obj);
- if (!page)
- return 1;
-
- env = cl_env_percpu_get();
- LASSERT(!IS_ERR(env));
-
- if (!cl_page_in_use(page)) {
- result = 1;
- cl_page_delete(env, page);
- }
-
- /* To use the percpu env array, the call path cannot be rescheduled;
- * otherwise the percpu array will be corrupted if ll_releasepage() is
- * called again on the same CPU.
- *
- * If this page holds the last refcount of the cl_object, the following
- * call path may cause a reschedule:
- * cl_page_put -> cl_page_free -> cl_object_put ->
- * lu_object_put -> lu_object_free -> lov_delete_raid0.
- *
- * However, the kernel can't get rid of this inode until all pages have
- * been cleaned up. Since we hold the page lock here, it's pretty safe
- * that we won't get into the object-delete path.
- */
- LASSERT(cl_object_refc(obj) > 1);
- cl_page_put(env, page);
-
- cl_env_percpu_put(env);
- return result;
-}
-
-#define MAX_DIRECTIO_SIZE (2 * 1024 * 1024 * 1024UL)
-
-/* ll_free_user_pages - tear down page struct array
- * @pages: array of page struct pointers underlying target buffer
- */
-static void ll_free_user_pages(struct page **pages, int npages, int do_dirty)
-{
- int i;
-
- for (i = 0; i < npages; i++) {
- if (do_dirty)
- set_page_dirty_lock(pages[i]);
- put_page(pages[i]);
- }
- kvfree(pages);
-}
-
-ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
- int rw, struct inode *inode,
- struct ll_dio_pages *pv)
-{
- struct cl_page *clp;
- struct cl_2queue *queue;
- struct cl_object *obj = io->ci_obj;
- int i;
- ssize_t rc = 0;
- loff_t file_offset = pv->ldp_start_offset;
- size_t size = pv->ldp_size;
- int page_count = pv->ldp_nr;
- struct page **pages = pv->ldp_pages;
- size_t page_size = cl_page_size(obj);
- bool do_io;
- int io_pages = 0;
-
- queue = &io->ci_queue;
- cl_2queue_init(queue);
- for (i = 0; i < page_count; i++) {
- if (pv->ldp_offsets)
- file_offset = pv->ldp_offsets[i];
-
- LASSERT(!(file_offset & (page_size - 1)));
- clp = cl_page_find(env, obj, cl_index(obj, file_offset),
- pv->ldp_pages[i], CPT_TRANSIENT);
- if (IS_ERR(clp)) {
- rc = PTR_ERR(clp);
- break;
- }
-
- rc = cl_page_own(env, io, clp);
- if (rc) {
- LASSERT(clp->cp_state == CPS_FREEING);
- cl_page_put(env, clp);
- break;
- }
-
- do_io = true;
-
- /* check the page type: if the page is a host page, then do
- * write directly
- */
- if (clp->cp_type == CPT_CACHEABLE) {
- struct page *vmpage = cl_page_vmpage(clp);
- struct page *src_page;
- struct page *dst_page;
- void *src;
- void *dst;
-
- src_page = (rw == WRITE) ? pages[i] : vmpage;
- dst_page = (rw == WRITE) ? vmpage : pages[i];
-
- src = kmap_atomic(src_page);
- dst = kmap_atomic(dst_page);
- memcpy(dst, src, min(page_size, size));
- kunmap_atomic(dst);
- kunmap_atomic(src);
-
- /* make sure page will be added to the transfer by
- * cl_io_submit()->...->vvp_page_prep_write().
- */
- if (rw == WRITE)
- set_page_dirty(vmpage);
-
- if (rw == READ) {
- /* do not issue the page for read, since it
- * may re-read a read-ahead page which does NOT
- * have the uptodate bit set.
- */
- cl_page_disown(env, io, clp);
- do_io = false;
- }
- }
-
- if (likely(do_io)) {
- /*
- * Add a page to the incoming page list of 2-queue.
- */
- cl_page_list_add(&queue->c2_qin, clp);
-
- /*
- * Set page clip to tell transfer formation engine
- * that page has to be sent even if it is beyond KMS.
- */
- cl_page_clip(env, clp, 0, min(size, page_size));
-
- ++io_pages;
- }
-
- /* drop the reference count for cl_page_find */
- cl_page_put(env, clp);
- size -= page_size;
- file_offset += page_size;
- }
-
- if (rc == 0 && io_pages) {
- rc = cl_io_submit_sync(env, io,
- rw == READ ? CRT_READ : CRT_WRITE,
- queue, 0);
- }
- if (rc == 0)
- rc = pv->ldp_size;
-
- cl_2queue_discard(env, io, queue);
- cl_2queue_disown(env, io, queue);
- cl_2queue_fini(env, queue);
- return rc;
-}
-EXPORT_SYMBOL(ll_direct_rw_pages);
-
-static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
- int rw, struct inode *inode,
- struct address_space *mapping,
- size_t size, loff_t file_offset,
- struct page **pages, int page_count)
-{
- struct ll_dio_pages pvec = {
- .ldp_pages = pages,
- .ldp_nr = page_count,
- .ldp_size = size,
- .ldp_offsets = NULL,
- .ldp_start_offset = file_offset
- };
-
- return ll_direct_rw_pages(env, io, rw, inode, &pvec);
-}
-
-/* This is the maximum size of a single O_DIRECT request, based on the
- * kmalloc limit. We need to fit all of the brw_page structs, each one
- * representing PAGE_SIZE worth of user data, into a single buffer, and
- * then truncate this to be a full-sized RPC. For 4kB PAGE_SIZE this is
- * up to 22MB for 128kB kmalloc and up to 682MB for 4MB kmalloc.
- */
-#define MAX_DIO_SIZE ((KMALLOC_MAX_SIZE / sizeof(struct brw_page) * \
- PAGE_SIZE) & ~(DT_MAX_BRW_SIZE - 1))
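
The figures in the comment are consistent with sizeof(struct brw_page) == 24 bytes (an assumption; the struct is not shown here): 131072 / 24 = 5461 brw_page entries, each describing a 4 kB page, is about 22 MB, and 4194304 / 24 = 174762 entries is about 682 MiB, both before rounding down to a DT_MAX_BRW_SIZE multiple.
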
-static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter)
-{
- struct ll_cl_context *lcc;
- const struct lu_env *env;
- struct cl_io *io;
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- loff_t file_offset = iocb->ki_pos;
- ssize_t count = iov_iter_count(iter);
- ssize_t tot_bytes = 0, result = 0;
- long size = MAX_DIO_SIZE;
-
- /* Check EOF by ourselves */
- if (iov_iter_rw(iter) == READ && file_offset >= i_size_read(inode))
- return 0;
-
- /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
- if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
- return -EINVAL;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n",
- PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
- file_offset, file_offset, count >> PAGE_SHIFT,
- MAX_DIO_SIZE >> PAGE_SHIFT);
-
- /* Check that all user buffers are aligned as well */
- if (iov_iter_alignment(iter) & ~PAGE_MASK)
- return -EINVAL;
-
- lcc = ll_cl_find(file);
- if (!lcc)
- return -EIO;
-
- env = lcc->lcc_env;
- LASSERT(!IS_ERR(env));
- io = lcc->lcc_io;
- LASSERT(io);
-
- while (iov_iter_count(iter)) {
- struct page **pages;
- size_t offs;
-
- count = min_t(size_t, iov_iter_count(iter), size);
- if (iov_iter_rw(iter) == READ) {
- if (file_offset >= i_size_read(inode))
- break;
- if (file_offset + count > i_size_read(inode))
- count = i_size_read(inode) - file_offset;
- }
-
- result = iov_iter_get_pages_alloc(iter, &pages, count, &offs);
- if (likely(result > 0)) {
- int n = DIV_ROUND_UP(result + offs, PAGE_SIZE);
-
- result = ll_direct_IO_26_seg(env, io, iov_iter_rw(iter),
- inode, file->f_mapping,
- result, file_offset, pages,
- n);
- ll_free_user_pages(pages, n, iov_iter_rw(iter) == READ);
- }
- if (unlikely(result <= 0)) {
- /* If we can't allocate a large enough buffer
- * for the request, shrink it to a smaller
- * PAGE_SIZE multiple and try again.
- * We should always be able to kmalloc for a
- * page worth of page pointers = 4MB on i386.
- */
- if (result == -ENOMEM &&
- size > (PAGE_SIZE / sizeof(*pages)) *
- PAGE_SIZE) {
- size = ((((size / 2) - 1) |
- ~PAGE_MASK) + 1) &
- PAGE_MASK;
- CDEBUG(D_VFSTRACE, "DIO size now %lu\n",
- size);
- continue;
- }
-
- goto out;
- }
- iov_iter_advance(iter, result);
- tot_bytes += result;
- file_offset += result;
- }
-out:
- if (tot_bytes > 0) {
- struct vvp_io *vio = vvp_env_io(env);
-
- /* no commit async for direct IO */
- vio->u.write.vui_written += tot_bytes;
- }
-
- return tot_bytes ? tot_bytes : result;
-}
-
-/**
- * Prepare partially written-to page for a write.
- */
-static int ll_prepare_partial_page(const struct lu_env *env, struct cl_io *io,
- struct cl_page *pg)
-{
- struct cl_attr *attr = vvp_env_thread_attr(env);
- struct cl_object *obj = io->ci_obj;
- struct vvp_page *vpg = cl_object_page_slice(obj, pg);
- loff_t offset = cl_offset(obj, vvp_index(vpg));
- int result;
-
- cl_object_attr_lock(obj);
- result = cl_object_attr_get(env, obj, attr);
- cl_object_attr_unlock(obj);
- if (result == 0) {
- /*
- * If we are writing to a new page, there is no need to read old data.
- * The extent locking will have updated the KMS, and for our
- * purposes here we can treat it like i_size.
- */
- if (attr->cat_kms <= offset) {
- char *kaddr = kmap_atomic(vpg->vpg_page);
-
- memset(kaddr, 0, cl_page_size(obj));
- kunmap_atomic(kaddr);
- } else if (vpg->vpg_defer_uptodate) {
- vpg->vpg_ra_used = 1;
- } else {
- result = ll_page_sync_io(env, io, pg, CRT_READ);
- }
- }
- return result;
-}
-
-static int ll_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned int len, unsigned int flags,
- struct page **pagep, void **fsdata)
-{
- struct ll_cl_context *lcc;
- const struct lu_env *env = NULL;
- struct cl_io *io;
- struct cl_page *page = NULL;
- struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
- pgoff_t index = pos >> PAGE_SHIFT;
- struct page *vmpage = NULL;
- unsigned int from = pos & (PAGE_SIZE - 1);
- unsigned int to = from + len;
- int result = 0;
-
- CDEBUG(D_VFSTRACE, "Writing %lu of %d to %d bytes\n", index, from, len);
-
- lcc = ll_cl_find(file);
- if (!lcc) {
- io = NULL;
- result = -EIO;
- goto out;
- }
-
- env = lcc->lcc_env;
- io = lcc->lcc_io;
-
- /* To avoid deadlock, try to lock page first. */
- vmpage = grab_cache_page_nowait(mapping, index);
- if (unlikely(!vmpage || PageDirty(vmpage) || PageWriteback(vmpage))) {
- struct vvp_io *vio = vvp_env_io(env);
- struct cl_page_list *plist = &vio->u.write.vui_queue;
-
- /* If the page is already in the dirty cache, we have to commit
- * the pages right now; otherwise, it may cause a deadlock
- * because we would hold the page lock of a dirty page while
- * requesting more grants. It's okay for the dirty page to be the
- * first one in the commit page list, though.
- */
- if (vmpage && plist->pl_nr > 0) {
- unlock_page(vmpage);
- put_page(vmpage);
- vmpage = NULL;
- }
-
- /* commit pages and then wait for page lock */
- result = vvp_io_write_commit(env, io);
- if (result < 0)
- goto out;
-
- if (!vmpage) {
- vmpage = grab_cache_page_write_begin(mapping, index,
- flags);
- if (!vmpage) {
- result = -ENOMEM;
- goto out;
- }
- }
- }
-
- page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
- if (IS_ERR(page)) {
- result = PTR_ERR(page);
- goto out;
- }
-
- lcc->lcc_page = page;
- lu_ref_add(&page->cp_reference, "cl_io", io);
-
- cl_page_assume(env, io, page);
- if (!PageUptodate(vmpage)) {
- /*
- * We're completely overwriting an existing page,
- * so _don't_ set it up to date until commit_write
- */
- if (from == 0 && to == PAGE_SIZE) {
- CL_PAGE_HEADER(D_PAGE, env, page, "full page write\n");
- POISON_PAGE(vmpage, 0x11);
- } else {
- /* TODO: can be optimized at OSC layer to check if it
- * is a lockless IO. In that case, it's not necessary
- * to read the data.
- */
- result = ll_prepare_partial_page(env, io, page);
- if (result == 0)
- SetPageUptodate(vmpage);
- }
- }
- if (result < 0)
- cl_page_unassume(env, io, page);
-out:
- if (result < 0) {
- if (vmpage) {
- unlock_page(vmpage);
- put_page(vmpage);
- }
- if (!IS_ERR_OR_NULL(page)) {
- lu_ref_del(&page->cp_reference, "cl_io", io);
- cl_page_put(env, page);
- }
- if (io)
- io->ci_result = result;
- } else {
- *pagep = vmpage;
- *fsdata = lcc;
- }
- return result;
-}
-
-static int ll_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned int len, unsigned int copied,
- struct page *vmpage, void *fsdata)
-{
- struct ll_cl_context *lcc = fsdata;
- const struct lu_env *env;
- struct cl_io *io;
- struct vvp_io *vio;
- struct cl_page *page;
- unsigned int from = pos & (PAGE_SIZE - 1);
- bool unplug = false;
- int result = 0;
-
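- /* drop the page reference taken in ll_write_begin() */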
- put_page(vmpage);
-
- env = lcc->lcc_env;
- page = lcc->lcc_page;
- io = lcc->lcc_io;
- vio = vvp_env_io(env);
-
- LASSERT(cl_page_is_owned(page, io));
- if (copied > 0) {
- struct cl_page_list *plist = &vio->u.write.vui_queue;
-
- lcc->lcc_page = NULL; /* page will be queued */
-
- /* Add it into write queue */
- cl_page_list_add(plist, page);
- if (plist->pl_nr == 1) /* first page */
- vio->u.write.vui_from = from;
- else
- LASSERT(from == 0);
- vio->u.write.vui_to = from + copied;
-
- /*
- * Commit now to address the deadlock in balance_dirty_pages(),
- * where this dirty page may be written back in the same thread.
- */
- if (PageDirty(vmpage))
- unplug = true;
-
- /* We may have one full RPC, commit it soon */
- if (plist->pl_nr >= PTLRPC_MAX_BRW_PAGES)
- unplug = true;
-
- CL_PAGE_DEBUG(D_VFSTRACE, env, page,
- "queued page: %d.\n", plist->pl_nr);
- } else {
- cl_page_disown(env, io, page);
-
- lcc->lcc_page = NULL;
- lu_ref_del(&page->cp_reference, "cl_io", io);
- cl_page_put(env, page);
-
- /* the page list is no longer contiguous, so commit it now */
- unplug = true;
- }
-
- if (unplug ||
- file->f_flags & O_SYNC || IS_SYNC(file_inode(file)))
- result = vvp_io_write_commit(env, io);
-
- if (result < 0)
- io->ci_result = result;
- return result >= 0 ? copied : result;
-}
-
-#ifdef CONFIG_MIGRATION
-static int ll_migratepage(struct address_space *mapping,
- struct page *newpage, struct page *page,
- enum migrate_mode mode
- )
-{
- /* Always fail page migration until we have a proper implementation */
- return -EIO;
-}
-#endif
-
-const struct address_space_operations ll_aops = {
- .readpage = ll_readpage,
- .direct_IO = ll_direct_IO_26,
- .writepage = ll_writepage,
- .writepages = ll_writepages,
- .set_page_dirty = __set_page_dirty_nobuffers,
- .write_begin = ll_write_begin,
- .write_end = ll_write_end,
- .invalidatepage = ll_invalidatepage,
- .releasepage = (void *)ll_releasepage,
-#ifdef CONFIG_MIGRATION
- .migratepage = ll_migratepage,
-#endif
-};
diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c
deleted file mode 100644
index d864f5f36d85..000000000000
--- a/drivers/staging/lustre/lustre/llite/statahead.c
+++ /dev/null
@@ -1,1577 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_support.h>
-#include <lustre_dlm.h>
-#include "llite_internal.h"
-
-#define SA_OMITTED_ENTRY_MAX 8ULL
-
-enum se_stat {
- /** negative values are for error cases */
- SA_ENTRY_INIT = 0, /** init entry */
- SA_ENTRY_SUCC = 1, /** stat succeed */
- SA_ENTRY_INVA = 2, /** invalid entry */
-};
-
-/*
- * sa_entry is not refcounted: the statahead thread allocates it and does the
- * async stat; the async stat callback ll_statahead_interpret() adds it to
- * sai_interim_entries; later the statahead thread calls sa_handle_callback()
- * to instantiate the entry and move it to sai_entries, after which only the
- * scanner process can access and free it.
- */
-struct sa_entry {
- /* link into sai_interim_entries or sai_entries */
- struct list_head se_list;
- /* link into sai hash table locally */
- struct list_head se_hash;
- /* entry index in the sai */
- __u64 se_index;
- /* low layer ldlm lock handle */
- __u64 se_handle;
- /* entry status */
- enum se_stat se_state;
- /* entry size, contains name */
- int se_size;
- /* pointer to async getattr enqueue info */
- struct md_enqueue_info *se_minfo;
- /* pointer to the async getattr request */
- struct ptlrpc_request *se_req;
- /* pointer to the target inode */
- struct inode *se_inode;
- /* entry name */
- struct qstr se_qstr;
- /* entry fid */
- struct lu_fid se_fid;
-};
-
-static unsigned int sai_generation;
-static DEFINE_SPINLOCK(sai_generation_lock);
-
-/* sa_entry is ready to use */
-static inline int sa_ready(struct sa_entry *entry)
-{
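- /* racy read: the scanner polls se_state without taking lli_sa_lock */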
- smp_rmb();
- return (entry->se_state != SA_ENTRY_INIT);
-}
-
-/* hash value to put in sai_cache */
-static inline int sa_hash(int val)
-{
- return val & LL_SA_CACHE_MASK;
-}
-
-/* hash entry into sai_cache */
-static inline void
-sa_rehash(struct ll_statahead_info *sai, struct sa_entry *entry)
-{
- int i = sa_hash(entry->se_qstr.hash);
-
- spin_lock(&sai->sai_cache_lock[i]);
- list_add_tail(&entry->se_hash, &sai->sai_cache[i]);
- spin_unlock(&sai->sai_cache_lock[i]);
-}
-
-/*
- * Remove entry from SA table.
- */
-static inline void
-sa_unhash(struct ll_statahead_info *sai, struct sa_entry *entry)
-{
- int i = sa_hash(entry->se_qstr.hash);
-
- spin_lock(&sai->sai_cache_lock[i]);
- list_del_init(&entry->se_hash);
- spin_unlock(&sai->sai_cache_lock[i]);
-}
-
-static inline int agl_should_run(struct ll_statahead_info *sai,
- struct inode *inode)
-{
- return (inode && S_ISREG(inode->i_mode) && sai->sai_agl_valid);
-}
-
-/* statahead window is full */
-static inline int sa_sent_full(struct ll_statahead_info *sai)
-{
- return atomic_read(&sai->sai_cache_count) >= sai->sai_max;
-}
-
-/* got async stat replies */
-static inline int sa_has_callback(struct ll_statahead_info *sai)
-{
- return !list_empty(&sai->sai_interim_entries);
-}
-
-static inline int agl_list_empty(struct ll_statahead_info *sai)
-{
- return list_empty(&sai->sai_agls);
-}
-
-/**
- * The hit ratio is considered low if either
- * (1) the hit ratio is less than 80%, or
- * (2) there are more than 8 consecutive misses.
- */
-static inline int sa_low_hit(struct ll_statahead_info *sai)
-{
- return ((sai->sai_hit > 7 && sai->sai_hit < 4 * sai->sai_miss) ||
- (sai->sai_consecutive_miss > 8));
-}
-
-/*
- * if the given index falls behind the statahead window by more than
- * SA_OMITTED_ENTRY_MAX, it is considered old.
- */
-static inline int is_omitted_entry(struct ll_statahead_info *sai, __u64 index)
-{
- return ((__u64)sai->sai_max + index + SA_OMITTED_ENTRY_MAX <
- sai->sai_index);
-}
-
-/* allocate sa_entry and hash it to allow scanner process to find it */
-static struct sa_entry *
-sa_alloc(struct dentry *parent, struct ll_statahead_info *sai, __u64 index,
- const char *name, int len, const struct lu_fid *fid)
-{
- struct ll_inode_info *lli;
- struct sa_entry *entry;
- int entry_size;
- char *dname;
-
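- /* the name is stored inline after the entry, padded to a 4-byte
- * boundary with room for the trailing NUL
- */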
- entry_size = sizeof(struct sa_entry) + (len & ~3) + 4;
- entry = kzalloc(entry_size, GFP_NOFS);
- if (unlikely(!entry))
- return ERR_PTR(-ENOMEM);
-
- CDEBUG(D_READA, "alloc sa entry %.*s(%p) index %llu\n",
- len, name, entry, index);
-
- entry->se_index = index;
- entry->se_state = SA_ENTRY_INIT;
- entry->se_size = entry_size;
- dname = (char *)entry + sizeof(struct sa_entry);
- memcpy(dname, name, len);
- dname[len] = 0;
-
- entry->se_qstr.hash = full_name_hash(parent, name, len);
- entry->se_qstr.len = len;
- entry->se_qstr.name = dname;
- entry->se_fid = *fid;
-
- lli = ll_i2info(sai->sai_dentry->d_inode);
- spin_lock(&lli->lli_sa_lock);
- INIT_LIST_HEAD(&entry->se_list);
- sa_rehash(sai, entry);
- spin_unlock(&lli->lli_sa_lock);
-
- atomic_inc(&sai->sai_cache_count);
-
- return entry;
-}
-
-/* free sa_entry, which should have been unhashed and not in any list */
-static void sa_free(struct ll_statahead_info *sai, struct sa_entry *entry)
-{
- CDEBUG(D_READA, "free sa entry %.*s(%p) index %llu\n",
- entry->se_qstr.len, entry->se_qstr.name, entry,
- entry->se_index);
-
- LASSERT(list_empty(&entry->se_list));
- LASSERT(list_empty(&entry->se_hash));
-
- kfree(entry);
- atomic_dec(&sai->sai_cache_count);
-}
-
-/*
- * find sa_entry by name; used by the directory scanner. No lock is needed
- * because only the scanner can remove the entry from the cache.
- */
-static struct sa_entry *
-sa_get(struct ll_statahead_info *sai, const struct qstr *qstr)
-{
- struct sa_entry *entry;
- int i = sa_hash(qstr->hash);
-
- list_for_each_entry(entry, &sai->sai_cache[i], se_hash) {
- if (entry->se_qstr.hash == qstr->hash &&
- entry->se_qstr.len == qstr->len &&
- memcmp(entry->se_qstr.name, qstr->name, qstr->len) == 0)
- return entry;
- }
- return NULL;
-}
-
-/* unhash and unlink sa_entry, and then free it */
-static inline void
-sa_kill(struct ll_statahead_info *sai, struct sa_entry *entry)
-{
- struct ll_inode_info *lli = ll_i2info(sai->sai_dentry->d_inode);
-
- LASSERT(!list_empty(&entry->se_hash));
- LASSERT(!list_empty(&entry->se_list));
- LASSERT(sa_ready(entry));
-
- sa_unhash(sai, entry);
-
- spin_lock(&lli->lli_sa_lock);
- list_del_init(&entry->se_list);
- spin_unlock(&lli->lli_sa_lock);
-
- if (entry->se_inode)
- iput(entry->se_inode);
-
- sa_free(sai, entry);
-}
-
-/* called by scanner after use, sa_entry will be killed */
-static void
-sa_put(struct ll_statahead_info *sai, struct sa_entry *entry, struct ll_inode_info *lli)
-{
- struct sa_entry *tmp, *next;
-
- if (entry && entry->se_state == SA_ENTRY_SUCC) {
- struct ll_sb_info *sbi = ll_i2sbi(sai->sai_dentry->d_inode);
-
- sai->sai_hit++;
- sai->sai_consecutive_miss = 0;
- sai->sai_max = min(2 * sai->sai_max, sbi->ll_sa_max);
- } else {
- sai->sai_miss++;
- sai->sai_consecutive_miss++;
- }
-
- if (entry)
- sa_kill(sai, entry);
-
- /*
- * kill old completed entries; only the scanner process does this, so
- * there is no need to lock
- */
- list_for_each_entry_safe(tmp, next, &sai->sai_entries, se_list) {
- if (!is_omitted_entry(sai, tmp->se_index))
- break;
- sa_kill(sai, tmp);
- }
-
- spin_lock(&lli->lli_sa_lock);
- if (sai->sai_task)
- wake_up_process(sai->sai_task);
- spin_unlock(&lli->lli_sa_lock);
-}
-
-/*
- * update the entry state and insert it into sai_entries sorted by index;
- * return true if the scanner is waiting on this entry.
- */
-static bool
-__sa_make_ready(struct ll_statahead_info *sai, struct sa_entry *entry, int ret)
-{
- struct list_head *pos = &sai->sai_entries;
- __u64 index = entry->se_index;
- struct sa_entry *se;
-
- LASSERT(!sa_ready(entry));
- LASSERT(list_empty(&entry->se_list));
-
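- /* find the last entry with a smaller index and insert after it */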
- list_for_each_entry_reverse(se, &sai->sai_entries, se_list) {
- if (se->se_index < entry->se_index) {
- pos = &se->se_list;
- break;
- }
- }
- list_add(&entry->se_list, pos);
- entry->se_state = ret < 0 ? SA_ENTRY_INVA : SA_ENTRY_SUCC;
-
- return (index == sai->sai_index_wait);
-}
-
-/*
- * release resources used in the async stat RPC, update the entry state, and
- * wake up the scanner process if it is waiting on this entry.
- */
-static void
-sa_make_ready(struct ll_statahead_info *sai, struct sa_entry *entry, int ret)
-{
- struct ll_inode_info *lli = ll_i2info(sai->sai_dentry->d_inode);
- struct md_enqueue_info *minfo = entry->se_minfo;
- struct ptlrpc_request *req = entry->se_req;
- bool wakeup;
-
- /* release resources used in RPC */
- if (minfo) {
- entry->se_minfo = NULL;
- ll_intent_release(&minfo->mi_it);
- iput(minfo->mi_dir);
- kfree(minfo);
- }
-
- if (req) {
- entry->se_req = NULL;
- ptlrpc_req_finished(req);
- }
-
- spin_lock(&lli->lli_sa_lock);
- wakeup = __sa_make_ready(sai, entry, ret);
- spin_unlock(&lli->lli_sa_lock);
-
- if (wakeup)
- wake_up(&sai->sai_waitq);
-}
-
-/* Insert inode into the list of sai_agls. */
-static void ll_agl_add(struct ll_statahead_info *sai,
- struct inode *inode, int index)
-{
- struct ll_inode_info *child = ll_i2info(inode);
- struct ll_inode_info *parent = ll_i2info(sai->sai_dentry->d_inode);
- int added = 0;
-
- spin_lock(&child->lli_agl_lock);
- if (child->lli_agl_index == 0) {
- child->lli_agl_index = index;
- spin_unlock(&child->lli_agl_lock);
-
- LASSERT(list_empty(&child->lli_agl_list));
-
- igrab(inode);
- spin_lock(&parent->lli_agl_lock);
- if (list_empty(&sai->sai_agls))
- added = 1;
- list_add_tail(&child->lli_agl_list, &sai->sai_agls);
- spin_unlock(&parent->lli_agl_lock);
- } else {
- spin_unlock(&child->lli_agl_lock);
- }
-
- if (added > 0)
- wake_up_process(sai->sai_agl_task);
-}
-
-/* allocate sai */
-static struct ll_statahead_info *ll_sai_alloc(struct dentry *dentry)
-{
- struct ll_inode_info *lli = ll_i2info(dentry->d_inode);
- struct ll_statahead_info *sai;
- int i;
-
- sai = kzalloc(sizeof(*sai), GFP_NOFS);
- if (!sai)
- return NULL;
-
- sai->sai_dentry = dget(dentry);
- atomic_set(&sai->sai_refcount, 1);
-
- sai->sai_max = LL_SA_RPC_MIN;
- sai->sai_index = 1;
- init_waitqueue_head(&sai->sai_waitq);
-
- INIT_LIST_HEAD(&sai->sai_interim_entries);
- INIT_LIST_HEAD(&sai->sai_entries);
- INIT_LIST_HEAD(&sai->sai_agls);
-
- for (i = 0; i < LL_SA_CACHE_SIZE; i++) {
- INIT_LIST_HEAD(&sai->sai_cache[i]);
- spin_lock_init(&sai->sai_cache_lock[i]);
- }
- atomic_set(&sai->sai_cache_count, 0);
-
- spin_lock(&sai_generation_lock);
- lli->lli_sa_generation = ++sai_generation;
- if (unlikely(!sai_generation))
- lli->lli_sa_generation = ++sai_generation;
- spin_unlock(&sai_generation_lock);
-
- return sai;
-}
-
-/* free sai */
-static inline void ll_sai_free(struct ll_statahead_info *sai)
-{
- LASSERT(sai->sai_dentry);
- dput(sai->sai_dentry);
- kfree(sai);
-}
-
-/*
- * take refcount of sai if sai for @dir exists, which means statahead is on for
- * this directory.
- */
-static inline struct ll_statahead_info *ll_sai_get(struct inode *dir)
-{
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_statahead_info *sai = NULL;
-
- spin_lock(&lli->lli_sa_lock);
- sai = lli->lli_sai;
- if (sai)
- atomic_inc(&sai->sai_refcount);
- spin_unlock(&lli->lli_sa_lock);
-
- return sai;
-}
-
-/*
- * put the sai refcount after use; if the refcount reaches zero, free the sai
- * and the sa_entries attached to it.
- */
-static void ll_sai_put(struct ll_statahead_info *sai)
-{
- struct ll_inode_info *lli = ll_i2info(sai->sai_dentry->d_inode);
-
- if (atomic_dec_and_lock(&sai->sai_refcount, &lli->lli_sa_lock)) {
- struct ll_sb_info *sbi = ll_i2sbi(sai->sai_dentry->d_inode);
- struct sa_entry *entry, *next;
-
- lli->lli_sai = NULL;
- spin_unlock(&lli->lli_sa_lock);
-
- LASSERT(sai->sai_task == NULL);
- LASSERT(sai->sai_agl_task == NULL);
- LASSERT(sai->sai_sent == sai->sai_replied);
- LASSERT(!sa_has_callback(sai));
-
- list_for_each_entry_safe(entry, next, &sai->sai_entries,
- se_list)
- sa_kill(sai, entry);
-
- LASSERT(atomic_read(&sai->sai_cache_count) == 0);
- LASSERT(list_empty(&sai->sai_agls));
-
- ll_sai_free(sai);
- atomic_dec(&sbi->ll_sa_running);
- }
-}
-
-/* Do NOT forget to drop the inode refcount taken when it was added to sai_agls. */
-static void ll_agl_trigger(struct inode *inode, struct ll_statahead_info *sai)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- __u64 index = lli->lli_agl_index;
- int rc;
-
- LASSERT(list_empty(&lli->lli_agl_list));
-
- /* AGL may fall behind statahead by one entry */
- if (is_omitted_entry(sai, index + 1)) {
- lli->lli_agl_index = 0;
- iput(inode);
- return;
- }
-
- /* Someone is in glimpse (sync or async), do nothing. */
- rc = down_write_trylock(&lli->lli_glimpse_sem);
- if (rc == 0) {
- lli->lli_agl_index = 0;
- iput(inode);
- return;
- }
-
- /*
- * Someone triggered a glimpse within the last second.
- * 1) The former glimpse succeeded with a glimpse lock granted by the
- * OST; if the lock is still cached on the client, AGL needs to do
- * nothing. If it was cancelled by another client, AGL may be unable
- * to obtain a new lock, since no glimpse callback is triggered by
- * AGL itself.
- * 2) The former glimpse succeeded, but the OST did not grant a glimpse
- * lock. In that case, it is quite likely that the OST will not grant
- * one for AGL either.
- * 3) The former glimpse failed; compared with the other two cases this
- * is relatively rare. AGL can ignore it without much effect on
- * performance.
- */
- if (lli->lli_glimpse_time != 0 &&
- time_before(jiffies - 1 * HZ, lli->lli_glimpse_time)) {
- up_write(&lli->lli_glimpse_sem);
- lli->lli_agl_index = 0;
- iput(inode);
- return;
- }
-
- CDEBUG(D_READA, "Handling (init) async glimpse: inode = "
- DFID ", idx = %llu\n", PFID(&lli->lli_fid), index);
-
- cl_agl(inode);
- lli->lli_agl_index = 0;
- lli->lli_glimpse_time = jiffies;
- up_write(&lli->lli_glimpse_sem);
-
- CDEBUG(D_READA, "Handled (init) async glimpse: inode= "
- DFID ", idx = %llu, rc = %d\n",
- PFID(&lli->lli_fid), index, rc);
-
- iput(inode);
-}
-
-/*
- * prepare the inode for the sa entry and add it to the agl list; the
- * sa_entry is then ready to be used by the scanner process.
- */
-static void sa_instantiate(struct ll_statahead_info *sai,
- struct sa_entry *entry)
-{
- struct inode *dir = sai->sai_dentry->d_inode;
- struct inode *child;
- struct md_enqueue_info *minfo;
- struct lookup_intent *it;
- struct ptlrpc_request *req;
- struct mdt_body *body;
- int rc = 0;
-
- LASSERT(entry->se_handle != 0);
-
- minfo = entry->se_minfo;
- it = &minfo->mi_it;
- req = entry->se_req;
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!body) {
- rc = -EFAULT;
- goto out;
- }
-
- child = entry->se_inode;
- if (child) {
- /* revalidate; unlinked and re-created with the same name */
- if (unlikely(!lu_fid_eq(&minfo->mi_data.op_fid2, &body->mbo_fid1))) {
- entry->se_inode = NULL;
- iput(child);
- child = NULL;
- }
- }
-
- it->it_lock_handle = entry->se_handle;
- rc = md_revalidate_lock(ll_i2mdexp(dir), it, ll_inode2fid(dir), NULL);
- if (rc != 1) {
- rc = -EAGAIN;
- goto out;
- }
-
- rc = ll_prep_inode(&child, req, dir->i_sb, it);
- if (rc)
- goto out;
-
- CDEBUG(D_READA, "%s: setting %.*s" DFID " l_data to inode %p\n",
- ll_get_fsname(child->i_sb, NULL, 0),
- entry->se_qstr.len, entry->se_qstr.name,
- PFID(ll_inode2fid(child)), child);
- ll_set_lock_data(ll_i2sbi(dir)->ll_md_exp, child, it, NULL);
-
- entry->se_inode = child;
-
- if (agl_should_run(sai, child))
- ll_agl_add(sai, child, entry->se_index);
-
-out:
- /*
- * sa_make_ready() will drop ldlm ibits lock refcount by calling
- * ll_intent_drop_lock() in spite of failures. Do not worry about
- * calling ll_intent_drop_lock() more than once.
- */
- sa_make_ready(sai, entry, rc);
-}
-
-/* once there are async stat replies, instantiate sa_entry from replies */
-static void sa_handle_callback(struct ll_statahead_info *sai)
-{
- struct ll_inode_info *lli;
-
- lli = ll_i2info(sai->sai_dentry->d_inode);
-
- while (sa_has_callback(sai)) {
- struct sa_entry *entry;
-
- spin_lock(&lli->lli_sa_lock);
- if (unlikely(!sa_has_callback(sai))) {
- spin_unlock(&lli->lli_sa_lock);
- break;
- }
- entry = list_entry(sai->sai_interim_entries.next,
- struct sa_entry, se_list);
- list_del_init(&entry->se_list);
- spin_unlock(&lli->lli_sa_lock);
-
- sa_instantiate(sai, entry);
- }
-}
-
-/*
- * callback for async stat; because this is called in ptlrpcd context, we only
- * put the sa_entry on the sai_interim_entries list and let sa_handle_callback()
- * prepare the inode and instantiate the sa_entry later.
- */
-static int ll_statahead_interpret(struct ptlrpc_request *req,
- struct md_enqueue_info *minfo, int rc)
-{
- struct lookup_intent *it = &minfo->mi_it;
- struct inode *dir = minfo->mi_dir;
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_statahead_info *sai = lli->lli_sai;
- struct sa_entry *entry = (struct sa_entry *)minfo->mi_cbdata;
- __u64 handle = 0;
-
- if (it_disposition(it, DISP_LOOKUP_NEG))
- rc = -ENOENT;
-
- /*
- * because the statahead thread waits for all inflight RPCs to finish,
- * sai should always be valid, no need to refcount
- */
- LASSERT(sai);
- LASSERT(entry);
-
- CDEBUG(D_READA, "sa_entry %.*s rc %d\n",
- entry->se_qstr.len, entry->se_qstr.name, rc);
-
- if (rc) {
- ll_intent_release(it);
- iput(dir);
- kfree(minfo);
- } else {
- /*
- * release ibits lock ASAP to avoid deadlock when statahead
- * thread enqueues lock on parent in readdir and another
- * process enqueues lock on child with parent lock held, e.g.
- * unlink.
- */
- handle = it->it_lock_handle;
- ll_intent_drop_lock(it);
- }
-
- spin_lock(&lli->lli_sa_lock);
- if (rc) {
- if (__sa_make_ready(sai, entry, rc))
- wake_up(&sai->sai_waitq);
- } else {
- int first = 0;
-
- entry->se_minfo = minfo;
- entry->se_req = ptlrpc_request_addref(req);
- /* save the ibits lock handle, released ASAP above, for later
- * revalidation
- */
- entry->se_handle = handle;
- if (!sa_has_callback(sai))
- first = 1;
-
- list_add_tail(&entry->se_list, &sai->sai_interim_entries);
-
- if (first && sai->sai_task)
- wake_up_process(sai->sai_task);
- }
- sai->sai_replied++;
-
- spin_unlock(&lli->lli_sa_lock);
-
- return rc;
-}
-
-/* finish async stat RPC arguments */
-static void sa_fini_data(struct md_enqueue_info *minfo)
-{
- iput(minfo->mi_dir);
- kfree(minfo);
-}
-
-/**
- * prepare arguments for async stat RPC.
- */
-static struct md_enqueue_info *
-sa_prep_data(struct inode *dir, struct inode *child, struct sa_entry *entry)
-{
- struct md_enqueue_info *minfo;
- struct ldlm_enqueue_info *einfo;
- struct md_op_data *op_data;
-
- minfo = kzalloc(sizeof(*minfo), GFP_NOFS);
- if (!minfo)
- return ERR_PTR(-ENOMEM);
-
- op_data = ll_prep_md_op_data(&minfo->mi_data, dir, child, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data)) {
- kfree(minfo);
- return (struct md_enqueue_info *)op_data;
- }
-
- if (!child)
- op_data->op_fid2 = entry->se_fid;
-
- minfo->mi_it.it_op = IT_GETATTR;
- minfo->mi_dir = igrab(dir);
- minfo->mi_cb = ll_statahead_interpret;
- minfo->mi_cbdata = entry;
-
- einfo = &minfo->mi_einfo;
- einfo->ei_type = LDLM_IBITS;
- einfo->ei_mode = it_to_lock_mode(&minfo->mi_it);
- einfo->ei_cb_bl = ll_md_blocking_ast;
- einfo->ei_cb_cp = ldlm_completion_ast;
- einfo->ei_cb_gl = NULL;
- einfo->ei_cbdata = NULL;
-
- return minfo;
-}
-
-/* async stat for file not found in dcache */
-static int sa_lookup(struct inode *dir, struct sa_entry *entry)
-{
- struct md_enqueue_info *minfo;
- int rc;
-
- minfo = sa_prep_data(dir, NULL, entry);
- if (IS_ERR(minfo))
- return PTR_ERR(minfo);
-
- rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo);
- if (rc)
- sa_fini_data(minfo);
-
- return rc;
-}
-
-/**
- * async stat for file found in dcache, similar to .revalidate
- *
- * \retval 1 dentry valid, no RPC sent
- * \retval 0 dentry invalid, will send async stat RPC
- * \retval negative number upon error
- */
-static int sa_revalidate(struct inode *dir, struct sa_entry *entry,
- struct dentry *dentry)
-{
- struct inode *inode = d_inode(dentry);
- struct lookup_intent it = { .it_op = IT_GETATTR,
- .it_lock_handle = 0 };
- struct md_enqueue_info *minfo;
- int rc;
-
- if (unlikely(!inode))
- return 1;
-
- if (d_mountpoint(dentry))
- return 1;
-
- entry->se_inode = igrab(inode);
- rc = md_revalidate_lock(ll_i2mdexp(dir), &it, ll_inode2fid(inode),
- NULL);
- if (rc == 1) {
- entry->se_handle = it.it_lock_handle;
- ll_intent_release(&it);
- return 1;
- }
-
- minfo = sa_prep_data(dir, inode, entry);
- if (IS_ERR(minfo)) {
- entry->se_inode = NULL;
- iput(inode);
- return PTR_ERR(minfo);
- }
-
- rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo);
- if (rc) {
- entry->se_inode = NULL;
- iput(inode);
- sa_fini_data(minfo);
- }
-
- return rc;
-}
-
-/* async stat for file with @name */
-static void sa_statahead(struct dentry *parent, const char *name, int len,
- const struct lu_fid *fid)
-{
- struct inode *dir = d_inode(parent);
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_statahead_info *sai = lli->lli_sai;
- struct dentry *dentry = NULL;
- struct sa_entry *entry;
- int rc;
-
- entry = sa_alloc(parent, sai, sai->sai_index, name, len, fid);
- if (IS_ERR(entry))
- return;
-
- dentry = d_lookup(parent, &entry->se_qstr);
- if (!dentry) {
- rc = sa_lookup(dir, entry);
- } else {
- rc = sa_revalidate(dir, entry, dentry);
- if (rc == 1 && agl_should_run(sai, d_inode(dentry)))
- ll_agl_add(sai, d_inode(dentry), entry->se_index);
- }
-
- if (dentry)
- dput(dentry);
-
- if (rc)
- sa_make_ready(sai, entry, rc);
- else
- sai->sai_sent++;
-
- sai->sai_index++;
-}
-
-/* async glimpse (agl) thread main function */
-static int ll_agl_thread(void *arg)
-{
- struct dentry *parent = arg;
- struct inode *dir = d_inode(parent);
- struct ll_inode_info *plli = ll_i2info(dir);
- struct ll_inode_info *clli;
- /* We already own this reference, so it is safe to take it without a lock. */
- struct ll_statahead_info *sai = plli->lli_sai;
-
- CDEBUG(D_READA, "agl thread started: sai %p, parent %pd\n",
- sai, parent);
-
- while (!kthread_should_stop()) {
- spin_lock(&plli->lli_agl_lock);
- /* The statahead thread may help process AGL entries,
- * so check again whether the list is empty.
- */
- if (!list_empty(&sai->sai_agls)) {
- clli = list_entry(sai->sai_agls.next,
- struct ll_inode_info, lli_agl_list);
- list_del_init(&clli->lli_agl_list);
- spin_unlock(&plli->lli_agl_lock);
- ll_agl_trigger(&clli->lli_vfs_inode, sai);
- } else {
- spin_unlock(&plli->lli_agl_lock);
- }
-
- set_current_state(TASK_IDLE);
- if (list_empty(&sai->sai_agls) &&
- !kthread_should_stop())
- schedule();
- __set_current_state(TASK_RUNNING);
- }
-
- spin_lock(&plli->lli_agl_lock);
- sai->sai_agl_valid = 0;
- while (!list_empty(&sai->sai_agls)) {
- clli = list_entry(sai->sai_agls.next,
- struct ll_inode_info, lli_agl_list);
- list_del_init(&clli->lli_agl_list);
- spin_unlock(&plli->lli_agl_lock);
- clli->lli_agl_index = 0;
- iput(&clli->lli_vfs_inode);
- spin_lock(&plli->lli_agl_lock);
- }
- spin_unlock(&plli->lli_agl_lock);
- CDEBUG(D_READA, "agl thread stopped: sai %p, parent %pd\n",
- sai, parent);
- ll_sai_put(sai);
- return 0;
-}
-
-/* start agl thread */
-static void ll_start_agl(struct dentry *parent, struct ll_statahead_info *sai)
-{
- struct ll_inode_info *plli;
- struct task_struct *task;
-
- CDEBUG(D_READA, "start agl thread: sai %p, parent %pd\n",
- sai, parent);
-
- plli = ll_i2info(d_inode(parent));
- task = kthread_create(ll_agl_thread, parent, "ll_agl_%u",
- plli->lli_opendir_pid);
- if (IS_ERR(task)) {
- CERROR("can't start ll_agl thread, rc: %ld\n", PTR_ERR(task));
- return;
- }
-
- sai->sai_agl_task = task;
- atomic_inc(&ll_i2sbi(d_inode(parent))->ll_agl_total);
- spin_lock(&plli->lli_agl_lock);
- sai->sai_agl_valid = 1;
- spin_unlock(&plli->lli_agl_lock);
- /* Get an extra reference that the thread holds */
- ll_sai_get(d_inode(parent));
-
- wake_up_process(task);
-}
-
-/* statahead thread main function */
-static int ll_statahead_thread(void *arg)
-{
- struct dentry *parent = arg;
- struct inode *dir = d_inode(parent);
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_sb_info *sbi = ll_i2sbi(dir);
- struct ll_statahead_info *sai = lli->lli_sai;
- struct page *page = NULL;
- __u64 pos = 0;
- int first = 0;
- int rc = 0;
- struct md_op_data *op_data;
-
- CDEBUG(D_READA, "statahead thread starting: sai %p, parent %pd\n",
- sai, parent);
-
- op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
- LUSTRE_OPC_ANY, dir);
- if (IS_ERR(op_data)) {
- rc = PTR_ERR(op_data);
- goto out;
- }
-
- op_data->op_max_pages = ll_i2sbi(dir)->ll_md_brw_pages;
-
- while (pos != MDS_DIR_END_OFF && sai->sai_task) {
- struct lu_dirpage *dp;
- struct lu_dirent *ent;
-
- sai->sai_in_readpage = 1;
- page = ll_get_dir_page(dir, op_data, pos);
- sai->sai_in_readpage = 0;
- if (IS_ERR(page)) {
- rc = PTR_ERR(page);
- CDEBUG(D_READA, "error reading dir " DFID " at %llu/%llu: opendir_pid = %u: rc = %d\n",
- PFID(ll_inode2fid(dir)), pos, sai->sai_index,
- lli->lli_opendir_pid, rc);
- break;
- }
-
- dp = page_address(page);
- for (ent = lu_dirent_start(dp);
- ent && sai->sai_task && !sa_low_hit(sai);
- ent = lu_dirent_next(ent)) {
- struct lu_fid fid;
- __u64 hash;
- int namelen;
- char *name;
-
- hash = le64_to_cpu(ent->lde_hash);
- if (unlikely(hash < pos))
- /*
- * Skip until we find target hash value.
- */
- continue;
-
- namelen = le16_to_cpu(ent->lde_namelen);
- if (unlikely(namelen == 0))
- /*
- * Skip dummy record.
- */
- continue;
-
- name = ent->lde_name;
- if (name[0] == '.') {
- if (namelen == 1) {
- /*
- * skip "."
- */
- continue;
- } else if (name[1] == '.' && namelen == 2) {
- /*
- * skip ".."
- */
- continue;
- } else if (!sai->sai_ls_all) {
- /*
- * skip hidden files.
- */
- sai->sai_skip_hidden++;
- continue;
- }
- }
-
- /*
- * don't stat-ahead first entry.
- */
- if (unlikely(++first == 1))
- continue;
-
- fid_le_to_cpu(&fid, &ent->lde_fid);
-
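- /* throttle: drain stat replies and AGL work while
- * the statahead window is full
- */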
- do {
- sa_handle_callback(sai);
-
- spin_lock(&lli->lli_agl_lock);
- while (sa_sent_full(sai) &&
- !agl_list_empty(sai)) {
- struct ll_inode_info *clli;
-
- clli = list_entry(sai->sai_agls.next,
- struct ll_inode_info,
- lli_agl_list);
- list_del_init(&clli->lli_agl_list);
- spin_unlock(&lli->lli_agl_lock);
-
- ll_agl_trigger(&clli->lli_vfs_inode,
- sai);
-
- spin_lock(&lli->lli_agl_lock);
- }
- spin_unlock(&lli->lli_agl_lock);
-
- set_current_state(TASK_IDLE);
- if (sa_sent_full(sai) &&
- !sa_has_callback(sai) &&
- agl_list_empty(sai) &&
- sai->sai_task)
- /* wait for spare statahead window */
- schedule();
- __set_current_state(TASK_RUNNING);
- } while (sa_sent_full(sai) && sai->sai_task);
-
- sa_statahead(parent, name, namelen, &fid);
- }
-
- pos = le64_to_cpu(dp->ldp_hash_end);
- ll_release_page(dir, page,
- le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
-
- if (sa_low_hit(sai)) {
- rc = -EFAULT;
- atomic_inc(&sbi->ll_sa_wrong);
- CDEBUG(D_READA, "Statahead for dir " DFID " hit ratio too low: hit/miss %llu/%llu, sent/replied %llu/%llu, stopping statahead thread: pid %d\n",
- PFID(&lli->lli_fid), sai->sai_hit,
- sai->sai_miss, sai->sai_sent,
- sai->sai_replied, current->pid);
- break;
- }
- }
- ll_finish_md_op_data(op_data);
-
- if (rc < 0) {
- spin_lock(&lli->lli_sa_lock);
- sai->sai_task = NULL;
- lli->lli_sa_enabled = 0;
- spin_unlock(&lli->lli_sa_lock);
- }
-
- /*
- * statahead is finished, but statahead entries need to stay cached;
- * wait for file release to stop this thread.
- */
- while (sai->sai_task) {
- sa_handle_callback(sai);
-
- set_current_state(TASK_IDLE);
- if (!sa_has_callback(sai) &&
- sai->sai_task)
- schedule();
- __set_current_state(TASK_RUNNING);
- }
-out:
- if (sai->sai_agl_task) {
- kthread_stop(sai->sai_agl_task);
-
- CDEBUG(D_READA, "stop agl thread: sai %p pid %u\n",
- sai, (unsigned int)sai->sai_agl_task->pid);
- sai->sai_agl_task = NULL;
- }
- /*
- * wait for inflight statahead RPCs to finish, and then we can free sai
- * safely because statahead RPCs access sai data
- */
- while (sai->sai_sent != sai->sai_replied) {
- /* in case we're not woken up, timeout wait */
- schedule_timeout_idle(HZ>>3);
- }
-
- /* release resources held by statahead RPCs */
- sa_handle_callback(sai);
-
- CDEBUG(D_READA, "statahead thread stopped: sai %p, parent %pd\n",
- sai, parent);
-
- spin_lock(&lli->lli_sa_lock);
- sai->sai_task = NULL;
- spin_unlock(&lli->lli_sa_lock);
-
- wake_up(&sai->sai_waitq);
- ll_sai_put(sai);
-
- do_exit(rc);
-}
-
-/* authorize opened dir handle @key to statahead */
-void ll_authorize_statahead(struct inode *dir, void *key)
-{
- struct ll_inode_info *lli = ll_i2info(dir);
-
- spin_lock(&lli->lli_sa_lock);
- if (!lli->lli_opendir_key && !lli->lli_sai) {
- /*
- * if lli_sai is not NULL, the previous statahead is not finished
- * yet; we'd better not start a new statahead for now.
- */
- LASSERT(!lli->lli_opendir_pid);
- lli->lli_opendir_key = key;
- lli->lli_opendir_pid = current->pid;
- lli->lli_sa_enabled = 1;
- }
- spin_unlock(&lli->lli_sa_lock);
-}
-
-/*
- * deauthorize opened dir handle @key from statahead; the statahead thread
- * may still be running, so notify it to quit.
- */
-void ll_deauthorize_statahead(struct inode *dir, void *key)
-{
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_statahead_info *sai;
-
- LASSERT(lli->lli_opendir_key == key);
- LASSERT(lli->lli_opendir_pid);
-
- CDEBUG(D_READA, "deauthorize statahead for " DFID "\n",
- PFID(&lli->lli_fid));
-
- spin_lock(&lli->lli_sa_lock);
- lli->lli_opendir_key = NULL;
- lli->lli_opendir_pid = 0;
- lli->lli_sa_enabled = 0;
- sai = lli->lli_sai;
- if (sai && sai->sai_task) {
- /*
- * statahead thread may not quit yet because it needs to cache
- * entries, now it's time to tell it to quit.
- */
- wake_up_process(sai->sai_task);
- sai->sai_task = NULL;
- }
- spin_unlock(&lli->lli_sa_lock);
-}
-
-enum {
- /**
- * not the first dirent, or is "."
- */
- LS_NOT_FIRST_DE = 0,
- /**
- * the first non-hidden dirent
- */
- LS_FIRST_DE,
- /**
- * the first hidden dirent, i.e. one starting with "."
- */
- LS_FIRST_DOT_DE
-};
-
-/* file is first dirent under @dir */
-static int is_first_dirent(struct inode *dir, struct dentry *dentry)
-{
- const struct qstr *target = &dentry->d_name;
- struct md_op_data *op_data;
- struct page *page;
- __u64 pos = 0;
- int dot_de;
- int rc = LS_NOT_FIRST_DE;
-
- op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
- LUSTRE_OPC_ANY, dir);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
- /**
- * FIXME choose the start offset of the readdir
- */
- op_data->op_max_pages = ll_i2sbi(dir)->ll_md_brw_pages;
-
- page = ll_get_dir_page(dir, op_data, pos);
-
- while (1) {
- struct lu_dirpage *dp;
- struct lu_dirent *ent;
-
- if (IS_ERR(page)) {
- struct ll_inode_info *lli = ll_i2info(dir);
-
- rc = PTR_ERR(page);
- CERROR("%s: error reading dir " DFID " at %llu: opendir_pid = %u : rc = %d\n",
- ll_get_fsname(dir->i_sb, NULL, 0),
- PFID(ll_inode2fid(dir)), pos,
- lli->lli_opendir_pid, rc);
- break;
- }
-
- dp = page_address(page);
- for (ent = lu_dirent_start(dp); ent;
- ent = lu_dirent_next(ent)) {
- __u64 hash;
- int namelen;
- char *name;
-
- hash = le64_to_cpu(ent->lde_hash);
- /* ll_get_dir_page() can return any page containing
- * the given hash, which may not be the start hash.
- */
- if (unlikely(hash < pos))
- continue;
-
- namelen = le16_to_cpu(ent->lde_namelen);
- if (unlikely(namelen == 0))
- /*
- * skip dummy record.
- */
- continue;
-
- name = ent->lde_name;
- if (name[0] == '.') {
- if (namelen == 1)
- /*
- * skip "."
- */
- continue;
- else if (name[1] == '.' && namelen == 2)
- /*
- * skip ".."
- */
- continue;
- else
- dot_de = 1;
- } else {
- dot_de = 0;
- }
-
- if (dot_de && target->name[0] != '.') {
- CDEBUG(D_READA, "%.*s skip hidden file %.*s\n",
- target->len, target->name,
- namelen, name);
- continue;
- }
-
- if (target->len != namelen ||
- memcmp(target->name, name, namelen) != 0)
- rc = LS_NOT_FIRST_DE;
- else if (!dot_de)
- rc = LS_FIRST_DE;
- else
- rc = LS_FIRST_DOT_DE;
-
- ll_release_page(dir, page, false);
- goto out;
- }
- pos = le64_to_cpu(dp->ldp_hash_end);
- if (pos == MDS_DIR_END_OFF) {
- /*
- * End of directory reached.
- */
- ll_release_page(dir, page, false);
- goto out;
- } else {
- /*
- * chain is exhausted
- * Normal case: continue to the next page.
- */
- ll_release_page(dir, page,
- le32_to_cpu(dp->ldp_flags) &
- LDF_COLLIDE);
- page = ll_get_dir_page(dir, op_data, pos);
- }
- }
-out:
- ll_finish_md_op_data(op_data);
- return rc;
-}
-
-/**
- * revalidate @dentryp from statahead cache
- *
- * \param[in] dir parent directory
- * \param[in] sai sai structure
- * \param[out] dentryp pointer to dentry which will be revalidated
- * \param[in] unplug unplug statahead window only (normally for negative
- * dentry)
- * \retval 1 on success, dentry is saved in @dentryp
- * \retval 0 if revalidation failed (no proper lock on client)
- * \retval negative number upon error
- */
-static int revalidate_statahead_dentry(struct inode *dir,
- struct ll_statahead_info *sai,
- struct dentry **dentryp,
- bool unplug)
-{
- struct ll_inode_info *lli = ll_i2info(dir);
- struct sa_entry *entry = NULL;
- struct ll_dentry_data *ldd;
- int rc = 0;
-
- if ((*dentryp)->d_name.name[0] == '.') {
- if (sai->sai_ls_all ||
- sai->sai_miss_hidden >= sai->sai_skip_hidden) {
- /*
- * The hidden dentry is the first one, or the statahead
- * thread has not skipped this many hidden dentries
- * before "sai_ls_all" was enabled below.
- */
- } else {
- if (!sai->sai_ls_all)
- /*
- * It may be that the hidden dentry is not
- * the first one, so "sai_ls_all" was not
- * set and "ls -al" missed it. Enable
- * "sai_ls_all" for such a case.
- */
- sai->sai_ls_all = 1;
-
- /*
- * Such a "getattr" was skipped before
- * "sai_ls_all" was enabled above.
- */
- sai->sai_miss_hidden++;
- return -EAGAIN;
- }
- }
-
- if (unplug) {
- rc = 1;
- goto out_unplug;
- }
-
- entry = sa_get(sai, &(*dentryp)->d_name);
- if (!entry) {
- rc = -EAGAIN;
- goto out_unplug;
- }
-
- /* if statahead is busy in readdir, help it do post-work */
- if (!sa_ready(entry) && sai->sai_in_readpage)
- sa_handle_callback(sai);
-
- if (!sa_ready(entry)) {
- spin_lock(&lli->lli_sa_lock);
- sai->sai_index_wait = entry->se_index;
- spin_unlock(&lli->lli_sa_lock);
- if (wait_event_idle_timeout(sai->sai_waitq,
- sa_ready(entry), 30 * HZ) == 0) {
- /*
- * the entry is not ready and may still be used by an
- * inflight statahead RPC, so don't free it.
- */
- entry = NULL;
- rc = -EAGAIN;
- goto out_unplug;
- }
- }
-
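- /* statahead succeeded: revalidate the ldlm lock it acquired and
- * connect the prepared inode to the dentry
- */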
- if (entry->se_state == SA_ENTRY_SUCC && entry->se_inode) {
- struct inode *inode = entry->se_inode;
- struct lookup_intent it = { .it_op = IT_GETATTR,
- .it_lock_handle = entry->se_handle };
- __u64 bits;
-
- rc = md_revalidate_lock(ll_i2mdexp(dir), &it,
- ll_inode2fid(inode), &bits);
- if (rc == 1) {
- if (!(*dentryp)->d_inode) {
- struct dentry *alias;
-
- alias = ll_splice_alias(inode, *dentryp);
- if (IS_ERR(alias)) {
- ll_intent_release(&it);
- rc = PTR_ERR(alias);
- goto out_unplug;
- }
- *dentryp = alias;
- /**
- * statahead prepared this inode, transfer inode
- * refcount from sa_entry to dentry
- */
- entry->se_inode = NULL;
- } else if ((*dentryp)->d_inode != inode) {
- /* revalidate, but inode is recreated */
- CDEBUG(D_READA,
- "%s: stale dentry %pd inode " DFID ", statahead inode " DFID "\n",
- ll_get_fsname((*dentryp)->d_inode->i_sb,
- NULL, 0),
- *dentryp,
- PFID(ll_inode2fid((*dentryp)->d_inode)),
- PFID(ll_inode2fid(inode)));
- ll_intent_release(&it);
- rc = -ESTALE;
- goto out_unplug;
- }
-
- if ((bits & MDS_INODELOCK_LOOKUP) &&
- d_lustre_invalid(*dentryp))
- d_lustre_revalidate(*dentryp);
- ll_intent_release(&it);
- }
- }
-out_unplug:
- /*
- * a statahead-cached sa_entry can be used only once and is killed right
- * after use, so if lookup/revalidate accessed the statahead cache, set
- * the dentry ldd_sa_generation to the parent lli_sa_generation; if we
- * stat this file again later, we then know statahead was already done,
- * see dentry_may_statahead().
- */
- ldd = ll_d2d(*dentryp);
- ldd->lld_sa_generation = lli->lli_sa_generation;
- sa_put(sai, entry, lli);
- return rc;
-}
-
-/**
- * start statahead thread
- *
- * \param[in] dir parent directory
- * \param[in] dentry dentry that triggers statahead, normally the first
- * dirent under @dir
- * \retval -EAGAIN on success, because when this function is
- * called we are already in the lookup path, so the client
- * should do the getattr itself instead of waiting for the
- * statahead thread to do it asynchronously.
- * \retval negative number upon error
- */
-static int start_statahead_thread(struct inode *dir, struct dentry *dentry)
-{
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_statahead_info *sai = NULL;
- struct task_struct *task;
- struct dentry *parent = dentry->d_parent;
- int rc;
-
- /* I am the "lli_opendir_pid" owner; only I can set "lli_sai". */
- rc = is_first_dirent(dir, dentry);
- if (rc == LS_NOT_FIRST_DE) {
- /* It is not an "ls -{a}l" operation, no need for statahead. */
- rc = -EFAULT;
- goto out;
- }
-
- sai = ll_sai_alloc(parent);
- if (!sai) {
- rc = -ENOMEM;
- goto out;
- }
-
- sai->sai_ls_all = (rc == LS_FIRST_DOT_DE);
- /*
- * if current lli_opendir_key was deauthorized, or dir re-opened by
- * another process, don't start statahead, otherwise the newly spawned
- * statahead thread won't be notified to quit.
- */
- spin_lock(&lli->lli_sa_lock);
- if (unlikely(lli->lli_sai || lli->lli_opendir_key ||
- lli->lli_opendir_pid != current->pid)) {
- spin_unlock(&lli->lli_sa_lock);
- rc = -EPERM;
- goto out;
- }
- lli->lli_sai = sai;
- spin_unlock(&lli->lli_sa_lock);
-
- atomic_inc(&ll_i2sbi(parent->d_inode)->ll_sa_running);
-
- CDEBUG(D_READA, "start statahead thread: [pid %d] [parent %pd]\n",
- current->pid, parent);
-
- task = kthread_create(ll_statahead_thread, parent, "ll_sa_%u",
- lli->lli_opendir_pid);
- if (IS_ERR(task)) {
- rc = PTR_ERR(task);
- CERROR("can't start ll_sa thread, rc : %d\n", rc);
- goto out;
- }
-
- if (ll_i2sbi(parent->d_inode)->ll_flags & LL_SBI_AGL_ENABLED)
- ll_start_agl(parent, sai);
-
- atomic_inc(&ll_i2sbi(parent->d_inode)->ll_sa_total);
- sai->sai_task = task;
-
- wake_up_process(task);
-
- /*
- * We don't stat-ahead for the first dirent since we are already in
- * lookup.
- */
- return -EAGAIN;
-
-out:
- /*
- * once starting the statahead thread has failed, disable statahead
- * so that subsequent stats won't waste time trying it.
- */
- spin_lock(&lli->lli_sa_lock);
- lli->lli_sa_enabled = 0;
- lli->lli_sai = NULL;
- spin_unlock(&lli->lli_sa_lock);
- if (sai)
- ll_sai_free(sai);
- return rc;
-}
-
-/**
- * statahead entry function; called when the client does getattr on a file.
- * It starts the statahead thread if this is the first dir entry, otherwise
- * revalidates the dentry from the statahead cache.
- *
- * \param[in] dir parent directory
- * \param[out] dentryp dentry to getattr
- * \param[in] unplug unplug statahead window only (normally for negative
- * dentry)
- * \retval 1 on success
- * \retval 0 revalidation from statahead cache failed, caller
- * needs to getattr from the server directly
- * \retval negative number on error; the caller often ignores this
- * and then does getattr from the server
- */
-int ll_statahead(struct inode *dir, struct dentry **dentryp, bool unplug)
-{
- struct ll_statahead_info *sai;
-
- sai = ll_sai_get(dir);
- if (sai) {
- int rc;
-
- rc = revalidate_statahead_dentry(dir, sai, dentryp, unplug);
- CDEBUG(D_READA, "revalidate statahead %pd: %d.\n",
- *dentryp, rc);
- ll_sai_put(sai);
- return rc;
- }
- return start_statahead_thread(dir, *dentryp);
-}
diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c
deleted file mode 100644
index d335f29556c2..000000000000
--- a/drivers/staging/lustre/lustre/llite/super25.c
+++ /dev/null
@@ -1,189 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <lustre_ha.h>
-#include <lustre_dlm.h>
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <lprocfs_status.h>
-#include "llite_internal.h"
-
-static struct kmem_cache *ll_inode_cachep;
-
-static struct inode *ll_alloc_inode(struct super_block *sb)
-{
- struct ll_inode_info *lli;
-
- ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_ALLOC_INODE, 1);
- lli = kmem_cache_zalloc(ll_inode_cachep, GFP_NOFS);
- if (!lli)
- return NULL;
-
- inode_init_once(&lli->lli_vfs_inode);
- return &lli->lli_vfs_inode;
-}
-
-static void ll_inode_destroy_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- struct ll_inode_info *ptr = ll_i2info(inode);
-
- kmem_cache_free(ll_inode_cachep, ptr);
-}
-
-static void ll_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, ll_inode_destroy_callback);
-}
-
-/* exported operations */
-struct super_operations lustre_super_operations = {
- .alloc_inode = ll_alloc_inode,
- .destroy_inode = ll_destroy_inode,
- .evict_inode = ll_delete_inode,
- .put_super = ll_put_super,
- .statfs = ll_statfs,
- .umount_begin = ll_umount_begin,
- .remount_fs = ll_remount_fs,
- .show_options = ll_show_options,
-};
-MODULE_ALIAS_FS("lustre");
-
-static int __init lustre_init(void)
-{
- int rc;
-
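- /* sizeof() of the string literal counts the trailing NUL */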
- BUILD_BUG_ON(sizeof(LUSTRE_VOLATILE_HDR) !=
- LUSTRE_VOLATILE_HDR_LEN + 1);
-
- rc = libcfs_setup();
- if (rc)
- return rc;
-
- /* print the address of _any_ initialized kernel symbol from this
- * module, to allow debugging with gdb, which doesn't support data
- * symbols from modules.
- */
- CDEBUG(D_INFO, "Lustre client module (%p).\n",
- &lustre_super_operations);
-
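- /* preset for the slab allocation failure paths below */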
- rc = -ENOMEM;
- ll_inode_cachep = kmem_cache_create("lustre_inode_cache",
- sizeof(struct ll_inode_info), 0,
- SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
- NULL);
- if (!ll_inode_cachep)
- goto out_cache;
-
- ll_file_data_slab = kmem_cache_create("ll_file_data",
- sizeof(struct ll_file_data), 0,
- SLAB_HWCACHE_ALIGN, NULL);
- if (!ll_file_data_slab)
- goto out_cache;
-
- llite_root = debugfs_create_dir("llite", debugfs_lustre_root);
- if (IS_ERR_OR_NULL(llite_root)) {
- rc = llite_root ? PTR_ERR(llite_root) : -ENOMEM;
- llite_root = NULL;
- goto out_cache;
- }
-
- llite_kset = kset_create_and_add("llite", NULL, lustre_kobj);
- if (!llite_kset) {
- rc = -ENOMEM;
- goto out_debugfs;
- }
-
- rc = vvp_global_init();
- if (rc != 0)
- goto out_sysfs;
-
- cl_inode_fini_env = cl_env_alloc(&cl_inode_fini_refcheck,
- LCT_REMEMBER | LCT_NOREF);
- if (IS_ERR(cl_inode_fini_env)) {
- rc = PTR_ERR(cl_inode_fini_env);
- goto out_vvp;
- }
-
- cl_inode_fini_env->le_ctx.lc_cookie = 0x4;
-
- rc = ll_xattr_init();
- if (rc != 0)
- goto out_inode_fini_env;
-
- lustre_register_super_ops(THIS_MODULE, ll_fill_super, ll_kill_super);
- lustre_register_client_process_config(ll_process_config);
-
- return 0;
-
-out_inode_fini_env:
- cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck);
-out_vvp:
- vvp_global_fini();
-out_sysfs:
- kset_unregister(llite_kset);
-out_debugfs:
- debugfs_remove(llite_root);
-out_cache:
- kmem_cache_destroy(ll_inode_cachep);
- kmem_cache_destroy(ll_file_data_slab);
- return rc;
-}
-
-static void __exit lustre_exit(void)
-{
- lustre_register_super_ops(NULL, NULL, NULL);
- lustre_register_client_process_config(NULL);
-
- debugfs_remove(llite_root);
- kset_unregister(llite_kset);
-
- ll_xattr_fini();
- cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck);
- vvp_global_fini();
-
- kmem_cache_destroy(ll_inode_cachep);
- kmem_cache_destroy(ll_file_data_slab);
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Client File System");
-MODULE_VERSION(LUSTRE_VERSION_STRING);
-MODULE_LICENSE("GPL");
-
-module_init(lustre_init);
-module_exit(lustre_exit);
diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c
deleted file mode 100644
index 0690fdbf49f5..000000000000
--- a/drivers/staging/lustre/lustre/llite/symlink.c
+++ /dev/null
@@ -1,159 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/stat.h>
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include "llite_internal.h"
-
-static int ll_readlink_internal(struct inode *inode,
- struct ptlrpc_request **request, char **symname)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- int rc, symlen = i_size_read(inode) + 1;
- struct mdt_body *body;
- struct md_op_data *op_data;
-
- *request = NULL;
-
- if (lli->lli_symlink_name) {
- int print_limit = min_t(int, PAGE_SIZE - 128, symlen);
-
- *symname = lli->lli_symlink_name;
- /* If the total CDEBUG() size is larger than a page, it
- * will print a warning to the console; avoid this by
- * printing just the last part of the symlink.
- */
- CDEBUG(D_INODE, "using cached symlink %s%.*s, len = %d\n",
- print_limit < symlen ? "..." : "", print_limit,
- (*symname) + symlen - print_limit, symlen);
- return 0;
- }
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, symlen,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_valid = OBD_MD_LINKNAME;
- rc = md_getattr(sbi->ll_md_exp, op_data, request);
- ll_finish_md_op_data(op_data);
- if (rc) {
- if (rc != -ENOENT)
- CERROR("%s: inode " DFID ": rc = %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), rc);
- goto failed;
- }
-
- body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
- if ((body->mbo_valid & OBD_MD_LINKNAME) == 0) {
- CERROR("OBD_MD_LINKNAME not set on reply\n");
- rc = -EPROTO;
- goto failed;
- }
-
- LASSERT(symlen != 0);
- if (body->mbo_eadatasize != symlen) {
- CERROR("%s: inode " DFID ": symlink length %d not expected %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), body->mbo_eadatasize - 1,
- symlen - 1);
- rc = -EPROTO;
- goto failed;
- }
-
- *symname = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_MD);
- if (!*symname ||
- strnlen(*symname, symlen) != symlen - 1) {
- /* not full/NULL terminated */
- CERROR("inode %lu: symlink not NULL terminated string of length %d\n",
- inode->i_ino, symlen - 1);
- rc = -EPROTO;
- goto failed;
- }
-
- lli->lli_symlink_name = kzalloc(symlen, GFP_NOFS);
- /* do not return an error if we cannot cache the symlink locally */
- if (lli->lli_symlink_name) {
- memcpy(lli->lli_symlink_name, *symname, symlen);
- *symname = lli->lli_symlink_name;
- }
- return 0;
-
-failed:
- return rc;
-}
-
-static void ll_put_link(void *p)
-{
- ptlrpc_req_finished(p);
-}
-
-static const char *ll_get_link(struct dentry *dentry,
- struct inode *inode,
- struct delayed_call *done)
-{
- struct ptlrpc_request *request = NULL;
- int rc;
- char *symname = NULL;
-
- if (!dentry)
- return ERR_PTR(-ECHILD);
-
- CDEBUG(D_VFSTRACE, "VFS Op\n");
- ll_inode_size_lock(inode);
- rc = ll_readlink_internal(inode, &request, &symname);
- ll_inode_size_unlock(inode);
- if (rc) {
- ptlrpc_req_finished(request);
- return ERR_PTR(rc);
- }
-
- /* symname may point into the request message buffer,
- * so we delay releasing the request.
- */
- set_delayed_call(done, ll_put_link, request);
- return symname;
-}
-
-const struct inode_operations ll_fast_symlink_inode_operations = {
- .setattr = ll_setattr,
- .get_link = ll_get_link,
- .getattr = ll_getattr,
- .permission = ll_inode_permission,
- .listxattr = ll_listxattr,
-};
diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c
deleted file mode 100644
index 31dc3c0ade01..000000000000
--- a/drivers/staging/lustre/lustre/llite/vvp_dev.c
+++ /dev/null
@@ -1,640 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * cl_device and cl_device_type implementation for VVP layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@intel.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/libcfs/libcfs_hash.h>
-#include <obd.h>
-#include "llite_internal.h"
-#include "vvp_internal.h"
-
-/*****************************************************************************
- *
- * Vvp device and device type functions.
- *
- */
-
-/*
- * vvp_ prefix stands for "Vfs Vm Posix". It corresponds to historical
- * "llite_" (var. "ll_") prefix.
- */
-
-static struct kmem_cache *ll_thread_kmem;
-struct kmem_cache *vvp_lock_kmem;
-struct kmem_cache *vvp_object_kmem;
-static struct kmem_cache *vvp_session_kmem;
-static struct kmem_cache *vvp_thread_kmem;
-
-static struct lu_kmem_descr vvp_caches[] = {
- {
- .ckd_cache = &ll_thread_kmem,
- .ckd_name = "ll_thread_kmem",
- .ckd_size = sizeof(struct ll_thread_info),
- },
- {
- .ckd_cache = &vvp_lock_kmem,
- .ckd_name = "vvp_lock_kmem",
- .ckd_size = sizeof(struct vvp_lock),
- },
- {
- .ckd_cache = &vvp_object_kmem,
- .ckd_name = "vvp_object_kmem",
- .ckd_size = sizeof(struct vvp_object),
- },
- {
- .ckd_cache = &vvp_session_kmem,
- .ckd_name = "vvp_session_kmem",
- .ckd_size = sizeof(struct vvp_session)
- },
- {
- .ckd_cache = &vvp_thread_kmem,
- .ckd_name = "vvp_thread_kmem",
- .ckd_size = sizeof(struct vvp_thread_info),
- },
- {
- .ckd_cache = NULL
- }
-};
-
-static void *ll_thread_key_init(const struct lu_context *ctx,
- struct lu_context_key *key)
-{
- struct vvp_thread_info *info;
-
- info = kmem_cache_zalloc(ll_thread_kmem, GFP_NOFS);
- if (!info)
- info = ERR_PTR(-ENOMEM);
- return info;
-}
-
-static void ll_thread_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
- struct vvp_thread_info *info = data;
-
- kmem_cache_free(ll_thread_kmem, info);
-}
-
-struct lu_context_key ll_thread_key = {
- .lct_tags = LCT_CL_THREAD,
- .lct_init = ll_thread_key_init,
- .lct_fini = ll_thread_key_fini
-};
-
-static void *vvp_session_key_init(const struct lu_context *ctx,
- struct lu_context_key *key)
-{
- struct vvp_session *session;
-
- session = kmem_cache_zalloc(vvp_session_kmem, GFP_NOFS);
- if (!session)
- session = ERR_PTR(-ENOMEM);
- return session;
-}
-
-static void vvp_session_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
- struct vvp_session *session = data;
-
- kmem_cache_free(vvp_session_kmem, session);
-}
-
-struct lu_context_key vvp_session_key = {
- .lct_tags = LCT_SESSION,
- .lct_init = vvp_session_key_init,
- .lct_fini = vvp_session_key_fini
-};
-
-static void *vvp_thread_key_init(const struct lu_context *ctx,
- struct lu_context_key *key)
-{
- struct vvp_thread_info *vti;
-
- vti = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS);
- if (!vti)
- vti = ERR_PTR(-ENOMEM);
- return vti;
-}
-
-static void vvp_thread_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
- struct vvp_thread_info *vti = data;
-
- kmem_cache_free(vvp_thread_kmem, vti);
-}
-
-struct lu_context_key vvp_thread_key = {
- .lct_tags = LCT_CL_THREAD,
- .lct_init = vvp_thread_key_init,
- .lct_fini = vvp_thread_key_fini
-};
-
-/* type constructor/destructor: vvp_type_{init,fini,start,stop}(). */
-LU_TYPE_INIT_FINI(vvp, &vvp_thread_key, &ll_thread_key, &vvp_session_key);
-
-static const struct lu_device_operations vvp_lu_ops = {
- .ldo_object_alloc = vvp_object_alloc
-};
-
-static struct lu_device *vvp_device_free(const struct lu_env *env,
- struct lu_device *d)
-{
- struct vvp_device *vdv = lu2vvp_dev(d);
- struct cl_site *site = lu2cl_site(d->ld_site);
- struct lu_device *next = cl2lu_dev(vdv->vdv_next);
-
- if (d->ld_site) {
- cl_site_fini(site);
- kfree(site);
- }
- cl_device_fini(lu2cl_dev(d));
- kfree(vdv);
- return next;
-}
-
-static struct lu_device *vvp_device_alloc(const struct lu_env *env,
- struct lu_device_type *t,
- struct lustre_cfg *cfg)
-{
- struct vvp_device *vdv;
- struct lu_device *lud;
- struct cl_site *site;
- int rc;
-
- vdv = kzalloc(sizeof(*vdv), GFP_NOFS);
- if (!vdv)
- return ERR_PTR(-ENOMEM);
-
- lud = &vdv->vdv_cl.cd_lu_dev;
- cl_device_init(&vdv->vdv_cl, t);
- vvp2lu_dev(vdv)->ld_ops = &vvp_lu_ops;
-
- site = kzalloc(sizeof(*site), GFP_NOFS);
- if (site) {
- rc = cl_site_init(site, &vdv->vdv_cl);
- if (rc == 0) {
- rc = lu_site_init_finish(&site->cs_lu);
- } else {
- LASSERT(!lud->ld_site);
- CERROR("Cannot init lu_site, rc %d.\n", rc);
- kfree(site);
- }
- } else {
- rc = -ENOMEM;
- }
- if (rc != 0) {
- vvp_device_free(env, lud);
- lud = ERR_PTR(rc);
- }
- return lud;
-}
-
-static int vvp_device_init(const struct lu_env *env, struct lu_device *d,
- const char *name, struct lu_device *next)
-{
- struct vvp_device *vdv;
- int rc;
-
- vdv = lu2vvp_dev(d);
- vdv->vdv_next = lu2cl_dev(next);
-
- LASSERT(d->ld_site && next->ld_type);
- next->ld_site = d->ld_site;
- rc = next->ld_type->ldt_ops->ldto_device_init(env, next,
- next->ld_type->ldt_name,
- NULL);
- if (rc == 0) {
- lu_device_get(next);
- lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init);
- }
- return rc;
-}
-
-static struct lu_device *vvp_device_fini(const struct lu_env *env,
- struct lu_device *d)
-{
- return cl2lu_dev(lu2vvp_dev(d)->vdv_next);
-}
-
-static const struct lu_device_type_operations vvp_device_type_ops = {
- .ldto_init = vvp_type_init,
- .ldto_fini = vvp_type_fini,
-
- .ldto_start = vvp_type_start,
- .ldto_stop = vvp_type_stop,
-
- .ldto_device_alloc = vvp_device_alloc,
- .ldto_device_free = vvp_device_free,
- .ldto_device_init = vvp_device_init,
- .ldto_device_fini = vvp_device_fini,
-};
-
-struct lu_device_type vvp_device_type = {
- .ldt_tags = LU_DEVICE_CL,
- .ldt_name = LUSTRE_VVP_NAME,
- .ldt_ops = &vvp_device_type_ops,
- .ldt_ctx_tags = LCT_CL_THREAD
-};
-
-/**
- * Initialize global VVP state: the kmem caches and the VVP device type.
- */
-int vvp_global_init(void)
-{
- int rc;
-
- rc = lu_kmem_init(vvp_caches);
- if (rc != 0)
- return rc;
-
- rc = lu_device_type_init(&vvp_device_type);
- if (rc != 0)
- goto out_kmem;
-
- return 0;
-
-out_kmem:
- lu_kmem_fini(vvp_caches);
-
- return rc;
-}
-
-void vvp_global_fini(void)
-{
- lu_device_type_fini(&vvp_device_type);
- lu_kmem_fini(vvp_caches);
-}
-
-/*****************************************************************************
- *
- * mirror obd-devices into cl devices.
- *
- */
-
-int cl_sb_init(struct super_block *sb)
-{
- struct ll_sb_info *sbi;
- struct cl_device *cl;
- struct lu_env *env;
- int rc = 0;
- u16 refcheck;
-
- sbi = ll_s2sbi(sb);
- env = cl_env_get(&refcheck);
- if (!IS_ERR(env)) {
- cl = cl_type_setup(env, NULL, &vvp_device_type,
- sbi->ll_dt_exp->exp_obd->obd_lu_dev);
- if (!IS_ERR(cl)) {
- sbi->ll_cl = cl;
- sbi->ll_site = cl2lu_dev(cl)->ld_site;
- }
- cl_env_put(env, &refcheck);
- } else {
- rc = PTR_ERR(env);
- }
- return rc;
-}
-
-int cl_sb_fini(struct super_block *sb)
-{
- struct ll_sb_info *sbi;
- struct lu_env *env;
- struct cl_device *cld;
- u16 refcheck;
- int result;
-
- sbi = ll_s2sbi(sb);
- env = cl_env_get(&refcheck);
- if (!IS_ERR(env)) {
- cld = sbi->ll_cl;
-
- if (cld) {
- cl_stack_fini(env, cld);
- sbi->ll_cl = NULL;
- sbi->ll_site = NULL;
- }
- cl_env_put(env, &refcheck);
- result = 0;
- } else {
- CERROR("Cannot cleanup cl-stack due to memory shortage.\n");
- result = PTR_ERR(env);
- }
- return result;
-}
-
-/****************************************************************************
- *
- * debugfs/lustre/llite/$MNT/dump_page_cache
- *
- ****************************************************************************/
-
-/*
- * To represent the contents of the page cache as a byte stream, the
- * following information is encoded in the 64-bit offset:
- *
- * - file hash bucket in lu_site::ls_hash[] 28bits
- *
- * - how far file is from bucket head 4bits
- *
- * - page index 32bits
- *
- * The first two fields identify a file in the cache uniquely.
- */
-
-#define PGC_OBJ_SHIFT (32 + 4)
-#define PGC_DEPTH_SHIFT (32)
-
-struct vvp_pgcache_id {
- unsigned int vpi_bucket;
- unsigned int vpi_depth;
- u32 vpi_index;
-
- unsigned int vpi_curdep;
- struct lu_object_header *vpi_obj;
-};
-
-struct seq_private {
- struct ll_sb_info *sbi;
- struct lu_env *env;
- u16 refcheck;
- struct cl_object *clob;
-};
-
-static void vvp_pgcache_id_unpack(loff_t pos, struct vvp_pgcache_id *id)
-{
- BUILD_BUG_ON(sizeof(pos) != sizeof(__u64));
-
- id->vpi_index = pos & 0xffffffff;
- id->vpi_depth = (pos >> PGC_DEPTH_SHIFT) & 0xf;
- id->vpi_bucket = (unsigned long long)pos >> PGC_OBJ_SHIFT;
-}
-
-static loff_t vvp_pgcache_id_pack(struct vvp_pgcache_id *id)
-{
- return
- ((__u64)id->vpi_index) |
- ((__u64)id->vpi_depth << PGC_DEPTH_SHIFT) |
- ((__u64)id->vpi_bucket << PGC_OBJ_SHIFT);
-}
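-
-/*
- * Worked example (illustrative): with PGC_OBJ_SHIFT = 36 and
- * PGC_DEPTH_SHIFT = 32, packing bucket 5, depth 2, index 7 yields
- * (5ULL << 36) | (2ULL << 32) | 7 = 0x5200000007, and
- * vvp_pgcache_id_unpack() recovers the same three fields from it.
- */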
-
-static int vvp_pgcache_obj_get(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode, void *data)
-{
- struct vvp_pgcache_id *id = data;
- struct lu_object_header *hdr = cfs_hash_object(hs, hnode);
-
- if (id->vpi_curdep-- > 0)
- return 0; /* continue */
-
- if (lu_object_is_dying(hdr))
- return 1;
-
- cfs_hash_get(hs, hnode);
- id->vpi_obj = hdr;
- return 1;
-}
-
-static struct cl_object *vvp_pgcache_obj(const struct lu_env *env,
- struct lu_device *dev,
- struct vvp_pgcache_id *id)
-{
- LASSERT(lu_device_is_cl(dev));
-
- id->vpi_depth &= 0xf;
- id->vpi_obj = NULL;
- id->vpi_curdep = id->vpi_depth;
-
- cfs_hash_hlist_for_each(dev->ld_site->ls_obj_hash, id->vpi_bucket,
- vvp_pgcache_obj_get, id);
- if (id->vpi_obj) {
- struct lu_object *lu_obj;
-
- lu_obj = lu_object_locate(id->vpi_obj, dev->ld_type);
- if (lu_obj) {
- lu_object_ref_add(lu_obj, "dump", current);
- return lu2cl(lu_obj);
- }
- lu_object_put(env, lu_object_top(id->vpi_obj));
-
- } else if (id->vpi_curdep > 0) {
- id->vpi_depth = 0xf;
- }
- return NULL;
-}
-
-static struct page *vvp_pgcache_find(const struct lu_env *env,
- struct lu_device *dev,
- struct cl_object **clobp, loff_t *pos)
-{
- struct cl_object *clob;
- struct lu_site *site;
- struct vvp_pgcache_id id;
-
- site = dev->ld_site;
- vvp_pgcache_id_unpack(*pos, &id);
-
- while (1) {
- if (id.vpi_bucket >= CFS_HASH_NHLIST(site->ls_obj_hash))
- return NULL;
- clob = vvp_pgcache_obj(env, dev, &id);
- if (clob) {
- struct inode *inode = vvp_object_inode(clob);
- struct page *vmpage;
- int nr;
-
- nr = find_get_pages_contig(inode->i_mapping,
- id.vpi_index, 1, &vmpage);
- if (nr > 0) {
- id.vpi_index = vmpage->index;
- /* Can't support files over 16TB: the page index must fit in 32 bits */
- if (vmpage->index <= 0xffffffff) {
- *clobp = clob;
- *pos = vvp_pgcache_id_pack(&id);
- return vmpage;
- }
- put_page(vmpage);
- }
-
- lu_object_ref_del(&clob->co_lu, "dump", current);
- cl_object_put(env, clob);
- }
- /* to the next object. */
- ++id.vpi_depth;
- id.vpi_depth &= 0xf;
- if (id.vpi_depth == 0 && ++id.vpi_bucket == 0)
- return NULL;
- id.vpi_index = 0;
- }
-}
-
-#define seq_page_flag(seq, page, flag, has_flags) do { \
- if (test_bit(PG_##flag, &(page)->flags)) { \
- seq_printf(seq, "%s"#flag, has_flags ? "|" : ""); \
- has_flags = 1; \
- } \
-} while (0)
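-
-/*
- * For example, a locked, up-to-date, dirty page prints as
- * "locked|uptodate|dirty", while a page with none of the flags set
- * gets a single "-" from the trailing seq_printf() in
- * vvp_pgcache_page_show().
- */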
-
-static void vvp_pgcache_page_show(const struct lu_env *env,
- struct seq_file *seq, struct cl_page *page)
-{
- struct vvp_page *vpg;
- struct page *vmpage;
- int has_flags;
-
- vpg = cl2vvp_page(cl_page_at(page, &vvp_device_type));
- vmpage = vpg->vpg_page;
- seq_printf(seq, " %5i | %p %p %s %s %s | %p " DFID "(%p) %lu %u [",
- 0 /* gen */,
- vpg, page,
- "none",
- vpg->vpg_defer_uptodate ? "du" : "- ",
- PageWriteback(vmpage) ? "wb" : "-",
- vmpage, PFID(ll_inode2fid(vmpage->mapping->host)),
- vmpage->mapping->host, vmpage->index,
- page_count(vmpage));
- has_flags = 0;
- seq_page_flag(seq, vmpage, locked, has_flags);
- seq_page_flag(seq, vmpage, error, has_flags);
- seq_page_flag(seq, vmpage, referenced, has_flags);
- seq_page_flag(seq, vmpage, uptodate, has_flags);
- seq_page_flag(seq, vmpage, dirty, has_flags);
- seq_page_flag(seq, vmpage, writeback, has_flags);
- seq_printf(seq, "%s]\n", has_flags ? "" : "-");
-}
-
-static int vvp_pgcache_show(struct seq_file *f, void *v)
-{
- struct seq_private *priv = f->private;
- struct page *vmpage = v;
- struct cl_page *page;
-
- seq_printf(f, "%8lx@" DFID ": ", vmpage->index,
- PFID(lu_object_fid(&priv->clob->co_lu)));
- lock_page(vmpage);
- page = cl_vmpage_page(vmpage, priv->clob);
- unlock_page(vmpage);
- put_page(vmpage);
-
- if (page) {
- vvp_pgcache_page_show(priv->env, f, page);
- cl_page_put(priv->env, page);
- } else {
- seq_puts(f, "missing\n");
- }
- lu_object_ref_del(&priv->clob->co_lu, "dump", current);
- cl_object_put(priv->env, priv->clob);
-
- return 0;
-}
-
-static void *vvp_pgcache_start(struct seq_file *f, loff_t *pos)
-{
- struct seq_private *priv = f->private;
- struct page *ret;
-
- if (priv->sbi->ll_site->ls_obj_hash->hs_cur_bits >
- 64 - PGC_OBJ_SHIFT)
- ret = ERR_PTR(-EFBIG);
- else
- ret = vvp_pgcache_find(priv->env, &priv->sbi->ll_cl->cd_lu_dev,
- &priv->clob, pos);
-
- return ret;
-}
-
-static void *vvp_pgcache_next(struct seq_file *f, void *v, loff_t *pos)
-{
- struct seq_private *priv = f->private;
- struct page *ret;
-
- *pos += 1;
- ret = vvp_pgcache_find(priv->env, &priv->sbi->ll_cl->cd_lu_dev,
- &priv->clob, pos);
- return ret;
-}
-
-static void vvp_pgcache_stop(struct seq_file *f, void *v)
-{
- /* Nothing to do */
-}
-
-static const struct seq_operations vvp_pgcache_ops = {
- .start = vvp_pgcache_start,
- .next = vvp_pgcache_next,
- .stop = vvp_pgcache_stop,
- .show = vvp_pgcache_show
-};
-
-static int vvp_dump_pgcache_seq_open(struct inode *inode, struct file *filp)
-{
- struct seq_private *priv;
-
- priv = __seq_open_private(filp, &vvp_pgcache_ops, sizeof(*priv));
- if (!priv)
- return -ENOMEM;
-
- priv->sbi = inode->i_private;
- priv->env = cl_env_get(&priv->refcheck);
- if (IS_ERR(priv->env)) {
- int err = PTR_ERR(priv->env);
-
- seq_release_private(inode, filp);
- return err;
- }
- return 0;
-}
-
-static int vvp_dump_pgcache_seq_release(struct inode *inode, struct file *file)
-{
- struct seq_file *seq = file->private_data;
- struct seq_private *priv = seq->private;
-
- cl_env_put(priv->env, &priv->refcheck);
- return seq_release_private(inode, file);
-}
-
-const struct file_operations vvp_dump_pgcache_file_ops = {
- .owner = THIS_MODULE,
- .open = vvp_dump_pgcache_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = vvp_dump_pgcache_seq_release,
-};
diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h
deleted file mode 100644
index 7d3abb43584a..000000000000
--- a/drivers/staging/lustre/lustre/llite/vvp_internal.h
+++ /dev/null
@@ -1,321 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2013, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Internal definitions for VVP layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#ifndef VVP_INTERNAL_H
-#define VVP_INTERNAL_H
-
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <cl_object.h>
-
-enum obd_notify_event;
-struct inode;
-struct lustre_md;
-struct obd_device;
-struct obd_export;
-struct page;
-
-/**
- * IO state private to the VVP layer.
- */
-struct vvp_io {
- /** super class */
- struct cl_io_slice vui_cl;
- struct cl_io_lock_link vui_link;
- /**
- * The iov_iter that a read writes into or a write reads from.
- */
- struct iov_iter *vui_iter;
- /**
- * Total number of bytes left in this IO.
- */
- size_t vui_tot_count;
-
- union {
- struct vvp_fault_io {
- /**
- * Inode modification time that is checked across DLM
- * lock request.
- */
- time64_t ft_mtime;
- struct vm_area_struct *ft_vma;
- /**
- * locked page returned from vvp_io
- */
- struct page *ft_vmpage;
- /**
- * kernel fault info
- */
- struct vm_fault *ft_vmf;
- /**
- * bitflags used by the fault API for the return code.
- */
- unsigned int ft_flags;
- /**
- * check that flags are from filemap_fault
- */
- bool ft_flags_valid;
- } fault;
- struct {
- struct cl_page_list vui_queue;
- unsigned long vui_written;
- int vui_from;
- int vui_to;
- } write;
- } u;
-
- /**
- * Layout version when this IO is initialized
- */
- __u32 vui_layout_gen;
- /**
- * File descriptor against which IO is done.
- */
- struct ll_file_data *vui_fd;
- struct kiocb *vui_iocb;
-
- /* Readahead state. */
- pgoff_t vui_ra_start;
- pgoff_t vui_ra_count;
- /* Set when vui_ra_{start,count} have been initialized. */
- bool vui_ra_valid;
-};
-
-extern struct lu_device_type vvp_device_type;
-
-extern struct lu_context_key vvp_session_key;
-extern struct lu_context_key vvp_thread_key;
-
-extern struct kmem_cache *vvp_lock_kmem;
-extern struct kmem_cache *vvp_object_kmem;
-
-struct vvp_thread_info {
- struct cl_lock vti_lock;
- struct cl_lock_descr vti_descr;
- struct cl_io vti_io;
- struct cl_attr vti_attr;
-};
-
-static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env)
-{
- struct vvp_thread_info *vti;
-
- vti = lu_context_key_get(&env->le_ctx, &vvp_thread_key);
- LASSERT(vti);
-
- return vti;
-}
-
-static inline struct cl_lock *vvp_env_lock(const struct lu_env *env)
-{
- struct cl_lock *lock = &vvp_env_info(env)->vti_lock;
-
- memset(lock, 0, sizeof(*lock));
- return lock;
-}
-
-static inline struct cl_attr *vvp_env_thread_attr(const struct lu_env *env)
-{
- struct cl_attr *attr = &vvp_env_info(env)->vti_attr;
-
- memset(attr, 0, sizeof(*attr));
-
- return attr;
-}
-
-static inline struct cl_io *vvp_env_thread_io(const struct lu_env *env)
-{
- struct cl_io *io = &vvp_env_info(env)->vti_io;
-
- memset(io, 0, sizeof(*io));
-
- return io;
-}
-
-struct vvp_session {
- struct vvp_io cs_ios;
-};
-
-static inline struct vvp_session *vvp_env_session(const struct lu_env *env)
-{
- struct vvp_session *ses;
-
- ses = lu_context_key_get(env->le_ses, &vvp_session_key);
- LASSERT(ses);
-
- return ses;
-}
-
-static inline struct vvp_io *vvp_env_io(const struct lu_env *env)
-{
- return &vvp_env_session(env)->cs_ios;
-}
-
-/**
- * VVP-private object state.
- */
-struct vvp_object {
- struct cl_object_header vob_header;
- struct cl_object vob_cl;
- struct inode *vob_inode;
-
- /**
- * Number of transient pages. This is no longer protected by i_sem,
- * and needs to be atomic. This is not actually used for anything,
- * and can probably be removed.
- */
- atomic_t vob_transient_pages;
-
- /**
- * Number of outstanding mmaps on this file.
- *
- * \see ll_vm_open(), ll_vm_close().
- */
- atomic_t vob_mmap_cnt;
-
- /**
- * Various flags.
- * vob_discard_page_warned:
- * if pages belonging to this object are discarded when a client
- * is evicted, some debug info is printed. The flag is set while
- * processing the first discarded page, so the debug message is
- * not repeated for every subsequent discarded page.
- *
- * \see ll_dirty_page_discard_warn.
- */
- unsigned int vob_discard_page_warned:1;
-};
-
-/**
- * VVP-private page state.
- */
-struct vvp_page {
- struct cl_page_slice vpg_cl;
- unsigned int vpg_defer_uptodate:1,
- vpg_ra_used:1;
- /** VM page */
- struct page *vpg_page;
-};
-
-static inline struct vvp_page *cl2vvp_page(const struct cl_page_slice *slice)
-{
- return container_of(slice, struct vvp_page, vpg_cl);
-}
-
-static inline pgoff_t vvp_index(struct vvp_page *vvp)
-{
- return vvp->vpg_cl.cpl_index;
-}
-
-struct vvp_device {
- struct cl_device vdv_cl;
- struct cl_device *vdv_next;
-};
-
-struct vvp_lock {
- struct cl_lock_slice vlk_cl;
-};
-
-void *ccc_key_init(const struct lu_context *ctx,
- struct lu_context_key *key);
-void ccc_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data);
-
-void ccc_umount(const struct lu_env *env, struct cl_device *dev);
-
-static inline struct lu_device *vvp2lu_dev(struct vvp_device *vdv)
-{
- return &vdv->vdv_cl.cd_lu_dev;
-}
-
-static inline struct vvp_device *lu2vvp_dev(const struct lu_device *d)
-{
- return container_of_safe(d, struct vvp_device, vdv_cl.cd_lu_dev);
-}
-
-static inline struct vvp_device *cl2vvp_dev(const struct cl_device *d)
-{
- return container_of_safe(d, struct vvp_device, vdv_cl);
-}
-
-static inline struct vvp_object *cl2vvp(const struct cl_object *obj)
-{
- return container_of_safe(obj, struct vvp_object, vob_cl);
-}
-
-static inline struct vvp_object *lu2vvp(const struct lu_object *obj)
-{
- return container_of_safe(obj, struct vvp_object, vob_cl.co_lu);
-}
-
-static inline struct inode *vvp_object_inode(const struct cl_object *obj)
-{
- return cl2vvp(obj)->vob_inode;
-}
-
-int vvp_object_invariant(const struct cl_object *obj);
-struct vvp_object *cl_inode2vvp(struct inode *inode);
-
-static inline struct page *cl2vm_page(const struct cl_page_slice *slice)
-{
- return cl2vvp_page(slice)->vpg_page;
-}
-
-static inline struct vvp_lock *cl2vvp_lock(const struct cl_lock_slice *slice)
-{
- return container_of(slice, struct vvp_lock, vlk_cl);
-}
-
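-/*
- * Compile-time no-op: the sizeof() casts type-check the arguments
- * without evaluating them or emitting any code at run time.
- */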
-# define CLOBINVRNT(env, clob, expr) \
- ((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr)))
-
-int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io);
-int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io);
-int vvp_lock_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_lock *lock, const struct cl_io *io);
-int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index);
-struct lu_object *vvp_object_alloc(const struct lu_env *env,
- const struct lu_object_header *hdr,
- struct lu_device *dev);
-
-int vvp_global_init(void);
-void vvp_global_fini(void);
-
-extern const struct file_operations vvp_dump_pgcache_file_ops;
-
-#endif /* VVP_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
deleted file mode 100644
index e7a4778e02e4..000000000000
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ /dev/null
@@ -1,1374 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_io for VVP layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd.h>
-
-#include "llite_internal.h"
-#include "vvp_internal.h"
-
-static struct vvp_io *cl2vvp_io(const struct lu_env *env,
- const struct cl_io_slice *slice)
-{
- struct vvp_io *vio;
-
- vio = container_of(slice, struct vvp_io, vui_cl);
- LASSERT(vio == vvp_env_io(env));
-
- return vio;
-}
-
-/**
- * Check for a layout swap. The file's layout may have changed since this
- * IO was initialized. To avoid populating pages on the wrong stripe, we
- * have to verify that the layout is still current. This works because any
- * process swapping layouts must hold the group lock.
- */
-static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
- struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct vvp_io *vio = vvp_env_io(env);
- bool rc = true;
-
- switch (io->ci_type) {
- case CIT_READ:
- case CIT_WRITE:
- /* no lock is needed to check lli_layout_gen here: we hold the
- * extent lock, and the group lock must be held to swap layouts
- */
- if (ll_layout_version_get(lli) != vio->vui_layout_gen ||
- OBD_FAIL_CHECK_RESET(OBD_FAIL_LLITE_LOST_LAYOUT, 0)) {
- io->ci_need_restart = 1;
- /* this will cause a short read/write */
- io->ci_continue = 0;
- rc = false;
- }
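- /* fall through */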
- case CIT_FAULT:
- /* fault is okay because we've already had a page. */
- default:
- break;
- }
-
- return rc;
-}
-
-static void vvp_object_size_lock(struct cl_object *obj)
-{
- struct inode *inode = vvp_object_inode(obj);
-
- ll_inode_size_lock(inode);
- cl_object_attr_lock(obj);
-}
-
-static void vvp_object_size_unlock(struct cl_object *obj)
-{
- struct inode *inode = vvp_object_inode(obj);
-
- cl_object_attr_unlock(obj);
- ll_inode_size_unlock(inode);
-}
-
-/**
- * Helper function that adjusts the file size (inode->i_size), if necessary,
- * when the position at offset \a pos is accessed. The file size can be
- * arbitrarily stale on a Lustre client, but the client at least knows the
- * KMS. If the accessed area is inside [0, KMS], set the file size to KMS;
- * otherwise, glimpse the file size.
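- *
- * For example, with KMS = 8192, a read of [0, 4096) lies entirely inside
- * [0, KMS], so i_size can be raised to 8192 without a glimpse RPC, while
- * a read at offset 10000 is beyond KMS and requires a glimpse to learn
- * the real file size.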
- *
- * Locking: cl_isize_lock is used to serialize changes to inode size and to
- * protect consistency between inode size and cl_object
- * attributes. cl_object_size_lock() protects consistency between cl_attr's of
- * top-object and sub-objects.
- */
-static int vvp_prep_size(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io, loff_t start, size_t count,
- int *exceed)
-{
- struct cl_attr *attr = vvp_env_thread_attr(env);
- struct inode *inode = vvp_object_inode(obj);
- loff_t pos = start + count - 1;
- loff_t kms;
- int result;
-
- /*
- * Consistency guarantees: following possibilities exist for the
- * relation between region being accessed and real file size at this
- * moment:
- *
- * (A): the region is completely inside of the file;
- *
- * (B-x): x bytes of region are inside of the file, the rest is
- * outside;
- *
- * (C): the region is completely outside of the file.
- *
- * This classification is stable under the DLM lock already acquired by
- * the caller, because to change the class, another client would have to
- * take a DLM lock conflicting with ours. Also, any updates to ->i_size
- * by other threads on this client are serialized by
- * ll_inode_size_lock(). This guarantees that short reads are handled
- * correctly in the face of concurrent writes and truncates.
- */
- vvp_object_size_lock(obj);
- result = cl_object_attr_get(env, obj, attr);
- if (result == 0) {
- kms = attr->cat_kms;
- if (pos > kms) {
- /*
- * A glimpse is necessary to determine whether we
- * return a short read (B) or some zeroes at the end
- * of the buffer (C)
- */
- vvp_object_size_unlock(obj);
- result = cl_glimpse_lock(env, io, inode, obj, 0);
- if (result == 0 && exceed) {
- /* If the target page index exceeds the
- * end-of-file page index, return directly.
- * Do not expect the kernel to check this
- * case correctly; linux-2.6.18-128.1.1
- * fails to do so. --bug 17336
- */
- loff_t size = i_size_read(inode);
- loff_t cur_index = start >> PAGE_SHIFT;
- loff_t size_index = (size - 1) >> PAGE_SHIFT;
-
- if ((size == 0 && cur_index != 0) ||
- size_index < cur_index)
- *exceed = 1;
- }
- return result;
- }
- /*
- * region is within kms and, hence, within real file
- * size (A). We need to increase i_size to cover the
- * read region so that generic_file_read() will do its
- * job, but that doesn't mean the kms size is
- * _correct_, it is only the _minimum_ size. If
- * someone does a stat they will get the correct size
- * which will always be >= the kms value here.
- * b=11081
- */
- if (i_size_read(inode) < kms) {
- i_size_write(inode, kms);
- CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
- PFID(lu_object_fid(&obj->co_lu)),
- (__u64)i_size_read(inode));
- }
- }
-
- vvp_object_size_unlock(obj);
-
- return result;
-}
-
-/*****************************************************************************
- *
- * io operations.
- *
- */
-
-static int vvp_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
- __u32 enqflags, enum cl_lock_mode mode,
- pgoff_t start, pgoff_t end)
-{
- struct vvp_io *vio = vvp_env_io(env);
- struct cl_lock_descr *descr = &vio->vui_link.cill_descr;
- struct cl_object *obj = io->ci_obj;
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end);
-
- memset(&vio->vui_link, 0, sizeof(vio->vui_link));
-
- if (vio->vui_fd && (vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
- descr->cld_mode = CLM_GROUP;
- descr->cld_gid = vio->vui_fd->fd_grouplock.lg_gid;
- enqflags |= CEF_LOCK_MATCH;
- } else {
- descr->cld_mode = mode;
- }
- descr->cld_obj = obj;
- descr->cld_start = start;
- descr->cld_end = end;
- descr->cld_enq_flags = enqflags;
-
- cl_io_lock_add(env, io, &vio->vui_link);
- return 0;
-}
-
-static int vvp_io_one_lock(const struct lu_env *env, struct cl_io *io,
- __u32 enqflags, enum cl_lock_mode mode,
- loff_t start, loff_t end)
-{
- struct cl_object *obj = io->ci_obj;
-
- return vvp_io_one_lock_index(env, io, enqflags, mode,
- cl_index(obj, start), cl_index(obj, end));
-}
-
-static int vvp_io_write_iter_init(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct vvp_io *vio = cl2vvp_io(env, ios);
-
- cl_page_list_init(&vio->u.write.vui_queue);
- vio->u.write.vui_written = 0;
- vio->u.write.vui_from = 0;
- vio->u.write.vui_to = PAGE_SIZE;
-
- return 0;
-}
-
-static void vvp_io_write_iter_fini(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct vvp_io *vio = cl2vvp_io(env, ios);
-
- LASSERT(vio->u.write.vui_queue.pl_nr == 0);
-}
-
-static int vvp_io_fault_iter_init(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct vvp_io *vio = cl2vvp_io(env, ios);
- struct inode *inode = vvp_object_inode(ios->cis_obj);
-
- LASSERT(inode == file_inode(vio->vui_fd->fd_file));
- vio->u.fault.ft_mtime = inode->i_mtime.tv_sec;
- return 0;
-}
-
-static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct cl_object *obj = io->ci_obj;
- struct vvp_io *vio = cl2vvp_io(env, ios);
- struct inode *inode = vvp_object_inode(obj);
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- CDEBUG(D_VFSTRACE, DFID
- " ignore/verify layout %d/%d, layout version %d restore needed %d\n",
- PFID(lu_object_fid(&obj->co_lu)),
- io->ci_ignore_layout, io->ci_verify_layout,
- vio->vui_layout_gen, io->ci_restore_needed);
-
- if (io->ci_restore_needed) {
- int rc;
-
- /* the file was detected as released; we need to restore it
- * before finishing the IO
- */
- rc = ll_layout_restore(inode, 0, OBD_OBJECT_EOF);
- /* if restore registration failed, do not restart;
- * we will return -ENODATA
- */
- /* The layout will change after the restore, so we need to
- * block on the layout lock held by the MDT, and as the MDT
- * will not send the new layout in the lvb (see LU-3124), we
- * have to fetch it explicitly; all of this is done by
- * ll_layout_refresh()
- */
- if (rc == 0) {
- io->ci_restore_needed = 0;
- io->ci_need_restart = 1;
- io->ci_verify_layout = 1;
- } else {
- io->ci_restore_needed = 1;
- io->ci_need_restart = 0;
- io->ci_verify_layout = 0;
- io->ci_result = rc;
- }
- }
-
- if (!io->ci_ignore_layout && io->ci_verify_layout) {
- __u32 gen = 0;
-
- /* check layout version */
- ll_layout_refresh(inode, &gen);
- io->ci_need_restart = vio->vui_layout_gen != gen;
- if (io->ci_need_restart) {
- CDEBUG(D_VFSTRACE,
- DFID " layout changed from %d to %d.\n",
- PFID(lu_object_fid(&obj->co_lu)),
- vio->vui_layout_gen, gen);
- /* today successful restore is the only possible case */
- /* restore was done, clear restoring state */
- clear_bit(LLIF_FILE_RESTORING,
- &ll_i2info(inode)->lli_flags);
- }
- }
-}
-
-static void vvp_io_fault_fini(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct cl_page *page = io->u.ci_fault.ft_page;
-
- CLOBINVRNT(env, io->ci_obj, vvp_object_invariant(io->ci_obj));
-
- if (page) {
- lu_ref_del(&page->cp_reference, "fault", io);
- cl_page_put(env, page);
- io->u.ci_fault.ft_page = NULL;
- }
- vvp_io_fini(env, ios);
-}
-
-static enum cl_lock_mode vvp_mode_from_vma(struct vm_area_struct *vma)
-{
- /*
- * we only want to hold PW locks if the mmap() can generate
- * writes back to the file and that only happens in shared
- * writable vmas
- */
- if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
- return CLM_WRITE;
- return CLM_READ;
-}
-
-static int vvp_mmap_locks(const struct lu_env *env,
- struct vvp_io *vio, struct cl_io *io)
-{
- struct vvp_thread_info *cti = vvp_env_info(env);
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- struct cl_lock_descr *descr = &cti->vti_descr;
- union ldlm_policy_data policy;
- unsigned long addr;
- ssize_t count;
- int result = 0;
- struct iov_iter i;
- struct iovec iov;
-
- LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
-
- if (!vio->vui_iter) /* nfs or loop back device write */
- return 0;
-
- /* No MM (e.g. NFS)? Then there are no VMAs either. */
- if (!mm)
- return 0;
-
- iov_for_each(iov, i, *vio->vui_iter) {
- addr = (unsigned long)iov.iov_base;
- count = iov.iov_len;
- if (count == 0)
- continue;
-
- count += addr & (~PAGE_MASK);
- addr &= PAGE_MASK;
-
- down_read(&mm->mmap_sem);
- while ((vma = our_vma(mm, addr, count)) != NULL) {
- struct inode *inode = file_inode(vma->vm_file);
- int flags = CEF_MUST;
-
- if (ll_file_nolock(vma->vm_file)) {
- /*
- * mmap is not allowed in the no-lock case
- */
- result = -EINVAL;
- break;
- }
-
- /*
- * XXX: Required lock mode can be weakened: CIT_WRITE
- * io only ever reads user level buffer, and CIT_READ
- * only writes on it.
- */
- policy_from_vma(&policy, vma, addr, count);
- descr->cld_mode = vvp_mode_from_vma(vma);
- descr->cld_obj = ll_i2info(inode)->lli_clob;
- descr->cld_start = cl_index(descr->cld_obj,
- policy.l_extent.start);
- descr->cld_end = cl_index(descr->cld_obj,
- policy.l_extent.end);
- descr->cld_enq_flags = flags;
- result = cl_io_lock_alloc_add(env, io, descr);
-
- CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n",
- descr->cld_mode, descr->cld_start,
- descr->cld_end);
-
- if (result < 0)
- break;
-
- if (vma->vm_end - addr >= count)
- break;
-
- count -= vma->vm_end - addr;
- addr = vma->vm_end;
- }
- up_read(&mm->mmap_sem);
- if (result < 0)
- break;
- }
- return result;
-}
-
-static void vvp_io_advance(const struct lu_env *env,
- const struct cl_io_slice *ios,
- size_t nob)
-{
- struct cl_object *obj = ios->cis_io->ci_obj;
- struct vvp_io *vio = cl2vvp_io(env, ios);
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- vio->vui_tot_count -= nob;
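- /* restore the iterator's count to the bytes still outstanding */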
- iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count);
-}
-
-static void vvp_io_update_iov(const struct lu_env *env,
- struct vvp_io *vio, struct cl_io *io)
-{
- size_t size = io->u.ci_rw.crw_count;
-
- if (!vio->vui_iter)
- return;
-
- iov_iter_truncate(vio->vui_iter, size);
-}
-
-static int vvp_io_rw_lock(const struct lu_env *env, struct cl_io *io,
- enum cl_lock_mode mode, loff_t start, loff_t end)
-{
- struct vvp_io *vio = vvp_env_io(env);
- int result;
- int ast_flags = 0;
-
- LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
-
- vvp_io_update_iov(env, vio, io);
-
- if (io->u.ci_rw.crw_nonblock)
- ast_flags |= CEF_NONBLOCK;
- result = vvp_mmap_locks(env, vio, io);
- if (result == 0)
- result = vvp_io_one_lock(env, io, ast_flags, mode, start, end);
- return result;
-}
-
-static int vvp_io_read_lock(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct cl_io_rw_common *rd = &io->u.ci_rd.rd;
- int result;
-
- result = vvp_io_rw_lock(env, io, CLM_READ, rd->crw_pos,
- rd->crw_pos + rd->crw_count - 1);
-
- return result;
-}
-
-static int vvp_io_fault_lock(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct vvp_io *vio = cl2vvp_io(env, ios);
- /*
- * XXX LDLM_FL_CBPENDING
- */
- return vvp_io_one_lock_index(env,
- io, 0,
- vvp_mode_from_vma(vio->u.fault.ft_vma),
- io->u.ci_fault.ft_index,
- io->u.ci_fault.ft_index);
-}
-
-static int vvp_io_write_lock(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- loff_t start;
- loff_t end;
-
- if (io->u.ci_wr.wr_append) {
- start = 0;
- end = OBD_OBJECT_EOF;
- } else {
- start = io->u.ci_wr.wr.crw_pos;
- end = start + io->u.ci_wr.wr.crw_count - 1;
- }
- return vvp_io_rw_lock(env, io, CLM_WRITE, start, end);
-}
-
-static int vvp_io_setattr_iter_init(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- return 0;
-}
-
-/**
- * Implementation of the cl_io_operations::cio_lock() method for CIT_SETATTR io.
- *
- * Handles "lockless io" mode, in which extent locking is done by the server.
- */
-static int vvp_io_setattr_lock(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- __u64 new_size;
- __u32 enqflags = 0;
-
- if (cl_io_is_trunc(io)) {
- new_size = io->u.ci_setattr.sa_attr.lvb_size;
- if (new_size == 0)
- enqflags = CEF_DISCARD_DATA;
- } else {
- unsigned int valid = io->u.ci_setattr.sa_valid;
-
- if (!(valid & TIMES_SET_FLAGS))
- return 0;
-
- if ((!(valid & ATTR_MTIME) ||
- io->u.ci_setattr.sa_attr.lvb_mtime >=
- io->u.ci_setattr.sa_attr.lvb_ctime) &&
- (!(valid & ATTR_ATIME) ||
- io->u.ci_setattr.sa_attr.lvb_atime >=
- io->u.ci_setattr.sa_attr.lvb_ctime))
- return 0;
- new_size = 0;
- }
-
- return vvp_io_one_lock(env, io, enqflags, CLM_WRITE,
- new_size, OBD_OBJECT_EOF);
-}
-
-static int vvp_do_vmtruncate(struct inode *inode, size_t size)
-{
- int result;
- /*
- * Only ll_inode_size_lock is taken at this level.
- */
- ll_inode_size_lock(inode);
- result = inode_newsize_ok(inode, size);
- if (result < 0) {
- ll_inode_size_unlock(inode);
- return result;
- }
- truncate_setsize(inode, size);
- ll_inode_size_unlock(inode);
- return result;
-}
-
-static int vvp_io_setattr_time(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct cl_object *obj = io->ci_obj;
- struct cl_attr *attr = vvp_env_thread_attr(env);
- int result;
- unsigned valid = CAT_CTIME;
-
- cl_object_attr_lock(obj);
- attr->cat_ctime = io->u.ci_setattr.sa_attr.lvb_ctime;
- if (io->u.ci_setattr.sa_valid & ATTR_ATIME_SET) {
- attr->cat_atime = io->u.ci_setattr.sa_attr.lvb_atime;
- valid |= CAT_ATIME;
- }
- if (io->u.ci_setattr.sa_valid & ATTR_MTIME_SET) {
- attr->cat_mtime = io->u.ci_setattr.sa_attr.lvb_mtime;
- valid |= CAT_MTIME;
- }
- result = cl_object_attr_update(env, obj, attr, valid);
- cl_object_attr_unlock(obj);
-
- return result;
-}
-
-static int vvp_io_setattr_start(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct inode *inode = vvp_object_inode(io->ci_obj);
- struct ll_inode_info *lli = ll_i2info(inode);
-
- if (cl_io_is_trunc(io)) {
- down_write(&lli->lli_trunc_sem);
- inode_lock(inode);
- inode_dio_wait(inode);
- } else {
- inode_lock(inode);
- }
-
- if (io->u.ci_setattr.sa_valid & TIMES_SET_FLAGS)
- return vvp_io_setattr_time(env, ios);
-
- return 0;
-}
-
-static void vvp_io_setattr_end(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct inode *inode = vvp_object_inode(io->ci_obj);
- struct ll_inode_info *lli = ll_i2info(inode);
-
- if (cl_io_is_trunc(io)) {
- /* Truncate in-memory pages - they must be clean pages,
- * because osc has already been notified to destroy the osc_extents.
- */
- vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size);
- inode_unlock(inode);
- up_write(&lli->lli_trunc_sem);
- } else {
- inode_unlock(inode);
- }
-}
-
-static void vvp_io_setattr_fini(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- bool restore_needed = ios->cis_io->ci_restore_needed;
- struct inode *inode = vvp_object_inode(ios->cis_obj);
-
- vvp_io_fini(env, ios);
-
- if (restore_needed && !ios->cis_io->ci_restore_needed) {
- /* restore finished, set data modified flag for HSM */
- set_bit(LLIF_DATA_MODIFIED, &(ll_i2info(inode))->lli_flags);
- }
-}
-
-static int vvp_io_read_start(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct vvp_io *vio = cl2vvp_io(env, ios);
- struct cl_io *io = ios->cis_io;
- struct cl_object *obj = io->ci_obj;
- struct inode *inode = vvp_object_inode(obj);
- struct ll_inode_info *lli = ll_i2info(inode);
- struct file *file = vio->vui_fd->fd_file;
-
- int result;
- loff_t pos = io->u.ci_rd.rd.crw_pos;
- long cnt = io->u.ci_rd.rd.crw_count;
- long tot = vio->vui_tot_count;
- int exceed = 0;
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt);
-
- down_read(&lli->lli_trunc_sem);
-
- if (!can_populate_pages(env, io, inode))
- return 0;
-
- result = vvp_prep_size(env, obj, io, pos, tot, &exceed);
- if (result != 0)
- return result;
- if (exceed != 0)
- goto out;
-
- LU_OBJECT_HEADER(D_INODE, env, &obj->co_lu,
- "Read ino %lu, %lu bytes, offset %lld, size %llu\n",
- inode->i_ino, cnt, pos, i_size_read(inode));
-
- /* turn off the kernel's read-ahead */
- vio->vui_fd->fd_file->f_ra.ra_pages = 0;
-
- /* initialize read-ahead window once per syscall */
- if (!vio->vui_ra_valid) {
- vio->vui_ra_valid = true;
- vio->vui_ra_start = cl_index(obj, pos);
- vio->vui_ra_count = cl_index(obj, tot + PAGE_SIZE - 1);
- ll_ras_enter(file);
- }
-
- /* BUG: 5972 */
- file_accessed(file);
- LASSERT(vio->vui_iocb->ki_pos == pos);
- result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter);
-
-out:
- if (result >= 0) {
- if (result < cnt)
- io->ci_continue = 0;
- io->ci_nob += result;
- ll_rw_stats_tally(ll_i2sbi(inode), current->pid,
- vio->vui_fd, pos, result, READ);
- result = 0;
- }
- return result;
-}
-
-static int vvp_io_commit_sync(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *plist, int from, int to)
-{
- struct cl_2queue *queue = &io->ci_queue;
- struct cl_page *page;
- unsigned int bytes = 0;
- int rc = 0;
-
- if (plist->pl_nr == 0)
- return 0;
-
- if (from > 0 || to != PAGE_SIZE) {
- page = cl_page_list_first(plist);
- if (plist->pl_nr == 1) {
- cl_page_clip(env, page, from, to);
- } else {
- if (from > 0)
- cl_page_clip(env, page, from, PAGE_SIZE);
- if (to != PAGE_SIZE) {
- page = cl_page_list_last(plist);
- cl_page_clip(env, page, 0, to);
- }
- }
- }
-
- cl_2queue_init(queue);
- cl_page_list_splice(plist, &queue->c2_qin);
- rc = cl_io_submit_sync(env, io, CRT_WRITE, queue, 0);
-
- /* plist is not sorted any more */
- cl_page_list_splice(&queue->c2_qin, plist);
- cl_page_list_splice(&queue->c2_qout, plist);
- cl_2queue_fini(env, queue);
-
- if (rc == 0) {
- /* calculate bytes */
- bytes = plist->pl_nr << PAGE_SHIFT;
- bytes -= from + PAGE_SIZE - to;
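- /* e.g. 3 pages with from = 512 and to = 2048 (PAGE_SIZE = 4096):
- * bytes = 12288 - (512 + 4096 - 2048) = 9728
- */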
-
- while (plist->pl_nr > 0) {
- page = cl_page_list_first(plist);
- cl_page_list_del(env, plist, page);
-
- cl_page_clip(env, page, 0, PAGE_SIZE);
-
- SetPageUptodate(cl_page_vmpage(page));
- cl_page_disown(env, io, page);
-
- /* held in ll_cl_init() */
- lu_ref_del(&page->cp_reference, "cl_io", io);
- cl_page_put(env, page);
- }
- }
-
- return bytes > 0 ? bytes : rc;
-}
-
-static void write_commit_callback(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page)
-{
- struct page *vmpage = page->cp_vmpage;
-
- SetPageUptodate(vmpage);
- set_page_dirty(vmpage);
-
- cl_page_disown(env, io, page);
-
- /* held in ll_cl_init() */
- lu_ref_del(&page->cp_reference, "cl_io", cl_io_top(io));
- cl_page_put(env, page);
-}
-
-/* make sure the page list is contiguous */
-static bool page_list_sanity_check(struct cl_object *obj,
- struct cl_page_list *plist)
-{
- struct cl_page *page;
- pgoff_t index = CL_PAGE_EOF;
-
- cl_page_list_for_each(page, plist) {
- struct vvp_page *vpg = cl_object_page_slice(obj, page);
-
- if (index == CL_PAGE_EOF) {
- index = vvp_index(vpg);
- continue;
- }
-
- ++index;
- if (index == vvp_index(vpg))
- continue;
-
- return false;
- }
- return true;
-}
-
-/* Return how many bytes have been queued or written */
-int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io)
-{
- struct cl_object *obj = io->ci_obj;
- struct inode *inode = vvp_object_inode(obj);
- struct vvp_io *vio = vvp_env_io(env);
- struct cl_page_list *queue = &vio->u.write.vui_queue;
- struct cl_page *page;
- int rc = 0;
- int bytes = 0;
- unsigned int npages = vio->u.write.vui_queue.pl_nr;
-
- if (npages == 0)
- return 0;
-
- CDEBUG(D_VFSTRACE, "commit async pages: %d, from %d, to %d\n",
- npages, vio->u.write.vui_from, vio->u.write.vui_to);
-
- LASSERT(page_list_sanity_check(obj, queue));
-
- /* submit IO with async write */
- rc = cl_io_commit_async(env, io, queue,
- vio->u.write.vui_from, vio->u.write.vui_to,
- write_commit_callback);
- npages -= queue->pl_nr; /* already committed pages */
- if (npages > 0) {
- /* calculate how many bytes were written */
- bytes = npages << PAGE_SHIFT;
-
- /* first page */
- bytes -= vio->u.write.vui_from;
- if (queue->pl_nr == 0) /* last page */
- bytes -= PAGE_SIZE - vio->u.write.vui_to;
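- /* e.g. 2 pages, all committed, vui_from = 512, vui_to = 2048
- * (PAGE_SIZE = 4096): bytes = 8192 - 512 - 2048 = 5632
- */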
- LASSERTF(bytes > 0, "bytes = %d, pages = %d\n", bytes, npages);
-
- vio->u.write.vui_written += bytes;
-
- CDEBUG(D_VFSTRACE, "Committed %d pages %d bytes, tot: %ld\n",
- npages, bytes, vio->u.write.vui_written);
-
- /* the first page must have been written. */
- vio->u.write.vui_from = 0;
- }
- LASSERT(page_list_sanity_check(obj, queue));
- LASSERT(ergo(rc == 0, queue->pl_nr == 0));
-
- /* out of quota, try sync write */
- if (rc == -EDQUOT && !cl_io_is_mkwrite(io)) {
- rc = vvp_io_commit_sync(env, io, queue,
- vio->u.write.vui_from,
- vio->u.write.vui_to);
- if (rc > 0) {
- vio->u.write.vui_written += rc;
- rc = 0;
- }
- }
-
- /* update inode size */
- ll_merge_attr(env, inode);
-
- /* Any pages still in the queue failed to commit; discard them
- * unless they were dirtied before.
- */
- while (queue->pl_nr > 0) {
- page = cl_page_list_first(queue);
- cl_page_list_del(env, queue, page);
-
- if (!PageDirty(cl_page_vmpage(page)))
- cl_page_discard(env, io, page);
-
- cl_page_disown(env, io, page);
-
- /* held in ll_cl_init() */
- lu_ref_del(&page->cp_reference, "cl_io", io);
- cl_page_put(env, page);
- }
- cl_page_list_fini(env, queue);
-
- return rc;
-}
-
-static int vvp_io_write_start(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct vvp_io *vio = cl2vvp_io(env, ios);
- struct cl_io *io = ios->cis_io;
- struct cl_object *obj = io->ci_obj;
- struct inode *inode = vvp_object_inode(obj);
- struct ll_inode_info *lli = ll_i2info(inode);
- ssize_t result = 0;
- loff_t pos = io->u.ci_wr.wr.crw_pos;
- size_t cnt = io->u.ci_wr.wr.crw_count;
-
- down_read(&lli->lli_trunc_sem);
-
- if (!can_populate_pages(env, io, inode))
- return 0;
-
- if (cl_io_is_append(io)) {
- /*
- * PARALLEL IO: this has to be changed for parallel IO doing
- * out-of-order writes.
- */
- ll_merge_attr(env, inode);
- pos = i_size_read(inode);
- io->u.ci_wr.wr.crw_pos = pos;
- vio->vui_iocb->ki_pos = pos;
- } else {
- LASSERT(vio->vui_iocb->ki_pos == pos);
- }
-
- CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt);
-
- /*
- * The maximum Lustre file size is variable, based on the OST maximum
- * object size and number of stripes. This needs another check in
- * addition to the VFS checks earlier.
- */
- if (pos + cnt > ll_file_maxbytes(inode)) {
- CDEBUG(D_INODE,
- "%s: file " DFID " offset %llu > maxbytes %llu\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), pos + cnt,
- ll_file_maxbytes(inode));
- return -EFBIG;
- }
-
- if (!vio->vui_iter) {
- /* from a temp io in ll_cl_init(). */
- result = 0;
- } else {
- /*
- * When using the locked AIO function (generic_file_aio_write()),
- * testing has shown the inode mutex to be a limiting factor for
- * multi-threaded single-shared-file performance. To get around
- * this, we now use the lockless version. To maintain consistency,
- * proper locking to protect against writes, truncates, etc. is
- * handled in the higher layers of lustre.
- */
- bool lock_node = !IS_NOSEC(inode);
-
- if (lock_node)
- inode_lock(inode);
- result = __generic_file_write_iter(vio->vui_iocb,
- vio->vui_iter);
- if (lock_node)
- inode_unlock(inode);
-
- if (result > 0 || result == -EIOCBQUEUED)
- result = generic_write_sync(vio->vui_iocb, result);
- }
-
- if (result > 0) {
- result = vvp_io_write_commit(env, io);
- if (vio->u.write.vui_written > 0) {
- result = vio->u.write.vui_written;
- io->ci_nob += result;
-
- CDEBUG(D_VFSTRACE, "write: nob %zd, result: %zd\n",
- io->ci_nob, result);
- }
- }
- if (result > 0) {
- set_bit(LLIF_DATA_MODIFIED, &(ll_i2info(inode))->lli_flags);
-
- if (result < cnt)
- io->ci_continue = 0;
- ll_rw_stats_tally(ll_i2sbi(inode), current->pid,
- vio->vui_fd, pos, result, WRITE);
- result = 0;
- }
- return result;
-}
-
-static void vvp_io_rw_end(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct inode *inode = vvp_object_inode(ios->cis_obj);
- struct ll_inode_info *lli = ll_i2info(inode);
-
- up_read(&lli->lli_trunc_sem);
-}
-
-static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
-{
- struct vm_fault *vmf = cfio->ft_vmf;
-
- cfio->ft_flags = filemap_fault(vmf);
- cfio->ft_flags_valid = 1;
-
- if (vmf->page) {
- CDEBUG(D_PAGE,
- "page %p map %p index %lu flags %lx count %u priv %0lx: got addr %p type NOPAGE\n",
- vmf->page, vmf->page->mapping, vmf->page->index,
- (long)vmf->page->flags, page_count(vmf->page),
- page_private(vmf->page), (void *)vmf->address);
- if (unlikely(!(cfio->ft_flags & VM_FAULT_LOCKED))) {
- lock_page(vmf->page);
- cfio->ft_flags |= VM_FAULT_LOCKED;
- }
-
- cfio->ft_vmpage = vmf->page;
- return 0;
- }
-
- if (cfio->ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
- CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", (void *)vmf->address);
- return -EFAULT;
- }
-
- if (cfio->ft_flags & VM_FAULT_OOM) {
- CDEBUG(D_PAGE, "got addr %p - OOM\n", (void *)vmf->address);
- return -ENOMEM;
- }
-
- if (cfio->ft_flags & VM_FAULT_RETRY)
- return -EAGAIN;
-
- CERROR("Unknown error in page fault %d!\n", cfio->ft_flags);
- return -EINVAL;
-}
-
-static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page)
-{
- set_page_dirty(page->cp_vmpage);
-}
-
-static int vvp_io_fault_start(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct vvp_io *vio = cl2vvp_io(env, ios);
- struct cl_io *io = ios->cis_io;
- struct cl_object *obj = io->ci_obj;
- struct inode *inode = vvp_object_inode(obj);
- struct ll_inode_info *lli = ll_i2info(inode);
- struct cl_fault_io *fio = &io->u.ci_fault;
- struct vvp_fault_io *cfio = &vio->u.fault;
- loff_t offset;
- int result = 0;
- struct page *vmpage = NULL;
- struct cl_page *page;
- loff_t size;
- pgoff_t last_index;
-
- down_read(&lli->lli_trunc_sem);
-
- /* offset of the last byte on the page */
- offset = cl_offset(obj, fio->ft_index + 1) - 1;
- LASSERT(cl_index(obj, offset) == fio->ft_index);
- result = vvp_prep_size(env, obj, io, 0, offset + 1, NULL);
- if (result != 0)
- return result;
-
- /* must return locked page */
- if (fio->ft_mkwrite) {
- LASSERT(cfio->ft_vmpage);
- lock_page(cfio->ft_vmpage);
- } else {
- result = vvp_io_kernel_fault(cfio);
- if (result != 0)
- return result;
- }
-
- vmpage = cfio->ft_vmpage;
- LASSERT(PageLocked(vmpage));
-
- if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_FAULT_TRUNC_RACE))
- ll_invalidate_page(vmpage);
-
- size = i_size_read(inode);
- /* Though we already hold a cl_lock on this page,
- * it can still be truncated locally.
- */
- if (unlikely((vmpage->mapping != inode->i_mapping) ||
- (page_offset(vmpage) > size))) {
- CDEBUG(D_PAGE, "llite: fault and truncate race happened!\n");
-
- /* return +1 to stop cl_io_loop() and ll_fault() will catch
- * and retry.
- */
- result = 1;
- goto out;
- }
-
- last_index = cl_index(obj, size - 1);
-
- if (fio->ft_mkwrite) {
- /*
- * Capture the size while holding the lli_trunc_sem from above;
- * we want to make sure that we complete the mkwrite action
- * while holding this lock. We need to make sure that we are
- * not past the end of the file.
- */
- if (last_index < fio->ft_index) {
- CDEBUG(D_PAGE,
- "llite: mkwrite and truncate race happened: %p: 0x%lx 0x%lx\n",
- vmpage->mapping, fio->ft_index, last_index);
- /*
- * We need to return if we are past the end of the
- * file. This will propagate up the call stack to
- * ll_page_mkwrite, where we will return
- * VM_FAULT_NOPAGE. Any non-negative value returned
- * here will be silently converted to 0. If
- * vmpage->mapping is NULL, the error code would be
- * converted back to ENODATA in ll_page_mkwrite0.
- * Thus we return -ENODATA to handle both cases.
- */
- result = -ENODATA;
- goto out;
- }
- }
-
- page = cl_page_find(env, obj, fio->ft_index, vmpage, CPT_CACHEABLE);
- if (IS_ERR(page)) {
- result = PTR_ERR(page);
- goto out;
- }
-
- /* if the page is going to be written, we should add it to the
- * cache earlier.
- */
- if (fio->ft_mkwrite) {
- wait_on_page_writeback(vmpage);
- if (!PageDirty(vmpage)) {
- struct cl_page_list *plist = &io->ci_queue.c2_qin;
- struct vvp_page *vpg = cl_object_page_slice(obj, page);
- int to = PAGE_SIZE;
-
- /* vvp_page_assume() calls wait_on_page_writeback(). */
- cl_page_assume(env, io, page);
-
- cl_page_list_init(plist);
- cl_page_list_add(plist, page);
-
- /* size fixup */
- if (last_index == vvp_index(vpg))
- to = size & ~PAGE_MASK;
-
- /* Do not set Dirty bit here so that in case IO is
- * started before the page is really made dirty, we
- * still have chance to detect it.
- */
- result = cl_io_commit_async(env, io, plist, 0, to,
- mkwrite_commit_callback);
- LASSERT(cl_page_is_owned(page, io));
- cl_page_list_fini(env, plist);
-
- vmpage = NULL;
- if (result < 0) {
- cl_page_discard(env, io, page);
- cl_page_disown(env, io, page);
-
- cl_page_put(env, page);
-
- /* we're in big trouble, what can we do now? */
- if (result == -EDQUOT)
- result = -ENOSPC;
- goto out;
- } else {
- cl_page_disown(env, io, page);
- }
- }
- }
-
- /*
- * The ft_index is only used in the case of a mkwrite
- * action. We need to check that our assertions are
- * correct, since we should have caught this above.
- */
- LASSERT(!fio->ft_mkwrite || fio->ft_index <= last_index);
- if (fio->ft_index == last_index)
- /*
- * Last page is mapped partially.
- */
- fio->ft_nob = size - cl_offset(obj, fio->ft_index);
- else
- fio->ft_nob = cl_page_size(obj);
-
- lu_ref_add(&page->cp_reference, "fault", io);
- fio->ft_page = page;
-
-out:
- /* return unlocked vmpage to avoid deadlocking */
- if (vmpage)
- unlock_page(vmpage);
-
- cfio->ft_flags &= ~VM_FAULT_LOCKED;
-
- return result;
-}
-
-static void vvp_io_fault_end(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct inode *inode = vvp_object_inode(ios->cis_obj);
- struct ll_inode_info *lli = ll_i2info(inode);
-
- CLOBINVRNT(env, ios->cis_io->ci_obj,
- vvp_object_invariant(ios->cis_io->ci_obj));
- up_read(&lli->lli_trunc_sem);
-}
-
-static int vvp_io_fsync_start(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- /* we should set the TOWRITE bit on each dirty page in the radix
- * tree to verify that pages have been written, but this is
- * difficult because of races.
- */
- return 0;
-}
-
-static int vvp_io_read_ahead(const struct lu_env *env,
- const struct cl_io_slice *ios,
- pgoff_t start, struct cl_read_ahead *ra)
-{
- int result = 0;
-
- if (ios->cis_io->ci_type == CIT_READ ||
- ios->cis_io->ci_type == CIT_FAULT) {
- struct vvp_io *vio = cl2vvp_io(env, ios);
-
- if (unlikely(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
- ra->cra_end = CL_PAGE_EOF;
- result = 1; /* no need to call down */
- }
- }
-
- return result;
-}
-
-static const struct cl_io_operations vvp_io_ops = {
- .op = {
- [CIT_READ] = {
- .cio_fini = vvp_io_fini,
- .cio_lock = vvp_io_read_lock,
- .cio_start = vvp_io_read_start,
- .cio_end = vvp_io_rw_end,
- .cio_advance = vvp_io_advance,
- },
- [CIT_WRITE] = {
- .cio_fini = vvp_io_fini,
- .cio_iter_init = vvp_io_write_iter_init,
- .cio_iter_fini = vvp_io_write_iter_fini,
- .cio_lock = vvp_io_write_lock,
- .cio_start = vvp_io_write_start,
- .cio_end = vvp_io_rw_end,
- .cio_advance = vvp_io_advance,
- },
- [CIT_SETATTR] = {
- .cio_fini = vvp_io_setattr_fini,
- .cio_iter_init = vvp_io_setattr_iter_init,
- .cio_lock = vvp_io_setattr_lock,
- .cio_start = vvp_io_setattr_start,
- .cio_end = vvp_io_setattr_end
- },
- [CIT_FAULT] = {
- .cio_fini = vvp_io_fault_fini,
- .cio_iter_init = vvp_io_fault_iter_init,
- .cio_lock = vvp_io_fault_lock,
- .cio_start = vvp_io_fault_start,
- .cio_end = vvp_io_fault_end,
- },
- [CIT_FSYNC] = {
- .cio_start = vvp_io_fsync_start,
- .cio_fini = vvp_io_fini
- },
- [CIT_MISC] = {
- .cio_fini = vvp_io_fini
- }
- },
- .cio_read_ahead = vvp_io_read_ahead,
-};
-
-int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io)
-{
- struct vvp_io *vio = vvp_env_io(env);
- struct inode *inode = vvp_object_inode(obj);
- int result;
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- CDEBUG(D_VFSTRACE, DFID
- " ignore/verify layout %d/%d, layout version %d restore needed %d\n",
- PFID(lu_object_fid(&obj->co_lu)),
- io->ci_ignore_layout, io->ci_verify_layout,
- vio->vui_layout_gen, io->ci_restore_needed);
-
- CL_IO_SLICE_CLEAN(vio, vui_cl);
- cl_io_slice_add(io, &vio->vui_cl, obj, &vvp_io_ops);
- vio->vui_ra_valid = false;
- result = 0;
- if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE) {
- size_t count;
- struct ll_inode_info *lli = ll_i2info(inode);
-
- count = io->u.ci_rw.crw_count;
- /* "If nbyte is 0, read() will return 0 and have no other
- * results." -- Single Unix Spec
- */
- if (count == 0)
- result = 1;
- else
- vio->vui_tot_count = count;
-
- /* For read/write, we store the jobid in the inode, and
- * it'll be fetched by osc when building the RPC.
- *
- * It's not accurate if the file is shared by different
- * jobs.
- */
- lustre_get_jobid(lli->lli_jobid);
- } else if (io->ci_type == CIT_SETATTR) {
- if (!cl_io_is_trunc(io))
- io->ci_lockreq = CILR_MANDATORY;
- }
-
- /* Enqueue the layout lock and get the layout version. We need to do
- * this even for operations that require an open file, such as read
- * and write, because the layout lock might not be granted in IT_OPEN.
- */
- if (result == 0 && !io->ci_ignore_layout) {
- result = ll_layout_refresh(inode, &vio->vui_layout_gen);
- if (result == -ENOENT)
- /* If the inode has been removed on the MDS but the objects
- * on the OSTs have not been destroyed yet (async unlink),
- * the layout fetch returns -ENOENT; ignore the error and
- * continue with the dirty flush. LU-3230.
- */
- result = 0;
- if (result < 0)
- CERROR("%s: refresh file layout " DFID " error %d.\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(lu_object_fid(&obj->co_lu)), result);
- }
-
- return result;
-}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_lock.c b/drivers/staging/lustre/lustre/llite/vvp_lock.c
deleted file mode 100644
index 4b6c7143bd2c..000000000000
--- a/drivers/staging/lustre/lustre/llite/vvp_lock.c
+++ /dev/null
@@ -1,87 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2014, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_lock for VVP layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_support.h>
-
-#include "vvp_internal.h"
-
-/*****************************************************************************
- *
- * Vvp lock functions.
- *
- */
-
-static void vvp_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice)
-{
- struct vvp_lock *vlk = cl2vvp_lock(slice);
-
- kmem_cache_free(vvp_lock_kmem, vlk);
-}
-
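-/* The VVP layer keeps no DLM state of its own, so enqueue only checks the
- * object invariant; the actual lock enqueue is presumably handled by the
- * layers below (lov/osc).
- */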
-static int vvp_lock_enqueue(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- struct cl_io *unused, struct cl_sync_io *anchor)
-{
- CLOBINVRNT(env, slice->cls_obj, vvp_object_invariant(slice->cls_obj));
-
- return 0;
-}
-
-static const struct cl_lock_operations vvp_lock_ops = {
- .clo_fini = vvp_lock_fini,
- .clo_enqueue = vvp_lock_enqueue,
-};
-
-int vvp_lock_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_lock *lock, const struct cl_io *unused)
-{
- struct vvp_lock *vlk;
- int result;
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- vlk = kmem_cache_zalloc(vvp_lock_kmem, GFP_NOFS);
- if (vlk) {
- cl_lock_slice_add(lock, &vlk->vlk_cl, obj, &vvp_lock_ops);
- result = 0;
- } else {
- result = -ENOMEM;
- }
- return result;
-}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_object.c b/drivers/staging/lustre/lustre/llite/vvp_object.c
deleted file mode 100644
index b2cb51c8f7f4..000000000000
--- a/drivers/staging/lustre/lustre/llite/vvp_object.c
+++ /dev/null
@@ -1,303 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * cl_object implementation for VVP layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd.h>
-
-#include "llite_internal.h"
-#include "vvp_internal.h"
-
-/*****************************************************************************
- *
- * Object operations.
- *
- */
-
-int vvp_object_invariant(const struct cl_object *obj)
-{
- struct inode *inode = vvp_object_inode(obj);
- struct ll_inode_info *lli = ll_i2info(inode);
-
- return (S_ISREG(inode->i_mode) || inode->i_mode == 0) &&
- lli->lli_clob == obj;
-}
-
-static int vvp_object_print(const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct lu_object *o)
-{
- struct vvp_object *obj = lu2vvp(o);
- struct inode *inode = obj->vob_inode;
- struct ll_inode_info *lli;
-
- (*p)(env, cookie, "(%d %d) inode: %p ",
- atomic_read(&obj->vob_transient_pages),
- atomic_read(&obj->vob_mmap_cnt), inode);
- if (inode) {
- lli = ll_i2info(inode);
- (*p)(env, cookie, "%lu/%u %o %u %d %p " DFID,
- inode->i_ino, inode->i_generation, inode->i_mode,
- inode->i_nlink, atomic_read(&inode->i_count),
- lli->lli_clob, PFID(&lli->lli_fid));
- }
- return 0;
-}
-
-static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj,
- struct cl_attr *attr)
-{
- struct inode *inode = vvp_object_inode(obj);
-
- /*
- * lov overwrites most of these fields in
- * lov_attr_get()->...lov_merge_lvb_kms(), except when inode
- * attributes are newer.
- */
-
- attr->cat_size = i_size_read(inode);
- attr->cat_mtime = inode->i_mtime.tv_sec;
- attr->cat_atime = inode->i_atime.tv_sec;
- attr->cat_ctime = inode->i_ctime.tv_sec;
- attr->cat_blocks = inode->i_blocks;
- attr->cat_uid = from_kuid(&init_user_ns, inode->i_uid);
- attr->cat_gid = from_kgid(&init_user_ns, inode->i_gid);
- /* KMS is not known by this layer */
- return 0; /* layers below have to fill in the rest */
-}
-
-static int vvp_attr_update(const struct lu_env *env, struct cl_object *obj,
- const struct cl_attr *attr, unsigned int valid)
-{
- struct inode *inode = vvp_object_inode(obj);
-
- if (valid & CAT_UID)
- inode->i_uid = make_kuid(&init_user_ns, attr->cat_uid);
- if (valid & CAT_GID)
- inode->i_gid = make_kgid(&init_user_ns, attr->cat_gid);
- if (valid & CAT_ATIME)
- inode->i_atime.tv_sec = attr->cat_atime;
- if (valid & CAT_MTIME)
- inode->i_mtime.tv_sec = attr->cat_mtime;
- if (valid & CAT_CTIME)
- inode->i_ctime.tv_sec = attr->cat_ctime;
- /* Size update and inode dirtying are deliberately disabled
- * here; they are not currently necessary at this layer.
- */
- if (0 && valid & CAT_SIZE)
- i_size_write(inode, attr->cat_size);
- if (0 && valid & (CAT_UID | CAT_GID | CAT_SIZE))
- mark_inode_dirty(inode);
- return 0;
-}
-
-static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj,
- const struct cl_object_conf *conf)
-{
- struct ll_inode_info *lli = ll_i2info(conf->coc_inode);
-
- if (conf->coc_opc == OBJECT_CONF_INVALIDATE) {
- CDEBUG(D_VFSTRACE, DFID ": losing layout lock\n",
- PFID(&lli->lli_fid));
-
- ll_layout_version_set(lli, CL_LAYOUT_GEN_NONE);
-
- /* Unmap this inode's pages from all address spaces.
- * Once a page has been installed in a process's memory map,
- * the process can access it without interacting with Lustre,
- * so after a layout change the page may be stale and the
- * process would never be notified.
- * This operation is expensive, but mmap users have to pay
- * that price themselves.
- */
- unmap_mapping_range(conf->coc_inode->i_mapping,
- 0, OBD_OBJECT_EOF, 0);
- }
-
- return 0;
-}
-
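-/* Prune is likely invoked when the layout is about to change: flush all
- * dirty pages first, then drop every cached page so that new pages are
- * created under the new layout.
- */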
-static int vvp_prune(const struct lu_env *env, struct cl_object *obj)
-{
- struct inode *inode = vvp_object_inode(obj);
- int rc;
-
- rc = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_LOCAL, 1);
- if (rc < 0) {
- CDEBUG(D_VFSTRACE, DFID ": writeback failed: %d\n",
- PFID(lu_object_fid(&obj->co_lu)), rc);
- return rc;
- }
-
- truncate_inode_pages(inode->i_mapping, 0);
- return 0;
-}
-
-static int vvp_object_glimpse(const struct lu_env *env,
- const struct cl_object *obj, struct ost_lvb *lvb)
-{
- struct inode *inode = vvp_object_inode(obj);
-
- lvb->lvb_mtime = LTIME_S(inode->i_mtime);
- lvb->lvb_atime = LTIME_S(inode->i_atime);
- lvb->lvb_ctime = LTIME_S(inode->i_ctime);
- /*
- * LU-417: Add the dirty-page block count lest i_blocks report 0;
- * "cp" or "tar" on a remote node might then treat the file as
- * completely sparse and skip it.
- */
- if (lvb->lvb_size > 0 && lvb->lvb_blocks == 0)
- lvb->lvb_blocks = dirty_cnt(inode);
- return 0;
-}
-
-static void vvp_req_attr_set(const struct lu_env *env, struct cl_object *obj,
- struct cl_req_attr *attr)
-{
- u64 valid_flags = OBD_MD_FLTYPE;
- struct inode *inode;
- struct obdo *oa;
-
- oa = attr->cra_oa;
- inode = vvp_object_inode(obj);
-
- if (attr->cra_type == CRT_WRITE)
- valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
- OBD_MD_FLUID | OBD_MD_FLGID;
- obdo_from_inode(oa, inode, valid_flags & attr->cra_flags);
- obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
- if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_INVALID_PFID))
- oa->o_parent_oid++;
- memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid, LUSTRE_JOBID_SIZE);
-}
-
-static const struct cl_object_operations vvp_ops = {
- .coo_page_init = vvp_page_init,
- .coo_lock_init = vvp_lock_init,
- .coo_io_init = vvp_io_init,
- .coo_attr_get = vvp_attr_get,
- .coo_attr_update = vvp_attr_update,
- .coo_conf_set = vvp_conf_set,
- .coo_prune = vvp_prune,
- .coo_glimpse = vvp_object_glimpse,
- .coo_req_attr_set = vvp_req_attr_set
-};
-
-static int vvp_object_init0(const struct lu_env *env,
- struct vvp_object *vob,
- const struct cl_object_conf *conf)
-{
- vob->vob_inode = conf->coc_inode;
- atomic_set(&vob->vob_transient_pages, 0);
- cl_object_page_init(&vob->vob_cl, sizeof(struct vvp_page));
- return 0;
-}
-
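-/* Allocate the object slice for the next device layer (presumably lov) and
- * chain it below this VVP object, then finish VVP-specific setup in
- * vvp_object_init0().
- */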
-static int vvp_object_init(const struct lu_env *env, struct lu_object *obj,
- const struct lu_object_conf *conf)
-{
- struct vvp_device *dev = lu2vvp_dev(obj->lo_dev);
- struct vvp_object *vob = lu2vvp(obj);
- struct lu_object *below;
- struct lu_device *under;
- int result;
-
- under = &dev->vdv_next->cd_lu_dev;
- below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under);
- if (below) {
- const struct cl_object_conf *cconf;
-
- cconf = lu2cl_conf(conf);
- lu_object_add(obj, below);
- result = vvp_object_init0(env, vob, cconf);
- } else {
- result = -ENOMEM;
- }
-
- return result;
-}
-
-static void vvp_object_free(const struct lu_env *env, struct lu_object *obj)
-{
- struct vvp_object *vob = lu2vvp(obj);
-
- lu_object_fini(obj);
- lu_object_header_fini(obj->lo_header);
- kmem_cache_free(vvp_object_kmem, vob);
-}
-
-static const struct lu_object_operations vvp_lu_obj_ops = {
- .loo_object_init = vvp_object_init,
- .loo_object_free = vvp_object_free,
- .loo_object_print = vvp_object_print,
-};
-
-struct vvp_object *cl_inode2vvp(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct cl_object *obj = lli->lli_clob;
- struct lu_object *lu;
-
- lu = lu_object_locate(obj->co_lu.lo_header, &vvp_device_type);
- LASSERT(lu);
- return lu2vvp(lu);
-}
-
-struct lu_object *vvp_object_alloc(const struct lu_env *env,
- const struct lu_object_header *unused,
- struct lu_device *dev)
-{
- struct vvp_object *vob;
- struct lu_object *obj;
-
- vob = kmem_cache_zalloc(vvp_object_kmem, GFP_NOFS);
- if (vob) {
- struct cl_object_header *hdr;
-
- obj = &vob->vob_cl.co_lu;
- hdr = &vob->vob_header;
- cl_object_header_init(hdr);
- hdr->coh_page_bufsize = cfs_size_round(sizeof(struct cl_page));
-
- lu_object_init(obj, &hdr->coh_lu, dev);
- lu_object_add_top(&hdr->coh_lu, obj);
-
- vob->vob_cl.co_ops = &vvp_ops;
- obj->lo_ops = &vvp_lu_obj_ops;
- } else {
- obj = NULL;
- }
- return obj;
-}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c
deleted file mode 100644
index 6eb0565ddc22..000000000000
--- a/drivers/staging/lustre/lustre/llite/vvp_page.c
+++ /dev/null
@@ -1,523 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_page for VVP layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/atomic.h>
-#include <linux/bitops.h>
-#include <linux/mm.h>
-#include <linux/mutex.h>
-#include <linux/page-flags.h>
-#include <linux/pagemap.h>
-
-#include "llite_internal.h"
-#include "vvp_internal.h"
-
-/*****************************************************************************
- *
- * Page operations.
- *
- */
-
-static void vvp_page_fini_common(struct vvp_page *vpg)
-{
- struct page *vmpage = vpg->vpg_page;
-
- LASSERT(vmpage);
- put_page(vmpage);
-}
-
-static void vvp_page_fini(const struct lu_env *env,
- struct cl_page_slice *slice)
-{
- struct vvp_page *vpg = cl2vvp_page(slice);
- struct page *vmpage = vpg->vpg_page;
-
- /*
- * vmpage->private was already cleared when page was moved into
- * VPG_FREEING state.
- */
- LASSERT((struct cl_page *)vmpage->private != slice->cpl_page);
- vvp_page_fini_common(vpg);
-}
-
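-/* Owning a page means holding the VM page lock with no writeback pending;
- * in nonblock mode, back off with -EAGAIN instead of sleeping on either.
- */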
-static int vvp_page_own(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io,
- int nonblock)
-{
- struct vvp_page *vpg = cl2vvp_page(slice);
- struct page *vmpage = vpg->vpg_page;
-
- LASSERT(vmpage);
- if (nonblock) {
- if (!trylock_page(vmpage))
- return -EAGAIN;
-
- if (unlikely(PageWriteback(vmpage))) {
- unlock_page(vmpage);
- return -EAGAIN;
- }
-
- return 0;
- }
-
- lock_page(vmpage);
- wait_on_page_writeback(vmpage);
-
- return 0;
-}
-
-static void vvp_page_assume(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- struct page *vmpage = cl2vm_page(slice);
-
- LASSERT(vmpage);
- LASSERT(PageLocked(vmpage));
- wait_on_page_writeback(vmpage);
-}
-
-static void vvp_page_unassume(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- struct page *vmpage = cl2vm_page(slice);
-
- LASSERT(vmpage);
- LASSERT(PageLocked(vmpage));
-}
-
-static void vvp_page_disown(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io)
-{
- struct page *vmpage = cl2vm_page(slice);
-
- LASSERT(vmpage);
- LASSERT(PageLocked(vmpage));
-
- unlock_page(cl2vm_page(slice));
-}
-
-static void vvp_page_discard(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- struct page *vmpage = cl2vm_page(slice);
- struct vvp_page *vpg = cl2vvp_page(slice);
-
- LASSERT(vmpage);
- LASSERT(PageLocked(vmpage));
-
- if (vpg->vpg_defer_uptodate && !vpg->vpg_ra_used)
- ll_ra_stats_inc(vmpage->mapping->host, RA_STAT_DISCARDED);
-
- ll_invalidate_page(vmpage);
-}
-
-static void vvp_page_delete(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- struct page *vmpage = cl2vm_page(slice);
- struct inode *inode = vmpage->mapping->host;
- struct cl_object *obj = slice->cpl_obj;
- struct cl_page *page = slice->cpl_page;
- int refc;
-
- LASSERT(PageLocked(vmpage));
- LASSERT((struct cl_page *)vmpage->private == page);
- LASSERT(inode == vvp_object_inode(obj));
-
- /* Drop the reference count held in vvp_page_init */
- refc = atomic_dec_return(&page->cp_ref);
- LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc);
-
- ClearPagePrivate(vmpage);
- vmpage->private = 0;
- /*
- * Reference from vmpage to cl_page is removed, but the reference back
- * is still here. It is removed later in vvp_page_fini().
- */
-}
-
-static void vvp_page_export(const struct lu_env *env,
- const struct cl_page_slice *slice,
- int uptodate)
-{
- struct page *vmpage = cl2vm_page(slice);
-
- LASSERT(vmpage);
- LASSERT(PageLocked(vmpage));
- if (uptodate)
- SetPageUptodate(vmpage);
- else
- ClearPageUptodate(vmpage);
-}
-
-static int vvp_page_is_vmlocked(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- return PageLocked(cl2vm_page(slice)) ? -EBUSY : -ENODATA;
-}
-
-static int vvp_page_prep_read(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- /* Skip the page already marked as PG_uptodate. */
- return PageUptodate(cl2vm_page(slice)) ? -EALREADY : 0;
-}
-
-static int vvp_page_prep_write(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- struct page *vmpage = cl2vm_page(slice);
- struct cl_page *pg = slice->cpl_page;
-
- LASSERT(PageLocked(vmpage));
- LASSERT(!PageDirty(vmpage));
-
- /* The ll_writepage path is not a sync write, so we need to set the
- * page writeback flag ourselves.
- */
- if (!pg->cp_sync_io)
- set_page_writeback(vmpage);
-
- return 0;
-}
-
-/**
- * Handles page transfer errors at the VM level.
- *
- * This takes the inode as a separate argument because the inode on which the
- * error is to be set can differ from the \a vmpage inode in the direct-IO
- * case.
- */
-static void vvp_vmpage_error(struct inode *inode, struct page *vmpage,
- int ioret)
-{
- struct vvp_object *obj = cl_inode2vvp(inode);
-
- if (ioret == 0) {
- ClearPageError(vmpage);
- obj->vob_discard_page_warned = 0;
- } else {
- SetPageError(vmpage);
- mapping_set_error(inode->i_mapping, ioret);
-
- if ((ioret == -ESHUTDOWN || ioret == -EINTR) &&
- obj->vob_discard_page_warned == 0) {
- obj->vob_discard_page_warned = 1;
- ll_dirty_page_discard_warn(vmpage, ioret);
- }
- }
-}
-
-static void vvp_page_completion_read(const struct lu_env *env,
- const struct cl_page_slice *slice,
- int ioret)
-{
- struct vvp_page *vpg = cl2vvp_page(slice);
- struct page *vmpage = vpg->vpg_page;
- struct cl_page *page = slice->cpl_page;
- struct inode *inode = vvp_object_inode(page->cp_obj);
-
- LASSERT(PageLocked(vmpage));
- CL_PAGE_HEADER(D_PAGE, env, page, "completing READ with %d\n", ioret);
-
- if (vpg->vpg_defer_uptodate)
- ll_ra_count_put(ll_i2sbi(inode), 1);
-
- if (ioret == 0) {
- if (!vpg->vpg_defer_uptodate)
- cl_page_export(env, page, 1);
- } else {
- vpg->vpg_defer_uptodate = 0;
- }
-
- if (!page->cp_sync_io)
- unlock_page(vmpage);
-}
-
-static void vvp_page_completion_write(const struct lu_env *env,
- const struct cl_page_slice *slice,
- int ioret)
-{
- struct vvp_page *vpg = cl2vvp_page(slice);
- struct cl_page *pg = slice->cpl_page;
- struct page *vmpage = vpg->vpg_page;
-
- CL_PAGE_HEADER(D_PAGE, env, pg, "completing WRITE with %d\n", ioret);
-
- if (pg->cp_sync_io) {
- LASSERT(PageLocked(vmpage));
- LASSERT(!PageWriteback(vmpage));
- } else {
- LASSERT(PageWriteback(vmpage));
- /*
- * Only mark the page with an error for async writes,
- * because applications won't wait for the IO to finish.
- */
- vvp_vmpage_error(vvp_object_inode(pg->cp_obj), vmpage, ioret);
-
- end_page_writeback(vmpage);
- }
-}
-
-/**
- * Implements cl_page_operations::cpo_make_ready() method.
- *
- * This is called to yank a page from the transfer cache and send it out as
- * part of a transfer. If the page cannot be made ready (e.g. it is already
- * being written out by concurrent IO, or has been truncated), it is skipped;
- * this is unfortunate but hopefully rare, as it results in the transfer
- * being shorter than it could be.
- *
- * \retval 0 success, page can be placed into transfer
- *
- * \retval -EALREADY page is either in use by concurrent IO or has been
- * truncated. Skip it.
- */
-static int vvp_page_make_ready(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- struct page *vmpage = cl2vm_page(slice);
- struct cl_page *pg = slice->cpl_page;
- int result = 0;
-
- lock_page(vmpage);
- if (clear_page_dirty_for_io(vmpage)) {
- LASSERT(pg->cp_state == CPS_CACHED);
- /* This actually clears the dirty bit in the radix tree. */
- set_page_writeback(vmpage);
- CL_PAGE_HEADER(D_PAGE, env, pg, "readied\n");
- } else if (pg->cp_state == CPS_PAGEOUT) {
- /* is it possible for osc_flush_async_page() to already
- * make it ready?
- */
- result = -EALREADY;
- } else {
- CL_PAGE_DEBUG(D_ERROR, env, pg, "Unexpected page state %d.\n",
- pg->cp_state);
- LBUG();
- }
- unlock_page(vmpage);
- return result;
-}
-
-static int vvp_page_print(const struct lu_env *env,
- const struct cl_page_slice *slice,
- void *cookie, lu_printer_t printer)
-{
- struct vvp_page *vpg = cl2vvp_page(slice);
- struct page *vmpage = vpg->vpg_page;
-
- (*printer)(env, cookie, LUSTRE_VVP_NAME "-page@%p(%d:%d) vm@%p ",
- vpg, vpg->vpg_defer_uptodate, vpg->vpg_ra_used, vmpage);
- if (vmpage) {
- (*printer)(env, cookie, "%lx %d:%d %lx %lu %slru",
- (long)vmpage->flags, page_count(vmpage),
- page_mapcount(vmpage), vmpage->private,
- vmpage->index,
- list_empty(&vmpage->lru) ? "not-" : "");
- }
-
- (*printer)(env, cookie, "\n");
-
- return 0;
-}
-
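-/* Wired up as cpo_make_ready for CRT_READ below; reads are never pulled
- * from the transfer cache, so reaching this function indicates a bug.
- */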
-static int vvp_page_fail(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- /*
- * Cached read?
- */
- LBUG();
-
- return 0;
-}
-
-static const struct cl_page_operations vvp_page_ops = {
- .cpo_own = vvp_page_own,
- .cpo_assume = vvp_page_assume,
- .cpo_unassume = vvp_page_unassume,
- .cpo_disown = vvp_page_disown,
- .cpo_discard = vvp_page_discard,
- .cpo_delete = vvp_page_delete,
- .cpo_export = vvp_page_export,
- .cpo_is_vmlocked = vvp_page_is_vmlocked,
- .cpo_fini = vvp_page_fini,
- .cpo_print = vvp_page_print,
- .io = {
- [CRT_READ] = {
- .cpo_prep = vvp_page_prep_read,
- .cpo_completion = vvp_page_completion_read,
- .cpo_make_ready = vvp_page_fail,
- },
- [CRT_WRITE] = {
- .cpo_prep = vvp_page_prep_write,
- .cpo_completion = vvp_page_completion_write,
- .cpo_make_ready = vvp_page_make_ready,
- },
- },
-};
-
-static int vvp_transient_page_prep(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- /* transient page should always be sent. */
- return 0;
-}
-
-static int vvp_transient_page_own(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused, int nonblock)
-{
- return 0;
-}
-
-static void vvp_transient_page_assume(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
-}
-
-static void vvp_transient_page_unassume(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
-}
-
-static void vvp_transient_page_disown(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
-}
-
-static void vvp_transient_page_discard(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- struct cl_page *page = slice->cpl_page;
-
- /*
- * For transient pages, remove them from the radix tree.
- */
- cl_page_delete(env, page);
-}
-
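-/* Transient pages are guarded by the inode lock rather than the page lock,
- * so probe i_rwsem: a failed trylock means someone else holds the inode.
- */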
-static int vvp_transient_page_is_vmlocked(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- struct inode *inode = vvp_object_inode(slice->cpl_obj);
- int locked;
-
- locked = !inode_trylock(inode);
- if (!locked)
- inode_unlock(inode);
- return locked ? -EBUSY : -ENODATA;
-}
-
-static void
-vvp_transient_page_completion(const struct lu_env *env,
- const struct cl_page_slice *slice,
- int ioret)
-{
-}
-
-static void vvp_transient_page_fini(const struct lu_env *env,
- struct cl_page_slice *slice)
-{
- struct vvp_page *vpg = cl2vvp_page(slice);
- struct cl_page *clp = slice->cpl_page;
- struct vvp_object *clobj = cl2vvp(clp->cp_obj);
-
- vvp_page_fini_common(vpg);
- atomic_dec(&clobj->vob_transient_pages);
-}
-
-static const struct cl_page_operations vvp_transient_page_ops = {
- .cpo_own = vvp_transient_page_own,
- .cpo_assume = vvp_transient_page_assume,
- .cpo_unassume = vvp_transient_page_unassume,
- .cpo_disown = vvp_transient_page_disown,
- .cpo_discard = vvp_transient_page_discard,
- .cpo_fini = vvp_transient_page_fini,
- .cpo_is_vmlocked = vvp_transient_page_is_vmlocked,
- .cpo_print = vvp_page_print,
- .io = {
- [CRT_READ] = {
- .cpo_prep = vvp_transient_page_prep,
- .cpo_completion = vvp_transient_page_completion,
- },
- [CRT_WRITE] = {
- .cpo_prep = vvp_transient_page_prep,
- .cpo_completion = vvp_transient_page_completion,
- }
- }
-};
-
-int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index)
-{
- struct vvp_page *vpg = cl_object_page_slice(obj, page);
- struct page *vmpage = page->cp_vmpage;
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- vpg->vpg_page = vmpage;
- get_page(vmpage);
-
- if (page->cp_type == CPT_CACHEABLE) {
- /* in cache, decref in vvp_page_delete */
- atomic_inc(&page->cp_ref);
- SetPagePrivate(vmpage);
- vmpage->private = (unsigned long)page;
- cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
- &vvp_page_ops);
- } else {
- struct vvp_object *clobj = cl2vvp(obj);
-
- cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
- &vvp_transient_page_ops);
- atomic_inc(&clobj->vob_transient_pages);
- }
- return 0;
-}
diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c
deleted file mode 100644
index 7fa0a419c094..000000000000
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ /dev/null
@@ -1,665 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/xattr.h>
-#include <linux/selinux.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_support.h>
-#include <lustre_dlm.h>
-
-#include "llite_internal.h"
-
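-/* Map a full xattr name to its handler by matching the registered prefixes
- * in ll_xattr_handlers[]; a NULL result means an unknown namespace, which
- * xattr_type_filter() treats as XATTR_OTHER_T.
- */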
-const struct xattr_handler *get_xattr_type(const char *name)
-{
- int i;
-
- for (i = 0; ll_xattr_handlers[i]; i++) {
- const char *prefix = xattr_prefix(ll_xattr_handlers[i]);
- size_t prefix_len = strlen(prefix);
-
- if (!strncmp(prefix, name, prefix_len))
- return ll_xattr_handlers[i];
- }
-
- return NULL;
-}
-
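-/* Reject xattr types that this mount does not expose: ACLs without
- * LL_SBI_ACL, user xattrs without LL_SBI_USER_XATTR, and trusted xattrs
- * for callers lacking CAP_SYS_ADMIN.
- */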
-static int xattr_type_filter(struct ll_sb_info *sbi,
- const struct xattr_handler *handler)
-{
- /* No handler means XATTR_OTHER_T */
- if (!handler)
- return -EOPNOTSUPP;
-
- if ((handler->flags == XATTR_ACL_ACCESS_T ||
- handler->flags == XATTR_ACL_DEFAULT_T) &&
- !(sbi->ll_flags & LL_SBI_ACL))
- return -EOPNOTSUPP;
-
- if (handler->flags == XATTR_USER_T &&
- !(sbi->ll_flags & LL_SBI_USER_XATTR))
- return -EOPNOTSUPP;
-
- if (handler->flags == XATTR_TRUSTED_T &&
- !capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- return 0;
-}
-
-static int ll_xattr_set_common(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, const void *value, size_t size,
- int flags)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *req = NULL;
- const char *pv = value;
- char *fullname;
- u64 valid;
- int rc;
-
- /* When setxattr() is called with a size of 0 the value is
- * unconditionally replaced by "". When removexattr() is
- * called we get a NULL value and XATTR_REPLACE for flags.
- */
- if (!value && flags == XATTR_REPLACE) {
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1);
- valid = OBD_MD_FLXATTRRM;
- } else {
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1);
- valid = OBD_MD_FLXATTR;
- }
-
- rc = xattr_type_filter(sbi, handler);
- if (rc)
- return rc;
-
- if ((handler->flags == XATTR_ACL_ACCESS_T ||
- handler->flags == XATTR_ACL_DEFAULT_T) &&
- !inode_owner_or_capable(inode))
- return -EPERM;
-
- /* b10667: ignore lustre special xattr for now */
- if (!strcmp(name, "hsm") ||
- ((handler->flags == XATTR_TRUSTED_T && !strcmp(name, "lov")) ||
- (handler->flags == XATTR_LUSTRE_T && !strcmp(name, "lov"))))
- return 0;
-
- /* LU-549: Disable security.selinux when selinux is disabled */
- if (handler->flags == XATTR_SECURITY_T && !selinux_is_enabled() &&
- strcmp(name, "selinux") == 0)
- return -EOPNOTSUPP;
-
- /* FIXME: enable IMA when the conditions are ready */
- if (handler->flags == XATTR_SECURITY_T &&
- (!strcmp(name, "ima") || !strcmp(name, "evm")))
- return -EOPNOTSUPP;
-
- /*
- * In user.* namespace, only regular files and directories can have
- * extended attributes.
- */
- if (handler->flags == XATTR_USER_T) {
- if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
- return -EPERM;
- }
-
- fullname = kasprintf(GFP_KERNEL, "%s%s", handler->prefix, name);
- if (!fullname)
- return -ENOMEM;
-
- rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid, fullname,
- pv, size, flags, ll_i2suppgid(inode), &req);
- kfree(fullname);
- if (rc) {
- if (rc == -EOPNOTSUPP && handler->flags == XATTR_USER_T) {
- LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n");
- sbi->ll_flags &= ~LL_SBI_USER_XATTR;
- }
- return rc;
- }
-
- ptlrpc_req_finished(req);
- return 0;
-}
-
-static int get_hsm_state(struct inode *inode, u32 *hus_states)
-{
- struct md_op_data *op_data;
- struct hsm_user_state *hus;
- int rc;
-
- hus = kzalloc(sizeof(*hus), GFP_NOFS);
- if (!hus)
- return -ENOMEM;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, hus);
- if (!IS_ERR(op_data)) {
- rc = obd_iocontrol(LL_IOC_HSM_STATE_GET, ll_i2mdexp(inode),
- sizeof(*op_data), op_data, NULL);
- if (!rc)
- *hus_states = hus->hus_states;
- else
- CDEBUG(D_VFSTRACE, "obd_iocontrol failed. rc = %d\n",
- rc);
-
- ll_finish_md_op_data(op_data);
- } else {
- rc = PTR_ERR(op_data);
- CDEBUG(D_VFSTRACE, "Could not prepare the opdata. rc = %d\n",
- rc);
- }
- kfree(hus);
- return rc;
-}
-
-static int ll_adjust_lum(struct inode *inode, struct lov_user_md *lump)
-{
- int rc = 0;
-
- if (!lump)
- return 0;
-
- /* Attributes that are saved via getxattr will always have
- * the stripe_offset as 0. Instead, the MDS should be
- * allowed to pick the starting OST index. b=17846
- */
- if (lump->lmm_stripe_offset == 0)
- lump->lmm_stripe_offset = -1;
-
- /* Avoid anyone directly setting the RELEASED flag. */
- if (lump->lmm_pattern & LOV_PATTERN_F_RELEASED) {
- /* The released flag is set, so check whether the file
- * was actually archived.
- */
- u32 state = HS_NONE;
-
- rc = get_hsm_state(inode, &state);
- if (rc)
- return rc;
-
- if (!(state & HS_ARCHIVED)) {
- CDEBUG(D_VFSTRACE,
- "hus_states state = %x, pattern = %x\n",
- state, lump->lmm_pattern);
- /*
- * The file is not actually archived, but the
- * user is requesting the RELEASED flag, so mask
- * the released flag off the request.
- */
- lump->lmm_pattern ^= LOV_PATTERN_F_RELEASED;
- }
- }
-
- return rc;
-}
-
-static int ll_setstripe_ea(struct dentry *dentry, struct lov_user_md *lump,
- size_t size)
-{
- struct inode *inode = d_inode(dentry);
- int rc = 0;
-
- /*
- * An xattr can be set to an empty ("") value of zero size;
- * treat that case as a removal.
- */
- if (!size && lump)
- lump = NULL;
-
- rc = ll_adjust_lum(inode, lump);
- if (rc)
- return rc;
-
- if (lump && S_ISREG(inode->i_mode)) {
- u64 it_flags = FMODE_WRITE;
- ssize_t lum_size;
-
- lum_size = ll_lov_user_md_size(lump);
- if (lum_size < 0 || size < lum_size)
- return -ERANGE;
-
- rc = ll_lov_setstripe_ea_info(inode, dentry, it_flags, lump,
- lum_size);
- /**
- * b=10667: ignore -EEXIST.
- * Silently eat error on setting trusted.lov/lustre.lov
- * attribute for platforms that added the default option
- * to copy all attributes in 'cp' command. Both rsync and
- * tar --xattrs also will try to set LOVEA for existing
- * files.
- */
- if (rc == -EEXIST)
- rc = 0;
- } else if (S_ISDIR(inode->i_mode)) {
- if (size != 0 && size < sizeof(struct lov_user_md))
- return -EINVAL;
-
- rc = ll_dir_setstripe(inode, lump, 0);
- }
-
- return rc;
-}
-
-static int ll_xattr_set(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, const void *value, size_t size,
- int flags)
-{
- LASSERT(inode);
- LASSERT(name);
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), xattr %s\n",
- PFID(ll_inode2fid(inode)), inode, name);
-
- /* lustre.lov.xxx and trusted.lov.xxx are passed in through the xattr API */
- if (!strcmp(name, "lov")) {
- int op_type = flags == XATTR_REPLACE ? LPROC_LL_REMOVEXATTR :
- LPROC_LL_SETXATTR;
-
- ll_stats_ops_tally(ll_i2sbi(inode), op_type, 1);
-
- return ll_setstripe_ea(dentry, (struct lov_user_md *)value,
- size);
- } else if (!strcmp(name, "lma") || !strcmp(name, "link")) {
- int op_type = flags == XATTR_REPLACE ? LPROC_LL_REMOVEXATTR :
- LPROC_LL_SETXATTR;
-
- ll_stats_ops_tally(ll_i2sbi(inode), op_type, 1);
- return 0;
- }
-
- return ll_xattr_set_common(handler, dentry, inode, name, value, size,
- flags);
-}
-
-int ll_xattr_list(struct inode *inode, const char *name, int type, void *buffer,
- size_t size, u64 valid)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *req = NULL;
- struct mdt_body *body;
- void *xdata;
- int rc;
-
- if (sbi->ll_xattr_cache_enabled && type != XATTR_ACL_ACCESS_T &&
- (type != XATTR_SECURITY_T || strcmp(name, "security.selinux"))) {
- rc = ll_xattr_cache_get(inode, name, buffer, size, valid);
- if (rc == -EAGAIN)
- goto getxattr_nocache;
- if (rc < 0)
- goto out_xattr;
-
- /* Add "system.posix_acl_access" to the list */
- if (lli->lli_posix_acl && valid & OBD_MD_FLXATTRLS) {
- if (size == 0) {
- rc += sizeof(XATTR_NAME_ACL_ACCESS);
- } else if (size - rc >= sizeof(XATTR_NAME_ACL_ACCESS)) {
- memcpy(buffer + rc, XATTR_NAME_ACL_ACCESS,
- sizeof(XATTR_NAME_ACL_ACCESS));
- rc += sizeof(XATTR_NAME_ACL_ACCESS);
- } else {
- rc = -ERANGE;
- goto out_xattr;
- }
- }
- } else {
-getxattr_nocache:
- rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid,
- name, size, &req);
- if (rc < 0)
- goto out_xattr;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- LASSERT(body);
-
- /* only detect the xattr size */
- if (size == 0) {
- rc = body->mbo_eadatasize;
- goto out;
- }
-
- if (size < body->mbo_eadatasize) {
- CERROR("server bug: replied size %u > %u\n",
- body->mbo_eadatasize, (int)size);
- rc = -ERANGE;
- goto out;
- }
-
- if (body->mbo_eadatasize == 0) {
- rc = -ENODATA;
- goto out;
- }
-
- /* no need to swab the xattr data */
- xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA,
- body->mbo_eadatasize);
- if (!xdata) {
- rc = -EFAULT;
- goto out;
- }
-
- memcpy(buffer, xdata, body->mbo_eadatasize);
- rc = body->mbo_eadatasize;
- }
-
-out_xattr:
- if (rc == -EOPNOTSUPP && type == XATTR_USER_T) {
- LCONSOLE_INFO(
- "%s: disabling user_xattr feature because it is not supported on the server: rc = %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0), rc);
- sbi->ll_flags &= ~LL_SBI_USER_XATTR;
- }
-out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static int ll_xattr_get_common(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, void *buffer, size_t size)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- char *fullname;
- int rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n",
- PFID(ll_inode2fid(inode)), inode);
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
-
- rc = xattr_type_filter(sbi, handler);
- if (rc)
- return rc;
-
- /* LU-549: Disable security.selinux when selinux is disabled */
- if (handler->flags == XATTR_SECURITY_T && !selinux_is_enabled() &&
- !strcmp(name, "selinux"))
- return -EOPNOTSUPP;
-
-#ifdef CONFIG_FS_POSIX_ACL
- /* POSIX ACLs are protected by the LOOKUP lock. When we get here,
- * path resolution to the target inode has just completed, so there is
- * a good chance the cached ACL is up to date.
- */
- if (handler->flags == XATTR_ACL_ACCESS_T) {
- struct ll_inode_info *lli = ll_i2info(inode);
- struct posix_acl *acl;
-
- spin_lock(&lli->lli_lock);
- acl = posix_acl_dup(lli->lli_posix_acl);
- spin_unlock(&lli->lli_lock);
-
- if (!acl)
- return -ENODATA;
-
- rc = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
- posix_acl_release(acl);
- return rc;
- }
- if (handler->flags == XATTR_ACL_DEFAULT_T && !S_ISDIR(inode->i_mode))
- return -ENODATA;
-#endif
- fullname = kasprintf(GFP_KERNEL, "%s%s", handler->prefix, name);
- if (!fullname)
- return -ENOMEM;
-
- rc = ll_xattr_list(inode, fullname, handler->flags, buffer, size,
- OBD_MD_FLXATTR);
- kfree(fullname);
- return rc;
-}
-
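-/* Fetch the striping (LOV) EA: for regular files via the cl_object layout,
- * for directories via the default stripe from the MDS; a zero buf_size only
- * queries the required size.
- */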
-static ssize_t ll_getxattr_lov(struct inode *inode, void *buf, size_t buf_size)
-{
- ssize_t rc;
-
- if (S_ISREG(inode->i_mode)) {
- struct cl_object *obj = ll_i2info(inode)->lli_clob;
- struct cl_layout cl = {
- .cl_buf.lb_buf = buf,
- .cl_buf.lb_len = buf_size,
- };
- struct lu_env *env;
- u16 refcheck;
-
- if (!obj)
- return -ENODATA;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- rc = cl_object_layout_get(env, obj, &cl);
- if (rc < 0)
- goto out_env;
-
- if (!cl.cl_size) {
- rc = -ENODATA;
- goto out_env;
- }
-
- rc = cl.cl_size;
-
- if (!buf_size)
- goto out_env;
-
- LASSERT(buf && rc <= buf_size);
-
- /*
- * Do not return the layout gen for getxattr(), since it
- * would confuse tar --xattrs by being mistaken for a
- * stripe offset when the file is restored. See LU-2809.
- */
- ((struct lov_mds_md *)buf)->lmm_layout_gen = 0;
-out_env:
- cl_env_put(env, &refcheck);
-
- return rc;
- } else if (S_ISDIR(inode->i_mode)) {
- struct ptlrpc_request *req = NULL;
- struct lov_mds_md *lmm = NULL;
- int lmm_size = 0;
-
- rc = ll_dir_getstripe(inode, (void **)&lmm, &lmm_size,
- &req, 0);
- if (rc < 0)
- goto out_req;
-
- if (!buf_size) {
- rc = lmm_size;
- goto out_req;
- }
-
- if (buf_size < lmm_size) {
- rc = -ERANGE;
- goto out_req;
- }
-
- memcpy(buf, lmm, lmm_size);
- rc = lmm_size;
-out_req:
- if (req)
- ptlrpc_req_finished(req);
-
- return rc;
- } else {
- return -ENODATA;
- }
-}
-
-static int ll_xattr_get(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, void *buffer, size_t size)
-{
- LASSERT(inode);
- LASSERT(name);
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), xattr %s\n",
- PFID(ll_inode2fid(inode)), inode, name);
-
- if (!strcmp(name, "lov")) {
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
-
- return ll_getxattr_lov(inode, buffer, size);
- }
-
- return ll_xattr_get_common(handler, dentry, inode, name, buffer, size);
-}
-
-ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
-{
- struct inode *inode = d_inode(dentry);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- char *xattr_name;
- ssize_t rc, rc2;
- size_t len, rem;
-
- LASSERT(inode);
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n",
- PFID(ll_inode2fid(inode)), inode);
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LISTXATTR, 1);
-
- rc = ll_xattr_list(inode, NULL, XATTR_OTHER_T, buffer, size,
- OBD_MD_FLXATTRLS);
- if (rc < 0)
- return rc;
-
- /*
- * If we're being called to get the size of the xattr list
- * (size == 0) then just assume that a lustre.lov xattr
- * exists.
- */
- if (!size)
- return rc + sizeof(XATTR_LUSTRE_LOV);
-
- xattr_name = buffer;
- rem = rc;
-
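- /* The buffer holds a sequence of \0-terminated names; walk it and
- * compact it in place, dropping names of types this mount does not
- * expose.
- */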
- while (rem > 0) {
- len = strnlen(xattr_name, rem - 1) + 1;
- rem -= len;
- if (!xattr_type_filter(sbi, get_xattr_type(xattr_name))) {
- /* Skip OK xattr type, leave it in buffer. */
- xattr_name += len;
- continue;
- }
-
- /*
- * Compact the buffer: move the remaining xattr names up
- * over the one being removed.
- */
- memmove(xattr_name, xattr_name + len, rem);
- rc -= len;
- }
-
- rc2 = ll_getxattr_lov(inode, NULL, 0);
- if (rc2 == -ENODATA)
- return rc;
-
- if (rc2 < 0)
- return rc2;
-
- if (size < rc + sizeof(XATTR_LUSTRE_LOV))
- return -ERANGE;
-
- memcpy(buffer + rc, XATTR_LUSTRE_LOV, sizeof(XATTR_LUSTRE_LOV));
-
- return rc + sizeof(XATTR_LUSTRE_LOV);
-}
-
-static const struct xattr_handler ll_user_xattr_handler = {
- .prefix = XATTR_USER_PREFIX,
- .flags = XATTR_USER_T,
- .get = ll_xattr_get_common,
- .set = ll_xattr_set_common,
-};
-
-static const struct xattr_handler ll_trusted_xattr_handler = {
- .prefix = XATTR_TRUSTED_PREFIX,
- .flags = XATTR_TRUSTED_T,
- .get = ll_xattr_get,
- .set = ll_xattr_set,
-};
-
-static const struct xattr_handler ll_security_xattr_handler = {
- .prefix = XATTR_SECURITY_PREFIX,
- .flags = XATTR_SECURITY_T,
- .get = ll_xattr_get_common,
- .set = ll_xattr_set_common,
-};
-
-static const struct xattr_handler ll_acl_access_xattr_handler = {
- .name = XATTR_NAME_POSIX_ACL_ACCESS,
- .flags = XATTR_ACL_ACCESS_T,
- .get = ll_xattr_get_common,
- .set = ll_xattr_set_common,
-};
-
-static const struct xattr_handler ll_acl_default_xattr_handler = {
- .name = XATTR_NAME_POSIX_ACL_DEFAULT,
- .flags = XATTR_ACL_DEFAULT_T,
- .get = ll_xattr_get_common,
- .set = ll_xattr_set_common,
-};
-
-static const struct xattr_handler ll_lustre_xattr_handler = {
- .prefix = XATTR_LUSTRE_PREFIX,
- .flags = XATTR_LUSTRE_T,
- .get = ll_xattr_get,
- .set = ll_xattr_set,
-};
-
-const struct xattr_handler *ll_xattr_handlers[] = {
- &ll_user_xattr_handler,
- &ll_trusted_xattr_handler,
- &ll_security_xattr_handler,
-#ifdef CONFIG_FS_POSIX_ACL
- &ll_acl_access_xattr_handler,
- &ll_acl_default_xattr_handler,
-#endif
- &ll_lustre_xattr_handler,
- NULL,
-};
diff --git a/drivers/staging/lustre/lustre/llite/xattr_cache.c b/drivers/staging/lustre/lustre/llite/xattr_cache.c
deleted file mode 100644
index 5da69ba088c4..000000000000
--- a/drivers/staging/lustre/lustre/llite/xattr_cache.c
+++ /dev/null
@@ -1,504 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright 2012 Xyratex Technology Limited
- *
- * Copyright (c) 2013, 2015, Intel Corporation.
- *
- * Author: Andrew Perepechko <Andrew_Perepechko@xyratex.com>
- *
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <obd_support.h>
-#include <lustre_dlm.h>
-#include "llite_internal.h"
-
-/* If we ever have hundreds of extended attributes, we might want to consider
- * using a hash or a tree structure instead of a list for faster lookups.
- */
-struct ll_xattr_entry {
- struct list_head xe_list; /* protected with
- * lli_xattrs_list_rwsem
- */
- char *xe_name; /* xattr name, \0-terminated */
- char *xe_value; /* xattr value */
- unsigned int xe_namelen; /* strlen(xe_name) + 1 */
- unsigned int xe_vallen; /* xattr value length */
-};
-
-static struct kmem_cache *xattr_kmem;
-static struct lu_kmem_descr xattr_caches[] = {
- {
- .ckd_cache = &xattr_kmem,
- .ckd_name = "xattr_kmem",
- .ckd_size = sizeof(struct ll_xattr_entry)
- },
- {
- .ckd_cache = NULL
- }
-};
-
-int ll_xattr_init(void)
-{
- return lu_kmem_init(xattr_caches);
-}
-
-void ll_xattr_fini(void)
-{
- lu_kmem_fini(xattr_caches);
-}
-
-/**
- * Initializes xattr cache for an inode.
- *
- * This initializes the xattr list and marks cache presence.
- */
-static void ll_xattr_cache_init(struct ll_inode_info *lli)
-{
- INIT_LIST_HEAD(&lli->lli_xattrs);
- set_bit(LLIF_XATTR_CACHE, &lli->lli_flags);
-}
-
-/**
- * This looks for a specific extended attribute.
- *
- * Find the @xattr_name attribute in @cache and return it in @xattr;
- * if @xattr_name is NULL, return the first cached attribute.
- *
- * \retval 0 success
- * \retval -ENODATA if not found
- */
-static int ll_xattr_cache_find(struct list_head *cache,
- const char *xattr_name,
- struct ll_xattr_entry **xattr)
-{
- struct ll_xattr_entry *entry;
-
- list_for_each_entry(entry, cache, xe_list) {
- /* xattr_name == NULL means look for any entry */
- if (!xattr_name || strcmp(xattr_name, entry->xe_name) == 0) {
- *xattr = entry;
- CDEBUG(D_CACHE, "find: [%s]=%.*s\n",
- entry->xe_name, entry->xe_vallen,
- entry->xe_value);
- return 0;
- }
- }
-
- return -ENODATA;
-}
-
-/**
- * This adds an xattr.
- *
- * Add the @xattr_name attribute with value @xattr_val of length @xattr_val_len.
- *
- * \retval 0 success
- * \retval -ENOMEM if no memory could be allocated for the cached attr
- * \retval -EPROTO if duplicate xattr is being added
- */
-static int ll_xattr_cache_add(struct list_head *cache,
- const char *xattr_name,
- const char *xattr_val,
- unsigned int xattr_val_len)
-{
- struct ll_xattr_entry *xattr;
-
- if (ll_xattr_cache_find(cache, xattr_name, &xattr) == 0) {
- CDEBUG(D_CACHE, "duplicate xattr: [%s]\n", xattr_name);
- return -EPROTO;
- }
-
- xattr = kmem_cache_zalloc(xattr_kmem, GFP_NOFS);
- if (!xattr) {
- CDEBUG(D_CACHE, "failed to allocate xattr\n");
- return -ENOMEM;
- }
-
- xattr->xe_name = kstrdup(xattr_name, GFP_NOFS);
- if (!xattr->xe_name) {
- CDEBUG(D_CACHE, "failed to alloc xattr name %s\n",
- xattr_name);
- goto err_name;
- }
- xattr->xe_namelen = strlen(xattr_name) + 1;
-
- xattr->xe_value = kmemdup(xattr_val, xattr_val_len, GFP_NOFS);
- if (!xattr->xe_value)
- goto err_value;
-
- xattr->xe_vallen = xattr_val_len;
- list_add(&xattr->xe_list, cache);
-
- CDEBUG(D_CACHE, "set: [%s]=%.*s\n", xattr_name, xattr_val_len,
- xattr_val);
-
- return 0;
-err_value:
- kfree(xattr->xe_name);
-err_name:
- kmem_cache_free(xattr_kmem, xattr);
-
- return -ENOMEM;
-}
-
-/**
- * This removes an extended attribute from cache.
- *
- * Remove @xattr_name attribute from @cache.
- *
- * \retval 0 success
- * \retval -ENODATA if @xattr_name is not cached
- */
-static int ll_xattr_cache_del(struct list_head *cache,
- const char *xattr_name)
-{
- struct ll_xattr_entry *xattr;
-
- CDEBUG(D_CACHE, "del xattr: %s\n", xattr_name);
-
- if (ll_xattr_cache_find(cache, xattr_name, &xattr) == 0) {
- list_del(&xattr->xe_list);
- kfree(xattr->xe_name);
- kfree(xattr->xe_value);
- kmem_cache_free(xattr_kmem, xattr);
-
- return 0;
- }
-
- return -ENODATA;
-}
-
-/**
- * This iterates cached extended attributes.
- *
- * Walk over cached attributes in @cache and
- * fill in @xld_buffer or only calculate buffer
- * size if @xld_buffer is NULL.
- *
- * \retval >= 0 buffer list size
- * \retval -ERANGE if the list does not fit into a buffer of @xld_size bytes
- */
-static int ll_xattr_cache_list(struct list_head *cache,
- char *xld_buffer,
- int xld_size)
-{
- struct ll_xattr_entry *xattr, *tmp;
- int xld_tail = 0;
-
- list_for_each_entry_safe(xattr, tmp, cache, xe_list) {
- CDEBUG(D_CACHE, "list: buffer=%p[%d] name=%s\n",
- xld_buffer, xld_tail, xattr->xe_name);
-
- if (xld_buffer) {
- xld_size -= xattr->xe_namelen;
- if (xld_size < 0)
- break;
- memcpy(&xld_buffer[xld_tail],
- xattr->xe_name, xattr->xe_namelen);
- }
- xld_tail += xattr->xe_namelen;
- }
-
- if (xld_size < 0)
- return -ERANGE;
-
- return xld_tail;
-}
-
-/**
- * Check if the xattr cache is initialized (filled).
- *
- * \retval 0 @cache is not initialized
- * \retval 1 @cache is initialized
- */
-static int ll_xattr_cache_valid(struct ll_inode_info *lli)
-{
- return test_bit(LLIF_XATTR_CACHE, &lli->lli_flags);
-}
-
-/**
- * This finalizes the xattr cache.
- *
- * Free all xattr memory. @lli is the inode info pointer.
- *
- * \retval 0 no error occurred
- */
-static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli)
-{
- if (!ll_xattr_cache_valid(lli))
- return 0;
-
- while (ll_xattr_cache_del(&lli->lli_xattrs, NULL) == 0)
- ; /* empty loop */
-
- clear_bit(LLIF_XATTR_CACHE, &lli->lli_flags);
-
- return 0;
-}
-
-int ll_xattr_cache_destroy(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- int rc;
-
- down_write(&lli->lli_xattrs_list_rwsem);
- rc = ll_xattr_cache_destroy_locked(lli);
- up_write(&lli->lli_xattrs_list_rwsem);
-
- return rc;
-}
-
-/**
- * Match or enqueue a PR lock.
- *
- * Find or request an LDLM lock with xattr data.
- * Since LDLM provides no API for an atomic match-or-enqueue,
- * the function serializes the two steps with a separate enqueue mutex.
- * If successful, the function exits with the list lock held.
- *
- * \retval 0 no error occurred
- * \retval -ENOMEM not enough memory
- */
-static int ll_xattr_find_get_lock(struct inode *inode,
- struct lookup_intent *oit,
- struct ptlrpc_request **req)
-{
- enum ldlm_mode mode;
- struct lustre_handle lockh = { 0 };
- struct md_op_data *op_data;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct obd_export *exp = sbi->ll_md_exp;
- int rc;
-
- mutex_lock(&lli->lli_xattrs_enq_lock);
- /* The inode may have been shrunk and recreated, in which case the
- * cached data is gone; only try to match an existing lock when the
- * cache is valid.
- */
- if (ll_xattr_cache_valid(lli)) {
- /* Try matching first. */
- mode = ll_take_md_lock(inode, MDS_INODELOCK_XATTR, &lockh, 0,
- LCK_PR);
- if (mode != 0) {
- /* fake oit in mdc_revalidate_lock() manner */
- oit->it_lock_handle = lockh.cookie;
- oit->it_lock_mode = mode;
- goto out;
- }
- }
-
- /* Enqueue if the lock isn't cached locally. */
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data)) {
- mutex_unlock(&lli->lli_xattrs_enq_lock);
- return PTR_ERR(op_data);
- }
-
- op_data->op_valid = OBD_MD_FLXATTR | OBD_MD_FLXATTRLS;
-
- rc = md_intent_lock(exp, op_data, oit, req, &ll_md_blocking_ast, 0);
- ll_finish_md_op_data(op_data);
- *req = oit->it_request;
-
- if (rc < 0) {
- CDEBUG(D_CACHE,
- "md_intent_lock failed with %d for fid " DFID "\n",
- rc, PFID(ll_inode2fid(inode)));
- mutex_unlock(&lli->lli_xattrs_enq_lock);
- return rc;
- }
-
-out:
- down_write(&lli->lli_xattrs_list_rwsem);
- mutex_unlock(&lli->lli_xattrs_enq_lock);
-
- return 0;
-}
-
-/**
- * Refill the xattr cache.
- *
- * Fetch and cache the whole of xattrs for @inode, acquiring a read lock.
- *
- * \retval 0 no error occurred
- * \retval -EPROTO network protocol error
- * \retval -ENOMEM not enough memory for the cache
- */
-static int ll_xattr_cache_refill(struct inode *inode)
-{
- struct lookup_intent oit = { .it_op = IT_GETXATTR };
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *req = NULL;
- const char *xdata, *xval, *xtail, *xvtail;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct mdt_body *body;
- __u32 *xsizes;
- int rc, i;
-
- rc = ll_xattr_find_get_lock(inode, &oit, &req);
- if (rc)
- goto err_req;
-
- /* Do we have the data at this point? */
- if (ll_xattr_cache_valid(lli)) {
- ll_stats_ops_tally(sbi, LPROC_LL_GETXATTR_HITS, 1);
- ll_intent_drop_lock(&oit);
- rc = 0;
- goto err_req;
- }
-
- /* Matched but no cache? Cancelled on error by a parallel refill. */
- if (unlikely(!req)) {
- CDEBUG(D_CACHE, "cancelled by a parallel getxattr\n");
- ll_intent_drop_lock(&oit);
- rc = -EAGAIN;
- goto err_unlock;
- }
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!body) {
- CERROR("no MDT BODY in the refill xattr reply\n");
- rc = -EPROTO;
- goto err_cancel;
- }
- /* no need to swab the xattr data */
- xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA,
- body->mbo_eadatasize);
- xval = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS,
- body->mbo_aclsize);
- xsizes = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS_LENS,
- body->mbo_max_mdsize * sizeof(__u32));
- if (!xdata || !xval || !xsizes) {
- CERROR("wrong setxattr reply\n");
- rc = -EPROTO;
- goto err_cancel;
- }
-
- xtail = xdata + body->mbo_eadatasize;
- xvtail = xval + body->mbo_aclsize;
-
- CDEBUG(D_CACHE, "caching: xdata=%p xtail=%p\n", xdata, xtail);
-
- ll_xattr_cache_init(lli);
-
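- /* The reply carries three parallel arrays: \0-terminated names in
- * EADATA, concatenated values in EAVALS, and per-entry value lengths
- * in EAVALS_LENS; walk them in lockstep.
- */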
- for (i = 0; i < body->mbo_max_mdsize; i++) {
- CDEBUG(D_CACHE, "caching [%s]=%.*s\n", xdata, *xsizes, xval);
- /* Perform consistency checks: attr names and vals in pill */
- if (!memchr(xdata, 0, xtail - xdata)) {
- CERROR("xattr protocol violation (names are broken)\n");
- rc = -EPROTO;
- } else if (xval + *xsizes > xvtail) {
- CERROR("xattr protocol violation (vals are broken)\n");
- rc = -EPROTO;
- } else if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_XATTR_ENOMEM)) {
- rc = -ENOMEM;
- } else if (!strcmp(xdata, XATTR_NAME_ACL_ACCESS)) {
- /* Filter out ACL ACCESS since it's cached separately */
- CDEBUG(D_CACHE, "not caching %s\n",
- XATTR_NAME_ACL_ACCESS);
- rc = 0;
- } else if (!strcmp(xdata, "security.selinux")) {
- /* Filter out security.selinux, it is cached in slab */
- CDEBUG(D_CACHE, "not caching security.selinux\n");
- rc = 0;
- } else {
- rc = ll_xattr_cache_add(&lli->lli_xattrs, xdata, xval,
- *xsizes);
- }
- if (rc < 0) {
- ll_xattr_cache_destroy_locked(lli);
- goto err_cancel;
- }
- xdata += strlen(xdata) + 1;
- xval += *xsizes;
- xsizes++;
- }
-
- if (xdata != xtail || xval != xvtail)
- CERROR("a hole in xattr data\n");
-
- ll_set_lock_data(sbi->ll_md_exp, inode, &oit, NULL);
- ll_intent_drop_lock(&oit);
-
- ptlrpc_req_finished(req);
- return rc;
-
-err_cancel:
- ldlm_lock_decref_and_cancel((struct lustre_handle *)
- &oit.it_lock_handle,
- oit.it_lock_mode);
-err_unlock:
- up_write(&lli->lli_xattrs_list_rwsem);
-err_req:
- if (rc == -ERANGE)
- rc = -EAGAIN;
-
- ptlrpc_req_finished(req);
- return rc;
-}
-
-/**
- * Get an xattr value or list xattrs using the write-through cache.
- *
- * Get the xattr value (@valid has OBD_MD_FLXATTR set) of @name or
- * list xattr names (@valid has OBD_MD_FLXATTRLS set) for @inode.
- * The resulting value/list is stored in @buffer if it is not
- * larger than @size.
- *
- * \retval 0 no error occurred
- * \retval -EPROTO network protocol error
- * \retval -ENOMEM not enough memory for the cache
- * \retval -ERANGE the buffer is not large enough
- * \retval -ENODATA no such attr or the list is empty
- */
-int ll_xattr_cache_get(struct inode *inode, const char *name, char *buffer,
- size_t size, __u64 valid)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- int rc = 0;
-
- LASSERT(!!(valid & OBD_MD_FLXATTR) ^ !!(valid & OBD_MD_FLXATTRLS));
-
- down_read(&lli->lli_xattrs_list_rwsem);
- if (!ll_xattr_cache_valid(lli)) {
- up_read(&lli->lli_xattrs_list_rwsem);
- rc = ll_xattr_cache_refill(inode);
- if (rc)
- return rc;
- downgrade_write(&lli->lli_xattrs_list_rwsem);
- } else {
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR_HITS, 1);
- }
-
- if (valid & OBD_MD_FLXATTR) {
- struct ll_xattr_entry *xattr;
-
- rc = ll_xattr_cache_find(&lli->lli_xattrs, name, &xattr);
- if (rc == 0) {
- rc = xattr->xe_vallen;
- /* a zero size means the caller only wants the size, returned in rc */
- if (size != 0) {
- if (size >= xattr->xe_vallen)
- memcpy(buffer, xattr->xe_value,
- xattr->xe_vallen);
- else
- rc = -ERANGE;
- }
- }
- } else if (valid & OBD_MD_FLXATTRLS) {
- rc = ll_xattr_cache_list(&lli->lli_xattrs,
- size ? buffer : NULL, size);
- }
-
- up_read(&lli->lli_xattrs_list_rwsem);
-
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/llite/xattr_security.c b/drivers/staging/lustre/lustre/llite/xattr_security.c
deleted file mode 100644
index 93ec07531ac7..000000000000
--- a/drivers/staging/lustre/lustre/llite/xattr_security.c
+++ /dev/null
@@ -1,96 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright (c) 2014 Bull SAS
- * Author: Sebastien Buisson sebastien.buisson@bull.net
- */
-
-/*
- * lustre/llite/xattr_security.c
- * Handler for storing security labels as extended attributes.
- */
-
-#include <linux/types.h>
-#include <linux/security.h>
-#include <linux/selinux.h>
-#include <linux/xattr.h>
-#include "llite_internal.h"
-
-/**
- * A helper function for ll_security_inode_init_security()
- * that takes care of setting xattrs
- *
- * Get security context of @inode from @xattr_array,
- * and put it in 'security.xxx' xattr of dentry
- * stored in @fs_info.
- *
- * \retval 0 success
- * \retval -ENOMEM if no memory could be allocated for xattr name
- * \retval < 0 failure to set xattr
- */
-static int
-ll_initxattrs(struct inode *inode, const struct xattr *xattr_array,
- void *fs_info)
-{
- struct dentry *dentry = fs_info;
- const struct xattr *xattr;
- int err = 0;
-
- for (xattr = xattr_array; xattr->name; xattr++) {
- char *full_name;
-
- full_name = kasprintf(GFP_KERNEL, "%s%s",
- XATTR_SECURITY_PREFIX, xattr->name);
- if (!full_name) {
- err = -ENOMEM;
- break;
- }
-
- err = __vfs_setxattr(dentry, inode, full_name, xattr->value,
- xattr->value_len, XATTR_CREATE);
- kfree(full_name);
- if (err < 0)
- break;
- }
- return err;
-}
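-
-/*
- * The name-building step above, as a plain-C sketch: kasprintf() is
- * approximated with malloc() + snprintf(). Purely illustrative; the
- * caller frees the result, just as ll_initxattrs() does.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-static char *prefixed_name(const char *prefix, const char *name)
-{
- size_t len = strlen(prefix) + strlen(name) + 1;
- char *full = malloc(len);
-
- if (full)
- snprintf(full, len, "%s%s", prefix, name);
- return full;
-}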
-
-/**
- * Initializes security context
- *
- * Get security context of @inode in @dir,
- * and put it in 'security.xxx' xattr of @dentry.
- *
- * \retval 0 success, or SELinux is disabled
- * \retval -ENOMEM if no memory could be allocated for xattr name
- * \retval < 0 failure to get security context or set xattr
- */
-int
-ll_init_security(struct dentry *dentry, struct inode *inode, struct inode *dir)
-{
- if (!selinux_is_enabled())
- return 0;
-
- return security_inode_init_security(inode, dir, NULL,
- &ll_initxattrs, dentry);
-}
diff --git a/drivers/staging/lustre/lustre/lmv/Makefile b/drivers/staging/lustre/lustre/lmv/Makefile
deleted file mode 100644
index 91c99114aa13..000000000000
--- a/drivers/staging/lustre/lustre/lmv/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LUSTRE_FS) += lmv.o
-lmv-y := lmv_obd.o lmv_intent.o lmv_fld.o lproc_lmv.o
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_fld.c b/drivers/staging/lustre/lustre/lmv/lmv_fld.c
deleted file mode 100644
index 00dc858c10c9..000000000000
--- a/drivers/staging/lustre/lustre/lmv/lmv_fld.c
+++ /dev/null
@@ -1,82 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2013, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LMV
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/pagemap.h>
-#include <asm/div64.h>
-#include <linux/seq_file.h>
-
-#include <obd_support.h>
-#include <lustre_fid.h>
-#include <lustre_lib.h>
-#include <lustre_net.h>
-#include <lustre_dlm.h>
-#include <obd_class.h>
-#include <lprocfs_status.h>
-#include "lmv_internal.h"
-
-int lmv_fld_lookup(struct lmv_obd *lmv, const struct lu_fid *fid, u32 *mds)
-{
- struct obd_device *obd = lmv2obd_dev(lmv);
- int rc;
-
- /*
- * FIXME: ZFS unfortunately still uses a local seq for ROOT, so
- * this fid_is_local check should be removed once LU-2240 is fixed.
- */
- if (!fid_is_sane(fid) || !(fid_seq_in_fldb(fid_seq(fid)) ||
- fid_seq_is_local_file(fid_seq(fid)))) {
- CERROR("%s: invalid FID " DFID "\n", obd->obd_name, PFID(fid));
- return -EINVAL;
- }
-
- rc = fld_client_lookup(&lmv->lmv_fld, fid_seq(fid), mds,
- LU_SEQ_RANGE_MDT, NULL);
- if (rc) {
- CERROR("Error while looking for mds number. Seq %#llx, err = %d\n",
- fid_seq(fid), rc);
- return rc;
- }
-
- CDEBUG(D_INODE, "FLD lookup got mds #%x for fid=" DFID "\n",
- *mds, PFID(fid));
-
- if (*mds >= lmv->desc.ld_tgt_count) {
- CERROR("FLD lookup got invalid mds #%x (max: %x) for fid=" DFID "\n", *mds, lmv->desc.ld_tgt_count,
- PFID(fid));
- rc = -EINVAL;
- }
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_intent.c b/drivers/staging/lustre/lustre/lmv/lmv_intent.c
deleted file mode 100644
index 1e850fdbc623..000000000000
--- a/drivers/staging/lustre/lustre/lmv/lmv_intent.c
+++ /dev/null
@@ -1,521 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LMV
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/pagemap.h>
-#include <asm/div64.h>
-#include <linux/seq_file.h>
-#include <linux/namei.h>
-#include <lustre_intent.h>
-#include <obd_support.h>
-#include <lustre_lib.h>
-#include <lustre_net.h>
-#include <lustre_dlm.h>
-#include <lustre_mdc.h>
-#include <obd_class.h>
-#include <lprocfs_status.h>
-#include "lmv_internal.h"
-
-static int lmv_intent_remote(struct obd_export *exp, struct lookup_intent *it,
- const struct lu_fid *parent_fid,
- struct ptlrpc_request **reqp,
- ldlm_blocking_callback cb_blocking,
- __u64 extra_lock_flags)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct ptlrpc_request *req = NULL;
- struct lustre_handle plock;
- struct md_op_data *op_data;
- struct lmv_tgt_desc *tgt;
- struct mdt_body *body;
- int pmode;
- int rc = 0;
-
- body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
- if (!body)
- return -EPROTO;
-
- LASSERT((body->mbo_valid & OBD_MD_MDS));
-
- /*
- * Unfortunately, we have to lie to MDC/MDS to retrieve
- * attributes llite needs and provide proper locking.
- */
- if (it->it_op & IT_LOOKUP)
- it->it_op = IT_GETATTR;
-
- /*
- * We got LOOKUP lock, but we really need attrs.
- */
- pmode = it->it_lock_mode;
- if (pmode) {
- plock.cookie = it->it_lock_handle;
- it->it_lock_mode = 0;
- it->it_request = NULL;
- }
-
- LASSERT(fid_is_sane(&body->mbo_fid1));
-
- tgt = lmv_find_target(lmv, &body->mbo_fid1);
- if (IS_ERR(tgt)) {
- rc = PTR_ERR(tgt);
- goto out;
- }
-
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- if (!op_data) {
- rc = -ENOMEM;
- goto out;
- }
-
- op_data->op_fid1 = body->mbo_fid1;
- /* Send the parent FID to the remote MDT */
- if (parent_fid) {
- /* The parent fid is only for remote open to
- * check whether the open is from OBF,
- * see mdt_cross_open
- */
- LASSERT(it->it_op & IT_OPEN);
- op_data->op_fid2 = *parent_fid;
- }
-
- op_data->op_bias = MDS_CROSS_REF;
- CDEBUG(D_INODE, "REMOTE_INTENT with fid=" DFID " -> mds #%u\n",
- PFID(&body->mbo_fid1), tgt->ltd_idx);
-
- rc = md_intent_lock(tgt->ltd_exp, op_data, it, &req, cb_blocking,
- extra_lock_flags);
- if (rc)
- goto out_free_op_data;
-
- /*
- * LLite needs LOOKUP lock to track dentry revocation in order to
- * maintain dcache consistency. Thus drop UPDATE|PERM lock here
- * and put LOOKUP in request.
- */
- if (it->it_lock_mode != 0) {
- it->it_remote_lock_handle =
- it->it_lock_handle;
- it->it_remote_lock_mode = it->it_lock_mode;
- }
-
- if (pmode) {
- it->it_lock_handle = plock.cookie;
- it->it_lock_mode = pmode;
- }
-
-out_free_op_data:
- kfree(op_data);
-out:
- if (rc && pmode)
- ldlm_lock_decref(&plock, pmode);
-
- ptlrpc_req_finished(*reqp);
- *reqp = req;
- return rc;
-}
-
-int lmv_revalidate_slaves(struct obd_export *exp,
- const struct lmv_stripe_md *lsm,
- ldlm_blocking_callback cb_blocking,
- int extra_lock_flags)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct ptlrpc_request *req = NULL;
- struct mdt_body *body;
- struct md_op_data *op_data;
- int rc = 0, i;
-
- /*
- * Revalidating slaves has some problems; we may not
- * need it at all.
- */
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- if (!op_data)
- return -ENOMEM;
-
- /*
- * Loop over the stripe information, check validity, and update
- * it from the MDS if needed.
- */
- for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
- struct lookup_intent it = { .it_op = IT_GETATTR };
- struct lustre_handle *lockh = NULL;
- struct lmv_tgt_desc *tgt = NULL;
- struct inode *inode;
- struct lu_fid fid;
-
- fid = lsm->lsm_md_oinfo[i].lmo_fid;
- inode = lsm->lsm_md_oinfo[i].lmo_root;
-
- /*
- * Prepare op_data for revalidating. Note that @fid2 should be
- * set; otherwise the request will go to the server and take a
- * new lock, which is not needed here.
- */
- memset(op_data, 0, sizeof(*op_data));
- op_data->op_fid1 = fid;
- op_data->op_fid2 = fid;
-
- tgt = lmv_locate_mds(lmv, op_data, &fid);
- if (IS_ERR(tgt)) {
- rc = PTR_ERR(tgt);
- goto cleanup;
- }
-
- CDEBUG(D_INODE, "Revalidate slave " DFID " -> mds #%u\n",
- PFID(&fid), tgt->ltd_idx);
-
- if (req) {
- ptlrpc_req_finished(req);
- req = NULL;
- }
-
- rc = md_intent_lock(tgt->ltd_exp, op_data, &it, &req,
- cb_blocking, extra_lock_flags);
- if (rc < 0)
- goto cleanup;
-
- lockh = (struct lustre_handle *)&it.it_lock_handle;
- if (rc > 0 && !req) {
- /* slave inode is still valid */
- CDEBUG(D_INODE, "slave " DFID " is still valid.\n",
- PFID(&fid));
- rc = 0;
- } else {
- /* refresh slave from server */
- body = req_capsule_server_get(&req->rq_pill,
- &RMF_MDT_BODY);
- if (!body) {
- if (it.it_lock_mode && lockh) {
- ldlm_lock_decref(lockh, it.it_lock_mode);
- it.it_lock_mode = 0;
- }
-
- rc = -ENOENT;
- goto cleanup;
- }
-
- i_size_write(inode, body->mbo_size);
- inode->i_blocks = body->mbo_blocks;
- set_nlink(inode, body->mbo_nlink);
- LTIME_S(inode->i_atime) = body->mbo_atime;
- LTIME_S(inode->i_ctime) = body->mbo_ctime;
- LTIME_S(inode->i_mtime) = body->mbo_mtime;
- }
-
- md_set_lock_data(tgt->ltd_exp, lockh, inode, NULL);
-
- if (it.it_lock_mode && lockh) {
- ldlm_lock_decref(lockh, it.it_lock_mode);
- it.it_lock_mode = 0;
- }
- }
-
-cleanup:
- if (req)
- ptlrpc_req_finished(req);
-
- kfree(op_data);
- return rc;
-}
-
-/*
- * IT_OPEN is intended to open (and possibly create) an object. The parent
- * (pid) may be a split (striped) directory.
- */
-static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
- struct lookup_intent *it,
- struct ptlrpc_request **reqp,
- ldlm_blocking_callback cb_blocking,
- __u64 extra_lock_flags)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
- struct mdt_body *body;
- int rc;
-
- if (it->it_flags & MDS_OPEN_BY_FID) {
- LASSERT(fid_is_sane(&op_data->op_fid2));
-
- /*
- * For a striped directory, we can't know the parent stripe FID
- * without the name, but we can set it to the child FID; the MDT
- * will then obtain it from the linkea during open.
- */
- if (op_data->op_mea1)
- op_data->op_fid1 = op_data->op_fid2;
-
- tgt = lmv_find_target(lmv, &op_data->op_fid2);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- op_data->op_mds = tgt->ltd_idx;
- } else {
- LASSERT(fid_is_sane(&op_data->op_fid1));
- LASSERT(fid_is_zero(&op_data->op_fid2));
- LASSERT(op_data->op_name);
-
- tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
- }
-
- /* If the file is to be opened by FID, there is no need to
- * allocate a FID at all; doing so would confuse the MDT.
- */
- if ((it->it_op & IT_CREAT) && !(it->it_flags & MDS_OPEN_BY_FID)) {
- /*
- * For lookup(IT_CREATE) cases allocate new fid and setup FLD
- * for it.
- */
- rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
- if (rc != 0)
- return rc;
- }
-
- CDEBUG(D_INODE, "OPEN_INTENT with fid1=" DFID ", fid2=" DFID ", name='%s' -> mds #%u\n",
- PFID(&op_data->op_fid1),
- PFID(&op_data->op_fid2), op_data->op_name, tgt->ltd_idx);
-
- rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp, cb_blocking,
- extra_lock_flags);
- if (rc != 0)
- return rc;
- /*
- * Nothing was found; do not access body->mbo_fid1 as it is zero
- * and thus meaningless.
- */
- if ((it->it_disposition & DISP_LOOKUP_NEG) &&
- !(it->it_disposition & DISP_OPEN_CREATE) &&
- !(it->it_disposition & DISP_OPEN_OPEN))
- return rc;
-
- body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
- if (!body)
- return -EPROTO;
-
- /* Unless this is the cross-ref case, we are done here. */
- if (unlikely((body->mbo_valid & OBD_MD_MDS))) {
- rc = lmv_intent_remote(exp, it, &op_data->op_fid1, reqp,
- cb_blocking, extra_lock_flags);
- if (rc != 0)
- return rc;
-
- body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
- if (!body)
- return -EPROTO;
- }
-
- return rc;
-}
-
-/*
- * Handler for: getattr, lookup and revalidate cases.
- */
-static int lmv_intent_lookup(struct obd_export *exp,
- struct md_op_data *op_data,
- struct lookup_intent *it,
- struct ptlrpc_request **reqp,
- ldlm_blocking_callback cb_blocking,
- __u64 extra_lock_flags)
-{
- struct lmv_stripe_md *lsm = op_data->op_mea1;
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt = NULL;
- struct mdt_body *body;
- int rc = 0;
-
- /*
- * If this returns ERR_PTR(-EBADFD), the hash type is unknown and
- * all stripes will be tried to locate the object.
- */
- tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
- if (IS_ERR(tgt) && (PTR_ERR(tgt) != -EBADFD))
- return PTR_ERR(tgt);
-
- /*
- * Both a migrating directory and a directory with an unknown
- * hash type need to try all of their sub-stripes.
- */
- if (lsm && !lmv_is_known_hash_type(lsm->lsm_md_hash_type)) {
- struct lmv_oinfo *oinfo = &lsm->lsm_md_oinfo[0];
-
- op_data->op_fid1 = oinfo->lmo_fid;
- op_data->op_mds = oinfo->lmo_mds;
- tgt = lmv_get_target(lmv, oinfo->lmo_mds, NULL);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
- }
-
- if (!fid_is_sane(&op_data->op_fid2))
- fid_zero(&op_data->op_fid2);
-
- CDEBUG(D_INODE, "LOOKUP_INTENT with fid1=" DFID ", fid2=" DFID ", name='%s' -> mds #%u lsm=%p lsm_magic=%x\n",
- PFID(&op_data->op_fid1), PFID(&op_data->op_fid2),
- op_data->op_name ? op_data->op_name : "<NULL>",
- tgt->ltd_idx, lsm, !lsm ? -1 : lsm->lsm_md_magic);
-
- op_data->op_bias &= ~MDS_CROSS_REF;
-
- rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp, cb_blocking,
- extra_lock_flags);
- if (rc < 0)
- return rc;
-
- if (!*reqp) {
- /*
- * If an RPC happens, the lsm information will be revalidated
- * during the update_inode process (see ll_update_lsm_md).
- */
- if (op_data->op_mea2) {
- rc = lmv_revalidate_slaves(exp, op_data->op_mea2,
- cb_blocking,
- extra_lock_flags);
- if (rc != 0)
- return rc;
- }
- return rc;
- } else if (it_disposition(it, DISP_LOOKUP_NEG) && lsm &&
- lmv_need_try_all_stripes(lsm)) {
- /*
- * For a migrating or unknown-hash-type directory, try to
- * find the entry on the other stripes.
- */
- int stripe_index;
-
- for (stripe_index = 1;
- stripe_index < lsm->lsm_md_stripe_count &&
- it_disposition(it, DISP_LOOKUP_NEG); stripe_index++) {
- struct lmv_oinfo *oinfo;
-
- /* release the previous request */
- ptlrpc_req_finished(*reqp);
- it->it_request = NULL;
- *reqp = NULL;
-
- oinfo = &lsm->lsm_md_oinfo[stripe_index];
- tgt = lmv_find_target(lmv, &oinfo->lmo_fid);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- CDEBUG(D_INODE, "Try other stripes " DFID "\n",
- PFID(&oinfo->lmo_fid));
-
- op_data->op_fid1 = oinfo->lmo_fid;
- it->it_disposition &= ~DISP_ENQ_COMPLETE;
- rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp,
- cb_blocking, extra_lock_flags);
- if (rc)
- return rc;
- }
- }
-
- if (!it_has_reply_body(it))
- return 0;
-
- /*
- * The MDS has returned success. The name may have been resolved
- * into a remote inode; check for this.
- */
- body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
- if (!body)
- return -EPROTO;
-
- /* Unless this is the cross-ref case, we are done here. */
- if (unlikely((body->mbo_valid & OBD_MD_MDS))) {
- rc = lmv_intent_remote(exp, it, NULL, reqp, cb_blocking,
- extra_lock_flags);
- if (rc != 0)
- return rc;
- body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
- if (!body)
- return -EPROTO;
- }
-
- return rc;
-}
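-
-/*
- * The retry loop above, reduced to a sketch: stripe 0 has already
- * been tried by the caller, so the remaining stripes are probed in
- * order until the name is found. The stripe struct is hypothetical.
- */
-#include <string.h>
-
-struct stripe { const char *entry; };
-
-static int find_on_other_stripes(const struct stripe *stripes, int count,
- const char *name)
-{
- int i;
-
- for (i = 1; i < count; i++)
- if (stripes[i].entry && !strcmp(stripes[i].entry, name))
- return i;
- return -1; /* negative lookup on every stripe */
-}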
-
-int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
- struct lookup_intent *it, struct ptlrpc_request **reqp,
- ldlm_blocking_callback cb_blocking,
- __u64 extra_lock_flags)
-{
- int rc;
-
- LASSERT(fid_is_sane(&op_data->op_fid1));
-
- CDEBUG(D_INODE, "INTENT LOCK '%s' for " DFID " '%*s' on " DFID "\n",
- LL_IT2STR(it), PFID(&op_data->op_fid2),
- (int)op_data->op_namelen, op_data->op_name,
- PFID(&op_data->op_fid1));
-
- if (it->it_op & (IT_LOOKUP | IT_GETATTR | IT_LAYOUT | IT_GETXATTR))
- rc = lmv_intent_lookup(exp, op_data, it, reqp, cb_blocking,
- extra_lock_flags);
- else if (it->it_op & IT_OPEN)
- rc = lmv_intent_open(exp, op_data, it, reqp, cb_blocking,
- extra_lock_flags);
- else
- LBUG();
-
- if (rc < 0) {
- struct lustre_handle lock_handle;
-
- if (it->it_lock_mode) {
- lock_handle.cookie = it->it_lock_handle;
- ldlm_lock_decref_and_cancel(&lock_handle,
- it->it_lock_mode);
- }
-
- it->it_lock_handle = 0;
- it->it_lock_mode = 0;
-
- if (it->it_remote_lock_mode) {
- lock_handle.cookie = it->it_remote_lock_handle;
- ldlm_lock_decref_and_cancel(&lock_handle,
- it->it_remote_lock_mode);
- }
-
- it->it_remote_lock_handle = 0;
- it->it_remote_lock_mode = 0;
- }
-
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_internal.h b/drivers/staging/lustre/lustre/lmv/lmv_internal.h
deleted file mode 100644
index 68a99170c424..000000000000
--- a/drivers/staging/lustre/lustre/lmv/lmv_internal.h
+++ /dev/null
@@ -1,164 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef _LMV_INTERNAL_H_
-#define _LMV_INTERNAL_H_
-
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <obd.h>
-#include <lustre_lmv.h>
-
-#define LMV_MAX_TGT_COUNT 128
-
-#define LL_IT2STR(it) \
- ((it) ? ldlm_it2str((it)->it_op) : "0")
-
-int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
- struct lookup_intent *it, struct ptlrpc_request **reqp,
- ldlm_blocking_callback cb_blocking,
- __u64 extra_lock_flags);
-
-int lmv_fld_lookup(struct lmv_obd *lmv, const struct lu_fid *fid, u32 *mds);
-int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, u32 mds);
-int lmv_fid_alloc(const struct lu_env *env, struct obd_export *exp,
- struct lu_fid *fid, struct md_op_data *op_data);
-
-int lmv_revalidate_slaves(struct obd_export *exp,
- const struct lmv_stripe_md *lsm,
- ldlm_blocking_callback cb_blocking,
- int extra_lock_flags);
-
-static inline struct obd_device *lmv2obd_dev(struct lmv_obd *lmv)
-{
- return container_of_safe(lmv, struct obd_device, u.lmv);
-}
-
-static inline struct lmv_tgt_desc *
-lmv_get_target(struct lmv_obd *lmv, u32 mdt_idx, int *index)
-{
- int i;
-
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- if (!lmv->tgts[i])
- continue;
-
- if (lmv->tgts[i]->ltd_idx == mdt_idx) {
- if (index)
- *index = i;
- return lmv->tgts[i];
- }
- }
-
- return ERR_PTR(-ENODEV);
-}
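-
-/*
- * lmv_get_target() returns either a valid pointer or an errno encoded
- * with ERR_PTR(). A self-contained sketch of that kernel convention,
- * assuming the usual 4095-errno reservation at the top of the address
- * space:
- */
-static inline void *err_ptr(long err)
-{
- return (void *)err; /* errnos map into the top page */
-}
-
-static inline long ptr_err(const void *ptr)
-{
- return (long)ptr;
-}
-
-static inline int is_err(const void *ptr)
-{
- return (unsigned long)ptr >= (unsigned long)-4095;
-}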
-
-static inline int
-lmv_find_target_index(struct lmv_obd *lmv, const struct lu_fid *fid)
-{
- struct lmv_tgt_desc *ltd;
- u32 mdt_idx = 0;
- int index = 0;
-
- if (lmv->desc.ld_tgt_count > 1) {
- int rc;
-
- rc = lmv_fld_lookup(lmv, fid, &mdt_idx);
- if (rc < 0)
- return rc;
- }
-
- ltd = lmv_get_target(lmv, mdt_idx, &index);
- if (IS_ERR(ltd))
- return PTR_ERR(ltd);
-
- return index;
-}
-
-static inline struct lmv_tgt_desc *
-lmv_find_target(struct lmv_obd *lmv, const struct lu_fid *fid)
-{
- int index;
-
- index = lmv_find_target_index(lmv, fid);
- if (index < 0)
- return ERR_PTR(index);
-
- return lmv->tgts[index];
-}
-
-static inline int lmv_stripe_md_size(int stripe_count)
-{
- struct lmv_stripe_md *lsm;
-
- return sizeof(*lsm) + stripe_count * sizeof(lsm->lsm_md_oinfo[0]);
-}
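-
-/*
- * lmv_stripe_md_size() is the standard sizing idiom for a struct
- * ending in a flexible array member; the same idiom as a generic,
- * self-contained sketch (struct vec is hypothetical):
- */
-#include <stdlib.h>
-
-struct vec { int count; int slot[]; };
-
-static struct vec *vec_alloc(int count)
-{
- struct vec *v = malloc(sizeof(*v) + count * sizeof(v->slot[0]));
-
- if (v)
- v->count = count;
- return v;
-}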
-
-int lmv_name_to_stripe_index(enum lmv_hash_type hashtype,
- unsigned int max_mdt_index,
- const char *name, int namelen);
-
-static inline const struct lmv_oinfo *
-lsm_name_to_stripe_info(const struct lmv_stripe_md *lsm, const char *name,
- int namelen)
-{
- int stripe_index;
-
- stripe_index = lmv_name_to_stripe_index(lsm->lsm_md_hash_type,
- lsm->lsm_md_stripe_count,
- name, namelen);
- if (stripe_index < 0)
- return ERR_PTR(stripe_index);
-
- LASSERTF(stripe_index < lsm->lsm_md_stripe_count,
- "stripe_index = %d, stripe_count = %d hash_type = %x name = %.*s\n",
- stripe_index, lsm->lsm_md_stripe_count,
- lsm->lsm_md_hash_type, namelen, name);
-
- return &lsm->lsm_md_oinfo[stripe_index];
-}
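-
-/*
- * lmv_name_to_stripe_index() maps a name to a stripe by hashing it
- * modulo the stripe count. Lustre defines its own hash functions; the
- * djb2 hash below is only a stand-in to show the shape:
- */
-static unsigned int name_to_stripe(const char *name, int namelen,
- unsigned int stripe_count)
-{
- unsigned int h = 5381;
-
- while (namelen-- > 0)
- h = h * 33 + (unsigned char)*name++;
- return h % stripe_count; /* always < stripe_count */
-}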
-
-static inline bool lmv_need_try_all_stripes(const struct lmv_stripe_md *lsm)
-{
- return !lmv_is_known_hash_type(lsm->lsm_md_hash_type) ||
- lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION;
-}
-
-struct lmv_tgt_desc
-*lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data,
- struct lu_fid *fid);
-/* lproc_lmv.c */
-void lprocfs_lmv_init_vars(struct lprocfs_static_vars *lvars);
-
-extern const struct file_operations lmv_proc_target_fops;
-
-#endif
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
deleted file mode 100644
index 65f94e6ecaad..000000000000
--- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c
+++ /dev/null
@@ -1,3131 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LMV
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/pagemap.h>
-#include <linux/mm.h>
-#include <linux/file.h>
-#include <asm/div64.h>
-#include <linux/seq_file.h>
-#include <linux/namei.h>
-#include <linux/uaccess.h>
-
-#include <linux/libcfs/libcfs.h>
-#include <obd_support.h>
-#include <lustre_net.h>
-#include <obd_class.h>
-#include <lustre_lmv.h>
-#include <lprocfs_status.h>
-#include <cl_object.h>
-#include <lustre_fid.h>
-#include <uapi/linux/lustre/lustre_ioctl.h>
-#include <lustre_kernelcomm.h>
-#include "lmv_internal.h"
-
-static int lmv_check_connect(struct obd_device *obd);
-
-static void lmv_activate_target(struct lmv_obd *lmv,
- struct lmv_tgt_desc *tgt,
- int activate)
-{
- if (tgt->ltd_active == activate)
- return;
-
- tgt->ltd_active = activate;
- lmv->desc.ld_active_tgt_count += (activate ? 1 : -1);
- tgt->ltd_exp->exp_obd->obd_inactive = !activate;
-}
-
-/**
- * Error codes:
- *
- * -EINVAL : UUID can't be found in the LMV's target list
- * -ENOTCONN: The UUID is found, but the target connection is bad (!)
- * -EBADF : The UUID is found, but the OBD is of the wrong type (!)
- */
-static int lmv_set_mdc_active(struct lmv_obd *lmv, const struct obd_uuid *uuid,
- int activate)
-{
- struct lmv_tgt_desc *tgt = NULL;
- struct obd_device *obd;
- u32 i;
- int rc = 0;
-
- CDEBUG(D_INFO, "Searching in lmv %p for uuid %s (activate=%d)\n",
- lmv, uuid->uuid, activate);
-
- spin_lock(&lmv->lmv_lock);
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- tgt = lmv->tgts[i];
- if (!tgt || !tgt->ltd_exp)
- continue;
-
- CDEBUG(D_INFO, "Target idx %d is %s conn %#llx\n", i,
- tgt->ltd_uuid.uuid, tgt->ltd_exp->exp_handle.h_cookie);
-
- if (obd_uuid_equals(uuid, &tgt->ltd_uuid))
- break;
- }
-
- if (i == lmv->desc.ld_tgt_count) {
- rc = -EINVAL;
- goto out_lmv_lock;
- }
-
- obd = class_exp2obd(tgt->ltd_exp);
- if (!obd) {
- rc = -ENOTCONN;
- goto out_lmv_lock;
- }
-
- CDEBUG(D_INFO, "Found OBD %s=%s device %d (%p) type %s at LMV idx %d\n",
- obd->obd_name, obd->obd_uuid.uuid, obd->obd_minor, obd,
- obd->obd_type->typ_name, i);
- LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0);
-
- if (tgt->ltd_active == activate) {
- CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd,
- activate ? "" : "in");
- goto out_lmv_lock;
- }
-
- CDEBUG(D_INFO, "Marking OBD %p %sactive\n", obd,
- activate ? "" : "in");
- lmv_activate_target(lmv, tgt, activate);
-
- out_lmv_lock:
- spin_unlock(&lmv->lmv_lock);
- return rc;
-}
-
-static struct obd_uuid *lmv_get_uuid(struct obd_export *exp)
-{
- struct lmv_obd *lmv = &exp->exp_obd->u.lmv;
- struct lmv_tgt_desc *tgt = lmv->tgts[0];
-
- return tgt ? obd_get_uuid(tgt->ltd_exp) : NULL;
-}
-
-static int lmv_notify(struct obd_device *obd, struct obd_device *watched,
- enum obd_notify_event ev, void *data)
-{
- struct obd_connect_data *conn_data;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct obd_uuid *uuid;
- int rc = 0;
-
- if (strcmp(watched->obd_type->typ_name, LUSTRE_MDC_NAME)) {
- CERROR("unexpected notification of %s %s!\n",
- watched->obd_type->typ_name,
- watched->obd_name);
- return -EINVAL;
- }
-
- uuid = &watched->u.cli.cl_target_uuid;
- if (ev == OBD_NOTIFY_ACTIVE || ev == OBD_NOTIFY_INACTIVE) {
- /*
- * Set MDC as active before notifying the observer, so the
- * observer can use the MDC normally.
- */
- rc = lmv_set_mdc_active(lmv, uuid,
- ev == OBD_NOTIFY_ACTIVE);
- if (rc) {
- CERROR("%sactivation of %s failed: %d\n",
- ev == OBD_NOTIFY_ACTIVE ? "" : "de",
- uuid->uuid, rc);
- return rc;
- }
- } else if (ev == OBD_NOTIFY_OCD) {
- conn_data = &watched->u.cli.cl_import->imp_connect_data;
- /*
- * XXX: Make sure that ocd_connect_flags from all targets are
- * the same; otherwise one of the MDTs is running the wrong
- * version or something similar. --umka
- */
- obd->obd_self_export->exp_connect_data = *conn_data;
- }
-
- /*
- * Pass the notification up the chain.
- */
- if (obd->obd_observer)
- rc = obd_notify(obd->obd_observer, watched, ev, data);
-
- return rc;
-}
-
-static int lmv_connect(const struct lu_env *env,
- struct obd_export **pexp, struct obd_device *obd,
- struct obd_uuid *cluuid, struct obd_connect_data *data,
- void *localdata)
-{
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lustre_handle conn = { 0 };
- struct obd_export *exp;
- int rc = 0;
-
- rc = class_connect(&conn, obd, cluuid);
- if (rc) {
- CERROR("class_connection() returned %d\n", rc);
- return rc;
- }
-
- exp = class_conn2export(&conn);
-
- lmv->connected = 0;
- lmv->cluuid = *cluuid;
- lmv->conn_data = *data;
-
- lmv->lmv_tgts_kobj = kobject_create_and_add("target_obds",
- &obd->obd_kobj);
- rc = lmv_check_connect(obd);
- if (rc)
- goto out_sysfs;
-
- *pexp = exp;
-
- return rc;
-
-out_sysfs:
- if (lmv->lmv_tgts_kobj)
- kobject_put(lmv->lmv_tgts_kobj);
-
- class_disconnect(exp);
-
- return rc;
-}
-
-static int lmv_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- u32 i;
- int rc = 0;
- int change = 0;
-
- if (lmv->max_easize < easize) {
- lmv->max_easize = easize;
- change = 1;
- }
- if (lmv->max_def_easize < def_easize) {
- lmv->max_def_easize = def_easize;
- change = 1;
- }
-
- if (change == 0)
- return 0;
-
- if (lmv->connected == 0)
- return 0;
-
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- struct lmv_tgt_desc *tgt = lmv->tgts[i];
-
- if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) {
- CWARN("%s: NULL export for %d\n", obd->obd_name, i);
- continue;
- }
-
- rc = md_init_ea_size(tgt->ltd_exp, easize, def_easize);
- if (rc) {
- CERROR("%s: obd_init_ea_size() failed on MDT target %d: rc = %d\n",
- obd->obd_name, i, rc);
- break;
- }
- }
- return rc;
-}
-
-#define MAX_STRING_SIZE 128
-
-static int lmv_connect_mdc(struct obd_device *obd, struct lmv_tgt_desc *tgt)
-{
- struct lmv_obd *lmv = &obd->u.lmv;
- struct obd_uuid *cluuid = &lmv->cluuid;
- struct obd_uuid lmv_mdc_uuid = { "LMV_MDC_UUID" };
- struct obd_device *mdc_obd;
- struct obd_export *mdc_exp;
- struct lu_fld_target target;
- int rc;
-
- mdc_obd = class_find_client_obd(&tgt->ltd_uuid, LUSTRE_MDC_NAME,
- &obd->obd_uuid);
- if (!mdc_obd) {
- CERROR("target %s not attached\n", tgt->ltd_uuid.uuid);
- return -EINVAL;
- }
-
- CDEBUG(D_CONFIG, "connect to %s(%s) - %s, %s FOR %s\n",
- mdc_obd->obd_name, mdc_obd->obd_uuid.uuid,
- tgt->ltd_uuid.uuid, obd->obd_uuid.uuid, cluuid->uuid);
-
- if (!mdc_obd->obd_set_up) {
- CERROR("target %s is not set up\n", tgt->ltd_uuid.uuid);
- return -EINVAL;
- }
-
- rc = obd_connect(NULL, &mdc_exp, mdc_obd, &lmv_mdc_uuid,
- &lmv->conn_data, NULL);
- if (rc) {
- CERROR("target %s connect error %d\n", tgt->ltd_uuid.uuid, rc);
- return rc;
- }
-
- /*
- * Init fid sequence client for this mdc and add new fld target.
- */
- rc = obd_fid_init(mdc_obd, mdc_exp, LUSTRE_SEQ_METADATA);
- if (rc)
- return rc;
-
- target.ft_srv = NULL;
- target.ft_exp = mdc_exp;
- target.ft_idx = tgt->ltd_idx;
-
- fld_client_add_target(&lmv->lmv_fld, &target);
-
- rc = obd_register_observer(mdc_obd, obd);
- if (rc) {
- obd_disconnect(mdc_exp);
- CERROR("target %s register_observer error %d\n",
- tgt->ltd_uuid.uuid, rc);
- return rc;
- }
-
- if (obd->obd_observer) {
- /*
- * Tell the observer about the new target.
- */
- rc = obd_notify(obd->obd_observer, mdc_exp->exp_obd,
- OBD_NOTIFY_ACTIVE,
- (void *)(tgt - lmv->tgts[0]));
- if (rc) {
- obd_disconnect(mdc_exp);
- return rc;
- }
- }
-
- tgt->ltd_active = 1;
- tgt->ltd_exp = mdc_exp;
- lmv->desc.ld_active_tgt_count++;
-
- md_init_ea_size(tgt->ltd_exp, lmv->max_easize, lmv->max_def_easize);
-
- CDEBUG(D_CONFIG, "Connected to %s(%s) successfully (%d)\n",
- mdc_obd->obd_name, mdc_obd->obd_uuid.uuid,
- atomic_read(&obd->obd_refcount));
-
- if (lmv->lmv_tgts_kobj)
- /* Even if we failed to create the link, that's fine */
- rc = sysfs_create_link(lmv->lmv_tgts_kobj, &mdc_obd->obd_kobj,
- mdc_obd->obd_name);
- return 0;
-}
-
-static void lmv_del_target(struct lmv_obd *lmv, int index)
-{
- if (!lmv->tgts[index])
- return;
-
- kfree(lmv->tgts[index]);
- lmv->tgts[index] = NULL;
-}
-
-static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
- __u32 index, int gen)
-{
- struct lmv_obd *lmv = &obd->u.lmv;
- struct obd_device *mdc_obd;
- struct lmv_tgt_desc *tgt;
- int orig_tgt_count = 0;
- int rc = 0;
-
- CDEBUG(D_CONFIG, "Target uuid: %s. index %d\n", uuidp->uuid, index);
-
- mdc_obd = class_find_client_obd(uuidp, LUSTRE_MDC_NAME,
- &obd->obd_uuid);
- if (!mdc_obd) {
- CERROR("%s: Target %s not attached: rc = %d\n",
- obd->obd_name, uuidp->uuid, -EINVAL);
- return -EINVAL;
- }
-
- mutex_lock(&lmv->lmv_init_mutex);
-
- if ((index < lmv->tgts_size) && lmv->tgts[index]) {
- tgt = lmv->tgts[index];
- CERROR("%s: UUID %s already assigned at LOV target index %d: rc = %d\n",
- obd->obd_name,
- obd_uuid2str(&tgt->ltd_uuid), index, -EEXIST);
- mutex_unlock(&lmv->lmv_init_mutex);
- return -EEXIST;
- }
-
- if (index >= lmv->tgts_size) {
- /* We need to reallocate the lmv target array. */
- struct lmv_tgt_desc **newtgts, **old = NULL;
- __u32 newsize = 1;
- __u32 oldsize = 0;
-
- while (newsize < index + 1)
- newsize <<= 1;
- newtgts = kcalloc(newsize, sizeof(*newtgts), GFP_NOFS);
- if (!newtgts) {
- mutex_unlock(&lmv->lmv_init_mutex);
- return -ENOMEM;
- }
-
- if (lmv->tgts_size) {
- memcpy(newtgts, lmv->tgts,
- sizeof(*newtgts) * lmv->tgts_size);
- old = lmv->tgts;
- oldsize = lmv->tgts_size;
- }
-
- lmv->tgts = newtgts;
- lmv->tgts_size = newsize;
- smp_rmb();
- kfree(old);
-
- CDEBUG(D_CONFIG, "tgts: %p size: %d\n", lmv->tgts,
- lmv->tgts_size);
- }
-
- tgt = kzalloc(sizeof(*tgt), GFP_NOFS);
- if (!tgt) {
- mutex_unlock(&lmv->lmv_init_mutex);
- return -ENOMEM;
- }
-
- mutex_init(&tgt->ltd_fid_mutex);
- tgt->ltd_idx = index;
- tgt->ltd_uuid = *uuidp;
- tgt->ltd_active = 0;
- lmv->tgts[index] = tgt;
- if (index >= lmv->desc.ld_tgt_count) {
- orig_tgt_count = lmv->desc.ld_tgt_count;
- lmv->desc.ld_tgt_count = index + 1;
- }
-
- if (!lmv->connected) {
- /* lmv_check_connect() will connect this target. */
- mutex_unlock(&lmv->lmv_init_mutex);
- return rc;
- }
-
- /* Otherwise let's connect it ourselves */
- mutex_unlock(&lmv->lmv_init_mutex);
- rc = lmv_connect_mdc(obd, tgt);
- if (rc) {
- spin_lock(&lmv->lmv_lock);
- if (lmv->desc.ld_tgt_count == index + 1)
- lmv->desc.ld_tgt_count = orig_tgt_count;
- memset(tgt, 0, sizeof(*tgt));
- spin_unlock(&lmv->lmv_lock);
- } else {
- int easize = sizeof(struct lmv_stripe_md) +
- lmv->desc.ld_tgt_count * sizeof(struct lu_fid);
- lmv_init_ea_size(obd->obd_self_export, easize, 0);
- }
-
- return rc;
-}
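-
-/*
- * The reallocation in lmv_add_target() grows the target array to the
- * next power of two covering the requested index; a userspace sketch
- * of that growth step using calloc():
- */
-#include <stdlib.h>
-#include <string.h>
-
-static void **grow_tgts(void **old, size_t oldsize, size_t index,
- size_t *newsize)
-{
- size_t n = 1;
- void **newtgts;
-
- while (n < index + 1)
- n <<= 1; /* next power of two above index */
- newtgts = calloc(n, sizeof(*newtgts));
- if (!newtgts)
- return NULL;
- if (oldsize)
- memcpy(newtgts, old, oldsize * sizeof(*old));
- free(old);
- *newsize = n;
- return newtgts;
-}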
-
-static int lmv_check_connect(struct obd_device *obd)
-{
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
- u32 i;
- int rc;
- int easize;
-
- if (lmv->connected)
- return 0;
-
- mutex_lock(&lmv->lmv_init_mutex);
- if (lmv->connected) {
- mutex_unlock(&lmv->lmv_init_mutex);
- return 0;
- }
-
- if (lmv->desc.ld_tgt_count == 0) {
- mutex_unlock(&lmv->lmv_init_mutex);
- CERROR("%s: no targets configured.\n", obd->obd_name);
- return -EINVAL;
- }
-
- LASSERT(lmv->tgts);
-
- if (!lmv->tgts[0]) {
- mutex_unlock(&lmv->lmv_init_mutex);
- CERROR("%s: no target configured for index 0.\n",
- obd->obd_name);
- return -EINVAL;
- }
-
- CDEBUG(D_CONFIG, "Time to connect %s to %s\n",
- lmv->cluuid.uuid, obd->obd_name);
-
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- tgt = lmv->tgts[i];
- if (!tgt)
- continue;
- rc = lmv_connect_mdc(obd, tgt);
- if (rc)
- goto out_disc;
- }
-
- lmv->connected = 1;
- easize = lmv_mds_md_size(lmv->desc.ld_tgt_count, LMV_MAGIC);
- lmv_init_ea_size(obd->obd_self_export, easize, 0);
- mutex_unlock(&lmv->lmv_init_mutex);
- return 0;
-
- out_disc:
- while (i-- > 0) {
- int rc2;
-
- tgt = lmv->tgts[i];
- if (!tgt)
- continue;
- tgt->ltd_active = 0;
- if (tgt->ltd_exp) {
- --lmv->desc.ld_active_tgt_count;
- rc2 = obd_disconnect(tgt->ltd_exp);
- if (rc2) {
- CERROR("LMV target %s disconnect on MDC idx %d: error %d\n",
- tgt->ltd_uuid.uuid, i, rc2);
- }
- }
- }
-
- mutex_unlock(&lmv->lmv_init_mutex);
- return rc;
-}
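-
-/*
- * lmv_check_connect() uses the check-lock-recheck pattern so that the
- * common already-connected path takes no lock; a pthreads sketch of
- * the same pattern (names hypothetical):
- */
-#include <pthread.h>
-
-static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
-static int connected;
-
-static int check_connect(int (*do_connect)(void))
-{
- int rc = 0;
-
- if (connected) /* fast path, no lock */
- return 0;
- pthread_mutex_lock(&init_mutex);
- if (!connected) { /* recheck under the lock */
- rc = do_connect();
- if (!rc)
- connected = 1;
- }
- pthread_mutex_unlock(&init_mutex);
- return rc;
-}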
-
-static int lmv_disconnect_mdc(struct obd_device *obd, struct lmv_tgt_desc *tgt)
-{
- struct lmv_obd *lmv = &obd->u.lmv;
- struct obd_device *mdc_obd;
- int rc;
-
- mdc_obd = class_exp2obd(tgt->ltd_exp);
-
- if (mdc_obd) {
- mdc_obd->obd_force = obd->obd_force;
- mdc_obd->obd_fail = obd->obd_fail;
- mdc_obd->obd_no_recov = obd->obd_no_recov;
-
- if (lmv->lmv_tgts_kobj)
- sysfs_remove_link(lmv->lmv_tgts_kobj,
- mdc_obd->obd_name);
- }
-
- rc = obd_fid_fini(tgt->ltd_exp->exp_obd);
- if (rc)
- CERROR("Can't finalize fids factory\n");
-
- CDEBUG(D_INFO, "Disconnected from %s(%s) successfully\n",
- tgt->ltd_exp->exp_obd->obd_name,
- tgt->ltd_exp->exp_obd->obd_uuid.uuid);
-
- obd_register_observer(tgt->ltd_exp->exp_obd, NULL);
- rc = obd_disconnect(tgt->ltd_exp);
- if (rc) {
- if (tgt->ltd_active) {
- CERROR("Target %s disconnect error %d\n",
- tgt->ltd_uuid.uuid, rc);
- }
- }
-
- lmv_activate_target(lmv, tgt, 0);
- tgt->ltd_exp = NULL;
- return 0;
-}
-
-static int lmv_disconnect(struct obd_export *exp)
-{
- struct obd_device *obd = class_exp2obd(exp);
- struct lmv_obd *lmv = &obd->u.lmv;
- int rc;
- u32 i;
-
- if (!lmv->tgts)
- goto out_local;
-
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp)
- continue;
-
- lmv_disconnect_mdc(obd, lmv->tgts[i]);
- }
-
- if (lmv->lmv_tgts_kobj)
- kobject_put(lmv->lmv_tgts_kobj);
-
-out_local:
- /*
- * This is the case when no real connection is established by
- * lmv_check_connect().
- */
- if (!lmv->connected)
- class_export_put(exp);
- rc = class_disconnect(exp);
- lmv->connected = 0;
- return rc;
-}
-
-static int lmv_fid2path(struct obd_export *exp, int len, void *karg,
- void __user *uarg)
-{
- struct obd_device *obddev = class_exp2obd(exp);
- struct lmv_obd *lmv = &obddev->u.lmv;
- struct getinfo_fid2path *gf;
- struct lmv_tgt_desc *tgt;
- struct getinfo_fid2path *remote_gf = NULL;
- int remote_gf_size = 0;
- int rc;
-
- gf = karg;
- tgt = lmv_find_target(lmv, &gf->gf_fid);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
-repeat_fid2path:
- rc = obd_iocontrol(OBD_IOC_FID2PATH, tgt->ltd_exp, len, gf, uarg);
- if (rc != 0 && rc != -EREMOTE)
- goto out_fid2path;
-
- /* If remote_gf != NULL, the last lookup built a path segment
- * on a remote MDT; prepend that segment to the original gf path.
- */
- if (remote_gf) {
- struct getinfo_fid2path *ori_gf;
- char *ptr;
-
- ori_gf = karg;
- if (strlen(ori_gf->gf_path) + 1 +
- strlen(gf->gf_path) + 1 > ori_gf->gf_pathlen) {
- rc = -EOVERFLOW;
- goto out_fid2path;
- }
-
- ptr = ori_gf->gf_path;
-
- memmove(ptr + strlen(gf->gf_path) + 1, ptr,
- strlen(ori_gf->gf_path));
-
- strncpy(ptr, gf->gf_path, strlen(gf->gf_path));
- ptr += strlen(gf->gf_path);
- *ptr = '/';
- }
-
- CDEBUG(D_INFO, "%s: get path %s " DFID " rec: %llu ln: %u\n",
- tgt->ltd_exp->exp_obd->obd_name,
- gf->gf_path, PFID(&gf->gf_fid), gf->gf_recno,
- gf->gf_linkno);
-
- if (rc == 0)
- goto out_fid2path;
-
- /* sigh, we have to go to another MDT to build the path further */
- if (!remote_gf) {
- remote_gf_size = sizeof(*remote_gf) + PATH_MAX;
- remote_gf = kzalloc(remote_gf_size, GFP_NOFS);
- if (!remote_gf) {
- rc = -ENOMEM;
- goto out_fid2path;
- }
- remote_gf->gf_pathlen = PATH_MAX;
- }
-
- if (!fid_is_sane(&gf->gf_fid)) {
- CERROR("%s: invalid FID " DFID ": rc = %d\n",
- tgt->ltd_exp->exp_obd->obd_name,
- PFID(&gf->gf_fid), -EINVAL);
- rc = -EINVAL;
- goto out_fid2path;
- }
-
- tgt = lmv_find_target(lmv, &gf->gf_fid);
- if (IS_ERR(tgt)) {
- rc = -EINVAL;
- goto out_fid2path;
- }
-
- remote_gf->gf_fid = gf->gf_fid;
- remote_gf->gf_recno = -1;
- remote_gf->gf_linkno = -1;
- memset(remote_gf->gf_path, 0, remote_gf->gf_pathlen);
- gf = remote_gf;
- goto repeat_fid2path;
-
-out_fid2path:
- kfree(remote_gf);
- return rc;
-}
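-
-/*
- * lmv_fid2path() assembles the path by prepending each remote segment
- * in place; the same memmove() idiom as a bounds-checked sketch (this
- * variant also moves the terminating NUL):
- */
-#include <string.h>
-
-static int prepend_segment(char *path, size_t bufsize, const char *seg)
-{
- size_t plen = strlen(path), slen = strlen(seg);
-
- if (slen + 1 + plen + 1 > bufsize)
- return -1; /* -EOVERFLOW in the code above */
- memmove(path + slen + 1, path, plen + 1);
- memcpy(path, seg, slen);
- path[slen] = '/';
- return 0;
-}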
-
-static int lmv_hsm_req_count(struct lmv_obd *lmv,
- const struct hsm_user_request *hur,
- const struct lmv_tgt_desc *tgt_mds)
-{
- u32 i, nr = 0;
- struct lmv_tgt_desc *curr_tgt;
-
- /* count how many requests must be sent to the given target */
- for (i = 0; i < hur->hur_request.hr_itemcount; i++) {
- curr_tgt = lmv_find_target(lmv, &hur->hur_user_item[i].hui_fid);
- if (IS_ERR(curr_tgt))
- return PTR_ERR(curr_tgt);
- if (obd_uuid_equals(&curr_tgt->ltd_uuid, &tgt_mds->ltd_uuid))
- nr++;
- }
- return nr;
-}
-
-static int lmv_hsm_req_build(struct lmv_obd *lmv,
- struct hsm_user_request *hur_in,
- const struct lmv_tgt_desc *tgt_mds,
- struct hsm_user_request *hur_out)
-{
- int i, nr_out;
- struct lmv_tgt_desc *curr_tgt;
-
- /* build the hsm_user_request for the given target */
- hur_out->hur_request = hur_in->hur_request;
- nr_out = 0;
- for (i = 0; i < hur_in->hur_request.hr_itemcount; i++) {
- curr_tgt = lmv_find_target(lmv,
- &hur_in->hur_user_item[i].hui_fid);
- if (IS_ERR(curr_tgt))
- return PTR_ERR(curr_tgt);
- if (obd_uuid_equals(&curr_tgt->ltd_uuid, &tgt_mds->ltd_uuid)) {
- hur_out->hur_user_item[nr_out] =
- hur_in->hur_user_item[i];
- nr_out++;
- }
- }
- hur_out->hur_request.hr_itemcount = nr_out;
- memcpy(hur_data(hur_out), hur_data(hur_in),
- hur_in->hur_request.hr_data_len);
-
- return 0;
-}
-
-static int lmv_hsm_ct_unregister(struct lmv_obd *lmv, unsigned int cmd, int len,
- struct lustre_kernelcomm *lk,
- void __user *uarg)
-{
- __u32 i;
-
- /* unregister request (call from llapi_hsm_copytool_fini) */
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- struct lmv_tgt_desc *tgt = lmv->tgts[i];
-
- if (!tgt || !tgt->ltd_exp)
- continue;
-
- /* best effort: try to clean as much as possible
- * (continue on error)
- */
- obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp, len, lk, uarg);
- }
-
- /* Whatever the result, remove copytool from kuc groups.
- * Unreached coordinators will get EPIPE on next requests
- * and will unregister automatically.
- */
- return libcfs_kkuc_group_rem(lk->lk_uid, lk->lk_group);
-}
-
-static int lmv_hsm_ct_register(struct lmv_obd *lmv, unsigned int cmd, int len,
- struct lustre_kernelcomm *lk, void __user *uarg)
-{
- struct file *filp;
- __u32 i, j;
- int err, rc = 0;
- bool any_set = false;
- struct kkuc_ct_data kcd = { 0 };
-
- /* All or nothing: try to register to all MDS.
- * In case of failure, unregister from previous MDS,
- * unless the failure is due to an inactive target.
- */
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- struct lmv_tgt_desc *tgt = lmv->tgts[i];
-
- if (!tgt || !tgt->ltd_exp)
- continue;
-
- err = obd_iocontrol(cmd, tgt->ltd_exp, len, lk, uarg);
- if (err) {
- if (tgt->ltd_active) {
- /* permanent error */
- CERROR("error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n",
- tgt->ltd_uuid.uuid, i, cmd, err);
- rc = err;
- lk->lk_flags |= LK_FLG_STOP;
- /* unregister from previous MDS */
- for (j = 0; j < i; j++) {
- tgt = lmv->tgts[j];
-
- if (!tgt || !tgt->ltd_exp)
- continue;
- obd_iocontrol(cmd, tgt->ltd_exp, len,
- lk, uarg);
- }
- return rc;
- }
- /* else: transient error.
- * kuc will register to the missing MDT when it is back
- */
- } else {
- any_set = true;
- }
- }
-
- if (!any_set)
- /* no registration done: return error */
- return -ENOTCONN;
-
- /* at least one registration done, with no failure */
- filp = fget(lk->lk_wfd);
- if (!filp)
- return -EBADF;
-
- kcd.kcd_magic = KKUC_CT_DATA_MAGIC;
- kcd.kcd_uuid = lmv->cluuid;
- kcd.kcd_archive = lk->lk_data;
-
- rc = libcfs_kkuc_group_add(filp, lk->lk_uid, lk->lk_group,
- &kcd, sizeof(kcd));
- if (rc)
- fput(filp);
-
- return rc;
-}
-
-static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
- int len, void *karg, void __user *uarg)
-{
- struct obd_device *obddev = class_exp2obd(exp);
- struct lmv_obd *lmv = &obddev->u.lmv;
- struct lmv_tgt_desc *tgt = NULL;
- u32 i = 0;
- int rc = 0;
- int set = 0;
- u32 count = lmv->desc.ld_tgt_count;
-
- if (count == 0)
- return -ENOTTY;
-
- switch (cmd) {
- case IOC_OBD_STATFS: {
- struct obd_ioctl_data *data = karg;
- struct obd_device *mdc_obd;
- struct obd_statfs stat_buf = {0};
- __u32 index;
-
- memcpy(&index, data->ioc_inlbuf2, sizeof(__u32));
- if (index >= count)
- return -ENODEV;
-
- tgt = lmv->tgts[index];
- if (!tgt || !tgt->ltd_active)
- return -ENODATA;
-
- mdc_obd = class_exp2obd(tgt->ltd_exp);
- if (!mdc_obd)
- return -EINVAL;
-
- /* copy UUID */
- if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(mdc_obd),
- min((int)data->ioc_plen2,
- (int)sizeof(struct obd_uuid))))
- return -EFAULT;
-
- rc = obd_statfs(NULL, tgt->ltd_exp, &stat_buf,
- get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ,
- 0);
- if (rc)
- return rc;
- if (copy_to_user(data->ioc_pbuf1, &stat_buf,
- min((int)data->ioc_plen1,
- (int)sizeof(stat_buf))))
- return -EFAULT;
- break;
- }
- case OBD_IOC_QUOTACTL: {
- struct if_quotactl *qctl = karg;
- struct obd_quotactl *oqctl;
-
- if (qctl->qc_valid == QC_MDTIDX) {
- if (count <= qctl->qc_idx)
- return -EINVAL;
-
- tgt = lmv->tgts[qctl->qc_idx];
- if (!tgt || !tgt->ltd_exp)
- return -EINVAL;
- } else if (qctl->qc_valid == QC_UUID) {
- for (i = 0; i < count; i++) {
- tgt = lmv->tgts[i];
- if (!tgt)
- continue;
- if (!obd_uuid_equals(&tgt->ltd_uuid,
- &qctl->obd_uuid))
- continue;
-
- if (!tgt->ltd_exp)
- return -EINVAL;
-
- break;
- }
- } else {
- return -EINVAL;
- }
-
- if (i >= count)
- return -EAGAIN;
-
- LASSERT(tgt && tgt->ltd_exp);
- oqctl = kzalloc(sizeof(*oqctl), GFP_NOFS);
- if (!oqctl)
- return -ENOMEM;
-
- QCTL_COPY(oqctl, qctl);
- rc = obd_quotactl(tgt->ltd_exp, oqctl);
- if (rc == 0) {
- QCTL_COPY(qctl, oqctl);
- qctl->qc_valid = QC_MDTIDX;
- qctl->obd_uuid = tgt->ltd_uuid;
- }
- kfree(oqctl);
- break;
- }
- case OBD_IOC_CHANGELOG_SEND:
- case OBD_IOC_CHANGELOG_CLEAR: {
- struct ioc_changelog *icc = karg;
-
- if (icc->icc_mdtindex >= count)
- return -ENODEV;
-
- tgt = lmv->tgts[icc->icc_mdtindex];
- if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
- return -ENODEV;
- rc = obd_iocontrol(cmd, tgt->ltd_exp, sizeof(*icc), icc, NULL);
- break;
- }
- case LL_IOC_GET_CONNECT_FLAGS: {
- tgt = lmv->tgts[0];
-
- if (!tgt || !tgt->ltd_exp)
- return -ENODATA;
- rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg);
- break;
- }
- case LL_IOC_FID2MDTIDX: {
- struct lu_fid *fid = karg;
- int mdt_index;
-
- rc = lmv_fld_lookup(lmv, fid, &mdt_index);
- if (rc)
- return rc;
-
- /*
- * Note: this comes from llite (see ll_dir_ioctl()); for
- * FID2MDTIDX, @uarg does not point to user-space memory.
- */
- *(__u32 *)uarg = mdt_index;
- break;
- }
- case OBD_IOC_FID2PATH: {
- rc = lmv_fid2path(exp, len, karg, uarg);
- break;
- }
- case LL_IOC_HSM_STATE_GET:
- case LL_IOC_HSM_STATE_SET:
- case LL_IOC_HSM_ACTION: {
- struct md_op_data *op_data = karg;
-
- tgt = lmv_find_target(lmv, &op_data->op_fid1);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- if (!tgt->ltd_exp)
- return -EINVAL;
-
- rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg);
- break;
- }
- case LL_IOC_HSM_PROGRESS: {
- const struct hsm_progress_kernel *hpk = karg;
-
- tgt = lmv_find_target(lmv, &hpk->hpk_fid);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
- rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg);
- break;
- }
- case LL_IOC_HSM_REQUEST: {
- struct hsm_user_request *hur = karg;
- unsigned int reqcount = hur->hur_request.hr_itemcount;
-
- if (reqcount == 0)
- return 0;
-
- /* if the request is about a single fid
- * or if there is a single MDS, no need to split
- * the request.
- */
- if (reqcount == 1 || count == 1) {
- tgt = lmv_find_target(lmv,
- &hur->hur_user_item[0].hui_fid);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
- rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg);
- } else {
- /* split fid list to their respective MDS */
- for (i = 0; i < count; i++) {
- struct hsm_user_request *req;
- size_t reqlen;
- int nr, rc1;
-
- tgt = lmv->tgts[i];
- if (!tgt || !tgt->ltd_exp)
- continue;
-
- nr = lmv_hsm_req_count(lmv, hur, tgt);
- if (nr < 0)
- return nr;
- if (nr == 0) /* nothing for this MDS */
- continue;
-
- /* build a request with fids for this MDS */
- reqlen = offsetof(typeof(*hur),
- hur_user_item[nr])
- + hur->hur_request.hr_data_len;
- req = kvzalloc(reqlen, GFP_NOFS);
- if (!req)
- return -ENOMEM;
-
- rc1 = lmv_hsm_req_build(lmv, hur, tgt, req);
- if (rc1 < 0)
- goto hsm_req_err;
-
- rc1 = obd_iocontrol(cmd, tgt->ltd_exp, reqlen,
- req, uarg);
-hsm_req_err:
- if (rc1 != 0 && rc == 0)
- rc = rc1;
- kvfree(req);
- }
- }
- break;
- }
- case LL_IOC_LOV_SWAP_LAYOUTS: {
- struct md_op_data *op_data = karg;
- struct lmv_tgt_desc *tgt1, *tgt2;
-
- tgt1 = lmv_find_target(lmv, &op_data->op_fid1);
- if (IS_ERR(tgt1))
- return PTR_ERR(tgt1);
-
- tgt2 = lmv_find_target(lmv, &op_data->op_fid2);
- if (IS_ERR(tgt2))
- return PTR_ERR(tgt2);
-
- if (!tgt1->ltd_exp || !tgt2->ltd_exp)
- return -EINVAL;
-
- /* only files on same MDT can have their layouts swapped */
- if (tgt1->ltd_idx != tgt2->ltd_idx)
- return -EPERM;
-
- rc = obd_iocontrol(cmd, tgt1->ltd_exp, len, karg, uarg);
- break;
- }
- case LL_IOC_HSM_CT_START: {
- struct lustre_kernelcomm *lk = karg;
-
- if (lk->lk_flags & LK_FLG_STOP)
- rc = lmv_hsm_ct_unregister(lmv, cmd, len, lk, uarg);
- else
- rc = lmv_hsm_ct_register(lmv, cmd, len, lk, uarg);
- break;
- }
- default:
- for (i = 0; i < count; i++) {
- struct obd_device *mdc_obd;
- int err;
-
- tgt = lmv->tgts[i];
- if (!tgt || !tgt->ltd_exp)
- continue;
- /* ll_umount_begin() sets the force flag on the lmv, not the
- * mdc; pass it through.
- */
- mdc_obd = class_exp2obd(tgt->ltd_exp);
- mdc_obd->obd_force = obddev->obd_force;
- err = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg);
- if (err) {
- if (tgt->ltd_active) {
- CERROR("%s: error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n",
- lmv2obd_dev(lmv)->obd_name,
- tgt->ltd_uuid.uuid, i, cmd, err);
- if (!rc)
- rc = err;
- }
- } else {
- set = 1;
- }
- }
- if (!set && !rc)
- rc = -EIO;
- }
- return rc;
-}
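-
-/*
- * The default branch above fans an ioctl out to every target,
- * remembers the first error (the active-target filtering is omitted
- * here), and fails with -EIO only if nothing succeeded; the same
- * policy, distilled:
- */
-#include <errno.h>
-
-static int broadcast(int (*op)(int idx), int count)
-{
- int i, rc = 0, set = 0;
-
- for (i = 0; i < count; i++) {
- int err = op(i);
-
- if (err) {
- if (!rc)
- rc = err; /* keep the first error */
- } else {
- set = 1;
- }
- }
- if (!set && !rc)
- rc = -EIO;
- return rc;
-}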
-
-/**
- * This is _inode_ placement policy function (not name).
- */
-static int lmv_placement_policy(struct obd_device *obd,
- struct md_op_data *op_data, u32 *mds)
-{
- struct lmv_obd *lmv = &obd->u.lmv;
-
- LASSERT(mds);
-
- if (lmv->desc.ld_tgt_count == 1) {
- *mds = 0;
- return 0;
- }
-
- if (op_data->op_default_stripe_offset != -1) {
- *mds = op_data->op_default_stripe_offset;
- return 0;
- }
-
- /*
- * If a stripe_offset is provided during setdirstripe
- * (setdirstripe -i xx), MDS xx will be chosen.
- */
- if (op_data->op_cli_flags & CLI_SET_MEA && op_data->op_data) {
- struct lmv_user_md *lum;
-
- lum = op_data->op_data;
- if (le32_to_cpu(lum->lum_stripe_offset) != (__u32)-1) {
- *mds = le32_to_cpu(lum->lum_stripe_offset);
- } else {
- /*
- * -1 means default: the object will be on the same MDT
- * as the stripe.
- */
- *mds = op_data->op_mds;
- lum->lum_stripe_offset = cpu_to_le32(op_data->op_mds);
- }
- } else {
- /*
- * Allocate new fid on target according to operation type and
- * parent home mds.
- */
- *mds = op_data->op_mds;
- }
-
- return 0;
-}
-
-int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, u32 mds)
-{
- struct lmv_tgt_desc *tgt;
- int rc;
-
- tgt = lmv_get_target(lmv, mds, NULL);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- /*
- * New seq allocation and FLD setup must be atomic; otherwise the
- * server may find that the seq in the newly allocated fid is not
- * yet known.
- */
- mutex_lock(&tgt->ltd_fid_mutex);
-
- if (tgt->ltd_active == 0 || !tgt->ltd_exp) {
- rc = -ENODEV;
- goto out;
- }
-
- /*
- * Ask the underlying tgt layer to allocate a new fid.
- */
- rc = obd_fid_alloc(NULL, tgt->ltd_exp, fid, NULL);
- if (rc > 0) {
- LASSERT(fid_is_sane(fid));
- rc = 0;
- }
-
-out:
- mutex_unlock(&tgt->ltd_fid_mutex);
- return rc;
-}
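-
-/*
- * __lmv_fid_alloc() serializes allocation with a per-target mutex so
- * that seq allocation and FLD setup appear atomic; a pthreads sketch
- * with a hypothetical tgt struct:
- */
-#include <errno.h>
-#include <pthread.h>
-
-struct tgt {
- pthread_mutex_t fid_mutex;
- int active;
- unsigned long long next_fid;
-};
-
-static int tgt_fid_alloc(struct tgt *t, unsigned long long *fid)
-{
- int rc = 0;
-
- pthread_mutex_lock(&t->fid_mutex);
- if (!t->active)
- rc = -ENODEV; /* target went away while we waited */
- else
- *fid = t->next_fid++; /* allocate under the lock */
- pthread_mutex_unlock(&t->fid_mutex);
- return rc;
-}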
-
-int lmv_fid_alloc(const struct lu_env *env, struct obd_export *exp,
- struct lu_fid *fid, struct md_op_data *op_data)
-{
- struct obd_device *obd = class_exp2obd(exp);
- struct lmv_obd *lmv = &obd->u.lmv;
- u32 mds = 0;
- int rc;
-
- LASSERT(op_data);
- LASSERT(fid);
-
- rc = lmv_placement_policy(obd, op_data, &mds);
- if (rc) {
- CERROR("Can't get target for allocating fid, rc %d\n",
- rc);
- return rc;
- }
-
- rc = __lmv_fid_alloc(lmv, fid, mds);
- if (rc) {
- CERROR("Can't alloc new fid, rc %d\n", rc);
- return rc;
- }
-
- return rc;
-}
-
-static int lmv_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
-{
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lprocfs_static_vars lvars = { NULL };
- struct lmv_desc *desc;
- int rc;
-
- if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
- CERROR("LMV setup requires a descriptor\n");
- return -EINVAL;
- }
-
- desc = (struct lmv_desc *)lustre_cfg_buf(lcfg, 1);
- if (sizeof(*desc) > LUSTRE_CFG_BUFLEN(lcfg, 1)) {
- CERROR("Lmv descriptor size wrong: %d > %d\n",
- (int)sizeof(*desc), LUSTRE_CFG_BUFLEN(lcfg, 1));
- return -EINVAL;
- }
-
- lmv->tgts_size = 32U;
- lmv->tgts = kcalloc(lmv->tgts_size, sizeof(*lmv->tgts), GFP_NOFS);
- if (!lmv->tgts)
- return -ENOMEM;
-
- obd_str2uuid(&lmv->desc.ld_uuid, desc->ld_uuid.uuid);
- lmv->desc.ld_tgt_count = 0;
- lmv->desc.ld_active_tgt_count = 0;
- lmv->max_def_easize = 0;
- lmv->max_easize = 0;
-
- spin_lock_init(&lmv->lmv_lock);
- mutex_init(&lmv->lmv_init_mutex);
-
- lprocfs_lmv_init_vars(&lvars);
-
- lprocfs_obd_setup(obd, lvars.obd_vars, lvars.sysfs_vars);
- debugfs_create_file("target_obd", 0444, obd->obd_debugfs_entry, obd,
- &lmv_proc_target_fops);
- rc = fld_client_init(&lmv->lmv_fld, obd->obd_name,
- LUSTRE_CLI_FLD_HASH_DHT);
- if (rc) {
- CERROR("Can't init FLD, err %d\n", rc);
- goto out;
- }
-
- return 0;
-
-out:
- return rc;
-}
-
-static int lmv_cleanup(struct obd_device *obd)
-{
- struct lmv_obd *lmv = &obd->u.lmv;
-
- fld_client_fini(&lmv->lmv_fld);
- if (lmv->tgts) {
- int i;
-
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- if (!lmv->tgts[i])
- continue;
- lmv_del_target(lmv, i);
- }
- kfree(lmv->tgts);
- lmv->tgts_size = 0;
- }
- return 0;
-}
-
-static int lmv_process_config(struct obd_device *obd, u32 len, void *buf)
-{
- struct lustre_cfg *lcfg = buf;
- struct obd_uuid obd_uuid;
- int gen;
- __u32 index;
- int rc;
-
- switch (lcfg->lcfg_command) {
- case LCFG_ADD_MDC:
- /* modify_mdc_tgts add 0:lustre-clilmv 1:lustre-MDT0000_UUID
- * 2:0 3:1 4:lustre-MDT0000-mdc_UUID
- */
- if (LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(obd_uuid.uuid)) {
- rc = -EINVAL;
- goto out;
- }
-
- obd_str2uuid(&obd_uuid, lustre_cfg_buf(lcfg, 1));
-
- if (sscanf(lustre_cfg_buf(lcfg, 2), "%u", &index) != 1) {
- rc = -EINVAL;
- goto out;
- }
- if (sscanf(lustre_cfg_buf(lcfg, 3), "%d", &gen) != 1) {
- rc = -EINVAL;
- goto out;
- }
- rc = lmv_add_target(obd, &obd_uuid, index, gen);
- goto out;
- default:
- CERROR("Unknown command: %d\n", lcfg->lcfg_command);
- rc = -EINVAL;
- goto out;
- }
-out:
- return rc;
-}
-
-static int lmv_statfs(const struct lu_env *env, struct obd_export *exp,
- struct obd_statfs *osfs, __u64 max_age, __u32 flags)
-{
- struct obd_device *obd = class_exp2obd(exp);
- struct lmv_obd *lmv = &obd->u.lmv;
- struct obd_statfs *temp;
- int rc = 0;
- u32 i;
-
- temp = kzalloc(sizeof(*temp), GFP_NOFS);
- if (!temp)
- return -ENOMEM;
-
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp)
- continue;
-
- rc = obd_statfs(env, lmv->tgts[i]->ltd_exp, temp,
- max_age, flags);
- if (rc) {
- CERROR("can't stat MDS #%d (%s), error %d\n", i,
- lmv->tgts[i]->ltd_exp->exp_obd->obd_name,
- rc);
- goto out_free_temp;
- }
-
- if (i == 0) {
- *osfs = *temp;
- /* If the statfs is from mount, it only needs to
- * retrieve the necessary information from MDT0,
- * i.e. mount does not need the merged osfs
- * from all of the MDTs. This also means clients
- * can be mounted as long as MDT0 is in service.
- */
- if (flags & OBD_STATFS_FOR_MDT0)
- goto out_free_temp;
- } else {
- osfs->os_bavail += temp->os_bavail;
- osfs->os_blocks += temp->os_blocks;
- osfs->os_ffree += temp->os_ffree;
- osfs->os_files += temp->os_files;
- }
- }
-
-out_free_temp:
- kfree(temp);
- return rc;
-}
-
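-/*
- * Editor's example for the merge in lmv_statfs() above: with two MDTs
- * reporting os_bavail of 100 and 50 blocks, the merged osfs reports
- * os_bavail == 150; os_blocks, os_ffree and os_files are summed the
- * same way (numbers illustrative).
- */
-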
-static int lmv_getstatus(struct obd_export *exp,
- struct lu_fid *fid)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
-
- return md_getstatus(lmv->tgts[0]->ltd_exp, fid);
-}
-
-static int lmv_getxattr(struct obd_export *exp, const struct lu_fid *fid,
- u64 obd_md_valid, const char *name, size_t buf_size,
- struct ptlrpc_request **req)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
-
- tgt = lmv_find_target(lmv, fid);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- return md_getxattr(tgt->ltd_exp, fid, obd_md_valid, name, buf_size,
- req);
-}
-
-static int lmv_setxattr(struct obd_export *exp, const struct lu_fid *fid,
- u64 obd_md_valid, const char *name,
- const void *value, size_t value_size,
- unsigned int xattr_flags, u32 suppgid,
- struct ptlrpc_request **req)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
-
- tgt = lmv_find_target(lmv, fid);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- return md_setxattr(tgt->ltd_exp, fid, obd_md_valid, name,
- value, value_size, xattr_flags, suppgid, req);
-}
-
-static int lmv_getattr(struct obd_export *exp, struct md_op_data *op_data,
- struct ptlrpc_request **request)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
-
- tgt = lmv_find_target(lmv, &op_data->op_fid1);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- if (op_data->op_flags & MF_GET_MDT_IDX) {
- op_data->op_mds = tgt->ltd_idx;
- return 0;
- }
-
- return md_getattr(tgt->ltd_exp, op_data, request);
-}
-
-static int lmv_null_inode(struct obd_export *exp, const struct lu_fid *fid)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- u32 i;
-
- CDEBUG(D_INODE, "CBDATA for " DFID "\n", PFID(fid));
-
- /*
- * With DNE every object can have two locks in different namespaces:
- * lookup lock in space of MDT storing direntry and update/open lock in
- * space of MDT storing inode.
- */
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp)
- continue;
- md_null_inode(lmv->tgts[i]->ltd_exp, fid);
- }
-
- return 0;
-}
-
-static int lmv_close(struct obd_export *exp, struct md_op_data *op_data,
- struct md_open_data *mod, struct ptlrpc_request **request)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
-
- tgt = lmv_find_target(lmv, &op_data->op_fid1);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- CDEBUG(D_INODE, "CLOSE " DFID "\n", PFID(&op_data->op_fid1));
- return md_close(tgt->ltd_exp, op_data, mod, request);
-}
-
-/**
- * Choose the MDT by name or FID in @op_data.
- * For a non-striped directory, it locates the MDT by FID.
- * For a striped directory, it locates the MDT by name, and also
- * resets op_fid1 to the FID of the chosen stripe.
- **/
-static struct lmv_tgt_desc *
-lmv_locate_target_for_name(struct lmv_obd *lmv, struct lmv_stripe_md *lsm,
- const char *name, int namelen, struct lu_fid *fid,
- u32 *mds)
-{
- const struct lmv_oinfo *oinfo;
- struct lmv_tgt_desc *tgt;
-
- if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_NAME_HASH)) {
- if (cfs_fail_val >= lsm->lsm_md_stripe_count)
- return ERR_PTR(-EBADF);
- oinfo = &lsm->lsm_md_oinfo[cfs_fail_val];
- } else {
- oinfo = lsm_name_to_stripe_info(lsm, name, namelen);
- if (IS_ERR(oinfo))
- return ERR_CAST(oinfo);
- }
-
- if (fid)
- *fid = oinfo->lmo_fid;
- if (mds)
- *mds = oinfo->lmo_mds;
-
- tgt = lmv_get_target(lmv, oinfo->lmo_mds, NULL);
-
- CDEBUG(D_INFO, "locate on mds %u " DFID "\n", oinfo->lmo_mds,
- PFID(&oinfo->lmo_fid));
- return tgt;
-}
-
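-/*
- * Editor's sketch, not part of the original driver: with a known hash
- * type such as LMV_HASH_TYPE_FNV_1A_64, mapping a name to a stripe
- * index reduces to hashing the name and taking the result modulo
- * lsm_md_stripe_count. A self-contained illustration of the FNV-1a
- * 64-bit hash step (demo name is hypothetical):
- */
-#if 0
-static __u64 demo_fnv1a_64(const char *name, size_t namelen)
-{
- __u64 hash = 0xcbf29ce484222325ULL; /* FNV-1a offset basis */
- size_t i;
-
- for (i = 0; i < namelen; i++) {
- hash ^= (unsigned char)name[i];
- hash *= 0x100000001b3ULL; /* FNV-1a prime */
- }
- /* the stripe index would then be hash % stripe_count */
- return hash;
-}
-#endif
-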
-/**
- * Locate the MDS by FID or name.
- *
- * For a striped directory (lsm != NULL), it locates the stripe
- * by name hash (see lsm_name_to_stripe_info()). Note: if the hash_type
- * is unknown, it returns -EBADFD, and lmv_intent_lookup might need to
- * walk through all of the stripes to locate the entry.
- *
- * For a normal directory, it locates the MDS by FID directly.
- *
- * \param[in] lmv LMV device
- * \param[in] op_data client MD stack parameters: name, namelen,
- * mds_num, etc.
- * \param[in] fid object FID used to locate the MDS.
- *
- * \retval pointer to the lmv_tgt_desc on success,
- * ERR_PTR(errno) on failure.
- */
-struct lmv_tgt_desc*
-lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data,
- struct lu_fid *fid)
-{
- struct lmv_stripe_md *lsm = op_data->op_mea1;
- struct lmv_tgt_desc *tgt;
-
- /*
- * When creating a VOLATILE file, the mdt index should be
- * honored if a file under a striped dir is being restored;
- * see ct_restore().
- */
- if (op_data->op_bias & MDS_CREATE_VOLATILE &&
- (int)op_data->op_mds != -1) {
- int i;
-
- tgt = lmv_get_target(lmv, op_data->op_mds, NULL);
- if (IS_ERR(tgt))
- return tgt;
-
- if (lsm) {
- /* refill the right parent fid */
- for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
- struct lmv_oinfo *oinfo;
-
- oinfo = &lsm->lsm_md_oinfo[i];
- if (oinfo->lmo_mds == op_data->op_mds) {
- *fid = oinfo->lmo_fid;
- break;
- }
- }
-
- if (i == lsm->lsm_md_stripe_count)
- *fid = lsm->lsm_md_oinfo[0].lmo_fid;
- }
-
- return tgt;
- }
-
- if (!lsm || !op_data->op_namelen) {
- tgt = lmv_find_target(lmv, fid);
- if (IS_ERR(tgt))
- return tgt;
-
- op_data->op_mds = tgt->ltd_idx;
-
- return tgt;
- }
-
- return lmv_locate_target_for_name(lmv, lsm, op_data->op_name,
- op_data->op_namelen, fid,
- &op_data->op_mds);
-}
-
-static int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
- const void *data, size_t datalen, umode_t mode,
- uid_t uid, gid_t gid, kernel_cap_t cap_effective,
- __u64 rdev, struct ptlrpc_request **request)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
- int rc;
-
- if (!lmv->desc.ld_active_tgt_count)
- return -EIO;
-
- tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- CDEBUG(D_INODE, "CREATE name '%.*s' on " DFID " -> mds #%x\n",
- (int)op_data->op_namelen, op_data->op_name,
- PFID(&op_data->op_fid1), op_data->op_mds);
-
- rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
- if (rc)
- return rc;
-
- if (exp_connect_flags(exp) & OBD_CONNECT_DIR_STRIPE) {
- /*
- * Send the create request to the MDT where the object
- * will be located
- */
- tgt = lmv_find_target(lmv, &op_data->op_fid2);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- op_data->op_mds = tgt->ltd_idx;
- } else {
- CDEBUG(D_CONFIG, "Server doesn't support striped dirs\n");
- }
-
- CDEBUG(D_INODE, "CREATE obj " DFID " -> mds #%x\n",
- PFID(&op_data->op_fid1), op_data->op_mds);
-
- op_data->op_flags |= MF_MDC_CANCEL_FID1;
- rc = md_create(tgt->ltd_exp, op_data, data, datalen, mode, uid, gid,
- cap_effective, rdev, request);
-
- if (rc == 0) {
- if (!*request)
- return rc;
- CDEBUG(D_INODE, "Created - " DFID "\n", PFID(&op_data->op_fid2));
- }
- return rc;
-}
-
-static int
-lmv_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
- const union ldlm_policy_data *policy, struct md_op_data *op_data,
- struct lustre_handle *lockh, __u64 extra_lock_flags)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
-
- CDEBUG(D_INODE, "ENQUEUE on " DFID "\n", PFID(&op_data->op_fid1));
-
- tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- CDEBUG(D_INODE, "ENQUEUE on " DFID " -> mds #%u\n",
- PFID(&op_data->op_fid1), tgt->ltd_idx);
-
- return md_enqueue(tgt->ltd_exp, einfo, policy, op_data, lockh,
- extra_lock_flags);
-}
-
-static int
-lmv_getattr_name(struct obd_export *exp, struct md_op_data *op_data,
- struct ptlrpc_request **preq)
-{
- struct ptlrpc_request *req = NULL;
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
- struct mdt_body *body;
- int rc;
-
- tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- CDEBUG(D_INODE, "GETATTR_NAME for %*s on " DFID " -> mds #%u\n",
- (int)op_data->op_namelen, op_data->op_name,
- PFID(&op_data->op_fid1), tgt->ltd_idx);
-
- rc = md_getattr_name(tgt->ltd_exp, op_data, preq);
- if (rc != 0)
- return rc;
-
- body = req_capsule_server_get(&(*preq)->rq_pill, &RMF_MDT_BODY);
- if (body->mbo_valid & OBD_MD_MDS) {
- struct lu_fid rid = body->mbo_fid1;
-
- CDEBUG(D_INODE, "Request attrs for " DFID "\n",
- PFID(&rid));
-
- tgt = lmv_find_target(lmv, &rid);
- if (IS_ERR(tgt)) {
- ptlrpc_req_finished(*preq);
- *preq = NULL;
- return PTR_ERR(tgt);
- }
-
- op_data->op_fid1 = rid;
- op_data->op_valid |= OBD_MD_FLCROSSREF;
- op_data->op_namelen = 0;
- op_data->op_name = NULL;
- rc = md_getattr_name(tgt->ltd_exp, op_data, &req);
- ptlrpc_req_finished(*preq);
- *preq = req;
- }
-
- return rc;
-}
-
-#define md_op_data_fid(op_data, fl) \
- (fl == MF_MDC_CANCEL_FID1 ? &op_data->op_fid1 : \
- fl == MF_MDC_CANCEL_FID2 ? &op_data->op_fid2 : \
- fl == MF_MDC_CANCEL_FID3 ? &op_data->op_fid3 : \
- fl == MF_MDC_CANCEL_FID4 ? &op_data->op_fid4 : \
- NULL)
-
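-/*
- * Editor's note: for example, md_op_data_fid(op_data, MF_MDC_CANCEL_FID3)
- * evaluates to &op_data->op_fid3, and an unrecognized flag yields NULL.
- */
-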
-static int lmv_early_cancel(struct obd_export *exp, struct lmv_tgt_desc *tgt,
- struct md_op_data *op_data, int op_tgt,
- enum ldlm_mode mode, int bits, int flag)
-{
- struct lu_fid *fid = md_op_data_fid(op_data, flag);
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- union ldlm_policy_data policy = { { 0 } };
- int rc = 0;
-
- if (!fid_is_sane(fid))
- return 0;
-
- if (!tgt) {
- tgt = lmv_find_target(lmv, fid);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
- }
-
- if (tgt->ltd_idx != op_tgt) {
- CDEBUG(D_INODE, "EARLY_CANCEL on " DFID "\n", PFID(fid));
- policy.l_inodebits.bits = bits;
- rc = md_cancel_unused(tgt->ltd_exp, fid, &policy,
- mode, LCF_ASYNC, NULL);
- } else {
- CDEBUG(D_INODE,
- "EARLY_CANCEL skip operation target %d on " DFID "\n",
- op_tgt, PFID(fid));
- op_data->op_flags |= flag;
- rc = 0;
- }
-
- return rc;
-}
-
-/*
- * llite passes the FID of the target inode in op_data->op_fid1 and the
- * FID of the directory in op_data->op_fid2
- */
-static int lmv_link(struct obd_export *exp, struct md_op_data *op_data,
- struct ptlrpc_request **request)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
- int rc;
-
- LASSERT(op_data->op_namelen != 0);
-
- CDEBUG(D_INODE, "LINK " DFID ":%*s to " DFID "\n",
- PFID(&op_data->op_fid2), (int)op_data->op_namelen,
- op_data->op_name, PFID(&op_data->op_fid1));
-
- op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid());
- op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
- op_data->op_cap = current_cap();
- if (op_data->op_mea2) {
- struct lmv_stripe_md *lsm = op_data->op_mea2;
- const struct lmv_oinfo *oinfo;
-
- oinfo = lsm_name_to_stripe_info(lsm, op_data->op_name,
- op_data->op_namelen);
- if (IS_ERR(oinfo))
- return PTR_ERR(oinfo);
-
- op_data->op_fid2 = oinfo->lmo_fid;
- }
-
- tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid2);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- /*
- * Cancel UPDATE lock on child (fid1).
- */
- op_data->op_flags |= MF_MDC_CANCEL_FID2;
- rc = lmv_early_cancel(exp, NULL, op_data, tgt->ltd_idx, LCK_EX,
- MDS_INODELOCK_UPDATE, MF_MDC_CANCEL_FID1);
- if (rc != 0)
- return rc;
-
- return md_link(tgt->ltd_exp, op_data, request);
-}
-
-static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
- const char *old, size_t oldlen,
- const char *new, size_t newlen,
- struct ptlrpc_request **request)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct obd_export *target_exp;
- struct lmv_tgt_desc *src_tgt;
- struct lmv_tgt_desc *tgt_tgt;
- struct mdt_body *body;
- int rc;
-
- LASSERT(oldlen != 0);
-
- CDEBUG(D_INODE, "RENAME %.*s in " DFID ":%d to %.*s in " DFID ":%d\n",
- (int)oldlen, old, PFID(&op_data->op_fid1),
- op_data->op_mea1 ? op_data->op_mea1->lsm_md_stripe_count : 0,
- (int)newlen, new, PFID(&op_data->op_fid2),
- op_data->op_mea2 ? op_data->op_mea2->lsm_md_stripe_count : 0);
-
- op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid());
- op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
- op_data->op_cap = current_cap();
-
- if (op_data->op_cli_flags & CLI_MIGRATE) {
- LASSERTF(fid_is_sane(&op_data->op_fid3), "invalid FID " DFID "\n",
- PFID(&op_data->op_fid3));
-
- if (op_data->op_mea1) {
- struct lmv_stripe_md *lsm = op_data->op_mea1;
- struct lmv_tgt_desc *tmp;
-
- /* Fix the parent fid for striped dir */
- tmp = lmv_locate_target_for_name(lmv, lsm, old,
- oldlen,
- &op_data->op_fid1,
- NULL);
- if (IS_ERR(tmp))
- return PTR_ERR(tmp);
- }
-
- rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
- if (rc)
- return rc;
- src_tgt = lmv_find_target(lmv, &op_data->op_fid3);
- if (IS_ERR(src_tgt))
- return PTR_ERR(src_tgt);
-
- target_exp = src_tgt->ltd_exp;
- } else {
- if (op_data->op_mea1) {
- struct lmv_stripe_md *lsm = op_data->op_mea1;
-
- src_tgt = lmv_locate_target_for_name(lmv, lsm, old,
- oldlen,
- &op_data->op_fid1,
- &op_data->op_mds);
- } else {
- src_tgt = lmv_find_target(lmv, &op_data->op_fid1);
- }
- if (IS_ERR(src_tgt))
- return PTR_ERR(src_tgt);
-
- if (op_data->op_mea2) {
- struct lmv_stripe_md *lsm = op_data->op_mea2;
-
- tgt_tgt = lmv_locate_target_for_name(lmv, lsm, new,
- newlen,
- &op_data->op_fid2,
- &op_data->op_mds);
- } else {
- tgt_tgt = lmv_find_target(lmv, &op_data->op_fid2);
- }
- if (IS_ERR(tgt_tgt))
- return PTR_ERR(tgt_tgt);
-
- target_exp = tgt_tgt->ltd_exp;
- }
-
- /*
- * LOOKUP lock on src child (fid3) should also be cancelled for
- * src_tgt in mdc_rename.
- */
- op_data->op_flags |= MF_MDC_CANCEL_FID1 | MF_MDC_CANCEL_FID3;
-
- /*
- * Cancel UPDATE locks on tgt parent (fid2), tgt_tgt is its
- * own target.
- */
- rc = lmv_early_cancel(exp, NULL, op_data, src_tgt->ltd_idx,
- LCK_EX, MDS_INODELOCK_UPDATE,
- MF_MDC_CANCEL_FID2);
- if (rc)
- return rc;
- /*
- * Cancel LOOKUP locks on source child (fid3) for parent tgt_tgt.
- */
- if (fid_is_sane(&op_data->op_fid3)) {
- struct lmv_tgt_desc *tgt;
-
- tgt = lmv_find_target(lmv, &op_data->op_fid1);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- /* Cancel LOOKUP lock on its parent */
- rc = lmv_early_cancel(exp, tgt, op_data, src_tgt->ltd_idx,
- LCK_EX, MDS_INODELOCK_LOOKUP,
- MF_MDC_CANCEL_FID3);
- if (rc)
- return rc;
-
- rc = lmv_early_cancel(exp, NULL, op_data, src_tgt->ltd_idx,
- LCK_EX, MDS_INODELOCK_FULL,
- MF_MDC_CANCEL_FID3);
- if (rc)
- return rc;
- }
-
-retry_rename:
- /*
- * Cancel all the locks on tgt child (fid4).
- */
- if (fid_is_sane(&op_data->op_fid4)) {
- struct lmv_tgt_desc *tgt;
-
- rc = lmv_early_cancel(exp, NULL, op_data, src_tgt->ltd_idx,
- LCK_EX, MDS_INODELOCK_FULL,
- MF_MDC_CANCEL_FID4);
- if (rc)
- return rc;
-
- tgt = lmv_find_target(lmv, &op_data->op_fid4);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- /*
- * Since the target child might be destroyed, it might become
- * an orphan, and we can only check for orphans on the local
- * MDT right now, so we send the rename request to the MDT
- * where the target child is located. If the target child does
- * not exist, then the request is sent to the target parent.
- */
- target_exp = tgt->ltd_exp;
- }
-
- rc = md_rename(target_exp, op_data, old, oldlen, new, newlen, request);
- if (rc && rc != -EREMOTE)
- return rc;
-
- body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
- if (!body)
- return -EPROTO;
-
- /* Not cross-ref case, just get out of here. */
- if (likely(!(body->mbo_valid & OBD_MD_MDS)))
- return rc;
-
- CDEBUG(D_INODE, "%s: try rename to another MDT for " DFID "\n",
- exp->exp_obd->obd_name, PFID(&body->mbo_fid1));
-
- op_data->op_fid4 = body->mbo_fid1;
- ptlrpc_req_finished(*request);
- *request = NULL;
- goto retry_rename;
-}
-
-static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data,
- void *ea, size_t ealen, struct ptlrpc_request **request)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
-
- CDEBUG(D_INODE, "SETATTR for " DFID ", valid 0x%x\n",
- PFID(&op_data->op_fid1), op_data->op_attr.ia_valid);
-
- op_data->op_flags |= MF_MDC_CANCEL_FID1;
- tgt = lmv_find_target(lmv, &op_data->op_fid1);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- return md_setattr(tgt->ltd_exp, op_data, ea, ealen, request);
-}
-
-static int lmv_sync(struct obd_export *exp, const struct lu_fid *fid,
- struct ptlrpc_request **request)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
-
- tgt = lmv_find_target(lmv, fid);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- return md_sync(tgt->ltd_exp, fid, request);
-}
-
-/**
- * Get the current minimum entry from a striped directory
- *
- * This function searches all of the sub-stripes for the dir entry whose
- * hash value is the closest (>=) to @hash_offset. It is only called for
- * striped directories.
- *
- * \param[in] exp export of LMV
- * \param[in] op_data parameters transferred between client MD
- * stacks; stripe information is included in
- * this parameter
- * \param[in] cb_op ldlm callback used in enqueue in
- * mdc_read_page
- * \param[in] hash_offset the hash value used to locate the
- * minimum (closest) dir entry
- * \param[in,out] stripe_offset the caller uses this to indicate the stripe
- * index of the last entry, so as to avoid hash
- * conflicts between stripes. It is also used
- * to return the stripe index of the current
- * dir entry.
- * \param[in,out] entp the minimum entry; it is also used to pass
- * in the last dir entry, to resolve hash
- * conflicts
- *
- * \param[out] ppage the page which holds the minimum entry
- *
- * \retval 0 the entry was retrieved successfully
- * \retval negative errno the entry could not be retrieved
- */
-static int lmv_get_min_striped_entry(struct obd_export *exp,
- struct md_op_data *op_data,
- struct md_callback *cb_op,
- __u64 hash_offset, int *stripe_offset,
- struct lu_dirent **entp,
- struct page **ppage)
-{
- struct lmv_stripe_md *lsm = op_data->op_mea1;
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lu_dirent *min_ent = NULL;
- struct page *min_page = NULL;
- struct lmv_tgt_desc *tgt;
- int stripe_count;
- int min_idx = 0;
- int rc = 0;
- int i;
-
- stripe_count = lsm->lsm_md_stripe_count;
- for (i = 0; i < stripe_count; i++) {
- __u64 stripe_hash = hash_offset;
- struct lu_dirent *ent = NULL;
- struct page *page = NULL;
- struct lu_dirpage *dp;
-
- tgt = lmv_get_target(lmv, lsm->lsm_md_oinfo[i].lmo_mds, NULL);
- if (IS_ERR(tgt)) {
- rc = PTR_ERR(tgt);
- goto out;
- }
-
- /*
- * op_data is shared by all stripes, so we need to
- * reset these values for each stripe
- */
- op_data->op_fid1 = lsm->lsm_md_oinfo[i].lmo_fid;
- op_data->op_fid2 = lsm->lsm_md_oinfo[i].lmo_fid;
- op_data->op_data = lsm->lsm_md_oinfo[i].lmo_root;
-next:
- rc = md_read_page(tgt->ltd_exp, op_data, cb_op, stripe_hash,
- &page);
- if (rc)
- goto out;
-
- dp = page_address(page);
- for (ent = lu_dirent_start(dp); ent;
- ent = lu_dirent_next(ent)) {
- /* Skip dummy entry */
- if (!le16_to_cpu(ent->lde_namelen))
- continue;
-
- if (le64_to_cpu(ent->lde_hash) < hash_offset)
- continue;
-
- if (le64_to_cpu(ent->lde_hash) == hash_offset &&
- (*entp == ent || i < *stripe_offset))
- continue;
-
- /* skip . and .. for other stripes */
- if (i && (!strncmp(ent->lde_name, ".",
- le16_to_cpu(ent->lde_namelen)) ||
- !strncmp(ent->lde_name, "..",
- le16_to_cpu(ent->lde_namelen))))
- continue;
- break;
- }
-
- if (!ent) {
- stripe_hash = le64_to_cpu(dp->ldp_hash_end);
-
- kunmap(page);
- put_page(page);
- page = NULL;
-
- /*
- * reached the end of the current stripe, go to the
- * next stripe
- */
- if (stripe_hash == MDS_DIR_END_OFF)
- continue;
- else
- goto next;
- }
-
- if (min_ent) {
- if (le64_to_cpu(min_ent->lde_hash) >
- le64_to_cpu(ent->lde_hash)) {
- min_ent = ent;
- kunmap(min_page);
- put_page(min_page);
- min_idx = i;
- min_page = page;
- } else {
- kunmap(page);
- put_page(page);
- page = NULL;
- }
- } else {
- min_ent = ent;
- min_page = page;
- min_idx = i;
- }
- }
-
-out:
- if (*ppage) {
- kunmap(*ppage);
- put_page(*ppage);
- }
- *stripe_offset = min_idx;
- *entp = min_ent;
- *ppage = min_page;
- return rc;
-}
-
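-/*
- * Editor's note: the function above is the selection step of a k-way
- * merge across stripes. A minimal sketch of the same idea over plain
- * arrays (hypothetical demo code, not part of the driver):
- */
-#if 0
-static int demo_min_stripe(const __u64 *next_hash, int nr_stripes)
-{
- int i, min_idx = -1;
-
- for (i = 0; i < nr_stripes; i++)
- if (min_idx < 0 || next_hash[i] < next_hash[min_idx])
- min_idx = i; /* stripe holding the smallest hash */
-
- return min_idx;
-}
-#endif
-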
-/**
- * Build a dir entry page from a striped directory
- *
- * This function gets one entry by @offset from a striped directory. It
- * reads entries from all of the stripes and chooses the one closest to
- * the required offset (@offset). A few notes:
- * 1. Skip . and .. for non-zero stripes, because a directory can only
- * have one . and one .. entry.
- * 2. op_data is shared by all of the stripes instead of allocating a
- * new one, so it needs to be restored before being reused.
- * 3. Release the entry page if it is not the one chosen.
- *
- * \param[in] exp obd export referring to LMV
- * \param[in] op_data holds the MD parameters of read_entry
- * \param[in] cb_op ldlm callback used in enqueue in mdc_read_entry
- * \param[in] offset starting hash offset of the entry to read
- * \param[out] ppage the page holding the entry. Note: because the entry
- * will be accessed in the upper layer, the page must be
- * held until the entry is no longer used, see
- * ll_dir_entry_next.
- *
- * \retval 0 the entry was retrieved successfully
- * \retval <0 the entry could not be retrieved
- */
-static int lmv_read_striped_page(struct obd_export *exp,
- struct md_op_data *op_data,
- struct md_callback *cb_op,
- __u64 offset, struct page **ppage)
-{
- struct inode *master_inode = op_data->op_data;
- struct lu_fid master_fid = op_data->op_fid1;
- __u64 hash_offset = offset;
- __u32 ldp_flags;
- struct page *min_ent_page = NULL;
- struct page *ent_page = NULL;
- struct lu_dirent *min_ent = NULL;
- struct lu_dirent *last_ent;
- struct lu_dirent *ent;
- struct lu_dirpage *dp;
- size_t left_bytes;
- int ent_idx = 0;
- void *area;
- int rc;
-
- /*
- * Allocate a page and read entries from all of stripes and fill
- * the page by hash order
- */
- ent_page = alloc_page(GFP_KERNEL);
- if (!ent_page)
- return -ENOMEM;
-
- /* Initialize the entry page */
- dp = kmap(ent_page);
- memset(dp, 0, sizeof(*dp));
- dp->ldp_hash_start = cpu_to_le64(offset);
- ldp_flags = LDF_COLLIDE;
-
- area = dp + 1;
- left_bytes = PAGE_SIZE - sizeof(*dp);
- ent = area;
- last_ent = ent;
- do {
- __u16 ent_size;
-
- /* Find the minum entry from all sub-stripes */
- rc = lmv_get_min_striped_entry(exp, op_data, cb_op, hash_offset,
- &ent_idx, &min_ent,
- &min_ent_page);
- if (rc)
- goto out;
-
- /*
- * If no minimum entry can be found, we have already reached
- * the end of this directory
- */
- if (!min_ent) {
- last_ent->lde_reclen = 0;
- hash_offset = MDS_DIR_END_OFF;
- goto out;
- }
-
- ent_size = le16_to_cpu(min_ent->lde_reclen);
-
- /*
- * The last entry of a stripe page has lde_reclen == 0, but
- * it is not necessarily the last entry of this temporary
- * merged page, so compute its real record size
- */
- if (!ent_size)
- ent_size = lu_dirent_calc_size(
- le16_to_cpu(min_ent->lde_namelen),
- le32_to_cpu(min_ent->lde_attrs));
- if (ent_size > left_bytes) {
- last_ent->lde_reclen = cpu_to_le16(0);
- hash_offset = le64_to_cpu(min_ent->lde_hash);
- goto out;
- }
-
- memcpy(ent, min_ent, ent_size);
-
- /*
- * Replace . with master FID and Replace .. with the parent FID
- * of master object
- */
- if (!strncmp(ent->lde_name, ".",
- le16_to_cpu(ent->lde_namelen)) &&
- le16_to_cpu(ent->lde_namelen) == 1)
- fid_cpu_to_le(&ent->lde_fid, &master_fid);
- else if (!strncmp(ent->lde_name, "..",
- le16_to_cpu(ent->lde_namelen)) &&
- le16_to_cpu(ent->lde_namelen) == 2)
- fid_cpu_to_le(&ent->lde_fid, &op_data->op_fid3);
-
- left_bytes -= ent_size;
- ent->lde_reclen = cpu_to_le16(ent_size);
- last_ent = ent;
- ent = (void *)ent + ent_size;
- hash_offset = le64_to_cpu(min_ent->lde_hash);
- if (hash_offset == MDS_DIR_END_OFF) {
- last_ent->lde_reclen = 0;
- break;
- }
- } while (1);
-out:
- if (min_ent_page) {
- kunmap(min_ent_page);
- put_page(min_ent_page);
- }
-
- if (unlikely(rc)) {
- __free_page(ent_page);
- ent_page = NULL;
- } else {
- if (ent == area)
- ldp_flags |= LDF_EMPTY;
- dp->ldp_flags |= cpu_to_le32(ldp_flags);
- dp->ldp_hash_end = cpu_to_le64(hash_offset);
- }
-
- /*
- * We do not want to allocate md_op_data for each
- * dir entry read, so op_data is shared by every stripe;
- * it therefore needs to be restored to its original values
- * before returning to the upper layer
- */
- op_data->op_fid1 = master_fid;
- op_data->op_fid2 = master_fid;
- op_data->op_data = master_inode;
-
- *ppage = ent_page;
-
- return rc;
-}
-
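-/*
- * Editor's sketch, assumptions noted: when lde_reclen is 0,
- * lu_dirent_calc_size() recomputes the record size from the name length
- * and attribute flags. Conceptually it is an 8-byte-aligned
- * "header + name" size, roughly (the attrs-dependent part is omitted):
- */
-#if 0
-static __u16 demo_dirent_size(__u16 namelen)
-{
- /* the real helper also adds space for lde_attrs-dependent fields */
- return ALIGN(sizeof(struct lu_dirent) + namelen, 8);
-}
-#endif
-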
-static int lmv_read_page(struct obd_export *exp, struct md_op_data *op_data,
- struct md_callback *cb_op, __u64 offset,
- struct page **ppage)
-{
- struct lmv_stripe_md *lsm = op_data->op_mea1;
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
-
- if (unlikely(lsm))
- return lmv_read_striped_page(exp, op_data, cb_op, offset, ppage);
-
- tgt = lmv_find_target(lmv, &op_data->op_fid1);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- return md_read_page(tgt->ltd_exp, op_data, cb_op, offset, ppage);
-}
-
-/**
- * Unlink a file/directory
- *
- * Unlink a file or directory under the parent dir. The unlink request
- * is usually sent to the MDT where the child is located, but if
- * the client does not have the child FID then the request is sent to
- * the MDT where the parent is located.
- *
- * If the parent is a striped directory then it also needs to locate the
- * stripe in which the name of the child is located, and replace the
- * parent FID (@op->op_fid1) with the stripe FID. Note: if the stripe is
- * unknown, it walks through all of the sub-stripes until the child is
- * finally unlinked.
- *
- * \param[in] exp export referring to LMV
- * \param[in] op_data parameters transferred between client MD
- * stacks: name, namelen, FIDs, etc.
- * op_fid1 is the parent FID, op_fid2 is the
- * child FID.
- * \param[out] request points to the unlink request.
- *
- * \retval 0 on success
- * \retval negative errno on failure
- */
-static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data,
- struct ptlrpc_request **request)
-{
- struct lmv_stripe_md *lsm = op_data->op_mea1;
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *parent_tgt = NULL;
- struct lmv_tgt_desc *tgt = NULL;
- struct mdt_body *body;
- int stripe_index = 0;
- int rc;
-
-retry_unlink:
- /* For striped dir, we need to locate the parent as well */
- if (lsm) {
- struct lmv_tgt_desc *tmp;
-
- LASSERT(op_data->op_name && op_data->op_namelen);
-
- tmp = lmv_locate_target_for_name(lmv, lsm,
- op_data->op_name,
- op_data->op_namelen,
- &op_data->op_fid1,
- &op_data->op_mds);
-
- /*
- * A return of -EBADFD means an unknown hash type;
- * all sub-stripes might need to be tried here
- */
- if (IS_ERR(tmp) && PTR_ERR(tmp) != -EBADFD)
- return PTR_ERR(tmp);
-
- /*
- * Note: both a migrating dir and an unknown-hash dir need to
- * try all of the sub-stripes, so the name search must start
- * from stripe 0. The migrating dir is already handled inside
- * lmv_locate_target_for_name(), so only the unknown hash
- * type directory is checked here
- */
- if (!lmv_is_known_hash_type(lsm->lsm_md_hash_type)) {
- struct lmv_oinfo *oinfo;
-
- oinfo = &lsm->lsm_md_oinfo[stripe_index];
-
- op_data->op_fid1 = oinfo->lmo_fid;
- op_data->op_mds = oinfo->lmo_mds;
- }
- }
-
-try_next_stripe:
- /* Send unlink requests to the MDT where the child is located */
- if (likely(!fid_is_zero(&op_data->op_fid2)))
- tgt = lmv_find_target(lmv, &op_data->op_fid2);
- else if (lsm)
- tgt = lmv_get_target(lmv, op_data->op_mds, NULL);
- else
- tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
-
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid());
- op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
- op_data->op_cap = current_cap();
-
- /*
- * If the child's fid is given, cancel unused locks for it if it is
- * from another export than the parent.
- *
- * The LOOKUP lock for the child (fid3) should also be cancelled on
- * the parent target in mdc_unlink().
- */
- op_data->op_flags |= MF_MDC_CANCEL_FID1 | MF_MDC_CANCEL_FID3;
-
- /*
- * Cancel FULL locks on child (fid3).
- */
- parent_tgt = lmv_find_target(lmv, &op_data->op_fid1);
- if (IS_ERR(parent_tgt))
- return PTR_ERR(parent_tgt);
-
- if (parent_tgt != tgt) {
- rc = lmv_early_cancel(exp, parent_tgt, op_data, tgt->ltd_idx,
- LCK_EX, MDS_INODELOCK_LOOKUP,
- MF_MDC_CANCEL_FID3);
- if (rc != 0)
- return rc;
- }
-
- rc = lmv_early_cancel(exp, NULL, op_data, tgt->ltd_idx, LCK_EX,
- MDS_INODELOCK_FULL, MF_MDC_CANCEL_FID3);
- if (rc != 0)
- return rc;
-
- CDEBUG(D_INODE, "unlink with fid=" DFID "/" DFID " -> mds #%u\n",
- PFID(&op_data->op_fid1), PFID(&op_data->op_fid2), tgt->ltd_idx);
-
- rc = md_unlink(tgt->ltd_exp, op_data, request);
- if (rc != 0 && rc != -EREMOTE && rc != -ENOENT)
- return rc;
-
- /* Try next stripe if it is needed. */
- if (rc == -ENOENT && lsm && lmv_need_try_all_stripes(lsm)) {
- struct lmv_oinfo *oinfo;
-
- stripe_index++;
- if (stripe_index >= lsm->lsm_md_stripe_count)
- return rc;
-
- oinfo = &lsm->lsm_md_oinfo[stripe_index];
-
- op_data->op_fid1 = oinfo->lmo_fid;
- op_data->op_mds = oinfo->lmo_mds;
-
- ptlrpc_req_finished(*request);
- *request = NULL;
-
- goto try_next_stripe;
- }
-
- body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
- if (!body)
- return -EPROTO;
-
- /* Not cross-ref case, just get out of here. */
- if (likely(!(body->mbo_valid & OBD_MD_MDS)))
- return rc;
-
- CDEBUG(D_INODE, "%s: try unlink to another MDT for " DFID "\n",
- exp->exp_obd->obd_name, PFID(&body->mbo_fid1));
-
- /* This is a remote object; try the remote MDT. Note: it may
- * retry more than once here. Consider the following case:
- * /mnt/lustre is root on MDT0, remote1 is on MDT1.
- * 1. Initially client A does not know where remote1 is; it
- * sends the unlink RPC to MDT0, MDT0 returns -EREMOTE, and
- * A resends the unlink RPC to MDT1 (1st retry).
- *
- * 2. While that unlink RPC is in flight, client B does
- * mv /mnt/lustre/remote1 /mnt/lustre/remote2
- * and creates a new remote1, but on MDT0.
- *
- * 3. MDT1 gets the unlink RPC (from A), takes the remote lock
- * on /mnt/lustre, looks up the fid of remote1, finds it is
- * a remote dir again, and replies -EREMOTE again.
- *
- * 4. Then A resends the unlink RPC to MDT0 (2nd retry).
- *
- * In theory it might retry an unlimited number of times here,
- * but that should be a very rare case.
- */
- op_data->op_fid2 = body->mbo_fid1;
- ptlrpc_req_finished(*request);
- *request = NULL;
-
- goto retry_unlink;
-}
-
-static int lmv_precleanup(struct obd_device *obd)
-{
- fld_client_debugfs_fini(&obd->u.lmv.lmv_fld);
- lprocfs_obd_cleanup(obd);
- return 0;
-}
-
-/**
- * Get by key a value associated with a LMV device.
- *
- * Dispatch request to lower-layer devices as needed.
- *
- * \param[in] env execution environment for this thread
- * \param[in] exp export for the LMV device
- * \param[in] keylen length of key identifier
- * \param[in] key identifier of key to get value for
- * \param[in] vallen size of \a val
- * \param[out] val pointer to storage location for value
- *
- * \retval 0 on success
- * \retval negative negated errno on failure
- */
-static int lmv_get_info(const struct lu_env *env, struct obd_export *exp,
- __u32 keylen, void *key, __u32 *vallen, void *val)
-{
- struct obd_device *obd;
- struct lmv_obd *lmv;
- int rc = 0;
-
- obd = class_exp2obd(exp);
- if (!obd) {
- CDEBUG(D_IOCTL, "Invalid client cookie %#llx\n",
- exp->exp_handle.h_cookie);
- return -EINVAL;
- }
-
- lmv = &obd->u.lmv;
- if (keylen >= strlen("remote_flag") && !strcmp(key, "remote_flag")) {
- int i;
-
- LASSERT(*vallen == sizeof(__u32));
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- struct lmv_tgt_desc *tgt = lmv->tgts[i];
-
- /*
- * All tgts should be connected when this gets called.
- */
- if (!tgt || !tgt->ltd_exp)
- continue;
-
- if (!obd_get_info(env, tgt->ltd_exp, keylen, key,
- vallen, val))
- return 0;
- }
- return -EINVAL;
- } else if (KEY_IS(KEY_MAX_EASIZE) ||
- KEY_IS(KEY_DEFAULT_EASIZE) ||
- KEY_IS(KEY_CONN_DATA)) {
- /*
- * Forward this request to the first MDS; it should know the
- * LOV desc.
- */
- rc = obd_get_info(env, lmv->tgts[0]->ltd_exp, keylen, key,
- vallen, val);
- if (!rc && KEY_IS(KEY_CONN_DATA))
- exp->exp_connect_data = *(struct obd_connect_data *)val;
- return rc;
- } else if (KEY_IS(KEY_TGT_COUNT)) {
- *((int *)val) = lmv->desc.ld_tgt_count;
- return 0;
- }
-
- CDEBUG(D_IOCTL, "Invalid key\n");
- return -EINVAL;
-}
-
-/**
- * Asynchronously set by key a value associated with a LMV device.
- *
- * Dispatch request to lower-layer devices as needed.
- *
- * \param[in] env execution environment for this thread
- * \param[in] exp export for the LMV device
- * \param[in] keylen length of key identifier
- * \param[in] key identifier of key to store value for
- * \param[in] vallen size of value to store
- * \param[in] val pointer to data to be stored
- * \param[in] set optional list of related ptlrpc requests
- *
- * \retval 0 on success
- * \retval negative negated errno on failure
- */
-static int lmv_set_info_async(const struct lu_env *env, struct obd_export *exp,
- u32 keylen, void *key, u32 vallen,
- void *val, struct ptlrpc_request_set *set)
-{
- struct lmv_tgt_desc *tgt;
- struct obd_device *obd;
- struct lmv_obd *lmv;
- int rc = 0;
-
- obd = class_exp2obd(exp);
- if (!obd) {
- CDEBUG(D_IOCTL, "Invalid client cookie %#llx\n",
- exp->exp_handle.h_cookie);
- return -EINVAL;
- }
- lmv = &obd->u.lmv;
-
- if (KEY_IS(KEY_READ_ONLY) || KEY_IS(KEY_FLUSH_CTX) ||
- KEY_IS(KEY_DEFAULT_EASIZE)) {
- int i, err = 0;
-
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- tgt = lmv->tgts[i];
-
- if (!tgt || !tgt->ltd_exp)
- continue;
-
- err = obd_set_info_async(env, tgt->ltd_exp,
- keylen, key, vallen, val, set);
- if (err && rc == 0)
- rc = err;
- }
-
- return rc;
- }
-
- return -EINVAL;
-}
-
-static int lmv_unpack_md_v1(struct obd_export *exp, struct lmv_stripe_md *lsm,
- const struct lmv_mds_md_v1 *lmm1)
-{
- struct lmv_obd *lmv = &exp->exp_obd->u.lmv;
- int stripe_count;
- int rc = 0;
- int cplen;
- int i;
-
- lsm->lsm_md_magic = le32_to_cpu(lmm1->lmv_magic);
- lsm->lsm_md_stripe_count = le32_to_cpu(lmm1->lmv_stripe_count);
- lsm->lsm_md_master_mdt_index = le32_to_cpu(lmm1->lmv_master_mdt_index);
- if (OBD_FAIL_CHECK(OBD_FAIL_UNKNOWN_LMV_STRIPE))
- lsm->lsm_md_hash_type = LMV_HASH_TYPE_UNKNOWN;
- else
- lsm->lsm_md_hash_type = le32_to_cpu(lmm1->lmv_hash_type);
- lsm->lsm_md_layout_version = le32_to_cpu(lmm1->lmv_layout_version);
- cplen = strlcpy(lsm->lsm_md_pool_name, lmm1->lmv_pool_name,
- sizeof(lsm->lsm_md_pool_name));
-
- if (cplen >= sizeof(lsm->lsm_md_pool_name))
- return -E2BIG;
-
- CDEBUG(D_INFO, "unpack lsm count %d, master %d hash_type %d layout_version %d\n",
- lsm->lsm_md_stripe_count, lsm->lsm_md_master_mdt_index,
- lsm->lsm_md_hash_type, lsm->lsm_md_layout_version);
-
- stripe_count = le32_to_cpu(lmm1->lmv_stripe_count);
- for (i = 0; i < stripe_count; i++) {
- fid_le_to_cpu(&lsm->lsm_md_oinfo[i].lmo_fid,
- &lmm1->lmv_stripe_fids[i]);
- rc = lmv_fld_lookup(lmv, &lsm->lsm_md_oinfo[i].lmo_fid,
- &lsm->lsm_md_oinfo[i].lmo_mds);
- if (rc)
- return rc;
- CDEBUG(D_INFO, "unpack fid #%d " DFID "\n", i,
- PFID(&lsm->lsm_md_oinfo[i].lmo_fid));
- }
-
- return rc;
-}
-
-static int lmv_unpackmd(struct obd_export *exp, struct lmv_stripe_md **lsmp,
- const union lmv_mds_md *lmm, size_t lmm_size)
-{
- struct lmv_stripe_md *lsm;
- bool allocated = false;
- int lsm_size, rc;
-
- LASSERT(lsmp);
-
- lsm = *lsmp;
- /* Free memmd */
- if (lsm && !lmm) {
- int i;
-
- for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
- /*
- * For a migrating inode, the master stripe and the master
- * object are the same, so no iput is needed; see
- * ll_update_lsm_md
- */
- if (!(lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION &&
- !i) && lsm->lsm_md_oinfo[i].lmo_root)
- iput(lsm->lsm_md_oinfo[i].lmo_root);
- }
-
- kvfree(lsm);
- *lsmp = NULL;
- return 0;
- }
-
- if (le32_to_cpu(lmm->lmv_magic) == LMV_MAGIC_STRIPE)
- return -EPERM;
-
- /* Unpack memmd */
- if (le32_to_cpu(lmm->lmv_magic) != LMV_MAGIC_V1 &&
- le32_to_cpu(lmm->lmv_magic) != LMV_USER_MAGIC) {
- CERROR("%s: invalid lmv magic %x: rc = %d\n",
- exp->exp_obd->obd_name, le32_to_cpu(lmm->lmv_magic),
- -EIO);
- return -EIO;
- }
-
- if (le32_to_cpu(lmm->lmv_magic) == LMV_MAGIC_V1)
- lsm_size = lmv_stripe_md_size(lmv_mds_md_stripe_count_get(lmm));
- else
- /**
- * Unpack the default dirstripe (lmv_user_md) into an
- * lmv_stripe_md; the stripe count should then be 0.
- */
- lsm_size = lmv_stripe_md_size(0);
-
- if (!lsm) {
- lsm = kvzalloc(lsm_size, GFP_NOFS);
- if (!lsm)
- return -ENOMEM;
- allocated = true;
- *lsmp = lsm;
- }
-
- switch (le32_to_cpu(lmm->lmv_magic)) {
- case LMV_MAGIC_V1:
- rc = lmv_unpack_md_v1(exp, lsm, &lmm->lmv_md_v1);
- break;
- default:
- CERROR("%s: unrecognized magic %x\n", exp->exp_obd->obd_name,
- le32_to_cpu(lmm->lmv_magic));
- rc = -EINVAL;
- break;
- }
-
- if (rc && allocated) {
- kvfree(lsm);
- *lsmp = NULL;
- lsm_size = rc;
- }
- return lsm_size;
-}
-
-void lmv_free_memmd(struct lmv_stripe_md *lsm)
-{
- lmv_unpackmd(NULL, &lsm, NULL, 0);
-}
-EXPORT_SYMBOL(lmv_free_memmd);
-
-static int lmv_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
- union ldlm_policy_data *policy,
- enum ldlm_mode mode, enum ldlm_cancel_flags flags,
- void *opaque)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- int rc = 0;
- int err;
- u32 i;
-
- LASSERT(fid);
-
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- struct lmv_tgt_desc *tgt = lmv->tgts[i];
-
- if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
- continue;
-
- err = md_cancel_unused(tgt->ltd_exp, fid, policy, mode, flags,
- opaque);
- if (!rc)
- rc = err;
- }
- return rc;
-}
-
-static int lmv_set_lock_data(struct obd_export *exp,
- const struct lustre_handle *lockh,
- void *data, __u64 *bits)
-{
- struct lmv_obd *lmv = &exp->exp_obd->u.lmv;
- struct lmv_tgt_desc *tgt = lmv->tgts[0];
-
- if (!tgt || !tgt->ltd_exp)
- return -EINVAL;
-
- return md_set_lock_data(tgt->ltd_exp, lockh, data, bits);
-}
-
-static enum ldlm_mode lmv_lock_match(struct obd_export *exp, __u64 flags,
- const struct lu_fid *fid,
- enum ldlm_type type,
- union ldlm_policy_data *policy,
- enum ldlm_mode mode,
- struct lustre_handle *lockh)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- enum ldlm_mode rc;
- int tgt;
- u32 i;
-
- CDEBUG(D_INODE, "Lock match for " DFID "\n", PFID(fid));
-
- /*
- * With DNE every object can have two locks in different namespaces:
- * lookup lock in space of MDT storing direntry and update/open lock in
- * space of MDT storing inode. Try the MDT that the FID maps to first,
- * since this can be easily found, and only try others if that fails.
- */
- for (i = 0, tgt = lmv_find_target_index(lmv, fid);
- i < lmv->desc.ld_tgt_count;
- i++, tgt = (tgt + 1) % lmv->desc.ld_tgt_count) {
- if (tgt < 0) {
- CDEBUG(D_HA, "%s: " DFID " is inaccessible: rc = %d\n",
- obd->obd_name, PFID(fid), tgt);
- tgt = 0;
- }
-
- if (!lmv->tgts[tgt] || !lmv->tgts[tgt]->ltd_exp ||
- !lmv->tgts[tgt]->ltd_active)
- continue;
-
- rc = md_lock_match(lmv->tgts[tgt]->ltd_exp, flags, fid,
- type, policy, mode, lockh);
- if (rc)
- return rc;
- }
-
- return 0;
-}
-
-static int lmv_get_lustre_md(struct obd_export *exp,
- struct ptlrpc_request *req,
- struct obd_export *dt_exp,
- struct obd_export *md_exp,
- struct lustre_md *md)
-{
- struct lmv_obd *lmv = &exp->exp_obd->u.lmv;
- struct lmv_tgt_desc *tgt = lmv->tgts[0];
-
- if (!tgt || !tgt->ltd_exp)
- return -EINVAL;
- return md_get_lustre_md(tgt->ltd_exp, req, dt_exp, md_exp, md);
-}
-
-static int lmv_free_lustre_md(struct obd_export *exp, struct lustre_md *md)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt = lmv->tgts[0];
-
- if (md->lmv) {
- lmv_free_memmd(md->lmv);
- md->lmv = NULL;
- }
- if (!tgt || !tgt->ltd_exp)
- return -EINVAL;
- return md_free_lustre_md(tgt->ltd_exp, md);
-}
-
-static int lmv_set_open_replay_data(struct obd_export *exp,
- struct obd_client_handle *och,
- struct lookup_intent *it)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
-
- tgt = lmv_find_target(lmv, &och->och_fid);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- return md_set_open_replay_data(tgt->ltd_exp, och, it);
-}
-
-static int lmv_clear_open_replay_data(struct obd_export *exp,
- struct obd_client_handle *och)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
-
- tgt = lmv_find_target(lmv, &och->och_fid);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- return md_clear_open_replay_data(tgt->ltd_exp, och);
-}
-
-static int lmv_intent_getattr_async(struct obd_export *exp,
- struct md_enqueue_info *minfo)
-{
- struct md_op_data *op_data = &minfo->mi_data;
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *ptgt = NULL;
- struct lmv_tgt_desc *ctgt = NULL;
-
- if (!fid_is_sane(&op_data->op_fid2))
- return -EINVAL;
-
- ptgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
- if (IS_ERR(ptgt))
- return PTR_ERR(ptgt);
-
- ctgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid2);
- if (IS_ERR(ctgt))
- return PTR_ERR(ctgt);
-
- /*
- * If the child is on a remote MDT, we need 2 async RPCs to fetch both
- * the LOOKUP lock on the parent and the UPDATE lock on the child MDT,
- * which makes everything complicated. Since a remote dir is a rare
- * case, and not supporting it in statahead won't cause any issue, drop
- * its support for now.
- */
- if (ptgt != ctgt)
- return -ENOTSUPP;
-
- return md_intent_getattr_async(ptgt->ltd_exp, minfo);
-}
-
-static int lmv_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
- struct lu_fid *fid, __u64 *bits)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
-
- tgt = lmv_find_target(lmv, fid);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- return md_revalidate_lock(tgt->ltd_exp, it, fid, bits);
-}
-
-static int
-lmv_get_fid_from_lsm(struct obd_export *exp,
- const struct lmv_stripe_md *lsm,
- const char *name, int namelen, struct lu_fid *fid)
-{
- const struct lmv_oinfo *oinfo;
-
- LASSERT(lsm);
- oinfo = lsm_name_to_stripe_info(lsm, name, namelen);
- if (IS_ERR(oinfo))
- return PTR_ERR(oinfo);
-
- *fid = oinfo->lmo_fid;
-
- return 0;
-}
-
-/**
- * For lmv, the request only needs to be sent to the master MDT, which
- * then processes it with the other slave MDTs. The only exception is
- * Q_GETOQUOTA, for which data is fetched directly from the slave MDTs.
- */
-static int lmv_quotactl(struct obd_device *unused, struct obd_export *exp,
- struct obd_quotactl *oqctl)
-{
- struct obd_device *obd = class_exp2obd(exp);
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt = lmv->tgts[0];
- int rc = 0;
- __u64 curspace = 0, curinodes = 0;
- u32 i;
-
- if (!tgt || !tgt->ltd_exp || !tgt->ltd_active ||
- !lmv->desc.ld_tgt_count) {
- CERROR("master lmv inactive\n");
- return -EIO;
- }
-
- if (oqctl->qc_cmd != Q_GETOQUOTA)
- return obd_quotactl(tgt->ltd_exp, oqctl);
-
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- int err;
-
- tgt = lmv->tgts[i];
-
- if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
- continue;
-
- err = obd_quotactl(tgt->ltd_exp, oqctl);
- if (err) {
- CERROR("getquota on mdt %d failed. %d\n", i, err);
- if (!rc)
- rc = err;
- } else {
- curspace += oqctl->qc_dqblk.dqb_curspace;
- curinodes += oqctl->qc_dqblk.dqb_curinodes;
- }
- }
- oqctl->qc_dqblk.dqb_curspace = curspace;
- oqctl->qc_dqblk.dqb_curinodes = curinodes;
-
- return rc;
-}
-
-static int lmv_merge_attr(struct obd_export *exp,
- const struct lmv_stripe_md *lsm,
- struct cl_attr *attr,
- ldlm_blocking_callback cb_blocking)
-{
- int rc, i;
-
- rc = lmv_revalidate_slaves(exp, lsm, cb_blocking, 0);
- if (rc < 0)
- return rc;
-
- for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
- struct inode *inode = lsm->lsm_md_oinfo[i].lmo_root;
-
- CDEBUG(D_INFO, "" DFID " size %llu, blocks %llu nlink %u, atime %lu ctime %lu, mtime %lu.\n",
- PFID(&lsm->lsm_md_oinfo[i].lmo_fid),
- i_size_read(inode), (unsigned long long)inode->i_blocks,
- inode->i_nlink, LTIME_S(inode->i_atime),
- LTIME_S(inode->i_ctime), LTIME_S(inode->i_mtime));
-
- /* for a slave stripe, subtract the nlink contributed by . and .. */
- if (i)
- attr->cat_nlink += inode->i_nlink - 2;
- else
- attr->cat_nlink = inode->i_nlink;
-
- attr->cat_size += i_size_read(inode);
- attr->cat_blocks += inode->i_blocks;
-
- if (attr->cat_atime < LTIME_S(inode->i_atime))
- attr->cat_atime = LTIME_S(inode->i_atime);
-
- if (attr->cat_ctime < LTIME_S(inode->i_ctime))
- attr->cat_ctime = LTIME_S(inode->i_ctime);
-
- if (attr->cat_mtime < LTIME_S(inode->i_mtime))
- attr->cat_mtime = LTIME_S(inode->i_mtime);
- }
- return 0;
-}
-
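-/*
- * Editor's example for the nlink merge in lmv_merge_attr() above: for a
- * directory striped over 3 MDTs where each slave stripe only contains
- * its own . and .. (nlink == 2), the result is
- * cat_nlink = 2 + (2 - 2) + (2 - 2) = 2, so the . and .. entries of the
- * slave stripes are not double-counted.
- */
-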
-static struct obd_ops lmv_obd_ops = {
- .owner = THIS_MODULE,
- .setup = lmv_setup,
- .cleanup = lmv_cleanup,
- .precleanup = lmv_precleanup,
- .process_config = lmv_process_config,
- .connect = lmv_connect,
- .disconnect = lmv_disconnect,
- .statfs = lmv_statfs,
- .get_info = lmv_get_info,
- .set_info_async = lmv_set_info_async,
- .notify = lmv_notify,
- .get_uuid = lmv_get_uuid,
- .iocontrol = lmv_iocontrol,
- .quotactl = lmv_quotactl
-};
-
-static struct md_ops lmv_md_ops = {
- .getstatus = lmv_getstatus,
- .null_inode = lmv_null_inode,
- .close = lmv_close,
- .create = lmv_create,
- .enqueue = lmv_enqueue,
- .getattr = lmv_getattr,
- .getxattr = lmv_getxattr,
- .getattr_name = lmv_getattr_name,
- .intent_lock = lmv_intent_lock,
- .link = lmv_link,
- .rename = lmv_rename,
- .setattr = lmv_setattr,
- .setxattr = lmv_setxattr,
- .sync = lmv_sync,
- .read_page = lmv_read_page,
- .unlink = lmv_unlink,
- .init_ea_size = lmv_init_ea_size,
- .cancel_unused = lmv_cancel_unused,
- .set_lock_data = lmv_set_lock_data,
- .lock_match = lmv_lock_match,
- .get_lustre_md = lmv_get_lustre_md,
- .free_lustre_md = lmv_free_lustre_md,
- .merge_attr = lmv_merge_attr,
- .set_open_replay_data = lmv_set_open_replay_data,
- .clear_open_replay_data = lmv_clear_open_replay_data,
- .intent_getattr_async = lmv_intent_getattr_async,
- .revalidate_lock = lmv_revalidate_lock,
- .get_fid_from_lsm = lmv_get_fid_from_lsm,
- .unpackmd = lmv_unpackmd,
-};
-
-static int __init lmv_init(void)
-{
- struct lprocfs_static_vars lvars;
- int rc;
-
- lprocfs_lmv_init_vars(&lvars);
-
- rc = libcfs_setup();
- if (rc)
- return rc;
-
- return class_register_type(&lmv_obd_ops, &lmv_md_ops,
- LUSTRE_LMV_NAME, NULL);
-}
-
-static void lmv_exit(void)
-{
- class_unregister_type(LUSTRE_LMV_NAME);
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Logical Metadata Volume");
-MODULE_VERSION(LUSTRE_VERSION_STRING);
-MODULE_LICENSE("GPL");
-
-module_init(lmv_init);
-module_exit(lmv_exit);
diff --git a/drivers/staging/lustre/lustre/lmv/lproc_lmv.c b/drivers/staging/lustre/lustre/lmv/lproc_lmv.c
deleted file mode 100644
index 30727b7acccc..000000000000
--- a/drivers/staging/lustre/lustre/lmv/lproc_lmv.c
+++ /dev/null
@@ -1,173 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <linux/seq_file.h>
-#include <linux/statfs.h>
-#include <lprocfs_status.h>
-#include <obd_class.h>
-#include "lmv_internal.h"
-
-static ssize_t numobd_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- struct lmv_desc *desc;
-
- desc = &dev->u.lmv.desc;
- return sprintf(buf, "%u\n", desc->ld_tgt_count);
-}
-LUSTRE_RO_ATTR(numobd);
-
-static ssize_t activeobd_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- struct lmv_desc *desc;
-
- desc = &dev->u.lmv.desc;
- return sprintf(buf, "%u\n", desc->ld_active_tgt_count);
-}
-LUSTRE_RO_ATTR(activeobd);
-
-static int lmv_desc_uuid_seq_show(struct seq_file *m, void *v)
-{
- struct obd_device *dev = (struct obd_device *)m->private;
- struct lmv_obd *lmv;
-
- LASSERT(dev);
- lmv = &dev->u.lmv;
- seq_printf(m, "%s\n", lmv->desc.ld_uuid.uuid);
- return 0;
-}
-
-LPROC_SEQ_FOPS_RO(lmv_desc_uuid);
-
-static void *lmv_tgt_seq_start(struct seq_file *p, loff_t *pos)
-{
- struct obd_device *dev = p->private;
- struct lmv_obd *lmv = &dev->u.lmv;
-
- while (*pos < lmv->tgts_size) {
- if (lmv->tgts[*pos])
- return lmv->tgts[*pos];
- ++*pos;
- }
-
- return NULL;
-}
-
-static void lmv_tgt_seq_stop(struct seq_file *p, void *v)
-{
-}
-
-static void *lmv_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos)
-{
- struct obd_device *dev = p->private;
- struct lmv_obd *lmv = &dev->u.lmv;
-
- ++*pos;
- while (*pos < lmv->tgts_size) {
- if (lmv->tgts[*pos])
- return lmv->tgts[*pos];
- ++*pos;
- }
-
- return NULL;
-}
-
-static int lmv_tgt_seq_show(struct seq_file *p, void *v)
-{
- struct lmv_tgt_desc *tgt = v;
-
- if (!tgt)
- return 0;
- seq_printf(p, "%u: %s %sACTIVE\n",
- tgt->ltd_idx, tgt->ltd_uuid.uuid,
- tgt->ltd_active ? "" : "IN");
- return 0;
-}
-
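-/*
- * Editor's note: given the format string above, each line of the
- * target_obd file looks like, e.g., "0: lustre-MDT0000_UUID ACTIVE"
- * or "1: <uuid> INACTIVE" (UUID values illustrative).
- */
-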
-static const struct seq_operations lmv_tgt_sops = {
- .start = lmv_tgt_seq_start,
- .stop = lmv_tgt_seq_stop,
- .next = lmv_tgt_seq_next,
- .show = lmv_tgt_seq_show,
-};
-
-static int lmv_target_seq_open(struct inode *inode, struct file *file)
-{
- struct seq_file *seq;
- int rc;
-
- rc = seq_open(file, &lmv_tgt_sops);
- if (rc)
- return rc;
-
- seq = file->private_data;
- seq->private = inode->i_private;
-
- return 0;
-}
-
-static struct lprocfs_vars lprocfs_lmv_obd_vars[] = {
- { "desc_uuid", &lmv_desc_uuid_fops, NULL, 0 },
- { NULL }
-};
-
-const struct file_operations lmv_proc_target_fops = {
- .owner = THIS_MODULE,
- .open = lmv_target_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static struct attribute *lmv_attrs[] = {
- &lustre_attr_activeobd.attr,
- &lustre_attr_numobd.attr,
- NULL,
-};
-
-static const struct attribute_group lmv_attr_group = {
- .attrs = lmv_attrs,
-};
-
-void lprocfs_lmv_init_vars(struct lprocfs_static_vars *lvars)
-{
- lvars->sysfs_vars = &lmv_attr_group;
- lvars->obd_vars = lprocfs_lmv_obd_vars;
-}
diff --git a/drivers/staging/lustre/lustre/lov/Makefile b/drivers/staging/lustre/lustre/lov/Makefile
deleted file mode 100644
index 1ebf0193f61a..000000000000
--- a/drivers/staging/lustre/lustre/lov/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LUSTRE_FS) += lov.o
-lov-y := lov_obd.o lov_pack.o lov_offset.o lov_merge.o \
- lov_request.o lov_ea.o lov_dev.o lov_object.o lov_page.o \
- lov_lock.o lov_io.o lovsub_dev.o lovsub_object.o lovsub_page.o \
- lovsub_lock.o lov_pool.o lproc_lov.o
diff --git a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
deleted file mode 100644
index e4f762137a4a..000000000000
--- a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
+++ /dev/null
@@ -1,639 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Internal interfaces of LOV layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@intel.com>
- */
-
-#ifndef LOV_CL_INTERNAL_H
-#define LOV_CL_INTERNAL_H
-
-#include <obd.h>
-#include <cl_object.h>
-#include "lov_internal.h"
-
-/** \defgroup lov lov
- * Logical object volume layer. This layer implements data striping (raid0).
- *
- * At the lov layer, a top-entity (object, page, lock, io) is connected to one
- * or more sub-entities: a top-object, representing a file, is connected to a
- * set of sub-objects, each representing a stripe; a file-level top-lock is
- * connected to a set of per-stripe sub-locks; a top-page is connected to a
- * (single) sub-page; and a top-level IO is connected to a set of (potentially
- * concurrent) sub-IOs.
- *
- * Sub-objects, sub-pages, and sub-IOs each have a well-defined parent
- * top-entity, while a single sub-lock can be part of multiple top-locks.
- *
- * Reference counting models are different for different types of entities:
- *
- * - top-object keeps a reference to its sub-objects, and destroys them
- * when it is destroyed.
- *
- * - top-page keeps a reference to its sub-page, and destroys it when it
- * is destroyed.
- *
- * - IO's are not reference counted.
- *
- * To implement a connection between top and sub entities, lov layer is split
- * into two pieces: lov ("upper half"), and lovsub ("bottom half"), both
- * implementing the full set of cl-interfaces. For example, a top-object has
- * vvp and lov layers, and its sub-object has lovsub and osc layers. The
- * lovsub layer is used to track the child-parent relationship.
- *
- * @{
- */
-
-struct lovsub_device;
-struct lovsub_object;
-struct lovsub_lock;
-
-enum lov_device_flags {
- LOV_DEV_INITIALIZED = 1 << 0
-};
-
-/*
- * Upper half.
- */
-
-struct lov_device {
- /*
- * XXX Locking of lov-private data is missing.
- */
- struct cl_device ld_cl;
- struct lov_obd *ld_lov;
- /** size of lov_device::ld_target[] array */
- __u32 ld_target_nr;
- struct lovsub_device **ld_target;
- __u32 ld_flags;
-};
-
-/**
- * Layout type.
- */
-enum lov_layout_type {
- LLT_EMPTY, /** empty file without body (mknod + truncate) */
- LLT_RAID0, /** striped file */
- LLT_RELEASED, /** file with no objects (data in HSM) */
- LLT_NR
-};
-
-static inline char *llt2str(enum lov_layout_type llt)
-{
- switch (llt) {
- case LLT_EMPTY:
- return "EMPTY";
- case LLT_RAID0:
- return "RAID0";
- case LLT_RELEASED:
- return "RELEASED";
- case LLT_NR:
- LBUG();
- }
- LBUG();
- return "";
-}
-
-/**
- * lov-specific file state.
- *
- * lov object has particular layout type, determining how top-object is built
- * on top of sub-objects. Layout type can change dynamically. When this
- * happens, lov_object::lo_type_guard semaphore is taken in exclusive mode,
- * all state pertaining to the old layout type is destroyed, and new state is
- * constructed. All object methods take said semaphore in the shared mode,
- * providing serialization against transition between layout types.
- *
- * To avoid multiple `if' or `switch' statements selecting behavior for the
- * current layout type, object methods perform double-dispatch, invoking the
- * function corresponding to the current layout type.
- */
-struct lov_object {
- struct cl_object lo_cl;
- /**
- * Serializes object operations with transitions between layout types.
- *
- * This semaphore is taken in shared mode by all object methods, and
- * is taken in exclusive mode when object type is changed.
- *
- * \see lov_object::lo_type
- */
- struct rw_semaphore lo_type_guard;
- /**
- * Type of an object. Protected by lov_object::lo_type_guard.
- */
- enum lov_layout_type lo_type;
- /**
-	 * True if the layout is invalid. This bit is set when the layout
-	 * lock is lost.
- */
- bool lo_layout_invalid;
- /**
-	 * Number of IOs in flight on this object. The layout can be changed
-	 * only when there is no active IO.
- */
- atomic_t lo_active_ios;
- /**
-	 * Wait queue on which threads wait until no one else is using lo_lsm.
- */
- wait_queue_head_t lo_waitq;
- /**
- * Layout metadata. NULL if empty layout.
- */
- struct lov_stripe_md *lo_lsm;
-
- union lov_layout_state {
- struct lov_layout_raid0 {
- unsigned int lo_nr;
- /**
-			 * When this is true, lov_object::lo_attr contains
-			 * valid, up-to-date attributes for the top-level
-			 * object. This field is reset to 0 when the attributes
-			 * of any sub-object change.
- */
- int lo_attr_valid;
- /**
- * Array of sub-objects. Allocated when top-object is
- * created (lov_init_raid0()).
- *
-			 * The top-object is a strict master of its
-			 * sub-objects: it is created before them and outlives
-			 * its children (the latter is necessary so that basic
-			 * functions like cl_object_top() always
-			 * work). The top-object keeps a reference on every
-			 * sub-object.
- *
- * When top-object is destroyed (lov_delete_raid0())
- * it releases its reference to a sub-object and waits
- * until the latter is finally destroyed.
- */
- struct lovsub_object **lo_sub;
- /**
- * protect lo_sub
- */
- spinlock_t lo_sub_lock;
- /**
- * Cached object attribute, built from sub-object
- * attributes.
- */
- struct cl_attr lo_attr;
- } raid0;
- struct lov_layout_state_empty {
- } empty;
- struct lov_layout_state_released {
- } released;
- } u;
- /**
- * Thread that acquired lov_object::lo_type_guard in an exclusive
- * mode.
- */
- struct task_struct *lo_owner;
-};
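
The double-dispatch mentioned above can be illustrated in userspace with a
table of per-layout operation vectors indexed by the current layout type; the
layout_ops structure and describe() functions below are hypothetical
stand-ins, not the kernel's actual method tables:

	#include <stdio.h>

	enum llt { LLT_EMPTY, LLT_RAID0, LLT_RELEASED, LLT_NR };

	struct layout_ops {
		const char *(*describe)(void);
	};

	static const char *empty_describe(void)    { return "no body"; }
	static const char *raid0_describe(void)    { return "striped"; }
	static const char *released_describe(void) { return "data in HSM"; }

	/* one ops vector per layout type; object methods index it by lo_type */
	static const struct layout_ops ops[LLT_NR] = {
		[LLT_EMPTY]    = { .describe = empty_describe },
		[LLT_RAID0]    = { .describe = raid0_describe },
		[LLT_RELEASED] = { .describe = released_describe },
	};

	int main(void)
	{
		enum llt type = LLT_RAID0;	/* plays the role of lov_object::lo_type */

		printf("%s\n", ops[type].describe());
		return 0;
	}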
-
-/**
- * State lov_lock keeps for each sub-lock.
- */
-struct lov_lock_sub {
- /** sub-lock itself */
- struct cl_lock sub_lock;
- /** Set if the sublock has ever been enqueued, meaning it may
- * hold resources of underlying layers
- */
- unsigned int sub_is_enqueued:1,
- sub_initialized:1;
- int sub_stripe;
-};
-
-/**
- * lov-specific lock state.
- */
-struct lov_lock {
- struct cl_lock_slice lls_cl;
- /** Number of sub-locks in this lock */
- int lls_nr;
- /** sublock array */
-	struct lov_lock_sub lls_sub[];
-};
-
-struct lov_page {
- struct cl_page_slice lps_cl;
- unsigned int lps_stripe; /* stripe index */
-};
-
-/*
- * Bottom half.
- */
-
-struct lovsub_device {
- struct cl_device acid_cl;
- struct cl_device *acid_next;
-};
-
-struct lovsub_object {
- struct cl_object_header lso_header;
- struct cl_object lso_cl;
- struct lov_object *lso_super;
- int lso_index;
-};
-
-/**
- * Lock state at lovsub layer.
- */
-struct lovsub_lock {
- struct cl_lock_slice lss_cl;
-};
-
-/**
- * Describe the environment settings for sublocks.
- */
-struct lov_sublock_env {
- const struct lu_env *lse_env;
- struct cl_io *lse_io;
-};
-
-struct lovsub_page {
- struct cl_page_slice lsb_cl;
-};
-
-struct lov_thread_info {
- struct cl_object_conf lti_stripe_conf;
- struct lu_fid lti_fid;
- struct ost_lvb lti_lvb;
- struct cl_2queue lti_cl2q;
- struct cl_page_list lti_plist;
- wait_queue_entry_t lti_waiter;
-};
-
-/**
- * State that lov_io maintains for every sub-io.
- */
-struct lov_io_sub {
- u16 sub_stripe;
- /**
- * environment's refcheck.
- *
- * \see cl_env_get()
- */
- u16 sub_refcheck;
- /**
-	 * True iff cl_io_init() was successfully executed against
-	 * lov_io_sub::sub_io.
-	 */
-	u16 sub_io_initialized:1,
-	/**
-	 * True iff lov_io_sub::sub_io and lov_io_sub::sub_env weren't
-	 * allocated, but were borrowed from a per-device emergency pool.
- */
- sub_borrowed:1;
- /**
- * Linkage into a list (hanging off lov_io::lis_active) of all
- * sub-io's active for the current IO iteration.
- */
- struct list_head sub_linkage;
- /**
- * sub-io for a stripe. Ideally sub-io's can be stopped and resumed
- * independently, with lov acting as a scheduler to maximize overall
- * throughput.
- */
- struct cl_io *sub_io;
- /**
- * environment, in which sub-io executes.
- */
- struct lu_env *sub_env;
-};
-
-/**
- * IO state private for LOV.
- */
-struct lov_io {
- /** super-class */
- struct cl_io_slice lis_cl;
- /**
- * Pointer to the object slice. This is a duplicate of
- * lov_io::lis_cl::cis_object.
- */
- struct lov_object *lis_object;
- /**
-	 * Original end-of-io position for this IO, set by the upper layer as
-	 * cl_io::u::ci_rw::pos + cl_io::u::ci_rw::count. lov remembers this,
-	 * changes pos and count to fit the IO into a single stripe, and uses
-	 * the saved value to determine when the IO iterations have to stop.
-	 *
-	 * This is used only for CIT_READ and CIT_WRITE IOs.
- */
- loff_t lis_io_endpos;
-
- /**
- * starting position within a file, for the current io loop iteration
- * (stripe), used by ci_io_loop().
- */
- u64 lis_pos;
- /**
-	 * end position within a file, for the current stripe io. This is
-	 * exclusive (i.e., the next offset after the last byte affected by io).
- */
- u64 lis_endpos;
-
- int lis_stripe_count;
- int lis_active_subios;
-
- /**
-	 * the index of lis_single_subio in the lis_subs array
- */
- int lis_single_subio_index;
- struct cl_io lis_single_subio;
-
- /**
-	 * size of the lis_subs array (i.e., the number of stripes)
- */
- int lis_nr_subios;
- struct lov_io_sub *lis_subs;
- /**
- * List of active sub-io's.
- */
- struct list_head lis_active;
-};
-
-struct lov_session {
- struct lov_io ls_io;
- struct lov_sublock_env ls_subenv;
-};
-
-extern struct lu_device_type lov_device_type;
-extern struct lu_device_type lovsub_device_type;
-
-extern struct lu_context_key lov_key;
-extern struct lu_context_key lov_session_key;
-
-extern struct kmem_cache *lov_lock_kmem;
-extern struct kmem_cache *lov_object_kmem;
-extern struct kmem_cache *lov_thread_kmem;
-extern struct kmem_cache *lov_session_kmem;
-
-extern struct kmem_cache *lovsub_lock_kmem;
-extern struct kmem_cache *lovsub_object_kmem;
-
-int lov_object_init(const struct lu_env *env, struct lu_object *obj,
- const struct lu_object_conf *conf);
-int lovsub_object_init(const struct lu_env *env, struct lu_object *obj,
- const struct lu_object_conf *conf);
-int lov_lock_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_lock *lock, const struct cl_io *io);
-int lov_io_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io);
-int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_lock *lock, const struct cl_io *io);
-
-int lov_lock_init_raid0(const struct lu_env *env, struct cl_object *obj,
- struct cl_lock *lock, const struct cl_io *io);
-int lov_lock_init_empty(const struct lu_env *env, struct cl_object *obj,
- struct cl_lock *lock, const struct cl_io *io);
-int lov_io_init_raid0(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io);
-int lov_io_init_empty(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io);
-int lov_io_init_released(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io);
-
-struct lov_io_sub *lov_sub_get(const struct lu_env *env, struct lov_io *lio,
- int stripe);
-
-int lov_page_init(const struct lu_env *env, struct cl_object *ob,
- struct cl_page *page, pgoff_t index);
-int lovsub_page_init(const struct lu_env *env, struct cl_object *ob,
- struct cl_page *page, pgoff_t index);
-int lov_page_init_empty(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index);
-int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index);
-struct lu_object *lov_object_alloc(const struct lu_env *env,
- const struct lu_object_header *hdr,
- struct lu_device *dev);
-struct lu_object *lovsub_object_alloc(const struct lu_env *env,
- const struct lu_object_header *hdr,
- struct lu_device *dev);
-
-struct lov_stripe_md *lov_lsm_addref(struct lov_object *lov);
-int lov_page_stripe(const struct cl_page *page);
-
-#define lov_foreach_target(lov, var) \
- for (var = 0; var < lov_targets_nr(lov); ++var)
-
-/*****************************************************************************
- *
- * Type conversions.
- *
- * Accessors.
- *
- */
-
-static inline struct lov_session *lov_env_session(const struct lu_env *env)
-{
- struct lov_session *ses;
-
- ses = lu_context_key_get(env->le_ses, &lov_session_key);
- LASSERT(ses);
- return ses;
-}
-
-static inline struct lov_io *lov_env_io(const struct lu_env *env)
-{
- return &lov_env_session(env)->ls_io;
-}
-
-static inline int lov_is_object(const struct lu_object *obj)
-{
- return obj->lo_dev->ld_type == &lov_device_type;
-}
-
-static inline int lovsub_is_object(const struct lu_object *obj)
-{
- return obj->lo_dev->ld_type == &lovsub_device_type;
-}
-
-static inline struct lu_device *lov2lu_dev(struct lov_device *lov)
-{
- return &lov->ld_cl.cd_lu_dev;
-}
-
-static inline struct lov_device *lu2lov_dev(const struct lu_device *d)
-{
- LINVRNT(d->ld_type == &lov_device_type);
- return container_of(d, struct lov_device, ld_cl.cd_lu_dev);
-}
-
-static inline struct cl_device *lovsub2cl_dev(struct lovsub_device *lovsub)
-{
- return &lovsub->acid_cl;
-}
-
-static inline struct lu_device *lovsub2lu_dev(struct lovsub_device *lovsub)
-{
- return &lovsub2cl_dev(lovsub)->cd_lu_dev;
-}
-
-static inline struct lovsub_device *lu2lovsub_dev(const struct lu_device *d)
-{
- LINVRNT(d->ld_type == &lovsub_device_type);
- return container_of(d, struct lovsub_device, acid_cl.cd_lu_dev);
-}
-
-static inline struct lovsub_device *cl2lovsub_dev(const struct cl_device *d)
-{
- LINVRNT(d->cd_lu_dev.ld_type == &lovsub_device_type);
- return container_of(d, struct lovsub_device, acid_cl);
-}
-
-static inline struct lu_object *lov2lu(struct lov_object *lov)
-{
- return &lov->lo_cl.co_lu;
-}
-
-static inline struct cl_object *lov2cl(struct lov_object *lov)
-{
- return &lov->lo_cl;
-}
-
-static inline struct lov_object *lu2lov(const struct lu_object *obj)
-{
- LINVRNT(lov_is_object(obj));
- return container_of(obj, struct lov_object, lo_cl.co_lu);
-}
-
-static inline struct lov_object *cl2lov(const struct cl_object *obj)
-{
- LINVRNT(lov_is_object(&obj->co_lu));
- return container_of(obj, struct lov_object, lo_cl);
-}
-
-static inline struct lu_object *lovsub2lu(struct lovsub_object *los)
-{
- return &los->lso_cl.co_lu;
-}
-
-static inline struct cl_object *lovsub2cl(struct lovsub_object *los)
-{
- return &los->lso_cl;
-}
-
-static inline struct lovsub_object *cl2lovsub(const struct cl_object *obj)
-{
- LINVRNT(lovsub_is_object(&obj->co_lu));
- return container_of(obj, struct lovsub_object, lso_cl);
-}
-
-static inline struct lovsub_object *lu2lovsub(const struct lu_object *obj)
-{
- LINVRNT(lovsub_is_object(obj));
- return container_of(obj, struct lovsub_object, lso_cl.co_lu);
-}
-
-static inline struct lovsub_lock *
-cl2lovsub_lock(const struct cl_lock_slice *slice)
-{
- LINVRNT(lovsub_is_object(&slice->cls_obj->co_lu));
- return container_of(slice, struct lovsub_lock, lss_cl);
-}
-
-static inline struct lovsub_lock *cl2sub_lock(const struct cl_lock *lock)
-{
- const struct cl_lock_slice *slice;
-
- slice = cl_lock_at(lock, &lovsub_device_type);
- LASSERT(slice);
- return cl2lovsub_lock(slice);
-}
-
-static inline struct lov_lock *cl2lov_lock(const struct cl_lock_slice *slice)
-{
- LINVRNT(lov_is_object(&slice->cls_obj->co_lu));
- return container_of(slice, struct lov_lock, lls_cl);
-}
-
-static inline struct lov_page *cl2lov_page(const struct cl_page_slice *slice)
-{
- LINVRNT(lov_is_object(&slice->cpl_obj->co_lu));
- return container_of(slice, struct lov_page, lps_cl);
-}
-
-static inline struct lovsub_page *
-cl2lovsub_page(const struct cl_page_slice *slice)
-{
- LINVRNT(lovsub_is_object(&slice->cpl_obj->co_lu));
- return container_of(slice, struct lovsub_page, lsb_cl);
-}
-
-static inline struct lov_io *cl2lov_io(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct lov_io *lio;
-
- lio = container_of(ios, struct lov_io, lis_cl);
- LASSERT(lio == lov_env_io(env));
- return lio;
-}
-
-static inline int lov_targets_nr(const struct lov_device *lov)
-{
- return lov->ld_lov->desc.ld_tgt_count;
-}
-
-static inline struct lov_thread_info *lov_env_info(const struct lu_env *env)
-{
- struct lov_thread_info *info;
-
- info = lu_context_key_get(&env->le_ctx, &lov_key);
- LASSERT(info);
- return info;
-}
-
-static inline struct lov_layout_raid0 *lov_r0(struct lov_object *lov)
-{
- LASSERT(lov->lo_type == LLT_RAID0);
- LASSERT(lov->lo_lsm->lsm_magic == LOV_MAGIC ||
- lov->lo_lsm->lsm_magic == LOV_MAGIC_V3);
- return &lov->u.raid0;
-}
-
-/* lov_pack.c */
-int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm,
- struct lov_user_md __user *lump);
-
-/** @} lov */
-
-#endif
diff --git a/drivers/staging/lustre/lustre/lov/lov_dev.c b/drivers/staging/lustre/lustre/lov/lov_dev.c
deleted file mode 100644
index c7db23472346..000000000000
--- a/drivers/staging/lustre/lustre/lov/lov_dev.c
+++ /dev/null
@@ -1,384 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_device and cl_device_type for LOV layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LOV
-
-/* class_name2obd() */
-#include <obd_class.h>
-
-#include "lov_cl_internal.h"
-#include "lov_internal.h"
-
-struct kmem_cache *lov_lock_kmem;
-struct kmem_cache *lov_object_kmem;
-struct kmem_cache *lov_thread_kmem;
-struct kmem_cache *lov_session_kmem;
-
-struct kmem_cache *lovsub_lock_kmem;
-struct kmem_cache *lovsub_object_kmem;
-
-struct lu_kmem_descr lov_caches[] = {
- {
- .ckd_cache = &lov_lock_kmem,
- .ckd_name = "lov_lock_kmem",
- .ckd_size = sizeof(struct lov_lock)
- },
- {
- .ckd_cache = &lov_object_kmem,
- .ckd_name = "lov_object_kmem",
- .ckd_size = sizeof(struct lov_object)
- },
- {
- .ckd_cache = &lov_thread_kmem,
- .ckd_name = "lov_thread_kmem",
- .ckd_size = sizeof(struct lov_thread_info)
- },
- {
- .ckd_cache = &lov_session_kmem,
- .ckd_name = "lov_session_kmem",
- .ckd_size = sizeof(struct lov_session)
- },
- {
- .ckd_cache = &lovsub_lock_kmem,
- .ckd_name = "lovsub_lock_kmem",
- .ckd_size = sizeof(struct lovsub_lock)
- },
- {
- .ckd_cache = &lovsub_object_kmem,
- .ckd_name = "lovsub_object_kmem",
- .ckd_size = sizeof(struct lovsub_object)
- },
- {
- .ckd_cache = NULL
- }
-};
-
-/*****************************************************************************
- *
- * Lov device and device type functions.
- *
- */
-
-static void *lov_key_init(const struct lu_context *ctx,
- struct lu_context_key *key)
-{
- struct lov_thread_info *info;
-
- info = kmem_cache_zalloc(lov_thread_kmem, GFP_NOFS);
- if (!info)
- info = ERR_PTR(-ENOMEM);
- return info;
-}
-
-static void lov_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
- struct lov_thread_info *info = data;
-
- kmem_cache_free(lov_thread_kmem, info);
-}
-
-struct lu_context_key lov_key = {
- .lct_tags = LCT_CL_THREAD,
- .lct_init = lov_key_init,
- .lct_fini = lov_key_fini
-};
-
-static void *lov_session_key_init(const struct lu_context *ctx,
- struct lu_context_key *key)
-{
- struct lov_session *info;
-
- info = kmem_cache_zalloc(lov_session_kmem, GFP_NOFS);
- if (!info)
- info = ERR_PTR(-ENOMEM);
- return info;
-}
-
-static void lov_session_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
- struct lov_session *info = data;
-
- kmem_cache_free(lov_session_kmem, info);
-}
-
-struct lu_context_key lov_session_key = {
- .lct_tags = LCT_SESSION,
- .lct_init = lov_session_key_init,
- .lct_fini = lov_session_key_fini
-};
-
-/* type constructor/destructor: lov_type_{init,fini,start,stop}() */
-LU_TYPE_INIT_FINI(lov, &lov_key, &lov_session_key);
-
-static struct lu_device *lov_device_fini(const struct lu_env *env,
- struct lu_device *d)
-{
- int i;
- struct lov_device *ld = lu2lov_dev(d);
-
- LASSERT(ld->ld_lov);
- if (!ld->ld_target)
- return NULL;
-
- lov_foreach_target(ld, i) {
- struct lovsub_device *lsd;
-
- lsd = ld->ld_target[i];
- if (lsd) {
- cl_stack_fini(env, lovsub2cl_dev(lsd));
- ld->ld_target[i] = NULL;
- }
- }
- return NULL;
-}
-
-static int lov_device_init(const struct lu_env *env, struct lu_device *d,
- const char *name, struct lu_device *next)
-{
- struct lov_device *ld = lu2lov_dev(d);
- int i;
- int rc = 0;
-
- LASSERT(d->ld_site);
- if (!ld->ld_target)
- return rc;
-
- lov_foreach_target(ld, i) {
- struct lovsub_device *lsd;
- struct cl_device *cl;
- struct lov_tgt_desc *desc;
-
- desc = ld->ld_lov->lov_tgts[i];
- if (!desc)
- continue;
-
- cl = cl_type_setup(env, d->ld_site, &lovsub_device_type,
- desc->ltd_obd->obd_lu_dev);
- if (IS_ERR(cl)) {
- rc = PTR_ERR(cl);
- break;
- }
- lsd = cl2lovsub_dev(cl);
- ld->ld_target[i] = lsd;
- }
-
- if (rc)
- lov_device_fini(env, d);
- else
- ld->ld_flags |= LOV_DEV_INITIALIZED;
-
- return rc;
-}
-
-static struct lu_device *lov_device_free(const struct lu_env *env,
- struct lu_device *d)
-{
- struct lov_device *ld = lu2lov_dev(d);
-
- cl_device_fini(lu2cl_dev(d));
- kfree(ld->ld_target);
- kfree(ld);
- return NULL;
-}
-
-static void lov_cl_del_target(const struct lu_env *env, struct lu_device *dev,
- __u32 index)
-{
- struct lov_device *ld = lu2lov_dev(dev);
-
- if (ld->ld_target[index]) {
- cl_stack_fini(env, lovsub2cl_dev(ld->ld_target[index]));
- ld->ld_target[index] = NULL;
- }
-}
-
-static int lov_expand_targets(const struct lu_env *env, struct lov_device *dev)
-{
- int result;
- __u32 tgt_size;
- __u32 sub_size;
-
- result = 0;
- tgt_size = dev->ld_lov->lov_tgt_size;
- sub_size = dev->ld_target_nr;
- if (sub_size < tgt_size) {
- struct lovsub_device **newd;
- const size_t sz = sizeof(newd[0]);
-
- newd = kcalloc(tgt_size, sz, GFP_NOFS);
- if (newd) {
- if (sub_size > 0) {
- memcpy(newd, dev->ld_target, sub_size * sz);
- kfree(dev->ld_target);
- }
- dev->ld_target = newd;
- dev->ld_target_nr = tgt_size;
- } else {
- result = -ENOMEM;
- }
- }
- return result;
-}
-
-static int lov_cl_add_target(const struct lu_env *env, struct lu_device *dev,
- __u32 index)
-{
- struct obd_device *obd = dev->ld_obd;
- struct lov_device *ld = lu2lov_dev(dev);
- struct lov_tgt_desc *tgt;
- struct lovsub_device *lsd;
- struct cl_device *cl;
- int rc;
-
- obd_getref(obd);
-
- tgt = obd->u.lov.lov_tgts[index];
-
-	if (!tgt->ltd_obd->obd_set_up) {
-		CERROR("Target %s not set up\n", obd_uuid2str(&tgt->ltd_uuid));
-		obd_putref(obd);	/* drop the reference taken above */
-		return -EINVAL;
-	}
-
- rc = lov_expand_targets(env, ld);
- if (rc == 0 && ld->ld_flags & LOV_DEV_INITIALIZED) {
- LASSERT(dev->ld_site);
-
- cl = cl_type_setup(env, dev->ld_site, &lovsub_device_type,
- tgt->ltd_obd->obd_lu_dev);
- if (!IS_ERR(cl)) {
- lsd = cl2lovsub_dev(cl);
- ld->ld_target[index] = lsd;
-		} else {
-			rc = PTR_ERR(cl);	/* set before logging the error */
-			CERROR("add failed (%d), deleting %s\n", rc,
-			       obd_uuid2str(&tgt->ltd_uuid));
-			lov_cl_del_target(env, dev, index);
-		}
- }
- obd_putref(obd);
- return rc;
-}
-
-static int lov_process_config(const struct lu_env *env,
- struct lu_device *d, struct lustre_cfg *cfg)
-{
- struct obd_device *obd = d->ld_obd;
- int cmd;
- int rc;
- int gen;
- __u32 index;
-
- obd_getref(obd);
-
- cmd = cfg->lcfg_command;
- rc = lov_process_config_base(d->ld_obd, cfg, &index, &gen);
- if (rc == 0) {
- switch (cmd) {
- case LCFG_LOV_ADD_OBD:
- case LCFG_LOV_ADD_INA:
- rc = lov_cl_add_target(env, d, index);
- if (rc != 0)
- lov_del_target(d->ld_obd, index, NULL, 0);
- break;
- case LCFG_LOV_DEL_OBD:
- lov_cl_del_target(env, d, index);
- break;
- }
- }
- obd_putref(obd);
- return rc;
-}
-
-static const struct lu_device_operations lov_lu_ops = {
- .ldo_object_alloc = lov_object_alloc,
- .ldo_process_config = lov_process_config,
-};
-
-static struct lu_device *lov_device_alloc(const struct lu_env *env,
- struct lu_device_type *t,
- struct lustre_cfg *cfg)
-{
- struct lu_device *d;
- struct lov_device *ld;
- struct obd_device *obd;
- int rc;
-
- ld = kzalloc(sizeof(*ld), GFP_NOFS);
- if (!ld)
- return ERR_PTR(-ENOMEM);
-
- cl_device_init(&ld->ld_cl, t);
- d = lov2lu_dev(ld);
- d->ld_ops = &lov_lu_ops;
-
- /* setup the LOV OBD */
- obd = class_name2obd(lustre_cfg_string(cfg, 0));
- LASSERT(obd);
- rc = lov_setup(obd, cfg);
- if (rc) {
- lov_device_free(env, d);
- return ERR_PTR(rc);
- }
-
- ld->ld_lov = &obd->u.lov;
- return d;
-}
-
-static const struct lu_device_type_operations lov_device_type_ops = {
- .ldto_init = lov_type_init,
- .ldto_fini = lov_type_fini,
-
- .ldto_start = lov_type_start,
- .ldto_stop = lov_type_stop,
-
- .ldto_device_alloc = lov_device_alloc,
- .ldto_device_free = lov_device_free,
-
- .ldto_device_init = lov_device_init,
- .ldto_device_fini = lov_device_fini
-};
-
-struct lu_device_type lov_device_type = {
- .ldt_tags = LU_DEVICE_CL,
- .ldt_name = LUSTRE_LOV_NAME,
- .ldt_ops = &lov_device_type_ops,
- .ldt_ctx_tags = LCT_CL_THREAD
-};
-
-/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lov_ea.c b/drivers/staging/lustre/lustre/lov/lov_ea.c
deleted file mode 100644
index c80320ab0858..000000000000
--- a/drivers/staging/lustre/lustre/lov/lov_ea.c
+++ /dev/null
@@ -1,331 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/lov/lov_ea.c
- *
- * Author: Wang Di <wangdi@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LOV
-
-#include <asm/div64.h>
-
-#include <obd_class.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-
-#include "lov_internal.h"
-
-static int lsm_lmm_verify_common(struct lov_mds_md *lmm, int lmm_bytes,
- __u16 stripe_count)
-{
- if (stripe_count > LOV_V1_INSANE_STRIPE_COUNT) {
- CERROR("bad stripe count %d\n", stripe_count);
- lov_dump_lmm_common(D_WARNING, lmm);
- return -EINVAL;
- }
-
- if (lmm_oi_id(&lmm->lmm_oi) == 0) {
- CERROR("zero object id\n");
- lov_dump_lmm_common(D_WARNING, lmm);
- return -EINVAL;
- }
-
- if (lov_pattern(le32_to_cpu(lmm->lmm_pattern)) != LOV_PATTERN_RAID0) {
- CERROR("bad striping pattern\n");
- lov_dump_lmm_common(D_WARNING, lmm);
- return -EINVAL;
- }
-
- if (lmm->lmm_stripe_size == 0 ||
- (le32_to_cpu(lmm->lmm_stripe_size) &
- (LOV_MIN_STRIPE_SIZE - 1)) != 0) {
- CERROR("bad stripe size %u\n",
- le32_to_cpu(lmm->lmm_stripe_size));
- lov_dump_lmm_common(D_WARNING, lmm);
- return -EINVAL;
- }
- return 0;
-}
-
-struct lov_stripe_md *lsm_alloc_plain(u16 stripe_count)
-{
- size_t oinfo_ptrs_size, lsm_size;
- struct lov_stripe_md *lsm;
- struct lov_oinfo *loi;
- int i;
-
- LASSERT(stripe_count <= LOV_MAX_STRIPE_COUNT);
-
- oinfo_ptrs_size = sizeof(struct lov_oinfo *) * stripe_count;
- lsm_size = sizeof(*lsm) + oinfo_ptrs_size;
-
- lsm = kvzalloc(lsm_size, GFP_NOFS);
- if (!lsm)
- return NULL;
-
- for (i = 0; i < stripe_count; i++) {
- loi = kmem_cache_zalloc(lov_oinfo_slab, GFP_NOFS);
- if (!loi)
- goto err;
- lsm->lsm_oinfo[i] = loi;
- }
- lsm->lsm_stripe_count = stripe_count;
- return lsm;
-
-err:
- while (--i >= 0)
- kmem_cache_free(lov_oinfo_slab, lsm->lsm_oinfo[i]);
- kvfree(lsm);
- return NULL;
-}
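
The allocation above is the usual flexible-array idiom: one kvzalloc() for
the struct plus the trailing array of per-stripe pointers, followed by
per-element allocations with unwinding on failure. A userspace sketch with
simplified, hypothetical types:

	#include <stdlib.h>

	struct oinfo { int ost_idx; };

	struct md {
		unsigned short stripe_count;
		struct oinfo *oinfo[];		/* flexible array member */
	};

	static struct md *md_alloc(unsigned short n)
	{
		struct md *m = calloc(1, sizeof(*m) + n * sizeof(m->oinfo[0]));
		unsigned short i;

		if (!m)
			return NULL;
		for (i = 0; i < n; i++) {
			m->oinfo[i] = calloc(1, sizeof(struct oinfo));
			if (!m->oinfo[i]) {
				while (i--)	/* unwind already-allocated slots */
					free(m->oinfo[i]);
				free(m);
				return NULL;
			}
		}
		m->stripe_count = n;
		return m;
	}

	int main(void)
	{
		struct md *m = md_alloc(4);

		return m ? 0 : 1;
	}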
-
-void lsm_free_plain(struct lov_stripe_md *lsm)
-{
- __u16 stripe_count = lsm->lsm_stripe_count;
- int i;
-
- for (i = 0; i < stripe_count; i++)
- kmem_cache_free(lov_oinfo_slab, lsm->lsm_oinfo[i]);
- kvfree(lsm);
-}
-
-/*
- * Find minimum stripe maxbytes value. For inactive or
- * reconnecting targets use LUSTRE_EXT3_STRIPE_MAXBYTES.
- */
-static loff_t lov_tgt_maxbytes(struct lov_tgt_desc *tgt)
-{
- loff_t maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
- struct obd_import *imp;
-
- if (!tgt->ltd_active)
- return maxbytes;
-
- imp = tgt->ltd_obd->u.cli.cl_import;
- if (!imp)
- return maxbytes;
-
- spin_lock(&imp->imp_lock);
- if (imp->imp_state == LUSTRE_IMP_FULL &&
- (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES) &&
- imp->imp_connect_data.ocd_maxbytes > 0)
- maxbytes = imp->imp_connect_data.ocd_maxbytes;
-
- spin_unlock(&imp->imp_lock);
-
- return maxbytes;
-}
-
-static int lsm_unpackmd_common(struct lov_obd *lov,
- struct lov_stripe_md *lsm,
- struct lov_mds_md *lmm,
- struct lov_ost_data_v1 *objects)
-{
- loff_t min_stripe_maxbytes = 0;
- unsigned int stripe_count;
- struct lov_oinfo *loi;
- loff_t lov_bytes;
- unsigned int i;
-
-	/*
-	 * This assumes that the first fields of lov_mds_md_v1 and
-	 * lov_mds_md_v3 are the same.
-	 */
- lmm_oi_le_to_cpu(&lsm->lsm_oi, &lmm->lmm_oi);
- lsm->lsm_stripe_size = le32_to_cpu(lmm->lmm_stripe_size);
- lsm->lsm_pattern = le32_to_cpu(lmm->lmm_pattern);
- lsm->lsm_layout_gen = le16_to_cpu(lmm->lmm_layout_gen);
- lsm->lsm_pool_name[0] = '\0';
-
- stripe_count = lsm_is_released(lsm) ? 0 : lsm->lsm_stripe_count;
-
- for (i = 0; i < stripe_count; i++) {
- loi = lsm->lsm_oinfo[i];
- ostid_le_to_cpu(&objects[i].l_ost_oi, &loi->loi_oi);
- loi->loi_ost_idx = le32_to_cpu(objects[i].l_ost_idx);
- loi->loi_ost_gen = le32_to_cpu(objects[i].l_ost_gen);
- if (lov_oinfo_is_dummy(loi))
- continue;
-
- if (loi->loi_ost_idx >= lov->desc.ld_tgt_count &&
- !lov2obd(lov)->obd_process_conf) {
-			CERROR("%s: OST index %d is larger than OST count %d\n",
- (char *)lov->desc.ld_uuid.uuid,
- loi->loi_ost_idx, lov->desc.ld_tgt_count);
- lov_dump_lmm_v1(D_WARNING, lmm);
- return -EINVAL;
- }
-
- if (!lov->lov_tgts[loi->loi_ost_idx]) {
- CERROR("%s: OST index %d missing\n",
- (char *)lov->desc.ld_uuid.uuid,
- loi->loi_ost_idx);
- lov_dump_lmm_v1(D_WARNING, lmm);
- continue;
- }
-
- lov_bytes = lov_tgt_maxbytes(lov->lov_tgts[loi->loi_ost_idx]);
- if (min_stripe_maxbytes == 0 || lov_bytes < min_stripe_maxbytes)
- min_stripe_maxbytes = lov_bytes;
- }
-
- if (min_stripe_maxbytes == 0)
- min_stripe_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
-
- stripe_count = lsm->lsm_stripe_count ?: lov->desc.ld_tgt_count;
- lov_bytes = min_stripe_maxbytes * stripe_count;
-
- if (lov_bytes < min_stripe_maxbytes) /* handle overflow */
- lsm->lsm_maxbytes = MAX_LFS_FILESIZE;
- else
- lsm->lsm_maxbytes = lov_bytes;
-
- return 0;
-}
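
The lsm_maxbytes computation above (the smallest per-stripe limit multiplied
by the stripe count, clamped when the product wraps) can be checked in
isolation. This sketch mirrors the kernel's overflow test; the constants are
stand-ins for LUSTRE_EXT3_STRIPE_MAXBYTES and MAX_LFS_FILESIZE:

	#include <stdint.h>
	#include <stdio.h>

	#define EXT3_STRIPE_MAXBYTES	0x1fffffff000ULL	/* fallback per-stripe limit */
	#define LFS_MAXSIZE		UINT64_MAX		/* stand-in clamp value */

	static uint64_t file_maxbytes(uint64_t min_stripe_maxbytes,
				      uint32_t stripe_count)
	{
		uint64_t total;

		if (min_stripe_maxbytes == 0)
			min_stripe_maxbytes = EXT3_STRIPE_MAXBYTES;

		total = min_stripe_maxbytes * stripe_count;
		/* same wraparound check the kernel code above uses */
		if (total < min_stripe_maxbytes)
			return LFS_MAXSIZE;
		return total;
	}

	int main(void)
	{
		printf("%llu\n", (unsigned long long)file_maxbytes(0, 4));
		return 0;
	}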
-
-static void
-lsm_stripe_by_index_plain(struct lov_stripe_md *lsm, int *stripeno,
- loff_t *lov_off, loff_t *swidth)
-{
- if (swidth)
- *swidth = (u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count;
-}
-
-static void
-lsm_stripe_by_offset_plain(struct lov_stripe_md *lsm, int *stripeno,
- loff_t *lov_off, loff_t *swidth)
-{
- if (swidth)
- *swidth = (u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count;
-}
-
-static int lsm_lmm_verify_v1(struct lov_mds_md_v1 *lmm, int lmm_bytes,
- __u16 *stripe_count)
-{
- if (lmm_bytes < sizeof(*lmm)) {
- CERROR("lov_mds_md_v1 too small: %d, need at least %d\n",
- lmm_bytes, (int)sizeof(*lmm));
- return -EINVAL;
- }
-
- *stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
- if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
- *stripe_count = 0;
-
- if (lmm_bytes < lov_mds_md_size(*stripe_count, LOV_MAGIC_V1)) {
- CERROR("LOV EA V1 too small: %d, need %d\n",
- lmm_bytes, lov_mds_md_size(*stripe_count, LOV_MAGIC_V1));
- lov_dump_lmm_common(D_WARNING, lmm);
- return -EINVAL;
- }
-
- return lsm_lmm_verify_common(lmm, lmm_bytes, *stripe_count);
-}
-
-static int lsm_unpackmd_v1(struct lov_obd *lov, struct lov_stripe_md *lsm,
- struct lov_mds_md_v1 *lmm)
-{
- return lsm_unpackmd_common(lov, lsm, lmm, lmm->lmm_objects);
-}
-
-const struct lsm_operations lsm_v1_ops = {
- .lsm_free = lsm_free_plain,
- .lsm_stripe_by_index = lsm_stripe_by_index_plain,
- .lsm_stripe_by_offset = lsm_stripe_by_offset_plain,
- .lsm_lmm_verify = lsm_lmm_verify_v1,
- .lsm_unpackmd = lsm_unpackmd_v1,
-};
-
-static int lsm_lmm_verify_v3(struct lov_mds_md *lmmv1, int lmm_bytes,
- __u16 *stripe_count)
-{
- struct lov_mds_md_v3 *lmm;
-
- lmm = (struct lov_mds_md_v3 *)lmmv1;
-
- if (lmm_bytes < sizeof(*lmm)) {
- CERROR("lov_mds_md_v3 too small: %d, need at least %d\n",
- lmm_bytes, (int)sizeof(*lmm));
- return -EINVAL;
- }
-
- *stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
- if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
- *stripe_count = 0;
-
- if (lmm_bytes < lov_mds_md_size(*stripe_count, LOV_MAGIC_V3)) {
- CERROR("LOV EA V3 too small: %d, need %d\n",
- lmm_bytes, lov_mds_md_size(*stripe_count, LOV_MAGIC_V3));
- lov_dump_lmm_common(D_WARNING, lmm);
- return -EINVAL;
- }
-
- return lsm_lmm_verify_common((struct lov_mds_md_v1 *)lmm, lmm_bytes,
- *stripe_count);
-}
-
-static int lsm_unpackmd_v3(struct lov_obd *lov, struct lov_stripe_md *lsm,
- struct lov_mds_md *lmm)
-{
- struct lov_mds_md_v3 *lmm_v3 = (struct lov_mds_md_v3 *)lmm;
- size_t cplen = 0;
- int rc;
-
- rc = lsm_unpackmd_common(lov, lsm, lmm, lmm_v3->lmm_objects);
- if (rc)
- return rc;
-
- cplen = strlcpy(lsm->lsm_pool_name, lmm_v3->lmm_pool_name,
- sizeof(lsm->lsm_pool_name));
- if (cplen >= sizeof(lsm->lsm_pool_name))
- return -E2BIG;
-
- return 0;
-}
-
-const struct lsm_operations lsm_v3_ops = {
- .lsm_free = lsm_free_plain,
- .lsm_stripe_by_index = lsm_stripe_by_index_plain,
- .lsm_stripe_by_offset = lsm_stripe_by_offset_plain,
- .lsm_lmm_verify = lsm_lmm_verify_v3,
- .lsm_unpackmd = lsm_unpackmd_v3,
-};
-
-void dump_lsm(unsigned int level, const struct lov_stripe_md *lsm)
-{
- CDEBUG(level, "lsm %p, objid " DOSTID ", maxbytes %#llx, magic 0x%08X, stripe_size %u, stripe_count %u, refc: %d, layout_gen %u, pool [" LOV_POOLNAMEF "]\n",
- lsm,
- POSTID(&lsm->lsm_oi), lsm->lsm_maxbytes, lsm->lsm_magic,
- lsm->lsm_stripe_size, lsm->lsm_stripe_count,
- atomic_read(&lsm->lsm_refc), lsm->lsm_layout_gen,
- lsm->lsm_pool_name);
-}
diff --git a/drivers/staging/lustre/lustre/lov/lov_internal.h b/drivers/staging/lustre/lustre/lov/lov_internal.h
deleted file mode 100644
index 47042f27ca90..000000000000
--- a/drivers/staging/lustre/lustre/lov/lov_internal.h
+++ /dev/null
@@ -1,286 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef LOV_INTERNAL_H
-#define LOV_INTERNAL_H
-
-#include <obd_class.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-
-/*
- * If we are unable to get the maximum object size from the OST in
- * ocd_maxbytes using OBD_CONNECT_MAXBYTES, then we fall back to using
- * the old maximum object size from ext3.
- */
-#define LUSTRE_EXT3_STRIPE_MAXBYTES 0x1fffffff000ULL
-
-struct lov_stripe_md {
- atomic_t lsm_refc;
- spinlock_t lsm_lock;
- pid_t lsm_lock_owner; /* debugging */
-
- /*
- * maximum possible file size, might change as OSTs status changes,
- * e.g. disconnected, deactivated
- */
- loff_t lsm_maxbytes;
- struct ost_id lsm_oi;
- u32 lsm_magic;
- u32 lsm_stripe_size;
- u32 lsm_pattern; /* RAID0, RAID1, released, ... */
- u16 lsm_stripe_count;
- u16 lsm_layout_gen;
- char lsm_pool_name[LOV_MAXPOOLNAME + 1];
-	struct lov_oinfo *lsm_oinfo[];
-};
-
-static inline bool lsm_is_released(struct lov_stripe_md *lsm)
-{
- return !!(lsm->lsm_pattern & LOV_PATTERN_F_RELEASED);
-}
-
-static inline bool lsm_has_objects(struct lov_stripe_md *lsm)
-{
- if (!lsm)
- return false;
-
- if (lsm_is_released(lsm))
- return false;
-
- return true;
-}
-
-struct lsm_operations {
- void (*lsm_free)(struct lov_stripe_md *);
- void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, loff_t *,
- loff_t *);
- void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, loff_t *,
- loff_t *);
- int (*lsm_lmm_verify)(struct lov_mds_md *lmm, int lmm_bytes,
- u16 *stripe_count);
- int (*lsm_unpackmd)(struct lov_obd *lov, struct lov_stripe_md *lsm,
- struct lov_mds_md *lmm);
-};
-
-extern const struct lsm_operations lsm_v1_ops;
-extern const struct lsm_operations lsm_v3_ops;
-
-static inline const struct lsm_operations *lsm_op_find(int magic)
-{
- switch (magic) {
- case LOV_MAGIC_V1:
- return &lsm_v1_ops;
- case LOV_MAGIC_V3:
- return &lsm_v3_ops;
- default:
- CERROR("unrecognized lsm_magic %08x\n", magic);
- return NULL;
- }
-}
-
-/* lov_do_div64(a, b) returns a % b, and a = a / b.
- * The 32-bit code is LOV-specific due to knowing about stripe limits in
- * order to reduce the divisor to a 32-bit number. If the divisor is
- * already a 32-bit value the compiler handles this directly.
- */
-#if BITS_PER_LONG == 64
-# define lov_do_div64(n, base) ({ \
- u64 __base = (base); \
- u64 __rem; \
- __rem = ((u64)(n)) % __base; \
- (n) = ((u64)(n)) / __base; \
- __rem; \
-})
-#elif BITS_PER_LONG == 32
-# define lov_do_div64(n, base) ({ \
- u64 __rem; \
- if ((sizeof(base) > 4) && (((base) & 0xffffffff00000000ULL) != 0)) { \
- int __remainder; \
- LASSERTF(!((base) & (LOV_MIN_STRIPE_SIZE - 1)), "64 bit lov " \
- "division %llu / %llu\n", (n), (u64)(base)); \
- __remainder = (n) & (LOV_MIN_STRIPE_SIZE - 1); \
- (n) >>= LOV_MIN_STRIPE_BITS; \
- __rem = do_div(n, (base) >> LOV_MIN_STRIPE_BITS); \
- __rem <<= LOV_MIN_STRIPE_BITS; \
- __rem += __remainder; \
- } else { \
- __rem = do_div(n, base); \
- } \
- __rem; \
-})
-#endif
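
A quick userspace check of the macro's contract (the expression evaluates to
n % base while n itself becomes n / base). This reproduces only the
BITS_PER_LONG == 64 branch and, like the kernel version, relies on the GNU
statement-expression extension (GCC/Clang):

	#include <stdint.h>
	#include <stdio.h>

	#define lov_do_div64(n, base) ({		\
		uint64_t __base = (base);		\
		uint64_t __rem = (n) % __base;		\
		(n) = (n) / __base;			\
		__rem;					\
	})

	int main(void)
	{
		uint64_t n = 10 * 1048576ULL + 123;	/* 10MB + 123 bytes */
		uint64_t rem = lov_do_div64(n, 1048576ULL);

		/* n is now the stripe-chunk index, rem the offset inside it */
		printf("chunk=%llu off=%llu\n",
		       (unsigned long long)n, (unsigned long long)rem);
		return 0;
	}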
-
-#define pool_tgt_size(p) ((p)->pool_obds.op_size)
-#define pool_tgt_count(p) ((p)->pool_obds.op_count)
-#define pool_tgt_array(p) ((p)->pool_obds.op_array)
-#define pool_tgt_rw_sem(p) ((p)->pool_obds.op_rw_sem)
-
-struct pool_desc {
- char pool_name[LOV_MAXPOOLNAME + 1];
- struct ost_pool pool_obds;
- atomic_t pool_refcount;
- struct rhash_head pool_hash; /* access by poolname */
- union {
- struct list_head pool_list; /* serial access */
- struct rcu_head rcu; /* delayed free */
- };
- struct dentry *pool_debugfs_entry; /* file in debugfs */
- struct obd_device *pool_lobd; /* owner */
-};
-int lov_pool_hash_init(struct rhashtable *tbl);
-void lov_pool_hash_destroy(struct rhashtable *tbl);
-
-struct lov_request {
- struct obd_info rq_oi;
- struct lov_request_set *rq_rqset;
-
- struct list_head rq_link;
-
- int rq_idx; /* index in lov->tgts array */
-};
-
-struct lov_request_set {
- struct obd_info *set_oi;
- struct obd_device *set_obd;
- int set_count;
- atomic_t set_completes;
- atomic_t set_success;
- struct list_head set_list;
-};
-
-extern struct kmem_cache *lov_oinfo_slab;
-
-extern struct lu_kmem_descr lov_caches[];
-
-#define lov_uuid2str(lv, index) \
- (char *)((lv)->lov_tgts[index]->ltd_uuid.uuid)
-
-/* lov_merge.c */
-int lov_merge_lvb_kms(struct lov_stripe_md *lsm,
- struct ost_lvb *lvb, __u64 *kms_place);
-
-/* lov_offset.c */
-u64 lov_stripe_size(struct lov_stripe_md *lsm, u64 ost_size, int stripeno);
-int lov_stripe_offset(struct lov_stripe_md *lsm, u64 lov_off,
-		      int stripeno, u64 *obd_off);
-u64 lov_size_to_stripe(struct lov_stripe_md *lsm, u64 file_size, int stripeno);
-int lov_stripe_intersects(struct lov_stripe_md *lsm, int stripeno,
- u64 start, u64 end,
- u64 *obd_start, u64 *obd_end);
-int lov_stripe_number(struct lov_stripe_md *lsm, u64 lov_off);
-pgoff_t lov_stripe_pgoff(struct lov_stripe_md *lsm, pgoff_t stripe_index,
- int stripe);
-
-/* lov_request.c */
-int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
- struct lov_request_set **reqset);
-int lov_fini_statfs_set(struct lov_request_set *set);
-
-/* lov_obd.c */
-void lov_stripe_lock(struct lov_stripe_md *md);
-void lov_stripe_unlock(struct lov_stripe_md *md);
-void lov_fix_desc(struct lov_desc *desc);
-void lov_fix_desc_stripe_size(__u64 *val);
-void lov_fix_desc_stripe_count(__u32 *val);
-void lov_fix_desc_pattern(__u32 *val);
-void lov_fix_desc_qos_maxage(__u32 *val);
-__u16 lov_get_stripecnt(struct lov_obd *lov, __u32 magic, __u16 stripe_count);
-int lov_connect_obd(struct obd_device *obd, __u32 index, int activate,
- struct obd_connect_data *data);
-int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg);
-int lov_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg,
- __u32 *indexp, int *genp);
-int lov_del_target(struct obd_device *obd, __u32 index,
- struct obd_uuid *uuidp, int gen);
-
-/* lov_pack.c */
-ssize_t lov_lsm_pack(const struct lov_stripe_md *lsm, void *buf,
- size_t buf_size);
-struct lov_stripe_md *lov_unpackmd(struct lov_obd *lov, struct lov_mds_md *lmm,
- size_t lmm_size);
-int lov_free_memmd(struct lov_stripe_md **lsmp);
-
-void lov_dump_lmm_v1(int level, struct lov_mds_md_v1 *lmm);
-void lov_dump_lmm_v3(int level, struct lov_mds_md_v3 *lmm);
-void lov_dump_lmm_common(int level, void *lmmp);
-
-/* lov_ea.c */
-struct lov_stripe_md *lsm_alloc_plain(u16 stripe_count);
-void lsm_free_plain(struct lov_stripe_md *lsm);
-void dump_lsm(unsigned int level, const struct lov_stripe_md *lsm);
-
-/* lproc_lov.c */
-extern const struct file_operations lov_proc_target_fops;
-void lprocfs_lov_init_vars(struct lprocfs_static_vars *lvars);
-
-/* lov_cl.c */
-extern struct lu_device_type lov_device_type;
-
-/* ost_pool methods */
-int lov_ost_pool_init(struct ost_pool *op, unsigned int count);
-int lov_ost_pool_extend(struct ost_pool *op, unsigned int min_count);
-int lov_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count);
-int lov_ost_pool_remove(struct ost_pool *op, __u32 idx);
-int lov_ost_pool_free(struct ost_pool *op);
-
-/* high level pool methods */
-int lov_pool_new(struct obd_device *obd, char *poolname);
-int lov_pool_del(struct obd_device *obd, char *poolname);
-int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname);
-int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname);
-void lov_pool_putref(struct pool_desc *pool);
-
-static inline struct lov_stripe_md *lsm_addref(struct lov_stripe_md *lsm)
-{
- LASSERT(atomic_read(&lsm->lsm_refc) > 0);
- atomic_inc(&lsm->lsm_refc);
- return lsm;
-}
-
-static inline bool lov_oinfo_is_dummy(const struct lov_oinfo *loi)
-{
- if (unlikely(loi->loi_oi.oi.oi_id == 0 &&
- loi->loi_oi.oi.oi_seq == 0 &&
- loi->loi_ost_idx == 0 &&
- loi->loi_ost_gen == 0))
- return true;
-
- return false;
-}
-
-static inline struct obd_device *lov2obd(const struct lov_obd *lov)
-{
- return container_of_safe(lov, struct obd_device, u.lov);
-}
-
-#endif
diff --git a/drivers/staging/lustre/lustre/lov/lov_io.c b/drivers/staging/lustre/lustre/lov/lov_io.c
deleted file mode 100644
index b823f8a21856..000000000000
--- a/drivers/staging/lustre/lustre/lov/lov_io.c
+++ /dev/null
@@ -1,1023 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_io for LOV layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LOV
-
-#include "lov_cl_internal.h"
-
-/** \addtogroup lov
- * @{
- */
-
-static void lov_io_sub_fini(const struct lu_env *env, struct lov_io *lio,
- struct lov_io_sub *sub)
-{
- if (sub->sub_io) {
- if (sub->sub_io_initialized) {
- cl_io_fini(sub->sub_env, sub->sub_io);
- sub->sub_io_initialized = 0;
- lio->lis_active_subios--;
- }
- if (sub->sub_stripe == lio->lis_single_subio_index)
- lio->lis_single_subio_index = -1;
- else if (!sub->sub_borrowed)
- kfree(sub->sub_io);
- sub->sub_io = NULL;
- }
- if (!IS_ERR_OR_NULL(sub->sub_env)) {
- if (!sub->sub_borrowed)
- cl_env_put(sub->sub_env, &sub->sub_refcheck);
- sub->sub_env = NULL;
- }
-}
-
-static void lov_io_sub_inherit(struct cl_io *io, struct lov_io *lio,
- int stripe, loff_t start, loff_t end)
-{
- struct lov_stripe_md *lsm = lio->lis_object->lo_lsm;
- struct cl_io *parent = lio->lis_cl.cis_io;
-
- switch (io->ci_type) {
- case CIT_SETATTR: {
- io->u.ci_setattr.sa_attr = parent->u.ci_setattr.sa_attr;
- io->u.ci_setattr.sa_attr_flags =
- parent->u.ci_setattr.sa_attr_flags;
- io->u.ci_setattr.sa_valid = parent->u.ci_setattr.sa_valid;
- io->u.ci_setattr.sa_stripe_index = stripe;
- io->u.ci_setattr.sa_parent_fid =
- parent->u.ci_setattr.sa_parent_fid;
- if (cl_io_is_trunc(io)) {
- loff_t new_size = parent->u.ci_setattr.sa_attr.lvb_size;
-
- new_size = lov_size_to_stripe(lsm, new_size, stripe);
- io->u.ci_setattr.sa_attr.lvb_size = new_size;
- }
- break;
- }
- case CIT_DATA_VERSION: {
- io->u.ci_data_version.dv_data_version = 0;
- io->u.ci_data_version.dv_flags =
- parent->u.ci_data_version.dv_flags;
- break;
- }
- case CIT_FAULT: {
- struct cl_object *obj = parent->ci_obj;
- loff_t off = cl_offset(obj, parent->u.ci_fault.ft_index);
-
- io->u.ci_fault = parent->u.ci_fault;
- off = lov_size_to_stripe(lsm, off, stripe);
- io->u.ci_fault.ft_index = cl_index(obj, off);
- break;
- }
- case CIT_FSYNC: {
- io->u.ci_fsync.fi_start = start;
- io->u.ci_fsync.fi_end = end;
- io->u.ci_fsync.fi_fid = parent->u.ci_fsync.fi_fid;
- io->u.ci_fsync.fi_mode = parent->u.ci_fsync.fi_mode;
- break;
- }
- case CIT_READ:
- case CIT_WRITE: {
- io->u.ci_wr.wr_sync = cl_io_is_sync_write(parent);
- if (cl_io_is_append(parent)) {
- io->u.ci_wr.wr_append = 1;
- } else {
- io->u.ci_rw.crw_pos = start;
- io->u.ci_rw.crw_count = end - start;
- }
- break;
- }
- default:
- break;
- }
-}
-
-static int lov_io_sub_init(const struct lu_env *env, struct lov_io *lio,
- struct lov_io_sub *sub)
-{
- struct lov_object *lov = lio->lis_object;
- struct cl_io *sub_io;
- struct cl_object *sub_obj;
- struct cl_io *io = lio->lis_cl.cis_io;
- int stripe = sub->sub_stripe;
- int rc;
-
- LASSERT(!sub->sub_io);
- LASSERT(!sub->sub_env);
- LASSERT(sub->sub_stripe < lio->lis_stripe_count);
-
- if (unlikely(!lov_r0(lov)->lo_sub[stripe]))
- return -EIO;
-
- sub->sub_io_initialized = 0;
- sub->sub_borrowed = 0;
-
- /* obtain new environment */
- sub->sub_env = cl_env_get(&sub->sub_refcheck);
- if (IS_ERR(sub->sub_env)) {
- rc = PTR_ERR(sub->sub_env);
- goto fini_lov_io;
- }
-
- /*
- * First sub-io. Use ->lis_single_subio to
- * avoid dynamic allocation.
- */
- if (lio->lis_active_subios == 0) {
- sub->sub_io = &lio->lis_single_subio;
- lio->lis_single_subio_index = stripe;
- } else {
- sub->sub_io = kzalloc(sizeof(*sub->sub_io),
- GFP_NOFS);
- if (!sub->sub_io) {
- rc = -ENOMEM;
- goto fini_lov_io;
- }
- }
-
- sub_obj = lovsub2cl(lov_r0(lov)->lo_sub[stripe]);
- sub_io = sub->sub_io;
-
- sub_io->ci_obj = sub_obj;
- sub_io->ci_result = 0;
- sub_io->ci_parent = io;
- sub_io->ci_lockreq = io->ci_lockreq;
- sub_io->ci_type = io->ci_type;
- sub_io->ci_no_srvlock = io->ci_no_srvlock;
- sub_io->ci_noatime = io->ci_noatime;
-
- rc = cl_io_sub_init(sub->sub_env, sub_io, io->ci_type, sub_obj);
- if (rc >= 0) {
- lio->lis_active_subios++;
- sub->sub_io_initialized = 1;
- rc = 0;
- }
-fini_lov_io:
- if (rc)
- lov_io_sub_fini(env, lio, sub);
- return rc;
-}
-
-struct lov_io_sub *lov_sub_get(const struct lu_env *env,
- struct lov_io *lio, int stripe)
-{
- int rc;
- struct lov_io_sub *sub = &lio->lis_subs[stripe];
-
- LASSERT(stripe < lio->lis_stripe_count);
-
- if (!sub->sub_io_initialized) {
- sub->sub_stripe = stripe;
- rc = lov_io_sub_init(env, lio, sub);
- } else {
- rc = 0;
- }
- if (rc < 0)
- sub = ERR_PTR(rc);
-
- return sub;
-}
-
-/*****************************************************************************
- *
- * Lov io operations.
- *
- */
-
-int lov_page_stripe(const struct cl_page *page)
-{
- const struct cl_page_slice *slice;
-
- slice = cl_page_at(page, &lov_device_type);
- LASSERT(slice->cpl_obj);
-
- return cl2lov_page(slice)->lps_stripe;
-}
-
-static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio,
- struct cl_io *io)
-{
- struct lov_stripe_md *lsm;
- int result;
-
- LASSERT(lio->lis_object);
- lsm = lio->lis_object->lo_lsm;
-
-	/*
-	 * This needs to be optimized: we can't afford to allocate memory
-	 * when writing a page. -jay
-	 */
- lio->lis_subs =
- kvzalloc(lsm->lsm_stripe_count *
- sizeof(lio->lis_subs[0]),
- GFP_NOFS);
- if (lio->lis_subs) {
- lio->lis_nr_subios = lio->lis_stripe_count;
- lio->lis_single_subio_index = -1;
- lio->lis_active_subios = 0;
- result = 0;
- } else {
- result = -ENOMEM;
- }
- return result;
-}
-
-static int lov_io_slice_init(struct lov_io *lio, struct lov_object *obj,
- struct cl_io *io)
-{
- io->ci_result = 0;
- lio->lis_object = obj;
-
- lio->lis_stripe_count = obj->lo_lsm->lsm_stripe_count;
-
- switch (io->ci_type) {
- case CIT_READ:
- case CIT_WRITE:
- lio->lis_pos = io->u.ci_rw.crw_pos;
- lio->lis_endpos = io->u.ci_rw.crw_pos + io->u.ci_rw.crw_count;
- lio->lis_io_endpos = lio->lis_endpos;
- if (cl_io_is_append(io)) {
- LASSERT(io->ci_type == CIT_WRITE);
-
-			/*
-			 * If there is a hole in the LOV EA, we may not be
-			 * able to locate the current file tail exactly.
-			 */
- if (unlikely(obj->lo_lsm->lsm_pattern &
- LOV_PATTERN_F_HOLE))
- return -EIO;
-
- lio->lis_pos = 0;
- lio->lis_endpos = OBD_OBJECT_EOF;
- }
- break;
-
- case CIT_SETATTR:
- if (cl_io_is_trunc(io))
- lio->lis_pos = io->u.ci_setattr.sa_attr.lvb_size;
- else
- lio->lis_pos = 0;
- lio->lis_endpos = OBD_OBJECT_EOF;
- break;
-
- case CIT_DATA_VERSION:
- lio->lis_pos = 0;
- lio->lis_endpos = OBD_OBJECT_EOF;
- break;
-
- case CIT_FAULT: {
- pgoff_t index = io->u.ci_fault.ft_index;
-
- lio->lis_pos = cl_offset(io->ci_obj, index);
- lio->lis_endpos = cl_offset(io->ci_obj, index + 1);
- break;
- }
-
- case CIT_FSYNC: {
- lio->lis_pos = io->u.ci_fsync.fi_start;
- lio->lis_endpos = io->u.ci_fsync.fi_end;
- break;
- }
-
- case CIT_MISC:
- lio->lis_pos = 0;
- lio->lis_endpos = OBD_OBJECT_EOF;
- break;
-
- default:
- LBUG();
- }
- return 0;
-}
-
-static void lov_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
-{
- struct lov_io *lio = cl2lov_io(env, ios);
- struct lov_object *lov = cl2lov(ios->cis_obj);
- int i;
-
- if (lio->lis_subs) {
- for (i = 0; i < lio->lis_nr_subios; i++)
- lov_io_sub_fini(env, lio, &lio->lis_subs[i]);
- kvfree(lio->lis_subs);
- lio->lis_nr_subios = 0;
- }
-
- LASSERT(atomic_read(&lov->lo_active_ios) > 0);
- if (atomic_dec_and_test(&lov->lo_active_ios))
- wake_up_all(&lov->lo_waitq);
-}
-
-static u64 lov_offset_mod(u64 val, int delta)
-{
- if (val != OBD_OBJECT_EOF)
- val += delta;
- return val;
-}
-
-static int lov_io_iter_init(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct lov_io *lio = cl2lov_io(env, ios);
- struct lov_stripe_md *lsm = lio->lis_object->lo_lsm;
- struct lov_io_sub *sub;
- u64 endpos;
- u64 start;
- u64 end;
- int stripe;
- int rc = 0;
-
- endpos = lov_offset_mod(lio->lis_endpos, -1);
- for (stripe = 0; stripe < lio->lis_stripe_count; stripe++) {
- if (!lov_stripe_intersects(lsm, stripe, lio->lis_pos,
- endpos, &start, &end))
- continue;
-
- if (unlikely(!lov_r0(lio->lis_object)->lo_sub[stripe])) {
- if (ios->cis_io->ci_type == CIT_READ ||
- ios->cis_io->ci_type == CIT_WRITE ||
- ios->cis_io->ci_type == CIT_FAULT)
- return -EIO;
-
- continue;
- }
-
- end = lov_offset_mod(end, 1);
- sub = lov_sub_get(env, lio, stripe);
- if (IS_ERR(sub)) {
- rc = PTR_ERR(sub);
- break;
- }
-
- lov_io_sub_inherit(sub->sub_io, lio, stripe, start, end);
- rc = cl_io_iter_init(sub->sub_env, sub->sub_io);
- if (rc) {
- cl_io_iter_fini(sub->sub_env, sub->sub_io);
- break;
- }
- CDEBUG(D_VFSTRACE, "shrink: %d [%llu, %llu)\n",
- stripe, start, end);
-
- list_add_tail(&sub->sub_linkage, &lio->lis_active);
- }
- return rc;
-}
-
-static int lov_io_rw_iter_init(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct lov_io *lio = cl2lov_io(env, ios);
- struct cl_io *io = ios->cis_io;
- struct lov_stripe_md *lsm = lio->lis_object->lo_lsm;
- __u64 start = io->u.ci_rw.crw_pos;
- loff_t next;
- unsigned long ssize = lsm->lsm_stripe_size;
-
- LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
-
-	/* common case: more than one sub-io, shrink the IO to a single stripe */
- if (lio->lis_nr_subios != 1 && !cl_io_is_append(io)) {
- lov_do_div64(start, ssize);
- next = (start + 1) * ssize;
- if (next <= start * ssize)
- next = ~0ull;
-
- io->ci_continue = next < lio->lis_io_endpos;
- io->u.ci_rw.crw_count = min_t(loff_t, lio->lis_io_endpos,
- next) - io->u.ci_rw.crw_pos;
- lio->lis_pos = io->u.ci_rw.crw_pos;
- lio->lis_endpos = io->u.ci_rw.crw_pos + io->u.ci_rw.crw_count;
- CDEBUG(D_VFSTRACE, "stripe: %llu chunk: [%llu, %llu) %llu\n",
- (__u64)start, lio->lis_pos, lio->lis_endpos,
- (__u64)lio->lis_io_endpos);
- }
-	/*
-	 * XXX The following call should be optimized: we know that
-	 * [lio->lis_pos, lio->lis_endpos) intersects exactly one stripe.
-	 */
- return lov_io_iter_init(env, ios);
-}
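
The chunk arithmetic in lov_io_rw_iter_init() can be isolated as follows:
find the next stripe boundary after the current position, then shrink this
iteration's count to whichever comes first, that boundary or the saved end of
the IO. Names here are illustrative only:

	#include <stdint.h>
	#include <stdio.h>

	/* Clamp one IO iteration to the stripe chunk containing pos. */
	static uint64_t chunk_count(uint64_t pos, uint64_t io_endpos,
				    uint64_t ssize, int *cont)
	{
		uint64_t next = (pos / ssize + 1) * ssize; /* next stripe boundary */

		if (next <= pos)		/* the multiplication overflowed */
			next = ~0ULL;
		*cont = next < io_endpos;	/* more iterations after this one? */
		return (io_endpos < next ? io_endpos : next) - pos;
	}

	int main(void)
	{
		int cont;
		/* 1.5MB into 1MB stripes, IO ends at 5MB */
		uint64_t cnt = chunk_count(1536ULL * 1024, 5ULL << 20,
					   1ULL << 20, &cont);

		printf("count=%llu continue=%d\n",	/* count=524288 continue=1 */
		       (unsigned long long)cnt, cont);
		return 0;
	}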
-
-static int lov_io_call(const struct lu_env *env, struct lov_io *lio,
- int (*iofunc)(const struct lu_env *, struct cl_io *))
-{
- struct cl_io *parent = lio->lis_cl.cis_io;
- struct lov_io_sub *sub;
- int rc = 0;
-
- list_for_each_entry(sub, &lio->lis_active, sub_linkage) {
- rc = iofunc(sub->sub_env, sub->sub_io);
- if (rc)
- break;
-
- if (parent->ci_result == 0)
- parent->ci_result = sub->sub_io->ci_result;
- }
- return rc;
-}
-
-static int lov_io_lock(const struct lu_env *env, const struct cl_io_slice *ios)
-{
- return lov_io_call(env, cl2lov_io(env, ios), cl_io_lock);
-}
-
-static int lov_io_start(const struct lu_env *env, const struct cl_io_slice *ios)
-{
- return lov_io_call(env, cl2lov_io(env, ios), cl_io_start);
-}
-
-static int lov_io_end_wrapper(const struct lu_env *env, struct cl_io *io)
-{
-	/*
-	 * It's possible that lov_io_start() wasn't called against this
-	 * sub-io, either because a previous sub-io failed or because the
-	 * upper layer completed the IO.
-	 */
- if (io->ci_state == CIS_IO_GOING)
- cl_io_end(env, io);
- else
- io->ci_state = CIS_IO_FINISHED;
- return 0;
-}
-
-static void
-lov_io_data_version_end(const struct lu_env *env, const struct cl_io_slice *ios)
-{
- struct lov_io *lio = cl2lov_io(env, ios);
- struct cl_io *parent = lio->lis_cl.cis_io;
- struct lov_io_sub *sub;
-
- list_for_each_entry(sub, &lio->lis_active, sub_linkage) {
- lov_io_end_wrapper(sub->sub_env, sub->sub_io);
-
- parent->u.ci_data_version.dv_data_version +=
- sub->sub_io->u.ci_data_version.dv_data_version;
-
- if (!parent->ci_result)
- parent->ci_result = sub->sub_io->ci_result;
- }
-}
-
-static int lov_io_iter_fini_wrapper(const struct lu_env *env, struct cl_io *io)
-{
- cl_io_iter_fini(env, io);
- return 0;
-}
-
-static int lov_io_unlock_wrapper(const struct lu_env *env, struct cl_io *io)
-{
- cl_io_unlock(env, io);
- return 0;
-}
-
-static void lov_io_end(const struct lu_env *env, const struct cl_io_slice *ios)
-{
- int rc;
-
- rc = lov_io_call(env, cl2lov_io(env, ios), lov_io_end_wrapper);
- LASSERT(rc == 0);
-}
-
-static void lov_io_iter_fini(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct lov_io *lio = cl2lov_io(env, ios);
- int rc;
-
- rc = lov_io_call(env, lio, lov_io_iter_fini_wrapper);
- LASSERT(rc == 0);
- while (!list_empty(&lio->lis_active))
- list_del_init(lio->lis_active.next);
-}
-
-static void lov_io_unlock(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- int rc;
-
- rc = lov_io_call(env, cl2lov_io(env, ios), lov_io_unlock_wrapper);
- LASSERT(rc == 0);
-}
-
-static int lov_io_read_ahead(const struct lu_env *env,
- const struct cl_io_slice *ios,
- pgoff_t start, struct cl_read_ahead *ra)
-{
- struct lov_io *lio = cl2lov_io(env, ios);
- struct lov_object *loo = lio->lis_object;
- struct cl_object *obj = lov2cl(loo);
- struct lov_layout_raid0 *r0 = lov_r0(loo);
- unsigned int pps; /* pages per stripe */
- struct lov_io_sub *sub;
- pgoff_t ra_end;
- loff_t suboff;
- int stripe;
- int rc;
-
- stripe = lov_stripe_number(loo->lo_lsm, cl_offset(obj, start));
- if (unlikely(!r0->lo_sub[stripe]))
- return -EIO;
-
- sub = lov_sub_get(env, lio, stripe);
- if (IS_ERR(sub))
- return PTR_ERR(sub);
-
- lov_stripe_offset(loo->lo_lsm, cl_offset(obj, start), stripe, &suboff);
- rc = cl_io_read_ahead(sub->sub_env, sub->sub_io,
- cl_index(lovsub2cl(r0->lo_sub[stripe]), suboff),
- ra);
-
- CDEBUG(D_READA, DFID " cra_end = %lu, stripes = %d, rc = %d\n",
- PFID(lu_object_fid(lov2lu(loo))), ra->cra_end, r0->lo_nr, rc);
- if (rc)
- return rc;
-
-	/*
-	 * Adjust the result by the raid0 layout: ra->cra_end is the
-	 * maximum page index covered by an underlying DLM lock.
-	 * This function converts cra_end from stripe level to file
-	 * level, and makes sure it doesn't go beyond the stripe boundary.
-	 */
- if (r0->lo_nr == 1) /* single stripe file */
- return 0;
-
- /* cra_end is stripe level, convert it into file level */
- ra_end = ra->cra_end;
- if (ra_end != CL_PAGE_EOF)
- ra_end = lov_stripe_pgoff(loo->lo_lsm, ra_end, stripe);
-
- pps = loo->lo_lsm->lsm_stripe_size >> PAGE_SHIFT;
-
- CDEBUG(D_READA, DFID " max_index = %lu, pps = %u, stripe_size = %u, stripe no = %u, start index = %lu\n",
- PFID(lu_object_fid(lov2lu(loo))), ra_end, pps,
- loo->lo_lsm->lsm_stripe_size, stripe, start);
-
- /* never exceed the end of the stripe */
- ra->cra_end = min_t(pgoff_t, ra_end, start + pps - start % pps - 1);
- return 0;
-}
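
The final clamp above stops read-ahead at the last page of the current
stripe: with pps pages per stripe, start + pps - start % pps - 1 is the index
of the last page in the stripe-aligned window containing start. A tiny
illustrative check (not kernel code):

	#include <stdio.h>

	int main(void)
	{
		unsigned long start = 300, pps = 256;	/* page 300, 256 pages/stripe */
		unsigned long end = start + pps - start % pps - 1;

		/* the window [256, 511] contains page 300, so the clamp is 511 */
		printf("%lu\n", end);
		return 0;
	}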
-
-/**
- * lov implementation of cl_operations::cio_submit() method. It takes a list
- * of pages in \a queue, splits it into per-stripe sub-lists, invokes
- * cl_io_submit() on underlying devices to submit sub-lists, and then splices
- * everything back.
- *
- * The major complication in this function is the need to handle memory
- * cleansing: cl_io_submit() is called to write out pages as part of VM
- * memory reclamation, and hence it must not fail due to memory shortage
- * (the system would deadlock otherwise). To deal with this, some resources
- * (sub-lists, sub-environments, etc.) are allocated per-device at "startup"
- * (i.e., in a non-memory-cleansing context), and in case of memory shortage
- * these pre-allocated resources are used by lov_io_submit() under the
- * lov_device::ld_mutex mutex.
- */
-static int lov_io_submit(const struct lu_env *env,
- const struct cl_io_slice *ios,
- enum cl_req_type crt, struct cl_2queue *queue)
-{
- struct cl_page_list *qin = &queue->c2_qin;
- struct lov_io *lio = cl2lov_io(env, ios);
- struct lov_io_sub *sub;
- struct cl_page_list *plist = &lov_env_info(env)->lti_plist;
- struct cl_page *page;
- int stripe;
-
- int rc = 0;
-
- if (lio->lis_active_subios == 1) {
- int idx = lio->lis_single_subio_index;
-
- LASSERT(idx < lio->lis_nr_subios);
- sub = lov_sub_get(env, lio, idx);
- LASSERT(!IS_ERR(sub));
- LASSERT(sub->sub_io == &lio->lis_single_subio);
- rc = cl_io_submit_rw(sub->sub_env, sub->sub_io,
- crt, queue);
- return rc;
- }
-
- LASSERT(lio->lis_subs);
-
- cl_page_list_init(plist);
- while (qin->pl_nr > 0) {
- struct cl_2queue *cl2q = &lov_env_info(env)->lti_cl2q;
-
- cl_2queue_init(cl2q);
-
- page = cl_page_list_first(qin);
- cl_page_list_move(&cl2q->c2_qin, qin, page);
-
- stripe = lov_page_stripe(page);
- while (qin->pl_nr > 0) {
- page = cl_page_list_first(qin);
- if (stripe != lov_page_stripe(page))
- break;
-
- cl_page_list_move(&cl2q->c2_qin, qin, page);
- }
-
- sub = lov_sub_get(env, lio, stripe);
- if (!IS_ERR(sub)) {
- rc = cl_io_submit_rw(sub->sub_env, sub->sub_io,
- crt, cl2q);
- } else {
- rc = PTR_ERR(sub);
- }
-
- cl_page_list_splice(&cl2q->c2_qin, plist);
- cl_page_list_splice(&cl2q->c2_qout, &queue->c2_qout);
- cl_2queue_fini(env, cl2q);
-
- if (rc != 0)
- break;
- }
-
- cl_page_list_splice(plist, qin);
- cl_page_list_fini(env, plist);
-
- return rc;
-}
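lov_io_submit() repeatedly peels a run of consecutive same-stripe pages off the head of the queue and submits that run to the owning stripe. The grouping logic, reduced to an array of page indices and a plain RAID0 mapping standing in for lov_page_stripe():

#include <stdio.h>

/* Simplified RAID0 page-to-stripe mapping; stands in for lov_page_stripe(). */
static int page_stripe(unsigned long page, unsigned int pps,
                       unsigned int stripe_count)
{
    return (page / pps) % stripe_count;
}

int main(void)
{
    unsigned long pages[] = { 0, 1, 2, 256, 257, 512, 3 };
    unsigned int n = sizeof(pages) / sizeof(pages[0]);
    unsigned int pps = 256, stripes = 2;
    unsigned int i = 0;

    while (i < n) {
        int stripe = page_stripe(pages[i], pps, stripes);
        unsigned int j = i;

        /* extend the run while the stripe stays the same */
        while (j < n && page_stripe(pages[j], pps, stripes) == stripe)
            j++;

        printf("submit pages[%u..%u] to stripe %d\n", i, j - 1, stripe);
        i = j;
    }
    return 0;
}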
-
-static int lov_io_commit_async(const struct lu_env *env,
- const struct cl_io_slice *ios,
- struct cl_page_list *queue, int from, int to,
- cl_commit_cbt cb)
-{
- struct cl_page_list *plist = &lov_env_info(env)->lti_plist;
- struct lov_io *lio = cl2lov_io(env, ios);
- struct lov_io_sub *sub;
- struct cl_page *page;
- int rc = 0;
-
- if (lio->lis_active_subios == 1) {
- int idx = lio->lis_single_subio_index;
-
- LASSERT(idx < lio->lis_nr_subios);
- sub = lov_sub_get(env, lio, idx);
- LASSERT(!IS_ERR(sub));
- LASSERT(sub->sub_io == &lio->lis_single_subio);
- rc = cl_io_commit_async(sub->sub_env, sub->sub_io, queue,
- from, to, cb);
- return rc;
- }
-
- LASSERT(lio->lis_subs);
-
- cl_page_list_init(plist);
- while (queue->pl_nr > 0) {
- int stripe_to = to;
- int stripe;
-
- LASSERT(plist->pl_nr == 0);
- page = cl_page_list_first(queue);
- cl_page_list_move(plist, queue, page);
-
- stripe = lov_page_stripe(page);
- while (queue->pl_nr > 0) {
- page = cl_page_list_first(queue);
- if (stripe != lov_page_stripe(page))
- break;
-
- cl_page_list_move(plist, queue, page);
- }
-
- if (queue->pl_nr > 0) /* still has more pages */
- stripe_to = PAGE_SIZE;
-
- sub = lov_sub_get(env, lio, stripe);
- if (!IS_ERR(sub)) {
- rc = cl_io_commit_async(sub->sub_env, sub->sub_io,
- plist, from, stripe_to, cb);
- } else {
- rc = PTR_ERR(sub);
- break;
- }
-
- if (plist->pl_nr > 0) /* short write */
- break;
-
- from = 0;
- }
-
- /* on error, the pages left on plist must go back to the caller's queue */
- LASSERT(ergo(rc == 0, plist->pl_nr == 0));
- while (plist->pl_nr > 0) {
- /* error occurred, add the uncommitted pages back into queue */
- page = cl_page_list_last(plist);
- cl_page_list_move_head(queue, plist, page);
- }
-
- return rc;
-}
-
-static int lov_io_fault_start(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_fault_io *fio;
- struct lov_io *lio;
- struct lov_io_sub *sub;
-
- fio = &ios->cis_io->u.ci_fault;
- lio = cl2lov_io(env, ios);
- sub = lov_sub_get(env, lio, lov_page_stripe(fio->ft_page));
- if (IS_ERR(sub))
- return PTR_ERR(sub);
- sub->sub_io->u.ci_fault.ft_nob = fio->ft_nob;
- return lov_io_start(env, ios);
-}
-
-static void lov_io_fsync_end(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct lov_io *lio = cl2lov_io(env, ios);
- struct lov_io_sub *sub;
- unsigned int *written = &ios->cis_io->u.ci_fsync.fi_nr_written;
-
- *written = 0;
- list_for_each_entry(sub, &lio->lis_active, sub_linkage) {
- struct cl_io *subio = sub->sub_io;
-
- lov_io_end_wrapper(sub->sub_env, subio);
-
- if (subio->ci_result == 0)
- *written += subio->u.ci_fsync.fi_nr_written;
- }
-}
-
-static const struct cl_io_operations lov_io_ops = {
- .op = {
- [CIT_READ] = {
- .cio_fini = lov_io_fini,
- .cio_iter_init = lov_io_rw_iter_init,
- .cio_iter_fini = lov_io_iter_fini,
- .cio_lock = lov_io_lock,
- .cio_unlock = lov_io_unlock,
- .cio_start = lov_io_start,
- .cio_end = lov_io_end
- },
- [CIT_WRITE] = {
- .cio_fini = lov_io_fini,
- .cio_iter_init = lov_io_rw_iter_init,
- .cio_iter_fini = lov_io_iter_fini,
- .cio_lock = lov_io_lock,
- .cio_unlock = lov_io_unlock,
- .cio_start = lov_io_start,
- .cio_end = lov_io_end
- },
- [CIT_SETATTR] = {
- .cio_fini = lov_io_fini,
- .cio_iter_init = lov_io_iter_init,
- .cio_iter_fini = lov_io_iter_fini,
- .cio_lock = lov_io_lock,
- .cio_unlock = lov_io_unlock,
- .cio_start = lov_io_start,
- .cio_end = lov_io_end
- },
- [CIT_DATA_VERSION] = {
- .cio_fini = lov_io_fini,
- .cio_iter_init = lov_io_iter_init,
- .cio_iter_fini = lov_io_iter_fini,
- .cio_lock = lov_io_lock,
- .cio_unlock = lov_io_unlock,
- .cio_start = lov_io_start,
- .cio_end = lov_io_data_version_end,
- },
- [CIT_FAULT] = {
- .cio_fini = lov_io_fini,
- .cio_iter_init = lov_io_iter_init,
- .cio_iter_fini = lov_io_iter_fini,
- .cio_lock = lov_io_lock,
- .cio_unlock = lov_io_unlock,
- .cio_start = lov_io_fault_start,
- .cio_end = lov_io_end
- },
- [CIT_FSYNC] = {
- .cio_fini = lov_io_fini,
- .cio_iter_init = lov_io_iter_init,
- .cio_iter_fini = lov_io_iter_fini,
- .cio_lock = lov_io_lock,
- .cio_unlock = lov_io_unlock,
- .cio_start = lov_io_start,
- .cio_end = lov_io_fsync_end
- },
- [CIT_MISC] = {
- .cio_fini = lov_io_fini
- }
- },
- .cio_read_ahead = lov_io_read_ahead,
- .cio_submit = lov_io_submit,
- .cio_commit_async = lov_io_commit_async,
-};
-
-/*****************************************************************************
- *
- * Empty lov io operations.
- *
- */
-
-static void lov_empty_io_fini(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct lov_object *lov = cl2lov(ios->cis_obj);
-
- if (atomic_dec_and_test(&lov->lo_active_ios))
- wake_up_all(&lov->lo_waitq);
-}
-
-static int lov_empty_io_submit(const struct lu_env *env,
- const struct cl_io_slice *ios,
- enum cl_req_type crt, struct cl_2queue *queue)
-{
- return -EBADF;
-}
-
-static void lov_empty_impossible(const struct lu_env *env,
- struct cl_io_slice *ios)
-{
- LBUG();
-}
-
-#define LOV_EMPTY_IMPOSSIBLE ((void *)lov_empty_impossible)
-
-/**
- * An io operation vector for files without stripes.
- */
-static const struct cl_io_operations lov_empty_io_ops = {
- .op = {
- [CIT_READ] = {
- .cio_fini = lov_empty_io_fini,
- },
- [CIT_WRITE] = {
- .cio_fini = lov_empty_io_fini,
- .cio_iter_init = LOV_EMPTY_IMPOSSIBLE,
- .cio_lock = LOV_EMPTY_IMPOSSIBLE,
- .cio_start = LOV_EMPTY_IMPOSSIBLE,
- .cio_end = LOV_EMPTY_IMPOSSIBLE
- },
- [CIT_SETATTR] = {
- .cio_fini = lov_empty_io_fini,
- .cio_iter_init = LOV_EMPTY_IMPOSSIBLE,
- .cio_lock = LOV_EMPTY_IMPOSSIBLE,
- .cio_start = LOV_EMPTY_IMPOSSIBLE,
- .cio_end = LOV_EMPTY_IMPOSSIBLE
- },
- [CIT_FAULT] = {
- .cio_fini = lov_empty_io_fini,
- .cio_iter_init = LOV_EMPTY_IMPOSSIBLE,
- .cio_lock = LOV_EMPTY_IMPOSSIBLE,
- .cio_start = LOV_EMPTY_IMPOSSIBLE,
- .cio_end = LOV_EMPTY_IMPOSSIBLE
- },
- [CIT_FSYNC] = {
- .cio_fini = lov_empty_io_fini
- },
- [CIT_MISC] = {
- .cio_fini = lov_empty_io_fini
- }
- },
- .cio_submit = lov_empty_io_submit,
- .cio_commit_async = LOV_EMPTY_IMPOSSIBLE
-};
-
-int lov_io_init_raid0(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io)
-{
- struct lov_io *lio = lov_env_io(env);
- struct lov_object *lov = cl2lov(obj);
-
- INIT_LIST_HEAD(&lio->lis_active);
- io->ci_result = lov_io_slice_init(lio, lov, io);
- if (io->ci_result == 0) {
- io->ci_result = lov_io_subio_init(env, lio, io);
- if (io->ci_result == 0) {
- cl_io_slice_add(io, &lio->lis_cl, obj, &lov_io_ops);
- atomic_inc(&lov->lo_active_ios);
- }
- }
- return io->ci_result;
-}
-
-int lov_io_init_empty(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io)
-{
- struct lov_object *lov = cl2lov(obj);
- struct lov_io *lio = lov_env_io(env);
- int result;
-
- lio->lis_object = lov;
- switch (io->ci_type) {
- default:
- LBUG();
- case CIT_MISC:
- case CIT_READ:
- result = 0;
- break;
- case CIT_FSYNC:
- case CIT_SETATTR:
- case CIT_DATA_VERSION:
- result = 1;
- break;
- case CIT_WRITE:
- result = -EBADF;
- break;
- case CIT_FAULT:
- result = -EFAULT;
- CERROR("Page fault on a file without stripes: " DFID "\n",
- PFID(lu_object_fid(&obj->co_lu)));
- break;
- }
- if (result == 0) {
- cl_io_slice_add(io, &lio->lis_cl, obj, &lov_empty_io_ops);
- atomic_inc(&lov->lo_active_ios);
- }
-
- io->ci_result = result < 0 ? result : 0;
- return result;
-}
-
-int lov_io_init_released(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io)
-{
- struct lov_object *lov = cl2lov(obj);
- struct lov_io *lio = lov_env_io(env);
- int result;
-
- LASSERT(lov->lo_lsm);
- lio->lis_object = lov;
-
- switch (io->ci_type) {
- default:
- LASSERTF(0, "invalid type %d\n", io->ci_type);
- result = -EOPNOTSUPP;
- break;
- case CIT_MISC:
- case CIT_FSYNC:
- case CIT_DATA_VERSION:
- result = 1;
- break;
- case CIT_SETATTR:
- /* the truncate to 0 is managed by MDT:
- * - in open, for open O_TRUNC
- * - in setattr, for truncate
- */
- /* the truncate is for size > 0 so triggers a restore */
- if (cl_io_is_trunc(io)) {
- io->ci_restore_needed = 1;
- result = -ENODATA;
- } else {
- result = 1;
- }
- break;
- case CIT_READ:
- case CIT_WRITE:
- case CIT_FAULT:
- io->ci_restore_needed = 1;
- result = -ENODATA;
- break;
- }
- if (result == 0) {
- cl_io_slice_add(io, &lio->lis_cl, obj, &lov_empty_io_ops);
- atomic_inc(&lov->lo_active_ios);
- }
-
- io->ci_result = result < 0 ? result : 0;
- return result;
-}
-
-/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lov_lock.c b/drivers/staging/lustre/lustre/lov/lov_lock.c
deleted file mode 100644
index b0292100bf26..000000000000
--- a/drivers/staging/lustre/lustre/lov/lov_lock.c
+++ /dev/null
@@ -1,348 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_lock for LOV layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LOV
-
-#include "lov_cl_internal.h"
-
-/** \addtogroup lov
- * @{
- */
-
-/*****************************************************************************
- *
- * Lov lock operations.
- *
- */
-
-static struct lov_sublock_env *lov_sublock_env_get(const struct lu_env *env,
- const struct cl_lock *parent,
- struct lov_lock_sub *lls)
-{
- struct lov_sublock_env *subenv;
- struct lov_io *lio = lov_env_io(env);
- struct cl_io *io = lio->lis_cl.cis_io;
- struct lov_io_sub *sub;
-
- subenv = &lov_env_session(env)->ls_subenv;
-
- /*
- * FIXME: We tend to use the subio's env & io to call the sublock
- * operations because the osc lock sometimes stores control
- * variables in the thread's IO information (currently only lockless
- * information). However, if the lock's host (object) differs from the
- * object of the current IO, we have no way to get the subenv and subio
- * because they are not initialized at all. As a temporary fix, in that
- * case we borrow the parent's env to call the sublock operations.
- */
- if (!io || !cl_object_same(io->ci_obj, parent->cll_descr.cld_obj)) {
- subenv->lse_env = env;
- subenv->lse_io = io;
- } else {
- sub = lov_sub_get(env, lio, lls->sub_stripe);
- if (!IS_ERR(sub)) {
- subenv->lse_env = sub->sub_env;
- subenv->lse_io = sub->sub_io;
- } else {
- subenv = (void *)sub;
- }
- }
- return subenv;
-}
-
-static int lov_sublock_init(const struct lu_env *env,
- const struct cl_lock *parent,
- struct lov_lock_sub *lls)
-{
- struct lov_sublock_env *subenv;
- int result;
-
- subenv = lov_sublock_env_get(env, parent, lls);
- if (!IS_ERR(subenv)) {
- result = cl_lock_init(subenv->lse_env, &lls->sub_lock,
- subenv->lse_io);
- } else {
- /* an error occurred */
- result = PTR_ERR(subenv);
- }
- return result;
-}
-
-/**
- * Creates sub-locks for a given lov_lock for the first time.
- *
- * Goes through all sub-objects of top-object, and creates sub-locks on every
- * sub-object intersecting with top-lock extent. This is complicated by the
- * fact that top-lock (that is being created) can be accessed concurrently
- * through already created sub-locks (possibly shared with other top-locks).
- */
-static struct lov_lock *lov_lock_sub_init(const struct lu_env *env,
- const struct cl_object *obj,
- struct cl_lock *lock)
-{
- int result = 0;
- int i;
- int nr;
- u64 start;
- u64 end;
- u64 file_start;
- u64 file_end;
-
- struct lov_object *loo = cl2lov(obj);
- struct lov_layout_raid0 *r0 = lov_r0(loo);
- struct lov_lock *lovlck;
-
- CDEBUG(D_INODE, "%p: lock/io FID " DFID "/" DFID ", lock/io clobj %p/%p\n",
- loo, PFID(lu_object_fid(lov2lu(loo))),
- PFID(lu_object_fid(&obj->co_lu)),
- lov2cl(loo), obj);
-
- file_start = cl_offset(lov2cl(loo), lock->cll_descr.cld_start);
- file_end = cl_offset(lov2cl(loo), lock->cll_descr.cld_end + 1) - 1;
-
- for (i = 0, nr = 0; i < r0->lo_nr; i++) {
- /*
- * XXX for wide striping a smarter algorithm is desirable,
- * breaking out of the loop early.
- */
- if (likely(r0->lo_sub[i]) && /* sparse layout */
- lov_stripe_intersects(loo->lo_lsm, i,
- file_start, file_end, &start, &end))
- nr++;
- }
- LASSERT(nr > 0);
- lovlck = kvzalloc(offsetof(struct lov_lock, lls_sub[nr]),
- GFP_NOFS);
- if (!lovlck)
- return ERR_PTR(-ENOMEM);
-
- lovlck->lls_nr = nr;
- for (i = 0, nr = 0; i < r0->lo_nr; ++i) {
- if (likely(r0->lo_sub[i]) &&
- lov_stripe_intersects(loo->lo_lsm, i,
- file_start, file_end, &start, &end)) {
- struct lov_lock_sub *lls = &lovlck->lls_sub[nr];
- struct cl_lock_descr *descr;
-
- descr = &lls->sub_lock.cll_descr;
-
- LASSERT(!descr->cld_obj);
- descr->cld_obj = lovsub2cl(r0->lo_sub[i]);
- descr->cld_start = cl_index(descr->cld_obj, start);
- descr->cld_end = cl_index(descr->cld_obj, end);
- descr->cld_mode = lock->cll_descr.cld_mode;
- descr->cld_gid = lock->cll_descr.cld_gid;
- descr->cld_enq_flags = lock->cll_descr.cld_enq_flags;
- lls->sub_stripe = i;
-
- /* initialize sub lock */
- result = lov_sublock_init(env, lock, lls);
- if (result < 0)
- break;
-
- lls->sub_initialized = 1;
- nr++;
- }
- }
- LASSERT(ergo(result == 0, nr == lovlck->lls_nr));
-
- if (result != 0) {
- for (i = 0; i < nr; ++i) {
- if (!lovlck->lls_sub[i].sub_initialized)
- break;
-
- cl_lock_fini(env, &lovlck->lls_sub[i].sub_lock);
- }
- kvfree(lovlck);
- lovlck = ERR_PTR(result);
- }
-
- return lovlck;
-}
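lov_lock_sub_init() is a classic two-pass flexible-array allocation: count the stripes intersecting the lock extent, allocate the top-lock with exactly that many lls_sub[] slots in one shot, then fill them in. A user-space sketch of the idiom (the kernel sizes the allocation with offsetof(struct lov_lock, lls_sub[nr]); the sizeof arithmetic below is equivalent):

#include <stdio.h>
#include <stdlib.h>

struct sub_lock {
    int stripe;
};

struct top_lock {
    int nr;
    struct sub_lock subs[]; /* flexible array member */
};

int main(void)
{
    int intersects[] = { 1, 0, 1, 1 }; /* which stripes overlap the extent */
    struct top_lock *tl;
    int i, nr = 0;

    /* pass 1: count the sub-locks needed */
    for (i = 0; i < 4; i++)
        nr += intersects[i];

    /* a single allocation sized for exactly @nr sub-locks */
    tl = calloc(1, sizeof(*tl) + nr * sizeof(tl->subs[0]));
    if (!tl)
        return 1;
    tl->nr = nr;

    /* pass 2: fill them in, keeping the two passes in sync */
    for (i = 0, nr = 0; i < 4; i++)
        if (intersects[i])
            tl->subs[nr++].stripe = i;

    for (i = 0; i < tl->nr; i++)
        printf("sub-lock %d -> stripe %d\n", i, tl->subs[i].stripe);
    free(tl);
    return 0;
}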
-
-static void lov_lock_fini(const struct lu_env *env,
- struct cl_lock_slice *slice)
-{
- struct lov_lock *lovlck;
- int i;
-
- lovlck = cl2lov_lock(slice);
- for (i = 0; i < lovlck->lls_nr; ++i) {
- LASSERT(!lovlck->lls_sub[i].sub_is_enqueued);
- if (lovlck->lls_sub[i].sub_initialized)
- cl_lock_fini(env, &lovlck->lls_sub[i].sub_lock);
- }
- kvfree(lovlck);
-}
-
-/**
- * Implementation of cl_lock_operations::clo_enqueue() for lov layer. This
- * function is rather subtle, as it enqueues top-lock (i.e., advances top-lock
- * state machine from CLS_QUEUING to CLS_ENQUEUED states) by juggling sub-lock
- * state machines in the face of sub-locks sharing (by multiple top-locks),
- * and concurrent sub-lock cancellations.
- */
-static int lov_lock_enqueue(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- struct cl_io *io, struct cl_sync_io *anchor)
-{
- struct cl_lock *lock = slice->cls_lock;
- struct lov_lock *lovlck = cl2lov_lock(slice);
- int i;
- int rc = 0;
-
- for (i = 0; i < lovlck->lls_nr; ++i) {
- struct lov_lock_sub *lls = &lovlck->lls_sub[i];
- struct lov_sublock_env *subenv;
-
- subenv = lov_sublock_env_get(env, lock, lls);
- if (IS_ERR(subenv)) {
- rc = PTR_ERR(subenv);
- break;
- }
- rc = cl_lock_enqueue(subenv->lse_env, subenv->lse_io,
- &lls->sub_lock, anchor);
- if (rc != 0)
- break;
-
- lls->sub_is_enqueued = 1;
- }
- return rc;
-}
-
-static void lov_lock_cancel(const struct lu_env *env,
- const struct cl_lock_slice *slice)
-{
- struct cl_lock *lock = slice->cls_lock;
- struct lov_lock *lovlck = cl2lov_lock(slice);
- int i;
-
- for (i = 0; i < lovlck->lls_nr; ++i) {
- struct lov_lock_sub *lls = &lovlck->lls_sub[i];
- struct cl_lock *sublock = &lls->sub_lock;
- struct lov_sublock_env *subenv;
-
- if (!lls->sub_is_enqueued)
- continue;
-
- lls->sub_is_enqueued = 0;
- subenv = lov_sublock_env_get(env, lock, lls);
- if (!IS_ERR(subenv)) {
- cl_lock_cancel(subenv->lse_env, sublock);
- } else {
- CL_LOCK_DEBUG(D_ERROR, env, slice->cls_lock,
- "%s fails with %ld.\n",
- __func__, PTR_ERR(subenv));
- }
- }
-}
-
-static int lov_lock_print(const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct cl_lock_slice *slice)
-{
- struct lov_lock *lck = cl2lov_lock(slice);
- int i;
-
- (*p)(env, cookie, "%d\n", lck->lls_nr);
- for (i = 0; i < lck->lls_nr; ++i) {
- struct lov_lock_sub *sub;
-
- sub = &lck->lls_sub[i];
- (*p)(env, cookie, " %d %x: ", i, sub->sub_is_enqueued);
- cl_lock_print(env, cookie, p, &sub->sub_lock);
- }
- return 0;
-}
-
-static const struct cl_lock_operations lov_lock_ops = {
- .clo_fini = lov_lock_fini,
- .clo_enqueue = lov_lock_enqueue,
- .clo_cancel = lov_lock_cancel,
- .clo_print = lov_lock_print
-};
-
-int lov_lock_init_raid0(const struct lu_env *env, struct cl_object *obj,
- struct cl_lock *lock, const struct cl_io *io)
-{
- struct lov_lock *lck;
- int result = 0;
-
- lck = lov_lock_sub_init(env, obj, lock);
- if (!IS_ERR(lck))
- cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_lock_ops);
- else
- result = PTR_ERR(lck);
- return result;
-}
-
-static void lov_empty_lock_fini(const struct lu_env *env,
- struct cl_lock_slice *slice)
-{
- struct lov_lock *lck = cl2lov_lock(slice);
-
- kmem_cache_free(lov_lock_kmem, lck);
-}
-
-static int lov_empty_lock_print(const struct lu_env *env, void *cookie,
- lu_printer_t p,
- const struct cl_lock_slice *slice)
-{
- (*p)(env, cookie, "empty\n");
- return 0;
-}
-
-/* XXX: more methods will be added later. */
-static const struct cl_lock_operations lov_empty_lock_ops = {
- .clo_fini = lov_empty_lock_fini,
- .clo_print = lov_empty_lock_print
-};
-
-int lov_lock_init_empty(const struct lu_env *env, struct cl_object *obj,
- struct cl_lock *lock, const struct cl_io *io)
-{
- struct lov_lock *lck;
- int result = -ENOMEM;
-
- lck = kmem_cache_zalloc(lov_lock_kmem, GFP_NOFS);
- if (lck) {
- cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_empty_lock_ops);
- result = 0;
- }
- return result;
-}
-
-/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lov_merge.c b/drivers/staging/lustre/lustre/lov/lov_merge.c
deleted file mode 100644
index 006717cf7a41..000000000000
--- a/drivers/staging/lustre/lustre/lov/lov_merge.c
+++ /dev/null
@@ -1,105 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LOV
-
-#include <obd_class.h>
-#include "lov_internal.h"
-
-/** Merge the lock value block (lvb) attributes and KMS from each of the
- * stripes in a file into a single lvb. It is expected that the caller
- * initializes the current atime, mtime and ctime to avoid regressing a
- * more up-to-date time on the local client.
- */
-int lov_merge_lvb_kms(struct lov_stripe_md *lsm,
- struct ost_lvb *lvb, __u64 *kms_place)
-{
- __u64 size = 0;
- __u64 kms = 0;
- __u64 blocks = 0;
- s64 current_mtime = lvb->lvb_mtime;
- s64 current_atime = lvb->lvb_atime;
- s64 current_ctime = lvb->lvb_ctime;
- int i;
- int rc = 0;
-
- assert_spin_locked(&lsm->lsm_lock);
- LASSERT(lsm->lsm_lock_owner == current->pid);
-
- CDEBUG(D_INODE, "MDT ID " DOSTID " initial value: s=%llu m=%llu a=%llu c=%llu b=%llu\n",
- POSTID(&lsm->lsm_oi), lvb->lvb_size, lvb->lvb_mtime,
- lvb->lvb_atime, lvb->lvb_ctime, lvb->lvb_blocks);
- for (i = 0; i < lsm->lsm_stripe_count; i++) {
- struct lov_oinfo *loi = lsm->lsm_oinfo[i];
- u64 lov_size, tmpsize;
-
- if (OST_LVB_IS_ERR(loi->loi_lvb.lvb_blocks)) {
- rc = OST_LVB_GET_ERR(loi->loi_lvb.lvb_blocks);
- continue;
- }
-
- tmpsize = loi->loi_kms;
- lov_size = lov_stripe_size(lsm, tmpsize, i);
- if (lov_size > kms)
- kms = lov_size;
-
- if (loi->loi_lvb.lvb_size > tmpsize)
- tmpsize = loi->loi_lvb.lvb_size;
-
- lov_size = lov_stripe_size(lsm, tmpsize, i);
- if (lov_size > size)
- size = lov_size;
- /* merge blocks, mtime, atime */
- blocks += loi->loi_lvb.lvb_blocks;
- if (loi->loi_lvb.lvb_mtime > current_mtime)
- current_mtime = loi->loi_lvb.lvb_mtime;
- if (loi->loi_lvb.lvb_atime > current_atime)
- current_atime = loi->loi_lvb.lvb_atime;
- if (loi->loi_lvb.lvb_ctime > current_ctime)
- current_ctime = loi->loi_lvb.lvb_ctime;
-
- CDEBUG(D_INODE, "MDT ID " DOSTID " on OST[%u]: s=%llu m=%llu a=%llu c=%llu b=%llu\n",
- POSTID(&lsm->lsm_oi), loi->loi_ost_idx,
- loi->loi_lvb.lvb_size, loi->loi_lvb.lvb_mtime,
- loi->loi_lvb.lvb_atime, loi->loi_lvb.lvb_ctime,
- loi->loi_lvb.lvb_blocks);
- }
-
- *kms_place = kms;
- lvb->lvb_size = size;
- lvb->lvb_blocks = blocks;
- lvb->lvb_mtime = current_mtime;
- lvb->lvb_atime = current_atime;
- lvb->lvb_ctime = current_ctime;
- return rc;
-}
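Most of the merge is a max/sum over stripes, but each per-stripe size must first be mapped back to a file-level size, which is lov_stripe_size()'s job. A simplified sketch under a plain RAID0 assumption (stripe unit su, n stripes; not the real lov_stripe_md machinery):

#include <stdio.h>

typedef unsigned long long u64;

/*
 * Map a per-stripe byte count back to a file size for plain RAID0:
 * local offset lo on stripe i corresponds to file offset
 * (lo / su) * su * n + i * su + lo % su.
 */
static u64 stripe_to_file_size(u64 stripe_size, int i, int n, u64 su)
{
    u64 lo;

    if (stripe_size == 0)
        return 0;
    lo = stripe_size - 1; /* last byte written on this stripe */
    return (lo / su) * su * n + (u64)i * su + lo % su + 1;
}

int main(void)
{
    u64 sizes[2] = { 1048576 + 10, 1048576 }; /* bytes on each stripe */
    u64 su = 1048576, file_size = 0;
    int i, n = 2;

    /* file size = max over stripes of the mapped per-stripe size */
    for (i = 0; i < n; i++) {
        u64 fs = stripe_to_file_size(sizes[i], i, n, su);

        if (fs > file_size)
            file_size = fs;
    }
    printf("merged size: %llu\n", file_size); /* 2097162 */
    return 0;
}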
diff --git a/drivers/staging/lustre/lustre/lov/lov_obd.c b/drivers/staging/lustre/lustre/lov/lov_obd.c
deleted file mode 100644
index 344ff4b20168..000000000000
--- a/drivers/staging/lustre/lustre/lov/lov_obd.c
+++ /dev/null
@@ -1,1444 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/lov/lov_obd.c
- *
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Peter Braam <braam@clusterfs.com>
- * Author: Mike Shaver <shaver@clusterfs.com>
- * Author: Nathan Rutman <nathan@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LOV
-
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <uapi/linux/lustre/lustre_ioctl.h>
-
-#include <cl_object.h>
-#include <lustre_dlm.h>
-#include <lustre_fid.h>
-#include <lustre_lib.h>
-#include <lustre_mds.h>
-#include <lustre_net.h>
-#include <uapi/linux/lustre/lustre_param.h>
-#include <lustre_swab.h>
-#include <lprocfs_status.h>
-#include <obd_class.h>
-#include <obd_support.h>
-
-#include "lov_internal.h"
-
-/* Keep a refcount of lov->tgt usage to prevent racing with addition/deletion.
- * Any function that expects lov_tgts to remain stationary must take a ref.
- */
-static void lov_getref(struct obd_device *obd)
-{
- struct lov_obd *lov = &obd->u.lov;
-
- /* nobody gets through here until lov_putref is done */
- mutex_lock(&lov->lov_lock);
- atomic_inc(&lov->lov_refcount);
- mutex_unlock(&lov->lov_lock);
-}
-
-static void __lov_del_obd(struct obd_device *obd, struct lov_tgt_desc *tgt);
-
-static void lov_putref(struct obd_device *obd)
-{
- struct lov_obd *lov = &obd->u.lov;
-
- mutex_lock(&lov->lov_lock);
- /* ok to dec to 0 more than once -- ltd_exp's will be null */
- if (atomic_dec_and_test(&lov->lov_refcount) && lov->lov_death_row) {
- LIST_HEAD(kill);
- int i;
- struct lov_tgt_desc *tgt, *n;
-
- CDEBUG(D_CONFIG, "destroying %d lov targets\n",
- lov->lov_death_row);
- for (i = 0; i < lov->desc.ld_tgt_count; i++) {
- tgt = lov->lov_tgts[i];
-
- if (!tgt || !tgt->ltd_reap)
- continue;
- list_add(&tgt->ltd_kill, &kill);
- /* XXX - right now there is a dependency on ld_tgt_count
- * being the maximum tgt index for computing the
- * mds_max_easize. So we can't shrink it.
- */
- lov_ost_pool_remove(&lov->lov_packed, i);
- lov->lov_tgts[i] = NULL;
- lov->lov_death_row--;
- }
- mutex_unlock(&lov->lov_lock);
-
- list_for_each_entry_safe(tgt, n, &kill, ltd_kill) {
- list_del(&tgt->ltd_kill);
- /* Disconnect */
- __lov_del_obd(obd, tgt);
- }
-
- if (lov->lov_tgts_kobj)
- kobject_put(lov->lov_tgts_kobj);
-
- } else {
- mutex_unlock(&lov->lov_lock);
- }
-}
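lov_getref()/lov_putref() implement deferred deletion: a target flagged ltd_reap stays in place while any reference is held, and the final putref sweeps it out. A minimal single-file analogue with a pthread mutex standing in for lov_lock (the kernel additionally moves the actual disconnects outside the mutex; all names hypothetical):

#include <stdio.h>
#include <pthread.h>

#define NTGT 4

struct tgt {
    int present;
    int reap; /* scheduled for deletion */
};

static struct tgt tgts[NTGT] = { { 1, 0 }, { 1, 1 }, { 1, 0 }, { 1, 1 } };
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int refcount;
static int death_row = 2; /* how many targets carry the reap flag */

static void getref(void)
{
    pthread_mutex_lock(&lock);
    refcount++;
    pthread_mutex_unlock(&lock);
}

static void putref(void)
{
    pthread_mutex_lock(&lock);
    /* last reference gone: reap everything scheduled for deletion */
    if (--refcount == 0 && death_row) {
        int i;

        for (i = 0; i < NTGT; i++) {
            if (!tgts[i].present || !tgts[i].reap)
                continue;
            tgts[i].present = 0;
            death_row--;
            printf("reaped target %d\n", i);
        }
    }
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    getref(); /* targets 1 and 3 survive while this reference exists */
    putref(); /* last reference: both are reaped here */
    return 0;
}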
-
-static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid,
- enum obd_notify_event ev);
-static int lov_notify(struct obd_device *obd, struct obd_device *watched,
- enum obd_notify_event ev, void *data);
-
-int lov_connect_obd(struct obd_device *obd, __u32 index, int activate,
- struct obd_connect_data *data)
-{
- struct lov_obd *lov = &obd->u.lov;
- struct obd_uuid *tgt_uuid;
- struct obd_device *tgt_obd;
- static struct obd_uuid lov_osc_uuid = { "LOV_OSC_UUID" };
- struct obd_import *imp;
- int rc;
-
- if (!lov->lov_tgts[index])
- return -EINVAL;
-
- tgt_uuid = &lov->lov_tgts[index]->ltd_uuid;
- tgt_obd = lov->lov_tgts[index]->ltd_obd;
-
- if (!tgt_obd->obd_set_up) {
- CERROR("Target %s not set up\n", obd_uuid2str(tgt_uuid));
- return -EINVAL;
- }
-
- /* override the sp_me from lov */
- tgt_obd->u.cli.cl_sp_me = lov->lov_sp_me;
-
- if (data && (data->ocd_connect_flags & OBD_CONNECT_INDEX))
- data->ocd_index = index;
-
- /*
- * Divine LOV knows that OBDs under it are OSCs.
- */
- imp = tgt_obd->u.cli.cl_import;
-
- if (activate) {
- tgt_obd->obd_no_recov = 0;
- /* FIXME this is probably supposed to be
- * ptlrpc_set_import_active. Horrible naming.
- */
- ptlrpc_activate_import(imp);
- }
-
- rc = obd_register_observer(tgt_obd, obd);
- if (rc) {
- CERROR("Target %s register_observer error %d\n",
- obd_uuid2str(tgt_uuid), rc);
- return rc;
- }
-
- if (imp->imp_invalid) {
- CDEBUG(D_CONFIG, "not connecting OSC %s; administratively disabled\n",
- obd_uuid2str(tgt_uuid));
- return 0;
- }
-
- rc = obd_connect(NULL, &lov->lov_tgts[index]->ltd_exp, tgt_obd,
- &lov_osc_uuid, data, NULL);
- if (rc || !lov->lov_tgts[index]->ltd_exp) {
- CERROR("Target %s connect error %d\n",
- obd_uuid2str(tgt_uuid), rc);
- return -ENODEV;
- }
-
- lov->lov_tgts[index]->ltd_reap = 0;
-
- CDEBUG(D_CONFIG, "Connected tgt idx %d %s (%s) %sactive\n", index,
- obd_uuid2str(tgt_uuid), tgt_obd->obd_name, activate ? "":"in");
-
- if (lov->lov_tgts_kobj)
- /* Even if we failed, that's ok */
- rc = sysfs_create_link(lov->lov_tgts_kobj, &tgt_obd->obd_kobj,
- tgt_obd->obd_name);
-
- return 0;
-}
-
-static int lov_connect(const struct lu_env *env,
- struct obd_export **exp, struct obd_device *obd,
- struct obd_uuid *cluuid, struct obd_connect_data *data,
- void *localdata)
-{
- struct lov_obd *lov = &obd->u.lov;
- struct lov_tgt_desc *tgt;
- struct lustre_handle conn;
- int i, rc;
-
- CDEBUG(D_CONFIG, "connect #%d\n", lov->lov_connects);
-
- rc = class_connect(&conn, obd, cluuid);
- if (rc)
- return rc;
-
- *exp = class_conn2export(&conn);
-
- /* Why should there ever be more than 1 connect? */
- lov->lov_connects++;
- LASSERT(lov->lov_connects == 1);
-
- memset(&lov->lov_ocd, 0, sizeof(lov->lov_ocd));
- if (data)
- lov->lov_ocd = *data;
-
- obd_getref(obd);
-
- lov->lov_tgts_kobj = kobject_create_and_add("target_obds",
- &obd->obd_kobj);
-
- for (i = 0; i < lov->desc.ld_tgt_count; i++) {
- tgt = lov->lov_tgts[i];
- if (!tgt || obd_uuid_empty(&tgt->ltd_uuid))
- continue;
- /* Flags will be lowest common denominator */
- rc = lov_connect_obd(obd, i, tgt->ltd_activate, &lov->lov_ocd);
- if (rc) {
- CERROR("%s: lov connect tgt %d failed: %d\n",
- obd->obd_name, i, rc);
- continue;
- }
- /* the OST is administratively disabled; no export to notify */
- if (!lov->lov_tgts[i]->ltd_exp)
- continue;
-
- rc = lov_notify(obd, lov->lov_tgts[i]->ltd_exp->exp_obd,
- OBD_NOTIFY_CONNECT, (void *)&i);
- if (rc) {
- CERROR("%s error sending notify %d\n",
- obd->obd_name, rc);
- }
- }
- obd_putref(obd);
-
- return 0;
-}
-
-static int lov_disconnect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt)
-{
- struct lov_obd *lov = &obd->u.lov;
- struct obd_device *osc_obd;
- int rc;
-
- osc_obd = class_exp2obd(tgt->ltd_exp);
- CDEBUG(D_CONFIG, "%s: disconnecting target %s\n",
- obd->obd_name, osc_obd ? osc_obd->obd_name : "NULL");
-
- if (tgt->ltd_active) {
- tgt->ltd_active = 0;
- lov->desc.ld_active_tgt_count--;
- tgt->ltd_exp->exp_obd->obd_inactive = 1;
- }
-
- if (osc_obd) {
- if (lov->lov_tgts_kobj)
- sysfs_remove_link(lov->lov_tgts_kobj,
- osc_obd->obd_name);
-
- /* Pass it on to our clients.
- * XXX This should be an argument to disconnect,
- * XXX not a back-door flag on the OBD. Ah well.
- */
- osc_obd->obd_force = obd->obd_force;
- osc_obd->obd_fail = obd->obd_fail;
- osc_obd->obd_no_recov = obd->obd_no_recov;
- }
-
- obd_register_observer(osc_obd, NULL);
-
- rc = obd_disconnect(tgt->ltd_exp);
- if (rc) {
- CERROR("Target %s disconnect error %d\n",
- tgt->ltd_uuid.uuid, rc);
- rc = 0;
- }
-
- tgt->ltd_exp = NULL;
- return 0;
-}
-
-static int lov_disconnect(struct obd_export *exp)
-{
- struct obd_device *obd = class_exp2obd(exp);
- struct lov_obd *lov = &obd->u.lov;
- int i, rc;
-
- if (!lov->lov_tgts)
- goto out;
-
- /* Only disconnect the underlying layers on the final disconnect. */
- lov->lov_connects--;
- if (lov->lov_connects != 0) {
- /* why should there be more than 1 connect? */
- CERROR("disconnect #%d\n", lov->lov_connects);
- goto out;
- }
-
- /* Let's hold another reference so lov_del_obd doesn't spin through
- * putref every time
- */
- obd_getref(obd);
-
- for (i = 0; i < lov->desc.ld_tgt_count; i++) {
- if (lov->lov_tgts[i] && lov->lov_tgts[i]->ltd_exp) {
- /* Disconnection is the last we know about an obd */
- lov_del_target(obd, i, NULL, lov->lov_tgts[i]->ltd_gen);
- }
- }
-
- obd_putref(obd);
-
-out:
- rc = class_disconnect(exp); /* bz 9811 */
- return rc;
-}
-
-/* Error codes:
- *
- * -EINVAL : UUID can't be found in the LOV's target list
- * -ENOTCONN: The UUID is found, but the target connection is bad (!)
- * -EBADF : The UUID is found, but the OBD is the wrong type (!)
- * any >= 0 : the lov target index
- */
-static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid,
- enum obd_notify_event ev)
-{
- struct lov_obd *lov = &obd->u.lov;
- struct lov_tgt_desc *tgt;
- int index, activate, active;
-
- CDEBUG(D_INFO, "Searching in lov %p for uuid %s event(%d)\n",
- lov, uuid->uuid, ev);
-
- obd_getref(obd);
- for (index = 0; index < lov->desc.ld_tgt_count; index++) {
- tgt = lov->lov_tgts[index];
- if (!tgt)
- continue;
- /*
- * LU-642, initially inactive OSC could miss the obd_connect,
- * we make up for it here.
- */
- if (ev == OBD_NOTIFY_ACTIVATE && !tgt->ltd_exp &&
- obd_uuid_equals(uuid, &tgt->ltd_uuid)) {
- struct obd_uuid lov_osc_uuid = {"LOV_OSC_UUID"};
-
- obd_connect(NULL, &tgt->ltd_exp, tgt->ltd_obd,
- &lov_osc_uuid, &lov->lov_ocd, NULL);
- }
- if (!tgt->ltd_exp)
- continue;
-
- CDEBUG(D_INFO, "lov idx %d is %s conn %#llx\n",
- index, obd_uuid2str(&tgt->ltd_uuid),
- tgt->ltd_exp->exp_handle.h_cookie);
- if (obd_uuid_equals(uuid, &tgt->ltd_uuid))
- break;
- }
-
- if (index == lov->desc.ld_tgt_count) {
- index = -EINVAL;
- goto out;
- }
-
- if (ev == OBD_NOTIFY_DEACTIVATE || ev == OBD_NOTIFY_ACTIVATE) {
- activate = (ev == OBD_NOTIFY_ACTIVATE) ? 1 : 0;
-
- if (lov->lov_tgts[index]->ltd_activate == activate) {
- CDEBUG(D_INFO, "OSC %s already %sactivate!\n",
- uuid->uuid, activate ? "" : "de");
- } else {
- lov->lov_tgts[index]->ltd_activate = activate;
- CDEBUG(D_CONFIG, "%sactivate OSC %s\n",
- activate ? "" : "de", obd_uuid2str(uuid));
- }
-
- } else if (ev == OBD_NOTIFY_INACTIVE || ev == OBD_NOTIFY_ACTIVE) {
- active = (ev == OBD_NOTIFY_ACTIVE) ? 1 : 0;
-
- if (lov->lov_tgts[index]->ltd_active == active) {
- CDEBUG(D_INFO, "OSC %s already %sactive!\n",
- uuid->uuid, active ? "" : "in");
- goto out;
- }
- CDEBUG(D_CONFIG, "Marking OSC %s %sactive\n",
- obd_uuid2str(uuid), active ? "" : "in");
-
- lov->lov_tgts[index]->ltd_active = active;
- if (active) {
- lov->desc.ld_active_tgt_count++;
- lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 0;
- } else {
- lov->desc.ld_active_tgt_count--;
- lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 1;
- }
- } else {
- CERROR("Unknown event(%d) for uuid %s", ev, uuid->uuid);
- }
-
- out:
- obd_putref(obd);
- return index;
-}
-
-static int lov_notify(struct obd_device *obd, struct obd_device *watched,
- enum obd_notify_event ev, void *data)
-{
- int rc = 0;
- struct lov_obd *lov = &obd->u.lov;
-
- down_read(&lov->lov_notify_lock);
- if (!lov->lov_connects) {
- up_read(&lov->lov_notify_lock);
- return rc;
- }
-
- if (ev == OBD_NOTIFY_ACTIVE || ev == OBD_NOTIFY_INACTIVE ||
- ev == OBD_NOTIFY_ACTIVATE || ev == OBD_NOTIFY_DEACTIVATE) {
- struct obd_uuid *uuid;
-
- LASSERT(watched);
-
- if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) {
- up_read(&lov->lov_notify_lock);
- CERROR("unexpected notification of %s %s!\n",
- watched->obd_type->typ_name,
- watched->obd_name);
- return -EINVAL;
- }
- uuid = &watched->u.cli.cl_target_uuid;
-
- /* Set OSC as active before notifying the observer, so the
- * observer can use the OSC normally.
- */
- rc = lov_set_osc_active(obd, uuid, ev);
- if (rc < 0) {
- up_read(&lov->lov_notify_lock);
- CERROR("event(%d) of %s failed: %d\n", ev,
- obd_uuid2str(uuid), rc);
- return rc;
- }
- /* activation events should pass the lov target index as data */
- data = &rc;
- }
-
- /* Pass the notification up the chain. */
- if (watched) {
- rc = obd_notify_observer(obd, watched, ev, data);
- } else {
- /* NULL watched means all osc's in the lov (only for syncs) */
- /* sync events should send the lov index as data */
- struct lov_obd *lov = &obd->u.lov;
- int i, is_sync;
-
- data = &i;
- is_sync = (ev == OBD_NOTIFY_SYNC) ||
- (ev == OBD_NOTIFY_SYNC_NONBLOCK);
-
- obd_getref(obd);
- for (i = 0; i < lov->desc.ld_tgt_count; i++) {
- if (!lov->lov_tgts[i])
- continue;
-
- /* don't send sync event if target not
- * connected/activated
- */
- if (is_sync && !lov->lov_tgts[i]->ltd_active)
- continue;
-
- rc = obd_notify_observer(obd, lov->lov_tgts[i]->ltd_obd,
- ev, data);
- if (rc) {
- CERROR("%s: notify %s of %s failed %d\n",
- obd->obd_name,
- obd->obd_observer->obd_name,
- lov->lov_tgts[i]->ltd_obd->obd_name,
- rc);
- }
- }
- obd_putref(obd);
- }
-
- up_read(&lov->lov_notify_lock);
- return rc;
-}
-
-static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
- __u32 index, int gen, int active)
-{
- struct lov_obd *lov = &obd->u.lov;
- struct lov_tgt_desc *tgt;
- struct obd_device *tgt_obd;
- int rc;
-
- CDEBUG(D_CONFIG, "uuid:%s idx:%d gen:%d active:%d\n",
- uuidp->uuid, index, gen, active);
-
- if (gen <= 0) {
- CERROR("request to add OBD %s with invalid generation: %d\n",
- uuidp->uuid, gen);
- return -EINVAL;
- }
-
- tgt_obd = class_find_client_obd(uuidp, LUSTRE_OSC_NAME,
- &obd->obd_uuid);
- if (!tgt_obd)
- return -EINVAL;
-
- mutex_lock(&lov->lov_lock);
-
- if ((index < lov->lov_tgt_size) && lov->lov_tgts[index]) {
- tgt = lov->lov_tgts[index];
- CERROR("UUID %s already assigned at LOV target index %d\n",
- obd_uuid2str(&tgt->ltd_uuid), index);
- mutex_unlock(&lov->lov_lock);
- return -EEXIST;
- }
-
- if (index >= lov->lov_tgt_size) {
- /* We need to reallocate the lov target array. */
- struct lov_tgt_desc **newtgts, **old = NULL;
- __u32 newsize, oldsize = 0;
-
- newsize = max_t(__u32, lov->lov_tgt_size, 2);
- while (newsize < index + 1)
- newsize <<= 1;
- newtgts = kcalloc(newsize, sizeof(*newtgts), GFP_NOFS);
- if (!newtgts) {
- mutex_unlock(&lov->lov_lock);
- return -ENOMEM;
- }
-
- if (lov->lov_tgt_size) {
- memcpy(newtgts, lov->lov_tgts, sizeof(*newtgts) *
- lov->lov_tgt_size);
- old = lov->lov_tgts;
- oldsize = lov->lov_tgt_size;
- }
-
- lov->lov_tgts = newtgts;
- lov->lov_tgt_size = newsize;
- smp_rmb();
- kfree(old);
-
- CDEBUG(D_CONFIG, "tgts: %p size: %d\n",
- lov->lov_tgts, lov->lov_tgt_size);
- }
-
- tgt = kzalloc(sizeof(*tgt), GFP_NOFS);
- if (!tgt) {
- mutex_unlock(&lov->lov_lock);
- return -ENOMEM;
- }
-
- rc = lov_ost_pool_add(&lov->lov_packed, index, lov->lov_tgt_size);
- if (rc) {
- mutex_unlock(&lov->lov_lock);
- kfree(tgt);
- return rc;
- }
-
- tgt->ltd_uuid = *uuidp;
- tgt->ltd_obd = tgt_obd;
- /* XXX - add a sanity check on the generation number. */
- tgt->ltd_gen = gen;
- tgt->ltd_index = index;
- tgt->ltd_activate = active;
- lov->lov_tgts[index] = tgt;
- if (index >= lov->desc.ld_tgt_count)
- lov->desc.ld_tgt_count = index + 1;
-
- mutex_unlock(&lov->lov_lock);
-
- CDEBUG(D_CONFIG, "idx=%d ltd_gen=%d ld_tgt_count=%d\n",
- index, tgt->ltd_gen, lov->desc.ld_tgt_count);
-
- if (lov->lov_connects == 0) {
- /* lov_connect hasn't been called yet. We'll do the
- * lov_connect_obd on this target when that fn first runs,
- * because we don't know the connect flags yet.
- */
- return 0;
- }
-
- obd_getref(obd);
-
- rc = lov_connect_obd(obd, index, active, &lov->lov_ocd);
- if (rc)
- goto out;
-
- /* the OST is administratively disabled; no export */
- if (!tgt->ltd_exp) {
- rc = 0;
- goto out;
- }
-
- if (lov->lov_cache) {
- rc = obd_set_info_async(NULL, tgt->ltd_exp,
- sizeof(KEY_CACHE_SET), KEY_CACHE_SET,
- sizeof(struct cl_client_cache),
- lov->lov_cache, NULL);
- if (rc < 0)
- goto out;
- }
-
- rc = lov_notify(obd, tgt->ltd_exp->exp_obd,
- active ? OBD_NOTIFY_CONNECT : OBD_NOTIFY_INACTIVE,
- (void *)&index);
-
-out:
- if (rc) {
- CERROR("add failed (%d), deleting %s\n", rc,
- obd_uuid2str(&tgt->ltd_uuid));
- lov_del_target(obd, index, NULL, 0);
- }
- obd_putref(obd);
- return rc;
-}
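The target-array growth in lov_add_target() doubles the array until the requested index fits, so repeated additions cost amortized O(1) copying. The sizing logic in isolation (a hypothetical standalone helper):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void **grow(void **old, unsigned int *size, unsigned int index)
{
    unsigned int newsize = *size > 2 ? *size : 2;
    void **newtgts;

    while (newsize < index + 1)
        newsize <<= 1; /* double until the index fits */

    newtgts = calloc(newsize, sizeof(*newtgts));
    if (!newtgts)
        return NULL;
    if (*size)
        memcpy(newtgts, old, *size * sizeof(*newtgts));
    free(old);
    *size = newsize;
    return newtgts;
}

int main(void)
{
    void **tgts = NULL;
    unsigned int size = 0;

    tgts = grow(tgts, &size, 0); /* index 0 -> 2 slots */
    if (!tgts)
        return 1;
    printf("size %u\n", size);

    tgts = grow(tgts, &size, 9); /* index 9 -> 16 slots */
    if (!tgts)
        return 1;
    printf("size %u\n", size);
    free(tgts);
    return 0;
}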
-
-/* Schedule a target for deletion */
-int lov_del_target(struct obd_device *obd, __u32 index,
- struct obd_uuid *uuidp, int gen)
-{
- struct lov_obd *lov = &obd->u.lov;
- int count = lov->desc.ld_tgt_count;
- int rc = 0;
-
- if (index >= count) {
- CERROR("LOV target index %d >= number of LOV OBDs %d.\n",
- index, count);
- return -EINVAL;
- }
-
- /* to make sure there's no ongoing lov_notify() now */
- down_write(&lov->lov_notify_lock);
- obd_getref(obd);
-
- if (!lov->lov_tgts[index]) {
- CERROR("LOV target at index %d is not setup.\n", index);
- rc = -EINVAL;
- goto out;
- }
-
- if (uuidp && !obd_uuid_equals(uuidp, &lov->lov_tgts[index]->ltd_uuid)) {
- CERROR("LOV target UUID %s at index %d doesn't match %s.\n",
- lov_uuid2str(lov, index), index,
- obd_uuid2str(uuidp));
- rc = -EINVAL;
- goto out;
- }
-
- CDEBUG(D_CONFIG, "uuid: %s idx: %d gen: %d exp: %p active: %d\n",
- lov_uuid2str(lov, index), index,
- lov->lov_tgts[index]->ltd_gen, lov->lov_tgts[index]->ltd_exp,
- lov->lov_tgts[index]->ltd_active);
-
- lov->lov_tgts[index]->ltd_reap = 1;
- lov->lov_death_row++;
- /* the target is actually deleted on the final obd_putref() */
-out:
- obd_putref(obd);
- up_write(&lov->lov_notify_lock);
-
- return rc;
-}
-
-static void __lov_del_obd(struct obd_device *obd, struct lov_tgt_desc *tgt)
-{
- struct obd_device *osc_obd;
-
- LASSERT(tgt);
- LASSERT(tgt->ltd_reap);
-
- osc_obd = class_exp2obd(tgt->ltd_exp);
-
- CDEBUG(D_CONFIG, "Removing tgt %s : %s\n",
- tgt->ltd_uuid.uuid,
- osc_obd ? osc_obd->obd_name : "<no obd>");
-
- if (tgt->ltd_exp)
- lov_disconnect_obd(obd, tgt);
-
- kfree(tgt);
-
- /* Manual cleanup - no cleanup logs to clean up the osc's. We must
- * do it ourselves. And we can't do it from lov_cleanup,
- * because we just lost our only reference to it.
- */
- if (osc_obd)
- class_manual_cleanup(osc_obd);
-}
-
-void lov_fix_desc_stripe_size(__u64 *val)
-{
- if (*val < LOV_MIN_STRIPE_SIZE) {
- if (*val != 0)
- LCONSOLE_INFO("Increasing default stripe size to minimum %u\n",
- LOV_DESC_STRIPE_SIZE_DEFAULT);
- *val = LOV_DESC_STRIPE_SIZE_DEFAULT;
- } else if (*val & (LOV_MIN_STRIPE_SIZE - 1)) {
- *val &= ~(LOV_MIN_STRIPE_SIZE - 1);
- LCONSOLE_WARN("Changing default stripe size to %llu (a multiple of %u)\n",
- *val, LOV_MIN_STRIPE_SIZE);
- }
-}
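lov_fix_desc_stripe_size() rounds down to a multiple of LOV_MIN_STRIPE_SIZE with a bit mask, which is only valid because the minimum is a power of two. The idiom by itself (assuming a 64 KiB minimum):

#include <stdio.h>

#define MIN_STRIPE_SIZE 65536ULL /* must be a power of two for the mask trick */

int main(void)
{
    unsigned long long val = 200000;

    /* clear the low bits: round down to a multiple of the minimum */
    val &= ~(MIN_STRIPE_SIZE - 1);
    printf("%llu\n", val); /* 196608 = 3 * 65536 */
    return 0;
}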
-
-void lov_fix_desc_stripe_count(__u32 *val)
-{
- if (*val == 0)
- *val = 1;
-}
-
-void lov_fix_desc_pattern(__u32 *val)
-{
- /* from lov_setstripe */
- if ((*val != 0) && (*val != LOV_PATTERN_RAID0)) {
- LCONSOLE_WARN("Unknown stripe pattern: %#x\n", *val);
- *val = 0;
- }
-}
-
-void lov_fix_desc_qos_maxage(__u32 *val)
-{
- if (*val == 0)
- *val = LOV_DESC_QOS_MAXAGE_DEFAULT;
-}
-
-void lov_fix_desc(struct lov_desc *desc)
-{
- lov_fix_desc_stripe_size(&desc->ld_default_stripe_size);
- lov_fix_desc_stripe_count(&desc->ld_default_stripe_count);
- lov_fix_desc_pattern(&desc->ld_pattern);
- lov_fix_desc_qos_maxage(&desc->ld_qos_maxage);
-}
-
-int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
-{
- struct lprocfs_static_vars lvars = { NULL };
- struct lov_desc *desc;
- struct lov_obd *lov = &obd->u.lov;
- int rc;
-
- if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
- CERROR("LOV setup requires a descriptor\n");
- return -EINVAL;
- }
-
- desc = (struct lov_desc *)lustre_cfg_buf(lcfg, 1);
-
- if (sizeof(*desc) > LUSTRE_CFG_BUFLEN(lcfg, 1)) {
- CERROR("descriptor size wrong: %d > %d\n",
- (int)sizeof(*desc), LUSTRE_CFG_BUFLEN(lcfg, 1));
- return -EINVAL;
- }
-
- if (desc->ld_magic != LOV_DESC_MAGIC) {
- if (desc->ld_magic == __swab32(LOV_DESC_MAGIC)) {
- CDEBUG(D_OTHER, "%s: Swabbing lov desc %p\n",
- obd->obd_name, desc);
- lustre_swab_lov_desc(desc);
- } else {
- CERROR("%s: Bad lov desc magic: %#x\n",
- obd->obd_name, desc->ld_magic);
- return -EINVAL;
- }
- }
-
- lov_fix_desc(desc);
-
- desc->ld_active_tgt_count = 0;
- lov->desc = *desc;
- lov->lov_tgt_size = 0;
-
- mutex_init(&lov->lov_lock);
- atomic_set(&lov->lov_refcount, 0);
- lov->lov_sp_me = LUSTRE_SP_CLI;
-
- init_rwsem(&lov->lov_notify_lock);
-
- INIT_LIST_HEAD(&lov->lov_pool_list);
- lov->lov_pool_count = 0;
- rc = lov_pool_hash_init(&lov->lov_pools_hash_body);
- if (rc)
- goto out;
- rc = lov_ost_pool_init(&lov->lov_packed, 0);
- if (rc)
- goto out;
-
- lprocfs_lov_init_vars(&lvars);
- lprocfs_obd_setup(obd, lvars.obd_vars, lvars.sysfs_vars);
-
- debugfs_create_file("target_obd", 0444, obd->obd_debugfs_entry, obd,
- &lov_proc_target_fops);
-
- lov->lov_pool_debugfs_entry = debugfs_create_dir("pools",
- obd->obd_debugfs_entry);
- return 0;
-
-out:
- return rc;
-}
-
-static int lov_cleanup(struct obd_device *obd)
-{
- struct lov_obd *lov = &obd->u.lov;
- struct pool_desc *pool, *tmp;
-
- list_for_each_entry_safe(pool, tmp, &lov->lov_pool_list, pool_list) {
- /* free pool structs */
- CDEBUG(D_INFO, "delete pool %p\n", pool);
- /* In the function below, .hs_keycmp resolves to
- * pool_hashkey_keycmp()
- */
- /* coverity[overrun-buffer-val] */
- lov_pool_del(obd, pool->pool_name);
- }
- lov_pool_hash_destroy(&lov->lov_pools_hash_body);
- lov_ost_pool_free(&lov->lov_packed);
-
- lprocfs_obd_cleanup(obd);
- if (lov->lov_tgts) {
- int i;
-
- obd_getref(obd);
- for (i = 0; i < lov->desc.ld_tgt_count; i++) {
- if (!lov->lov_tgts[i])
- continue;
-
- /* Inactive targets may never have connected */
- if (lov->lov_tgts[i]->ltd_active ||
- atomic_read(&lov->lov_refcount))
- /* We should never get here - these
- * should have been removed in the
- * disconnect.
- */
- CERROR("lov tgt %d not cleaned! deathrow=%d, lovrc=%d\n",
- i, lov->lov_death_row,
- atomic_read(&lov->lov_refcount));
- lov_del_target(obd, i, NULL, 0);
- }
- obd_putref(obd);
- kfree(lov->lov_tgts);
- lov->lov_tgt_size = 0;
- }
-
- if (lov->lov_cache) {
- cl_cache_decref(lov->lov_cache);
- lov->lov_cache = NULL;
- }
-
- return 0;
-}
-
-int lov_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg,
- __u32 *indexp, int *genp)
-{
- struct obd_uuid obd_uuid;
- int cmd;
- int rc = 0;
-
- switch (cmd = lcfg->lcfg_command) {
- case LCFG_LOV_ADD_OBD:
- case LCFG_LOV_ADD_INA:
- case LCFG_LOV_DEL_OBD: {
- __u32 index;
- int gen;
- /* lov_modify_tgts add 0:lov_mdsA 1:ost1_UUID 2:0 3:1 */
- if (LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(obd_uuid.uuid)) {
- rc = -EINVAL;
- goto out;
- }
-
- obd_str2uuid(&obd_uuid, lustre_cfg_buf(lcfg, 1));
-
- rc = kstrtoint(lustre_cfg_buf(lcfg, 2), 10, indexp);
- if (rc < 0)
- goto out;
- rc = kstrtoint(lustre_cfg_buf(lcfg, 3), 10, genp);
- if (rc < 0)
- goto out;
- index = *indexp;
- gen = *genp;
- if (cmd == LCFG_LOV_ADD_OBD)
- rc = lov_add_target(obd, &obd_uuid, index, gen, 1);
- else if (cmd == LCFG_LOV_ADD_INA)
- rc = lov_add_target(obd, &obd_uuid, index, gen, 0);
- else
- rc = lov_del_target(obd, index, &obd_uuid, gen);
- goto out;
- }
- case LCFG_PARAM: {
- struct lprocfs_static_vars lvars = { NULL };
- struct lov_desc *desc = &obd->u.lov.desc;
-
- if (!desc) {
- rc = -EINVAL;
- goto out;
- }
-
- lprocfs_lov_init_vars(&lvars);
-
- rc = class_process_proc_param(PARAM_LOV, lvars.obd_vars,
- lcfg, obd);
- if (rc > 0)
- rc = 0;
- goto out;
- }
- case LCFG_POOL_NEW:
- case LCFG_POOL_ADD:
- case LCFG_POOL_DEL:
- case LCFG_POOL_REM:
- goto out;
-
- default: {
- CERROR("Unknown command: %d\n", lcfg->lcfg_command);
- rc = -EINVAL;
- goto out;
- }
- }
-out:
- return rc;
-}
-
-static int
-lov_statfs_interpret(struct ptlrpc_request_set *rqset, void *data, int rc)
-{
- struct lov_request_set *lovset = (struct lov_request_set *)data;
- int err;
-
- if (rc)
- atomic_set(&lovset->set_completes, 0);
-
- err = lov_fini_statfs_set(lovset);
- return rc ? rc : err;
-}
-
-static int lov_statfs_async(struct obd_export *exp, struct obd_info *oinfo,
- __u64 max_age, struct ptlrpc_request_set *rqset)
-{
- struct obd_device *obd = class_exp2obd(exp);
- struct lov_request_set *set;
- struct lov_request *req;
- struct lov_obd *lov;
- int rc = 0;
-
- LASSERT(oinfo->oi_osfs);
-
- lov = &obd->u.lov;
- rc = lov_prep_statfs_set(obd, oinfo, &set);
- if (rc)
- return rc;
-
- list_for_each_entry(req, &set->set_list, rq_link) {
- rc = obd_statfs_async(lov->lov_tgts[req->rq_idx]->ltd_exp,
- &req->rq_oi, max_age, rqset);
- if (rc)
- break;
- }
-
- if (rc || list_empty(&rqset->set_requests)) {
- int err;
-
- if (rc)
- atomic_set(&set->set_completes, 0);
- err = lov_fini_statfs_set(set);
- return rc ? rc : err;
- }
-
- LASSERT(!rqset->set_interpret);
- rqset->set_interpret = lov_statfs_interpret;
- rqset->set_arg = (void *)set;
- return 0;
-}
-
-static int lov_statfs(const struct lu_env *env, struct obd_export *exp,
- struct obd_statfs *osfs, __u64 max_age, __u32 flags)
-{
- struct ptlrpc_request_set *set = NULL;
- struct obd_info oinfo = {
- .oi_osfs = osfs,
- .oi_flags = flags,
- };
- int rc = 0;
-
- /* obdclass forbids using obd_statfs_rqset here; issue async statfs
- * requests instead
- */
- set = ptlrpc_prep_set();
- if (!set)
- return -ENOMEM;
-
- rc = lov_statfs_async(exp, &oinfo, max_age, set);
- if (rc == 0)
- rc = ptlrpc_set_wait(set);
- ptlrpc_set_destroy(set);
-
- return rc;
-}
-
-static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
- void *karg, void __user *uarg)
-{
- struct obd_device *obddev = class_exp2obd(exp);
- struct lov_obd *lov = &obddev->u.lov;
- int i = 0, rc = 0, count = lov->desc.ld_tgt_count;
- struct obd_uuid *uuidp;
-
- switch (cmd) {
- case IOC_OBD_STATFS: {
- struct obd_ioctl_data *data = karg;
- struct obd_device *osc_obd;
- struct obd_statfs stat_buf = {0};
- __u32 index;
- __u32 flags;
-
- memcpy(&index, data->ioc_inlbuf2, sizeof(__u32));
- if (index >= count)
- return -ENODEV;
-
- if (!lov->lov_tgts[index])
- /* Try again with the next index */
- return -EAGAIN;
- if (!lov->lov_tgts[index]->ltd_active)
- return -ENODATA;
-
- osc_obd = class_exp2obd(lov->lov_tgts[index]->ltd_exp);
- if (!osc_obd)
- return -EINVAL;
-
- /* copy UUID */
- if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(osc_obd),
- min_t(unsigned long, data->ioc_plen2,
- sizeof(struct obd_uuid))))
- return -EFAULT;
-
- memcpy(&flags, data->ioc_inlbuf1, sizeof(__u32));
- flags = flags & LL_STATFS_NODELAY ? OBD_STATFS_NODELAY : 0;
-
- /* got statfs data */
- rc = obd_statfs(NULL, lov->lov_tgts[index]->ltd_exp, &stat_buf,
- get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ,
- flags);
- if (rc)
- return rc;
- if (copy_to_user(data->ioc_pbuf1, &stat_buf,
- min_t(unsigned long, data->ioc_plen1,
- sizeof(stat_buf))))
- return -EFAULT;
- break;
- }
- case OBD_IOC_LOV_GET_CONFIG: {
- struct obd_ioctl_data *data;
- struct lov_desc *desc;
- char *buf = NULL;
- __u32 *genp;
-
- len = 0;
- if (obd_ioctl_getdata(&buf, &len, uarg))
- return -EINVAL;
-
- data = (struct obd_ioctl_data *)buf;
-
- if (sizeof(*desc) > data->ioc_inllen1) {
- kvfree(buf);
- return -EINVAL;
- }
-
- if (sizeof(uuidp->uuid) * count > data->ioc_inllen2) {
- kvfree(buf);
- return -EINVAL;
- }
-
- if (sizeof(__u32) * count > data->ioc_inllen3) {
- kvfree(buf);
- return -EINVAL;
- }
-
- desc = (struct lov_desc *)data->ioc_inlbuf1;
- memcpy(desc, &lov->desc, sizeof(*desc));
-
- uuidp = (struct obd_uuid *)data->ioc_inlbuf2;
- genp = (__u32 *)data->ioc_inlbuf3;
- /* the uuid will be empty for deleted OSTs */
- for (i = 0; i < count; i++, uuidp++, genp++) {
- if (!lov->lov_tgts[i])
- continue;
- *uuidp = lov->lov_tgts[i]->ltd_uuid;
- *genp = lov->lov_tgts[i]->ltd_gen;
- }
-
- if (copy_to_user(uarg, buf, len))
- rc = -EFAULT;
- kvfree(buf);
- break;
- }
- case OBD_IOC_QUOTACTL: {
- struct if_quotactl *qctl = karg;
- struct lov_tgt_desc *tgt = NULL;
- struct obd_quotactl *oqctl;
-
- if (qctl->qc_valid == QC_OSTIDX) {
- if (count <= qctl->qc_idx)
- return -EINVAL;
-
- tgt = lov->lov_tgts[qctl->qc_idx];
- if (!tgt || !tgt->ltd_exp)
- return -EINVAL;
- } else if (qctl->qc_valid == QC_UUID) {
- for (i = 0; i < count; i++) {
- tgt = lov->lov_tgts[i];
- if (!tgt ||
- !obd_uuid_equals(&tgt->ltd_uuid,
- &qctl->obd_uuid))
- continue;
-
- if (!tgt->ltd_exp)
- return -EINVAL;
-
- break;
- }
- } else {
- return -EINVAL;
- }
-
- if (i >= count)
- return -EAGAIN;
-
- LASSERT(tgt && tgt->ltd_exp);
- oqctl = kzalloc(sizeof(*oqctl), GFP_NOFS);
- if (!oqctl)
- return -ENOMEM;
-
- QCTL_COPY(oqctl, qctl);
- rc = obd_quotactl(tgt->ltd_exp, oqctl);
- if (rc == 0) {
- QCTL_COPY(qctl, oqctl);
- qctl->qc_valid = QC_OSTIDX;
- qctl->obd_uuid = tgt->ltd_uuid;
- }
- kfree(oqctl);
- break;
- }
- default: {
- int set = 0;
-
- if (count == 0)
- return -ENOTTY;
-
- for (i = 0; i < count; i++) {
- int err;
- struct obd_device *osc_obd;
-
- /* OST was disconnected */
- if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_exp)
- continue;
-
- /* ll_umount_begin() sets the force flag on the lov, not the
- * osc. Pass it through.
- */
- osc_obd = class_exp2obd(lov->lov_tgts[i]->ltd_exp);
- osc_obd->obd_force = obddev->obd_force;
- err = obd_iocontrol(cmd, lov->lov_tgts[i]->ltd_exp,
- len, karg, uarg);
- if (err) {
- if (lov->lov_tgts[i]->ltd_active) {
- CDEBUG(err == -ENOTTY ?
- D_IOCTL : D_WARNING,
- "iocontrol OSC %s on OST idx %d cmd %x: err = %d\n",
- lov_uuid2str(lov, i),
- i, cmd, err);
- if (!rc)
- rc = err;
- }
- } else {
- set = 1;
- }
- }
- if (!set && !rc)
- rc = -EIO;
- }
- }
-
- return rc;
-}
-
-static int lov_get_info(const struct lu_env *env, struct obd_export *exp,
- __u32 keylen, void *key, __u32 *vallen, void *val)
-{
- struct obd_device *obddev = class_exp2obd(exp);
- struct lov_obd *lov = &obddev->u.lov;
- struct lov_desc *ld = &lov->desc;
- int rc = 0;
-
- if (!vallen || !val)
- return -EFAULT;
-
- obd_getref(obddev);
-
- if (KEY_IS(KEY_MAX_EASIZE)) {
- u32 max_stripe_count = min_t(u32, ld->ld_active_tgt_count,
- LOV_MAX_STRIPE_COUNT);
-
- *((u32 *)val) = lov_mds_md_size(max_stripe_count, LOV_MAGIC_V3);
- } else if (KEY_IS(KEY_DEFAULT_EASIZE)) {
- u32 def_stripe_count = min_t(u32, ld->ld_default_stripe_count,
- LOV_MAX_STRIPE_COUNT);
-
- *((u32 *)val) = lov_mds_md_size(def_stripe_count, LOV_MAGIC_V3);
- } else if (KEY_IS(KEY_TGT_COUNT)) {
- *((int *)val) = lov->desc.ld_tgt_count;
- } else {
- rc = -EINVAL;
- }
-
- obd_putref(obddev);
- return rc;
-}
-
-static int lov_set_info_async(const struct lu_env *env, struct obd_export *exp,
- u32 keylen, void *key, u32 vallen,
- void *val, struct ptlrpc_request_set *set)
-{
- struct obd_device *obddev = class_exp2obd(exp);
- struct lov_obd *lov = &obddev->u.lov;
- u32 count;
- int i, rc = 0, err;
- struct lov_tgt_desc *tgt;
- int do_inactive = 0, no_set = 0;
-
- if (!set) {
- no_set = 1;
- set = ptlrpc_prep_set();
- if (!set)
- return -ENOMEM;
- }
-
- obd_getref(obddev);
- count = lov->desc.ld_tgt_count;
-
- if (KEY_IS(KEY_CHECKSUM)) {
- do_inactive = 1;
- } else if (KEY_IS(KEY_CACHE_SET)) {
- LASSERT(!lov->lov_cache);
- lov->lov_cache = val;
- do_inactive = 1;
- cl_cache_incref(lov->lov_cache);
- }
-
- for (i = 0; i < count; i++) {
- tgt = lov->lov_tgts[i];
-
- /* OST was disconnected */
- if (!tgt || !tgt->ltd_exp)
- continue;
-
- /* OST is inactive and we don't want inactive OSCs */
- if (!tgt->ltd_active && !do_inactive)
- continue;
-
- err = obd_set_info_async(env, tgt->ltd_exp, keylen, key,
- vallen, val, set);
- if (!rc)
- rc = err;
- }
-
- obd_putref(obddev);
- if (no_set) {
- err = ptlrpc_set_wait(set);
- if (!rc)
- rc = err;
- ptlrpc_set_destroy(set);
- }
- return rc;
-}
-
-void lov_stripe_lock(struct lov_stripe_md *md)
- __acquires(&md->lsm_lock)
-{
- LASSERT(md->lsm_lock_owner != current->pid);
- spin_lock(&md->lsm_lock);
- LASSERT(md->lsm_lock_owner == 0);
- md->lsm_lock_owner = current->pid;
-}
-
-void lov_stripe_unlock(struct lov_stripe_md *md)
- __releases(&md->lsm_lock)
-{
- LASSERT(md->lsm_lock_owner == current->pid);
- md->lsm_lock_owner = 0;
- spin_unlock(&md->lsm_lock);
-}
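lov_stripe_lock()/lov_stripe_unlock() record the owner's pid next to a plain spinlock so that helpers such as lov_merge_lvb_kms() can assert that the caller holds the lock. A user-space analogue built on a pthread mutex and thread ids:

#include <assert.h>
#include <pthread.h>

struct stripe_md {
    pthread_mutex_t lock;
    pthread_t owner;
    int locked; /* stands in for "lsm_lock_owner != 0" */
};

static void md_lock(struct stripe_md *md)
{
    pthread_mutex_lock(&md->lock);
    md->owner = pthread_self();
    md->locked = 1;
}

static void md_unlock(struct stripe_md *md)
{
    assert(md->locked && pthread_equal(md->owner, pthread_self()));
    md->locked = 0;
    pthread_mutex_unlock(&md->lock);
}

/* callers must hold the lock, as lov_merge_lvb_kms() asserts */
static void merge_helper(struct stripe_md *md)
{
    assert(md->locked && pthread_equal(md->owner, pthread_self()));
}

int main(void)
{
    struct stripe_md md = { .lock = PTHREAD_MUTEX_INITIALIZER };

    md_lock(&md);
    merge_helper(&md);
    md_unlock(&md);
    return 0;
}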
-
-static int lov_quotactl(struct obd_device *obd, struct obd_export *exp,
- struct obd_quotactl *oqctl)
-{
- struct lov_obd *lov = &obd->u.lov;
- struct lov_tgt_desc *tgt;
- __u64 curspace = 0;
- __u64 bhardlimit = 0;
- int i, rc = 0;
-
- if (oqctl->qc_cmd != Q_GETOQUOTA &&
- oqctl->qc_cmd != LUSTRE_Q_SETQUOTA) {
- CERROR("bad quota opc %x for lov obd\n", oqctl->qc_cmd);
- return -EFAULT;
- }
-
- /* for lov tgt */
- obd_getref(obd);
- for (i = 0; i < lov->desc.ld_tgt_count; i++) {
- int err;
-
- tgt = lov->lov_tgts[i];
-
- if (!tgt)
- continue;
-
- if (!tgt->ltd_active || tgt->ltd_reap) {
- if (oqctl->qc_cmd == Q_GETOQUOTA &&
- lov->lov_tgts[i]->ltd_activate) {
- rc = -EREMOTEIO;
- CERROR("ost %d is inactive\n", i);
- } else {
- CDEBUG(D_HA, "ost %d is inactive\n", i);
- }
- continue;
- }
-
- err = obd_quotactl(tgt->ltd_exp, oqctl);
- if (err) {
- if (tgt->ltd_active && !rc)
- rc = err;
- continue;
- }
-
- if (oqctl->qc_cmd == Q_GETOQUOTA) {
- curspace += oqctl->qc_dqblk.dqb_curspace;
- bhardlimit += oqctl->qc_dqblk.dqb_bhardlimit;
- }
- }
- obd_putref(obd);
-
- if (oqctl->qc_cmd == Q_GETOQUOTA) {
- oqctl->qc_dqblk.dqb_curspace = curspace;
- oqctl->qc_dqblk.dqb_bhardlimit = bhardlimit;
- }
- return rc;
-}
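
lov_quotactl() fans one quota request out to every target and folds the replies back: missing targets are skipped, an inactive target either raises an error or is merely logged, the first error from an active target is preserved, and Q_GETOQUOTA usage is summed. A standalone sketch of that fold, with hypothetical types (not the Lustre API):

#include <stdio.h>

struct tgt_usage {
	int active;			/* target currently reachable */
	int err;			/* per-target result */
	unsigned long long used;	/* per-target usage */
};

static int aggregate_usage(const struct tgt_usage *tgts, int count,
			   unsigned long long *total)
{
	int i, rc = 0;

	*total = 0;
	for (i = 0; i < count; i++) {
		if (!tgts[i].active)
			continue;		/* skip inactive targets */
		if (tgts[i].err) {
			if (!rc)
				rc = tgts[i].err; /* first error wins */
			continue;
		}
		*total += tgts[i].used;
	}
	return rc;
}

int main(void)
{
	struct tgt_usage t[] = { { 1, 0, 100 }, { 0, 0, 50 }, { 1, 0, 200 } };
	unsigned long long total;
	int rc = aggregate_usage(t, 3, &total);

	printf("rc=%d total=%llu\n", rc, total);	/* rc=0 total=300 */
	return 0;
}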
-
-static struct obd_ops lov_obd_ops = {
- .owner = THIS_MODULE,
- .setup = lov_setup,
- .cleanup = lov_cleanup,
- /*.process_config = lov_process_config,*/
- .connect = lov_connect,
- .disconnect = lov_disconnect,
- .statfs = lov_statfs,
- .statfs_async = lov_statfs_async,
- .iocontrol = lov_iocontrol,
- .get_info = lov_get_info,
- .set_info_async = lov_set_info_async,
- .notify = lov_notify,
- .pool_new = lov_pool_new,
- .pool_rem = lov_pool_remove,
- .pool_add = lov_pool_add,
- .pool_del = lov_pool_del,
- .getref = lov_getref,
- .putref = lov_putref,
- .quotactl = lov_quotactl,
-};
-
-struct kmem_cache *lov_oinfo_slab;
-
-static int __init lov_init(void)
-{
- struct lprocfs_static_vars lvars = { NULL };
- int rc;
-
- /* print an address of _any_ initialized kernel symbol from this
- * module, to allow debugging with gdb that doesn't support data
- * symbols from modules.
- */
- CDEBUG(D_INFO, "Lustre LOV module (%p).\n", &lov_caches);
-
- rc = libcfs_setup();
- if (rc)
- return rc;
-
- rc = lu_kmem_init(lov_caches);
- if (rc)
- return rc;
-
- lov_oinfo_slab = kmem_cache_create("lov_oinfo",
- sizeof(struct lov_oinfo),
- 0, SLAB_HWCACHE_ALIGN, NULL);
- if (!lov_oinfo_slab) {
- lu_kmem_fini(lov_caches);
- return -ENOMEM;
- }
- lprocfs_lov_init_vars(&lvars);
-
- rc = class_register_type(&lov_obd_ops, NULL,
- LUSTRE_LOV_NAME, &lov_device_type);
-
- if (rc) {
- kmem_cache_destroy(lov_oinfo_slab);
- lu_kmem_fini(lov_caches);
- }
-
- return rc;
-}
-
-static void /*__exit*/ lov_exit(void)
-{
- class_unregister_type(LUSTRE_LOV_NAME);
- kmem_cache_destroy(lov_oinfo_slab);
-
- lu_kmem_fini(lov_caches);
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Logical Object Volume");
-MODULE_LICENSE("GPL");
-MODULE_VERSION(LUSTRE_VERSION_STRING);
-
-module_init(lov_init);
-module_exit(lov_exit);
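
lov_init() uses the usual unwind-on-failure ordering: each step that succeeded is undone in reverse when a later step fails, and lov_exit() mirrors the same order. A generic standalone sketch of the idiom, with stub helpers standing in for lu_kmem_init(), kmem_cache_create() and class_register_type():

#include <stdio.h>

static int setup_caches(void)     { return 0; }	/* like lu_kmem_init() */
static int setup_slab(void)       { return 0; }	/* like kmem_cache_create() */
static int register_type(void)    { return 0; }	/* like class_register_type() */
static void teardown_slab(void)   { }
static void teardown_caches(void) { }

static int example_init(void)
{
	int rc = setup_caches();

	if (rc)
		return rc;

	rc = setup_slab();
	if (rc)
		goto out_caches;

	rc = register_type();
	if (rc)
		goto out_slab;

	return 0;

out_slab:
	teardown_slab();
out_caches:
	teardown_caches();
	return rc;
}

int main(void)
{
	printf("init: %d\n", example_init());
	return 0;
}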
diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c
deleted file mode 100644
index adc90f310fd7..000000000000
--- a/drivers/staging/lustre/lustre/lov/lov_object.c
+++ /dev/null
@@ -1,1625 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_object for LOV layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LOV
-
-#include "lov_cl_internal.h"
-
-static inline struct lov_device *lov_object_dev(struct lov_object *obj)
-{
- return lu2lov_dev(obj->lo_cl.co_lu.lo_dev);
-}
-
-/** \addtogroup lov
- * @{
- */
-
-/*****************************************************************************
- *
- * Layout operations.
- *
- */
-
-struct lov_layout_operations {
- int (*llo_init)(const struct lu_env *env, struct lov_device *dev,
- struct lov_object *lov, struct lov_stripe_md *lsm,
- const struct cl_object_conf *conf,
- union lov_layout_state *state);
- int (*llo_delete)(const struct lu_env *env, struct lov_object *lov,
- union lov_layout_state *state);
- void (*llo_fini)(const struct lu_env *env, struct lov_object *lov,
- union lov_layout_state *state);
- void (*llo_install)(const struct lu_env *env, struct lov_object *lov,
- union lov_layout_state *state);
- int (*llo_print)(const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct lu_object *o);
- int (*llo_page_init)(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index);
- int (*llo_lock_init)(const struct lu_env *env,
- struct cl_object *obj, struct cl_lock *lock,
- const struct cl_io *io);
- int (*llo_io_init)(const struct lu_env *env,
- struct cl_object *obj, struct cl_io *io);
- int (*llo_getattr)(const struct lu_env *env, struct cl_object *obj,
- struct cl_attr *attr);
-};
-
-static int lov_layout_wait(const struct lu_env *env, struct lov_object *lov);
-
-static void lov_lsm_put(struct lov_stripe_md *lsm)
-{
- if (lsm)
- lov_free_memmd(&lsm);
-}
-
-/*****************************************************************************
- *
- * Lov object layout operations.
- *
- */
-
-static void lov_install_empty(const struct lu_env *env,
- struct lov_object *lov,
- union lov_layout_state *state)
-{
- /*
- * File without objects.
- */
-}
-
-static int lov_init_empty(const struct lu_env *env, struct lov_device *dev,
- struct lov_object *lov, struct lov_stripe_md *lsm,
- const struct cl_object_conf *conf,
- union lov_layout_state *state)
-{
- return 0;
-}
-
-static void lov_install_raid0(const struct lu_env *env,
- struct lov_object *lov,
- union lov_layout_state *state)
-{
-}
-
-static struct cl_object *lov_sub_find(const struct lu_env *env,
- struct cl_device *dev,
- const struct lu_fid *fid,
- const struct cl_object_conf *conf)
-{
- struct lu_object *o;
-
- o = lu_object_find_at(env, cl2lu_dev(dev), fid, &conf->coc_lu);
- LASSERT(ergo(!IS_ERR(o), o->lo_dev->ld_type == &lovsub_device_type));
- return lu2cl(o);
-}
-
-static int lov_init_sub(const struct lu_env *env, struct lov_object *lov,
- struct cl_object *stripe, struct lov_layout_raid0 *r0,
- int idx)
-{
- struct cl_object_header *hdr;
- struct cl_object_header *subhdr;
- struct cl_object_header *parent;
- struct lov_oinfo *oinfo;
- int result;
-
- if (OBD_FAIL_CHECK(OBD_FAIL_LOV_INIT)) {
- /* For sanity:test_206.
- * Do not leave the object in cache to avoid accessing
- * freed memory. This is because osc_object is referring to
- * lov_oinfo of lsm_stripe_data which will be freed due to
- * this failure.
- */
- cl_object_kill(env, stripe);
- cl_object_put(env, stripe);
- return -EIO;
- }
-
- hdr = cl_object_header(lov2cl(lov));
- subhdr = cl_object_header(stripe);
-
- oinfo = lov->lo_lsm->lsm_oinfo[idx];
- CDEBUG(D_INODE, DFID "@%p[%d] -> " DFID "@%p: ostid: " DOSTID " idx: %d gen: %d\n",
- PFID(&subhdr->coh_lu.loh_fid), subhdr, idx,
- PFID(&hdr->coh_lu.loh_fid), hdr, POSTID(&oinfo->loi_oi),
- oinfo->loi_ost_idx, oinfo->loi_ost_gen);
-
- /* reuse ->coh_attr_guard to protect coh_parent change */
- spin_lock(&subhdr->coh_attr_guard);
- parent = subhdr->coh_parent;
- if (!parent) {
- subhdr->coh_parent = hdr;
- spin_unlock(&subhdr->coh_attr_guard);
- subhdr->coh_nesting = hdr->coh_nesting + 1;
- lu_object_ref_add(&stripe->co_lu, "lov-parent", lov);
- r0->lo_sub[idx] = cl2lovsub(stripe);
- r0->lo_sub[idx]->lso_super = lov;
- r0->lo_sub[idx]->lso_index = idx;
- result = 0;
- } else {
- struct lu_object *old_obj;
- struct lov_object *old_lov;
- unsigned int mask = D_INODE;
-
- spin_unlock(&subhdr->coh_attr_guard);
- old_obj = lu_object_locate(&parent->coh_lu, &lov_device_type);
- LASSERT(old_obj);
- old_lov = cl2lov(lu2cl(old_obj));
- if (old_lov->lo_layout_invalid) {
- /* the object's layout has already changed but isn't
- * refreshed
- */
- lu_object_unhash(env, &stripe->co_lu);
- result = -EAGAIN;
- } else {
- mask = D_ERROR;
- result = -EIO;
- }
-
- LU_OBJECT_DEBUG(mask, env, &stripe->co_lu,
- "stripe %d is already owned.", idx);
- LU_OBJECT_DEBUG(mask, env, old_obj, "owned.");
- LU_OBJECT_HEADER(mask, env, lov2lu(lov), "try to own.\n");
- cl_object_put(env, stripe);
- }
- return result;
-}
-
-static int lov_page_slice_fixup(struct lov_object *lov,
- struct cl_object *stripe)
-{
- struct cl_object_header *hdr = cl_object_header(&lov->lo_cl);
- struct cl_object *o;
-
- if (!stripe)
- return hdr->coh_page_bufsize - lov->lo_cl.co_slice_off -
- cfs_size_round(sizeof(struct lov_page));
-
- cl_object_for_each(o, stripe)
- o->co_slice_off += hdr->coh_page_bufsize;
-
- return cl_object_header(stripe)->coh_page_bufsize;
-}
-
-static int lov_init_raid0(const struct lu_env *env, struct lov_device *dev,
- struct lov_object *lov, struct lov_stripe_md *lsm,
- const struct cl_object_conf *conf,
- union lov_layout_state *state)
-{
- int result;
- int i;
-
- struct cl_object *stripe;
- struct lov_thread_info *lti = lov_env_info(env);
- struct cl_object_conf *subconf = &lti->lti_stripe_conf;
- struct lu_fid *ofid = &lti->lti_fid;
- struct lov_layout_raid0 *r0 = &state->raid0;
-
- if (lsm->lsm_magic != LOV_MAGIC_V1 && lsm->lsm_magic != LOV_MAGIC_V3) {
- dump_lsm(D_ERROR, lsm);
- LASSERTF(0, "magic mismatch, expected %d/%d, actual %d.\n",
- LOV_MAGIC_V1, LOV_MAGIC_V3, lsm->lsm_magic);
- }
-
- LASSERT(!lov->lo_lsm);
- lov->lo_lsm = lsm_addref(lsm);
- lov->lo_layout_invalid = true;
- r0->lo_nr = lsm->lsm_stripe_count;
- LASSERT(r0->lo_nr <= lov_targets_nr(dev));
-
- r0->lo_sub = kvzalloc(r0->lo_nr * sizeof(r0->lo_sub[0]),
- GFP_NOFS);
- if (r0->lo_sub) {
- int psz = 0;
-
- result = 0;
- subconf->coc_inode = conf->coc_inode;
- spin_lock_init(&r0->lo_sub_lock);
- /*
- * Create stripe cl_objects.
- */
- for (i = 0; i < r0->lo_nr && result == 0; ++i) {
- struct cl_device *subdev;
- struct lov_oinfo *oinfo = lsm->lsm_oinfo[i];
- int ost_idx = oinfo->loi_ost_idx;
-
- if (lov_oinfo_is_dummy(oinfo))
- continue;
-
- result = ostid_to_fid(ofid, &oinfo->loi_oi,
- oinfo->loi_ost_idx);
- if (result != 0)
- goto out;
-
- if (!dev->ld_target[ost_idx]) {
- CERROR("%s: OST %04x is not initialized\n",
- lov2obd(dev->ld_lov)->obd_name, ost_idx);
- result = -EIO;
- goto out;
- }
-
- subdev = lovsub2cl_dev(dev->ld_target[ost_idx]);
- subconf->u.coc_oinfo = oinfo;
- LASSERTF(subdev, "not init ost %d\n", ost_idx);
- /* In the function below, .hs_keycmp resolves to
- * lu_obj_hop_keycmp()
- */
- /* coverity[overrun-buffer-val] */
- stripe = lov_sub_find(env, subdev, ofid, subconf);
- if (!IS_ERR(stripe)) {
- result = lov_init_sub(env, lov, stripe, r0, i);
- if (result == -EAGAIN) { /* try again */
- --i;
- result = 0;
- continue;
- }
- } else {
- result = PTR_ERR(stripe);
- }
-
- if (result == 0) {
- int sz = lov_page_slice_fixup(lov, stripe);
-
- LASSERT(ergo(psz > 0, psz == sz));
- psz = sz;
- }
- }
- if (result == 0)
- cl_object_header(&lov->lo_cl)->coh_page_bufsize += psz;
- } else {
- result = -ENOMEM;
- }
-out:
- return result;
-}
-
-static int lov_init_released(const struct lu_env *env, struct lov_device *dev,
- struct lov_object *lov, struct lov_stripe_md *lsm,
- const struct cl_object_conf *conf,
- union lov_layout_state *state)
-{
- LASSERT(lsm);
- LASSERT(lsm_is_released(lsm));
- LASSERT(!lov->lo_lsm);
-
- lov->lo_lsm = lsm_addref(lsm);
- return 0;
-}
-
-static struct cl_object *lov_find_subobj(const struct lu_env *env,
- struct lov_object *lov,
- struct lov_stripe_md *lsm,
- int stripe_idx)
-{
- struct lov_device *dev = lu2lov_dev(lov2lu(lov)->lo_dev);
- struct lov_oinfo *oinfo = lsm->lsm_oinfo[stripe_idx];
- struct lov_thread_info *lti = lov_env_info(env);
- struct lu_fid *ofid = &lti->lti_fid;
- struct cl_device *subdev;
- struct cl_object *result;
- int ost_idx;
- int rc;
-
- if (lov->lo_type != LLT_RAID0) {
- result = NULL;
- goto out;
- }
-
- ost_idx = oinfo->loi_ost_idx;
- rc = ostid_to_fid(ofid, &oinfo->loi_oi, ost_idx);
- if (rc) {
- result = NULL;
- goto out;
- }
-
- subdev = lovsub2cl_dev(dev->ld_target[ost_idx]);
- result = lov_sub_find(env, subdev, ofid, NULL);
-out:
- if (!result)
- result = ERR_PTR(-EINVAL);
- return result;
-}
-
-static int lov_delete_empty(const struct lu_env *env, struct lov_object *lov,
- union lov_layout_state *state)
-{
- LASSERT(lov->lo_type == LLT_EMPTY || lov->lo_type == LLT_RELEASED);
-
- lov_layout_wait(env, lov);
- return 0;
-}
-
-static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
- struct lovsub_object *los, int idx)
-{
- struct cl_object *sub;
- struct lov_layout_raid0 *r0;
- struct lu_site *site;
- wait_queue_head_t *wq;
- wait_queue_entry_t *waiter;
-
- r0 = &lov->u.raid0;
- LASSERT(r0->lo_sub[idx] == los);
-
- sub = lovsub2cl(los);
- site = sub->co_lu.lo_dev->ld_site;
- wq = lu_site_wq_from_fid(site, &sub->co_lu.lo_header->loh_fid);
-
- cl_object_kill(env, sub);
- /* release a reference to the sub-object and ... */
- lu_object_ref_del(&sub->co_lu, "lov-parent", lov);
- cl_object_put(env, sub);
-
- /* ... wait until it is actually destroyed---sub-object clears its
- * ->lo_sub[] slot in lovsub_object_fini()
- */
- if (r0->lo_sub[idx] == los) {
- waiter = &lov_env_info(env)->lti_waiter;
- init_waitqueue_entry(waiter, current);
- add_wait_queue(wq, waiter);
- set_current_state(TASK_UNINTERRUPTIBLE);
- while (1) {
- /* this wait-queue is signaled at the end of
- * lu_object_free().
- */
- set_current_state(TASK_UNINTERRUPTIBLE);
- spin_lock(&r0->lo_sub_lock);
- if (r0->lo_sub[idx] == los) {
- spin_unlock(&r0->lo_sub_lock);
- schedule();
- } else {
- spin_unlock(&r0->lo_sub_lock);
- set_current_state(TASK_RUNNING);
- break;
- }
- }
- remove_wait_queue(wq, waiter);
- }
- LASSERT(!r0->lo_sub[idx]);
-}
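
The wait loop above is the classic re-check-under-lock pattern: each wakeup re-tests r0->lo_sub[idx] under lo_sub_lock, so a spurious wakeup can never be mistaken for destruction of the sub-object. A hedged pthread analogue (illustrative names; a condition variable plays the role of the lu_site wait queue):

#include <pthread.h>

struct slot_table {
	pthread_mutex_t lock;
	pthread_cond_t freed;		/* broadcast when a slot is cleared */
	void *slot[16];
};

static void wait_slot_cleared(struct slot_table *t, int idx, void *old)
{
	pthread_mutex_lock(&t->lock);
	while (t->slot[idx] == old)	/* like r0->lo_sub[idx] == los */
		pthread_cond_wait(&t->freed, &t->lock);
	pthread_mutex_unlock(&t->lock);
}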
-
-static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov,
- union lov_layout_state *state)
-{
- struct lov_layout_raid0 *r0 = &state->raid0;
- struct lov_stripe_md *lsm = lov->lo_lsm;
- int i;
-
- dump_lsm(D_INODE, lsm);
-
- lov_layout_wait(env, lov);
- if (r0->lo_sub) {
- for (i = 0; i < r0->lo_nr; ++i) {
- struct lovsub_object *los = r0->lo_sub[i];
-
- if (los) {
- cl_object_prune(env, &los->lso_cl);
- /*
- * If top-level object is to be evicted from
- * the cache, so are its sub-objects.
- */
- lov_subobject_kill(env, lov, los, i);
- }
- }
- }
- return 0;
-}
-
-static void lov_fini_empty(const struct lu_env *env, struct lov_object *lov,
- union lov_layout_state *state)
-{
- LASSERT(lov->lo_type == LLT_EMPTY || lov->lo_type == LLT_RELEASED);
-}
-
-static void lov_fini_raid0(const struct lu_env *env, struct lov_object *lov,
- union lov_layout_state *state)
-{
- struct lov_layout_raid0 *r0 = &state->raid0;
-
- if (r0->lo_sub) {
- kvfree(r0->lo_sub);
- r0->lo_sub = NULL;
- }
-
- dump_lsm(D_INODE, lov->lo_lsm);
- lov_free_memmd(&lov->lo_lsm);
-}
-
-static void lov_fini_released(const struct lu_env *env, struct lov_object *lov,
- union lov_layout_state *state)
-{
- dump_lsm(D_INODE, lov->lo_lsm);
- lov_free_memmd(&lov->lo_lsm);
-}
-
-static int lov_print_empty(const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct lu_object *o)
-{
- (*p)(env, cookie, "empty %d\n", lu2lov(o)->lo_layout_invalid);
- return 0;
-}
-
-static int lov_print_raid0(const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct lu_object *o)
-{
- struct lov_object *lov = lu2lov(o);
- struct lov_layout_raid0 *r0 = lov_r0(lov);
- struct lov_stripe_md *lsm = lov->lo_lsm;
- int i;
-
- (*p)(env, cookie, "stripes: %d, %s, lsm{%p 0x%08X %d %u %u}:\n",
- r0->lo_nr, lov->lo_layout_invalid ? "invalid" : "valid", lsm,
- lsm->lsm_magic, atomic_read(&lsm->lsm_refc),
- lsm->lsm_stripe_count, lsm->lsm_layout_gen);
- for (i = 0; i < r0->lo_nr; ++i) {
- struct lu_object *sub;
-
- if (r0->lo_sub[i]) {
- sub = lovsub2lu(r0->lo_sub[i]);
- lu_object_print(env, cookie, p, sub);
- } else {
- (*p)(env, cookie, "sub %d absent\n", i);
- }
- }
- return 0;
-}
-
-static int lov_print_released(const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct lu_object *o)
-{
- struct lov_object *lov = lu2lov(o);
- struct lov_stripe_md *lsm = lov->lo_lsm;
-
- (*p)(env, cookie,
- "released: %s, lsm{%p 0x%08X %d %u %u}:\n",
- lov->lo_layout_invalid ? "invalid" : "valid", lsm,
- lsm->lsm_magic, atomic_read(&lsm->lsm_refc),
- lsm->lsm_stripe_count, lsm->lsm_layout_gen);
- return 0;
-}
-
-/**
- * Implements cl_object_operations::coo_attr_get() method for an object
- * without stripes (LLT_EMPTY layout type).
- *
- * The only attribute this layer is authoritative for in this case is
- * cl_attr::cat_blocks---it's 0.
- */
-static int lov_attr_get_empty(const struct lu_env *env, struct cl_object *obj,
- struct cl_attr *attr)
-{
- attr->cat_blocks = 0;
- return 0;
-}
-
-static int lov_attr_get_raid0(const struct lu_env *env, struct cl_object *obj,
- struct cl_attr *attr)
-{
- struct lov_object *lov = cl2lov(obj);
- struct lov_layout_raid0 *r0 = lov_r0(lov);
- struct cl_attr *lov_attr = &r0->lo_attr;
- int result = 0;
-
-	/* this is called w/o holding the type guard mutex, so it must be
-	 * inside an ongoing IO, otherwise the lsm may be replaced.
- * LU-2117: it turns out there exists one exception. For mmaped files,
- * the lock of those files may be requested in the other file's IO
- * context, and this function is called in ccc_lock_state(), it will
- * hit this assertion.
-	 * Anyway, it's still okay to call attr_get w/o the type guard, as the
-	 * layout can't change while locks exist.
- */
- /* LASSERT(atomic_read(&lsm->lsm_refc) > 1); */
-
- if (!r0->lo_attr_valid) {
- struct lov_stripe_md *lsm = lov->lo_lsm;
- struct ost_lvb *lvb = &lov_env_info(env)->lti_lvb;
- __u64 kms = 0;
-
- memset(lvb, 0, sizeof(*lvb));
- /* XXX: timestamps can be negative by sanity:test_39m,
- * how can it be?
- */
- lvb->lvb_atime = LLONG_MIN;
- lvb->lvb_ctime = LLONG_MIN;
- lvb->lvb_mtime = LLONG_MIN;
-
- /*
- * XXX that should be replaced with a loop over sub-objects,
- * doing cl_object_attr_get() on them. But for now, let's
- * reuse old lov code.
- */
-
- /*
- * XXX take lsm spin-lock to keep lov_merge_lvb_kms()
- * happy. It's not needed, because new code uses
- * ->coh_attr_guard spin-lock to protect consistency of
- * sub-object attributes.
- */
- lov_stripe_lock(lsm);
- result = lov_merge_lvb_kms(lsm, lvb, &kms);
- lov_stripe_unlock(lsm);
- if (result == 0) {
- cl_lvb2attr(lov_attr, lvb);
- lov_attr->cat_kms = kms;
- r0->lo_attr_valid = 1;
- }
- }
- if (result == 0) { /* merge results */
- attr->cat_blocks = lov_attr->cat_blocks;
- attr->cat_size = lov_attr->cat_size;
- attr->cat_kms = lov_attr->cat_kms;
- if (attr->cat_atime < lov_attr->cat_atime)
- attr->cat_atime = lov_attr->cat_atime;
- if (attr->cat_ctime < lov_attr->cat_ctime)
- attr->cat_ctime = lov_attr->cat_ctime;
- if (attr->cat_mtime < lov_attr->cat_mtime)
- attr->cat_mtime = lov_attr->cat_mtime;
- }
- return result;
-}
-
-static const struct lov_layout_operations lov_dispatch[] = {
- [LLT_EMPTY] = {
- .llo_init = lov_init_empty,
- .llo_delete = lov_delete_empty,
- .llo_fini = lov_fini_empty,
- .llo_install = lov_install_empty,
- .llo_print = lov_print_empty,
- .llo_page_init = lov_page_init_empty,
- .llo_lock_init = lov_lock_init_empty,
- .llo_io_init = lov_io_init_empty,
- .llo_getattr = lov_attr_get_empty
- },
- [LLT_RAID0] = {
- .llo_init = lov_init_raid0,
- .llo_delete = lov_delete_raid0,
- .llo_fini = lov_fini_raid0,
- .llo_install = lov_install_raid0,
- .llo_print = lov_print_raid0,
- .llo_page_init = lov_page_init_raid0,
- .llo_lock_init = lov_lock_init_raid0,
- .llo_io_init = lov_io_init_raid0,
- .llo_getattr = lov_attr_get_raid0
- },
- [LLT_RELEASED] = {
- .llo_init = lov_init_released,
- .llo_delete = lov_delete_empty,
- .llo_fini = lov_fini_released,
- .llo_install = lov_install_empty,
- .llo_print = lov_print_released,
- .llo_page_init = lov_page_init_empty,
- .llo_lock_init = lov_lock_init_empty,
- .llo_io_init = lov_io_init_released,
- .llo_getattr = lov_attr_get_empty
- }
-};
-
-/**
- * Performs a double-dispatch based on the layout type of an object.
- */
-#define LOV_2DISPATCH_NOLOCK(obj, op, ...) \
-({ \
- struct lov_object *__obj = (obj); \
- enum lov_layout_type __llt; \
- \
- __llt = __obj->lo_type; \
- LASSERT(__llt < ARRAY_SIZE(lov_dispatch)); \
- lov_dispatch[__llt].op(__VA_ARGS__); \
-})
-
-/**
- * Return lov_layout_type associated with a given lsm
- */
-static enum lov_layout_type lov_type(struct lov_stripe_md *lsm)
-{
- if (!lsm)
- return LLT_EMPTY;
- if (lsm_is_released(lsm))
- return LLT_RELEASED;
- return LLT_RAID0;
-}
-
-static inline void lov_conf_freeze(struct lov_object *lov)
-{
- CDEBUG(D_INODE, "To take share lov(%p) owner %p/%p\n",
- lov, lov->lo_owner, current);
- if (lov->lo_owner != current)
- down_read(&lov->lo_type_guard);
-}
-
-static inline void lov_conf_thaw(struct lov_object *lov)
-{
- CDEBUG(D_INODE, "To release share lov(%p) owner %p/%p\n",
- lov, lov->lo_owner, current);
- if (lov->lo_owner != current)
- up_read(&lov->lo_type_guard);
-}
-
-#define LOV_2DISPATCH_MAYLOCK(obj, op, lock, ...) \
-({ \
- struct lov_object *__obj = (obj); \
- int __lock = !!(lock); \
- typeof(lov_dispatch[0].op(__VA_ARGS__)) __result; \
- \
- if (__lock) \
- lov_conf_freeze(__obj); \
- __result = LOV_2DISPATCH_NOLOCK(obj, op, __VA_ARGS__); \
- if (__lock) \
- lov_conf_thaw(__obj); \
- __result; \
-})
-
-/**
- * Performs a locked double-dispatch based on the layout type of an object.
- */
-#define LOV_2DISPATCH(obj, op, ...) \
- LOV_2DISPATCH_MAYLOCK(obj, op, 1, __VA_ARGS__)
-
-#define LOV_2DISPATCH_VOID(obj, op, ...) \
-do { \
- struct lov_object *__obj = (obj); \
- enum lov_layout_type __llt; \
- \
- lov_conf_freeze(__obj); \
- __llt = __obj->lo_type; \
- LASSERT(__llt < ARRAY_SIZE(lov_dispatch)); \
- lov_dispatch[__llt].op(__VA_ARGS__); \
- lov_conf_thaw(__obj); \
-} while (0)
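
The three macros above wrap one and the same table lookup with different locking. A self-contained plain-C sketch of the underlying double-dispatch idiom (illustrative names): an ops table indexed by the object's layout tag, as LOV_2DISPATCH_NOLOCK() does with lov_dispatch[]:

#include <assert.h>
#include <stdio.h>

enum layout { EMPTY, RAID0, NTYPES };

struct ops {
	void (*print)(void);
};

static void print_empty(void) { printf("empty\n"); }
static void print_raid0(void) { printf("raid0\n"); }

static const struct ops dispatch[NTYPES] = {
	[EMPTY] = { .print = print_empty },
	[RAID0] = { .print = print_raid0 },
};

struct object {
	enum layout type;
};

static void object_print(const struct object *o)
{
	assert(o->type < NTYPES);	/* like the LASSERT in the macros */
	dispatch[o->type].print();
}

int main(void)
{
	struct object o = { RAID0 };

	object_print(&o);		/* prints "raid0" */
	return 0;
}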
-
-static void lov_conf_lock(struct lov_object *lov)
-{
- LASSERT(lov->lo_owner != current);
- down_write(&lov->lo_type_guard);
- LASSERT(!lov->lo_owner);
- lov->lo_owner = current;
- CDEBUG(D_INODE, "Took exclusive lov(%p) owner %p\n",
- lov, lov->lo_owner);
-}
-
-static void lov_conf_unlock(struct lov_object *lov)
-{
- CDEBUG(D_INODE, "To release exclusive lov(%p) owner %p\n",
- lov, lov->lo_owner);
- lov->lo_owner = NULL;
- up_write(&lov->lo_type_guard);
-}
-
-static int lov_layout_wait(const struct lu_env *env, struct lov_object *lov)
-{
- while (atomic_read(&lov->lo_active_ios) > 0) {
- CDEBUG(D_INODE, "file:" DFID " wait for active IO, now: %d.\n",
- PFID(lu_object_fid(lov2lu(lov))),
- atomic_read(&lov->lo_active_ios));
-
- wait_event_idle(lov->lo_waitq,
- atomic_read(&lov->lo_active_ios) == 0);
- }
- return 0;
-}
-
-static int lov_layout_change(const struct lu_env *unused,
- struct lov_object *lov, struct lov_stripe_md *lsm,
- const struct cl_object_conf *conf)
-{
- struct lov_device *lov_dev = lov_object_dev(lov);
- enum lov_layout_type llt = lov_type(lsm);
- union lov_layout_state *state = &lov->u;
- const struct lov_layout_operations *old_ops;
- const struct lov_layout_operations *new_ops;
- struct lu_env *env;
- u16 refcheck;
- int rc;
-
- LASSERT(lov->lo_type < ARRAY_SIZE(lov_dispatch));
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- LASSERT(llt < ARRAY_SIZE(lov_dispatch));
-
- CDEBUG(D_INODE, DFID " from %s to %s\n",
- PFID(lu_object_fid(lov2lu(lov))),
- llt2str(lov->lo_type), llt2str(llt));
-
- old_ops = &lov_dispatch[lov->lo_type];
- new_ops = &lov_dispatch[llt];
-
- rc = cl_object_prune(env, &lov->lo_cl);
- if (rc)
- goto out;
-
- rc = old_ops->llo_delete(env, lov, &lov->u);
- if (rc)
- goto out;
-
- old_ops->llo_fini(env, lov, &lov->u);
-
- LASSERT(!atomic_read(&lov->lo_active_ios));
-
- CDEBUG(D_INODE, DFID "Apply new layout lov %p, type %d\n",
- PFID(lu_object_fid(lov2lu(lov))), lov, llt);
-
- lov->lo_type = LLT_EMPTY;
-
- /* page bufsize fixup */
- cl_object_header(&lov->lo_cl)->coh_page_bufsize -=
- lov_page_slice_fixup(lov, NULL);
-
- rc = new_ops->llo_init(env, lov_dev, lov, lsm, conf, state);
- if (rc) {
- struct obd_device *obd = lov2obd(lov_dev->ld_lov);
-
- CERROR("%s: cannot apply new layout on " DFID " : rc = %d\n",
- obd->obd_name, PFID(lu_object_fid(lov2lu(lov))), rc);
- new_ops->llo_delete(env, lov, state);
- new_ops->llo_fini(env, lov, state);
- /* this file becomes an EMPTY file. */
- goto out;
- }
-
- new_ops->llo_install(env, lov, state);
- lov->lo_type = llt;
-out:
- cl_env_put(env, &refcheck);
- return rc;
-}
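
The ordering in lov_layout_change() is the point: the old layout is pruned, deleted and finalized before the new one is initialized, and a failed init leaves the object as LLT_EMPTY rather than half-installed. A compressed standalone sketch of the switch itself (illustrative types, not the Lustre API; the prune/drain steps are elided):

enum ltype { T_EMPTY, T_RAID0, T_NR };

struct obj {
	enum ltype type;
};

struct layout_ops {
	int  (*init)(struct obj *o);
	void (*fini)(struct obj *o);
};

static int  noop_init(struct obj *o) { (void)o; return 0; }
static void noop_fini(struct obj *o) { (void)o; }

static const struct layout_ops ops[T_NR] = {
	[T_EMPTY] = { noop_init, noop_fini },
	[T_RAID0] = { noop_init, noop_fini },
};

static int change_layout(struct obj *o, enum ltype nt)
{
	int rc;

	ops[o->type].fini(o);	/* tear the old layout down first */
	o->type = T_EMPTY;	/* safe fallback while switching */

	rc = ops[nt].init(o);
	if (rc) {
		ops[nt].fini(o);
		return rc;	/* object stays empty on failure */
	}
	o->type = nt;
	return 0;
}

int main(void)
{
	struct obj o = { T_EMPTY };

	return change_layout(&o, T_RAID0);
}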
-
-/*****************************************************************************
- *
- * Lov object operations.
- *
- */
-int lov_object_init(const struct lu_env *env, struct lu_object *obj,
- const struct lu_object_conf *conf)
-{
- struct lov_object *lov = lu2lov(obj);
- struct lov_device *dev = lov_object_dev(lov);
- const struct cl_object_conf *cconf = lu2cl_conf(conf);
- union lov_layout_state *set = &lov->u;
- const struct lov_layout_operations *ops;
- struct lov_stripe_md *lsm = NULL;
- int rc;
-
- init_rwsem(&lov->lo_type_guard);
- atomic_set(&lov->lo_active_ios, 0);
- init_waitqueue_head(&lov->lo_waitq);
- cl_object_page_init(lu2cl(obj), sizeof(struct lov_page));
-
- lov->lo_type = LLT_EMPTY;
- if (cconf->u.coc_layout.lb_buf) {
- lsm = lov_unpackmd(dev->ld_lov,
- cconf->u.coc_layout.lb_buf,
- cconf->u.coc_layout.lb_len);
- if (IS_ERR(lsm))
- return PTR_ERR(lsm);
- }
-
- /* no locking is necessary, as object is being created */
- lov->lo_type = lov_type(lsm);
- ops = &lov_dispatch[lov->lo_type];
- rc = ops->llo_init(env, dev, lov, lsm, cconf, set);
- if (!rc)
- ops->llo_install(env, lov, set);
-
- lov_lsm_put(lsm);
-
- return rc;
-}
-
-static int lov_conf_set(const struct lu_env *env, struct cl_object *obj,
- const struct cl_object_conf *conf)
-{
- struct lov_stripe_md *lsm = NULL;
- struct lov_object *lov = cl2lov(obj);
- int result = 0;
-
- if (conf->coc_opc == OBJECT_CONF_SET &&
- conf->u.coc_layout.lb_buf) {
- lsm = lov_unpackmd(lov_object_dev(lov)->ld_lov,
- conf->u.coc_layout.lb_buf,
- conf->u.coc_layout.lb_len);
- if (IS_ERR(lsm))
- return PTR_ERR(lsm);
- }
-
- lov_conf_lock(lov);
- if (conf->coc_opc == OBJECT_CONF_INVALIDATE) {
- lov->lo_layout_invalid = true;
- result = 0;
- goto out;
- }
-
- if (conf->coc_opc == OBJECT_CONF_WAIT) {
- if (lov->lo_layout_invalid &&
- atomic_read(&lov->lo_active_ios) > 0) {
- lov_conf_unlock(lov);
- result = lov_layout_wait(env, lov);
- lov_conf_lock(lov);
- }
- goto out;
- }
-
- LASSERT(conf->coc_opc == OBJECT_CONF_SET);
-
- if ((!lsm && !lov->lo_lsm) ||
- ((lsm && lov->lo_lsm) &&
- (lov->lo_lsm->lsm_layout_gen == lsm->lsm_layout_gen) &&
- (lov->lo_lsm->lsm_pattern == lsm->lsm_pattern))) {
- /* same version of layout */
- lov->lo_layout_invalid = false;
- result = 0;
- goto out;
- }
-
- /* will change layout - check if there still exists active IO. */
- if (atomic_read(&lov->lo_active_ios) > 0) {
- lov->lo_layout_invalid = true;
- result = -EBUSY;
- goto out;
- }
-
- result = lov_layout_change(env, lov, lsm, conf);
- lov->lo_layout_invalid = result != 0;
-
-out:
- lov_conf_unlock(lov);
- lov_lsm_put(lsm);
- CDEBUG(D_INODE, DFID " lo_layout_invalid=%d\n",
- PFID(lu_object_fid(lov2lu(lov))), lov->lo_layout_invalid);
- return result;
-}
-
-static void lov_object_delete(const struct lu_env *env, struct lu_object *obj)
-{
- struct lov_object *lov = lu2lov(obj);
-
- LOV_2DISPATCH_VOID(lov, llo_delete, env, lov, &lov->u);
-}
-
-static void lov_object_free(const struct lu_env *env, struct lu_object *obj)
-{
- struct lov_object *lov = lu2lov(obj);
-
- LOV_2DISPATCH_VOID(lov, llo_fini, env, lov, &lov->u);
- lu_object_fini(obj);
- kmem_cache_free(lov_object_kmem, lov);
-}
-
-static int lov_object_print(const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct lu_object *o)
-{
- return LOV_2DISPATCH_NOLOCK(lu2lov(o), llo_print, env, cookie, p, o);
-}
-
-int lov_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index)
-{
- return LOV_2DISPATCH_NOLOCK(cl2lov(obj), llo_page_init, env, obj, page,
- index);
-}
-
-/**
- * Implements cl_object_operations::clo_io_init() method for lov
- * layer. Dispatches to the appropriate layout io initialization method.
- */
-int lov_io_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io)
-{
- CL_IO_SLICE_CLEAN(lov_env_io(env), lis_cl);
-
- CDEBUG(D_INODE, DFID "io %p type %d ignore/verify layout %d/%d\n",
- PFID(lu_object_fid(&obj->co_lu)), io, io->ci_type,
- io->ci_ignore_layout, io->ci_verify_layout);
-
- return LOV_2DISPATCH_MAYLOCK(cl2lov(obj), llo_io_init,
- !io->ci_ignore_layout, env, obj, io);
-}
-
-/**
- * An implementation of cl_object_operations::clo_attr_get() method for lov
- * layer. For raid0 layout this collects and merges attributes of all
- * sub-objects.
- */
-static int lov_attr_get(const struct lu_env *env, struct cl_object *obj,
- struct cl_attr *attr)
-{
- /* do not take lock, as this function is called under a
- * spin-lock. Layout is protected from changing by ongoing IO.
- */
- return LOV_2DISPATCH_NOLOCK(cl2lov(obj), llo_getattr, env, obj, attr);
-}
-
-static int lov_attr_update(const struct lu_env *env, struct cl_object *obj,
- const struct cl_attr *attr, unsigned int valid)
-{
- /*
- * No dispatch is required here, as no layout implements this.
- */
- return 0;
-}
-
-int lov_lock_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_lock *lock, const struct cl_io *io)
-{
- /* No need to lock because we've taken one refcount of layout. */
- return LOV_2DISPATCH_NOLOCK(cl2lov(obj), llo_lock_init, env, obj, lock,
- io);
-}
-
-/**
- * We calculate on which OST the mapping will end. If the length of mapping
- * is greater than (stripe_size * stripe_count) then the last_stripe
- * will be the one just before start_stripe. Otherwise we check if the mapping
- * intersects each OST and find last_stripe.
- * This function returns the last_stripe and also sets the stripe_count
- * over which the mapping is spread
- *
- * \param lsm [in] striping information for the file
- * \param fm_start [in] logical start of mapping
- * \param fm_end [in] logical end of mapping
- * \param start_stripe [in] starting stripe of the mapping
- * \param stripe_count [out] the number of stripes across which to map is
- * returned
- *
- * \retval last_stripe return the last stripe of the mapping
- */
-static int fiemap_calc_last_stripe(struct lov_stripe_md *lsm,
- u64 fm_start, u64 fm_end,
- int start_stripe, int *stripe_count)
-{
- int last_stripe;
- u64 obd_start;
- u64 obd_end;
- int i, j;
-
- if (fm_end - fm_start > lsm->lsm_stripe_size * lsm->lsm_stripe_count) {
- last_stripe = (start_stripe < 1 ? lsm->lsm_stripe_count - 1 :
- start_stripe - 1);
- *stripe_count = lsm->lsm_stripe_count;
- } else {
- for (j = 0, i = start_stripe; j < lsm->lsm_stripe_count;
- i = (i + 1) % lsm->lsm_stripe_count, j++) {
- if (!(lov_stripe_intersects(lsm, i, fm_start, fm_end,
- &obd_start, &obd_end)))
- break;
- }
- *stripe_count = j;
- last_stripe = (start_stripe + j - 1) % lsm->lsm_stripe_count;
- }
-
- return last_stripe;
-}
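
A worked example may help here (standalone, assumed numbers): with four 1 MiB stripes, any mapping longer than 4 MiB wraps the entire stripe set, so the last stripe is simply the one before start_stripe; shorter mappings fall through to the intersection scan.

#include <stdio.h>

int main(void)
{
	unsigned long long stripe_size = 1ULL << 20;	/* 1 MiB */
	int stripe_count = 4;
	int start_stripe = 1;
	unsigned long long fm_start = 0;
	unsigned long long fm_end = 6ULL << 20;		/* 6 MiB mapping */
	int last_stripe;

	if (fm_end - fm_start > stripe_size * stripe_count)
		last_stripe = start_stripe < 1 ? stripe_count - 1
					       : start_stripe - 1;
	else
		last_stripe = -1;	/* would walk lov_stripe_intersects() */

	printf("last stripe: %d\n", last_stripe);	/* prints 0 */
	return 0;
}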
-
-/**
- * Set fe_device and copy extents from local buffer into main return buffer.
- *
- * \param fiemap [out] fiemap to hold all extents
- * \param lcl_fm_ext [in] array of fiemap extents received from the OSC layer
- * \param ost_index [in] OST index to be written into the fm_device
- * field for each extent
- * \param ext_count [in] number of extents to be copied
- * \param current_extent [in] where to start copying in the extent array
- */
-static void fiemap_prepare_and_copy_exts(struct fiemap *fiemap,
- struct fiemap_extent *lcl_fm_ext,
- int ost_index, unsigned int ext_count,
- int current_extent)
-{
- unsigned int ext;
- char *to;
-
- for (ext = 0; ext < ext_count; ext++) {
- lcl_fm_ext[ext].fe_device = ost_index;
- lcl_fm_ext[ext].fe_flags |= FIEMAP_EXTENT_NET;
- }
-
- /* Copy fm_extent's from fm_local to return buffer */
- to = (char *)fiemap + fiemap_count_to_size(current_extent);
- memcpy(to, lcl_fm_ext, ext_count * sizeof(struct fiemap_extent));
-}
-
-#define FIEMAP_BUFFER_SIZE 4096
-
-/**
- * Non-zero fe_logical indicates that this is a continuation FIEMAP
- * call. The local end offset and the device are sent in the first
- * fm_extent. From the saved device index this function calculates the
- * stripe_no on which mapping is to be restarted.
- *
- * This function returns fm_end_offset which is the in-OST offset at which
- * mapping should be restarted. If fm_end_offset=0 is returned then the
- * caller will re-calculate the proper offset in the next stripe.
- * Note that the first extent is passed to lov_get_info via the value field.
- *
- * \param fiemap [in] fiemap request header
- * \param lsm [in] striping information for the file
- * \param fm_start [in] logical start of mapping
- * \param fm_end [in] logical end of mapping
- * \param start_stripe [out] starting stripe will be returned in this
- */
-static u64 fiemap_calc_fm_end_offset(struct fiemap *fiemap,
- struct lov_stripe_md *lsm,
- u64 fm_start, u64 fm_end,
- int *start_stripe)
-{
- u64 local_end = fiemap->fm_extents[0].fe_logical;
- u64 lun_start, lun_end;
- u64 fm_end_offset;
- int stripe_no = -1;
- int i;
-
- if (!fiemap->fm_extent_count || !fiemap->fm_extents[0].fe_logical)
- return 0;
-
- /* Find out stripe_no from ost_index saved in the fe_device */
- for (i = 0; i < lsm->lsm_stripe_count; i++) {
- struct lov_oinfo *oinfo = lsm->lsm_oinfo[i];
-
- if (lov_oinfo_is_dummy(oinfo))
- continue;
-
- if (oinfo->loi_ost_idx == fiemap->fm_extents[0].fe_device) {
- stripe_no = i;
- break;
- }
- }
-
- if (stripe_no == -1)
- return -EINVAL;
-
- /*
- * If we have finished mapping on previous device, shift logical
- * offset to start of next device
- */
- if (lov_stripe_intersects(lsm, stripe_no, fm_start, fm_end,
- &lun_start, &lun_end) &&
- local_end < lun_end) {
- fm_end_offset = local_end;
- *start_stripe = stripe_no;
- } else {
- /* This is a special value to indicate that caller should
- * calculate offset in next stripe.
- */
- fm_end_offset = 0;
- *start_stripe = (stripe_no + 1) % lsm->lsm_stripe_count;
- }
-
- return fm_end_offset;
-}
-
-struct fiemap_state {
- struct fiemap *fs_fm;
- u64 fs_start;
- u64 fs_length;
- u64 fs_end;
- u64 fs_end_offset;
- int fs_cur_extent;
- int fs_cnt_need;
- int fs_start_stripe;
- int fs_last_stripe;
- bool fs_device_done;
- bool fs_finish;
- bool fs_enough;
-};
-
-static int fiemap_for_stripe(const struct lu_env *env, struct cl_object *obj,
- struct lov_stripe_md *lsm,
- struct fiemap *fiemap, size_t *buflen,
- struct ll_fiemap_info_key *fmkey, int stripeno,
- struct fiemap_state *fs)
-{
- struct cl_object *subobj;
- struct lov_obd *lov = lu2lov_dev(obj->co_lu.lo_dev)->ld_lov;
- struct fiemap_extent *fm_ext = &fs->fs_fm->fm_extents[0];
- u64 req_fm_len; /* Stores length of required mapping */
- u64 len_mapped_single_call;
- u64 lun_start;
- u64 lun_end;
- u64 obd_object_end;
- unsigned int ext_count;
- /* EOF for object */
- bool ost_eof = false;
- /* done with required mapping for this OST? */
- bool ost_done = false;
- int ost_index;
- int rc = 0;
-
- fs->fs_device_done = false;
- /* Find out range of mapping on this stripe */
- if ((lov_stripe_intersects(lsm, stripeno, fs->fs_start, fs->fs_end,
- &lun_start, &obd_object_end)) == 0)
- return 0;
-
- if (lov_oinfo_is_dummy(lsm->lsm_oinfo[stripeno]))
- return -EIO;
-
- /* If this is a continuation FIEMAP call and we are on
- * starting stripe then lun_start needs to be set to
- * end_offset
- */
- if (fs->fs_end_offset != 0 && stripeno == fs->fs_start_stripe)
- lun_start = fs->fs_end_offset;
-
- lun_end = fs->fs_length;
- if (lun_end != ~0ULL) {
- /* Handle fs->fs_start + fs->fs_length overflow */
- if (fs->fs_start + fs->fs_length < fs->fs_start)
- fs->fs_length = ~0ULL - fs->fs_start;
- lun_end = lov_size_to_stripe(lsm, fs->fs_start + fs->fs_length,
- stripeno);
- }
-
- if (lun_start == lun_end)
- return 0;
-
- req_fm_len = obd_object_end - lun_start;
- fs->fs_fm->fm_length = 0;
- len_mapped_single_call = 0;
-
-	/* find lovsub object */
- subobj = lov_find_subobj(env, cl2lov(obj), lsm, stripeno);
- if (IS_ERR(subobj))
- return PTR_ERR(subobj);
- /* If the output buffer is very large and the objects have many
	 * extents, we may need to loop on a single OST repeatedly
- */
- do {
- if (fiemap->fm_extent_count > 0) {
- /* Don't get too many extents. */
- if (fs->fs_cur_extent + fs->fs_cnt_need >
- fiemap->fm_extent_count)
- fs->fs_cnt_need = fiemap->fm_extent_count -
- fs->fs_cur_extent;
- }
-
- lun_start += len_mapped_single_call;
- fs->fs_fm->fm_length = req_fm_len - len_mapped_single_call;
- req_fm_len = fs->fs_fm->fm_length;
- fs->fs_fm->fm_extent_count = fs->fs_enough ?
- 1 : fs->fs_cnt_need;
- fs->fs_fm->fm_mapped_extents = 0;
- fs->fs_fm->fm_flags = fiemap->fm_flags;
-
- ost_index = lsm->lsm_oinfo[stripeno]->loi_ost_idx;
-
- if (ost_index < 0 || ost_index >= lov->desc.ld_tgt_count) {
- rc = -EINVAL;
- goto obj_put;
- }
- /* If OST is inactive, return extent with UNKNOWN flag. */
- if (!lov->lov_tgts[ost_index]->ltd_active) {
- fs->fs_fm->fm_flags |= FIEMAP_EXTENT_LAST;
- fs->fs_fm->fm_mapped_extents = 1;
-
- fm_ext[0].fe_logical = lun_start;
- fm_ext[0].fe_length = obd_object_end - lun_start;
- fm_ext[0].fe_flags |= FIEMAP_EXTENT_UNKNOWN;
-
- goto inactive_tgt;
- }
-
- fs->fs_fm->fm_start = lun_start;
- fs->fs_fm->fm_flags &= ~FIEMAP_FLAG_DEVICE_ORDER;
- memcpy(&fmkey->lfik_fiemap, fs->fs_fm, sizeof(*fs->fs_fm));
- *buflen = fiemap_count_to_size(fs->fs_fm->fm_extent_count);
-
- rc = cl_object_fiemap(env, subobj, fmkey, fs->fs_fm, buflen);
- if (rc)
- goto obj_put;
-inactive_tgt:
- ext_count = fs->fs_fm->fm_mapped_extents;
- if (ext_count == 0) {
- ost_done = true;
- fs->fs_device_done = true;
-			/* If the last stripe has a hole at the end,
- * we need to return
- */
- if (stripeno == fs->fs_last_stripe) {
- fiemap->fm_mapped_extents = 0;
- fs->fs_finish = true;
- goto obj_put;
- }
- break;
- } else if (fs->fs_enough) {
- /*
- * We've collected enough extents and there are
- * more extents after it.
- */
- fs->fs_finish = true;
- goto obj_put;
- }
-
-		/* If we just need the number of extents, go to the next device */
- if (fiemap->fm_extent_count == 0) {
- fs->fs_cur_extent += ext_count;
- break;
- }
-
-		/* prepare to copy retrieved map extents */
- len_mapped_single_call = fm_ext[ext_count - 1].fe_logical +
- fm_ext[ext_count - 1].fe_length -
- lun_start;
-
- /* Have we finished mapping on this device? */
- if (req_fm_len <= len_mapped_single_call) {
- ost_done = true;
- fs->fs_device_done = true;
- }
-
- /* Clear the EXTENT_LAST flag which can be present on
- * the last extent
- */
- if (fm_ext[ext_count - 1].fe_flags & FIEMAP_EXTENT_LAST)
- fm_ext[ext_count - 1].fe_flags &= ~FIEMAP_EXTENT_LAST;
- if (lov_stripe_size(lsm, fm_ext[ext_count - 1].fe_logical +
- fm_ext[ext_count - 1].fe_length,
- stripeno) >= fmkey->lfik_oa.o_size) {
- ost_eof = true;
- fs->fs_device_done = true;
- }
-
- fiemap_prepare_and_copy_exts(fiemap, fm_ext, ost_index,
- ext_count, fs->fs_cur_extent);
- fs->fs_cur_extent += ext_count;
-
- /* Ran out of available extents? */
- if (fs->fs_cur_extent >= fiemap->fm_extent_count)
- fs->fs_enough = true;
- } while (!ost_done && !ost_eof);
-
- if (stripeno == fs->fs_last_stripe)
- fs->fs_finish = true;
-obj_put:
- cl_object_put(env, subobj);
-
- return rc;
-}
-
-/**
- * Break down the FIEMAP request and send appropriate calls to individual OSTs.
- * This also handles the restarting of FIEMAP calls in case mapping overflows
- * the available number of extents in single call.
- *
- * \param env [in] lustre environment
- * \param obj [in] file object
- * \param fmkey [in] fiemap request header and other info
- * \param fiemap [out] fiemap buffer holding retrieved map extents
- * \param buflen [in/out] max buffer length of @fiemap, when iterate
- * each OST, it is used to limit max map needed
- * \retval 0 success
- * \retval < 0 error
- */
-static int lov_object_fiemap(const struct lu_env *env, struct cl_object *obj,
- struct ll_fiemap_info_key *fmkey,
- struct fiemap *fiemap, size_t *buflen)
-{
- unsigned int buffer_size = FIEMAP_BUFFER_SIZE;
- struct fiemap *fm_local = NULL;
- struct lov_stripe_md *lsm;
- int rc = 0;
- int cur_stripe;
- int stripe_count;
- struct fiemap_state fs = { NULL };
-
- lsm = lov_lsm_addref(cl2lov(obj));
- if (!lsm)
- return -ENODATA;
-
- /**
- * If the stripe_count > 1 and the application does not understand
- * DEVICE_ORDER flag, it cannot interpret the extents correctly.
- */
- if (lsm->lsm_stripe_count > 1 &&
- !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER)) {
- rc = -ENOTSUPP;
- goto out;
- }
-
- if (lsm_is_released(lsm)) {
- if (fiemap->fm_start < fmkey->lfik_oa.o_size) {
- /**
- * released file, return a minimal FIEMAP if
- * request fits in file-size.
- */
- fiemap->fm_mapped_extents = 1;
- fiemap->fm_extents[0].fe_logical = fiemap->fm_start;
- if (fiemap->fm_start + fiemap->fm_length <
- fmkey->lfik_oa.o_size)
- fiemap->fm_extents[0].fe_length =
- fiemap->fm_length;
- else
- fiemap->fm_extents[0].fe_length =
- fmkey->lfik_oa.o_size -
- fiemap->fm_start;
- fiemap->fm_extents[0].fe_flags |=
- FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_LAST;
- }
- rc = 0;
- goto out;
- }
-
- if (fiemap_count_to_size(fiemap->fm_extent_count) < buffer_size)
- buffer_size = fiemap_count_to_size(fiemap->fm_extent_count);
-
- fm_local = kvzalloc(buffer_size, GFP_NOFS);
- if (!fm_local) {
- rc = -ENOMEM;
- goto out;
- }
- fs.fs_fm = fm_local;
- fs.fs_cnt_need = fiemap_size_to_count(buffer_size);
-
- fs.fs_start = fiemap->fm_start;
- /* fs_start is beyond the end of the file */
- if (fs.fs_start > fmkey->lfik_oa.o_size) {
- rc = -EINVAL;
- goto out;
- }
-	fs.fs_length = fiemap->fm_length;
-	/* Calculate start stripe, last stripe and length of mapping */
- fs.fs_start_stripe = lov_stripe_number(lsm, fs.fs_start);
- fs.fs_end = (fs.fs_length == ~0ULL) ? fmkey->lfik_oa.o_size :
- fs.fs_start + fs.fs_length - 1;
- /* If fs_length != ~0ULL but fs_start+fs_length-1 exceeds file size */
- if (fs.fs_end > fmkey->lfik_oa.o_size) {
- fs.fs_end = fmkey->lfik_oa.o_size;
- fs.fs_length = fs.fs_end - fs.fs_start;
- }
-
- fs.fs_last_stripe = fiemap_calc_last_stripe(lsm, fs.fs_start, fs.fs_end,
- fs.fs_start_stripe,
- &stripe_count);
- fs.fs_end_offset = fiemap_calc_fm_end_offset(fiemap, lsm, fs.fs_start,
- fs.fs_end,
- &fs.fs_start_stripe);
- if (fs.fs_end_offset == -EINVAL) {
- rc = -EINVAL;
- goto out;
- }
-
- /**
- * Requested extent count exceeds the fiemap buffer size, shrink our
- * ambition.
- */
- if (fiemap_count_to_size(fiemap->fm_extent_count) > *buflen)
- fiemap->fm_extent_count = fiemap_size_to_count(*buflen);
- if (!fiemap->fm_extent_count)
- fs.fs_cnt_need = 0;
-
- fs.fs_finish = false;
- fs.fs_enough = false;
- fs.fs_cur_extent = 0;
-
- /* Check each stripe */
- for (cur_stripe = fs.fs_start_stripe; stripe_count > 0;
- --stripe_count,
- cur_stripe = (cur_stripe + 1) % lsm->lsm_stripe_count) {
- rc = fiemap_for_stripe(env, obj, lsm, fiemap, buflen, fmkey,
- cur_stripe, &fs);
- if (rc < 0)
- goto out;
- if (fs.fs_finish)
- break;
- } /* for each stripe */
- /*
-	 * Indicate that we are returning device offsets unless the file has
-	 * just a single stripe
- */
- if (lsm->lsm_stripe_count > 1)
- fiemap->fm_flags |= FIEMAP_FLAG_DEVICE_ORDER;
-
- if (!fiemap->fm_extent_count)
- goto skip_last_device_calc;
-
- /*
- * Check if we have reached the last stripe and whether mapping for that
- * stripe is done.
- */
- if ((cur_stripe == fs.fs_last_stripe) && fs.fs_device_done)
- fiemap->fm_extents[fs.fs_cur_extent - 1].fe_flags |=
- FIEMAP_EXTENT_LAST;
-skip_last_device_calc:
- fiemap->fm_mapped_extents = fs.fs_cur_extent;
-out:
- kvfree(fm_local);
- lov_lsm_put(lsm);
- return rc;
-}
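
From userspace this whole path is driven by the standard FS_IOC_FIEMAP ioctl; the buffer is a struct fiemap followed by fm_extent_count extents, which is exactly what fiemap_count_to_size() sizes. A minimal caller (plain Linux FIEMAP only; the DEVICE_ORDER flag negotiated above was Lustre-specific and is not set here):

#include <fcntl.h>
#include <linux/fiemap.h>
#include <linux/fs.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	unsigned int count = 32, i;
	struct fiemap *fm;
	int fd;

	fd = open(argc > 1 ? argv[1] : "testfile", O_RDONLY);
	if (fd < 0)
		return 1;

	fm = calloc(1, sizeof(*fm) + count * sizeof(struct fiemap_extent));
	if (!fm)
		return 1;
	fm->fm_start = 0;
	fm->fm_length = ~0ULL;		/* map the whole file */
	fm->fm_extent_count = count;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0)
		return 1;

	for (i = 0; i < fm->fm_mapped_extents; i++)
		printf("extent %u: logical %llu length %llu\n", i,
		       (unsigned long long)fm->fm_extents[i].fe_logical,
		       (unsigned long long)fm->fm_extents[i].fe_length);
	free(fm);
	close(fd);
	return 0;
}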
-
-static int lov_object_getstripe(const struct lu_env *env, struct cl_object *obj,
- struct lov_user_md __user *lum)
-{
- struct lov_object *lov = cl2lov(obj);
- struct lov_stripe_md *lsm;
- int rc = 0;
-
- lsm = lov_lsm_addref(lov);
- if (!lsm)
- return -ENODATA;
-
- rc = lov_getstripe(cl2lov(obj), lsm, lum);
- lov_lsm_put(lsm);
- return rc;
-}
-
-static int lov_object_layout_get(const struct lu_env *env,
- struct cl_object *obj,
- struct cl_layout *cl)
-{
- struct lov_object *lov = cl2lov(obj);
- struct lov_stripe_md *lsm = lov_lsm_addref(lov);
- struct lu_buf *buf = &cl->cl_buf;
- ssize_t rc;
-
- if (!lsm) {
- cl->cl_size = 0;
- cl->cl_layout_gen = CL_LAYOUT_GEN_EMPTY;
- return 0;
- }
-
- cl->cl_size = lov_mds_md_size(lsm->lsm_stripe_count, lsm->lsm_magic);
- cl->cl_layout_gen = lsm->lsm_layout_gen;
-
- rc = lov_lsm_pack(lsm, buf->lb_buf, buf->lb_len);
- lov_lsm_put(lsm);
-
- return rc < 0 ? rc : 0;
-}
-
-static loff_t lov_object_maxbytes(struct cl_object *obj)
-{
- struct lov_object *lov = cl2lov(obj);
- struct lov_stripe_md *lsm = lov_lsm_addref(lov);
- loff_t maxbytes;
-
- if (!lsm)
- return LLONG_MAX;
-
- maxbytes = lsm->lsm_maxbytes;
-
- lov_lsm_put(lsm);
-
- return maxbytes;
-}
-
-static const struct cl_object_operations lov_ops = {
- .coo_page_init = lov_page_init,
- .coo_lock_init = lov_lock_init,
- .coo_io_init = lov_io_init,
- .coo_attr_get = lov_attr_get,
- .coo_attr_update = lov_attr_update,
- .coo_conf_set = lov_conf_set,
- .coo_getstripe = lov_object_getstripe,
- .coo_layout_get = lov_object_layout_get,
- .coo_maxbytes = lov_object_maxbytes,
- .coo_fiemap = lov_object_fiemap,
-};
-
-static const struct lu_object_operations lov_lu_obj_ops = {
- .loo_object_init = lov_object_init,
- .loo_object_delete = lov_object_delete,
- .loo_object_release = NULL,
- .loo_object_free = lov_object_free,
- .loo_object_print = lov_object_print,
- .loo_object_invariant = NULL
-};
-
-struct lu_object *lov_object_alloc(const struct lu_env *env,
- const struct lu_object_header *unused,
- struct lu_device *dev)
-{
- struct lov_object *lov;
- struct lu_object *obj;
-
- lov = kmem_cache_zalloc(lov_object_kmem, GFP_NOFS);
- if (lov) {
- obj = lov2lu(lov);
- lu_object_init(obj, NULL, dev);
- lov->lo_cl.co_ops = &lov_ops;
- lov->lo_type = -1; /* invalid, to catch uninitialized type */
- /*
- * object io operation vector (cl_object::co_iop) is installed
- * later in lov_object_init(), as different vectors are used
- * for object with different layouts.
- */
- obj->lo_ops = &lov_lu_obj_ops;
- } else {
- obj = NULL;
- }
- return obj;
-}
-
-struct lov_stripe_md *lov_lsm_addref(struct lov_object *lov)
-{
- struct lov_stripe_md *lsm = NULL;
-
- lov_conf_freeze(lov);
- if (lov->lo_lsm) {
- lsm = lsm_addref(lov->lo_lsm);
- CDEBUG(D_INODE, "lsm %p addref %d/%d by %p.\n",
- lsm, atomic_read(&lsm->lsm_refc),
- lov->lo_layout_invalid, current);
- }
- lov_conf_thaw(lov);
- return lsm;
-}
-
-int lov_read_and_clear_async_rc(struct cl_object *clob)
-{
- struct lu_object *luobj;
- int rc = 0;
-
- luobj = lu_object_locate(&cl_object_header(clob)->coh_lu,
- &lov_device_type);
- if (luobj) {
- struct lov_object *lov = lu2lov(luobj);
-
- lov_conf_freeze(lov);
- switch (lov->lo_type) {
- case LLT_RAID0: {
- struct lov_stripe_md *lsm;
- int i;
-
- lsm = lov->lo_lsm;
- for (i = 0; i < lsm->lsm_stripe_count; i++) {
- struct lov_oinfo *loi = lsm->lsm_oinfo[i];
-
- if (lov_oinfo_is_dummy(loi))
- continue;
-
- if (loi->loi_ar.ar_rc && !rc)
- rc = loi->loi_ar.ar_rc;
- loi->loi_ar.ar_rc = 0;
- }
- }
- case LLT_RELEASED:
- case LLT_EMPTY:
- break;
- default:
- LBUG();
- }
- lov_conf_thaw(lov);
- }
- return rc;
-}
-EXPORT_SYMBOL(lov_read_and_clear_async_rc);
-
-/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lov_offset.c b/drivers/staging/lustre/lustre/lov/lov_offset.c
deleted file mode 100644
index a5f00f6ec347..000000000000
--- a/drivers/staging/lustre/lustre/lov/lov_offset.c
+++ /dev/null
@@ -1,269 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LOV
-
-#include <obd_class.h>
-
-#include "lov_internal.h"
-
-/* compute object size given "stripeno" and the ost size */
-u64 lov_stripe_size(struct lov_stripe_md *lsm, u64 ost_size, int stripeno)
-{
- unsigned long ssize = lsm->lsm_stripe_size;
- unsigned long stripe_size;
- u64 swidth;
- u64 lov_size;
- int magic = lsm->lsm_magic;
-
- if (ost_size == 0)
- return 0;
-
- lsm_op_find(magic)->lsm_stripe_by_index(lsm, &stripeno, NULL, &swidth);
-
- /* lov_do_div64(a, b) returns a % b, and a = a / b */
- stripe_size = lov_do_div64(ost_size, ssize);
- if (stripe_size)
- lov_size = ost_size * swidth + stripeno * ssize + stripe_size;
- else
- lov_size = (ost_size - 1) * swidth + (stripeno + 1) * ssize;
-
- return lov_size;
-}
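
A worked example of this math (standalone, assumed values): with three 1 MiB stripes the stripe width is 3 MiB, so an object on stripe 1 holding 2.5 MiB implies a 7.5 MiB file: two full passes over the stripe set, stripe 1's own unit within the third pass, plus the 0.5 MiB remainder.

#include <stdio.h>

int main(void)
{
	unsigned long long ssize = 1ULL << 20;	/* stripe size, 1 MiB */
	unsigned long long swidth = 3 * ssize;	/* 3 stripes -> 3 MiB width */
	unsigned long long ost_size = 2 * ssize + ssize / 2;	/* 2.5 MiB */
	unsigned long long stripeno = 1;
	unsigned long long q = ost_size / ssize;	/* full units: 2 */
	unsigned long long r = ost_size % ssize;	/* partial: 0.5 MiB */
	unsigned long long lov_size;

	if (r)
		lov_size = q * swidth + stripeno * ssize + r;
	else
		lov_size = (q - 1) * swidth + (stripeno + 1) * ssize;

	printf("file size: %llu bytes (7.5 MiB)\n", lov_size);
	return 0;
}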
-
-/**
- * Compute file level page index by stripe level page offset
- */
-pgoff_t lov_stripe_pgoff(struct lov_stripe_md *lsm, pgoff_t stripe_index,
- int stripe)
-{
- loff_t offset;
-
- offset = lov_stripe_size(lsm, (stripe_index << PAGE_SHIFT) + 1, stripe);
- return offset >> PAGE_SHIFT;
-}
-
-/* we have an offset in file backed by an lov and want to find out where
- * that offset lands in our given stripe of the file. for the easy
- * case where the offset is within the stripe, we just have to scale the
- * offset down to make it relative to the stripe instead of the lov.
- *
- * the harder case is what to do when the offset doesn't intersect the
- * stripe. callers will want start offsets clamped ahead to the start
- * of the nearest stripe in the file, and end offsets similarly clamped to the
- * nearest ending byte of a stripe in the file:
- *
- * all this function does is move offsets to the nearest region of the
- * stripe, and it does its work "mod" the full length of all the stripes.
- * consider a file with 3 stripes:
- *
- * S E
- * ---------------------------------------------------------------------
- * | 0 | 1 | 2 | 0 | 1 | 2 |
- * ---------------------------------------------------------------------
- *
- * to find stripe 1's offsets for S and E, it divides by the full stripe
- * width and does its math in the context of a single set of stripes:
- *
- * S E
- * -----------------------------------
- * | 0 | 1 | 2 |
- * -----------------------------------
- *
- * it'll notice that E is outside stripe 1 and clamp it to the end of the
- * stripe, then multiply it back out by lov_off to give the real offsets in
- * the stripe:
- *
- * S E
- * ---------------------------------------------------------------------
- * | 1 | 1 | 1 | 1 | 1 | 1 |
- * ---------------------------------------------------------------------
- *
- * it would have done similarly and pulled S forward to the start of a 1
- * stripe if, say, S had landed in a 0 stripe.
- *
- * this rounding isn't always correct. consider an E lov offset that lands
- * on a 0 stripe, the "mod stripe width" math will pull it forward to the
- * start of a 1 stripe, when in fact it wanted to be rounded back to the end
- * of a previous 1 stripe. this logic is handled by callers and this is why:
- *
- * this function returns < 0 when the offset was "before" the stripe and
- * was moved forward to the start of the stripe in question; 0 when it
- * falls in the stripe and no shifting was done; > 0 when the offset
- * was outside the stripe and was pulled back to its final byte.
- */
-int lov_stripe_offset(struct lov_stripe_md *lsm, u64 lov_off,
- int stripeno, u64 *obdoff)
-{
- unsigned long ssize = lsm->lsm_stripe_size;
- u64 stripe_off, this_stripe, swidth;
- int magic = lsm->lsm_magic;
- int ret = 0;
-
- if (lov_off == OBD_OBJECT_EOF) {
- *obdoff = OBD_OBJECT_EOF;
- return 0;
- }
-
- lsm_op_find(magic)->lsm_stripe_by_index(lsm, &stripeno, &lov_off,
- &swidth);
-
- /* lov_do_div64(a, b) returns a % b, and a = a / b */
- stripe_off = lov_do_div64(lov_off, swidth);
-
- this_stripe = (u64)stripeno * ssize;
- if (stripe_off < this_stripe) {
- stripe_off = 0;
- ret = -1;
- } else {
- stripe_off -= this_stripe;
-
- if (stripe_off >= ssize) {
- stripe_off = ssize;
- ret = 1;
- }
- }
-
- *obdoff = lov_off * ssize + stripe_off;
- return ret;
-}
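
The commentary above lov_stripe_offset() is easiest to check with numbers. The inverse of the lov_stripe_size() example (standalone, same assumed 3 x 1 MiB layout): file offset 7.5 MiB lands exactly 2.5 MiB into stripe 1's object, with neither clamp firing:

#include <stdio.h>

int main(void)
{
	unsigned long long ssize = 1ULL << 20;		/* 1 MiB stripes */
	unsigned long long swidth = 3 * ssize;		/* 3-stripe width */
	unsigned long long lov_off = 7 * ssize + ssize / 2; /* 7.5 MiB */
	unsigned long long stripeno = 1;
	unsigned long long stripe_off = lov_off % swidth;
	unsigned long long q = lov_off / swidth;
	unsigned long long this_stripe = stripeno * ssize;
	int ret = 0;

	if (stripe_off < this_stripe) {
		stripe_off = 0;		/* clamp forward to stripe start */
		ret = -1;
	} else {
		stripe_off -= this_stripe;
		if (stripe_off >= ssize) {
			stripe_off = ssize;	/* clamp back to stripe end */
			ret = 1;
		}
	}

	/* prints "obd offset: 2621440, side: 0", i.e. 2.5 MiB, no clamping */
	printf("obd offset: %llu, side: %d\n", q * ssize + stripe_off, ret);
	return 0;
}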
-
-/* Given a whole-file size and a stripe number, give the file size which
- * corresponds to the individual object of that stripe.
- *
- * This behaves basically in the same way as lov_stripe_offset, except that
- * file sizes falling before the beginning of a stripe are clamped to the end
- * of the previous stripe, not the beginning of the next:
- *
- * S
- * ---------------------------------------------------------------------
- * | 0 | 1 | 2 | 0 | 1 | 2 |
- * ---------------------------------------------------------------------
- *
- * if clamped to stripe 2 becomes:
- *
- * S
- * ---------------------------------------------------------------------
- * | 0 | 1 | 2 | 0 | 1 | 2 |
- * ---------------------------------------------------------------------
- */
-u64 lov_size_to_stripe(struct lov_stripe_md *lsm, u64 file_size,
- int stripeno)
-{
- unsigned long ssize = lsm->lsm_stripe_size;
- u64 stripe_off, this_stripe, swidth;
- int magic = lsm->lsm_magic;
-
- if (file_size == OBD_OBJECT_EOF)
- return OBD_OBJECT_EOF;
-
- lsm_op_find(magic)->lsm_stripe_by_index(lsm, &stripeno, &file_size,
- &swidth);
-
- /* lov_do_div64(a, b) returns a % b, and a = a / b */
- stripe_off = lov_do_div64(file_size, swidth);
-
- this_stripe = (u64)stripeno * ssize;
- if (stripe_off < this_stripe) {
- /* Move to end of previous stripe, or zero */
- if (file_size > 0) {
- file_size--;
- stripe_off = ssize;
- } else {
- stripe_off = 0;
- }
- } else {
- stripe_off -= this_stripe;
-
- if (stripe_off >= ssize) {
- /* Clamp to end of this stripe */
- stripe_off = ssize;
- }
- }
-
- return (file_size * ssize + stripe_off);
-}
-
-/* given an extent in an lov and a stripe, calculate the extent of the stripe
- * that is contained within the lov extent. this returns true if the given
- * stripe does intersect with the lov extent.
- */
-int lov_stripe_intersects(struct lov_stripe_md *lsm, int stripeno,
- u64 start, u64 end, u64 *obd_start, u64 *obd_end)
-{
- int start_side, end_side;
-
- start_side = lov_stripe_offset(lsm, start, stripeno, obd_start);
- end_side = lov_stripe_offset(lsm, end, stripeno, obd_end);
-
- CDEBUG(D_INODE, "[%llu->%llu] -> [(%d) %llu->%llu (%d)]\n",
- start, end, start_side, *obd_start, *obd_end, end_side);
-
- /* this stripe doesn't intersect the file extent when neither
-	 * the start nor the end intersects the stripe and obd_start and
-	 * obd_end get rounded to the same value.
- */
- if (start_side != 0 && end_side != 0 && *obd_start == *obd_end)
- return 0;
-
- /* as mentioned in the lov_stripe_offset commentary, end
- * might have been shifted in the wrong direction. This
- * happens when an end offset is before the stripe when viewed
- * through the "mod stripe size" math. we detect it being shifted
- * in the wrong direction and touch it up.
- * interestingly, this can't underflow since end must be > start
- * if we passed through the previous check.
- * (should we assert for that somewhere?)
- */
- if (end_side != 0)
- (*obd_end)--;
-
- return 1;
-}
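
Putting the two helpers together (standalone demo, same assumed 3 x 1 MiB layout): the file extent [0.5 MiB, 5 MiB] intersects stripe 1, and after the end-side touch-up described above it maps to object bytes [0, 2 MiB - 1]:

#include <stdio.h>

#define SSIZE	(1ULL << 20)	/* stripe size */
#define SWIDTH	(3 * SSIZE)	/* 3 stripes */

/* returns <0 / 0 / >0 as lov_stripe_offset() does; result in *obdoff */
static int stripe_offset(unsigned long long off, unsigned long long stripeno,
			 unsigned long long *obdoff)
{
	unsigned long long so = off % SWIDTH, q = off / SWIDTH;
	unsigned long long this_stripe = stripeno * SSIZE;
	int ret = 0;

	if (so < this_stripe) {
		so = 0;
		ret = -1;
	} else {
		so -= this_stripe;
		if (so >= SSIZE) {
			so = SSIZE;
			ret = 1;
		}
	}
	*obdoff = q * SSIZE + so;
	return ret;
}

int main(void)
{
	unsigned long long start = SSIZE / 2, end = 5 * SSIZE;
	unsigned long long obd_start, obd_end;
	int ss = stripe_offset(start, 1, &obd_start);
	int es = stripe_offset(end, 1, &obd_end);
	int hit = !(ss && es && obd_start == obd_end);

	if (hit && es)
		obd_end--;	/* pull a clamped end back to its last byte */

	/* prints "intersects=1 obd=[0, 2097151]" */
	printf("intersects=%d obd=[%llu, %llu]\n", hit, obd_start, obd_end);
	return 0;
}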
-
-/* compute which stripe number "lov_off" will be written into */
-int lov_stripe_number(struct lov_stripe_md *lsm, u64 lov_off)
-{
- unsigned long ssize = lsm->lsm_stripe_size;
- u64 stripe_off, swidth;
- int magic = lsm->lsm_magic;
-
- lsm_op_find(magic)->lsm_stripe_by_offset(lsm, NULL, &lov_off, &swidth);
-
- stripe_off = lov_do_div64(lov_off, swidth);
-
- /* Puts stripe_off/ssize result into stripe_off */
- lov_do_div64(stripe_off, ssize);
-
- return stripe_off;
-}
diff --git a/drivers/staging/lustre/lustre/lov/lov_pack.c b/drivers/staging/lustre/lustre/lov/lov_pack.c
deleted file mode 100644
index b1060d02a164..000000000000
--- a/drivers/staging/lustre/lustre/lov/lov_pack.c
+++ /dev/null
@@ -1,400 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/lov/lov_pack.c
- *
- * (Un)packing of OST/MDS requests
- *
- * Author: Andreas Dilger <adilger@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LOV
-
-#include <lustre_net.h>
-#include <lustre_swab.h>
-#include <obd.h>
-#include <obd_class.h>
-#include <obd_support.h>
-
-#include "lov_cl_internal.h"
-#include "lov_internal.h"
-
-void lov_dump_lmm_common(int level, void *lmmp)
-{
- struct lov_mds_md *lmm = lmmp;
- struct ost_id oi;
-
- lmm_oi_le_to_cpu(&oi, &lmm->lmm_oi);
- CDEBUG(level, "objid " DOSTID ", magic 0x%08x, pattern %#x\n",
- POSTID(&oi), le32_to_cpu(lmm->lmm_magic),
- le32_to_cpu(lmm->lmm_pattern));
- CDEBUG(level, "stripe_size %u, stripe_count %u, layout_gen %u\n",
- le32_to_cpu(lmm->lmm_stripe_size),
- le16_to_cpu(lmm->lmm_stripe_count),
- le16_to_cpu(lmm->lmm_layout_gen));
-}
-
-static void lov_dump_lmm_objects(int level, struct lov_ost_data *lod,
- int stripe_count)
-{
- int i;
-
- if (stripe_count > LOV_V1_INSANE_STRIPE_COUNT) {
- CDEBUG(level, "bad stripe_count %u > max_stripe_count %u\n",
- stripe_count, LOV_V1_INSANE_STRIPE_COUNT);
- return;
- }
-
- for (i = 0; i < stripe_count; ++i, ++lod) {
- struct ost_id oi;
-
- ostid_le_to_cpu(&lod->l_ost_oi, &oi);
- CDEBUG(level, "stripe %u idx %u subobj " DOSTID "\n", i,
- le32_to_cpu(lod->l_ost_idx), POSTID(&oi));
- }
-}
-
-void lov_dump_lmm_v1(int level, struct lov_mds_md_v1 *lmm)
-{
- lov_dump_lmm_common(level, lmm);
- lov_dump_lmm_objects(level, lmm->lmm_objects,
- le16_to_cpu(lmm->lmm_stripe_count));
-}
-
-void lov_dump_lmm_v3(int level, struct lov_mds_md_v3 *lmm)
-{
- lov_dump_lmm_common(level, lmm);
- CDEBUG(level, "pool_name " LOV_POOLNAMEF "\n", lmm->lmm_pool_name);
- lov_dump_lmm_objects(level, lmm->lmm_objects,
- le16_to_cpu(lmm->lmm_stripe_count));
-}
-
-/**
- * Pack LOV striping metadata for disk storage format (in little
- * endian byte order).
- *
- * This follows the getxattr() conventions. If \a buf_size is zero
- * then return the size needed. If \a buf_size is too small then
- * return -ERANGE. Otherwise return the size of the result.
- */
-ssize_t lov_lsm_pack(const struct lov_stripe_md *lsm, void *buf,
- size_t buf_size)
-{
- struct lov_ost_data_v1 *lmm_objects;
- struct lov_mds_md_v1 *lmmv1 = buf;
- struct lov_mds_md_v3 *lmmv3 = buf;
- size_t lmm_size;
- unsigned int i;
-
- lmm_size = lov_mds_md_size(lsm->lsm_stripe_count, lsm->lsm_magic);
- if (!buf_size)
- return lmm_size;
-
- if (buf_size < lmm_size)
- return -ERANGE;
-
- /*
- * lmmv1 and lmmv3 point to the same struct and have the
- * same first fields
- */
- lmmv1->lmm_magic = cpu_to_le32(lsm->lsm_magic);
- lmm_oi_cpu_to_le(&lmmv1->lmm_oi, &lsm->lsm_oi);
- lmmv1->lmm_stripe_size = cpu_to_le32(lsm->lsm_stripe_size);
- lmmv1->lmm_stripe_count = cpu_to_le16(lsm->lsm_stripe_count);
- lmmv1->lmm_pattern = cpu_to_le32(lsm->lsm_pattern);
- lmmv1->lmm_layout_gen = cpu_to_le16(lsm->lsm_layout_gen);
-
- if (lsm->lsm_magic == LOV_MAGIC_V3) {
- BUILD_BUG_ON(sizeof(lsm->lsm_pool_name) !=
- sizeof(lmmv3->lmm_pool_name));
- strlcpy(lmmv3->lmm_pool_name, lsm->lsm_pool_name,
- sizeof(lmmv3->lmm_pool_name));
- lmm_objects = lmmv3->lmm_objects;
- } else {
- lmm_objects = lmmv1->lmm_objects;
- }
-
- for (i = 0; i < lsm->lsm_stripe_count; i++) {
- struct lov_oinfo *loi = lsm->lsm_oinfo[i];
-
- ostid_cpu_to_le(&loi->loi_oi, &lmm_objects[i].l_ost_oi);
- lmm_objects[i].l_ost_gen = cpu_to_le32(loi->loi_ost_gen);
- lmm_objects[i].l_ost_idx = cpu_to_le32(loi->loi_ost_idx);
- }
-
- return lmm_size;
-}
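lov_lsm_pack() follows the getxattr()-style sizing convention its comment describes: probe with a zero buf_size, allocate, then call again. A user-space sketch of that two-call pattern, using a hypothetical pack_blob() helper with the same contract (not a driver API):

```c
/* Sketch of the getxattr()-style two-call convention: pack_blob() is
 * hypothetical, returning the needed size when buf_size == 0 and
 * -ERANGE when the buffer is too small.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static long pack_blob(void *buf, size_t buf_size)
{
	static const char payload[] = "packed-lov-metadata";

	if (!buf_size)
		return sizeof(payload);		/* size probe */
	if (buf_size < sizeof(payload))
		return -ERANGE;
	memcpy(buf, payload, sizeof(payload));
	return sizeof(payload);
}

int main(void)
{
	long need = pack_blob(NULL, 0);	/* first call: ask for the size */
	char *buf;

	if (need < 0)
		return 1;
	buf = malloc(need);
	if (!buf)
		return 1;
	if (pack_blob(buf, need) < 0) {	/* second call: fill the buffer */
		free(buf);
		return 1;
	}
	printf("%s (%ld bytes)\n", buf, need);
	free(buf);
	return 0;
}
```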
-
-/* Find the max stripecount we should use */
-__u16 lov_get_stripecnt(struct lov_obd *lov, __u32 magic, __u16 stripe_count)
-{
- __u32 max_stripes = LOV_MAX_STRIPE_COUNT_OLD;
-
- if (!stripe_count)
- stripe_count = lov->desc.ld_default_stripe_count;
- if (stripe_count > lov->desc.ld_active_tgt_count)
- stripe_count = lov->desc.ld_active_tgt_count;
- if (!stripe_count)
- stripe_count = 1;
-
- /* stripe count is based on whether ldiskfs can handle
- * larger EA sizes
- */
- if (lov->lov_ocd.ocd_connect_flags & OBD_CONNECT_MAX_EASIZE &&
- lov->lov_ocd.ocd_max_easize)
- max_stripes = lov_mds_md_max_stripe_count(
- lov->lov_ocd.ocd_max_easize, magic);
-
- if (stripe_count > max_stripes)
- stripe_count = max_stripes;
-
- return stripe_count;
-}
-
-static int lov_verify_lmm(void *lmm, int lmm_bytes, __u16 *stripe_count)
-{
- int rc;
-
- if (!lsm_op_find(le32_to_cpu(*(__u32 *)lmm))) {
- CERROR("bad disk LOV MAGIC: 0x%08X; dumping LMM (size=%d):\n",
- le32_to_cpu(*(__u32 *)lmm), lmm_bytes);
- CERROR("%*phN\n", lmm_bytes, lmm);
- return -EINVAL;
- }
- rc = lsm_op_find(le32_to_cpu(*(__u32 *)lmm))->lsm_lmm_verify(lmm,
- lmm_bytes,
- stripe_count);
- return rc;
-}
-
-static struct lov_stripe_md *lov_lsm_alloc(u16 stripe_count, u32 pattern,
- u32 magic)
-{
- struct lov_stripe_md *lsm;
- unsigned int i;
-
- CDEBUG(D_INFO, "alloc lsm, stripe_count %u\n", stripe_count);
-
- lsm = lsm_alloc_plain(stripe_count);
- if (!lsm) {
- CERROR("cannot allocate LSM stripe_count %u\n", stripe_count);
- return ERR_PTR(-ENOMEM);
- }
-
- atomic_set(&lsm->lsm_refc, 1);
- spin_lock_init(&lsm->lsm_lock);
- lsm->lsm_magic = magic;
- lsm->lsm_stripe_count = stripe_count;
- lsm->lsm_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES * stripe_count;
- lsm->lsm_pattern = pattern;
- lsm->lsm_pool_name[0] = '\0';
- lsm->lsm_layout_gen = 0;
- if (stripe_count > 0)
- lsm->lsm_oinfo[0]->loi_ost_idx = ~0;
-
- for (i = 0; i < stripe_count; i++)
- loi_init(lsm->lsm_oinfo[i]);
-
- return lsm;
-}
-
-int lov_free_memmd(struct lov_stripe_md **lsmp)
-{
- struct lov_stripe_md *lsm = *lsmp;
- int refc;
-
- *lsmp = NULL;
- LASSERT(atomic_read(&lsm->lsm_refc) > 0);
- refc = atomic_dec_return(&lsm->lsm_refc);
- if (refc == 0)
- lsm_op_find(lsm->lsm_magic)->lsm_free(lsm);
-
- return refc;
-}
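lov_free_memmd() is a classic drop-reference helper: the caller's pointer is cleared before the count is dropped, and the object is freed only when the count reaches zero. A user-space sketch of the same pattern using C11 atomics (the md struct and md_put() are illustrative, not driver code):

```c
/* Illustrative refcount-drop pattern, modeled on lov_free_memmd(). */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct md {
	atomic_int refc;
};

static int md_put(struct md **mdp)
{
	struct md *md = *mdp;
	int refc;

	*mdp = NULL;			/* caller loses its reference */
	refc = atomic_fetch_sub(&md->refc, 1) - 1;
	if (refc == 0)
		free(md);		/* last reference dropped */
	return refc;
}

int main(void)
{
	struct md *a = calloc(1, sizeof(*a));
	struct md *b;

	if (!a)
		return 1;
	atomic_store(&a->refc, 2);	/* two holders: a and b */
	b = a;
	printf("after first put: %d refs left\n", md_put(&a));	/* -> 1 */
	printf("after last put:  %d refs left\n", md_put(&b));	/* -> 0 */
	return 0;
}
```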
-
-/* Unpack LOV object metadata from disk storage. It is packed in LE byte
- * order and is opaque to the networking layer.
- */
-struct lov_stripe_md *lov_unpackmd(struct lov_obd *lov, struct lov_mds_md *lmm,
- size_t lmm_size)
-{
- struct lov_stripe_md *lsm;
- u16 stripe_count;
- u32 pattern;
- u32 magic;
- int rc;
-
- rc = lov_verify_lmm(lmm, lmm_size, &stripe_count);
- if (rc)
- return ERR_PTR(rc);
-
- magic = le32_to_cpu(lmm->lmm_magic);
- pattern = le32_to_cpu(lmm->lmm_pattern);
-
- lsm = lov_lsm_alloc(stripe_count, pattern, magic);
- if (IS_ERR(lsm))
- return lsm;
-
- LASSERT(lsm_op_find(magic));
- rc = lsm_op_find(magic)->lsm_unpackmd(lov, lsm, lmm);
- if (rc) {
- lov_free_memmd(&lsm);
- return ERR_PTR(rc);
- }
-
- return lsm;
-}
-
-/* Retrieve object striping information.
- *
- * @lump is a pointer to an in-core struct with lmm_ost_count indicating
- * the maximum number of OST indices which will fit in the user buffer.
- * lmm_magic must be LOV_USER_MAGIC.
- */
-int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm,
- struct lov_user_md __user *lump)
-{
- /* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */
- struct lov_user_md_v3 lum;
- struct lov_mds_md *lmmk;
- u32 stripe_count;
- ssize_t lmm_size;
- size_t lmmk_size;
- size_t lum_size;
- int rc;
-
- if (!lsm)
- return -ENODATA;
-
- if (lsm->lsm_magic != LOV_MAGIC_V1 && lsm->lsm_magic != LOV_MAGIC_V3) {
- CERROR("bad LSM MAGIC: 0x%08X != 0x%08X nor 0x%08X\n",
- lsm->lsm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3);
- rc = -EIO;
- goto out;
- }
-
- if (!lsm_is_released(lsm))
- stripe_count = lsm->lsm_stripe_count;
- else
- stripe_count = 0;
-
-	/* we only need the header part from user space to get lmm_magic and
-	 * lmm_stripe_count (the header part is common to v1 and v3)
-	 */
- lum_size = sizeof(struct lov_user_md_v1);
- if (copy_from_user(&lum, lump, lum_size)) {
- rc = -EFAULT;
- goto out;
- }
- if (lum.lmm_magic != LOV_USER_MAGIC_V1 &&
- lum.lmm_magic != LOV_USER_MAGIC_V3 &&
- lum.lmm_magic != LOV_USER_MAGIC_SPECIFIC) {
- rc = -EINVAL;
- goto out;
- }
-
-	if (lum.lmm_stripe_count &&
-	    (lum.lmm_stripe_count < lsm->lsm_stripe_count)) {
-		/* Return right size of stripe to user */
-		lum.lmm_stripe_count = stripe_count;
-		if (copy_to_user(lump, &lum, lum_size))
-			rc = -EFAULT;
-		else
-			rc = -EOVERFLOW;
-		goto out;
-	}
- lmmk_size = lov_mds_md_size(stripe_count, lsm->lsm_magic);
-
- lmmk = kvzalloc(lmmk_size, GFP_NOFS);
- if (!lmmk) {
- rc = -ENOMEM;
- goto out;
- }
-
- lmm_size = lov_lsm_pack(lsm, lmmk, lmmk_size);
- if (lmm_size < 0) {
- rc = lmm_size;
- goto out_free;
- }
-
- /* FIXME: Bug 1185 - copy fields properly when structs change */
- /* struct lov_user_md_v3 and struct lov_mds_md_v3 must be the same */
- BUILD_BUG_ON(sizeof(lum) != sizeof(struct lov_mds_md_v3));
- BUILD_BUG_ON(sizeof(lum.lmm_objects[0]) != sizeof(lmmk->lmm_objects[0]));
-
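-	/* cpu_to_le32(LOV_MAGIC) != LOV_MAGIC only on big-endian hosts,
-	 * where the little-endian on-disk layout must be byte-swapped
-	 * before the per-field fixups below
-	 */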
- if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC &&
- (lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V1) ||
- lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V3))) {
- lustre_swab_lov_mds_md(lmmk);
- lustre_swab_lov_user_md_objects(
- (struct lov_user_ost_data *)lmmk->lmm_objects,
- lmmk->lmm_stripe_count);
- }
-
- if (lum.lmm_magic == LOV_USER_MAGIC) {
-		/* User requested v1, so we need to skip lmm_pool_name */
- if (lmmk->lmm_magic == LOV_MAGIC_V3) {
- memmove(((struct lov_mds_md_v1 *)lmmk)->lmm_objects,
- ((struct lov_mds_md_v3 *)lmmk)->lmm_objects,
- lmmk->lmm_stripe_count *
- sizeof(struct lov_ost_data_v1));
- lmm_size -= LOV_MAXPOOLNAME;
- }
- } else {
- /* if v3 we just have to update the lum_size */
- lum_size = sizeof(struct lov_user_md_v3);
- }
-
- /* User wasn't expecting this many OST entries */
- if (lum.lmm_stripe_count == 0) {
- lmm_size = lum_size;
- } else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) {
- rc = -EOVERFLOW;
- goto out_free;
- }
-	/*
-	 * There is a difference between lov_mds_md and lov_user_md,
-	 * so we have to re-order the data before copying it to user space.
-	 */
- lum.lmm_stripe_count = lmmk->lmm_stripe_count;
- lum.lmm_layout_gen = lmmk->lmm_layout_gen;
- ((struct lov_user_md *)lmmk)->lmm_layout_gen = lum.lmm_layout_gen;
- ((struct lov_user_md *)lmmk)->lmm_stripe_count = lum.lmm_stripe_count;
- if (copy_to_user(lump, lmmk, lmm_size))
- rc = -EFAULT;
- else
- rc = 0;
-
-out_free:
- kvfree(lmmk);
-out:
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/lov/lov_page.c b/drivers/staging/lustre/lustre/lov/lov_page.c
deleted file mode 100644
index cfae1294d77a..000000000000
--- a/drivers/staging/lustre/lustre/lov/lov_page.c
+++ /dev/null
@@ -1,136 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_page for LOV layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@intel.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LOV
-
-#include "lov_cl_internal.h"
-
-/** \addtogroup lov
- * @{
- */
-
-/*****************************************************************************
- *
- * Lov page operations.
- *
- */
-
-static int lov_raid0_page_print(const struct lu_env *env,
- const struct cl_page_slice *slice,
- void *cookie, lu_printer_t printer)
-{
- struct lov_page *lp = cl2lov_page(slice);
-
- return (*printer)(env, cookie, LUSTRE_LOV_NAME "-page@%p, raid0\n", lp);
-}
-
-static const struct cl_page_operations lov_raid0_page_ops = {
- .cpo_print = lov_raid0_page_print
-};
-
-int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index)
-{
- struct lov_object *loo = cl2lov(obj);
- struct lov_layout_raid0 *r0 = lov_r0(loo);
- struct lov_io *lio = lov_env_io(env);
- struct cl_object *subobj;
- struct cl_object *o;
- struct lov_io_sub *sub;
- struct lov_page *lpg = cl_object_page_slice(obj, page);
- loff_t offset;
- u64 suboff;
- int stripe;
- int rc;
-
- offset = cl_offset(obj, index);
- stripe = lov_stripe_number(loo->lo_lsm, offset);
- LASSERT(stripe < r0->lo_nr);
- rc = lov_stripe_offset(loo->lo_lsm, offset, stripe, &suboff);
- LASSERT(rc == 0);
-
- lpg->lps_stripe = stripe;
- cl_page_slice_add(page, &lpg->lps_cl, obj, index, &lov_raid0_page_ops);
-
- sub = lov_sub_get(env, lio, stripe);
- if (IS_ERR(sub))
- return PTR_ERR(sub);
-
- subobj = lovsub2cl(r0->lo_sub[stripe]);
- list_for_each_entry(o, &subobj->co_lu.lo_header->loh_layers,
- co_lu.lo_linkage) {
- if (o->co_ops->coo_page_init) {
- rc = o->co_ops->coo_page_init(sub->sub_env, o, page,
- cl_index(subobj, suboff));
- if (rc != 0)
- break;
- }
- }
-
- return rc;
-}
-
-static int lov_empty_page_print(const struct lu_env *env,
- const struct cl_page_slice *slice,
- void *cookie, lu_printer_t printer)
-{
- struct lov_page *lp = cl2lov_page(slice);
-
- return (*printer)(env, cookie, LUSTRE_LOV_NAME "-page@%p, empty.\n",
- lp);
-}
-
-static const struct cl_page_operations lov_empty_page_ops = {
- .cpo_print = lov_empty_page_print
-};
-
-int lov_page_init_empty(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index)
-{
- struct lov_page *lpg = cl_object_page_slice(obj, page);
- void *addr;
-
- cl_page_slice_add(page, &lpg->lps_cl, obj, index, &lov_empty_page_ops);
- addr = kmap(page->cp_vmpage);
- memset(addr, 0, cl_page_size(obj));
- kunmap(page->cp_vmpage);
- cl_page_export(env, page, 1);
- return 0;
-}
-
-/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lov_pool.c b/drivers/staging/lustre/lustre/lov/lov_pool.c
deleted file mode 100644
index b2a88ba72eb2..000000000000
--- a/drivers/staging/lustre/lustre/lov/lov_pool.c
+++ /dev/null
@@ -1,546 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/lov/lov_pool.c
- *
- * OST pool methods
- *
- * Author: Jacques-Charles LAFOUCRIERE <jc.lafoucriere@cea.fr>
- * Author: Alex Lyashkov <Alexey.Lyashkov@Sun.COM>
- * Author: Nathaniel Rutman <Nathan.Rutman@Sun.COM>
- */
-
-#define DEBUG_SUBSYSTEM S_LOV
-
-#include <obd.h>
-#include "lov_internal.h"
-
-#define pool_tgt(_p, _i) \
- _p->pool_lobd->u.lov.lov_tgts[_p->pool_obds.op_array[_i]]
-
-static u32 pool_hashfh(const void *data, u32 len, u32 seed)
-{
-	const char *pool_name = data;
-
-	return hashlen_hash(hashlen_string((void *)(unsigned long)seed,
-					   pool_name));
-}
-
-static int pool_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
-{
-	const struct pool_desc *pool = obj;
-	const char *pool_name = arg->key;
-
-	return strcmp(pool_name, pool->pool_name);
-}
-
-static const struct rhashtable_params pools_hash_params = {
- .key_len = 1, /* actually variable */
- .key_offset = offsetof(struct pool_desc, pool_name),
- .head_offset = offsetof(struct pool_desc, pool_hash),
- .hashfn = pool_hashfh,
- .obj_cmpfn = pool_cmpfn,
- .automatic_shrinking = true,
-};
-
-static void lov_pool_getref(struct pool_desc *pool)
-{
- CDEBUG(D_INFO, "pool %p\n", pool);
- atomic_inc(&pool->pool_refcount);
-}
-
-void lov_pool_putref(struct pool_desc *pool)
-{
- CDEBUG(D_INFO, "pool %p\n", pool);
- if (atomic_dec_and_test(&pool->pool_refcount)) {
- LASSERT(list_empty(&pool->pool_list));
- lov_ost_pool_free(&pool->pool_obds);
- kfree_rcu(pool, rcu);
- }
-}
-
-/*
- * pool debugfs seq_file methods
- */
-/*
- * The iterator is used to go through the target pool entries.
- * 'idx' is the current entry index in the op_array[] array;
- * idx >= pos as returned to the seq_file interface, and
- * pos runs from 0 to (pool->pool_obds.op_count - 1).
- */
-#define POOL_IT_MAGIC 0xB001CEA0
-struct pool_iterator {
- int magic;
- struct pool_desc *pool;
- int idx; /* from 0 to pool_tgt_size - 1 */
-};
-
-static void *pool_proc_next(struct seq_file *s, void *v, loff_t *pos)
-{
- struct pool_iterator *iter = (struct pool_iterator *)s->private;
- int prev_idx;
-
- LASSERTF(iter->magic == POOL_IT_MAGIC, "%08X\n", iter->magic);
-
- /* test if end of file */
- if (*pos >= pool_tgt_count(iter->pool))
- return NULL;
-
-	/* iterate to find a non-empty entry */
- prev_idx = iter->idx;
- down_read(&pool_tgt_rw_sem(iter->pool));
- iter->idx++;
- if (iter->idx == pool_tgt_count(iter->pool)) {
- iter->idx = prev_idx; /* we stay on the last entry */
- up_read(&pool_tgt_rw_sem(iter->pool));
- return NULL;
- }
- up_read(&pool_tgt_rw_sem(iter->pool));
- (*pos)++;
- /* return != NULL to continue */
- return iter;
-}
-
-static void *pool_proc_start(struct seq_file *s, loff_t *pos)
-{
- struct pool_desc *pool = (struct pool_desc *)s->private;
- struct pool_iterator *iter;
-
- lov_pool_getref(pool);
- if ((pool_tgt_count(pool) == 0) ||
- (*pos >= pool_tgt_count(pool))) {
- /* iter is not created, so stop() has no way to
- * find pool to dec ref
- */
- lov_pool_putref(pool);
- return NULL;
- }
-
- iter = kzalloc(sizeof(*iter), GFP_NOFS);
- if (!iter)
- return ERR_PTR(-ENOMEM);
- iter->magic = POOL_IT_MAGIC;
- iter->pool = pool;
- iter->idx = 0;
-
-	/* we use the seq_file private field to remember the iterator so
-	 * we can free it at stop()
-	 */
- /* /!\ do not forget to restore it to pool before freeing it */
- s->private = iter;
- if (*pos > 0) {
- loff_t i;
- void *ptr;
-
- i = 0;
- do {
- ptr = pool_proc_next(s, &iter, &i);
- } while ((i < *pos) && ptr);
- return ptr;
- }
- return iter;
-}
-
-static void pool_proc_stop(struct seq_file *s, void *v)
-{
- struct pool_iterator *iter = (struct pool_iterator *)s->private;
-
-	/* in some cases the stop() method is called twice without the
-	 * start() method being called in between (see seq_read() in
-	 * fs/seq_file.c), so we have to free only if s->private is an
-	 * iterator
-	 */
- if ((iter) && (iter->magic == POOL_IT_MAGIC)) {
- /* we restore s->private so next call to pool_proc_start()
- * will work
- */
- s->private = iter->pool;
- lov_pool_putref(iter->pool);
- kfree(iter);
- }
-}
-
-static int pool_proc_show(struct seq_file *s, void *v)
-{
- struct pool_iterator *iter = (struct pool_iterator *)v;
- struct lov_tgt_desc *tgt;
-
- LASSERTF(iter->magic == POOL_IT_MAGIC, "%08X\n", iter->magic);
- LASSERT(iter->pool);
- LASSERT(iter->idx <= pool_tgt_count(iter->pool));
-
- down_read(&pool_tgt_rw_sem(iter->pool));
- tgt = pool_tgt(iter->pool, iter->idx);
- up_read(&pool_tgt_rw_sem(iter->pool));
- if (tgt)
- seq_printf(s, "%s\n", obd_uuid2str(&tgt->ltd_uuid));
-
- return 0;
-}
-
-static const struct seq_operations pool_proc_ops = {
- .start = pool_proc_start,
- .next = pool_proc_next,
- .stop = pool_proc_stop,
- .show = pool_proc_show,
-};
-
-static int pool_proc_open(struct inode *inode, struct file *file)
-{
- int rc;
-
- rc = seq_open(file, &pool_proc_ops);
- if (!rc) {
- struct seq_file *s = file->private_data;
-
- s->private = inode->i_private;
- }
- return rc;
-}
-
-static const struct file_operations pool_proc_operations = {
- .open = pool_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-#define LOV_POOL_INIT_COUNT 2
-int lov_ost_pool_init(struct ost_pool *op, unsigned int count)
-{
- if (count == 0)
- count = LOV_POOL_INIT_COUNT;
- op->op_array = NULL;
- op->op_count = 0;
- init_rwsem(&op->op_rw_sem);
- op->op_size = count;
- op->op_array = kcalloc(op->op_size, sizeof(op->op_array[0]), GFP_NOFS);
- if (!op->op_array) {
- op->op_size = 0;
- return -ENOMEM;
- }
- return 0;
-}
-
-/* Caller must hold write op_rwlock */
-int lov_ost_pool_extend(struct ost_pool *op, unsigned int min_count)
-{
- __u32 *new;
- int new_size;
-
- LASSERT(min_count != 0);
-
- if (op->op_count < op->op_size)
- return 0;
-
- new_size = max(min_count, 2 * op->op_size);
- new = kcalloc(new_size, sizeof(op->op_array[0]), GFP_NOFS);
- if (!new)
- return -ENOMEM;
-
- /* copy old array to new one */
- memcpy(new, op->op_array, op->op_size * sizeof(op->op_array[0]));
- kfree(op->op_array);
- op->op_array = new;
- op->op_size = new_size;
- return 0;
-}
-
-int lov_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count)
-{
- int rc = 0, i;
-
- down_write(&op->op_rw_sem);
-
- rc = lov_ost_pool_extend(op, min_count);
- if (rc)
- goto out;
-
- /* search ost in pool array */
- for (i = 0; i < op->op_count; i++) {
- if (op->op_array[i] == idx) {
- rc = -EEXIST;
- goto out;
- }
- }
-	/* OST not found, so we add it */
- op->op_array[op->op_count] = idx;
- op->op_count++;
-out:
- up_write(&op->op_rw_sem);
- return rc;
-}
-
-int lov_ost_pool_remove(struct ost_pool *op, __u32 idx)
-{
- int i;
-
- down_write(&op->op_rw_sem);
-
- for (i = 0; i < op->op_count; i++) {
- if (op->op_array[i] == idx) {
- memmove(&op->op_array[i], &op->op_array[i + 1],
- (op->op_count - i - 1) * sizeof(op->op_array[0]));
- op->op_count--;
- up_write(&op->op_rw_sem);
- return 0;
- }
- }
-
- up_write(&op->op_rw_sem);
- return -EINVAL;
-}
-
-int lov_ost_pool_free(struct ost_pool *op)
-{
- if (op->op_size == 0)
- return 0;
-
- down_write(&op->op_rw_sem);
-
- kfree(op->op_array);
- op->op_array = NULL;
- op->op_count = 0;
- op->op_size = 0;
-
- up_write(&op->op_rw_sem);
- return 0;
-}
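Setting the op_rw_sem locking aside, the ost_pool helpers above are a plain growable array: grow by doubling, reject duplicates, remove by shifting the tail down with memmove(). A self-contained user-space sketch of that core logic (sizes and indices are arbitrary test values):

```c
/* User-space sketch of the ost_pool array logic; no locking here,
 * whereas the driver guards these steps with op_rw_sem.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct pool {
	uint32_t *arr;
	unsigned int count, size;
};

static int pool_add(struct pool *p, uint32_t idx)
{
	unsigned int i;

	if (p->count == p->size) {		/* extend: double the array */
		unsigned int nsz = p->size ? 2 * p->size : 2;
		uint32_t *n = realloc(p->arr, nsz * sizeof(*n));

		if (!n)
			return -1;
		p->arr = n;
		p->size = nsz;
	}
	for (i = 0; i < p->count; i++)		/* reject duplicates */
		if (p->arr[i] == idx)
			return -1;
	p->arr[p->count++] = idx;
	return 0;
}

static int pool_remove(struct pool *p, uint32_t idx)
{
	unsigned int i;

	for (i = 0; i < p->count; i++) {
		if (p->arr[i] == idx) {
			memmove(&p->arr[i], &p->arr[i + 1],
				(p->count - i - 1) * sizeof(p->arr[0]));
			p->count--;
			return 0;
		}
	}
	return -1;				/* not found */
}

int main(void)
{
	struct pool p = { NULL, 0, 0 };

	pool_add(&p, 3);
	pool_add(&p, 7);
	pool_add(&p, 9);
	pool_remove(&p, 7);
	for (unsigned int i = 0; i < p.count; i++)
		printf("%u ", p.arr[i]);	/* prints: 3 9 */
	printf("\n");
	free(p.arr);
	return 0;
}
```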
-
-static void
-pools_hash_exit(void *vpool, void *data)
-{
- struct pool_desc *pool = vpool;
- lov_pool_putref(pool);
-}
-
-int lov_pool_hash_init(struct rhashtable *tbl)
-{
- return rhashtable_init(tbl, &pools_hash_params);
-}
-
-void lov_pool_hash_destroy(struct rhashtable *tbl)
-{
- rhashtable_free_and_destroy(tbl, pools_hash_exit, NULL);
-}
-
-int lov_pool_new(struct obd_device *obd, char *poolname)
-{
- struct lov_obd *lov;
- struct pool_desc *new_pool;
- int rc;
-
- lov = &obd->u.lov;
-
- if (strlen(poolname) > LOV_MAXPOOLNAME)
- return -ENAMETOOLONG;
-
- new_pool = kzalloc(sizeof(*new_pool), GFP_NOFS);
- if (!new_pool)
- return -ENOMEM;
-
- strlcpy(new_pool->pool_name, poolname, sizeof(new_pool->pool_name));
-	new_pool->pool_lobd = obd;
-	/* init the list head so the error path's list_del_init() is safe
-	 * even if we fail before the pool is added to lov_pool_list
-	 */
-	INIT_LIST_HEAD(&new_pool->pool_list);
-	/* refcount is initialized to 1 because a pool is always in use
-	 * from creation until deletion
-	 */
- atomic_set(&new_pool->pool_refcount, 1);
- rc = lov_ost_pool_init(&new_pool->pool_obds, 0);
- if (rc)
- goto out_err;
-
- /* get ref for debugfs file */
- lov_pool_getref(new_pool);
-
- new_pool->pool_debugfs_entry = debugfs_create_file(poolname, 0444,
- lov->lov_pool_debugfs_entry,
- new_pool,
- &pool_proc_operations);
-
- spin_lock(&obd->obd_dev_lock);
- list_add_tail(&new_pool->pool_list, &lov->lov_pool_list);
- lov->lov_pool_count++;
- spin_unlock(&obd->obd_dev_lock);
-
- /* Add to hash table only when it is fully ready. */
- rc = rhashtable_lookup_insert_fast(&lov->lov_pools_hash_body,
- &new_pool->pool_hash, pools_hash_params);
- if (rc) {
- if (rc != -EEXIST)
- /*
- * Hide -E2BIG and -EBUSY which
- * are not helpful.
- */
- rc = -ENOMEM;
- goto out_err;
- }
-
- CDEBUG(D_CONFIG, LOV_POOLNAMEF " is pool #%d\n",
- poolname, lov->lov_pool_count);
-
- return 0;
-
-out_err:
- spin_lock(&obd->obd_dev_lock);
- list_del_init(&new_pool->pool_list);
- lov->lov_pool_count--;
- spin_unlock(&obd->obd_dev_lock);
- debugfs_remove_recursive(new_pool->pool_debugfs_entry);
- lov_ost_pool_free(&new_pool->pool_obds);
- kfree(new_pool);
-
- return rc;
-}
-
-int lov_pool_del(struct obd_device *obd, char *poolname)
-{
- struct lov_obd *lov;
- struct pool_desc *pool;
-
- lov = &obd->u.lov;
-
- /* lookup and kill hash reference */
- rcu_read_lock();
- pool = rhashtable_lookup(&lov->lov_pools_hash_body, poolname, pools_hash_params);
- if (pool)
- if (rhashtable_remove_fast(&lov->lov_pools_hash_body,
- &pool->pool_hash, pools_hash_params) != 0)
- pool = NULL;
- rcu_read_unlock();
- if (!pool)
- return -ENOENT;
-
- debugfs_remove_recursive(pool->pool_debugfs_entry);
- lov_pool_putref(pool);
-
- spin_lock(&obd->obd_dev_lock);
- list_del_init(&pool->pool_list);
- lov->lov_pool_count--;
- spin_unlock(&obd->obd_dev_lock);
-
- /* release last reference */
- lov_pool_putref(pool);
-
- return 0;
-}
-
-int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
-{
- struct obd_uuid ost_uuid;
- struct lov_obd *lov;
- struct pool_desc *pool;
- unsigned int lov_idx;
- int rc;
-
- lov = &obd->u.lov;
-
- rcu_read_lock();
- pool = rhashtable_lookup(&lov->lov_pools_hash_body, poolname, pools_hash_params);
- if (pool && !atomic_inc_not_zero(&pool->pool_refcount))
- pool = NULL;
- rcu_read_unlock();
- if (!pool)
- return -ENOENT;
-
- obd_str2uuid(&ost_uuid, ostname);
-
- /* search ost in lov array */
- obd_getref(obd);
- for (lov_idx = 0; lov_idx < lov->desc.ld_tgt_count; lov_idx++) {
- if (!lov->lov_tgts[lov_idx])
- continue;
- if (obd_uuid_equals(&ost_uuid,
- &lov->lov_tgts[lov_idx]->ltd_uuid))
- break;
- }
- /* test if ost found in lov */
- if (lov_idx == lov->desc.ld_tgt_count) {
- rc = -EINVAL;
- goto out;
- }
-
- rc = lov_ost_pool_add(&pool->pool_obds, lov_idx, lov->lov_tgt_size);
- if (rc)
- goto out;
-
- CDEBUG(D_CONFIG, "Added %s to " LOV_POOLNAMEF " as member %d\n",
- ostname, poolname, pool_tgt_count(pool));
-
-out:
- obd_putref(obd);
- lov_pool_putref(pool);
- return rc;
-}
-
-int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
-{
- struct obd_uuid ost_uuid;
- struct lov_obd *lov;
- struct pool_desc *pool;
- unsigned int lov_idx;
- int rc = 0;
-
- lov = &obd->u.lov;
-
- rcu_read_lock();
- pool = rhashtable_lookup(&lov->lov_pools_hash_body, poolname, pools_hash_params);
- if (pool && !atomic_inc_not_zero(&pool->pool_refcount))
- pool = NULL;
- rcu_read_unlock();
- if (!pool)
- return -ENOENT;
-
- obd_str2uuid(&ost_uuid, ostname);
-
- obd_getref(obd);
- /* search ost in lov array, to get index */
- for (lov_idx = 0; lov_idx < lov->desc.ld_tgt_count; lov_idx++) {
- if (!lov->lov_tgts[lov_idx])
- continue;
-
- if (obd_uuid_equals(&ost_uuid,
- &lov->lov_tgts[lov_idx]->ltd_uuid))
- break;
- }
-
- /* test if ost found in lov */
- if (lov_idx == lov->desc.ld_tgt_count) {
- rc = -EINVAL;
- goto out;
- }
-
- lov_ost_pool_remove(&pool->pool_obds, lov_idx);
-
- CDEBUG(D_CONFIG, "%s removed from " LOV_POOLNAMEF "\n", ostname,
- poolname);
-
-out:
- obd_putref(obd);
- lov_pool_putref(pool);
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/lov/lov_request.c b/drivers/staging/lustre/lustre/lov/lov_request.c
deleted file mode 100644
index cb8567f20ea7..000000000000
--- a/drivers/staging/lustre/lustre/lov/lov_request.c
+++ /dev/null
@@ -1,354 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LOV
-
-#include <obd_class.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-#include "lov_internal.h"
-
-static void lov_init_set(struct lov_request_set *set)
-{
- set->set_count = 0;
- atomic_set(&set->set_completes, 0);
- atomic_set(&set->set_success, 0);
- INIT_LIST_HEAD(&set->set_list);
-}
-
-static void lov_finish_set(struct lov_request_set *set)
-{
- struct lov_request *req;
-
- LASSERT(set);
- while ((req = list_first_entry_or_null(&set->set_list,
- struct lov_request,
- rq_link)) != NULL) {
- list_del_init(&req->rq_link);
- kfree(req->rq_oi.oi_osfs);
- kfree(req);
- }
- kfree(set);
-}
-
-static void lov_update_set(struct lov_request_set *set,
- struct lov_request *req, int rc)
-{
- atomic_inc(&set->set_completes);
- if (rc == 0)
- atomic_inc(&set->set_success);
-}
-
-static void lov_set_add_req(struct lov_request *req,
- struct lov_request_set *set)
-{
- list_add_tail(&req->rq_link, &set->set_list);
- set->set_count++;
- req->rq_rqset = set;
-}
-
-static int lov_check_set(struct lov_obd *lov, int idx)
-{
- int rc;
- struct lov_tgt_desc *tgt;
-
- mutex_lock(&lov->lov_lock);
- tgt = lov->lov_tgts[idx];
- rc = !tgt || tgt->ltd_active ||
- (tgt->ltd_exp &&
- class_exp2cliimp(tgt->ltd_exp)->imp_connect_tried);
- mutex_unlock(&lov->lov_lock);
-
- return rc;
-}
-
-/* Check if the OSC connection exists and is active.
- * If the OSC has not yet had a chance to connect to the OST the first time,
- * wait once for it to connect instead of returning an error.
- */
-static int lov_check_and_wait_active(struct lov_obd *lov, int ost_idx)
-{
- int cnt = 0;
- struct lov_tgt_desc *tgt;
- int rc = 0;
-
- mutex_lock(&lov->lov_lock);
-
- tgt = lov->lov_tgts[ost_idx];
-
- if (unlikely(!tgt)) {
- rc = 0;
- goto out;
- }
-
- if (likely(tgt->ltd_active)) {
- rc = 1;
- goto out;
- }
-
- if (tgt->ltd_exp && class_exp2cliimp(tgt->ltd_exp)->imp_connect_tried) {
- rc = 0;
- goto out;
- }
-
- mutex_unlock(&lov->lov_lock);
-
- while (cnt < obd_timeout && !lov_check_set(lov, ost_idx)) {
- schedule_timeout_uninterruptible(HZ);
- cnt++;
- }
- if (tgt->ltd_active)
- return 1;
-
- return 0;
-
-out:
- mutex_unlock(&lov->lov_lock);
- return rc;
-}
-
-#define LOV_U64_MAX ((__u64)~0ULL)
-#define LOV_SUM_MAX(tot, add) \
- do { \
- if ((tot) + (add) < (tot)) \
- (tot) = LOV_U64_MAX; \
- else \
- (tot) += (add); \
- } while (0)
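LOV_SUM_MAX() is a saturating add: when the addition would wrap, the total pins at the maximum instead. A quick user-space check of that behavior (the macro is restated locally; values are arbitrary):

```c
/* Demonstrates the saturating-add behavior of LOV_SUM_MAX(). */
#include <stdint.h>
#include <stdio.h>

#define U64_MAX_ ((uint64_t)~0ULL)
#define SUM_MAX(tot, add)			\
	do {					\
		if ((tot) + (add) < (tot))	\
			(tot) = U64_MAX_;	\
		else				\
			(tot) += (add);		\
	} while (0)

int main(void)
{
	uint64_t tot = U64_MAX_ - 5;

	SUM_MAX(tot, 3);	/* still fits: now max - 2 */
	printf("headroom: %llu\n", (unsigned long long)(U64_MAX_ - tot));
	SUM_MAX(tot, 100);	/* would wrap, so it saturates */
	printf("saturated: %d\n", tot == U64_MAX_);	/* prints 1 */
	return 0;
}
```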
-
-static int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,
- int success)
-{
- if (success) {
- __u32 expected_stripes = lov_get_stripecnt(&obd->u.lov,
- LOV_MAGIC, 0);
- if (osfs->os_files != LOV_U64_MAX)
- lov_do_div64(osfs->os_files, expected_stripes);
- if (osfs->os_ffree != LOV_U64_MAX)
- lov_do_div64(osfs->os_ffree, expected_stripes);
-
- spin_lock(&obd->obd_osfs_lock);
- memcpy(&obd->obd_osfs, osfs, sizeof(*osfs));
- obd->obd_osfs_age = get_jiffies_64();
- spin_unlock(&obd->obd_osfs_lock);
- return 0;
- }
-
- return -EIO;
-}
-
-int lov_fini_statfs_set(struct lov_request_set *set)
-{
- int rc = 0;
-
- if (!set)
- return 0;
-
- if (atomic_read(&set->set_completes)) {
- rc = lov_fini_statfs(set->set_obd, set->set_oi->oi_osfs,
- atomic_read(&set->set_success));
- }
-
- lov_finish_set(set);
-
- return rc;
-}
-
-static void lov_update_statfs(struct obd_statfs *osfs,
- struct obd_statfs *lov_sfs,
- int success)
-{
- int shift = 0, quit = 0;
- __u64 tmp;
-
- if (success == 0) {
- memcpy(osfs, lov_sfs, sizeof(*lov_sfs));
- } else {
- if (osfs->os_bsize != lov_sfs->os_bsize) {
- /* assume all block sizes are always powers of 2 */
- /* get the bits difference */
- tmp = osfs->os_bsize | lov_sfs->os_bsize;
- for (shift = 0; shift <= 64; ++shift) {
- if (tmp & 1) {
- if (quit)
- break;
- quit = 1;
- shift = 0;
- }
- tmp >>= 1;
- }
- }
-
- if (osfs->os_bsize < lov_sfs->os_bsize) {
- osfs->os_bsize = lov_sfs->os_bsize;
-
- osfs->os_bfree >>= shift;
- osfs->os_bavail >>= shift;
- osfs->os_blocks >>= shift;
- } else if (shift != 0) {
- lov_sfs->os_bfree >>= shift;
- lov_sfs->os_bavail >>= shift;
- lov_sfs->os_blocks >>= shift;
- }
- osfs->os_bfree += lov_sfs->os_bfree;
- osfs->os_bavail += lov_sfs->os_bavail;
- osfs->os_blocks += lov_sfs->os_blocks;
- /* XXX not sure about this one - depends on policy.
- * - could be minimum if we always stripe on all OBDs
- * (but that would be wrong for any other policy,
- * if one of the OBDs has no more objects left)
- * - could be sum if we stripe whole objects
- * - could be average, just to give a nice number
- *
- * To give a "reasonable" (if not wholly accurate)
- * number, we divide the total number of free objects
- * by expected stripe count (watch out for overflow).
- */
- LOV_SUM_MAX(osfs->os_files, lov_sfs->os_files);
- LOV_SUM_MAX(osfs->os_ffree, lov_sfs->os_ffree);
- }
-}
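The shift loop above just measures the log2 distance between two power-of-two block sizes so the counts can be rescaled to the larger unit before summing. A user-space sketch of that normalization, assuming power-of-two sizes as the comment does (block sizes and counts here are made-up test values):

```c
/* Normalizing free-block counts reported with different power-of-two
 * block sizes, as lov_update_statfs() does: shift the count that uses
 * the smaller block size down by the log2 difference before summing.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t a_bsize = 4096, a_bfree = 1000;	/* 4 KiB blocks */
	uint64_t b_bsize = 1024, b_bfree = 8000;	/* 1 KiB blocks */
	uint64_t big = a_bsize > b_bsize ? a_bsize : b_bsize;
	uint64_t small = a_bsize < b_bsize ? a_bsize : b_bsize;
	unsigned int shift = 0;
	uint64_t tmp;

	/* log2(big / small); both sizes are assumed powers of two */
	for (tmp = big / small; tmp > 1; tmp >>= 1)
		shift++;

	/* express both counts in the larger block size, then sum */
	if (a_bsize < b_bsize)
		a_bfree >>= shift;
	else
		b_bfree >>= shift;

	/* 1000 + (8000 >> 2) = 3000 blocks of 4096 bytes */
	printf("total free: %llu blocks of %llu bytes\n",
	       (unsigned long long)(a_bfree + b_bfree),
	       (unsigned long long)big);
	return 0;
}
```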
-
-/* The callback for osc_statfs_async that finalizes a request info when a
- * response is received.
- */
-static int cb_statfs_update(void *cookie, int rc)
-{
- struct obd_info *oinfo = cookie;
- struct lov_request *lovreq;
- struct lov_request_set *set;
- struct obd_statfs *osfs, *lov_sfs;
- struct lov_obd *lov;
- struct lov_tgt_desc *tgt;
- struct obd_device *lovobd, *tgtobd;
- int success;
-
- lovreq = container_of(oinfo, struct lov_request, rq_oi);
- set = lovreq->rq_rqset;
- lovobd = set->set_obd;
- lov = &lovobd->u.lov;
- osfs = set->set_oi->oi_osfs;
- lov_sfs = oinfo->oi_osfs;
- success = atomic_read(&set->set_success);
- /* XXX: the same is done in lov_update_common_set, however
- * lovset->set_exp is not initialized.
- */
- lov_update_set(set, lovreq, rc);
- if (rc)
- goto out;
-
- obd_getref(lovobd);
- tgt = lov->lov_tgts[lovreq->rq_idx];
- if (!tgt || !tgt->ltd_active)
- goto out_update;
-
- tgtobd = class_exp2obd(tgt->ltd_exp);
- spin_lock(&tgtobd->obd_osfs_lock);
- memcpy(&tgtobd->obd_osfs, lov_sfs, sizeof(*lov_sfs));
- if ((oinfo->oi_flags & OBD_STATFS_FROM_CACHE) == 0)
- tgtobd->obd_osfs_age = get_jiffies_64();
- spin_unlock(&tgtobd->obd_osfs_lock);
-
-out_update:
- lov_update_statfs(osfs, lov_sfs, success);
- obd_putref(lovobd);
-out:
- return 0;
-}
-
-int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
- struct lov_request_set **reqset)
-{
- struct lov_request_set *set;
- struct lov_obd *lov = &obd->u.lov;
- int rc = 0, i;
-
- set = kzalloc(sizeof(*set), GFP_NOFS);
- if (!set)
- return -ENOMEM;
- lov_init_set(set);
-
- set->set_obd = obd;
- set->set_oi = oinfo;
-
- /* We only get block data from the OBD */
- for (i = 0; i < lov->desc.ld_tgt_count; i++) {
- struct lov_request *req;
-
- if (!lov->lov_tgts[i] ||
- (oinfo->oi_flags & OBD_STATFS_NODELAY &&
- !lov->lov_tgts[i]->ltd_active)) {
- CDEBUG(D_HA, "lov idx %d inactive\n", i);
- continue;
- }
-
- /* skip targets that have been explicitly disabled by the
- * administrator
- */
- if (!lov->lov_tgts[i]->ltd_exp) {
- CDEBUG(D_HA, "lov idx %d administratively disabled\n", i);
- continue;
- }
-
- if (!lov->lov_tgts[i]->ltd_active)
- lov_check_and_wait_active(lov, i);
-
- req = kzalloc(sizeof(*req), GFP_NOFS);
- if (!req) {
- rc = -ENOMEM;
- goto out_set;
- }
-
- req->rq_oi.oi_osfs = kzalloc(sizeof(*req->rq_oi.oi_osfs),
- GFP_NOFS);
- if (!req->rq_oi.oi_osfs) {
- kfree(req);
- rc = -ENOMEM;
- goto out_set;
- }
-
- req->rq_idx = i;
- req->rq_oi.oi_cb_up = cb_statfs_update;
- req->rq_oi.oi_flags = oinfo->oi_flags;
-
- lov_set_add_req(req, set);
- }
- if (!set->set_count) {
- rc = -EIO;
- goto out_set;
- }
- *reqset = set;
- return rc;
-out_set:
- lov_fini_statfs_set(set);
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_dev.c b/drivers/staging/lustre/lustre/lov/lovsub_dev.c
deleted file mode 100644
index 7e89a2e485fc..000000000000
--- a/drivers/staging/lustre/lustre/lov/lovsub_dev.c
+++ /dev/null
@@ -1,147 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2013, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_device and cl_device_type for LOVSUB layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LOV
-
-#include "lov_cl_internal.h"
-
-/** \addtogroup lov
- * @{
- */
-
-/*****************************************************************************
- *
- * Lov-sub device and device type functions.
- *
- */
-
-static int lovsub_device_init(const struct lu_env *env, struct lu_device *d,
- const char *name, struct lu_device *next)
-{
- struct lovsub_device *lsd = lu2lovsub_dev(d);
- struct lu_device_type *ldt;
- int rc;
-
- next->ld_site = d->ld_site;
- ldt = next->ld_type;
- rc = ldt->ldt_ops->ldto_device_init(env, next, ldt->ldt_name, NULL);
- if (rc) {
- next->ld_site = NULL;
- return rc;
- }
-
- lu_device_get(next);
- lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init);
- lsd->acid_next = lu2cl_dev(next);
- return rc;
-}
-
-static struct lu_device *lovsub_device_fini(const struct lu_env *env,
- struct lu_device *d)
-{
- struct lu_device *next;
- struct lovsub_device *lsd;
-
- lsd = lu2lovsub_dev(d);
- next = cl2lu_dev(lsd->acid_next);
- lsd->acid_next = NULL;
- return next;
-}
-
-static struct lu_device *lovsub_device_free(const struct lu_env *env,
- struct lu_device *d)
-{
- struct lovsub_device *lsd = lu2lovsub_dev(d);
- struct lu_device *next = cl2lu_dev(lsd->acid_next);
-
- if (atomic_read(&d->ld_ref) && d->ld_site) {
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL);
- lu_site_print(env, d->ld_site, &msgdata, lu_cdebug_printer);
- }
- cl_device_fini(lu2cl_dev(d));
- kfree(lsd);
- return next;
-}
-
-static const struct lu_device_operations lovsub_lu_ops = {
- .ldo_object_alloc = lovsub_object_alloc,
- .ldo_process_config = NULL,
- .ldo_recovery_complete = NULL
-};
-
-static struct lu_device *lovsub_device_alloc(const struct lu_env *env,
- struct lu_device_type *t,
- struct lustre_cfg *cfg)
-{
- struct lu_device *d;
- struct lovsub_device *lsd;
-
- lsd = kzalloc(sizeof(*lsd), GFP_NOFS);
- if (lsd) {
- int result;
-
- result = cl_device_init(&lsd->acid_cl, t);
- if (result == 0) {
- d = lovsub2lu_dev(lsd);
- d->ld_ops = &lovsub_lu_ops;
- } else {
- d = ERR_PTR(result);
- }
- } else {
- d = ERR_PTR(-ENOMEM);
- }
- return d;
-}
-
-static const struct lu_device_type_operations lovsub_device_type_ops = {
- .ldto_device_alloc = lovsub_device_alloc,
- .ldto_device_free = lovsub_device_free,
-
- .ldto_device_init = lovsub_device_init,
- .ldto_device_fini = lovsub_device_fini
-};
-
-#define LUSTRE_LOVSUB_NAME "lovsub"
-
-struct lu_device_type lovsub_device_type = {
- .ldt_tags = LU_DEVICE_CL,
- .ldt_name = LUSTRE_LOVSUB_NAME,
- .ldt_ops = &lovsub_device_type_ops,
- .ldt_ctx_tags = LCT_CL_THREAD
-};
-
-/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_lock.c b/drivers/staging/lustre/lustre/lov/lovsub_lock.c
deleted file mode 100644
index ea492be2eef3..000000000000
--- a/drivers/staging/lustre/lustre/lov/lovsub_lock.c
+++ /dev/null
@@ -1,81 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_lock for LOVSUB layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LOV
-
-#include "lov_cl_internal.h"
-
-/** \addtogroup lov
- * @{
- */
-
-/*****************************************************************************
- *
- * Lovsub lock operations.
- *
- */
-
-static void lovsub_lock_fini(const struct lu_env *env,
- struct cl_lock_slice *slice)
-{
- struct lovsub_lock *lsl;
-
- lsl = cl2lovsub_lock(slice);
- kmem_cache_free(lovsub_lock_kmem, lsl);
-}
-
-static const struct cl_lock_operations lovsub_lock_ops = {
- .clo_fini = lovsub_lock_fini,
-};
-
-int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_lock *lock, const struct cl_io *io)
-{
- struct lovsub_lock *lsk;
- int result;
-
- lsk = kmem_cache_zalloc(lovsub_lock_kmem, GFP_NOFS);
- if (lsk) {
- cl_lock_slice_add(lock, &lsk->lss_cl, obj, &lovsub_lock_ops);
- result = 0;
- } else {
- result = -ENOMEM;
- }
- return result;
-}
-
-/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_object.c b/drivers/staging/lustre/lustre/lov/lovsub_object.c
deleted file mode 100644
index 13d452086b61..000000000000
--- a/drivers/staging/lustre/lustre/lov/lovsub_object.c
+++ /dev/null
@@ -1,180 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_object for LOVSUB layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LOV
-
-#include "lov_cl_internal.h"
-
-/** \addtogroup lov
- * @{
- */
-
-/*****************************************************************************
- *
- * Lovsub object operations.
- *
- */
-
-int lovsub_object_init(const struct lu_env *env, struct lu_object *obj,
- const struct lu_object_conf *conf)
-{
- struct lovsub_device *dev = lu2lovsub_dev(obj->lo_dev);
- struct lu_object *below;
- struct lu_device *under;
-
- int result;
-
- under = &dev->acid_next->cd_lu_dev;
- below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under);
- if (below) {
- lu_object_add(obj, below);
- cl_object_page_init(lu2cl(obj), sizeof(struct lovsub_page));
- result = 0;
- } else {
- result = -ENOMEM;
- }
- return result;
-}
-
-static void lovsub_object_free(const struct lu_env *env, struct lu_object *obj)
-{
- struct lovsub_object *los = lu2lovsub(obj);
- struct lov_object *lov = los->lso_super;
-
- /* We can't assume lov was assigned here, because of the shadow
- * object handling in lu_object_find.
- */
- if (lov) {
- LASSERT(lov->lo_type == LLT_RAID0);
- LASSERT(lov->u.raid0.lo_sub[los->lso_index] == los);
- spin_lock(&lov->u.raid0.lo_sub_lock);
- lov->u.raid0.lo_sub[los->lso_index] = NULL;
- spin_unlock(&lov->u.raid0.lo_sub_lock);
- }
-
- lu_object_fini(obj);
- lu_object_header_fini(&los->lso_header.coh_lu);
- kmem_cache_free(lovsub_object_kmem, los);
-}
-
-static int lovsub_object_print(const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct lu_object *obj)
-{
- struct lovsub_object *los = lu2lovsub(obj);
-
- return (*p)(env, cookie, "[%d]", los->lso_index);
-}
-
-static int lovsub_attr_update(const struct lu_env *env, struct cl_object *obj,
- const struct cl_attr *attr, unsigned int valid)
-{
- struct lov_object *lov = cl2lovsub(obj)->lso_super;
-
- lov_r0(lov)->lo_attr_valid = 0;
- return 0;
-}
-
-static int lovsub_object_glimpse(const struct lu_env *env,
- const struct cl_object *obj,
- struct ost_lvb *lvb)
-{
- struct lovsub_object *los = cl2lovsub(obj);
-
- return cl_object_glimpse(env, &los->lso_super->lo_cl, lvb);
-}
-
-/**
- * Implementation of struct cl_object_operations::coo_req_attr_set() for lovsub
- * layer. Lov and lovsub are responsible only for struct obdo::o_stripe_idx
- * field, which is filled there.
- */
-static void lovsub_req_attr_set(const struct lu_env *env, struct cl_object *obj,
- struct cl_req_attr *attr)
-{
- struct lovsub_object *subobj = cl2lovsub(obj);
-
- cl_req_attr_set(env, &subobj->lso_super->lo_cl, attr);
-
- /*
- * There is no OBD_MD_* flag for obdo::o_stripe_idx, so set it
- * unconditionally. It never changes anyway.
- */
- attr->cra_oa->o_stripe_idx = subobj->lso_index;
-}
-
-static const struct cl_object_operations lovsub_ops = {
- .coo_page_init = lovsub_page_init,
- .coo_lock_init = lovsub_lock_init,
- .coo_attr_update = lovsub_attr_update,
- .coo_glimpse = lovsub_object_glimpse,
- .coo_req_attr_set = lovsub_req_attr_set
-};
-
-static const struct lu_object_operations lovsub_lu_obj_ops = {
- .loo_object_init = lovsub_object_init,
- .loo_object_delete = NULL,
- .loo_object_release = NULL,
- .loo_object_free = lovsub_object_free,
- .loo_object_print = lovsub_object_print,
- .loo_object_invariant = NULL
-};
-
-struct lu_object *lovsub_object_alloc(const struct lu_env *env,
- const struct lu_object_header *unused,
- struct lu_device *dev)
-{
- struct lovsub_object *los;
- struct lu_object *obj;
-
- los = kmem_cache_zalloc(lovsub_object_kmem, GFP_NOFS);
- if (los) {
- struct cl_object_header *hdr;
-
- obj = lovsub2lu(los);
- hdr = &los->lso_header;
- cl_object_header_init(hdr);
- lu_object_init(obj, &hdr->coh_lu, dev);
- lu_object_add_top(&hdr->coh_lu, obj);
- los->lso_cl.co_ops = &lovsub_ops;
- obj->lo_ops = &lovsub_lu_obj_ops;
- } else {
- obj = NULL;
- }
- return obj;
-}
-
-/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_page.c b/drivers/staging/lustre/lustre/lov/lovsub_page.c
deleted file mode 100644
index 915520bcdd60..000000000000
--- a/drivers/staging/lustre/lustre/lov/lovsub_page.c
+++ /dev/null
@@ -1,68 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_page for LOVSUB layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LOV
-
-#include "lov_cl_internal.h"
-
-/** \addtogroup lov
- * @{
- */
-
-/*****************************************************************************
- *
- * Lovsub page operations.
- *
- */
-
-static void lovsub_page_fini(const struct lu_env *env,
- struct cl_page_slice *slice)
-{
-}
-
-static const struct cl_page_operations lovsub_page_ops = {
- .cpo_fini = lovsub_page_fini
-};
-
-int lovsub_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index)
-{
- struct lovsub_page *lsb = cl_object_page_slice(obj, page);
-
- cl_page_slice_add(page, &lsb->lsb_cl, obj, index, &lovsub_page_ops);
- return 0;
-}
-
-/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lproc_lov.c b/drivers/staging/lustre/lustre/lov/lproc_lov.c
deleted file mode 100644
index 721440feef72..000000000000
--- a/drivers/staging/lustre/lustre/lov/lproc_lov.c
+++ /dev/null
@@ -1,299 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <linux/statfs.h>
-#include <lprocfs_status.h>
-#include <obd_class.h>
-#include <linux/seq_file.h>
-#include "lov_internal.h"
-
-static int lov_stripesize_seq_show(struct seq_file *m, void *v)
-{
- struct obd_device *dev = (struct obd_device *)m->private;
- struct lov_desc *desc;
-
- LASSERT(dev);
- desc = &dev->u.lov.desc;
- seq_printf(m, "%llu\n", desc->ld_default_stripe_size);
- return 0;
-}
-
-static ssize_t lov_stripesize_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct obd_device *dev = ((struct seq_file *)file->private_data)->private;
- struct lov_desc *desc;
- __u64 val;
- int rc;
-
- LASSERT(dev);
- desc = &dev->u.lov.desc;
- rc = lprocfs_write_u64_helper(buffer, count, &val);
- if (rc)
- return rc;
-
- lov_fix_desc_stripe_size(&val);
- desc->ld_default_stripe_size = val;
- return count;
-}
-
-LPROC_SEQ_FOPS(lov_stripesize);
-
-static int lov_stripeoffset_seq_show(struct seq_file *m, void *v)
-{
- struct obd_device *dev = (struct obd_device *)m->private;
- struct lov_desc *desc;
-
- LASSERT(dev);
- desc = &dev->u.lov.desc;
- seq_printf(m, "%llu\n", desc->ld_default_stripe_offset);
- return 0;
-}
-
-static ssize_t lov_stripeoffset_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct obd_device *dev = ((struct seq_file *)file->private_data)->private;
- struct lov_desc *desc;
- __u64 val;
- int rc;
-
- LASSERT(dev);
- desc = &dev->u.lov.desc;
- rc = lprocfs_write_u64_helper(buffer, count, &val);
- if (rc)
- return rc;
-
- desc->ld_default_stripe_offset = val;
- return count;
-}
-
-LPROC_SEQ_FOPS(lov_stripeoffset);
-
-static int lov_stripetype_seq_show(struct seq_file *m, void *v)
-{
- struct obd_device *dev = (struct obd_device *)m->private;
- struct lov_desc *desc;
-
- LASSERT(dev);
- desc = &dev->u.lov.desc;
- seq_printf(m, "%u\n", desc->ld_pattern);
- return 0;
-}
-
-static ssize_t lov_stripetype_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct obd_device *dev = ((struct seq_file *)file->private_data)->private;
- struct lov_desc *desc;
- int val, rc;
-
- LASSERT(dev);
- desc = &dev->u.lov.desc;
- rc = lprocfs_write_helper(buffer, count, &val);
- if (rc)
- return rc;
-
- lov_fix_desc_pattern(&val);
- desc->ld_pattern = val;
- return count;
-}
-
-LPROC_SEQ_FOPS(lov_stripetype);
-
-static int lov_stripecount_seq_show(struct seq_file *m, void *v)
-{
- struct obd_device *dev = (struct obd_device *)m->private;
- struct lov_desc *desc;
-
- LASSERT(dev);
- desc = &dev->u.lov.desc;
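-	/* the default of -1 lives in an unsigned field as ~0; narrowing
-	 * through __s16 makes it print as -1 again
-	 */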
- seq_printf(m, "%d\n", (__s16)(desc->ld_default_stripe_count + 1) - 1);
- return 0;
-}
-
-static ssize_t lov_stripecount_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct obd_device *dev = ((struct seq_file *)file->private_data)->private;
- struct lov_desc *desc;
- int val, rc;
-
- LASSERT(dev);
- desc = &dev->u.lov.desc;
- rc = lprocfs_write_helper(buffer, count, &val);
- if (rc)
- return rc;
-
- lov_fix_desc_stripe_count(&val);
- desc->ld_default_stripe_count = val;
- return count;
-}
-
-LPROC_SEQ_FOPS(lov_stripecount);
-
-static ssize_t numobd_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- struct lov_desc *desc;
-
- desc = &dev->u.lov.desc;
- return sprintf(buf, "%u\n", desc->ld_tgt_count);
-}
-LUSTRE_RO_ATTR(numobd);
-
-static ssize_t activeobd_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- struct lov_desc *desc;
-
- desc = &dev->u.lov.desc;
- return sprintf(buf, "%u\n", desc->ld_active_tgt_count);
-}
-LUSTRE_RO_ATTR(activeobd);
-
-static int lov_desc_uuid_seq_show(struct seq_file *m, void *v)
-{
- struct obd_device *dev = (struct obd_device *)m->private;
- struct lov_obd *lov;
-
- LASSERT(dev);
- lov = &dev->u.lov;
- seq_printf(m, "%s\n", lov->desc.ld_uuid.uuid);
- return 0;
-}
-
-LPROC_SEQ_FOPS_RO(lov_desc_uuid);
-
-static void *lov_tgt_seq_start(struct seq_file *p, loff_t *pos)
-{
- struct obd_device *dev = p->private;
- struct lov_obd *lov = &dev->u.lov;
-
- while (*pos < lov->desc.ld_tgt_count) {
- if (lov->lov_tgts[*pos])
- return lov->lov_tgts[*pos];
- ++*pos;
- }
- return NULL;
-}
-
-static void lov_tgt_seq_stop(struct seq_file *p, void *v)
-{
-}
-
-static void *lov_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos)
-{
- struct obd_device *dev = p->private;
- struct lov_obd *lov = &dev->u.lov;
-
- while (++*pos < lov->desc.ld_tgt_count) {
- if (lov->lov_tgts[*pos])
- return lov->lov_tgts[*pos];
- }
- return NULL;
-}
-
-static int lov_tgt_seq_show(struct seq_file *p, void *v)
-{
- struct lov_tgt_desc *tgt = v;
-
- seq_printf(p, "%d: %s %sACTIVE\n",
- tgt->ltd_index, obd_uuid2str(&tgt->ltd_uuid),
- tgt->ltd_active ? "" : "IN");
- return 0;
-}
-
-static const struct seq_operations lov_tgt_sops = {
- .start = lov_tgt_seq_start,
- .stop = lov_tgt_seq_stop,
- .next = lov_tgt_seq_next,
- .show = lov_tgt_seq_show,
-};
-
-static int lov_target_seq_open(struct inode *inode, struct file *file)
-{
- struct seq_file *seq;
- int rc;
-
- rc = seq_open(file, &lov_tgt_sops);
- if (rc)
- return rc;
-
- seq = file->private_data;
- seq->private = inode->i_private;
- return 0;
-}
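
The start/next pair above walks a sparse lov_tgts[] array, skipping NULL slots while keeping *pos as the raw array index (so the indices printed by lov_tgt_seq_show() may have gaps). A minimal userspace sketch of the same pattern, with hypothetical target names:

	#include <stdio.h>

	static const char *tgts[] = { "OST0000", NULL, "OST0002", NULL };
	#define TGT_COUNT (sizeof(tgts) / sizeof(tgts[0]))

	static const char *tgt_start(long *pos)
	{
		while ((unsigned long)*pos < TGT_COUNT) {
			if (tgts[*pos])
				return tgts[*pos];
			++*pos;
		}
		return NULL;
	}

	static const char *tgt_next(long *pos)
	{
		while ((unsigned long)++*pos < TGT_COUNT) {
			if (tgts[*pos])
				return tgts[*pos];
		}
		return NULL;
	}

	int main(void)
	{
		long pos = 0;
		const char *t;

		/* prints "0: OST0000" then "2: OST0002"; index 1 is skipped */
		for (t = tgt_start(&pos); t; t = tgt_next(&pos))
			printf("%ld: %s\n", pos, t);
		return 0;
	}
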
-
-static struct lprocfs_vars lprocfs_lov_obd_vars[] = {
- { "stripesize", &lov_stripesize_fops, NULL },
- { "stripeoffset", &lov_stripeoffset_fops, NULL },
- { "stripecount", &lov_stripecount_fops, NULL },
- { "stripetype", &lov_stripetype_fops, NULL },
- /*{ "filegroups", lprocfs_rd_filegroups, NULL, 0 },*/
- { "desc_uuid", &lov_desc_uuid_fops, NULL, 0 },
- { NULL }
-};
-
-static struct attribute *lov_attrs[] = {
- &lustre_attr_activeobd.attr,
- &lustre_attr_numobd.attr,
- NULL,
-};
-
-static const struct attribute_group lov_attr_group = {
- .attrs = lov_attrs,
-};
-
-void lprocfs_lov_init_vars(struct lprocfs_static_vars *lvars)
-{
- lvars->sysfs_vars = &lov_attr_group;
- lvars->obd_vars = lprocfs_lov_obd_vars;
-}
-
-const struct file_operations lov_proc_target_fops = {
- .owner = THIS_MODULE,
- .open = lov_target_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = lprocfs_seq_release,
-};
diff --git a/drivers/staging/lustre/lustre/mdc/Makefile b/drivers/staging/lustre/lustre/mdc/Makefile
deleted file mode 100644
index c7bc3351ccb0..000000000000
--- a/drivers/staging/lustre/lustre/mdc/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LUSTRE_FS) += mdc.o
-mdc-y := mdc_request.o mdc_reint.o mdc_lib.o mdc_locks.o lproc_mdc.o
diff --git a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
deleted file mode 100644
index 6cce32491eb5..000000000000
--- a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
+++ /dev/null
@@ -1,231 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <linux/vfs.h>
-#include <obd_class.h>
-#include <lprocfs_status.h>
-#include "mdc_internal.h"
-
-static ssize_t active_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
-
- return sprintf(buf, "%u\n", !dev->u.cli.cl_import->imp_deactive);
-}
-
-static ssize_t active_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer, size_t count)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- unsigned long val;
- int rc;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- if (val > 1)
- return -ERANGE;
-
- /* opposite senses: imp_deactive == val means the state must change */

- if (dev->u.cli.cl_import->imp_deactive == val) {
- rc = ptlrpc_set_import_active(dev->u.cli.cl_import, val);
- if (rc)
- count = rc;
- } else {
- CDEBUG(D_CONFIG, "activate %lu: ignoring repeat request\n", val);
- }
- return count;
-}
-LUSTRE_RW_ATTR(active);
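
The equality test in active_store() is easy to misread: imp_deactive and the user-visible "active" value are inverted relative to each other, so equality means the requested state differs from the current one and an RPC is needed. A tiny sketch of just that predicate (names ours, not kernel code):

	#include <stdbool.h>
	#include <stdio.h>

	/* True when the requested "active" value requires a state change. */
	static bool needs_toggle(bool imp_deactive, unsigned long requested_active)
	{
		return imp_deactive == (bool)requested_active;
	}

	int main(void)
	{
		printf("%d\n", needs_toggle(true, 1));  /* 1: deactivated, want active */
		printf("%d\n", needs_toggle(false, 1)); /* 0: already active, no-op */
		return 0;
	}
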
-
-static ssize_t max_rpcs_in_flight_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- int len;
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- __u32 max;
-
- max = obd_get_max_rpcs_in_flight(&dev->u.cli);
- len = sprintf(buf, "%u\n", max);
-
- return len;
-}
-
-static ssize_t max_rpcs_in_flight_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- int rc;
- unsigned long val;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- rc = obd_set_max_rpcs_in_flight(&dev->u.cli, val);
- if (rc)
- count = rc;
-
- return count;
-}
-LUSTRE_RW_ATTR(max_rpcs_in_flight);
-
-static ssize_t max_mod_rpcs_in_flight_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- u16 max;
- int len;
-
- max = dev->u.cli.cl_max_mod_rpcs_in_flight;
- len = sprintf(buf, "%hu\n", max);
-
- return len;
-}
-
-static ssize_t max_mod_rpcs_in_flight_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- u16 val;
- int rc;
-
- rc = kstrtou16(buffer, 10, &val);
- if (rc)
- return rc;
-
- rc = obd_set_max_mod_rpcs_in_flight(&dev->u.cli, val);
- if (rc)
- count = rc;
-
- return count;
-}
-LUSTRE_RW_ATTR(max_mod_rpcs_in_flight);
-
-static int mdc_rpc_stats_seq_show(struct seq_file *seq, void *v)
-{
- struct obd_device *dev = seq->private;
-
- return obd_mod_rpc_stats_seq_show(&dev->u.cli, seq);
-}
-
-static ssize_t mdc_rpc_stats_seq_write(struct file *file,
- const char __user *buf,
- size_t len, loff_t *off)
-{
- struct seq_file *seq = file->private_data;
- struct obd_device *dev = seq->private;
- struct client_obd *cli = &dev->u.cli;
-
- lprocfs_oh_clear(&cli->cl_mod_rpcs_hist);
-
- return len;
-}
-LPROC_SEQ_FOPS(mdc_rpc_stats);
-
-LPROC_SEQ_FOPS_WR_ONLY(mdc, ping);
-
-LPROC_SEQ_FOPS_RO_TYPE(mdc, connect_flags);
-LPROC_SEQ_FOPS_RO_TYPE(mdc, server_uuid);
-LPROC_SEQ_FOPS_RO_TYPE(mdc, conn_uuid);
-LPROC_SEQ_FOPS_RO_TYPE(mdc, timeouts);
-LPROC_SEQ_FOPS_RO_TYPE(mdc, state);
-
-/*
- * Note: the sysfs entry below is provided but not currently used;
- * sbi->sb_md_brw_size is used instead. The per-obd variable should be
- * used once DNE is enabled and dir pages are managed in the MDC layer;
- * remember to enable the sysfs store function then.
- */
-static ssize_t max_pages_per_rpc_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- struct client_obd *cli = &dev->u.cli;
-
- return sprintf(buf, "%d\n", cli->cl_max_pages_per_rpc);
-}
-LUSTRE_RO_ATTR(max_pages_per_rpc);
-
-LPROC_SEQ_FOPS_RW_TYPE(mdc, import);
-LPROC_SEQ_FOPS_RW_TYPE(mdc, pinger_recov);
-
-static struct lprocfs_vars lprocfs_mdc_obd_vars[] = {
- { "ping", &mdc_ping_fops, NULL, 0222 },
- { "connect_flags", &mdc_connect_flags_fops, NULL, 0 },
- /*{ "filegroups", lprocfs_rd_filegroups, NULL, 0 },*/
- { "mds_server_uuid", &mdc_server_uuid_fops, NULL, 0 },
- { "mds_conn_uuid", &mdc_conn_uuid_fops, NULL, 0 },
- { "timeouts", &mdc_timeouts_fops, NULL, 0 },
- { "import", &mdc_import_fops, NULL, 0 },
- { "state", &mdc_state_fops, NULL, 0 },
- { "pinger_recov", &mdc_pinger_recov_fops, NULL, 0 },
- { .name = "rpc_stats",
- .fops = &mdc_rpc_stats_fops },
- { NULL }
-};
-
-static struct attribute *mdc_attrs[] = {
- &lustre_attr_active.attr,
- &lustre_attr_max_rpcs_in_flight.attr,
- &lustre_attr_max_mod_rpcs_in_flight.attr,
- &lustre_attr_max_pages_per_rpc.attr,
- NULL,
-};
-
-static const struct attribute_group mdc_attr_group = {
- .attrs = mdc_attrs,
-};
-
-void lprocfs_mdc_init_vars(struct lprocfs_static_vars *lvars)
-{
- lvars->sysfs_vars = &mdc_attr_group;
- lvars->obd_vars = lprocfs_mdc_obd_vars;
-}
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_internal.h b/drivers/staging/lustre/lustre/mdc/mdc_internal.h
deleted file mode 100644
index 28924e927b50..000000000000
--- a/drivers/staging/lustre/lustre/mdc/mdc_internal.h
+++ /dev/null
@@ -1,144 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef _MDC_INTERNAL_H
-#define _MDC_INTERNAL_H
-
-#include <lustre_mdc.h>
-
-void lprocfs_mdc_init_vars(struct lprocfs_static_vars *lvars);
-
-void mdc_pack_body(struct ptlrpc_request *req, const struct lu_fid *fid,
- __u64 valid, size_t ea_size, __u32 suppgid, u32 flags);
-void mdc_swap_layouts_pack(struct ptlrpc_request *req,
- struct md_op_data *op_data);
-void mdc_readdir_pack(struct ptlrpc_request *req, __u64 pgoff, size_t size,
- const struct lu_fid *fid);
-void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, u32 flags,
- struct md_op_data *data, size_t ea_size);
-void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- void *ea, size_t ealen);
-void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- const void *data, size_t datalen, umode_t mode, uid_t uid,
- gid_t gid, kernel_cap_t capability, __u64 rdev);
-void mdc_open_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- umode_t mode, __u64 rdev, __u64 flags, const void *data,
- size_t datalen);
-void mdc_unlink_pack(struct ptlrpc_request *req, struct md_op_data *op_data);
-void mdc_link_pack(struct ptlrpc_request *req, struct md_op_data *op_data);
-void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- const char *old, size_t oldlen,
- const char *new, size_t newlen);
-void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data);
-
-/* mdc/mdc_locks.c */
-int mdc_set_lock_data(struct obd_export *exp,
- const struct lustre_handle *lockh,
- void *data, __u64 *bits);
-
-int mdc_null_inode(struct obd_export *exp, const struct lu_fid *fid);
-
-int mdc_intent_lock(struct obd_export *exp,
- struct md_op_data *op_data,
- struct lookup_intent *it,
- struct ptlrpc_request **reqp,
- ldlm_blocking_callback cb_blocking,
- __u64 extra_lock_flags);
-
-int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
- const union ldlm_policy_data *policy,
- struct md_op_data *op_data,
- struct lustre_handle *lockh, u64 extra_lock_flags);
-
-int mdc_resource_get_unused(struct obd_export *exp, const struct lu_fid *fid,
- struct list_head *cancels, enum ldlm_mode mode,
- __u64 bits);
-/* mdc/mdc_request.c */
-int mdc_fid_alloc(const struct lu_env *env, struct obd_export *exp,
- struct lu_fid *fid, struct md_op_data *op_data);
-struct obd_client_handle;
-
-int mdc_set_open_replay_data(struct obd_export *exp,
- struct obd_client_handle *och,
- struct lookup_intent *it);
-
-void mdc_commit_open(struct ptlrpc_request *req);
-void mdc_replay_open(struct ptlrpc_request *req);
-
-int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
- const void *data, size_t datalen, umode_t mode, uid_t uid,
- gid_t gid, kernel_cap_t capability, __u64 rdev,
- struct ptlrpc_request **request);
-int mdc_link(struct obd_export *exp, struct md_op_data *op_data,
- struct ptlrpc_request **request);
-int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
- const char *old, size_t oldlen,
- const char *new, size_t newlen,
- struct ptlrpc_request **request);
-int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
- void *ea, size_t ealen, struct ptlrpc_request **request);
-int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
- struct ptlrpc_request **request);
-int mdc_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
- union ldlm_policy_data *policy, enum ldlm_mode mode,
- enum ldlm_cancel_flags flags, void *opaque);
-
-int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
- struct lu_fid *fid, __u64 *bits);
-
-int mdc_intent_getattr_async(struct obd_export *exp,
- struct md_enqueue_info *minfo);
-
-enum ldlm_mode mdc_lock_match(struct obd_export *exp, __u64 flags,
- const struct lu_fid *fid, enum ldlm_type type,
- union ldlm_policy_data *policy,
- enum ldlm_mode mode,
- struct lustre_handle *lockh);
-
-static inline int mdc_prep_elc_req(struct obd_export *exp,
- struct ptlrpc_request *req, int opc,
- struct list_head *cancels, int count)
-{
- return ldlm_prep_elc_req(exp, req, LUSTRE_MDS_VERSION, opc, 0, cancels,
- count);
-}
-
-static inline unsigned long hash_x_index(__u64 hash, int hash64)
-{
- if (BITS_PER_LONG == 32 && hash64)
- hash >>= 32;
- /* save hash 0 with hash 1 */
- return ~0UL - (hash + !hash);
-}
-
-#endif
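
hash_x_index() above turns a directory hash into a page-cache index counting down from ~0UL; the `hash + !hash` term folds hash 0 onto hash 1 ("save hash 0 with hash 1") so the index ~0UL itself is never produced. A standalone sketch, assuming a 64-bit long (the kernel tests BITS_PER_LONG rather than sizeof):

	#include <stdio.h>

	static unsigned long hash_to_index(unsigned long long hash, int hash64)
	{
		if (sizeof(long) == 4 && hash64)
			hash >>= 32;
		/* save hash 0 with hash 1: index ~0UL is never generated */
		return ~0UL - (unsigned long)(hash + !hash);
	}

	int main(void)
	{
		printf("%lx\n", hash_to_index(0, 0)); /* fffffffffffffffe */
		printf("%lx\n", hash_to_index(1, 0)); /* fffffffffffffffe (same slot) */
		printf("%lx\n", hash_to_index(2, 0)); /* fffffffffffffffd */
		return 0;
	}
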
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_lib.c b/drivers/staging/lustre/lustre/mdc/mdc_lib.c
deleted file mode 100644
index d582968987ff..000000000000
--- a/drivers/staging/lustre/lustre/mdc/mdc_lib.c
+++ /dev/null
@@ -1,498 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_MDC
-#include <lustre_net.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-#include "mdc_internal.h"
-
-static void set_mrc_cr_flags(struct mdt_rec_create *mrc, u64 flags)
-{
- mrc->cr_flags_l = (u32)(flags & 0xFFFFFFFFULL);
- mrc->cr_flags_h = (u32)(flags >> 32);
-}
-
-static void __mdc_pack_body(struct mdt_body *b, __u32 suppgid)
-{
- b->mbo_suppgid = suppgid;
- b->mbo_uid = from_kuid(&init_user_ns, current_uid());
- b->mbo_gid = from_kgid(&init_user_ns, current_gid());
- b->mbo_fsuid = from_kuid(&init_user_ns, current_fsuid());
- b->mbo_fsgid = from_kgid(&init_user_ns, current_fsgid());
- b->mbo_capability = current_cap().cap[0];
-}
-
-void mdc_swap_layouts_pack(struct ptlrpc_request *req,
- struct md_op_data *op_data)
-{
- struct mdt_body *b = req_capsule_client_get(&req->rq_pill,
- &RMF_MDT_BODY);
-
- __mdc_pack_body(b, op_data->op_suppgids[0]);
- b->mbo_fid1 = op_data->op_fid1;
- b->mbo_fid2 = op_data->op_fid2;
- b->mbo_valid |= OBD_MD_FLID;
-}
-
-void mdc_pack_body(struct ptlrpc_request *req, const struct lu_fid *fid,
- __u64 valid, size_t ea_size, __u32 suppgid, u32 flags)
-{
- struct mdt_body *b = req_capsule_client_get(&req->rq_pill,
- &RMF_MDT_BODY);
- b->mbo_valid = valid;
- b->mbo_eadatasize = ea_size;
- b->mbo_flags = flags;
- __mdc_pack_body(b, suppgid);
- if (fid) {
- b->mbo_fid1 = *fid;
- b->mbo_valid |= OBD_MD_FLID;
- }
-}
-
-/**
- * Pack a name (path component) into a request
- *
- * \param[in] req request
- * \param[in] field request field (usually RMF_NAME)
- * \param[in] name path component
- * \param[in] name_len length of path component
- *
- * \a field must be present in \a req and of size \a name_len + 1.
- *
- * \a name must be '\0'-terminated, of length \a name_len, and must be
- * a single path component (it must not contain '/').
- */
-static void mdc_pack_name(struct ptlrpc_request *req,
- const struct req_msg_field *field,
- const char *name, size_t name_len)
-{
- size_t buf_size;
- size_t cpy_len;
- char *buf;
-
- buf = req_capsule_client_get(&req->rq_pill, field);
- buf_size = req_capsule_get_size(&req->rq_pill, field, RCL_CLIENT);
-
- LASSERT(name && name_len && buf && buf_size == name_len + 1);
-
- cpy_len = strlcpy(buf, name, buf_size);
-
- LASSERT(cpy_len == name_len && lu_name_is_valid_2(buf, cpy_len));
-}
-
-void mdc_readdir_pack(struct ptlrpc_request *req, __u64 pgoff, size_t size,
- const struct lu_fid *fid)
-{
- struct mdt_body *b = req_capsule_client_get(&req->rq_pill,
- &RMF_MDT_BODY);
- b->mbo_fid1 = *fid;
- b->mbo_valid |= OBD_MD_FLID;
- b->mbo_size = pgoff; /* !! mbo_size reused to carry the page offset */
- b->mbo_nlink = size; /* !! mbo_nlink reused to carry the buffer size */
- __mdc_pack_body(b, -1);
- b->mbo_mode = LUDA_FID | LUDA_TYPE;
-}
-
-/* packing of MDS records */
-void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- const void *data, size_t datalen, umode_t mode,
- uid_t uid, gid_t gid, kernel_cap_t cap_effective,
- __u64 rdev)
-{
- struct mdt_rec_create *rec;
- char *tmp;
- __u64 flags;
-
- BUILD_BUG_ON(sizeof(struct mdt_rec_reint) != sizeof(struct mdt_rec_create));
- rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
-
- rec->cr_opcode = REINT_CREATE;
- rec->cr_fsuid = uid;
- rec->cr_fsgid = gid;
- rec->cr_cap = cap_effective.cap[0];
- rec->cr_fid1 = op_data->op_fid1;
- rec->cr_fid2 = op_data->op_fid2;
- rec->cr_mode = mode;
- rec->cr_rdev = rdev;
- rec->cr_time = op_data->op_mod_time;
- rec->cr_suppgid1 = op_data->op_suppgids[0];
- rec->cr_suppgid2 = op_data->op_suppgids[1];
- flags = 0;
- if (op_data->op_bias & MDS_CREATE_VOLATILE)
- flags |= MDS_OPEN_VOLATILE;
- set_mrc_cr_flags(rec, flags);
- rec->cr_bias = op_data->op_bias;
- rec->cr_umask = current_umask();
-
- mdc_pack_name(req, &RMF_NAME, op_data->op_name, op_data->op_namelen);
- if (data) {
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_EADATA);
- memcpy(tmp, data, datalen);
- }
-}
-
-static inline __u64 mds_pack_open_flags(__u64 flags)
-{
- __u64 cr_flags = (flags & (FMODE_READ | FMODE_WRITE |
- MDS_OPEN_FL_INTERNAL));
- if (flags & O_CREAT)
- cr_flags |= MDS_OPEN_CREAT;
- if (flags & O_EXCL)
- cr_flags |= MDS_OPEN_EXCL;
- if (flags & O_TRUNC)
- cr_flags |= MDS_OPEN_TRUNC;
- if (flags & O_APPEND)
- cr_flags |= MDS_OPEN_APPEND;
- if (flags & O_SYNC)
- cr_flags |= MDS_OPEN_SYNC;
- if (flags & O_DIRECTORY)
- cr_flags |= MDS_OPEN_DIRECTORY;
- if (flags & __FMODE_EXEC)
- cr_flags |= MDS_FMODE_EXEC;
- if (cl_is_lov_delay_create(flags))
- cr_flags |= MDS_OPEN_DELAY_CREATE;
-
- if (flags & O_NONBLOCK)
- cr_flags |= MDS_OPEN_NORESTORE;
-
- return cr_flags;
-}
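
mds_pack_open_flags() above is a pure bit translation from VFS open flags to the MDS_OPEN_* wire flags. A reduced sketch with placeholder flag values (the real MDS_OPEN_* constants live in lustre_idl.h and differ from these):

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>

	#define MDS_OPEN_CREAT 0x1ULL /* placeholder, not the wire value */
	#define MDS_OPEN_EXCL  0x2ULL
	#define MDS_OPEN_TRUNC 0x4ULL

	static uint64_t pack_open_flags(uint64_t flags)
	{
		uint64_t cr_flags = 0;

		if (flags & O_CREAT)
			cr_flags |= MDS_OPEN_CREAT;
		if (flags & O_EXCL)
			cr_flags |= MDS_OPEN_EXCL;
		if (flags & O_TRUNC)
			cr_flags |= MDS_OPEN_TRUNC;
		return cr_flags;
	}

	int main(void)
	{
		/* O_CREAT|O_EXCL maps to MDS_OPEN_CREAT|MDS_OPEN_EXCL (0x3 here) */
		printf("%#llx\n", (unsigned long long)pack_open_flags(O_CREAT | O_EXCL));
		return 0;
	}
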
-
-/* packing of MDS records */
-void mdc_open_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- umode_t mode, __u64 rdev, __u64 flags, const void *lmm,
- size_t lmmlen)
-{
- struct mdt_rec_create *rec;
- char *tmp;
- __u64 cr_flags;
-
- BUILD_BUG_ON(sizeof(struct mdt_rec_reint) != sizeof(struct mdt_rec_create));
- rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
-
- /* XXX do something about time, uid, gid */
- rec->cr_opcode = REINT_OPEN;
- rec->cr_fsuid = from_kuid(&init_user_ns, current_fsuid());
- rec->cr_fsgid = from_kgid(&init_user_ns, current_fsgid());
- rec->cr_cap = current_cap().cap[0];
- rec->cr_fid1 = op_data->op_fid1;
- rec->cr_fid2 = op_data->op_fid2;
-
- rec->cr_mode = mode;
- cr_flags = mds_pack_open_flags(flags);
- rec->cr_rdev = rdev;
- rec->cr_time = op_data->op_mod_time;
- rec->cr_suppgid1 = op_data->op_suppgids[0];
- rec->cr_suppgid2 = op_data->op_suppgids[1];
- rec->cr_bias = op_data->op_bias;
- rec->cr_umask = current_umask();
- rec->cr_old_handle = op_data->op_handle;
-
- if (op_data->op_name) {
- mdc_pack_name(req, &RMF_NAME, op_data->op_name,
- op_data->op_namelen);
-
- if (op_data->op_bias & MDS_CREATE_VOLATILE)
- cr_flags |= MDS_OPEN_VOLATILE;
- }
-
- if (lmm) {
- cr_flags |= MDS_OPEN_HAS_EA;
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_EADATA);
- memcpy(tmp, lmm, lmmlen);
- }
- set_mrc_cr_flags(rec, cr_flags);
-}
-
-static inline __u64 attr_pack(unsigned int ia_valid)
-{
- __u64 sa_valid = 0;
-
- if (ia_valid & ATTR_MODE)
- sa_valid |= MDS_ATTR_MODE;
- if (ia_valid & ATTR_UID)
- sa_valid |= MDS_ATTR_UID;
- if (ia_valid & ATTR_GID)
- sa_valid |= MDS_ATTR_GID;
- if (ia_valid & ATTR_SIZE)
- sa_valid |= MDS_ATTR_SIZE;
- if (ia_valid & ATTR_ATIME)
- sa_valid |= MDS_ATTR_ATIME;
- if (ia_valid & ATTR_MTIME)
- sa_valid |= MDS_ATTR_MTIME;
- if (ia_valid & ATTR_CTIME)
- sa_valid |= MDS_ATTR_CTIME;
- if (ia_valid & ATTR_ATIME_SET)
- sa_valid |= MDS_ATTR_ATIME_SET;
- if (ia_valid & ATTR_MTIME_SET)
- sa_valid |= MDS_ATTR_MTIME_SET;
- if (ia_valid & ATTR_FORCE)
- sa_valid |= MDS_ATTR_FORCE;
- if (ia_valid & ATTR_ATTR_FLAG)
- sa_valid |= MDS_ATTR_ATTR_FLAG;
- if (ia_valid & ATTR_KILL_SUID)
- sa_valid |= MDS_ATTR_KILL_SUID;
- if (ia_valid & ATTR_KILL_SGID)
- sa_valid |= MDS_ATTR_KILL_SGID;
- if (ia_valid & ATTR_CTIME_SET)
- sa_valid |= MDS_ATTR_CTIME_SET;
- if (ia_valid & ATTR_OPEN)
- sa_valid |= MDS_ATTR_FROM_OPEN;
- if (ia_valid & ATTR_BLOCKS)
- sa_valid |= MDS_ATTR_BLOCKS;
- if (ia_valid & MDS_OPEN_OWNEROVERRIDE)
- /* NFSD hack (see bug 5781) */
- sa_valid |= MDS_OPEN_OWNEROVERRIDE;
- return sa_valid;
-}
-
-static void mdc_setattr_pack_rec(struct mdt_rec_setattr *rec,
- struct md_op_data *op_data)
-{
- rec->sa_opcode = REINT_SETATTR;
- rec->sa_fsuid = from_kuid(&init_user_ns, current_fsuid());
- rec->sa_fsgid = from_kgid(&init_user_ns, current_fsgid());
- rec->sa_cap = current_cap().cap[0];
- rec->sa_suppgid = -1;
-
- rec->sa_fid = op_data->op_fid1;
- rec->sa_valid = attr_pack(op_data->op_attr.ia_valid);
- rec->sa_mode = op_data->op_attr.ia_mode;
- rec->sa_uid = from_kuid(&init_user_ns, op_data->op_attr.ia_uid);
- rec->sa_gid = from_kgid(&init_user_ns, op_data->op_attr.ia_gid);
- rec->sa_size = op_data->op_attr.ia_size;
- rec->sa_blocks = op_data->op_attr_blocks;
- rec->sa_atime = LTIME_S(op_data->op_attr.ia_atime);
- rec->sa_mtime = LTIME_S(op_data->op_attr.ia_mtime);
- rec->sa_ctime = LTIME_S(op_data->op_attr.ia_ctime);
- rec->sa_attr_flags = op_data->op_attr_flags;
- if ((op_data->op_attr.ia_valid & ATTR_GID) &&
- in_group_p(op_data->op_attr.ia_gid))
- rec->sa_suppgid =
- from_kgid(&init_user_ns, op_data->op_attr.ia_gid);
- else
- rec->sa_suppgid = op_data->op_suppgids[0];
-
- rec->sa_bias = op_data->op_bias;
-}
-
-static void mdc_ioepoch_pack(struct mdt_ioepoch *epoch,
- struct md_op_data *op_data)
-{
- epoch->mio_handle = op_data->op_handle;
- epoch->mio_unused1 = 0;
- epoch->mio_unused2 = 0;
- epoch->mio_padding = 0;
-}
-
-void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- void *ea, size_t ealen)
-{
- struct mdt_rec_setattr *rec;
- struct lov_user_md *lum = NULL;
-
- BUILD_BUG_ON(sizeof(struct mdt_rec_reint) !=
- sizeof(struct mdt_rec_setattr));
- rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
- mdc_setattr_pack_rec(rec, op_data);
-
- if (ealen == 0)
- return;
-
- lum = req_capsule_client_get(&req->rq_pill, &RMF_EADATA);
- if (!ea) { /* Remove LOV EA */
- lum->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V1);
- lum->lmm_stripe_size = 0;
- lum->lmm_stripe_count = 0;
- lum->lmm_stripe_offset = (typeof(lum->lmm_stripe_offset))(-1);
- } else {
- memcpy(lum, ea, ealen);
- }
-}
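
The !ea branch above encodes "remove the striping EA": a V1 lov_user_md with zero stripe size/count and stripe offset -1. A simplified sketch of that sentinel; the struct layout is abbreviated, the magic value is assumed from lustre_user.h, and the little-endian byte swap done by the kernel code is omitted:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#define LOV_USER_MAGIC_V1 0x0BD10BD0u /* value assumed from lustre_user.h */

	struct lov_user_md_sketch { /* abbreviated layout, not the wire struct */
		uint32_t lmm_magic;
		uint32_t lmm_stripe_size;
		uint16_t lmm_stripe_count;
		uint16_t lmm_stripe_offset;
	};

	/* Zero stripes plus stripe offset -1 asks the MDS to drop the EA. */
	static void pack_remove_lov_ea(struct lov_user_md_sketch *lum)
	{
		memset(lum, 0, sizeof(*lum));
		lum->lmm_magic = LOV_USER_MAGIC_V1;
		lum->lmm_stripe_offset = (uint16_t)-1;
	}

	int main(void)
	{
		struct lov_user_md_sketch lum;

		pack_remove_lov_ea(&lum);
		printf("magic=%#x offset=%#hx\n", lum.lmm_magic, lum.lmm_stripe_offset);
		return 0;
	}
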
-
-void mdc_unlink_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
-{
- struct mdt_rec_unlink *rec;
-
- BUILD_BUG_ON(sizeof(struct mdt_rec_reint) != sizeof(struct mdt_rec_unlink));
- rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
-
- rec->ul_opcode = op_data->op_cli_flags & CLI_RM_ENTRY ?
- REINT_RMENTRY : REINT_UNLINK;
- rec->ul_fsuid = op_data->op_fsuid;
- rec->ul_fsgid = op_data->op_fsgid;
- rec->ul_cap = op_data->op_cap.cap[0];
- rec->ul_mode = op_data->op_mode;
- rec->ul_suppgid1 = op_data->op_suppgids[0];
- rec->ul_suppgid2 = -1;
- rec->ul_fid1 = op_data->op_fid1;
- rec->ul_fid2 = op_data->op_fid2;
- rec->ul_time = op_data->op_mod_time;
- rec->ul_bias = op_data->op_bias;
-
- mdc_pack_name(req, &RMF_NAME, op_data->op_name, op_data->op_namelen);
-}
-
-void mdc_link_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
-{
- struct mdt_rec_link *rec;
-
- BUILD_BUG_ON(sizeof(struct mdt_rec_reint) != sizeof(struct mdt_rec_link));
- rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
-
- rec->lk_opcode = REINT_LINK;
- rec->lk_fsuid = op_data->op_fsuid; /* current->fsuid; */
- rec->lk_fsgid = op_data->op_fsgid; /* current->fsgid; */
- rec->lk_cap = op_data->op_cap.cap[0]; /* current->cap_effective; */
- rec->lk_suppgid1 = op_data->op_suppgids[0];
- rec->lk_suppgid2 = op_data->op_suppgids[1];
- rec->lk_fid1 = op_data->op_fid1;
- rec->lk_fid2 = op_data->op_fid2;
- rec->lk_time = op_data->op_mod_time;
- rec->lk_bias = op_data->op_bias;
-
- mdc_pack_name(req, &RMF_NAME, op_data->op_name, op_data->op_namelen);
-}
-
-static void mdc_intent_close_pack(struct ptlrpc_request *req,
- struct md_op_data *op_data)
-{
- enum mds_op_bias bias = op_data->op_bias;
- struct close_data *data;
- struct ldlm_lock *lock;
-
- if (!(bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP |
- MDS_RENAME_MIGRATE)))
- return;
-
- data = req_capsule_client_get(&req->rq_pill, &RMF_CLOSE_DATA);
- LASSERT(data);
-
- lock = ldlm_handle2lock(&op_data->op_lease_handle);
- if (lock) {
- data->cd_handle = lock->l_remote_handle;
- LDLM_LOCK_PUT(lock);
- }
- ldlm_cli_cancel(&op_data->op_lease_handle, LCF_LOCAL);
-
- data->cd_data_version = op_data->op_data_version;
- data->cd_fid = op_data->op_fid2;
-}
-
-void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- const char *old, size_t oldlen,
- const char *new, size_t newlen)
-{
- struct mdt_rec_rename *rec;
-
- BUILD_BUG_ON(sizeof(struct mdt_rec_reint) != sizeof(struct mdt_rec_rename));
- rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
-
- /* XXX do something about time, uid, gid */
- rec->rn_opcode = op_data->op_cli_flags & CLI_MIGRATE ?
- REINT_MIGRATE : REINT_RENAME;
- rec->rn_fsuid = op_data->op_fsuid;
- rec->rn_fsgid = op_data->op_fsgid;
- rec->rn_cap = op_data->op_cap.cap[0];
- rec->rn_suppgid1 = op_data->op_suppgids[0];
- rec->rn_suppgid2 = op_data->op_suppgids[1];
- rec->rn_fid1 = op_data->op_fid1;
- rec->rn_fid2 = op_data->op_fid2;
- rec->rn_time = op_data->op_mod_time;
- rec->rn_mode = op_data->op_mode;
- rec->rn_bias = op_data->op_bias;
-
- mdc_pack_name(req, &RMF_NAME, old, oldlen);
-
- if (new)
- mdc_pack_name(req, &RMF_SYMTGT, new, newlen);
-
- if (op_data->op_cli_flags & CLI_MIGRATE &&
- op_data->op_bias & MDS_RENAME_MIGRATE) {
- struct mdt_ioepoch *epoch;
-
- mdc_intent_close_pack(req, op_data);
- epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH);
- mdc_ioepoch_pack(epoch, op_data);
- }
-}
-
-void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, u32 flags,
- struct md_op_data *op_data, size_t ea_size)
-{
- struct mdt_body *b = req_capsule_client_get(&req->rq_pill,
- &RMF_MDT_BODY);
-
- b->mbo_valid = valid;
- if (op_data->op_bias & MDS_CHECK_SPLIT)
- b->mbo_valid |= OBD_MD_FLCKSPLIT;
- if (op_data->op_bias & MDS_CROSS_REF)
- b->mbo_valid |= OBD_MD_FLCROSSREF;
- b->mbo_eadatasize = ea_size;
- b->mbo_flags = flags;
- __mdc_pack_body(b, op_data->op_suppgids[0]);
-
- b->mbo_fid1 = op_data->op_fid1;
- b->mbo_fid2 = op_data->op_fid2;
- b->mbo_valid |= OBD_MD_FLID;
-
- if (op_data->op_name)
- mdc_pack_name(req, &RMF_NAME, op_data->op_name,
- op_data->op_namelen);
-}
-
-void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
-{
- struct mdt_ioepoch *epoch;
- struct mdt_rec_setattr *rec;
-
- epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH);
- rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
-
- mdc_setattr_pack_rec(rec, op_data);
- /*
- * The client will zero out local timestamps when losing the IBITS lock
- * so any new RPC timestamps will update the client inode's timestamps.
- * There was a defect on the server side which allowed the atime to be
- * overwritten by a zeroed-out atime packed into the close RPC.
- *
- * Proactively clear the MDS_ATTR_ATIME flag in the RPC in this case
- * to avoid zeroing the atime on old unpatched servers. See LU-8041.
- */
- if (rec->sa_atime == 0)
- rec->sa_valid &= ~MDS_ATTR_ATIME;
-
- mdc_ioepoch_pack(epoch, op_data);
- mdc_intent_close_pack(req, op_data);
-}
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
deleted file mode 100644
index a8aa0fa5e87a..000000000000
--- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c
+++ /dev/null
@@ -1,1239 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_MDC
-
-#include <linux/module.h>
-
-#include <lustre_intent.h>
-#include <obd.h>
-#include <obd_class.h>
-#include <lustre_dlm.h>
-#include <lustre_fid.h>
-#include <lustre_mdc.h>
-#include <lustre_net.h>
-#include <lustre_req_layout.h>
-#include <lustre_swab.h>
-
-#include "mdc_internal.h"
-
-struct mdc_getattr_args {
- struct obd_export *ga_exp;
- struct md_enqueue_info *ga_minfo;
-};
-
-int it_open_error(int phase, struct lookup_intent *it)
-{
- if (it_disposition(it, DISP_OPEN_LEASE)) {
- if (phase >= DISP_OPEN_LEASE)
- return it->it_status;
- else
- return 0;
- }
- if (it_disposition(it, DISP_OPEN_OPEN)) {
- if (phase >= DISP_OPEN_OPEN)
- return it->it_status;
- else
- return 0;
- }
-
- if (it_disposition(it, DISP_OPEN_CREATE)) {
- if (phase >= DISP_OPEN_CREATE)
- return it->it_status;
- else
- return 0;
- }
-
- if (it_disposition(it, DISP_LOOKUP_EXECD)) {
- if (phase >= DISP_LOOKUP_EXECD)
- return it->it_status;
- else
- return 0;
- }
-
- if (it_disposition(it, DISP_IT_EXECD)) {
- if (phase >= DISP_IT_EXECD)
- return it->it_status;
- else
- return 0;
- }
- CERROR("it disp: %X, status: %d\n", it->it_disposition,
- it->it_status);
- LBUG();
- return 0;
-}
-EXPORT_SYMBOL(it_open_error);
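
it_open_error() above checks the server's dispositions from the latest phase backwards and reports the intent status only when the caller asks about the phase the server actually reached (or a later one). A reduced standalone model; the DISP_* values here are placeholders, and the real set (from lustre_idl.h) also includes DISP_OPEN_LEASE:

	#include <stdio.h>

	#define DISP_IT_EXECD     0x01 /* placeholder bits */
	#define DISP_LOOKUP_EXECD 0x02
	#define DISP_OPEN_CREATE  0x04
	#define DISP_OPEN_OPEN    0x08

	static int open_error(int phase, int disposition, int status)
	{
		/* Check the latest phase the server reached first; report the
		 * status only if the caller asks about that phase or later. */
		if (disposition & DISP_OPEN_OPEN)
			return phase >= DISP_OPEN_OPEN ? status : 0;
		if (disposition & DISP_OPEN_CREATE)
			return phase >= DISP_OPEN_CREATE ? status : 0;
		if (disposition & DISP_LOOKUP_EXECD)
			return phase >= DISP_LOOKUP_EXECD ? status : 0;
		if (disposition & DISP_IT_EXECD)
			return phase >= DISP_IT_EXECD ? status : 0;
		return 0;
	}

	int main(void)
	{
		int disp = DISP_IT_EXECD | DISP_LOOKUP_EXECD;

		/* Lookup failed with -2: the earlier IT_EXECD phase reports 0,
		 * the LOOKUP_EXECD phase (and later) reports the error. */
		printf("%d\n", open_error(DISP_IT_EXECD, disp, -2));     /* 0  */
		printf("%d\n", open_error(DISP_LOOKUP_EXECD, disp, -2)); /* -2 */
		return 0;
	}
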
-
-/* this must be called on a lockh that is known to have a referenced lock */
-int mdc_set_lock_data(struct obd_export *exp, const struct lustre_handle *lockh,
- void *data, __u64 *bits)
-{
- struct ldlm_lock *lock;
- struct inode *new_inode = data;
-
- if (bits)
- *bits = 0;
-
- if (!lustre_handle_is_used(lockh))
- return 0;
-
- lock = ldlm_handle2lock(lockh);
-
- LASSERT(lock);
- lock_res_and_lock(lock);
- if (lock->l_resource->lr_lvb_inode &&
- lock->l_resource->lr_lvb_inode != data) {
- struct inode *old_inode = lock->l_resource->lr_lvb_inode;
-
- LASSERTF(old_inode->i_state & I_FREEING,
- "Found existing inode %p/%lu/%u state %lu in lock: setting data to %p/%lu/%u\n",
- old_inode, old_inode->i_ino, old_inode->i_generation,
- old_inode->i_state, new_inode, new_inode->i_ino,
- new_inode->i_generation);
- }
- lock->l_resource->lr_lvb_inode = new_inode;
- if (bits)
- *bits = lock->l_policy_data.l_inodebits.bits;
-
- unlock_res_and_lock(lock);
- LDLM_LOCK_PUT(lock);
-
- return 0;
-}
-
-enum ldlm_mode mdc_lock_match(struct obd_export *exp, __u64 flags,
- const struct lu_fid *fid, enum ldlm_type type,
- union ldlm_policy_data *policy,
- enum ldlm_mode mode,
- struct lustre_handle *lockh)
-{
- struct ldlm_res_id res_id;
- enum ldlm_mode rc;
-
- fid_build_reg_res_name(fid, &res_id);
- /* LU-4405: Clear bits not supported by server */
- policy->l_inodebits.bits &= exp_connect_ibits(exp);
- rc = ldlm_lock_match(class_exp2obd(exp)->obd_namespace, flags,
- &res_id, type, policy, mode, lockh, 0);
- return rc;
-}
-
-int mdc_cancel_unused(struct obd_export *exp,
- const struct lu_fid *fid,
- union ldlm_policy_data *policy,
- enum ldlm_mode mode,
- enum ldlm_cancel_flags flags,
- void *opaque)
-{
- struct ldlm_res_id res_id;
- struct obd_device *obd = class_exp2obd(exp);
- int rc;
-
- fid_build_reg_res_name(fid, &res_id);
- rc = ldlm_cli_cancel_unused_resource(obd->obd_namespace, &res_id,
- policy, mode, flags, opaque);
- return rc;
-}
-
-int mdc_null_inode(struct obd_export *exp,
- const struct lu_fid *fid)
-{
- struct ldlm_res_id res_id;
- struct ldlm_resource *res;
- struct ldlm_namespace *ns = class_exp2obd(exp)->obd_namespace;
-
- LASSERTF(ns, "no namespace passed\n");
-
- fid_build_reg_res_name(fid, &res_id);
-
- res = ldlm_resource_get(ns, NULL, &res_id, 0, 0);
- if (IS_ERR(res))
- return 0;
-
- lock_res(res);
- res->lr_lvb_inode = NULL;
- unlock_res(res);
-
- ldlm_resource_putref(res);
- return 0;
-}
-
-static inline void mdc_clear_replay_flag(struct ptlrpc_request *req, int rc)
-{
- /* Don't hold error requests for replay. */
- if (req->rq_replay) {
- spin_lock(&req->rq_lock);
- req->rq_replay = 0;
- spin_unlock(&req->rq_lock);
- }
- if (rc && req->rq_transno != 0) {
- DEBUG_REQ(D_ERROR, req, "transno returned on error rc %d", rc);
- LBUG();
- }
-}
-
-/* Save a large LOV EA into the request buffer so that it is available
- * for replay. We don't do this in the initial request because the
- * original request doesn't need this buffer (at most it sends just the
- * lov_mds_md), it would waste RAM/bandwidth to send an empty buffer,
- * and it may be difficult to allocate and keep a very large request
- * buffer for each open. (bug 5707)
- *
- * OOM here may cause recovery failure if lmm is needed (only for the
- * original open if the MDS crashed just when this client also OOM'd)
- * but this is incredibly unlikely, and questionable whether the client
- * could do MDS recovery under OOM anyways...
- */
-static void mdc_realloc_openmsg(struct ptlrpc_request *req,
- struct mdt_body *body)
-{
- int rc;
-
- /* FIXME: remove this explicit offset. */
- rc = sptlrpc_cli_enlarge_reqbuf(req, DLM_INTENT_REC_OFF + 4,
- body->mbo_eadatasize);
- if (rc) {
- CERROR("Can't enlarge segment %d size to %d\n",
- DLM_INTENT_REC_OFF + 4, body->mbo_eadatasize);
- body->mbo_valid &= ~OBD_MD_FLEASIZE;
- body->mbo_eadatasize = 0;
- }
-}
-
-static struct ptlrpc_request *
-mdc_intent_open_pack(struct obd_export *exp, struct lookup_intent *it,
- struct md_op_data *op_data)
-{
- struct ptlrpc_request *req;
- struct obd_device *obddev = class_exp2obd(exp);
- struct ldlm_intent *lit;
- const void *lmm = op_data->op_data;
- u32 lmmsize = op_data->op_data_size;
- LIST_HEAD(cancels);
- int count = 0;
- int mode;
- int rc;
-
- it->it_create_mode = (it->it_create_mode & ~S_IFMT) | S_IFREG;
-
- /* XXX: openlock is not cancelled for cross-refs. */
- /* If inode is known, cancel conflicting OPEN locks. */
- if (fid_is_sane(&op_data->op_fid2)) {
- if (it->it_flags & MDS_OPEN_LEASE) { /* try to get lease */
- if (it->it_flags & FMODE_WRITE)
- mode = LCK_EX;
- else
- mode = LCK_PR;
- } else {
- if (it->it_flags & (FMODE_WRITE | MDS_OPEN_TRUNC))
- mode = LCK_CW;
- else if (it->it_flags & __FMODE_EXEC)
- mode = LCK_PR;
- else
- mode = LCK_CR;
- }
- count = mdc_resource_get_unused(exp, &op_data->op_fid2,
- &cancels, mode,
- MDS_INODELOCK_OPEN);
- }
-
- /* If CREATE, cancel parent's UPDATE lock. */
- if (it->it_op & IT_CREAT)
- mode = LCK_EX;
- else
- mode = LCK_CR;
- count += mdc_resource_get_unused(exp, &op_data->op_fid1,
- &cancels, mode,
- MDS_INODELOCK_UPDATE);
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_LDLM_INTENT_OPEN);
- if (!req) {
- ldlm_lock_list_put(&cancels, l_bl_ast, count);
- return ERR_PTR(-ENOMEM);
- }
-
- req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
- op_data->op_namelen + 1);
- req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT,
- max(lmmsize, obddev->u.cli.cl_default_mds_easize));
-
- rc = ldlm_prep_enqueue_req(exp, req, &cancels, count);
- if (rc < 0) {
- ptlrpc_request_free(req);
- return ERR_PTR(rc);
- }
-
- spin_lock(&req->rq_lock);
- req->rq_replay = req->rq_import->imp_replayable;
- spin_unlock(&req->rq_lock);
-
- /* pack the intent */
- lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
- lit->opc = (__u64)it->it_op;
-
- /* pack the intended request */
- mdc_open_pack(req, op_data, it->it_create_mode, 0, it->it_flags, lmm,
- lmmsize);
-
- req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
- obddev->u.cli.cl_max_mds_easize);
- req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
- req->rq_import->imp_connect_data.ocd_max_easize);
-
- ptlrpc_request_set_replen(req);
- return req;
-}
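
The mode ladder near the top of mdc_intent_open_pack() picks how aggressively to cancel conflicting OPEN locks: exclusive or protected-read for lease opens, concurrent-write for write/truncate, protected-read for exec, concurrent-read otherwise. The same ladder in isolation, with placeholder flag values standing in for the Lustre constants:

	#include <stdio.h>

	#define FMODE_WRITE    0x2   /* placeholders, not the Lustre values */
	#define __FMODE_EXEC   0x20
	#define MDS_OPEN_LEASE 0x100
	#define MDS_OPEN_TRUNC 0x200

	enum lck_mode { LCK_EX, LCK_PR, LCK_CW, LCK_CR };

	static enum lck_mode open_cancel_mode(unsigned int it_flags)
	{
		if (it_flags & MDS_OPEN_LEASE) /* trying to get a lease */
			return it_flags & FMODE_WRITE ? LCK_EX : LCK_PR;
		if (it_flags & (FMODE_WRITE | MDS_OPEN_TRUNC))
			return LCK_CW;
		if (it_flags & __FMODE_EXEC)
			return LCK_PR;
		return LCK_CR;
	}

	int main(void)
	{
		printf("%d\n", open_cancel_mode(FMODE_WRITE));                  /* LCK_CW */
		printf("%d\n", open_cancel_mode(MDS_OPEN_LEASE | FMODE_WRITE)); /* LCK_EX */
		return 0;
	}
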
-
-#define GA_DEFAULT_EA_NAME_LEN 20
-#define GA_DEFAULT_EA_VAL_LEN 250
-#define GA_DEFAULT_EA_NUM 10
-
-static struct ptlrpc_request *
-mdc_intent_getxattr_pack(struct obd_export *exp,
- struct lookup_intent *it,
- struct md_op_data *op_data)
-{
- struct ptlrpc_request *req;
- struct ldlm_intent *lit;
- int rc, count = 0;
- LIST_HEAD(cancels);
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_LDLM_INTENT_GETXATTR);
- if (!req)
- return ERR_PTR(-ENOMEM);
-
- rc = ldlm_prep_enqueue_req(exp, req, &cancels, count);
- if (rc) {
- ptlrpc_request_free(req);
- return ERR_PTR(rc);
- }
-
- /* pack the intent */
- lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
- lit->opc = IT_GETXATTR;
-
- /* pack the intended request */
- mdc_pack_body(req, &op_data->op_fid1, op_data->op_valid,
- GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM, -1, 0);
-
- req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER,
- GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM);
-
- req_capsule_set_size(&req->rq_pill, &RMF_EAVALS, RCL_SERVER,
- GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM);
-
- req_capsule_set_size(&req->rq_pill, &RMF_EAVALS_LENS, RCL_SERVER,
- sizeof(u32) * GA_DEFAULT_EA_NUM);
-
- req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, 0);
-
- ptlrpc_request_set_replen(req);
-
- return req;
-}
-
-static struct ptlrpc_request *mdc_intent_unlink_pack(struct obd_export *exp,
- struct lookup_intent *it,
- struct md_op_data *op_data)
-{
- struct ptlrpc_request *req;
- struct obd_device *obddev = class_exp2obd(exp);
- struct ldlm_intent *lit;
- int rc;
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_LDLM_INTENT_UNLINK);
- if (!req)
- return ERR_PTR(-ENOMEM);
-
- req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
- op_data->op_namelen + 1);
-
- rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
- if (rc) {
- ptlrpc_request_free(req);
- return ERR_PTR(rc);
- }
-
- /* pack the intent */
- lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
- lit->opc = (__u64)it->it_op;
-
- /* pack the intended request */
- mdc_unlink_pack(req, op_data);
-
- req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
- obddev->u.cli.cl_default_mds_easize);
- ptlrpc_request_set_replen(req);
- return req;
-}
-
-static struct ptlrpc_request *mdc_intent_getattr_pack(struct obd_export *exp,
- struct lookup_intent *it,
- struct md_op_data *op_data)
-{
- struct ptlrpc_request *req;
- struct obd_device *obddev = class_exp2obd(exp);
- u64 valid = OBD_MD_FLGETATTR | OBD_MD_FLEASIZE |
- OBD_MD_FLMODEASIZE | OBD_MD_FLDIREA |
- OBD_MD_MEA | OBD_MD_FLACL;
- struct ldlm_intent *lit;
- int rc;
- u32 easize;
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_LDLM_INTENT_GETATTR);
- if (!req)
- return ERR_PTR(-ENOMEM);
-
- req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
- op_data->op_namelen + 1);
-
- rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
- if (rc) {
- ptlrpc_request_free(req);
- return ERR_PTR(rc);
- }
-
- /* pack the intent */
- lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
- lit->opc = (__u64)it->it_op;
-
- if (obddev->u.cli.cl_default_mds_easize > 0)
- easize = obddev->u.cli.cl_default_mds_easize;
- else
- easize = obddev->u.cli.cl_max_mds_easize;
-
- /* pack the intended request */
- mdc_getattr_pack(req, valid, it->it_flags, op_data, easize);
-
- req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, easize);
- req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
- req->rq_import->imp_connect_data.ocd_max_easize);
- ptlrpc_request_set_replen(req);
- return req;
-}
-
-static struct ptlrpc_request *mdc_intent_layout_pack(struct obd_export *exp,
- struct lookup_intent *it,
- struct md_op_data *unused)
-{
- struct obd_device *obd = class_exp2obd(exp);
- struct ptlrpc_request *req;
- struct ldlm_intent *lit;
- struct layout_intent *layout;
- int rc;
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_LDLM_INTENT_LAYOUT);
- if (!req)
- return ERR_PTR(-ENOMEM);
-
- req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT, 0);
- rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
- if (rc) {
- ptlrpc_request_free(req);
- return ERR_PTR(rc);
- }
-
- /* pack the intent */
- lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
- lit->opc = (__u64)it->it_op;
-
- /* pack the layout intent request */
- layout = req_capsule_client_get(&req->rq_pill, &RMF_LAYOUT_INTENT);
- /* LAYOUT_INTENT_ACCESS is generic; a specific operation will be
- * set for replication.
- */
- layout->li_opc = LAYOUT_INTENT_ACCESS;
-
- req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER,
- obd->u.cli.cl_default_mds_easize);
- ptlrpc_request_set_replen(req);
- return req;
-}
-
-static struct ptlrpc_request *
-mdc_enqueue_pack(struct obd_export *exp, int lvb_len)
-{
- struct ptlrpc_request *req;
- int rc;
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LDLM_ENQUEUE);
- if (!req)
- return ERR_PTR(-ENOMEM);
-
- rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
- if (rc) {
- ptlrpc_request_free(req);
- return ERR_PTR(rc);
- }
-
- req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, lvb_len);
- ptlrpc_request_set_replen(req);
- return req;
-}
-
-static int mdc_finish_enqueue(struct obd_export *exp,
- struct ptlrpc_request *req,
- struct ldlm_enqueue_info *einfo,
- struct lookup_intent *it,
- struct lustre_handle *lockh,
- int rc)
-{
- struct req_capsule *pill = &req->rq_pill;
- struct ldlm_request *lockreq;
- struct ldlm_reply *lockrep;
- struct ldlm_lock *lock;
- void *lvb_data = NULL;
- u32 lvb_len = 0;
-
- LASSERT(rc >= 0);
- /* Similarly, if we're going to replay this request, we don't want to
- * actually get a lock, just perform the intent.
- */
- if (req->rq_transno || req->rq_replay) {
- lockreq = req_capsule_client_get(pill, &RMF_DLM_REQ);
- lockreq->lock_flags |= ldlm_flags_to_wire(LDLM_FL_INTENT_ONLY);
- }
-
- if (rc == ELDLM_LOCK_ABORTED) {
- einfo->ei_mode = 0;
- memset(lockh, 0, sizeof(*lockh));
- rc = 0;
- } else { /* rc = 0 */
- lock = ldlm_handle2lock(lockh);
-
- /* If the server gave us back a different lock mode, we should
- * fix up our variables.
- */
- if (lock->l_req_mode != einfo->ei_mode) {
- ldlm_lock_addref(lockh, lock->l_req_mode);
- ldlm_lock_decref(lockh, einfo->ei_mode);
- einfo->ei_mode = lock->l_req_mode;
- }
- LDLM_LOCK_PUT(lock);
- }
-
- lockrep = req_capsule_server_get(pill, &RMF_DLM_REP);
-
- it->it_disposition = (int)lockrep->lock_policy_res1;
- it->it_status = (int)lockrep->lock_policy_res2;
- it->it_lock_mode = einfo->ei_mode;
- it->it_lock_handle = lockh->cookie;
- it->it_request = req;
-
- /* Technically speaking rq_transno must already be zero if
- * it_status is in error, so the check is a bit redundant
- */
- if ((!req->rq_transno || it->it_status < 0) && req->rq_replay)
- mdc_clear_replay_flag(req, it->it_status);
-
- /* If we're doing an IT_OPEN which did not result in an actual
- * successful open, then we need to remove the bit which saves
- * this request for unconditional replay.
- *
- * It's important that we do this first! Otherwise we might exit the
- * function without doing so, and try to replay a failed create
- * (bug 3440)
- */
- if (it->it_op & IT_OPEN && req->rq_replay &&
- (!it_disposition(it, DISP_OPEN_OPEN) || it->it_status != 0))
- mdc_clear_replay_flag(req, it->it_status);
-
- DEBUG_REQ(D_RPCTRACE, req, "op: %d disposition: %x, status: %d",
- it->it_op, it->it_disposition, it->it_status);
-
- /* We know what to expect, so we do any byte flipping required here */
- if (it_has_reply_body(it)) {
- struct mdt_body *body;
-
- body = req_capsule_server_get(pill, &RMF_MDT_BODY);
- if (!body) {
- CERROR("Can't swab mdt_body\n");
- return -EPROTO;
- }
-
- if (it_disposition(it, DISP_OPEN_OPEN) &&
- !it_open_error(DISP_OPEN_OPEN, it)) {
- /*
- * If this is a successful OPEN request, we need to set
- * replay handler and data early, so that if replay
- * happens immediately after swabbing below, new reply
- * is swabbed by that handler correctly.
- */
- mdc_set_open_replay_data(NULL, NULL, it);
- }
-
- if ((body->mbo_valid & (OBD_MD_FLDIREA | OBD_MD_FLEASIZE)) != 0) {
- void *eadata;
-
- mdc_update_max_ea_from_body(exp, body);
-
- /*
- * The eadata is opaque; just check that it is there.
- * Eventually, obd_unpackmd() will check the contents.
- */
- eadata = req_capsule_server_sized_get(pill, &RMF_MDT_MD,
- body->mbo_eadatasize);
- if (!eadata)
- return -EPROTO;
-
- /* save lvb data and length in case this is for layout
- * lock
- */
- lvb_data = eadata;
- lvb_len = body->mbo_eadatasize;
-
- /*
- * We save the reply LOV EA in case we have to replay a
- * create for recovery. If we didn't allocate a large
- * enough request buffer above we need to reallocate it
- * here to hold the actual LOV EA.
- *
- * The LOV EA is not saved if the request is not going to be
- * replayed (for example, a failed one).
- */
- if ((it->it_op & IT_OPEN) && req->rq_replay) {
- void *lmm;
-
- if (req_capsule_get_size(pill, &RMF_EADATA,
- RCL_CLIENT) <
- body->mbo_eadatasize)
- mdc_realloc_openmsg(req, body);
- else
- req_capsule_shrink(pill, &RMF_EADATA,
- body->mbo_eadatasize,
- RCL_CLIENT);
-
- req_capsule_set_size(pill, &RMF_EADATA,
- RCL_CLIENT,
- body->mbo_eadatasize);
-
- lmm = req_capsule_client_get(pill, &RMF_EADATA);
- if (lmm)
- memcpy(lmm, eadata, body->mbo_eadatasize);
- }
- }
- } else if (it->it_op & IT_LAYOUT) {
- /* The lock may have been granted right away, with the layout
- * packed into RMF_DLM_LVB of the request.
- */
- lvb_len = req_capsule_get_size(pill, &RMF_DLM_LVB, RCL_SERVER);
- if (lvb_len > 0) {
- lvb_data = req_capsule_server_sized_get(pill,
- &RMF_DLM_LVB,
- lvb_len);
- if (!lvb_data)
- return -EPROTO;
- }
- }
-
- /* fill in stripe data for layout lock */
- lock = ldlm_handle2lock(lockh);
- if (lock && ldlm_has_layout(lock) && lvb_data) {
- void *lmm;
-
- LDLM_DEBUG(lock, "layout lock returned by: %s, lvb_len: %d",
- ldlm_it2str(it->it_op), lvb_len);
-
- lmm = kvzalloc(lvb_len, GFP_NOFS);
- if (!lmm) {
- LDLM_LOCK_PUT(lock);
- return -ENOMEM;
- }
- memcpy(lmm, lvb_data, lvb_len);
-
- /* install lvb_data */
- lock_res_and_lock(lock);
- if (!lock->l_lvb_data) {
- lock->l_lvb_type = LVB_T_LAYOUT;
- lock->l_lvb_data = lmm;
- lock->l_lvb_len = lvb_len;
- lmm = NULL;
- }
- unlock_res_and_lock(lock);
- if (lmm)
- kvfree(lmm);
- }
- if (lock)
- LDLM_LOCK_PUT(lock);
-
- return rc;
-}
-
-/* We always reserve enough space in the reply packet for a stripe MD, because
- * we don't know in advance the file type.
- */
-int mdc_enqueue_base(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
- const union ldlm_policy_data *policy,
- struct lookup_intent *it, struct md_op_data *op_data,
- struct lustre_handle *lockh, u64 extra_lock_flags)
-{
- static const union ldlm_policy_data lookup_policy = {
- .l_inodebits = { MDS_INODELOCK_LOOKUP }
- };
- static const union ldlm_policy_data update_policy = {
- .l_inodebits = { MDS_INODELOCK_UPDATE }
- };
- static const union ldlm_policy_data layout_policy = {
- .l_inodebits = { MDS_INODELOCK_LAYOUT }
- };
- static const union ldlm_policy_data getxattr_policy = {
- .l_inodebits = { MDS_INODELOCK_XATTR }
- };
- struct obd_device *obddev = class_exp2obd(exp);
- struct ptlrpc_request *req = NULL;
- u64 flags, saved_flags = extra_lock_flags;
- struct ldlm_res_id res_id;
- int generation, resends = 0;
- struct ldlm_reply *lockrep;
- enum lvb_type lvb_type = LVB_T_NONE;
- int rc;
-
- LASSERTF(!it || einfo->ei_type == LDLM_IBITS, "lock type %d\n",
- einfo->ei_type);
- fid_build_reg_res_name(&op_data->op_fid1, &res_id);
-
- if (it) {
- LASSERT(!policy);
-
- saved_flags |= LDLM_FL_HAS_INTENT;
- if (it->it_op & (IT_UNLINK | IT_GETATTR | IT_READDIR))
- policy = &update_policy;
- else if (it->it_op & IT_LAYOUT)
- policy = &layout_policy;
- else if (it->it_op & (IT_GETXATTR | IT_SETXATTR))
- policy = &getxattr_policy;
- else
- policy = &lookup_policy;
- }
-
- generation = obddev->u.cli.cl_import->imp_generation;
-resend:
- flags = saved_flags;
- if (!it) {
- /* The only way right now is FLOCK. */
- LASSERTF(einfo->ei_type == LDLM_FLOCK, "lock type %d\n",
- einfo->ei_type);
- res_id.name[3] = LDLM_FLOCK;
- } else if (it->it_op & IT_OPEN) {
- req = mdc_intent_open_pack(exp, it, op_data);
- } else if (it->it_op & IT_UNLINK) {
- req = mdc_intent_unlink_pack(exp, it, op_data);
- } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
- req = mdc_intent_getattr_pack(exp, it, op_data);
- } else if (it->it_op & IT_READDIR) {
- req = mdc_enqueue_pack(exp, 0);
- } else if (it->it_op & IT_LAYOUT) {
- if (!imp_connect_lvb_type(class_exp2cliimp(exp)))
- return -EOPNOTSUPP;
- req = mdc_intent_layout_pack(exp, it, op_data);
- lvb_type = LVB_T_LAYOUT;
- } else if (it->it_op & IT_GETXATTR) {
- req = mdc_intent_getxattr_pack(exp, it, op_data);
- } else {
- LBUG();
- return -EINVAL;
- }
-
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- if (resends) {
- req->rq_generation_set = 1;
- req->rq_import_generation = generation;
- req->rq_sent = ktime_get_real_seconds() + resends;
- }
-
- /* It is important to obtain the modify RPC slot first (if applicable),
- * so that threads waiting for a modify RPC slot do not pollute our
- * RPCs-in-flight counter.
- * We do not limit flock requests, though.
- */
- if (it) {
- mdc_get_mod_rpc_slot(req, it);
- rc = obd_get_request_slot(&obddev->u.cli);
- if (rc != 0) {
- mdc_put_mod_rpc_slot(req, it);
- mdc_clear_replay_flag(req, 0);
- ptlrpc_req_finished(req);
- return rc;
- }
- }
-
- rc = ldlm_cli_enqueue(exp, &req, einfo, &res_id, policy, &flags, NULL,
- 0, lvb_type, lockh, 0);
- if (!it) {
- /* For flock requests we return immediately and let the caller deal
- * with the rest, since the metadata processing in the remainder of
- * this function makes no sense for flock requests anyway. But if there
- * is a communication problem with the server (ETIMEDOUT) or any
- * signal/kill attempt (EINTR), we cannot rely on the caller; this
- * matters mainly for F_UNLCK requests (explicit, or generated
- * automatically by the kernel to clean up a process's flocks on exit)
- * that must not be discarded.
- */
- if (((rc == -EINTR) || (rc == -ETIMEDOUT)) &&
- (einfo->ei_type == LDLM_FLOCK) &&
- (einfo->ei_mode == LCK_NL))
- goto resend;
- return rc;
- }
-
- obd_put_request_slot(&obddev->u.cli);
- mdc_put_mod_rpc_slot(req, it);
-
- if (rc < 0) {
- CDEBUG(D_INFO, "%s: ldlm_cli_enqueue failed: rc = %d\n",
- obddev->obd_name, rc);
-
- mdc_clear_replay_flag(req, rc);
- ptlrpc_req_finished(req);
- return rc;
- }
-
- lockrep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
-
- lockrep->lock_policy_res2 =
- ptlrpc_status_ntoh(lockrep->lock_policy_res2);
-
- /*
- * Retry indefinitely when the server returns -EINPROGRESS for the
- * intent operation; when the server returns -EINPROGRESS for
- * acquiring the intent lock, we retry in after_reply().
- */
- if (it->it_op && (int)lockrep->lock_policy_res2 == -EINPROGRESS) {
- mdc_clear_replay_flag(req, rc);
- ptlrpc_req_finished(req);
- resends++;
-
- CDEBUG(D_HA, "%s: resend:%d op:%d " DFID "/" DFID "\n",
- obddev->obd_name, resends, it->it_op,
- PFID(&op_data->op_fid1), PFID(&op_data->op_fid2));
-
- if (generation == obddev->u.cli.cl_import->imp_generation) {
- goto resend;
- } else {
- CDEBUG(D_HA, "resend cross eviction\n");
- return -EIO;
- }
- }
-
- rc = mdc_finish_enqueue(exp, req, einfo, it, lockh, rc);
- if (rc < 0) {
- if (lustre_handle_is_used(lockh)) {
- ldlm_lock_decref(lockh, einfo->ei_mode);
- memset(lockh, 0, sizeof(*lockh));
- }
- ptlrpc_req_finished(req);
-
- it->it_lock_handle = 0;
- it->it_lock_mode = 0;
- it->it_request = NULL;
- }
-
- return rc;
-}
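
The resend path above retries only while the import generation is unchanged; an eviction bumps the generation and turns the retry into -EIO ("resend cross eviction"). A minimal sketch of that generation-checked retry pattern, with stand-in names and a fake enqueue that fails twice:

	#include <stdio.h>

	struct import { int generation; };

	/* Stand-in for ldlm_cli_enqueue(): fails twice with -EINPROGRESS. */
	static int do_enqueue(int *tries)
	{
		return ++*tries < 3 ? -115 /* -EINPROGRESS */ : 0;
	}

	static int enqueue_with_resend(struct import *imp)
	{
		int generation = imp->generation;
		int tries = 0;
		int rc;

	resend:
		rc = do_enqueue(&tries);
		if (rc == -115) {
			/* retry only while the import generation is unchanged;
			 * an eviction bumps it and aborts the retry loop */
			if (generation == imp->generation)
				goto resend;
			return -5; /* -EIO */
		}
		return rc;
	}

	int main(void)
	{
		struct import imp = { .generation = 1 };

		printf("%d\n", enqueue_with_resend(&imp)); /* 0 after two retries */
		return 0;
	}
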
-
-int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
- const union ldlm_policy_data *policy,
- struct md_op_data *op_data,
- struct lustre_handle *lockh, u64 extra_lock_flags)
-{
- return mdc_enqueue_base(exp, einfo, policy, NULL,
- op_data, lockh, extra_lock_flags);
-}
-
-static int mdc_finish_intent_lock(struct obd_export *exp,
- struct ptlrpc_request *request,
- struct md_op_data *op_data,
- struct lookup_intent *it,
- struct lustre_handle *lockh)
-{
- struct lustre_handle old_lock;
- struct ldlm_lock *lock;
- int rc = 0;
-
- LASSERT(request != LP_POISON);
- LASSERT(request->rq_repmsg != LP_POISON);
-
- if (it->it_op & IT_READDIR)
- return 0;
-
- if (it->it_op & (IT_GETXATTR | IT_LAYOUT)) {
- if (it->it_status != 0) {
- rc = it->it_status;
- goto out;
- }
- goto matching_lock;
- }
-
- if (!it_disposition(it, DISP_IT_EXECD)) {
- /* The server failed before it even started executing the
- * intent, i.e. because it couldn't unpack the request.
- */
- LASSERT(it->it_status != 0);
- rc = it->it_status;
- goto out;
- }
-
- rc = it_open_error(DISP_IT_EXECD, it);
- if (rc)
- goto out;
-
- rc = it_open_error(DISP_LOOKUP_EXECD, it);
- if (rc)
- goto out;
-
- /* Keep the request around for the multiple phases of the call;
- * the DISP_XX dispositions guarantee we make it into the
- * corresponding phase.
- */
- if (!it_disposition(it, DISP_ENQ_CREATE_REF) &&
- it_disposition(it, DISP_OPEN_CREATE) &&
- !it_open_error(DISP_OPEN_CREATE, it)) {
- it_set_disposition(it, DISP_ENQ_CREATE_REF);
- ptlrpc_request_addref(request); /* balanced in ll_create_node */
- }
- if (!it_disposition(it, DISP_ENQ_OPEN_REF) &&
- it_disposition(it, DISP_OPEN_OPEN) &&
- !it_open_error(DISP_OPEN_OPEN, it)) {
- it_set_disposition(it, DISP_ENQ_OPEN_REF);
- ptlrpc_request_addref(request); /* balanced in ll_file_open */
- /* BUG 11546 - eviction in the middle of open rpc processing */
- OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_ENQUEUE_PAUSE, obd_timeout);
- }
-
- if (it->it_op & IT_CREAT)
- /* XXX this belongs in ll_create_it */
- ;
- else if (it->it_op == IT_OPEN)
- LASSERT(!it_disposition(it, DISP_OPEN_CREATE));
- else
- LASSERT(it->it_op & (IT_GETATTR | IT_LOOKUP));
-
-matching_lock:
- /* If we already have a matching lock, then cancel the new
- * one. We have to set the data here instead of in
- * mdc_enqueue, because we need to use the child's inode as
- * the l_ast_data to match, and that's not available until
- * intent_finish has performed the iget().
- */
- lock = ldlm_handle2lock(lockh);
- if (lock) {
- union ldlm_policy_data policy = lock->l_policy_data;
-
- LDLM_DEBUG(lock, "matching against this");
-
- if (it_has_reply_body(it)) {
- struct mdt_body *body;
-
- body = req_capsule_server_get(&request->rq_pill,
- &RMF_MDT_BODY);
-
- /* mdc_enqueue checked */
- LASSERT(body);
- LASSERTF(fid_res_name_eq(&body->mbo_fid1,
- &lock->l_resource->lr_name),
- "Lock res_id: " DLDLMRES ", fid: " DFID "\n",
- PLDLMRES(lock->l_resource),
- PFID(&body->mbo_fid1));
- }
- LDLM_LOCK_PUT(lock);
-
- memcpy(&old_lock, lockh, sizeof(*lockh));
- if (ldlm_lock_match(NULL, LDLM_FL_BLOCK_GRANTED, NULL,
- LDLM_IBITS, &policy, LCK_NL,
- &old_lock, 0)) {
- ldlm_lock_decref_and_cancel(lockh,
- it->it_lock_mode);
- memcpy(lockh, &old_lock, sizeof(old_lock));
- it->it_lock_handle = lockh->cookie;
- }
- }
-out:
- CDEBUG(D_DENTRY,
- "D_IT dentry %.*s intent: %s status %d disp %x rc %d\n",
- (int)op_data->op_namelen, op_data->op_name,
- ldlm_it2str(it->it_op), it->it_status, it->it_disposition, rc);
- return rc;
-}
-
-int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
- struct lu_fid *fid, __u64 *bits)
-{
- /* We could just return 1 immediately, but since we should only
- * be called in revalidate_it if we already have a lock, let's
- * verify that.
- */
- struct ldlm_res_id res_id;
- struct lustre_handle lockh;
- union ldlm_policy_data policy;
- enum ldlm_mode mode;
-
- if (it->it_lock_handle) {
- lockh.cookie = it->it_lock_handle;
- mode = ldlm_revalidate_lock_handle(&lockh, bits);
- } else {
- fid_build_reg_res_name(fid, &res_id);
- switch (it->it_op) {
- case IT_GETATTR:
- /* File attributes are held under multiple bits:
- * nlink is under lookup lock, size and times are
- * under UPDATE lock and recently we've also got
- * a separate permissions lock for owner/group/acl that
- * were protected by lookup lock before.
- * Getattr must provide all of that information,
- * so we need to ensure we have all of those locks.
- * Unfortunately, if the bits are split across multiple
- * locks, there's no easy way to match all of them here,
- * so for now an extra RPC is performed to fetch all
- * of those bits at once.
- */
- /* For new MDTs (> 2.4), UPDATE|PERM should be enough,
- * but for old MDTs (< 2.4), permission is covered
- * by LOOKUP lock, so it needs to match all bits here.
- */
- policy.l_inodebits.bits = MDS_INODELOCK_UPDATE |
- MDS_INODELOCK_LOOKUP |
- MDS_INODELOCK_PERM;
- break;
- case IT_READDIR:
- policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
- break;
- case IT_LAYOUT:
- policy.l_inodebits.bits = MDS_INODELOCK_LAYOUT;
- break;
- default:
- policy.l_inodebits.bits = MDS_INODELOCK_LOOKUP;
- break;
- }
-
- mode = mdc_lock_match(exp, LDLM_FL_BLOCK_GRANTED, fid,
- LDLM_IBITS, &policy,
- LCK_CR | LCK_CW | LCK_PR | LCK_PW,
- &lockh);
- }
-
- if (mode) {
- it->it_lock_handle = lockh.cookie;
- it->it_lock_mode = mode;
- } else {
- it->it_lock_handle = 0;
- it->it_lock_mode = 0;
- }
-
- return !!mode;
-}
-
-/*
- * This long block is all about fixing up the lock and request state
- * so that it is correct as of the moment _before_ the operation was
- * applied; that way, the VFS will think that everything is normal and
- * call Lustre's regular VFS methods.
- *
- * If we're performing a creation, that means that unless the creation
- * failed with EEXIST, we should fake up a negative dentry.
- *
- * For everything else, we want the lookup to succeed.
- *
- * One additional note: if CREATE or OPEN succeeded, we add an extra
- * reference to the request because we need to keep it around until
- * ll_create/ll_open gets called.
- *
- * The server will return to us, in it_disposition, an indication of
- * exactly what it_status refers to.
- *
- * If DISP_OPEN_OPEN is set, then it_status refers to the open() call,
- * otherwise if DISP_OPEN_CREATE is set, then it_status is the
- * creation failure mode. In either case, one of DISP_LOOKUP_NEG or
- * DISP_LOOKUP_POS will be set, indicating whether the child lookup
- * was successful.
- *
- * Else, if DISP_LOOKUP_EXECD then it_status is the rc of the
- * child lookup.
- */
-int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
- struct lookup_intent *it, struct ptlrpc_request **reqp,
- ldlm_blocking_callback cb_blocking, __u64 extra_lock_flags)
-{
- struct ldlm_enqueue_info einfo = {
- .ei_type = LDLM_IBITS,
- .ei_mode = it_to_lock_mode(it),
- .ei_cb_bl = cb_blocking,
- .ei_cb_cp = ldlm_completion_ast,
- };
- struct lustre_handle lockh;
- int rc = 0;
-
- LASSERT(it);
-
- CDEBUG(D_DLMTRACE, "(name: %.*s," DFID ") in obj " DFID
- ", intent: %s flags %#Lo\n", (int)op_data->op_namelen,
- op_data->op_name, PFID(&op_data->op_fid2),
- PFID(&op_data->op_fid1), ldlm_it2str(it->it_op),
- it->it_flags);
-
- lockh.cookie = 0;
- if (fid_is_sane(&op_data->op_fid2) &&
- (it->it_op & (IT_LOOKUP | IT_GETATTR | IT_READDIR))) {
- /* We could just return 1 immediately, but since we should only
- * be called in revalidate_it if we already have a lock, let's
- * verify that.
- */
- it->it_lock_handle = 0;
- rc = mdc_revalidate_lock(exp, it, &op_data->op_fid2, NULL);
- /* Only return failure if it was not GETATTR by cfid
- * (from inode_revalidate)
- */
- if (rc || op_data->op_namelen != 0)
- return rc;
- }
-
- /* In case the upper layer did not allocate the fid, do it now. */
- if (!fid_is_sane(&op_data->op_fid2) && it->it_op & IT_CREAT) {
- rc = mdc_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
- if (rc < 0) {
- CERROR("Can't alloc new fid, rc %d\n", rc);
- return rc;
- }
- }
-
- rc = mdc_enqueue_base(exp, &einfo, NULL, it, op_data, &lockh,
- extra_lock_flags);
- if (rc < 0)
- return rc;
-
- *reqp = it->it_request;
- rc = mdc_finish_intent_lock(exp, *reqp, op_data, it, &lockh);
- return rc;
-}
-
-static int mdc_intent_getattr_async_interpret(const struct lu_env *env,
- struct ptlrpc_request *req,
- void *args, int rc)
-{
- struct mdc_getattr_args *ga = args;
- struct obd_export *exp = ga->ga_exp;
- struct md_enqueue_info *minfo = ga->ga_minfo;
- struct ldlm_enqueue_info *einfo = &minfo->mi_einfo;
- struct lookup_intent *it;
- struct lustre_handle *lockh;
- struct obd_device *obddev;
- struct ldlm_reply *lockrep;
- __u64 flags = LDLM_FL_HAS_INTENT;
-
- it = &minfo->mi_it;
- lockh = &minfo->mi_lockh;
-
- obddev = class_exp2obd(exp);
-
- obd_put_request_slot(&obddev->u.cli);
- if (OBD_FAIL_CHECK(OBD_FAIL_MDC_GETATTR_ENQUEUE))
- rc = -ETIMEDOUT;
-
- rc = ldlm_cli_enqueue_fini(exp, req, einfo->ei_type, 1, einfo->ei_mode,
- &flags, NULL, 0, lockh, rc);
- if (rc < 0) {
- CERROR("ldlm_cli_enqueue_fini: %d\n", rc);
- mdc_clear_replay_flag(req, rc);
- goto out;
- }
-
- lockrep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
-
- lockrep->lock_policy_res2 =
- ptlrpc_status_ntoh(lockrep->lock_policy_res2);
-
- rc = mdc_finish_enqueue(exp, req, einfo, it, lockh, rc);
- if (rc)
- goto out;
-
- rc = mdc_finish_intent_lock(exp, req, &minfo->mi_data, it, lockh);
-
-out:
- minfo->mi_cb(req, minfo, rc);
- return 0;
-}
-
-int mdc_intent_getattr_async(struct obd_export *exp,
- struct md_enqueue_info *minfo)
-{
- struct md_op_data *op_data = &minfo->mi_data;
- struct lookup_intent *it = &minfo->mi_it;
- struct ptlrpc_request *req;
- struct mdc_getattr_args *ga;
- struct obd_device *obddev = class_exp2obd(exp);
- struct ldlm_res_id res_id;
- union ldlm_policy_data policy = {
- .l_inodebits = { MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE }
- };
- int rc = 0;
- __u64 flags = LDLM_FL_HAS_INTENT;
-
- CDEBUG(D_DLMTRACE,
- "name: %.*s in inode " DFID ", intent: %s flags %#Lo\n",
- (int)op_data->op_namelen, op_data->op_name,
- PFID(&op_data->op_fid1), ldlm_it2str(it->it_op), it->it_flags);
-
- fid_build_reg_res_name(&op_data->op_fid1, &res_id);
- req = mdc_intent_getattr_pack(exp, it, op_data);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- rc = obd_get_request_slot(&obddev->u.cli);
- if (rc != 0) {
- ptlrpc_req_finished(req);
- return rc;
- }
-
- rc = ldlm_cli_enqueue(exp, &req, &minfo->mi_einfo, &res_id, &policy,
- &flags, NULL, 0, LVB_T_NONE, &minfo->mi_lockh, 1);
- if (rc < 0) {
- obd_put_request_slot(&obddev->u.cli);
- ptlrpc_req_finished(req);
- return rc;
- }
-
- BUILD_BUG_ON(sizeof(*ga) > sizeof(req->rq_async_args));
- ga = ptlrpc_req_async_args(req);
- ga->ga_exp = exp;
- ga->ga_minfo = minfo;
-
- req->rq_interpret_reply = mdc_intent_getattr_async_interpret;
- ptlrpcd_add_req(req);
-
- return 0;
-}
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_reint.c b/drivers/staging/lustre/lustre/mdc/mdc_reint.c
deleted file mode 100644
index e77c00df0693..000000000000
--- a/drivers/staging/lustre/lustre/mdc/mdc_reint.c
+++ /dev/null
@@ -1,421 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_MDC
-
-# include <linux/module.h>
-# include <linux/kernel.h>
-
-#include <obd_class.h>
-#include "mdc_internal.h"
-#include <lustre_fid.h>
-
-/* mdc_setattr does its own semaphore handling */
-static int mdc_reint(struct ptlrpc_request *request, int level)
-{
- int rc;
-
- request->rq_send_state = level;
-
- mdc_get_mod_rpc_slot(request, NULL);
- rc = ptlrpc_queue_wait(request);
- mdc_put_mod_rpc_slot(request, NULL);
- if (rc)
- CDEBUG(D_INFO, "error in handling %d\n", rc);
- else if (!req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY))
- rc = -EPROTO;
-
- return rc;
-}
-
-/* Find and cancel locally the locks matching the inode @bits & @mode in the
- * resource found by @fid. Found locks are added to the @cancels list. Returns
- * the number of locks added to the @cancels list.
- */
-int mdc_resource_get_unused(struct obd_export *exp, const struct lu_fid *fid,
- struct list_head *cancels, enum ldlm_mode mode,
- __u64 bits)
-{
- struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
- union ldlm_policy_data policy = {};
- struct ldlm_res_id res_id;
- struct ldlm_resource *res;
- int count;
-
- /* Return right away, i.e. cancel nothing, only if ELC is supported
- * (flag in export) but disabled through procfs (flag in NS).
- *
- * This is distinct from the case when ELC was never supported, in
- * which we still want to cancel locks in advance and simply cancel
- * them locally, without sending any RPC.
- */
- if (exp_connect_cancelset(exp) && !ns_connect_cancelset(ns))
- return 0;
-
- fid_build_reg_res_name(fid, &res_id);
- res = ldlm_resource_get(exp->exp_obd->obd_namespace,
- NULL, &res_id, 0, 0);
- if (IS_ERR(res))
- return 0;
- LDLM_RESOURCE_ADDREF(res);
- /* Initialize ibits lock policy. */
- policy.l_inodebits.bits = bits;
- count = ldlm_cancel_resource_local(res, cancels, &policy,
- mode, 0, 0, NULL);
- LDLM_RESOURCE_DELREF(res);
- ldlm_resource_putref(res);
- return count;
-}
-
-int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
- void *ea, size_t ealen, struct ptlrpc_request **request)
-{
- LIST_HEAD(cancels);
- struct ptlrpc_request *req;
- int count = 0, rc;
- __u64 bits;
-
- bits = MDS_INODELOCK_UPDATE;
- if (op_data->op_attr.ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
- bits |= MDS_INODELOCK_LOOKUP;
- if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
- (fid_is_sane(&op_data->op_fid1)))
- count = mdc_resource_get_unused(exp, &op_data->op_fid1,
- &cancels, LCK_EX, bits);
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_MDS_REINT_SETATTR);
- if (!req) {
- ldlm_lock_list_put(&cancels, l_bl_ast, count);
- return -ENOMEM;
- }
- req_capsule_set_size(&req->rq_pill, &RMF_MDT_EPOCH, RCL_CLIENT, 0);
- req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT, ealen);
- req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_CLIENT, 0);
-
- rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- if (op_data->op_attr.ia_valid & (ATTR_MTIME | ATTR_CTIME))
- CDEBUG(D_INODE, "setting mtime %ld, ctime %ld\n",
- LTIME_S(op_data->op_attr.ia_mtime),
- LTIME_S(op_data->op_attr.ia_ctime));
- mdc_setattr_pack(req, op_data, ea, ealen);
-
- req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
- req->rq_import->imp_connect_data.ocd_max_easize);
- ptlrpc_request_set_replen(req);
-
- rc = mdc_reint(req, LUSTRE_IMP_FULL);
-
- if (rc == -ERESTARTSYS)
- rc = 0;
-
- *request = req;
-
- return rc;
-}
-
-int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
- const void *data, size_t datalen, umode_t mode,
- uid_t uid, gid_t gid, kernel_cap_t cap_effective,
- __u64 rdev, struct ptlrpc_request **request)
-{
- struct ptlrpc_request *req;
- int level, rc;
- int count, resends = 0;
- struct obd_import *import = exp->exp_obd->u.cli.cl_import;
- int generation = import->imp_generation;
- LIST_HEAD(cancels);
-
- /* In case the upper layer did not allocate the fid, do it now. */
- if (!fid_is_sane(&op_data->op_fid2)) {
- /*
- * mdc_fid_alloc() may return 1 in case of a switch to a new
- * sequence; handle this.
- */
- rc = mdc_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
- if (rc < 0)
- return rc;
- }
-
-rebuild:
- count = 0;
- if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
- (fid_is_sane(&op_data->op_fid1)))
- count = mdc_resource_get_unused(exp, &op_data->op_fid1,
- &cancels, LCK_EX,
- MDS_INODELOCK_UPDATE);
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_MDS_REINT_CREATE_ACL);
- if (!req) {
- ldlm_lock_list_put(&cancels, l_bl_ast, count);
- return -ENOMEM;
- }
- req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
- op_data->op_namelen + 1);
- req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT,
- data && datalen ? datalen : 0);
-
- rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- /*
- * mdc_create_pack() fills msg->bufs[1] with name and msg->bufs[2] with
- * tgt, for symlinks or lov MD data.
- */
- mdc_create_pack(req, op_data, data, datalen, mode, uid,
- gid, cap_effective, rdev);
-
- ptlrpc_request_set_replen(req);
-
- /* ask ptlrpc not to resend on EINPROGRESS since we have our own retry
- * logic here
- */
- req->rq_no_retry_einprogress = 1;
-
- if (resends) {
- req->rq_generation_set = 1;
- req->rq_import_generation = generation;
- req->rq_sent = ktime_get_real_seconds() + resends;
- }
- level = LUSTRE_IMP_FULL;
- resend:
- rc = mdc_reint(req, level);
-
- /* Resend if we were told to. */
- if (rc == -ERESTARTSYS) {
- level = LUSTRE_IMP_RECOVER;
- goto resend;
- } else if (rc == -EINPROGRESS) {
- /* Retry the create indefinitely until it succeeds or
- * fails with another error code.
- */
- ptlrpc_req_finished(req);
- resends++;
-
- CDEBUG(D_HA, "%s: resend:%d create on " DFID "/" DFID "\n",
- exp->exp_obd->obd_name, resends,
- PFID(&op_data->op_fid1), PFID(&op_data->op_fid2));
-
- if (generation == import->imp_generation) {
- goto rebuild;
- } else {
- CDEBUG(D_HA, "resend cross eviction\n");
- return -EIO;
- }
- }
-
- *request = req;
- return rc;
-}
-
-int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
- struct ptlrpc_request **request)
-{
- LIST_HEAD(cancels);
- struct obd_device *obd = class_exp2obd(exp);
- struct ptlrpc_request *req = *request;
- int count = 0, rc;
-
- LASSERT(!req);
-
- if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
- (fid_is_sane(&op_data->op_fid1)))
- count = mdc_resource_get_unused(exp, &op_data->op_fid1,
- &cancels, LCK_EX,
- MDS_INODELOCK_UPDATE);
- if ((op_data->op_flags & MF_MDC_CANCEL_FID3) &&
- (fid_is_sane(&op_data->op_fid3)))
- count += mdc_resource_get_unused(exp, &op_data->op_fid3,
- &cancels, LCK_EX,
- MDS_INODELOCK_FULL);
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_MDS_REINT_UNLINK);
- if (!req) {
- ldlm_lock_list_put(&cancels, l_bl_ast, count);
- return -ENOMEM;
- }
- req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
- op_data->op_namelen + 1);
-
- rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- mdc_unlink_pack(req, op_data);
-
- req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
- obd->u.cli.cl_default_mds_easize);
- ptlrpc_request_set_replen(req);
-
- *request = req;
-
- rc = mdc_reint(req, LUSTRE_IMP_FULL);
- if (rc == -ERESTARTSYS)
- rc = 0;
- return rc;
-}
-
-int mdc_link(struct obd_export *exp, struct md_op_data *op_data,
- struct ptlrpc_request **request)
-{
- LIST_HEAD(cancels);
- struct ptlrpc_request *req;
- int count = 0, rc;
-
- if ((op_data->op_flags & MF_MDC_CANCEL_FID2) &&
- (fid_is_sane(&op_data->op_fid2)))
- count = mdc_resource_get_unused(exp, &op_data->op_fid2,
- &cancels, LCK_EX,
- MDS_INODELOCK_UPDATE);
- if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
- (fid_is_sane(&op_data->op_fid1)))
- count += mdc_resource_get_unused(exp, &op_data->op_fid1,
- &cancels, LCK_EX,
- MDS_INODELOCK_UPDATE);
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_REINT_LINK);
- if (!req) {
- ldlm_lock_list_put(&cancels, l_bl_ast, count);
- return -ENOMEM;
- }
- req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
- op_data->op_namelen + 1);
-
- rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- mdc_link_pack(req, op_data);
- ptlrpc_request_set_replen(req);
-
- rc = mdc_reint(req, LUSTRE_IMP_FULL);
- *request = req;
- if (rc == -ERESTARTSYS)
- rc = 0;
-
- return rc;
-}
-
-int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
- const char *old, size_t oldlen, const char *new, size_t newlen,
- struct ptlrpc_request **request)
-{
- LIST_HEAD(cancels);
- struct obd_device *obd = exp->exp_obd;
- struct ptlrpc_request *req;
- int count = 0, rc;
-
- if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
- (fid_is_sane(&op_data->op_fid1)))
- count = mdc_resource_get_unused(exp, &op_data->op_fid1,
- &cancels, LCK_EX,
- MDS_INODELOCK_UPDATE);
- if ((op_data->op_flags & MF_MDC_CANCEL_FID2) &&
- (fid_is_sane(&op_data->op_fid2)))
- count += mdc_resource_get_unused(exp, &op_data->op_fid2,
- &cancels, LCK_EX,
- MDS_INODELOCK_UPDATE);
- if ((op_data->op_flags & MF_MDC_CANCEL_FID3) &&
- (fid_is_sane(&op_data->op_fid3)))
- count += mdc_resource_get_unused(exp, &op_data->op_fid3,
- &cancels, LCK_EX,
- MDS_INODELOCK_LOOKUP);
- if ((op_data->op_flags & MF_MDC_CANCEL_FID4) &&
- (fid_is_sane(&op_data->op_fid4)))
- count += mdc_resource_get_unused(exp, &op_data->op_fid4,
- &cancels, LCK_EX,
- MDS_INODELOCK_FULL);
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- op_data->op_cli_flags & CLI_MIGRATE ?
- &RQF_MDS_REINT_MIGRATE : &RQF_MDS_REINT_RENAME);
- if (!req) {
- ldlm_lock_list_put(&cancels, l_bl_ast, count);
- return -ENOMEM;
- }
-
- req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, oldlen + 1);
- req_capsule_set_size(&req->rq_pill, &RMF_SYMTGT, RCL_CLIENT,
- newlen + 1);
-
- rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- if (op_data->op_cli_flags & CLI_MIGRATE && op_data->op_data) {
- struct md_open_data *mod = op_data->op_data;
-
- LASSERTF(mod->mod_open_req &&
- mod->mod_open_req->rq_type != LI_POISON,
- "POISONED open %p!\n", mod->mod_open_req);
-
- DEBUG_REQ(D_HA, mod->mod_open_req, "matched open");
- /*
- * We no longer want to preserve this open for replay even
- * though the open was committed. b=3632, b=3633
- */
- spin_lock(&mod->mod_open_req->rq_lock);
- mod->mod_open_req->rq_replay = 0;
- spin_unlock(&mod->mod_open_req->rq_lock);
- }
-
- if (exp_connect_cancelset(exp) && req)
- ldlm_cli_cancel_list(&cancels, count, req, 0);
-
- mdc_rename_pack(req, op_data, old, oldlen, new, newlen);
-
- req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
- obd->u.cli.cl_default_mds_easize);
- ptlrpc_request_set_replen(req);
-
- rc = mdc_reint(req, LUSTRE_IMP_FULL);
- *request = req;
- if (rc == -ERESTARTSYS)
- rc = 0;
-
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c
deleted file mode 100644
index cff31cb0a9ac..000000000000
--- a/drivers/staging/lustre/lustre/mdc/mdc_request.c
+++ /dev/null
@@ -1,2770 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_MDC
-
-# include <linux/module.h>
-# include <linux/pagemap.h>
-# include <linux/miscdevice.h>
-# include <linux/init.h>
-# include <linux/utsname.h>
-# include <linux/file.h>
-# include <linux/kthread.h>
-#include <linux/prefetch.h>
-
-#include <lustre_errno.h>
-#include <cl_object.h>
-#include <llog_swab.h>
-#include <lprocfs_status.h>
-#include <lustre_acl.h>
-#include <lustre_fid.h>
-#include <uapi/linux/lustre/lustre_ioctl.h>
-#include <lustre_kernelcomm.h>
-#include <lustre_lmv.h>
-#include <lustre_log.h>
-#include <uapi/linux/lustre/lustre_param.h>
-#include <lustre_swab.h>
-#include <obd_class.h>
-
-#include "mdc_internal.h"
-
-#define REQUEST_MINOR 244
-
-static int mdc_cleanup(struct obd_device *obd);
-
-static inline int mdc_queue_wait(struct ptlrpc_request *req)
-{
- struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
- int rc;
-
- /* obd_get_request_slot() ensures that this client has no more
- * than cl_max_rpcs_in_flight RPCs simultaneously in flight
- * against an MDT.
- */
- rc = obd_get_request_slot(cli);
- if (rc != 0)
- return rc;
-
- rc = ptlrpc_queue_wait(req);
- obd_put_request_slot(cli);
-
- return rc;
-}
-
-static int mdc_getstatus(struct obd_export *exp, struct lu_fid *rootfid)
-{
- struct ptlrpc_request *req;
- struct mdt_body *body;
- int rc;
-
- req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
- &RQF_MDS_GETSTATUS,
- LUSTRE_MDS_VERSION, MDS_GETSTATUS);
- if (!req)
- return -ENOMEM;
-
- mdc_pack_body(req, NULL, 0, 0, -1, 0);
- req->rq_send_state = LUSTRE_IMP_FULL;
-
- ptlrpc_request_set_replen(req);
-
- rc = ptlrpc_queue_wait(req);
- if (rc)
- goto out;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!body) {
- rc = -EPROTO;
- goto out;
- }
-
- *rootfid = body->mbo_fid1;
- CDEBUG(D_NET,
- "root fid=" DFID ", last_committed=%llu\n",
- PFID(rootfid),
- lustre_msg_get_last_committed(req->rq_repmsg));
-out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-/*
- * This function is known to always say that it will receive 4 buffers
- * from the server. Even when acl_size and md_size are zero, the RPC
- * header will contain 4 fields and the RPC itself will contain
- * zero-size fields. This is because mdt_getattr*() _always_ returns
- * 4 fields, but if the ACL is not needed and thus zero, the server
- * shrinks that field to zero size. The same goes for md_size. This is
- * the cause of problems when the client waits for a smaller number of
- * fields. The issue will be fixed later when the client becomes aware
- * of RPC layouts. --umka
- */
-static int mdc_getattr_common(struct obd_export *exp,
- struct ptlrpc_request *req)
-{
- struct req_capsule *pill = &req->rq_pill;
- struct mdt_body *body;
- void *eadata;
- int rc;
-
- /* Request message already built. */
- rc = ptlrpc_queue_wait(req);
- if (rc != 0)
- return rc;
-
- /* sanity check for the reply */
- body = req_capsule_server_get(pill, &RMF_MDT_BODY);
- if (!body)
- return -EPROTO;
-
- CDEBUG(D_NET, "mode: %o\n", body->mbo_mode);
-
- mdc_update_max_ea_from_body(exp, body);
- if (body->mbo_eadatasize != 0) {
- eadata = req_capsule_server_sized_get(pill, &RMF_MDT_MD,
- body->mbo_eadatasize);
- if (!eadata)
- return -EPROTO;
- }
-
- return 0;
-}
-
-static int mdc_getattr(struct obd_export *exp, struct md_op_data *op_data,
- struct ptlrpc_request **request)
-{
- struct ptlrpc_request *req;
- int rc;
-
- /* Single MDS without an LMV case */
- if (op_data->op_flags & MF_GET_MDT_IDX) {
- op_data->op_mds = 0;
- return 0;
- }
- *request = NULL;
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_GETATTR);
- if (!req)
- return -ENOMEM;
-
- rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_GETATTR);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- mdc_pack_body(req, &op_data->op_fid1, op_data->op_valid,
- op_data->op_mode, -1, 0);
-
- req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
- req->rq_import->imp_connect_data.ocd_max_easize);
- req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
- op_data->op_mode);
- ptlrpc_request_set_replen(req);
-
- rc = mdc_getattr_common(exp, req);
- if (rc)
- ptlrpc_req_finished(req);
- else
- *request = req;
- return rc;
-}
-
-static int mdc_getattr_name(struct obd_export *exp, struct md_op_data *op_data,
- struct ptlrpc_request **request)
-{
- struct ptlrpc_request *req;
- int rc;
-
- *request = NULL;
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_MDS_GETATTR_NAME);
- if (!req)
- return -ENOMEM;
-
- req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
- op_data->op_namelen + 1);
-
- rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_GETATTR_NAME);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- mdc_pack_body(req, &op_data->op_fid1, op_data->op_valid,
- op_data->op_mode, op_data->op_suppgids[0], 0);
-
- if (op_data->op_name) {
- char *name = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
-
- LASSERT(strnlen(op_data->op_name, op_data->op_namelen) ==
- op_data->op_namelen);
- memcpy(name, op_data->op_name, op_data->op_namelen);
- }
-
- req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
- op_data->op_mode);
- req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
- req->rq_import->imp_connect_data.ocd_max_easize);
- ptlrpc_request_set_replen(req);
-
- rc = mdc_getattr_common(exp, req);
- if (rc)
- ptlrpc_req_finished(req);
- else
- *request = req;
- return rc;
-}
-
-static int mdc_xattr_common(struct obd_export *exp,
- const struct req_format *fmt,
- const struct lu_fid *fid,
- int opcode, u64 valid,
- const char *xattr_name, const char *input,
- int input_size, int output_size, int flags,
- __u32 suppgid, struct ptlrpc_request **request)
-{
- struct ptlrpc_request *req;
- int xattr_namelen = 0;
- char *tmp;
- int rc;
-
- *request = NULL;
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), fmt);
- if (!req)
- return -ENOMEM;
-
- if (xattr_name) {
- xattr_namelen = strlen(xattr_name) + 1;
- req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
- xattr_namelen);
- }
- if (input_size) {
- LASSERT(input);
- req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT,
- input_size);
- }
-
- /* Flush local XATTR locks to get rid of a possible cancel RPC */
- if (opcode == MDS_REINT && fid_is_sane(fid) &&
- exp->exp_connect_data.ocd_ibits_known & MDS_INODELOCK_XATTR) {
- LIST_HEAD(cancels);
- int count;
-
- /* Without this, packing would fail */
- if (input_size == 0)
- req_capsule_set_size(&req->rq_pill, &RMF_EADATA,
- RCL_CLIENT, 0);
-
- count = mdc_resource_get_unused(exp, fid,
- &cancels, LCK_EX,
- MDS_INODELOCK_XATTR);
-
- rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
- } else {
- rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, opcode);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
- }
-
- if (opcode == MDS_REINT) {
- struct mdt_rec_setxattr *rec;
-
- BUILD_BUG_ON(sizeof(struct mdt_rec_setxattr) !=
- sizeof(struct mdt_rec_reint));
- rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
- rec->sx_opcode = REINT_SETXATTR;
- rec->sx_fsuid = from_kuid(&init_user_ns, current_fsuid());
- rec->sx_fsgid = from_kgid(&init_user_ns, current_fsgid());
- rec->sx_cap = current_cap().cap[0];
- rec->sx_suppgid1 = suppgid;
- rec->sx_suppgid2 = -1;
- rec->sx_fid = *fid;
- rec->sx_valid = valid | OBD_MD_FLCTIME;
- rec->sx_time = ktime_get_real_seconds();
- rec->sx_size = output_size;
- rec->sx_flags = flags;
-
- } else {
- mdc_pack_body(req, fid, valid, output_size, suppgid, flags);
- }
-
- if (xattr_name) {
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
- memcpy(tmp, xattr_name, xattr_namelen);
- }
- if (input_size) {
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_EADATA);
- memcpy(tmp, input, input_size);
- }
-
- if (req_capsule_has_field(&req->rq_pill, &RMF_EADATA, RCL_SERVER))
- req_capsule_set_size(&req->rq_pill, &RMF_EADATA,
- RCL_SERVER, output_size);
- ptlrpc_request_set_replen(req);
-
- /* make rpc */
- if (opcode == MDS_REINT)
- mdc_get_mod_rpc_slot(req, NULL);
-
- rc = ptlrpc_queue_wait(req);
-
- if (opcode == MDS_REINT)
- mdc_put_mod_rpc_slot(req, NULL);
-
- if (rc)
- ptlrpc_req_finished(req);
- else
- *request = req;
- return rc;
-}
-
-static int mdc_setxattr(struct obd_export *exp, const struct lu_fid *fid,
- u64 obd_md_valid, const char *name,
- const void *value, size_t value_size,
- unsigned int xattr_flags, u32 suppgid,
- struct ptlrpc_request **req)
-{
- LASSERT(obd_md_valid == OBD_MD_FLXATTR ||
- obd_md_valid == OBD_MD_FLXATTRRM);
-
- return mdc_xattr_common(exp, &RQF_MDS_REINT_SETXATTR,
- fid, MDS_REINT, obd_md_valid, name,
- value, value_size, 0, xattr_flags, suppgid,
- req);
-}
-
-static int mdc_getxattr(struct obd_export *exp, const struct lu_fid *fid,
- u64 obd_md_valid, const char *name, size_t buf_size,
- struct ptlrpc_request **req)
-{
- LASSERT(obd_md_valid == OBD_MD_FLXATTR ||
- obd_md_valid == OBD_MD_FLXATTRLS);
-
- return mdc_xattr_common(exp, &RQF_MDS_GETXATTR, fid, MDS_GETXATTR,
- obd_md_valid, name, NULL, 0, buf_size, 0, -1,
- req);
-}
-
-#ifdef CONFIG_FS_POSIX_ACL
-static int mdc_unpack_acl(struct ptlrpc_request *req, struct lustre_md *md)
-{
- struct req_capsule *pill = &req->rq_pill;
- struct mdt_body *body = md->body;
- struct posix_acl *acl;
- void *buf;
- int rc;
-
- if (!body->mbo_aclsize)
- return 0;
-
- buf = req_capsule_server_sized_get(pill, &RMF_ACL, body->mbo_aclsize);
-
- if (!buf)
- return -EPROTO;
-
- acl = posix_acl_from_xattr(&init_user_ns, buf, body->mbo_aclsize);
- if (!acl)
- return 0;
-
- if (IS_ERR(acl)) {
- rc = PTR_ERR(acl);
- CERROR("convert xattr to acl: %d\n", rc);
- return rc;
- }
-
- rc = posix_acl_valid(&init_user_ns, acl);
- if (rc) {
- CERROR("validate acl: %d\n", rc);
- posix_acl_release(acl);
- return rc;
- }
-
- md->posix_acl = acl;
- return 0;
-}
-#else
-#define mdc_unpack_acl(req, md) 0
-#endif
-
-static int mdc_get_lustre_md(struct obd_export *exp,
- struct ptlrpc_request *req,
- struct obd_export *dt_exp,
- struct obd_export *md_exp,
- struct lustre_md *md)
-{
- struct req_capsule *pill = &req->rq_pill;
- int rc;
-
- LASSERT(md);
- memset(md, 0, sizeof(*md));
-
- md->body = req_capsule_server_get(pill, &RMF_MDT_BODY);
-
- if (md->body->mbo_valid & OBD_MD_FLEASIZE) {
- if (!S_ISREG(md->body->mbo_mode)) {
- CDEBUG(D_INFO,
- "OBD_MD_FLEASIZE set, should be a regular file, but is not\n");
- rc = -EPROTO;
- goto out;
- }
-
- if (md->body->mbo_eadatasize == 0) {
- CDEBUG(D_INFO,
- "OBD_MD_FLEASIZE set, but eadatasize 0\n");
- rc = -EPROTO;
- goto out;
- }
-
- md->layout.lb_len = md->body->mbo_eadatasize;
- md->layout.lb_buf = req_capsule_server_sized_get(pill,
- &RMF_MDT_MD,
- md->layout.lb_len);
- if (!md->layout.lb_buf) {
- rc = -EPROTO;
- goto out;
- }
- } else if (md->body->mbo_valid & OBD_MD_FLDIREA) {
- const union lmv_mds_md *lmv;
- size_t lmv_size;
-
- if (!S_ISDIR(md->body->mbo_mode)) {
- CDEBUG(D_INFO,
- "OBD_MD_FLDIREA set, should be a directory, but is not\n");
- rc = -EPROTO;
- goto out;
- }
-
- lmv_size = md->body->mbo_eadatasize;
- if (!lmv_size) {
- CDEBUG(D_INFO,
- "OBD_MD_FLDIREA is set, but eadatasize 0\n");
- return -EPROTO;
- }
- if (md->body->mbo_valid & OBD_MD_MEA) {
- lmv = req_capsule_server_sized_get(pill, &RMF_MDT_MD,
- lmv_size);
- if (!lmv) {
- rc = -EPROTO;
- goto out;
- }
-
- rc = md_unpackmd(md_exp, &md->lmv, lmv, lmv_size);
- if (rc < 0)
- goto out;
-
- if (rc < (typeof(rc))sizeof(*md->lmv)) {
- CDEBUG(D_INFO,
- "size too small: rc < sizeof(*md->lmv) (%d < %d)\n",
- rc, (int)sizeof(*md->lmv));
- rc = -EPROTO;
- goto out;
- }
- }
- }
- rc = 0;
-
- if (md->body->mbo_valid & OBD_MD_FLACL) {
- /* For the ACL, it's possible that FLACL is set but aclsize is zero.
- * Only when aclsize != 0 is there an actual ACL segment in the
- * reply buffer.
- */
- if (md->body->mbo_aclsize) {
- rc = mdc_unpack_acl(req, md);
- if (rc)
- goto out;
-#ifdef CONFIG_FS_POSIX_ACL
- } else {
- md->posix_acl = NULL;
-#endif
- }
- }
-
-out:
- if (rc) {
-#ifdef CONFIG_FS_POSIX_ACL
- posix_acl_release(md->posix_acl);
-#endif
- }
- return rc;
-}
-
-static int mdc_free_lustre_md(struct obd_export *exp, struct lustre_md *md)
-{
- return 0;
-}
-
-void mdc_replay_open(struct ptlrpc_request *req)
-{
- struct md_open_data *mod = req->rq_cb_data;
- struct ptlrpc_request *close_req;
- struct obd_client_handle *och;
- struct lustre_handle old;
- struct mdt_body *body;
-
- if (!mod) {
- DEBUG_REQ(D_ERROR, req,
- "Can't properly replay without open data.");
- return;
- }
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
-
- och = mod->mod_och;
- if (och) {
- struct lustre_handle *file_fh;
-
- LASSERT(och->och_magic == OBD_CLIENT_HANDLE_MAGIC);
-
- file_fh = &och->och_fh;
- CDEBUG(D_HA, "updating handle from %#llx to %#llx\n",
- file_fh->cookie, body->mbo_handle.cookie);
- old = *file_fh;
- *file_fh = body->mbo_handle;
- }
- close_req = mod->mod_close_req;
- if (close_req) {
- __u32 opc = lustre_msg_get_opc(close_req->rq_reqmsg);
- struct mdt_ioepoch *epoch;
-
- LASSERT(opc == MDS_CLOSE);
- epoch = req_capsule_client_get(&close_req->rq_pill,
- &RMF_MDT_EPOCH);
- LASSERT(epoch);
-
- if (och)
- LASSERT(!memcmp(&old, &epoch->mio_handle, sizeof(old)));
- DEBUG_REQ(D_HA, close_req, "updating close body with new fh");
- epoch->mio_handle = body->mbo_handle;
- }
-}
-
-void mdc_commit_open(struct ptlrpc_request *req)
-{
- struct md_open_data *mod = req->rq_cb_data;
-
- if (!mod)
- return;
-
- /**
- * No need to touch md_open_data::mod_och here: it holds a reference
- * on \var mod, and they will zero their references to each other;
- * \var mod will then be freed when md_open_data::mod_och puts its
- * reference.
- */
-
- /**
- * Do not let the open request disappear, as it may still be needed
- * for the close RPC to happen (that can occur on eviction only;
- * otherwise ptlrpc_request::rq_replay does not allow
- * mdc_commit_open() to be called). Just mark this RPC as committed
- * to distinguish these 2 cases; see mdc_close() for details. The
- * open request reference will be put when \var mod is freed.
- */
- ptlrpc_request_addref(req);
- spin_lock(&req->rq_lock);
- req->rq_committed = 1;
- spin_unlock(&req->rq_lock);
- req->rq_cb_data = NULL;
- obd_mod_put(mod);
-}
-
-int mdc_set_open_replay_data(struct obd_export *exp,
- struct obd_client_handle *och,
- struct lookup_intent *it)
-{
- struct md_open_data *mod;
- struct mdt_rec_create *rec;
- struct mdt_body *body;
- struct ptlrpc_request *open_req = it->it_request;
- struct obd_import *imp = open_req->rq_import;
-
- if (!open_req->rq_replay)
- return 0;
-
- rec = req_capsule_client_get(&open_req->rq_pill, &RMF_REC_REINT);
- body = req_capsule_server_get(&open_req->rq_pill, &RMF_MDT_BODY);
- LASSERT(rec);
- /* Incoming message in my byte order (it's been swabbed). */
- /* Outgoing messages always in my byte order. */
- LASSERT(body);
-
- /* Only if the import is replayable, we set replay_open data */
- if (och && imp->imp_replayable) {
- mod = obd_mod_alloc();
- if (!mod) {
- DEBUG_REQ(D_ERROR, open_req,
- "Can't allocate md_open_data");
- return 0;
- }
-
- /**
- * Take a reference on \var mod, to be dropped in mdc_close().
- * It protects \var mod from being freed on eviction (the commit
- * callback is called despite the rq_replay flag).
- * Take another reference for \var och.
- */
- obd_mod_get(mod);
- obd_mod_get(mod);
-
- spin_lock(&open_req->rq_lock);
- och->och_mod = mod;
- mod->mod_och = och;
- mod->mod_is_create = it_disposition(it, DISP_OPEN_CREATE) ||
- it_disposition(it, DISP_OPEN_STRIPE);
- mod->mod_open_req = open_req;
- open_req->rq_cb_data = mod;
- open_req->rq_commit_cb = mdc_commit_open;
- spin_unlock(&open_req->rq_lock);
- }
-
- rec->cr_fid2 = body->mbo_fid1;
- rec->cr_ioepoch = body->mbo_ioepoch;
- rec->cr_old_handle.cookie = body->mbo_handle.cookie;
- open_req->rq_replay_cb = mdc_replay_open;
- if (!fid_is_sane(&body->mbo_fid1)) {
- DEBUG_REQ(D_ERROR, open_req,
- "Saving replay request with insane fid");
- LBUG();
- }
-
- DEBUG_REQ(D_RPCTRACE, open_req, "Set up open replay data");
- return 0;
-}
-
-static void mdc_free_open(struct md_open_data *mod)
-{
- int committed = 0;
-
- if (mod->mod_is_create == 0 &&
- imp_connect_disp_stripe(mod->mod_open_req->rq_import))
- committed = 1;
-
- /*
- * No reason to assert here if the open request has
- * rq_replay == 1. It means that mdc_close failed, and the
- * close request wasn't sent. That is not fatal to the client.
- * The worst case is an eviction if the client holds an open lock.
- */
- DEBUG_REQ(D_RPCTRACE, mod->mod_open_req,
- "free open request rq_replay = %d\n",
- mod->mod_open_req->rq_replay);
-
- ptlrpc_request_committed(mod->mod_open_req, committed);
- if (mod->mod_close_req)
- ptlrpc_request_committed(mod->mod_close_req, committed);
-}
-
-static int mdc_clear_open_replay_data(struct obd_export *exp,
- struct obd_client_handle *och)
-{
- struct md_open_data *mod = och->och_mod;
-
- /**
- * \var mod may be absent in case of an eviction between
- * lookup and ll_file_open().
- **/
- if (!mod)
- return 0;
-
- LASSERT(mod != LP_POISON);
- LASSERT(mod->mod_open_req);
- mdc_free_open(mod);
-
- mod->mod_och = NULL;
- och->och_mod = NULL;
- obd_mod_put(mod);
-
- return 0;
-}
-
-static int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
- struct md_open_data *mod, struct ptlrpc_request **request)
-{
- struct obd_device *obd = class_exp2obd(exp);
- struct ptlrpc_request *req;
- struct req_format *req_fmt;
- int rc;
- int saved_rc = 0;
-
- if (op_data->op_bias & MDS_HSM_RELEASE) {
- req_fmt = &RQF_MDS_INTENT_CLOSE;
-
- /* allocate a FID for volatile file */
- rc = mdc_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
- if (rc < 0) {
- CERROR("%s: " DFID " failed to allocate FID: %d\n",
- obd->obd_name, PFID(&op_data->op_fid1), rc);
- /* save the errcode and proceed to close */
- saved_rc = rc;
- }
- } else if (op_data->op_bias & MDS_CLOSE_LAYOUT_SWAP) {
- req_fmt = &RQF_MDS_INTENT_CLOSE;
- } else {
- req_fmt = &RQF_MDS_CLOSE;
- }
-
- *request = NULL;
- if (OBD_FAIL_CHECK(OBD_FAIL_MDC_CLOSE))
- req = NULL;
- else
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), req_fmt);
-
- /* Ensure that this close's handle is fixed up during replay. */
- if (likely(mod)) {
- LASSERTF(mod->mod_open_req &&
- mod->mod_open_req->rq_type != LI_POISON,
- "POISONED open %p!\n", mod->mod_open_req);
-
- mod->mod_close_req = req;
-
- DEBUG_REQ(D_HA, mod->mod_open_req, "matched open");
- /* We no longer want to preserve this open for replay even
- * though the open was committed. b=3632, b=3633
- */
- spin_lock(&mod->mod_open_req->rq_lock);
- mod->mod_open_req->rq_replay = 0;
- spin_unlock(&mod->mod_open_req->rq_lock);
- } else {
- CDEBUG(D_HA,
- "couldn't find open req; expecting close error\n");
- }
- if (!req) {
- /*
- * TODO: repeat close after errors
- */
- CWARN("%s: close of FID " DFID " failed, file reference will be dropped when this client unmounts or is evicted\n",
- obd->obd_name, PFID(&op_data->op_fid1));
- rc = -ENOMEM;
- goto out;
- }
-
- rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_CLOSE);
- if (rc) {
- ptlrpc_request_free(req);
- req = NULL;
- goto out;
- }
-
- /*
- * To avoid a livelock (bug 7034), we need to send CLOSE RPCs to a
- * portal whose threads are not taking any DLM locks and are therefore
- * always progressing
- */
- req->rq_request_portal = MDS_READPAGE_PORTAL;
- ptlrpc_at_set_req_timeout(req);
-
- mdc_close_pack(req, op_data);
-
- req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
- obd->u.cli.cl_default_mds_easize);
-
- ptlrpc_request_set_replen(req);
-
- mdc_get_mod_rpc_slot(req, NULL);
- rc = ptlrpc_queue_wait(req);
- mdc_put_mod_rpc_slot(req, NULL);
-
- if (!req->rq_repmsg) {
- CDEBUG(D_RPCTRACE, "request failed to send: %p, %d\n", req,
- req->rq_status);
- if (rc == 0)
- rc = req->rq_status ?: -EIO;
- } else if (rc == 0 || rc == -EAGAIN) {
- struct mdt_body *body;
-
- rc = lustre_msg_get_status(req->rq_repmsg);
- if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR) {
- DEBUG_REQ(D_ERROR, req,
- "type == PTL_RPC_MSG_ERR, err = %d", rc);
- if (rc > 0)
- rc = -rc;
- }
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!body)
- rc = -EPROTO;
- } else if (rc == -ESTALE) {
- /**
- * ESTALE can be an allowed error after bug 3633 if the open was
- * committed and the server failed before the close was sent.
- * Check whether mod exists and return no error in that case.
- */
- if (mod) {
- DEBUG_REQ(D_HA, req, "Reset ESTALE = %d", rc);
- if (mod->mod_open_req->rq_committed)
- rc = 0;
- }
- }
-
-out:
- if (mod) {
- if (rc != 0)
- mod->mod_close_req = NULL;
- /* From now on, mod is accessed through open_req only;
- * thus the close req no longer keeps a reference on mod.
- */
- obd_mod_put(mod);
- }
- *request = req;
- return rc < 0 ? rc : saved_rc;
-}
-
-static int mdc_getpage(struct obd_export *exp, const struct lu_fid *fid,
- u64 offset, struct page **pages, int npages,
- struct ptlrpc_request **request)
-{
- struct ptlrpc_bulk_desc *desc;
- struct ptlrpc_request *req;
- wait_queue_head_t waitq;
- int resends = 0;
- int rc;
- int i;
-
- *request = NULL;
- init_waitqueue_head(&waitq);
-
-restart_bulk:
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_READPAGE);
- if (!req)
- return -ENOMEM;
-
- rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_READPAGE);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- req->rq_request_portal = MDS_READPAGE_PORTAL;
- ptlrpc_at_set_req_timeout(req);
-
- desc = ptlrpc_prep_bulk_imp(req, npages, 1,
- PTLRPC_BULK_PUT_SINK | PTLRPC_BULK_BUF_KIOV,
- MDS_BULK_PORTAL,
- &ptlrpc_bulk_kiov_pin_ops);
- if (!desc) {
- ptlrpc_request_free(req);
- return -ENOMEM;
- }
-
- /* NB: req now owns desc and will free it when req itself is freed */
- for (i = 0; i < npages; i++)
- desc->bd_frag_ops->add_kiov_frag(desc, pages[i], 0, PAGE_SIZE);
-
- mdc_readdir_pack(req, offset, PAGE_SIZE * npages, fid);
-
- ptlrpc_request_set_replen(req);
- rc = ptlrpc_queue_wait(req);
- if (rc) {
- ptlrpc_req_finished(req);
- if (rc != -ETIMEDOUT)
- return rc;
-
- resends++;
- if (!client_should_resend(resends, &exp->exp_obd->u.cli)) {
- CERROR("%s: too many resend retries: rc = %d\n",
- exp->exp_obd->obd_name, -EIO);
- return -EIO;
- }
- wait_event_idle_timeout(waitq, 0, resends * HZ);
-
- goto restart_bulk;
- }
-
- rc = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk,
- req->rq_bulk->bd_nob_transferred);
- if (rc < 0) {
- ptlrpc_req_finished(req);
- return rc;
- }
-
- if (req->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK) {
- CERROR("%s: unexpected bytes transferred: %d (%ld expected)\n",
- exp->exp_obd->obd_name, req->rq_bulk->bd_nob_transferred,
- PAGE_SIZE * npages);
- ptlrpc_req_finished(req);
- return -EPROTO;
- }
-
- *request = req;
- return 0;
-}
-
-static void mdc_release_page(struct page *page, int remove)
-{
- if (remove) {
- lock_page(page);
- if (likely(page->mapping))
- truncate_complete_page(page->mapping, page);
- unlock_page(page);
- }
- put_page(page);
-}
-
-static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash,
- __u64 *start, __u64 *end, int hash64)
-{
- /*
- * The complement of the hash is used as an index so that
- * radix_tree_gang_lookup() can be used to find a page with a starting
- * hash _smaller_ than the one we are looking for.
- */
- unsigned long offset = hash_x_index(*hash, hash64);
- struct page *page;
- int found;
-
- xa_lock_irq(&mapping->i_pages);
- found = radix_tree_gang_lookup(&mapping->i_pages,
- (void **)&page, offset, 1);
- if (found > 0 && !radix_tree_exceptional_entry(page)) {
- struct lu_dirpage *dp;
-
- get_page(page);
- xa_unlock_irq(&mapping->i_pages);
- /*
- * In contrast to find_lock_page(), we are sure that the directory
- * page cannot be truncated (while the DLM lock is held) and,
- * hence, can avoid a restart.
- *
- * In fact, the page cannot be locked here at all, because
- * mdc_read_page_remote() does synchronous io.
- */
- wait_on_page_locked(page);
- if (PageUptodate(page)) {
- dp = kmap(page);
- if (BITS_PER_LONG == 32 && hash64) {
- *start = le64_to_cpu(dp->ldp_hash_start) >> 32;
- *end = le64_to_cpu(dp->ldp_hash_end) >> 32;
- *hash = *hash >> 32;
- } else {
- *start = le64_to_cpu(dp->ldp_hash_start);
- *end = le64_to_cpu(dp->ldp_hash_end);
- }
- if (unlikely(*start == 1 && *hash == 0))
- *hash = *start;
- else
- LASSERTF(*start <= *hash, "start = %#llx,end = %#llx,hash = %#llx\n",
- *start, *end, *hash);
- CDEBUG(D_VFSTRACE, "offset %lx [%#llx %#llx], hash %#llx\n",
- offset, *start, *end, *hash);
- if (*hash > *end) {
- kunmap(page);
- mdc_release_page(page, 0);
- page = NULL;
- } else if (*end != *start && *hash == *end) {
- /*
- * Upon hash collision, remove this page;
- * otherwise put the page reference, and
- * mdc_read_page_remote() will issue an RPC
- * to fetch the page we want.
- */
- kunmap(page);
- mdc_release_page(page,
- le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
- page = NULL;
- }
- } else {
- put_page(page);
- page = ERR_PTR(-EIO);
- }
- } else {
- xa_unlock_irq(&mapping->i_pages);
- page = NULL;
- }
- return page;
-}
-
-/*
- * Adjust a set of pages, each page containing an array of lu_dirpages,
- * so that each page can be used as a single logical lu_dirpage.
- *
- * A lu_dirpage is laid out as follows, where s = ldp_hash_start,
- * e = ldp_hash_end, f = ldp_flags, p = padding, and each "ent" is a
- * struct lu_dirent. It has size up to LU_PAGE_SIZE. The ldp_hash_end
- * value is used as a cookie to request the next lu_dirpage in a
- * directory listing that spans multiple pages (two in this example):
- * ________
- * | |
- * .|--------v------- -----.
- * |s|e|f|p|ent|ent| ... |ent|
- * '--|-------------- -----' Each PAGE contains a single
- * '------. lu_dirpage.
- * .---------v------- -----.
- * |s|e|f|p|ent| 0 | ... | 0 |
- * '----------------- -----'
- *
- * However, on hosts where the native VM page size (PAGE_SIZE) is
- * larger than LU_PAGE_SIZE, a single host page may contain multiple
- * lu_dirpages. After reading the lu_dirpages from the MDS, the
- * ldp_hash_end of the first lu_dirpage refers to the one immediately
- * after it in the same PAGE (arrows simplified for brevity, but
- * in general e0==s1, e1==s2, etc.):
- *
- * .-------------------- -----.
- * |s0|e0|f0|p|ent|ent| ... |ent|
- * |---v---------------- -----|
- * |s1|e1|f1|p|ent|ent| ... |ent|
- * |---v---------------- -----| Here, each PAGE contains
- * ... multiple lu_dirpages.
- * |---v---------------- -----|
- * |s'|e'|f'|p|ent|ent| ... |ent|
- * '---|---------------- -----'
- * v
- * .----------------------------.
- * | next PAGE |
- *
- * This structure is transformed into a single logical lu_dirpage as follows:
- *
- * - Replace e0 with e' so the request for the next lu_dirpage gets the page
- * labeled 'next PAGE'.
- *
- * - Copy the LDF_COLLIDE flag from f' to f0 to correctly reflect whether
- * a hash collision with the next page exists.
- *
- * - Adjust the lde_reclen of the ending entry of each lu_dirpage to span
- * to the first entry of the next lu_dirpage.
- */
-#if PAGE_SIZE > LU_PAGE_SIZE
-static void mdc_adjust_dirpages(struct page **pages, int cfs_pgs, int lu_pgs)
-{
- int i;
-
- for (i = 0; i < cfs_pgs; i++) {
- struct lu_dirpage *dp = kmap(pages[i]);
- __u64 hash_end = le64_to_cpu(dp->ldp_hash_end);
- __u32 flags = le32_to_cpu(dp->ldp_flags);
- struct lu_dirpage *first = dp;
-
- while (--lu_pgs > 0) {
- struct lu_dirent *end_dirent = NULL;
- struct lu_dirent *ent;
-
- for (ent = lu_dirent_start(dp); ent;
- ent = lu_dirent_next(ent))
- end_dirent = ent;
-
- /* Advance dp to next lu_dirpage. */
- dp = (struct lu_dirpage *)((char *)dp + LU_PAGE_SIZE);
-
- /* Check if we've reached the end of the CFS_PAGE. */
- if (!((unsigned long)dp & ~PAGE_MASK))
- break;
-
- /* Save the hash and flags of this lu_dirpage. */
- hash_end = le64_to_cpu(dp->ldp_hash_end);
- flags = le32_to_cpu(dp->ldp_flags);
-
- /* Check if lu_dirpage contains no entries. */
- if (!end_dirent)
- break;
-
- /*
- * Enlarge the end entry's lde_reclen from 0 to
- * span to the first entry of the next lu_dirpage.
- */
- LASSERT(!le16_to_cpu(end_dirent->lde_reclen));
- end_dirent->lde_reclen =
- cpu_to_le16((char *)(dp->ldp_entries) -
- (char *)end_dirent);
- }
-
- first->ldp_hash_end = hash_end;
- first->ldp_flags &= ~cpu_to_le32(LDF_COLLIDE);
- first->ldp_flags |= flags & cpu_to_le32(LDF_COLLIDE);
-
- kunmap(pages[i]);
- }
- LASSERTF(lu_pgs == 0, "left = %d", lu_pgs);
-}
-#else
-#define mdc_adjust_dirpages(pages, cfs_pgs, lu_pgs) do {} while (0)
-#endif /* PAGE_SIZE > LU_PAGE_SIZE */
-
-/* parameters for readdir page */
-struct readpage_param {
- struct md_op_data *rp_mod;
- __u64 rp_off;
- int rp_hash64;
- struct obd_export *rp_exp;
- struct md_callback *rp_cb;
-};
-
-/**
- * Read pages from the server.
- *
- * Pages in the MDS_READPAGE RPC are packed in LU_PAGE_SIZE units, and each
- * one contains a lu_dirpage header which describes the start/end hash and
- * whether the page is empty (contains no dir entry) or its hash collides
- * with the next page. After the client receives the reply, several pages
- * are integrated into one PAGE_SIZE dir page (if PAGE_SIZE is greater than
- * LU_PAGE_SIZE), and the lu_dirpage for this integrated page is adjusted.
- **/
-static int mdc_read_page_remote(void *data, struct page *page0)
-{
- struct readpage_param *rp = data;
- struct page **page_pool;
- struct page *page;
- struct lu_dirpage *dp;
- int rd_pgs = 0; /* number of pages actually read */
- int npages;
- struct md_op_data *op_data = rp->rp_mod;
- struct ptlrpc_request *req;
- int max_pages = op_data->op_max_pages;
- struct inode *inode;
- struct lu_fid *fid;
- int i;
- int rc;
-
- LASSERT(max_pages > 0 && max_pages <= PTLRPC_MAX_BRW_PAGES);
- inode = op_data->op_data;
- fid = &op_data->op_fid1;
- LASSERT(inode);
-
- page_pool = kcalloc(max_pages, sizeof(page), GFP_NOFS);
- if (page_pool) {
- page_pool[0] = page0;
- } else {
- page_pool = &page0;
- max_pages = 1;
- }
-
- for (npages = 1; npages < max_pages; npages++) {
- page = page_cache_alloc(inode->i_mapping);
- if (!page)
- break;
- page_pool[npages] = page;
- }
-
- rc = mdc_getpage(rp->rp_exp, fid, rp->rp_off, page_pool, npages, &req);
- if (!rc) {
- int lu_pgs = req->rq_bulk->bd_nob_transferred;
-
- rd_pgs = (req->rq_bulk->bd_nob_transferred +
- PAGE_SIZE - 1) >> PAGE_SHIFT;
- lu_pgs >>= LU_PAGE_SHIFT;
- LASSERT(!(req->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK));
-
- CDEBUG(D_INODE, "read %d(%d) pages\n", rd_pgs, lu_pgs);
-
- mdc_adjust_dirpages(page_pool, rd_pgs, lu_pgs);
-
- SetPageUptodate(page0);
- }
-
- unlock_page(page0);
- ptlrpc_req_finished(req);
- CDEBUG(D_CACHE, "read %d/%d pages\n", rd_pgs, npages);
- for (i = 1; i < npages; i++) {
- unsigned long offset;
- __u64 hash;
- int ret;
-
- page = page_pool[i];
-
- if (rc < 0 || i >= rd_pgs) {
- put_page(page);
- continue;
- }
-
- SetPageUptodate(page);
-
- dp = kmap(page);
- hash = le64_to_cpu(dp->ldp_hash_start);
- kunmap(page);
-
- offset = hash_x_index(hash, rp->rp_hash64);
-
- prefetchw(&page->flags);
- ret = add_to_page_cache_lru(page, inode->i_mapping, offset,
- GFP_KERNEL);
- if (!ret)
- unlock_page(page);
- else
- CDEBUG(D_VFSTRACE, "page %lu add to page cache failed: rc = %d\n",
- offset, ret);
- put_page(page);
- }
-
- if (page_pool != &page0)
- kfree(page_pool);
-
- return rc;
-}
-
-/**
- * Read the dir page from the cache first; if it cannot be found there, read
- * it from the server and add it to the cache.
- *
- * \param[in] exp MDC export
- * \param[in] op_data client MD stack parameters, transferring parameters
- * between different layers on the client MD stack.
- * \param[in] cb_op callback required for ldlm lock enqueue during
- * read page
- * \param[in] hash_offset the hash offset of the page to be read
- * \param[out] ppage the page that was read
- *
- * \retval 0 the page was retrieved successfully
- * \retval negative errno if retrieving the page failed
- */
-static int mdc_read_page(struct obd_export *exp, struct md_op_data *op_data,
- struct md_callback *cb_op, __u64 hash_offset,
- struct page **ppage)
-{
- struct lookup_intent it = { .it_op = IT_READDIR };
- struct page *page;
- struct inode *dir = op_data->op_data;
- struct address_space *mapping;
- struct lu_dirpage *dp;
- __u64 start = 0;
- __u64 end = 0;
- struct lustre_handle lockh;
- struct ptlrpc_request *enq_req = NULL;
- struct readpage_param rp_param;
- int rc;
-
- *ppage = NULL;
-
- LASSERT(dir);
- mapping = dir->i_mapping;
-
- rc = mdc_intent_lock(exp, op_data, &it, &enq_req,
- cb_op->md_blocking_ast, 0);
- if (enq_req)
- ptlrpc_req_finished(enq_req);
-
- if (rc < 0) {
- CERROR("%s: " DFID " lock enqueue fails: rc = %d\n",
- exp->exp_obd->obd_name, PFID(&op_data->op_fid1), rc);
- return rc;
- }
-
- rc = 0;
- lockh.cookie = it.it_lock_handle;
- mdc_set_lock_data(exp, &lockh, dir, NULL);
-
- rp_param.rp_off = hash_offset;
- rp_param.rp_hash64 = op_data->op_cli_flags & CLI_HASH64;
- page = mdc_page_locate(mapping, &rp_param.rp_off, &start, &end,
- rp_param.rp_hash64);
- if (IS_ERR(page)) {
- CDEBUG(D_INFO, "%s: dir page locate: " DFID " at %llu: rc %ld\n",
- exp->exp_obd->obd_name, PFID(&op_data->op_fid1),
- rp_param.rp_off, PTR_ERR(page));
- rc = PTR_ERR(page);
- goto out_unlock;
- } else if (page) {
- /*
- * XXX nikita: not entirely correct handling of a corner case:
- * suppose hash chain of entries with hash value HASH crosses
- * border between pages P0 and P1. First both P0 and P1 are
- * cached, seekdir() is called for some entry from the P0 part
- * of the chain. Later P0 goes out of cache. telldir(HASH)
- * happens and finds P1, as it starts with matching hash
- * value. Remaining entries from P0 part of the chain are
- * skipped. (Is that really a bug?)
- *
- * Possible solutions: 0. don't cache P1 in such a case, handle
- * it as an "overflow" page. 1. invalidate all pages at
- * once. 2. use HASH|1 as an index for P1.
- */
- goto hash_collision;
- }
-
- rp_param.rp_exp = exp;
- rp_param.rp_mod = op_data;
- page = read_cache_page(mapping,
- hash_x_index(rp_param.rp_off,
- rp_param.rp_hash64),
- mdc_read_page_remote, &rp_param);
- if (IS_ERR(page)) {
- CERROR("%s: read cache page: " DFID " at %llu: rc %ld\n",
- exp->exp_obd->obd_name, PFID(&op_data->op_fid1),
- rp_param.rp_off, PTR_ERR(page));
- rc = PTR_ERR(page);
- goto out_unlock;
- }
-
- wait_on_page_locked(page);
- (void)kmap(page);
- if (!PageUptodate(page)) {
- CERROR("%s: page not updated: " DFID " at %llu: rc %d\n",
- exp->exp_obd->obd_name, PFID(&op_data->op_fid1),
-		       rp_param.rp_off, -EIO);
- goto fail;
- }
- if (!PageChecked(page))
- SetPageChecked(page);
- if (PageError(page)) {
- CERROR("%s: page error: " DFID " at %llu: rc %d\n",
- exp->exp_obd->obd_name, PFID(&op_data->op_fid1),
-		       rp_param.rp_off, -EIO);
- goto fail;
- }
-
-hash_collision:
- dp = page_address(page);
- if (BITS_PER_LONG == 32 && rp_param.rp_hash64) {
- start = le64_to_cpu(dp->ldp_hash_start) >> 32;
- end = le64_to_cpu(dp->ldp_hash_end) >> 32;
- rp_param.rp_off = hash_offset >> 32;
- } else {
- start = le64_to_cpu(dp->ldp_hash_start);
- end = le64_to_cpu(dp->ldp_hash_end);
- rp_param.rp_off = hash_offset;
- }
- if (end == start) {
- LASSERT(start == rp_param.rp_off);
- CWARN("Page-wide hash collision: %#lx\n", (unsigned long)end);
-#if BITS_PER_LONG == 32
- CWARN("Real page-wide hash collision at [%llu %llu] with hash %llu\n",
- le64_to_cpu(dp->ldp_hash_start),
- le64_to_cpu(dp->ldp_hash_end), hash_offset);
-#endif
- /*
- * Fetch whole overflow chain...
- *
- * XXX not yet.
- */
- goto fail;
- }
- *ppage = page;
-out_unlock:
- ldlm_lock_decref(&lockh, it.it_lock_mode);
- return rc;
-fail:
- kunmap(page);
- mdc_release_page(page, 1);
- rc = -EIO;
- goto out_unlock;
-}
-
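The dir page just read is inserted into the directory inode's page cache at an index derived from its starting hash (see hash_x_index() above, plus the 32-bit squeeze in the hash_collision path). The following is a standalone userspace sketch of that kind of transform; hash_to_index() and the exact inversion formula are assumptions for illustration, not the kernel's hash_x_index() verbatim:

#include <stdio.h>
#include <stdint.h>

/* Sketch of a dir-hash -> page-cache-index transform. */
static unsigned long hash_to_index(uint64_t hash, int hash64, int bits_per_long)
{
	if (bits_per_long == 32 && hash64)
		hash >>= 32;	/* squeeze a 64-bit hash on a 32-bit host */
	/* invert the hash; hash 0 is bumped to 1 first so the result never
	 * lands on ~0UL, an invalid page index (it then shares an index
	 * with hash 1) */
	return ~0UL - (unsigned long)(hash + !hash);
}

int main(void)
{
	printf("hash 0      -> index %#lx\n", hash_to_index(0, 0, 64));
	printf("hash 0x1234 -> index %#lx\n", hash_to_index(0x1234, 0, 64));
	return 0;
}
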
-static int mdc_statfs(const struct lu_env *env,
- struct obd_export *exp, struct obd_statfs *osfs,
- __u64 max_age, __u32 flags)
-{
- struct obd_device *obd = class_exp2obd(exp);
- struct ptlrpc_request *req;
- struct obd_statfs *msfs;
- struct obd_import *imp = NULL;
- int rc;
-
- /*
-	 * The request might also come from lprocfs, so we need to
-	 * sync this with client_disconnect_export (Bug 15684).
- */
- down_read(&obd->u.cli.cl_sem);
- if (obd->u.cli.cl_import)
- imp = class_import_get(obd->u.cli.cl_import);
- up_read(&obd->u.cli.cl_sem);
- if (!imp)
- return -ENODEV;
-
- req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_STATFS,
- LUSTRE_MDS_VERSION, MDS_STATFS);
- if (!req) {
- rc = -ENOMEM;
- goto output;
- }
-
- ptlrpc_request_set_replen(req);
-
- if (flags & OBD_STATFS_NODELAY) {
-		/* procfs requests should not wait and block, to avoid deadlock */
- req->rq_no_resend = 1;
- req->rq_no_delay = 1;
- }
-
- rc = ptlrpc_queue_wait(req);
- if (rc) {
- /* check connection error first */
- if (imp->imp_connect_error)
- rc = imp->imp_connect_error;
- goto out;
- }
-
- msfs = req_capsule_server_get(&req->rq_pill, &RMF_OBD_STATFS);
- if (!msfs) {
- rc = -EPROTO;
- goto out;
- }
-
- *osfs = *msfs;
-out:
- ptlrpc_req_finished(req);
-output:
- class_import_put(imp);
- return rc;
-}
-
-static int mdc_ioc_fid2path(struct obd_export *exp, struct getinfo_fid2path *gf)
-{
- __u32 keylen, vallen;
- void *key;
- int rc;
-
- if (gf->gf_pathlen > PATH_MAX)
- return -ENAMETOOLONG;
- if (gf->gf_pathlen < 2)
- return -EOVERFLOW;
-
- /* Key is KEY_FID2PATH + getinfo_fid2path description */
- keylen = cfs_size_round(sizeof(KEY_FID2PATH)) + sizeof(*gf);
- key = kzalloc(keylen, GFP_NOFS);
- if (!key)
- return -ENOMEM;
- memcpy(key, KEY_FID2PATH, sizeof(KEY_FID2PATH));
- memcpy(key + cfs_size_round(sizeof(KEY_FID2PATH)), gf, sizeof(*gf));
-
- CDEBUG(D_IOCTL, "path get " DFID " from %llu #%d\n",
- PFID(&gf->gf_fid), gf->gf_recno, gf->gf_linkno);
-
- if (!fid_is_sane(&gf->gf_fid)) {
- rc = -EINVAL;
- goto out;
- }
-
- /* Val is struct getinfo_fid2path result plus path */
- vallen = sizeof(*gf) + gf->gf_pathlen;
-
- rc = obd_get_info(NULL, exp, keylen, key, &vallen, gf);
- if (rc != 0 && rc != -EREMOTE)
- goto out;
-
- if (vallen <= sizeof(*gf)) {
- rc = -EPROTO;
- goto out;
- } else if (vallen > sizeof(*gf) + gf->gf_pathlen) {
- rc = -EOVERFLOW;
- goto out;
- }
-
- CDEBUG(D_IOCTL, "path got " DFID " from %llu #%d: %s\n",
- PFID(&gf->gf_fid), gf->gf_recno, gf->gf_linkno,
- gf->gf_pathlen < 512 ? gf->gf_path :
- /* only log the last 512 characters of the path */
- gf->gf_path + gf->gf_pathlen - 512);
-
-out:
- kfree(key);
- return rc;
-}
-
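The key built above is the KEY_FID2PATH string padded to an 8-byte boundary, immediately followed by the getinfo_fid2path descriptor. Below is a self-contained sketch of that round-and-pack layout; size_round8(), struct fid2path_req and the "fid2path" literal are illustrative stand-ins:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* stand-in for cfs_size_round(): round up to the next 8-byte boundary */
static size_t size_round8(size_t n)
{
	return (n + 7) & ~(size_t)7;
}

struct fid2path_req {		/* illustrative stand-in for getinfo_fid2path */
	uint64_t recno;
	uint32_t linkno;
	uint32_t pathlen;
};

int main(void)
{
	static const char keystr[] = "fid2path";	/* assumed key literal */
	struct fid2path_req gf = { .recno = 0, .linkno = 0, .pathlen = 64 };
	size_t keylen = size_round8(sizeof(keystr)) + sizeof(gf);
	unsigned char *key = calloc(1, keylen);

	if (!key)
		return 1;
	memcpy(key, keystr, sizeof(keystr));			/* key string */
	memcpy(key + size_round8(sizeof(keystr)), &gf, sizeof(gf));
	printf("key length %zu (string part padded to %zu bytes)\n",
	       keylen, size_round8(sizeof(keystr)));
	free(key);
	return 0;
}
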
-static int mdc_ioc_hsm_progress(struct obd_export *exp,
- struct hsm_progress_kernel *hpk)
-{
- struct obd_import *imp = class_exp2cliimp(exp);
- struct hsm_progress_kernel *req_hpk;
- struct ptlrpc_request *req;
- int rc;
-
- req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_HSM_PROGRESS,
- LUSTRE_MDS_VERSION, MDS_HSM_PROGRESS);
- if (!req) {
- rc = -ENOMEM;
- goto out;
- }
-
- mdc_pack_body(req, NULL, 0, 0, -1, 0);
-
- /* Copy hsm_progress struct */
- req_hpk = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_PROGRESS);
- if (!req_hpk) {
- rc = -EPROTO;
- goto out;
- }
-
- *req_hpk = *hpk;
- req_hpk->hpk_errval = lustre_errno_hton(hpk->hpk_errval);
-
- ptlrpc_request_set_replen(req);
-
- mdc_get_mod_rpc_slot(req, NULL);
- rc = ptlrpc_queue_wait(req);
- mdc_put_mod_rpc_slot(req, NULL);
-out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static int mdc_ioc_hsm_ct_register(struct obd_import *imp, __u32 archives)
-{
- __u32 *archive_mask;
- struct ptlrpc_request *req;
- int rc;
-
- req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_HSM_CT_REGISTER,
- LUSTRE_MDS_VERSION,
- MDS_HSM_CT_REGISTER);
- if (!req) {
- rc = -ENOMEM;
- goto out;
- }
-
- mdc_pack_body(req, NULL, 0, 0, -1, 0);
-
-	/* Copy the archive mask */
- archive_mask = req_capsule_client_get(&req->rq_pill,
- &RMF_MDS_HSM_ARCHIVE);
- if (!archive_mask) {
- rc = -EPROTO;
- goto out;
- }
-
- *archive_mask = archives;
-
- ptlrpc_request_set_replen(req);
-
- rc = mdc_queue_wait(req);
-out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static int mdc_ioc_hsm_current_action(struct obd_export *exp,
- struct md_op_data *op_data)
-{
- struct hsm_current_action *hca = op_data->op_data;
- struct hsm_current_action *req_hca;
- struct ptlrpc_request *req;
- int rc;
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_MDS_HSM_ACTION);
- if (!req)
- return -ENOMEM;
-
- rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_HSM_ACTION);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- mdc_pack_body(req, &op_data->op_fid1, 0, 0,
- op_data->op_suppgids[0], 0);
-
- ptlrpc_request_set_replen(req);
-
- rc = mdc_queue_wait(req);
- if (rc)
- goto out;
-
- req_hca = req_capsule_server_get(&req->rq_pill,
- &RMF_MDS_HSM_CURRENT_ACTION);
- if (!req_hca) {
- rc = -EPROTO;
- goto out;
- }
-
- *hca = *req_hca;
-
-out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static int mdc_ioc_hsm_ct_unregister(struct obd_import *imp)
-{
- struct ptlrpc_request *req;
- int rc;
-
- req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_HSM_CT_UNREGISTER,
- LUSTRE_MDS_VERSION,
- MDS_HSM_CT_UNREGISTER);
- if (!req) {
- rc = -ENOMEM;
- goto out;
- }
-
- mdc_pack_body(req, NULL, 0, 0, -1, 0);
-
- ptlrpc_request_set_replen(req);
-
- rc = mdc_queue_wait(req);
-out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static int mdc_ioc_hsm_state_get(struct obd_export *exp,
- struct md_op_data *op_data)
-{
- struct hsm_user_state *hus = op_data->op_data;
- struct hsm_user_state *req_hus;
- struct ptlrpc_request *req;
- int rc;
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_MDS_HSM_STATE_GET);
- if (!req)
- return -ENOMEM;
-
- rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_HSM_STATE_GET);
- if (rc != 0) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- mdc_pack_body(req, &op_data->op_fid1, 0, 0,
- op_data->op_suppgids[0], 0);
-
- ptlrpc_request_set_replen(req);
-
- rc = mdc_queue_wait(req);
- if (rc)
- goto out;
-
- req_hus = req_capsule_server_get(&req->rq_pill, &RMF_HSM_USER_STATE);
- if (!req_hus) {
- rc = -EPROTO;
- goto out;
- }
-
- *hus = *req_hus;
-
-out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static int mdc_ioc_hsm_state_set(struct obd_export *exp,
- struct md_op_data *op_data)
-{
- struct hsm_state_set *hss = op_data->op_data;
- struct hsm_state_set *req_hss;
- struct ptlrpc_request *req;
- int rc;
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_MDS_HSM_STATE_SET);
- if (!req)
- return -ENOMEM;
-
- rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_HSM_STATE_SET);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- mdc_pack_body(req, &op_data->op_fid1, 0, 0,
- op_data->op_suppgids[0], 0);
-
- /* Copy states */
- req_hss = req_capsule_client_get(&req->rq_pill, &RMF_HSM_STATE_SET);
- if (!req_hss) {
- rc = -EPROTO;
- goto out;
- }
- *req_hss = *hss;
-
- ptlrpc_request_set_replen(req);
-
- mdc_get_mod_rpc_slot(req, NULL);
- rc = ptlrpc_queue_wait(req);
- mdc_put_mod_rpc_slot(req, NULL);
-out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static int mdc_ioc_hsm_request(struct obd_export *exp,
- struct hsm_user_request *hur)
-{
- struct obd_import *imp = class_exp2cliimp(exp);
- struct ptlrpc_request *req;
- struct hsm_request *req_hr;
- struct hsm_user_item *req_hui;
- char *req_opaque;
- int rc;
-
- req = ptlrpc_request_alloc(imp, &RQF_MDS_HSM_REQUEST);
- if (!req) {
- rc = -ENOMEM;
- goto out;
- }
-
- req_capsule_set_size(&req->rq_pill, &RMF_MDS_HSM_USER_ITEM, RCL_CLIENT,
- hur->hur_request.hr_itemcount
- * sizeof(struct hsm_user_item));
- req_capsule_set_size(&req->rq_pill, &RMF_GENERIC_DATA, RCL_CLIENT,
- hur->hur_request.hr_data_len);
-
- rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_HSM_REQUEST);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- mdc_pack_body(req, NULL, 0, 0, -1, 0);
-
- /* Copy hsm_request struct */
- req_hr = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_REQUEST);
- if (!req_hr) {
- rc = -EPROTO;
- goto out;
- }
- *req_hr = hur->hur_request;
-
- /* Copy hsm_user_item structs */
- req_hui = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_USER_ITEM);
- if (!req_hui) {
- rc = -EPROTO;
- goto out;
- }
- memcpy(req_hui, hur->hur_user_item,
- hur->hur_request.hr_itemcount * sizeof(struct hsm_user_item));
-
- /* Copy opaque field */
- req_opaque = req_capsule_client_get(&req->rq_pill, &RMF_GENERIC_DATA);
- if (!req_opaque) {
- rc = -EPROTO;
- goto out;
- }
- memcpy(req_opaque, hur_data(hur), hur->hur_request.hr_data_len);
-
- ptlrpc_request_set_replen(req);
-
- mdc_get_mod_rpc_slot(req, NULL);
- rc = ptlrpc_queue_wait(req);
- mdc_put_mod_rpc_slot(req, NULL);
-out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
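Note the two req_capsule_set_size() calls above: the request carries a fixed hsm_request header, then hr_itemcount fixed-size hsm_user_item entries, then hr_data_len bytes of opaque data. A minimal userspace sketch of computing that variable-size layout (hsm_item_x and hsm_req_x are illustrative stand-ins, not the wire structs):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct hsm_item_x { uint64_t fid; uint64_t cookie; };	/* illustrative */
struct hsm_req_x  { uint32_t itemcount; uint32_t data_len; };

int main(void)
{
	struct hsm_req_x hr = { .itemcount = 3, .data_len = 16 };
	/* header, then itemcount items, then the opaque blob */
	size_t items_off = sizeof(hr);
	size_t data_off = items_off + hr.itemcount * sizeof(struct hsm_item_x);
	size_t total = data_off + hr.data_len;
	unsigned char *buf = calloc(1, total);

	if (!buf)
		return 1;
	memcpy(buf, &hr, sizeof(hr));		/* fixed header first */
	printf("total=%zu items@%zu data@%zu\n", total, items_off, data_off);
	free(buf);
	return 0;
}
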
-static struct kuc_hdr *changelog_kuc_hdr(char *buf, size_t len, u32 flags)
-{
- struct kuc_hdr *lh = (struct kuc_hdr *)buf;
-
- LASSERT(len <= KUC_CHANGELOG_MSG_MAXSIZE);
-
- lh->kuc_magic = KUC_MAGIC;
- lh->kuc_transport = KUC_TRANSPORT_CHANGELOG;
- lh->kuc_flags = flags;
- lh->kuc_msgtype = CL_RECORD;
- lh->kuc_msglen = len;
- return lh;
-}
-
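changelog_kuc_hdr() stamps a transport header at the front of the buffer, and the record payload is copied right behind it (see the memcpy(lh + 1, ...) in changelog_kkuc_cb() below). A standalone sketch of that header-then-payload framing; struct kuc_hdr_x and the 0x191C magic are illustrative stand-ins:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct kuc_hdr_x {		/* illustrative stand-in for kuc_hdr */
	uint16_t magic;
	uint8_t  transport;
	uint8_t  flags;
	uint16_t msgtype;
	uint16_t msglen;	/* header + payload */
};

#define KUC_MAGIC_X 0x191C	/* assumed magic value, for the sketch only */

static struct kuc_hdr_x *kuc_fill(void *buf, size_t len, uint8_t flags)
{
	struct kuc_hdr_x *lh = buf;

	lh->magic = KUC_MAGIC_X;
	lh->transport = 2;	/* e.g. a "changelog" transport id */
	lh->flags = flags;
	lh->msgtype = 1;	/* e.g. "record" */
	lh->msglen = (uint16_t)len;
	return lh;
}

int main(void)
{
	union { struct kuc_hdr_x hdr; unsigned char bytes[64]; } buf;
	const char payload[] = "record";
	struct kuc_hdr_x *lh;

	lh = kuc_fill(buf.bytes, sizeof(*lh) + sizeof(payload), 0);
	memcpy(lh + 1, payload, sizeof(payload));	/* payload follows header */
	printf("msgtype=%u msglen=%u\n", lh->msgtype, lh->msglen);
	return 0;
}
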
-struct changelog_show {
- __u64 cs_startrec;
- enum changelog_send_flag cs_flags;
- struct file *cs_fp;
- char *cs_buf;
- struct obd_device *cs_obd;
-};
-
-static inline char *cs_obd_name(struct changelog_show *cs)
-{
- return cs->cs_obd->obd_name;
-}
-
-static int changelog_kkuc_cb(const struct lu_env *env, struct llog_handle *llh,
- struct llog_rec_hdr *hdr, void *data)
-{
- struct changelog_show *cs = data;
- struct llog_changelog_rec *rec = (struct llog_changelog_rec *)hdr;
- struct kuc_hdr *lh;
- size_t len;
- int rc;
-
- if (rec->cr_hdr.lrh_type != CHANGELOG_REC) {
- rc = -EINVAL;
- CERROR("%s: not a changelog rec %x/%d: rc = %d\n",
- cs_obd_name(cs), rec->cr_hdr.lrh_type,
- rec->cr.cr_type, rc);
- return rc;
- }
-
- if (rec->cr.cr_index < cs->cs_startrec) {
- /* Skip entries earlier than what we are interested in */
- CDEBUG(D_HSM, "rec=%llu start=%llu\n",
- rec->cr.cr_index, cs->cs_startrec);
- return 0;
- }
-
- CDEBUG(D_HSM, "%llu %02d%-5s %llu 0x%x t=" DFID " p=" DFID
- " %.*s\n", rec->cr.cr_index, rec->cr.cr_type,
- changelog_type2str(rec->cr.cr_type), rec->cr.cr_time,
- rec->cr.cr_flags & CLF_FLAGMASK,
- PFID(&rec->cr.cr_tfid), PFID(&rec->cr.cr_pfid),
- rec->cr.cr_namelen, changelog_rec_name(&rec->cr));
-
- len = sizeof(*lh) + changelog_rec_size(&rec->cr) + rec->cr.cr_namelen;
-
- /* Set up the message */
- lh = changelog_kuc_hdr(cs->cs_buf, len, cs->cs_flags);
- memcpy(lh + 1, &rec->cr, len - sizeof(*lh));
-
- rc = libcfs_kkuc_msg_put(cs->cs_fp, lh);
- CDEBUG(D_HSM, "kucmsg fp %p len %zu rc %d\n", cs->cs_fp, len, rc);
-
- return rc;
-}
-
-static int mdc_changelog_send_thread(void *csdata)
-{
- enum llog_flag flags = LLOG_F_IS_CAT;
- struct changelog_show *cs = csdata;
- struct llog_ctxt *ctxt = NULL;
- struct llog_handle *llh = NULL;
- struct kuc_hdr *kuch;
- int rc;
-
- CDEBUG(D_HSM, "changelog to fp=%p start %llu\n",
- cs->cs_fp, cs->cs_startrec);
-
- cs->cs_buf = kzalloc(KUC_CHANGELOG_MSG_MAXSIZE, GFP_NOFS);
- if (!cs->cs_buf) {
- rc = -ENOMEM;
- goto out;
- }
-
- /* Set up the remote catalog handle */
- ctxt = llog_get_context(cs->cs_obd, LLOG_CHANGELOG_REPL_CTXT);
- if (!ctxt) {
- rc = -ENOENT;
- goto out;
- }
- rc = llog_open(NULL, ctxt, &llh, NULL, CHANGELOG_CATALOG,
- LLOG_OPEN_EXISTS);
- if (rc) {
- CERROR("%s: fail to open changelog catalog: rc = %d\n",
- cs_obd_name(cs), rc);
- goto out;
- }
-
- if (cs->cs_flags & CHANGELOG_FLAG_JOBID)
- flags |= LLOG_F_EXT_JOBID;
-
- rc = llog_init_handle(NULL, llh, flags, NULL);
- if (rc) {
- CERROR("llog_init_handle failed %d\n", rc);
- goto out;
- }
-
- rc = llog_cat_process(NULL, llh, changelog_kkuc_cb, cs, 0, 0);
-
-	/* Send EOF no matter what our result is */
- kuch = changelog_kuc_hdr(cs->cs_buf, sizeof(*kuch), cs->cs_flags);
- kuch->kuc_msgtype = CL_EOF;
- libcfs_kkuc_msg_put(cs->cs_fp, kuch);
-
-out:
- fput(cs->cs_fp);
- if (llh)
- llog_cat_close(NULL, llh);
- if (ctxt)
- llog_ctxt_put(ctxt);
- kfree(cs->cs_buf);
- kfree(cs);
- return rc;
-}
-
-static int mdc_ioc_changelog_send(struct obd_device *obd,
- struct ioc_changelog *icc)
-{
- struct changelog_show *cs;
- struct task_struct *task;
- int rc;
-
- /* Freed in mdc_changelog_send_thread */
- cs = kzalloc(sizeof(*cs), GFP_NOFS);
- if (!cs)
- return -ENOMEM;
-
- cs->cs_obd = obd;
- cs->cs_startrec = icc->icc_recno;
- /* matching fput in mdc_changelog_send_thread */
- cs->cs_fp = fget(icc->icc_id);
- cs->cs_flags = icc->icc_flags;
-
- /*
-	 * Start a new thread so that we return to the user application
-	 * before writing into our pipe.
- */
- task = kthread_run(mdc_changelog_send_thread, cs,
- "mdc_clg_send_thread");
- if (IS_ERR(task)) {
- rc = PTR_ERR(task);
- CERROR("%s: can't start changelog thread: rc = %d\n",
- cs_obd_name(cs), rc);
- kfree(cs);
- } else {
- rc = 0;
- CDEBUG(D_HSM, "%s: started changelog thread\n",
- cs_obd_name(cs));
- }
-
- CERROR("Failed to start changelog thread: %d\n", rc);
- return rc;
-}
-
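The changelog thread above is fire-and-forget: the caller hands ownership of cs (and the fget'd file) to the thread, which frees them on exit, while the spawn-failure path cleans up in the caller. A userspace sketch of the same ownership handoff with pthreads (all names illustrative):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct ctx { long startrec; };	/* stand-in for changelog_show */

static void *worker(void *arg)
{
	struct ctx *c = arg;

	printf("streaming records from %ld\n", c->startrec);
	free(c);		/* the worker owns and frees the context */
	return NULL;
}

int main(void)
{
	struct ctx *c = malloc(sizeof(*c));
	pthread_t t;

	if (!c)
		return 1;
	c->startrec = 42;
	if (pthread_create(&t, NULL, worker, c)) {
		free(c);	/* spawn failed: the creator cleans up */
		return 1;
	}
	pthread_detach(t);
	pthread_exit(NULL);	/* exit main, let the detached worker finish */
}
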
-static int mdc_ioc_hsm_ct_start(struct obd_export *exp,
- struct lustre_kernelcomm *lk);
-
-static int mdc_quotactl(struct obd_device *unused, struct obd_export *exp,
- struct obd_quotactl *oqctl)
-{
- struct ptlrpc_request *req;
- struct obd_quotactl *oqc;
- int rc;
-
- req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
- &RQF_MDS_QUOTACTL, LUSTRE_MDS_VERSION,
- MDS_QUOTACTL);
- if (!req)
- return -ENOMEM;
-
- oqc = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL);
- *oqc = *oqctl;
-
- ptlrpc_request_set_replen(req);
- ptlrpc_at_set_req_timeout(req);
- req->rq_no_resend = 1;
-
- rc = ptlrpc_queue_wait(req);
- if (rc)
- CERROR("ptlrpc_queue_wait failed, rc: %d\n", rc);
-
- if (req->rq_repmsg) {
- oqc = req_capsule_server_get(&req->rq_pill, &RMF_OBD_QUOTACTL);
- if (oqc) {
- *oqctl = *oqc;
- } else if (!rc) {
- CERROR("Can't unpack obd_quotactl\n");
- rc = -EPROTO;
- }
- } else if (!rc) {
- CERROR("Can't unpack obd_quotactl\n");
- rc = -EPROTO;
- }
- ptlrpc_req_finished(req);
-
- return rc;
-}
-
-static int mdc_ioc_swap_layouts(struct obd_export *exp,
- struct md_op_data *op_data)
-{
- LIST_HEAD(cancels);
- struct ptlrpc_request *req;
- int rc, count;
- struct mdc_swap_layouts *msl, *payload;
-
- msl = op_data->op_data;
-
-	/* When the MDT receives the MDS_SWAP_LAYOUTS RPC, the
-	 * first thing it will do is cancel the two layout
-	 * locks held by this client.
-	 * So the client must cancel its layout locks on the two FIDs
-	 * with the request RPC to avoid extra RPC round trips.
- */
- count = mdc_resource_get_unused(exp, &op_data->op_fid1, &cancels,
- LCK_CR, MDS_INODELOCK_LAYOUT |
- MDS_INODELOCK_XATTR);
- count += mdc_resource_get_unused(exp, &op_data->op_fid2, &cancels,
- LCK_CR, MDS_INODELOCK_LAYOUT |
- MDS_INODELOCK_XATTR);
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_MDS_SWAP_LAYOUTS);
- if (!req) {
- ldlm_lock_list_put(&cancels, l_bl_ast, count);
- return -ENOMEM;
- }
-
- rc = mdc_prep_elc_req(exp, req, MDS_SWAP_LAYOUTS, &cancels, count);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- mdc_swap_layouts_pack(req, op_data);
-
- payload = req_capsule_client_get(&req->rq_pill, &RMF_SWAP_LAYOUTS);
- LASSERT(payload);
-
- *payload = *msl;
-
- ptlrpc_request_set_replen(req);
-
- rc = ptlrpc_queue_wait(req);
-
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
- void *karg, void __user *uarg)
-{
- struct obd_device *obd = exp->exp_obd;
- struct obd_ioctl_data *data = karg;
- struct obd_import *imp = obd->u.cli.cl_import;
- int rc;
-
- if (!try_module_get(THIS_MODULE)) {
- CERROR("%s: cannot get module '%s'\n", obd->obd_name,
- module_name(THIS_MODULE));
- return -EINVAL;
- }
- switch (cmd) {
- case OBD_IOC_CHANGELOG_SEND:
- rc = mdc_ioc_changelog_send(obd, karg);
- goto out;
- case OBD_IOC_CHANGELOG_CLEAR: {
- struct ioc_changelog *icc = karg;
- struct changelog_setinfo cs = {
- .cs_recno = icc->icc_recno,
- .cs_id = icc->icc_id
- };
-
- rc = obd_set_info_async(NULL, exp, strlen(KEY_CHANGELOG_CLEAR),
- KEY_CHANGELOG_CLEAR, sizeof(cs), &cs,
- NULL);
- goto out;
- }
- case OBD_IOC_FID2PATH:
- rc = mdc_ioc_fid2path(exp, karg);
- goto out;
- case LL_IOC_HSM_CT_START:
- rc = mdc_ioc_hsm_ct_start(exp, karg);
- /* ignore if it was already registered on this MDS. */
- if (rc == -EEXIST)
- rc = 0;
- goto out;
- case LL_IOC_HSM_PROGRESS:
- rc = mdc_ioc_hsm_progress(exp, karg);
- goto out;
- case LL_IOC_HSM_STATE_GET:
- rc = mdc_ioc_hsm_state_get(exp, karg);
- goto out;
- case LL_IOC_HSM_STATE_SET:
- rc = mdc_ioc_hsm_state_set(exp, karg);
- goto out;
- case LL_IOC_HSM_ACTION:
- rc = mdc_ioc_hsm_current_action(exp, karg);
- goto out;
- case LL_IOC_HSM_REQUEST:
- rc = mdc_ioc_hsm_request(exp, karg);
- goto out;
- case OBD_IOC_CLIENT_RECOVER:
- rc = ptlrpc_recover_import(imp, data->ioc_inlbuf1, 0);
- if (rc < 0)
- goto out;
- rc = 0;
- goto out;
- case IOC_OSC_SET_ACTIVE:
- rc = ptlrpc_set_import_active(imp, data->ioc_offset);
- goto out;
- case OBD_IOC_PING_TARGET:
- rc = ptlrpc_obd_ping(obd);
- goto out;
- /*
-	 * Normally the IOC_OBD_STATFS and OBD_IOC_QUOTACTL ioctls are handled
-	 * by LMV instead of MDC. But when the cluster is upgraded from 1.8,
-	 * there is no LMV layer, so we may be called here. Eventually
- * this code should be removed.
- * bz20731, LU-592.
- */
- case IOC_OBD_STATFS: {
- struct obd_statfs stat_buf = {0};
-
- if (*((__u32 *)data->ioc_inlbuf2) != 0) {
- rc = -ENODEV;
- goto out;
- }
-
- /* copy UUID */
- if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(obd),
- min_t(size_t, data->ioc_plen2,
- sizeof(struct obd_uuid)))) {
- rc = -EFAULT;
- goto out;
- }
-
- rc = mdc_statfs(NULL, obd->obd_self_export, &stat_buf,
- get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ,
- 0);
- if (rc != 0)
- goto out;
-
- if (copy_to_user(data->ioc_pbuf1, &stat_buf,
- min_t(size_t, data->ioc_plen1,
- sizeof(stat_buf)))) {
- rc = -EFAULT;
- goto out;
- }
-
- rc = 0;
- goto out;
- }
- case OBD_IOC_QUOTACTL: {
- struct if_quotactl *qctl = karg;
- struct obd_quotactl *oqctl;
-
- oqctl = kzalloc(sizeof(*oqctl), GFP_NOFS);
- if (!oqctl) {
- rc = -ENOMEM;
- goto out;
- }
-
- QCTL_COPY(oqctl, qctl);
- rc = obd_quotactl(exp, oqctl);
- if (rc == 0) {
- QCTL_COPY(qctl, oqctl);
- qctl->qc_valid = QC_MDTIDX;
- qctl->obd_uuid = obd->u.cli.cl_target_uuid;
- }
-
- kfree(oqctl);
- goto out;
- }
- case LL_IOC_GET_CONNECT_FLAGS:
- if (copy_to_user(uarg, exp_connect_flags_ptr(exp),
- sizeof(*exp_connect_flags_ptr(exp)))) {
- rc = -EFAULT;
- goto out;
- }
-
- rc = 0;
- goto out;
- case LL_IOC_LOV_SWAP_LAYOUTS:
- rc = mdc_ioc_swap_layouts(exp, karg);
- goto out;
- default:
- CERROR("unrecognised ioctl: cmd = %#x\n", cmd);
- rc = -ENOTTY;
- goto out;
- }
-out:
- module_put(THIS_MODULE);
-
- return rc;
-}
-
-static int mdc_get_info_rpc(struct obd_export *exp,
- u32 keylen, void *key,
- int vallen, void *val)
-{
- struct obd_import *imp = class_exp2cliimp(exp);
- struct ptlrpc_request *req;
- char *tmp;
- int rc = -EINVAL;
-
- req = ptlrpc_request_alloc(imp, &RQF_MDS_GET_INFO);
- if (!req)
- return -ENOMEM;
-
- req_capsule_set_size(&req->rq_pill, &RMF_GETINFO_KEY,
- RCL_CLIENT, keylen);
- req_capsule_set_size(&req->rq_pill, &RMF_GETINFO_VALLEN,
- RCL_CLIENT, sizeof(__u32));
-
- rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_GET_INFO);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_GETINFO_KEY);
- memcpy(tmp, key, keylen);
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_GETINFO_VALLEN);
- memcpy(tmp, &vallen, sizeof(__u32));
-
- req_capsule_set_size(&req->rq_pill, &RMF_GETINFO_VAL,
- RCL_SERVER, vallen);
- ptlrpc_request_set_replen(req);
-
- rc = ptlrpc_queue_wait(req);
-	/* -EREMOTE means the get_info result is partial and needs to
-	 * continue on another MDT; see the fid2path handling in lmv_iocontrol
- */
- if (rc == 0 || rc == -EREMOTE) {
- tmp = req_capsule_server_get(&req->rq_pill, &RMF_GETINFO_VAL);
- memcpy(val, tmp, vallen);
- if (ptlrpc_rep_need_swab(req)) {
- if (KEY_IS(KEY_FID2PATH))
- lustre_swab_fid2path(val);
- }
- }
- ptlrpc_req_finished(req);
-
- return rc;
-}
-
-static void lustre_swab_hai(struct hsm_action_item *h)
-{
- __swab32s(&h->hai_len);
- __swab32s(&h->hai_action);
- lustre_swab_lu_fid(&h->hai_fid);
- lustre_swab_lu_fid(&h->hai_dfid);
- __swab64s(&h->hai_cookie);
- __swab64s(&h->hai_extent.offset);
- __swab64s(&h->hai_extent.length);
- __swab64s(&h->hai_gid);
-}
-
-static void lustre_swab_hal(struct hsm_action_list *h)
-{
- struct hsm_action_item *hai;
- u32 i;
-
- __swab32s(&h->hal_version);
- __swab32s(&h->hal_count);
- __swab32s(&h->hal_archive_id);
- __swab64s(&h->hal_flags);
- hai = hai_first(h);
- for (i = 0; i < h->hal_count; i++, hai = hai_next(hai))
- lustre_swab_hai(hai);
-}
-
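lustre_swab_hal() walks a packed array of variable-length items via hai_first()/hai_next(), where each item records its own length. A self-contained sketch of stepping through such records (struct rec and the 8-byte padding are assumptions of this sketch, not the hsm_action_item layout):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct rec {			/* illustrative variable-length record */
	uint32_t len;		/* total size of this record */
	char name[];
};

static struct rec *rec_next(struct rec *r)
{
	size_t step = (r->len + 7) & ~(size_t)7;	/* assumed 8-byte padding */

	return (struct rec *)((char *)r + step);
}

int main(void)
{
	_Alignas(8) unsigned char buf[64];
	struct rec *r = (struct rec *)buf;
	struct rec *r2;
	unsigned int i;

	/* build two records back to back */
	r->len = sizeof(*r) + 4;
	memcpy(r->name, "one", 4);
	r2 = rec_next(r);
	r2->len = sizeof(*r2) + 4;
	memcpy(r2->name, "two", 4);

	for (i = 0, r = (struct rec *)buf; i < 2; i++, r = rec_next(r))
		printf("%s\n", r->name);
	return 0;
}
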
-static void lustre_swab_kuch(struct kuc_hdr *l)
-{
- __swab16s(&l->kuc_magic);
- /* __u8 l->kuc_transport */
- __swab16s(&l->kuc_msgtype);
- __swab16s(&l->kuc_msglen);
-}
-
-static int mdc_ioc_hsm_ct_start(struct obd_export *exp,
- struct lustre_kernelcomm *lk)
-{
- struct obd_import *imp = class_exp2cliimp(exp);
- __u32 archive = lk->lk_data;
- int rc = 0;
-
- if (lk->lk_group != KUC_GRP_HSM) {
- CERROR("Bad copytool group %d\n", lk->lk_group);
- return -EINVAL;
- }
-
- CDEBUG(D_HSM, "CT start r%d w%d u%d g%d f%#x\n", lk->lk_rfd, lk->lk_wfd,
- lk->lk_uid, lk->lk_group, lk->lk_flags);
-
- if (lk->lk_flags & LK_FLG_STOP) {
- /* Unregister with the coordinator */
- rc = mdc_ioc_hsm_ct_unregister(imp);
- } else {
- rc = mdc_ioc_hsm_ct_register(imp, archive);
- }
-
- return rc;
-}
-
-/**
- * Send a message to any listening copytools
- * @param val KUC message (kuc_hdr + hsm_action_list)
- * @param len total length of message
- */
-static int mdc_hsm_copytool_send(size_t len, void *val)
-{
- struct kuc_hdr *lh = (struct kuc_hdr *)val;
- struct hsm_action_list *hal = (struct hsm_action_list *)(lh + 1);
-
- if (len < sizeof(*lh) + sizeof(*hal)) {
- CERROR("Short HSM message %zu < %zu\n", len,
- sizeof(*lh) + sizeof(*hal));
- return -EPROTO;
- }
- if (lh->kuc_magic == __swab16(KUC_MAGIC)) {
- lustre_swab_kuch(lh);
- lustre_swab_hal(hal);
- } else if (lh->kuc_magic != KUC_MAGIC) {
- CERROR("Bad magic %x!=%x\n", lh->kuc_magic, KUC_MAGIC);
- return -EPROTO;
- }
-
- CDEBUG(D_HSM,
- "Received message mg=%x t=%d m=%d l=%d actions=%d on %s\n",
- lh->kuc_magic, lh->kuc_transport, lh->kuc_msgtype,
- lh->kuc_msglen, hal->hal_count, hal->hal_fsname);
-
- /* Broadcast to HSM listeners */
- return libcfs_kkuc_group_put(KUC_GRP_HSM, lh);
-}
-
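The magic check above is the standard trick for talking to a peer of unknown byte order: if the magic field reads back as the byte-swapped constant, every multi-byte field in the message needs swapping. A standalone sketch (MSG_MAGIC and struct msg are illustrative):

#include <stdint.h>
#include <stdio.h>

static uint16_t swab16(uint16_t v)
{
	return (uint16_t)((v << 8) | (v >> 8));
}

#define MSG_MAGIC 0x191C	/* assumed 16-bit wire magic */

struct msg {
	uint16_t magic;
	uint16_t len;
};

static int fixup_msg(struct msg *m)
{
	if (m->magic == swab16(MSG_MAGIC)) {	/* peer has foreign byte order */
		m->magic = swab16(m->magic);
		m->len = swab16(m->len);
	} else if (m->magic != MSG_MAGIC) {
		return -1;			/* corrupt, not merely swapped */
	}
	return 0;
}

int main(void)
{
	struct msg m = { swab16(MSG_MAGIC), swab16(24) };

	if (!fixup_msg(&m))
		printf("len=%u\n", m.len);	/* prints 24 */
	return 0;
}
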
-/**
- * callback function passed to kuc for re-registering each HSM copytool
- * running on MDC, after MDT shutdown/recovery.
- * @param data copytool registration data
- * @param cb_arg callback argument (obd_import)
- */
-static int mdc_hsm_ct_reregister(void *data, void *cb_arg)
-{
- struct kkuc_ct_data *kcd = data;
- struct obd_import *imp = (struct obd_import *)cb_arg;
- int rc;
-
- if (!kcd || kcd->kcd_magic != KKUC_CT_DATA_MAGIC)
- return -EPROTO;
-
- if (!obd_uuid_equals(&kcd->kcd_uuid, &imp->imp_obd->obd_uuid))
- return 0;
-
- CDEBUG(D_HA, "%s: recover copytool registration to MDT (archive=%#x)\n",
- imp->imp_obd->obd_name, kcd->kcd_archive);
- rc = mdc_ioc_hsm_ct_register(imp, kcd->kcd_archive);
-
- /* ignore error if the copytool is already registered */
- return (rc == -EEXIST) ? 0 : rc;
-}
-
-static int mdc_set_info_async(const struct lu_env *env,
- struct obd_export *exp,
- u32 keylen, void *key,
- u32 vallen, void *val,
- struct ptlrpc_request_set *set)
-{
- struct obd_import *imp = class_exp2cliimp(exp);
- int rc;
-
- if (KEY_IS(KEY_READ_ONLY)) {
- if (vallen != sizeof(int))
- return -EINVAL;
-
- spin_lock(&imp->imp_lock);
- if (*((int *)val)) {
- imp->imp_connect_flags_orig |= OBD_CONNECT_RDONLY;
- imp->imp_connect_data.ocd_connect_flags |=
- OBD_CONNECT_RDONLY;
- } else {
- imp->imp_connect_flags_orig &= ~OBD_CONNECT_RDONLY;
- imp->imp_connect_data.ocd_connect_flags &=
- ~OBD_CONNECT_RDONLY;
- }
- spin_unlock(&imp->imp_lock);
-
- return do_set_info_async(imp, MDS_SET_INFO, LUSTRE_MDS_VERSION,
- keylen, key, vallen, val, set);
- }
- if (KEY_IS(KEY_SPTLRPC_CONF)) {
- sptlrpc_conf_client_adapt(exp->exp_obd);
- return 0;
- }
- if (KEY_IS(KEY_FLUSH_CTX)) {
- sptlrpc_import_flush_my_ctx(imp);
- return 0;
- }
- if (KEY_IS(KEY_CHANGELOG_CLEAR)) {
- rc = do_set_info_async(imp, MDS_SET_INFO, LUSTRE_MDS_VERSION,
- keylen, key, vallen, val, set);
- return rc;
- }
- if (KEY_IS(KEY_HSM_COPYTOOL_SEND)) {
- rc = mdc_hsm_copytool_send(vallen, val);
- return rc;
- }
- if (KEY_IS(KEY_DEFAULT_EASIZE)) {
- u32 *default_easize = val;
-
- exp->exp_obd->u.cli.cl_default_mds_easize = *default_easize;
- return 0;
- }
-
- CERROR("Unknown key %s\n", (char *)key);
- return -EINVAL;
-}
-
-static int mdc_get_info(const struct lu_env *env, struct obd_export *exp,
- __u32 keylen, void *key, __u32 *vallen, void *val)
-{
- int rc = -EINVAL;
-
- if (KEY_IS(KEY_MAX_EASIZE)) {
- u32 mdsize, *max_easize;
-
- if (*vallen != sizeof(int))
- return -EINVAL;
- mdsize = *(u32 *)val;
- if (mdsize > exp->exp_obd->u.cli.cl_max_mds_easize)
- exp->exp_obd->u.cli.cl_max_mds_easize = mdsize;
- max_easize = val;
- *max_easize = exp->exp_obd->u.cli.cl_max_mds_easize;
- return 0;
- } else if (KEY_IS(KEY_DEFAULT_EASIZE)) {
- u32 *default_easize;
-
- if (*vallen != sizeof(int))
- return -EINVAL;
- default_easize = val;
- *default_easize = exp->exp_obd->u.cli.cl_default_mds_easize;
- return 0;
- } else if (KEY_IS(KEY_CONN_DATA)) {
- struct obd_import *imp = class_exp2cliimp(exp);
- struct obd_connect_data *data = val;
-
- if (*vallen != sizeof(*data))
- return -EINVAL;
-
- *data = imp->imp_connect_data;
- return 0;
- } else if (KEY_IS(KEY_TGT_COUNT)) {
- *((u32 *)val) = 1;
- return 0;
- }
-
- rc = mdc_get_info_rpc(exp, keylen, key, *vallen, val);
-
- return rc;
-}
-
-static int mdc_sync(struct obd_export *exp, const struct lu_fid *fid,
- struct ptlrpc_request **request)
-{
- struct ptlrpc_request *req;
- int rc;
-
- *request = NULL;
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_SYNC);
- if (!req)
- return -ENOMEM;
-
- rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_SYNC);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- mdc_pack_body(req, fid, 0, 0, -1, 0);
-
- ptlrpc_request_set_replen(req);
-
- rc = ptlrpc_queue_wait(req);
- if (rc)
- ptlrpc_req_finished(req);
- else
- *request = req;
- return rc;
-}
-
-static int mdc_import_event(struct obd_device *obd, struct obd_import *imp,
- enum obd_import_event event)
-{
- int rc = 0;
-
- LASSERT(imp->imp_obd == obd);
-
- switch (event) {
- case IMP_EVENT_INACTIVE: {
- struct client_obd *cli = &obd->u.cli;
- /*
-		 * Flush the current sequence so the client obtains a new
-		 * one from the server after a disconnect/reconnect.
- */
- if (cli->cl_seq)
- seq_client_flush(cli->cl_seq);
-
- rc = obd_notify_observer(obd, obd, OBD_NOTIFY_INACTIVE, NULL);
- break;
- }
- case IMP_EVENT_INVALIDATE: {
- struct ldlm_namespace *ns = obd->obd_namespace;
-
- ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
-
- break;
- }
- case IMP_EVENT_ACTIVE:
- rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVE, NULL);
- /* redo the kuc registration after reconnecting */
- if (rc == 0)
- /* re-register HSM agents */
- rc = libcfs_kkuc_group_foreach(KUC_GRP_HSM,
- mdc_hsm_ct_reregister,
- (void *)imp);
- break;
- case IMP_EVENT_OCD:
- rc = obd_notify_observer(obd, obd, OBD_NOTIFY_OCD, NULL);
- break;
- case IMP_EVENT_DISCON:
- case IMP_EVENT_DEACTIVATE:
- case IMP_EVENT_ACTIVATE:
- break;
- default:
- CERROR("Unknown import event %x\n", event);
- LBUG();
- }
- return rc;
-}
-
-int mdc_fid_alloc(const struct lu_env *env, struct obd_export *exp,
- struct lu_fid *fid, struct md_op_data *op_data)
-{
- struct client_obd *cli = &exp->exp_obd->u.cli;
- struct lu_client_seq *seq = cli->cl_seq;
-
- return seq_client_alloc_fid(env, seq, fid);
-}
-
-static struct obd_uuid *mdc_get_uuid(struct obd_export *exp)
-{
- struct client_obd *cli = &exp->exp_obd->u.cli;
-
- return &cli->cl_target_uuid;
-}
-
-/**
- * Determine whether the lock can be canceled before replaying it during
- * recovery; returns a non-zero value if the lock can be canceled, or
- * zero if it cannot.
- */
-static int mdc_cancel_weight(struct ldlm_lock *lock)
-{
- if (lock->l_resource->lr_type != LDLM_IBITS)
- return 0;
-
- /* FIXME: if we ever get into a situation where there are too many
- * opened files with open locks on a single node, then we really
-	 * should replay these open locks to re-acquire them.
- */
- if (lock->l_policy_data.l_inodebits.bits & MDS_INODELOCK_OPEN)
- return 0;
-
- return 1;
-}
-
-static int mdc_resource_inode_free(struct ldlm_resource *res)
-{
- if (res->lr_lvb_inode)
- res->lr_lvb_inode = NULL;
-
- return 0;
-}
-
-static struct ldlm_valblock_ops inode_lvbo = {
- .lvbo_free = mdc_resource_inode_free,
-};
-
-static int mdc_llog_init(struct obd_device *obd)
-{
- struct obd_llog_group *olg = &obd->obd_olg;
- struct llog_ctxt *ctxt;
- int rc;
-
- rc = llog_setup(NULL, obd, olg, LLOG_CHANGELOG_REPL_CTXT, obd,
- &llog_client_ops);
- if (rc)
- return rc;
-
- ctxt = llog_group_get_ctxt(olg, LLOG_CHANGELOG_REPL_CTXT);
- llog_initiator_connect(ctxt);
- llog_ctxt_put(ctxt);
-
- return 0;
-}
-
-static void mdc_llog_finish(struct obd_device *obd)
-{
- struct llog_ctxt *ctxt;
-
- ctxt = llog_get_context(obd, LLOG_CHANGELOG_REPL_CTXT);
- if (ctxt)
- llog_cleanup(NULL, ctxt);
-}
-
-static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
-{
- struct lprocfs_static_vars lvars = { NULL };
- int rc;
-
- rc = ptlrpcd_addref();
- if (rc < 0)
- return rc;
-
- rc = client_obd_setup(obd, cfg);
- if (rc)
- goto err_ptlrpcd_decref;
-
- lprocfs_mdc_init_vars(&lvars);
- lprocfs_obd_setup(obd, lvars.obd_vars, lvars.sysfs_vars);
- sptlrpc_lprocfs_cliobd_attach(obd);
- ptlrpc_lprocfs_register_obd(obd);
-
- ns_register_cancel(obd->obd_namespace, mdc_cancel_weight);
-
- obd->obd_namespace->ns_lvbo = &inode_lvbo;
-
- rc = mdc_llog_init(obd);
- if (rc) {
- mdc_cleanup(obd);
- CERROR("failed to setup llogging subsystems\n");
- return rc;
- }
-
- return rc;
-
-err_ptlrpcd_decref:
- ptlrpcd_decref();
- return rc;
-}
-
-/* Initialize the default and maximum LOV EA sizes. This allows
- * us to make MDS RPCs with large enough reply buffers to hold a default
- * sized EA without having to calculate this (via a call into the
- * LOV + OSCs) each time we make an RPC. The maximum size is also tracked
- * but not used to avoid wastefully vmalloc()'ing large reply buffers when
- * a large number of stripes is possible. If a larger reply buffer is
- * required it will be reallocated in the ptlrpc layer due to overflow.
- */
-static int mdc_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize)
-{
- struct obd_device *obd = exp->exp_obd;
- struct client_obd *cli = &obd->u.cli;
-
- if (cli->cl_max_mds_easize < easize)
- cli->cl_max_mds_easize = easize;
-
- if (cli->cl_default_mds_easize < def_easize)
- cli->cl_default_mds_easize = def_easize;
-
- return 0;
-}
-
-static int mdc_precleanup(struct obd_device *obd)
-{
- /* Failsafe, ok if racy */
- if (obd->obd_type->typ_refcnt <= 1)
- libcfs_kkuc_group_rem(0, KUC_GRP_HSM);
-
- obd_cleanup_client_import(obd);
- ptlrpc_lprocfs_unregister_obd(obd);
- lprocfs_obd_cleanup(obd);
- mdc_llog_finish(obd);
- return 0;
-}
-
-static int mdc_cleanup(struct obd_device *obd)
-{
- ptlrpcd_decref();
-
- return client_obd_cleanup(obd);
-}
-
-static int mdc_process_config(struct obd_device *obd, u32 len, void *buf)
-{
- struct lustre_cfg *lcfg = buf;
- struct lprocfs_static_vars lvars = { NULL };
- int rc = 0;
-
- lprocfs_mdc_init_vars(&lvars);
- switch (lcfg->lcfg_command) {
- default:
- rc = class_process_proc_param(PARAM_MDC, lvars.obd_vars,
- lcfg, obd);
- if (rc > 0)
- rc = 0;
- break;
- }
- return rc;
-}
-
-static struct obd_ops mdc_obd_ops = {
- .owner = THIS_MODULE,
- .setup = mdc_setup,
- .precleanup = mdc_precleanup,
- .cleanup = mdc_cleanup,
- .add_conn = client_import_add_conn,
- .del_conn = client_import_del_conn,
- .connect = client_connect_import,
- .disconnect = client_disconnect_export,
- .iocontrol = mdc_iocontrol,
- .set_info_async = mdc_set_info_async,
- .statfs = mdc_statfs,
- .fid_init = client_fid_init,
- .fid_fini = client_fid_fini,
- .fid_alloc = mdc_fid_alloc,
- .import_event = mdc_import_event,
- .get_info = mdc_get_info,
- .process_config = mdc_process_config,
- .get_uuid = mdc_get_uuid,
- .quotactl = mdc_quotactl,
-};
-
-static struct md_ops mdc_md_ops = {
- .getstatus = mdc_getstatus,
- .null_inode = mdc_null_inode,
- .close = mdc_close,
- .create = mdc_create,
- .enqueue = mdc_enqueue,
- .getattr = mdc_getattr,
- .getattr_name = mdc_getattr_name,
- .intent_lock = mdc_intent_lock,
- .link = mdc_link,
- .rename = mdc_rename,
- .setattr = mdc_setattr,
- .setxattr = mdc_setxattr,
- .getxattr = mdc_getxattr,
- .sync = mdc_sync,
- .read_page = mdc_read_page,
- .unlink = mdc_unlink,
- .cancel_unused = mdc_cancel_unused,
- .init_ea_size = mdc_init_ea_size,
- .set_lock_data = mdc_set_lock_data,
- .lock_match = mdc_lock_match,
- .get_lustre_md = mdc_get_lustre_md,
- .free_lustre_md = mdc_free_lustre_md,
- .set_open_replay_data = mdc_set_open_replay_data,
- .clear_open_replay_data = mdc_clear_open_replay_data,
- .intent_getattr_async = mdc_intent_getattr_async,
- .revalidate_lock = mdc_revalidate_lock
-};
-
-static int __init mdc_init(void)
-{
- struct lprocfs_static_vars lvars = { NULL };
- int rc;
-
- rc = libcfs_setup();
- if (rc)
- return rc;
-
- lprocfs_mdc_init_vars(&lvars);
-
- return class_register_type(&mdc_obd_ops, &mdc_md_ops,
- LUSTRE_MDC_NAME, NULL);
-}
-
-static void /*__exit*/ mdc_exit(void)
-{
- class_unregister_type(LUSTRE_MDC_NAME);
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Metadata Client");
-MODULE_VERSION(LUSTRE_VERSION_STRING);
-MODULE_LICENSE("GPL");
-
-module_init(mdc_init);
-module_exit(mdc_exit);
diff --git a/drivers/staging/lustre/lustre/mgc/Makefile b/drivers/staging/lustre/lustre/mgc/Makefile
deleted file mode 100644
index 8abf108dbcf7..000000000000
--- a/drivers/staging/lustre/lustre/mgc/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LUSTRE_FS) += mgc.o
-mgc-y := mgc_request.o lproc_mgc.o
diff --git a/drivers/staging/lustre/lustre/mgc/lproc_mgc.c b/drivers/staging/lustre/lustre/mgc/lproc_mgc.c
deleted file mode 100644
index 636770624e8f..000000000000
--- a/drivers/staging/lustre/lustre/mgc/lproc_mgc.c
+++ /dev/null
@@ -1,69 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <linux/vfs.h>
-#include <obd_class.h>
-#include <lprocfs_status.h>
-#include "mgc_internal.h"
-
-LPROC_SEQ_FOPS_RO_TYPE(mgc, connect_flags);
-LPROC_SEQ_FOPS_RO_TYPE(mgc, server_uuid);
-LPROC_SEQ_FOPS_RO_TYPE(mgc, conn_uuid);
-LPROC_SEQ_FOPS_RO_TYPE(mgc, import);
-LPROC_SEQ_FOPS_RO_TYPE(mgc, state);
-
-LPROC_SEQ_FOPS_WR_ONLY(mgc, ping);
-
-static int mgc_ir_state_seq_show(struct seq_file *m, void *v)
-{
- return lprocfs_mgc_rd_ir_state(m, m->private);
-}
-
-LPROC_SEQ_FOPS_RO(mgc_ir_state);
-
-static struct lprocfs_vars lprocfs_mgc_obd_vars[] = {
- { "ping", &mgc_ping_fops, NULL, 0222 },
- { "connect_flags", &mgc_connect_flags_fops, NULL, 0 },
- { "mgs_server_uuid", &mgc_server_uuid_fops, NULL, 0 },
- { "mgs_conn_uuid", &mgc_conn_uuid_fops, NULL, 0 },
- { "import", &mgc_import_fops, NULL, 0 },
- { "state", &mgc_state_fops, NULL, 0 },
- { "ir_state", &mgc_ir_state_fops, NULL, 0 },
- { NULL }
-};
-
-void lprocfs_mgc_init_vars(struct lprocfs_static_vars *lvars)
-{
- lvars->obd_vars = lprocfs_mgc_obd_vars;
-}
diff --git a/drivers/staging/lustre/lustre/mgc/mgc_internal.h b/drivers/staging/lustre/lustre/mgc/mgc_internal.h
deleted file mode 100644
index 9541892b67c7..000000000000
--- a/drivers/staging/lustre/lustre/mgc/mgc_internal.h
+++ /dev/null
@@ -1,57 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef _MGC_INTERNAL_H
-#define _MGC_INTERNAL_H
-
-#include <lustre_lib.h>
-#include <lustre_dlm.h>
-#include <lustre_log.h>
-#include <lustre_export.h>
-
-void lprocfs_mgc_init_vars(struct lprocfs_static_vars *lvars);
-int lprocfs_mgc_rd_ir_state(struct seq_file *m, void *data);
-
-int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld);
-
-static inline int cld_is_sptlrpc(struct config_llog_data *cld)
-{
- return cld->cld_type == CONFIG_T_SPTLRPC;
-}
-
-static inline int cld_is_recover(struct config_llog_data *cld)
-{
- return cld->cld_type == CONFIG_T_RECOVER;
-}
-
-#endif /* _MGC_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c
deleted file mode 100644
index 32df804614d3..000000000000
--- a/drivers/staging/lustre/lustre/mgc/mgc_request.c
+++ /dev/null
@@ -1,1851 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/mgc/mgc_request.c
- *
- * Author: Nathan Rutman <nathan@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_MGC
-#define D_MGC D_CONFIG /*|D_WARNING*/
-
-#include <linux/module.h>
-#include <linux/random.h>
-
-#include <lprocfs_status.h>
-#include <lustre_dlm.h>
-#include <lustre_disk.h>
-#include <lustre_log.h>
-#include <lustre_swab.h>
-#include <obd_class.h>
-
-#include "mgc_internal.h"
-
-static int mgc_name2resid(char *name, int len, struct ldlm_res_id *res_id,
- int type)
-{
- __u64 resname = 0;
-
- if (len > sizeof(resname)) {
- CERROR("name too long: %s\n", name);
- return -EINVAL;
- }
- if (len <= 0) {
- CERROR("missing name: %s\n", name);
- return -EINVAL;
- }
- memcpy(&resname, name, len);
-
- /* Always use the same endianness for the resid */
- memset(res_id, 0, sizeof(*res_id));
- res_id->name[0] = cpu_to_le64(resname);
-	/* XXX: unfortunately, sptlrpc and config llog share one lock */
- switch (type) {
- case CONFIG_T_CONFIG:
- case CONFIG_T_SPTLRPC:
- resname = 0;
- break;
- case CONFIG_T_RECOVER:
- case CONFIG_T_PARAMS:
- resname = type;
- break;
- default:
- LBUG();
- }
- res_id->name[1] = cpu_to_le64(resname);
- CDEBUG(D_MGC, "log %s to resid %#llx/%#llx (%.8s)\n", name,
- res_id->name[0], res_id->name[1], (char *)&res_id->name[0]);
- return 0;
-}
-
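mgc_name2resid() packs at most eight name bytes into a __u64 and then forces a fixed endianness so that clients and servers of different byte orders agree on the resource ID. A userspace sketch of the same pack-and-normalize step (name_to_resid() is illustrative; little-endian is an assumption here):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <endian.h>

static int name_to_resid(const char *name, uint64_t *resid)
{
	size_t len = strlen(name);
	uint64_t resname = 0;

	if (len == 0 || len > sizeof(resname))
		return -1;
	memcpy(&resname, name, len);		/* name bytes, zero padded */
	*resid = htole64(resname);		/* fixed byte order on the wire */
	return 0;
}

int main(void)
{
	uint64_t id;

	if (!name_to_resid("lustre", &id))
		printf("resid %#llx\n", (unsigned long long)id);
	return 0;
}
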
-int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type)
-{
-	/* fsname is at most 8 chars long and may contain "-",
- * e.g. "lustre", "SUN-000"
- */
- return mgc_name2resid(fsname, strlen(fsname), res_id, type);
-}
-EXPORT_SYMBOL(mgc_fsname2resid);
-
-static int mgc_logname2resid(char *logname, struct ldlm_res_id *res_id, int type)
-{
- char *name_end;
- int len;
-
- /* logname consists of "fsname-nodetype".
- * e.g. "lustre-MDT0001", "SUN-000-client"
- * there is an exception: llog "params"
- */
- name_end = strrchr(logname, '-');
- if (!name_end)
- len = strlen(logname);
- else
- len = name_end - logname;
- return mgc_name2resid(logname, len, res_id, type);
-}
-
-/********************** config llog list **********************/
-static LIST_HEAD(config_llog_list);
-static DEFINE_SPINLOCK(config_list_lock);
-
-/* Take a reference to a config log */
-static int config_log_get(struct config_llog_data *cld)
-{
- atomic_inc(&cld->cld_refcount);
- CDEBUG(D_INFO, "log %s refs %d\n", cld->cld_logname,
- atomic_read(&cld->cld_refcount));
- return 0;
-}
-
-/* Drop a reference to a config log. When no longer referenced,
- * we can free the config log data
- */
-static void config_log_put(struct config_llog_data *cld)
-{
- CDEBUG(D_INFO, "log %s refs %d\n", cld->cld_logname,
- atomic_read(&cld->cld_refcount));
- LASSERT(atomic_read(&cld->cld_refcount) > 0);
-
- /* spinlock to make sure no item with 0 refcount in the list */
- if (atomic_dec_and_lock(&cld->cld_refcount, &config_list_lock)) {
- list_del(&cld->cld_list_chain);
- spin_unlock(&config_list_lock);
-
- CDEBUG(D_MGC, "dropping config log %s\n", cld->cld_logname);
-
- if (cld->cld_recover)
- config_log_put(cld->cld_recover);
- if (cld->cld_params)
- config_log_put(cld->cld_params);
- if (cld->cld_sptlrpc)
- config_log_put(cld->cld_sptlrpc);
- if (cld_is_sptlrpc(cld))
- sptlrpc_conf_log_stop(cld->cld_logname);
-
- class_export_put(cld->cld_mgcexp);
- kfree(cld);
- }
-}
-
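config_log_put() uses atomic_dec_and_lock(): the list lock is taken only when the count is about to hit zero, so walkers holding the lock never see an item with refcount 0. A pthread/stdatomic sketch of the same dec-and-lock shape (node_put() and the on_list flag are illustrative):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct node {
	atomic_int refs;
	int on_list;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void node_put(struct node *n)
{
	int old = atomic_load(&n->refs);

	/* fast path: drop a reference that is not the last one */
	while (old > 1) {
		if (atomic_compare_exchange_weak(&n->refs, &old, old - 1))
			return;
	}
	/* slow path: the last reference; unlink under the lock */
	pthread_mutex_lock(&list_lock);
	if (atomic_fetch_sub(&n->refs, 1) == 1)
		n->on_list = 0;		/* stand-in for list_del() + free */
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	struct node n = { .refs = 2, .on_list = 1 };

	node_put(&n);
	node_put(&n);
	printf("on_list=%d refs=%d\n", n.on_list, atomic_load(&n.refs));
	return 0;
}
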
-/* Find a config log by name */
-static
-struct config_llog_data *config_log_find(char *logname,
- struct config_llog_instance *cfg)
-{
- struct config_llog_data *cld;
- struct config_llog_data *found = NULL;
- void *instance;
-
- LASSERT(logname);
-
- instance = cfg ? cfg->cfg_instance : NULL;
- spin_lock(&config_list_lock);
- list_for_each_entry(cld, &config_llog_list, cld_list_chain) {
- /* check if instance equals */
- if (instance != cld->cld_cfg.cfg_instance)
- continue;
-
- /* instance may be NULL, should check name */
- if (strcmp(logname, cld->cld_logname) == 0) {
- found = cld;
- config_log_get(found);
- break;
- }
- }
- spin_unlock(&config_list_lock);
- return found;
-}
-
-static
-struct config_llog_data *do_config_log_add(struct obd_device *obd,
- char *logname,
- int type,
- struct config_llog_instance *cfg,
- struct super_block *sb)
-{
- struct config_llog_data *cld;
- int rc;
-
- CDEBUG(D_MGC, "do adding config log %s:%p\n", logname,
- cfg ? cfg->cfg_instance : NULL);
-
- cld = kzalloc(sizeof(*cld) + strlen(logname) + 1, GFP_NOFS);
- if (!cld)
- return ERR_PTR(-ENOMEM);
-
- rc = mgc_logname2resid(logname, &cld->cld_resid, type);
- if (rc) {
- kfree(cld);
- return ERR_PTR(rc);
- }
-
- strcpy(cld->cld_logname, logname);
- if (cfg)
- cld->cld_cfg = *cfg;
- else
- cld->cld_cfg.cfg_callback = class_config_llog_handler;
- mutex_init(&cld->cld_lock);
- cld->cld_cfg.cfg_last_idx = 0;
- cld->cld_cfg.cfg_flags = 0;
- cld->cld_cfg.cfg_sb = sb;
- cld->cld_type = type;
- atomic_set(&cld->cld_refcount, 1);
-
- /* Keep the mgc around until we are done */
- cld->cld_mgcexp = class_export_get(obd->obd_self_export);
-
- if (cld_is_sptlrpc(cld)) {
- sptlrpc_conf_log_start(logname);
- cld->cld_cfg.cfg_obdname = obd->obd_name;
- }
-
- spin_lock(&config_list_lock);
- list_add(&cld->cld_list_chain, &config_llog_list);
- spin_unlock(&config_list_lock);
-
- if (cld_is_sptlrpc(cld)) {
- rc = mgc_process_log(obd, cld);
- if (rc && rc != -ENOENT)
- CERROR("failed processing sptlrpc log: %d\n", rc);
- }
-
- return cld;
-}
-
-static struct config_llog_data *
-config_recover_log_add(struct obd_device *obd, char *fsname,
- struct config_llog_instance *cfg,
- struct super_block *sb)
-{
- struct config_llog_instance lcfg = *cfg;
- struct config_llog_data *cld;
- char logname[32];
-
-	/* We have to use a different llog for the clients and MDTs for CMD,
-	 * where only the clients are notified if one of the CMD servers restarts
- */
- LASSERT(strlen(fsname) < sizeof(logname) / 2);
- strcpy(logname, fsname);
- LASSERT(lcfg.cfg_instance);
- strcat(logname, "-cliir");
-
- cld = do_config_log_add(obd, logname, CONFIG_T_RECOVER, &lcfg, sb);
- return cld;
-}
-
-static struct config_llog_data *
-config_params_log_add(struct obd_device *obd,
- struct config_llog_instance *cfg, struct super_block *sb)
-{
- struct config_llog_instance lcfg = *cfg;
- struct config_llog_data *cld;
-
- lcfg.cfg_instance = sb;
-
- cld = do_config_log_add(obd, PARAMS_FILENAME, CONFIG_T_PARAMS,
- &lcfg, sb);
-
- return cld;
-}
-
-/** Add this log to the list of active logs watched by an MGC.
- * Active means we're watching for updates.
- * We have one active log per "mount" - client instance or servername.
- * Each instance may be at a different point in the log.
- */
-static struct config_llog_data *
-config_log_add(struct obd_device *obd, char *logname,
- struct config_llog_instance *cfg, struct super_block *sb)
-{
- struct lustre_sb_info *lsi = s2lsi(sb);
- struct config_llog_data *cld;
- struct config_llog_data *sptlrpc_cld;
- struct config_llog_data *params_cld;
- struct config_llog_data *recover_cld = NULL;
- char seclogname[32];
- char *ptr;
- int rc;
-
- CDEBUG(D_MGC, "adding config log %s:%p\n", logname, cfg->cfg_instance);
-
- /*
-	 * For each regular log, the corresponding sptlrpc log name is
-	 * <fsname>-sptlrpc. Multiple regular logs may share one sptlrpc log.
- */
- ptr = strrchr(logname, '-');
- if (!ptr || ptr - logname > 8) {
- CERROR("logname %s is too long\n", logname);
- return ERR_PTR(-EINVAL);
- }
-
- memcpy(seclogname, logname, ptr - logname);
- strcpy(seclogname + (ptr - logname), "-sptlrpc");
-
- sptlrpc_cld = config_log_find(seclogname, NULL);
- if (!sptlrpc_cld) {
- sptlrpc_cld = do_config_log_add(obd, seclogname,
- CONFIG_T_SPTLRPC, NULL, NULL);
- if (IS_ERR(sptlrpc_cld)) {
- CERROR("can't create sptlrpc log: %s\n", seclogname);
- rc = PTR_ERR(sptlrpc_cld);
- goto out_err;
- }
- }
- params_cld = config_params_log_add(obd, cfg, sb);
- if (IS_ERR(params_cld)) {
- rc = PTR_ERR(params_cld);
- CERROR("%s: can't create params log: rc = %d\n",
- obd->obd_name, rc);
- goto out_sptlrpc;
- }
-
- cld = do_config_log_add(obd, logname, CONFIG_T_CONFIG, cfg, sb);
- if (IS_ERR(cld)) {
- CERROR("can't create log: %s\n", logname);
- rc = PTR_ERR(cld);
- goto out_params;
- }
-
- LASSERT(lsi->lsi_lmd);
- if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)) {
- ptr = strrchr(seclogname, '-');
- if (ptr) {
- *ptr = 0;
- } else {
- CERROR("%s: sptlrpc log name not correct, %s: rc = %d\n",
- obd->obd_name, seclogname, -EINVAL);
- rc = -EINVAL;
- goto out_cld;
- }
- recover_cld = config_recover_log_add(obd, seclogname, cfg, sb);
- if (IS_ERR(recover_cld)) {
- rc = PTR_ERR(recover_cld);
- goto out_cld;
- }
- }
-
- mutex_lock(&cld->cld_lock);
- cld->cld_recover = recover_cld;
- cld->cld_params = params_cld;
- cld->cld_sptlrpc = sptlrpc_cld;
- mutex_unlock(&cld->cld_lock);
-
- return cld;
-
-out_cld:
- config_log_put(cld);
-
-out_params:
- config_log_put(params_cld);
-
-out_sptlrpc:
- config_log_put(sptlrpc_cld);
-
-out_err:
- return ERR_PTR(rc);
-}
-
-static DEFINE_MUTEX(llog_process_lock);
-
-static inline void config_mark_cld_stop(struct config_llog_data *cld)
-{
- mutex_lock(&cld->cld_lock);
- spin_lock(&config_list_lock);
- cld->cld_stopping = 1;
- spin_unlock(&config_list_lock);
- mutex_unlock(&cld->cld_lock);
-}
-
-/** Stop watching for updates on this log.
- */
-static int config_log_end(char *logname, struct config_llog_instance *cfg)
-{
- struct config_llog_data *cld;
- struct config_llog_data *cld_sptlrpc = NULL;
- struct config_llog_data *cld_params = NULL;
- struct config_llog_data *cld_recover = NULL;
- int rc = 0;
-
- cld = config_log_find(logname, cfg);
- if (!cld)
- return -ENOENT;
-
- mutex_lock(&cld->cld_lock);
- /*
-	 * If cld_stopping is set, we did not start the log and thus do
-	 * not own the start ref. This can happen after a previous umount:
-	 * the cld is still hanging around waiting for the lock cancel, and
-	 * we remount again but fail in the middle and call log_end without
-	 * having called start_log.
- */
- if (unlikely(cld->cld_stopping)) {
- mutex_unlock(&cld->cld_lock);
- /* drop the ref from the find */
- config_log_put(cld);
- return rc;
- }
-
- spin_lock(&config_list_lock);
- cld->cld_stopping = 1;
- spin_unlock(&config_list_lock);
-
- cld_recover = cld->cld_recover;
- cld->cld_recover = NULL;
-
- cld_params = cld->cld_params;
- cld->cld_params = NULL;
- cld_sptlrpc = cld->cld_sptlrpc;
- cld->cld_sptlrpc = NULL;
- mutex_unlock(&cld->cld_lock);
-
- if (cld_recover) {
- config_mark_cld_stop(cld_recover);
- config_log_put(cld_recover);
- }
-
- if (cld_params) {
- config_mark_cld_stop(cld_params);
- config_log_put(cld_params);
- }
-
- if (cld_sptlrpc)
- config_log_put(cld_sptlrpc);
-
- /* drop the ref from the find */
- config_log_put(cld);
- /* drop the start ref */
- config_log_put(cld);
-
- CDEBUG(D_MGC, "end config log %s (%d)\n", logname ? logname : "client",
- rc);
- return rc;
-}
-
-int lprocfs_mgc_rd_ir_state(struct seq_file *m, void *data)
-{
- struct obd_device *obd = data;
- struct obd_import *imp;
- struct obd_connect_data *ocd;
- struct config_llog_data *cld;
- int rc;
-
- rc = lprocfs_climp_check(obd);
- if (rc)
- return rc;
-
- imp = obd->u.cli.cl_import;
- ocd = &imp->imp_connect_data;
-
- seq_printf(m, "imperative_recovery: %s\n",
- OCD_HAS_FLAG(ocd, IMP_RECOV) ? "ENABLED" : "DISABLED");
- seq_printf(m, "client_state:\n");
-
- spin_lock(&config_list_lock);
- list_for_each_entry(cld, &config_llog_list, cld_list_chain) {
- if (!cld->cld_recover)
- continue;
- seq_printf(m, " - { client: %s, nidtbl_version: %u }\n",
- cld->cld_logname,
- cld->cld_recover->cld_cfg.cfg_last_idx);
- }
- spin_unlock(&config_list_lock);
-
- up_read(&obd->u.cli.cl_sem);
- return 0;
-}
-
-/* reenqueue any lost locks */
-#define RQ_RUNNING 0x1
-#define RQ_NOW 0x2
-#define RQ_LATER 0x4
-#define RQ_STOP 0x8
-#define RQ_PRECLEANUP 0x10
-static int rq_state;
-static wait_queue_head_t rq_waitq;
-static DECLARE_COMPLETION(rq_exit);
-static DECLARE_COMPLETION(rq_start);
-
-static void do_requeue(struct config_llog_data *cld)
-{
- LASSERT(atomic_read(&cld->cld_refcount) > 0);
-
- /* Do not run mgc_process_log on a disconnected export or an
- * export which is being disconnected. Take the client
- * semaphore to make the check non-racy.
- */
- down_read_nested(&cld->cld_mgcexp->exp_obd->u.cli.cl_sem,
- OBD_CLI_SEM_MGC);
-
- if (cld->cld_mgcexp->exp_obd->u.cli.cl_conn_count != 0) {
- int rc;
-
- CDEBUG(D_MGC, "updating log %s\n", cld->cld_logname);
- rc = mgc_process_log(cld->cld_mgcexp->exp_obd, cld);
- if (rc && rc != -ENOENT)
- CERROR("failed processing log: %d\n", rc);
- } else {
- CDEBUG(D_MGC, "disconnecting, won't update log %s\n",
- cld->cld_logname);
- }
- up_read(&cld->cld_mgcexp->exp_obd->u.cli.cl_sem);
-}
-
-/* This timeout controls how many seconds the MGC should wait before
- * requeueing the config and recovery locks to the MGS. It is randomized
- * so that the clients do not all flood the MGS at once.
- */
-#define MGC_TIMEOUT_MIN_SECONDS 5
-#define MGC_TIMEOUT_RAND_CENTISEC 500
-
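A userspace rendering of the delay computed below in mgc_requeue_thread(): a fixed five-second floor plus up to 500 centiseconds of random jitter, so a whole cluster of clients does not requeue against the MGS in lockstep (requeue_delay_ms() is illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define TIMEOUT_MIN_SECONDS	5
#define TIMEOUT_RAND_CENTISEC	500

static unsigned int requeue_delay_ms(void)
{
	unsigned int jitter_cs = (unsigned int)rand() % TIMEOUT_RAND_CENTISEC;

	/* floor of 5000 ms plus 0..4990 ms of jitter */
	return TIMEOUT_MIN_SECONDS * 1000 + jitter_cs * 10;
}

int main(void)
{
	srand((unsigned int)time(NULL));
	printf("sleeping %u ms before requeue\n", requeue_delay_ms());
	return 0;
}
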
-static int mgc_requeue_thread(void *data)
-{
- bool first = true;
-
- CDEBUG(D_MGC, "Starting requeue thread\n");
-
- /* Keep trying failed locks periodically */
- spin_lock(&config_list_lock);
- rq_state |= RQ_RUNNING;
- while (!(rq_state & RQ_STOP)) {
- struct config_llog_data *cld, *cld_prev;
- int rand = prandom_u32_max(MGC_TIMEOUT_RAND_CENTISEC);
- int to;
-
- /* Any new or requeued lostlocks will change the state */
- rq_state &= ~(RQ_NOW | RQ_LATER);
- spin_unlock(&config_list_lock);
-
- if (first) {
- first = false;
- complete(&rq_start);
- }
-
-		/* Always wait a few seconds to allow the server that
-		 * caused the lock revocation to finish its setup, plus a
-		 * random delay so everyone doesn't try to reconnect at once.
- */
- to = msecs_to_jiffies(MGC_TIMEOUT_MIN_SECONDS * MSEC_PER_SEC);
- /* rand is centi-seconds */
- to += msecs_to_jiffies(rand * MSEC_PER_SEC / 100);
- wait_event_idle_timeout(rq_waitq,
- rq_state & (RQ_STOP | RQ_PRECLEANUP),
- to);
-
- /*
-		 * Iterate through the list, processing each cld: for each,
-		 * process its dependent sptlrpc cld first (if any) and then
-		 * the cld itself.
-		 *
-		 * Any item in the list is guaranteed to hold a
-		 * reference > 0; and if cld_lostlock is set, at
-		 * least one reference was taken by the previous enqueue.
- */
- cld_prev = NULL;
-
- spin_lock(&config_list_lock);
- rq_state &= ~RQ_PRECLEANUP;
- list_for_each_entry(cld, &config_llog_list, cld_list_chain) {
- if (!cld->cld_lostlock || cld->cld_stopping)
- continue;
-
- /*
- * hold reference to avoid being freed during
- * subsequent processing.
- */
- config_log_get(cld);
- cld->cld_lostlock = 0;
- spin_unlock(&config_list_lock);
-
- if (cld_prev)
- config_log_put(cld_prev);
- cld_prev = cld;
-
- if (likely(!(rq_state & RQ_STOP))) {
- do_requeue(cld);
- spin_lock(&config_list_lock);
- } else {
- spin_lock(&config_list_lock);
- break;
- }
- }
- spin_unlock(&config_list_lock);
- if (cld_prev)
- config_log_put(cld_prev);
-
- /* Wait a bit to see if anyone else needs a requeue */
- wait_event_idle(rq_waitq, rq_state & (RQ_NOW | RQ_STOP));
- spin_lock(&config_list_lock);
- }
-
-	/* The spinlock and loop condition ensure RQ_NOW and RQ_LATER are clear */
- rq_state &= ~RQ_RUNNING;
- spin_unlock(&config_list_lock);
-
- complete(&rq_exit);
-
- CDEBUG(D_MGC, "Ending requeue thread\n");
- return 0;
-}
-
-/* Add a cld to the requeue list and wake the requeue thread if needed.
- * We are responsible for dropping the config log reference from here on out.
- */
-static void mgc_requeue_add(struct config_llog_data *cld)
-{
- bool wakeup = false;
-
- CDEBUG(D_INFO, "log %s: requeue (r=%d sp=%d st=%x)\n",
- cld->cld_logname, atomic_read(&cld->cld_refcount),
- cld->cld_stopping, rq_state);
- LASSERT(atomic_read(&cld->cld_refcount) > 0);
-
- mutex_lock(&cld->cld_lock);
- spin_lock(&config_list_lock);
- if (!(rq_state & RQ_STOP) && !cld->cld_stopping && !cld->cld_lostlock) {
- cld->cld_lostlock = 1;
- rq_state |= RQ_NOW;
- wakeup = true;
- }
- spin_unlock(&config_list_lock);
- mutex_unlock(&cld->cld_lock);
- if (wakeup)
- wake_up(&rq_waitq);
-}
-
-static int mgc_llog_init(const struct lu_env *env, struct obd_device *obd)
-{
- struct llog_ctxt *ctxt;
- int rc;
-
-	/* Set up only the remote context; the local disk context is
-	 * switched per filesystem during mgc_fs_setup().
-	 */
- rc = llog_setup(env, obd, &obd->obd_olg, LLOG_CONFIG_REPL_CTXT, obd,
- &llog_client_ops);
- if (rc)
- return rc;
-
- ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT);
- LASSERT(ctxt);
-
- llog_initiator_connect(ctxt);
- llog_ctxt_put(ctxt);
-
- return 0;
-}
-
-static int mgc_llog_fini(const struct lu_env *env, struct obd_device *obd)
-{
- struct llog_ctxt *ctxt;
-
- ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT);
- if (ctxt)
- llog_cleanup(env, ctxt);
-
- return 0;
-}
-
-static atomic_t mgc_count = ATOMIC_INIT(0);
-static int mgc_precleanup(struct obd_device *obd)
-{
- int rc = 0;
- int temp;
-
- if (atomic_dec_and_test(&mgc_count)) {
- LASSERT(rq_state & RQ_RUNNING);
- /* stop requeue thread */
- temp = RQ_STOP;
- } else {
- /* wakeup requeue thread to clean our cld */
- temp = RQ_NOW | RQ_PRECLEANUP;
- }
-
- spin_lock(&config_list_lock);
- rq_state |= temp;
- spin_unlock(&config_list_lock);
- wake_up(&rq_waitq);
-
- if (temp & RQ_STOP)
- wait_for_completion(&rq_exit);
- obd_cleanup_client_import(obd);
-
- rc = mgc_llog_fini(NULL, obd);
- if (rc)
-		CERROR("failed to clean up llog subsystems\n");
-
- return rc;
-}
-
-static int mgc_cleanup(struct obd_device *obd)
-{
- /* COMPAT_146 - old config logs may have added profiles we don't
- * know about
- */
- if (obd->obd_type->typ_refcnt <= 1)
- /* Only for the last mgc */
- class_del_profiles();
-
- lprocfs_obd_cleanup(obd);
- ptlrpcd_decref();
-
- return client_obd_cleanup(obd);
-}
-
-static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
-{
- struct lprocfs_static_vars lvars = { NULL };
- struct task_struct *task;
- int rc;
-
- rc = ptlrpcd_addref();
- if (rc < 0)
- goto err_noref;
-
- rc = client_obd_setup(obd, lcfg);
- if (rc)
- goto err_decref;
-
- rc = mgc_llog_init(NULL, obd);
- if (rc) {
-		CERROR("failed to set up llog subsystems\n");
- goto err_cleanup;
- }
-
- lprocfs_mgc_init_vars(&lvars);
- lprocfs_obd_setup(obd, lvars.obd_vars, lvars.sysfs_vars);
- sptlrpc_lprocfs_cliobd_attach(obd);
-
- if (atomic_inc_return(&mgc_count) == 1) {
- rq_state = 0;
- init_waitqueue_head(&rq_waitq);
-
- /* start requeue thread */
- task = kthread_run(mgc_requeue_thread, NULL, "ll_cfg_requeue");
- if (IS_ERR(task)) {
- rc = PTR_ERR(task);
- CERROR("%s: cannot start requeue thread: rc = %d; no more log updates\n",
- obd->obd_name, rc);
- goto err_cleanup;
- }
-		/* The requeue thread started successfully; return success. */
- rc = 0;
- wait_for_completion(&rq_start);
- }
-
- return rc;
-
-err_cleanup:
- client_obd_cleanup(obd);
-err_decref:
- ptlrpcd_decref();
-err_noref:
- return rc;
-}
-
-/* based on ll_mdc_blocking_ast */
-static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
- void *data, int flag)
-{
- struct lustre_handle lockh;
- struct config_llog_data *cld = data;
- int rc = 0;
-
- switch (flag) {
- case LDLM_CB_BLOCKING:
- /* mgs wants the lock, give it up... */
- LDLM_DEBUG(lock, "MGC blocking CB");
- ldlm_lock2handle(lock, &lockh);
- rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
- break;
- case LDLM_CB_CANCELING:
- /* We've given up the lock, prepare ourselves to update. */
- LDLM_DEBUG(lock, "MGC cancel CB");
-
- CDEBUG(D_MGC, "Lock res " DLDLMRES " (%.8s)\n",
- PLDLMRES(lock->l_resource),
- (char *)&lock->l_resource->lr_name.name[0]);
-
- if (!cld) {
- CDEBUG(D_INFO, "missing data, won't requeue\n");
- break;
- }
-
- /* held at mgc_process_log(). */
- LASSERT(atomic_read(&cld->cld_refcount) > 0);
-
- lock->l_ast_data = NULL;
- /* Are we done with this log? */
- if (cld->cld_stopping) {
- CDEBUG(D_MGC, "log %s: stopping, won't requeue\n",
- cld->cld_logname);
- config_log_put(cld);
- break;
- }
- /* Make sure not to re-enqueue when the mgc is stopping
- * (we get called from client_disconnect_export)
- */
- if (!lock->l_conn_export ||
- !lock->l_conn_export->exp_obd->u.cli.cl_conn_count) {
- CDEBUG(D_MGC, "log %.8s: disconnecting, won't requeue\n",
- cld->cld_logname);
- config_log_put(cld);
- break;
- }
-
- /* Re-enqueue now */
- mgc_requeue_add(cld);
- config_log_put(cld);
- break;
- default:
- LBUG();
- }
-
- return rc;
-}
-
-/* Not sure where this should go... */
-/* This is the timeout value for an MGS_CONNECT request plus a ping interval,
- * so that we have a chance to try the secondary MGS, if any.
- */
-#define MGC_ENQUEUE_LIMIT (INITIAL_CONNECT_TIMEOUT + (AT_OFF ? 0 : at_min) \
- + PING_INTERVAL)
-#define MGC_TARGET_REG_LIMIT 10
-#define MGC_SEND_PARAM_LIMIT 10
-
-/* Send a parameter to the MGS */
-static int mgc_set_mgs_param(struct obd_export *exp,
- struct mgs_send_param *msp)
-{
- struct ptlrpc_request *req;
- struct mgs_send_param *req_msp, *rep_msp;
- int rc;
-
- req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
- &RQF_MGS_SET_INFO, LUSTRE_MGS_VERSION,
- MGS_SET_INFO);
- if (!req)
- return -ENOMEM;
-
- req_msp = req_capsule_client_get(&req->rq_pill, &RMF_MGS_SEND_PARAM);
- if (!req_msp) {
- ptlrpc_req_finished(req);
- return -ENOMEM;
- }
-
- memcpy(req_msp, msp, sizeof(*req_msp));
- ptlrpc_request_set_replen(req);
-
- /* Limit how long we will wait for the enqueue to complete */
- req->rq_delay_limit = MGC_SEND_PARAM_LIMIT;
- rc = ptlrpc_queue_wait(req);
- if (!rc) {
- rep_msp = req_capsule_server_get(&req->rq_pill, &RMF_MGS_SEND_PARAM);
- memcpy(msp, rep_msp, sizeof(*rep_msp));
- }
-
- ptlrpc_req_finished(req);
-
- return rc;
-}
-
-/* Take a config lock so we can get cancel notifications */
-static int mgc_enqueue(struct obd_export *exp, __u32 type,
- union ldlm_policy_data *policy, __u32 mode,
- __u64 *flags, void *bl_cb, void *cp_cb, void *gl_cb,
- void *data, __u32 lvb_len, void *lvb_swabber,
- struct lustre_handle *lockh)
-{
- struct config_llog_data *cld = data;
- struct ldlm_enqueue_info einfo = {
- .ei_type = type,
- .ei_mode = mode,
- .ei_cb_bl = mgc_blocking_ast,
- .ei_cb_cp = ldlm_completion_ast,
- };
- struct ptlrpc_request *req;
- int short_limit = cld_is_sptlrpc(cld);
- int rc;
-
- CDEBUG(D_MGC, "Enqueue for %s (res %#llx)\n", cld->cld_logname,
- cld->cld_resid.name[0]);
-
- /* We need a callback for every lockholder, so don't try to
- * ldlm_lock_match (see rev 1.1.2.11.2.47)
- */
- req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
- &RQF_LDLM_ENQUEUE, LUSTRE_DLM_VERSION,
- LDLM_ENQUEUE);
- if (!req)
- return -ENOMEM;
-
- req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, 0);
- ptlrpc_request_set_replen(req);
-
- /* Limit how long we will wait for the enqueue to complete */
- req->rq_delay_limit = short_limit ? 5 : MGC_ENQUEUE_LIMIT;
- rc = ldlm_cli_enqueue(exp, &req, &einfo, &cld->cld_resid, NULL, flags,
- NULL, 0, LVB_T_NONE, lockh, 0);
- /* A failed enqueue should still call the mgc_blocking_ast,
- * where it will be requeued if needed ("grant failed").
- */
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static void mgc_notify_active(struct obd_device *unused)
-{
- /* wakeup mgc_requeue_thread to requeue mgc lock */
- spin_lock(&config_list_lock);
- rq_state |= RQ_NOW;
- spin_unlock(&config_list_lock);
- wake_up(&rq_waitq);
-
- /* TODO: Help the MGS rebuild nidtbl. -jay */
-}
-
-/* Send target_reg message to MGS */
-static int mgc_target_register(struct obd_export *exp,
- struct mgs_target_info *mti)
-{
- struct ptlrpc_request *req;
- struct mgs_target_info *req_mti, *rep_mti;
- int rc;
-
- req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
- &RQF_MGS_TARGET_REG, LUSTRE_MGS_VERSION,
- MGS_TARGET_REG);
- if (!req)
- return -ENOMEM;
-
- req_mti = req_capsule_client_get(&req->rq_pill, &RMF_MGS_TARGET_INFO);
- if (!req_mti) {
- ptlrpc_req_finished(req);
- return -ENOMEM;
- }
-
- memcpy(req_mti, mti, sizeof(*req_mti));
- ptlrpc_request_set_replen(req);
- CDEBUG(D_MGC, "register %s\n", mti->mti_svname);
- /* Limit how long we will wait for the enqueue to complete */
- req->rq_delay_limit = MGC_TARGET_REG_LIMIT;
-
- rc = ptlrpc_queue_wait(req);
- if (!rc) {
- rep_mti = req_capsule_server_get(&req->rq_pill,
- &RMF_MGS_TARGET_INFO);
- memcpy(mti, rep_mti, sizeof(*rep_mti));
- CDEBUG(D_MGC, "register %s got index = %d\n",
- mti->mti_svname, mti->mti_stripe_index);
- }
- ptlrpc_req_finished(req);
-
- return rc;
-}
-
-static int mgc_set_info_async(const struct lu_env *env, struct obd_export *exp,
- u32 keylen, void *key, u32 vallen,
- void *val, struct ptlrpc_request_set *set)
-{
- int rc = -EINVAL;
-
- /* Turn off initial_recov after we try all backup servers once */
- if (KEY_IS(KEY_INIT_RECOV_BACKUP)) {
- struct obd_import *imp = class_exp2cliimp(exp);
- int value;
-
- if (vallen != sizeof(int))
- return -EINVAL;
- value = *(int *)val;
- CDEBUG(D_MGC, "InitRecov %s %d/d%d:i%d:r%d:or%d:%s\n",
- imp->imp_obd->obd_name, value,
- imp->imp_deactive, imp->imp_invalid,
- imp->imp_replayable, imp->imp_obd->obd_replayable,
- ptlrpc_import_state_name(imp->imp_state));
- /* Resurrect if we previously died */
- if ((imp->imp_state != LUSTRE_IMP_FULL &&
- imp->imp_state != LUSTRE_IMP_NEW) || value > 1)
- ptlrpc_reconnect_import(imp);
- return 0;
- }
- if (KEY_IS(KEY_SET_INFO)) {
- struct mgs_send_param *msp;
-
- msp = val;
- rc = mgc_set_mgs_param(exp, msp);
- return rc;
- }
- if (KEY_IS(KEY_MGSSEC)) {
- struct client_obd *cli = &exp->exp_obd->u.cli;
- struct sptlrpc_flavor flvr;
-
-		/*
-		 * An empty string means use the current flavor; if none has
-		 * been set yet, set it to null.
-		 *
-		 * If a flavor has been set previously, the requested flavor
-		 * must match the existing one.
-		 */
- if (vallen == 0) {
- if (cli->cl_flvr_mgc.sf_rpc != SPTLRPC_FLVR_INVALID)
- return 0;
- val = "null";
- vallen = 4;
- }
-
- rc = sptlrpc_parse_flavor(val, &flvr);
- if (rc) {
- CERROR("invalid sptlrpc flavor %s to MGS\n",
- (char *)val);
- return rc;
- }
-
-		/* The caller already holds a mutex. */
- if (cli->cl_flvr_mgc.sf_rpc == SPTLRPC_FLVR_INVALID) {
- cli->cl_flvr_mgc = flvr;
- } else if (memcmp(&cli->cl_flvr_mgc, &flvr,
- sizeof(flvr)) != 0) {
- char str[20];
-
- sptlrpc_flavor2name(&cli->cl_flvr_mgc,
- str, sizeof(str));
- LCONSOLE_ERROR("asking sptlrpc flavor %s to MGS but currently %s is in use\n",
- (char *)val, str);
- rc = -EPERM;
- }
- return rc;
- }
-
- return rc;
-}
-
-static int mgc_get_info(const struct lu_env *env, struct obd_export *exp,
- __u32 keylen, void *key, __u32 *vallen, void *val)
-{
- int rc = -EINVAL;
-
- if (KEY_IS(KEY_CONN_DATA)) {
- struct obd_import *imp = class_exp2cliimp(exp);
- struct obd_connect_data *data = val;
-
- if (*vallen == sizeof(*data)) {
- *data = imp->imp_connect_data;
- rc = 0;
- }
- }
-
- return rc;
-}
-
-static int mgc_import_event(struct obd_device *obd,
- struct obd_import *imp,
- enum obd_import_event event)
-{
- LASSERT(imp->imp_obd == obd);
- CDEBUG(D_MGC, "import event %#x\n", event);
-
- switch (event) {
- case IMP_EVENT_DISCON:
- /* MGC imports should not wait for recovery */
- if (OCD_HAS_FLAG(&imp->imp_connect_data, IMP_RECOV))
- ptlrpc_pinger_ir_down();
- break;
- case IMP_EVENT_INACTIVE:
- break;
- case IMP_EVENT_INVALIDATE: {
- struct ldlm_namespace *ns = obd->obd_namespace;
-
- ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
- break;
- }
- case IMP_EVENT_ACTIVE:
- CDEBUG(D_INFO, "%s: Reactivating import\n", obd->obd_name);
- /* Clearing obd_no_recov allows us to continue pinging */
- obd->obd_no_recov = 0;
- mgc_notify_active(obd);
- if (OCD_HAS_FLAG(&imp->imp_connect_data, IMP_RECOV))
- ptlrpc_pinger_ir_up();
- break;
- case IMP_EVENT_OCD:
- break;
- case IMP_EVENT_DEACTIVATE:
- case IMP_EVENT_ACTIVATE:
- break;
- default:
- CERROR("Unknown import event %#x\n", event);
- LBUG();
- }
- return 0;
-}
-
-enum {
- CONFIG_READ_NRPAGES_INIT = 1 << (20 - PAGE_SHIFT),
- CONFIG_READ_NRPAGES = 4
-};
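-
-/*
- * Note on sizing: CONFIG_READ_NRPAGES_INIT always covers 1 MiB; with
- * 4 KiB pages (PAGE_SHIFT == 12) it evaluates to 1 << 8 == 256 pages.
- * Incremental reads use only CONFIG_READ_NRPAGES == 4 pages.
- */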
-
-static int mgc_apply_recover_logs(struct obd_device *mgc,
- struct config_llog_data *cld,
- __u64 max_version,
- void *data, int datalen, bool mne_swab)
-{
- struct config_llog_instance *cfg = &cld->cld_cfg;
- struct mgs_nidtbl_entry *entry;
- struct lustre_cfg *lcfg;
- struct lustre_cfg_bufs bufs;
- u64 prev_version = 0;
- char *inst;
- char *buf;
- int bufsz;
- int pos;
- int rc = 0;
- int off = 0;
-
- LASSERT(cfg->cfg_instance);
- LASSERT(cfg->cfg_sb == cfg->cfg_instance);
-
- inst = kzalloc(PAGE_SIZE, GFP_KERNEL);
- if (!inst)
- return -ENOMEM;
-
- pos = snprintf(inst, PAGE_SIZE, "%p", cfg->cfg_instance);
- if (pos >= PAGE_SIZE) {
- kfree(inst);
- return -E2BIG;
- }
-
- ++pos;
- buf = inst + pos;
- bufsz = PAGE_SIZE - pos;
-
- while (datalen > 0) {
- int entry_len = sizeof(*entry);
- int is_ost, i;
- struct obd_device *obd;
- char *obdname;
- char *cname;
- char *params;
- char *uuid;
- size_t len;
-
- rc = -EINVAL;
- if (datalen < sizeof(*entry))
- break;
-
- entry = (typeof(entry))(data + off);
-
-		/* sanity checks */
-		if (entry->mne_nid_type != 0) /* only type 0 (IPv4) */
- break;
- if (entry->mne_nid_count == 0) /* at least one nid entry */
- break;
- if (entry->mne_nid_size != sizeof(lnet_nid_t))
- break;
-
- entry_len += entry->mne_nid_count * entry->mne_nid_size;
- if (datalen < entry_len) /* must have entry_len at least */
- break;
-
- /* Keep this swab for normal mixed endian handling. LU-1644 */
- if (mne_swab)
- lustre_swab_mgs_nidtbl_entry(entry);
- if (entry->mne_length > PAGE_SIZE) {
- CERROR("MNE too large (%u)\n", entry->mne_length);
- break;
- }
-
- if (entry->mne_length < entry_len)
- break;
-
- off += entry->mne_length;
- datalen -= entry->mne_length;
- if (datalen < 0)
- break;
-
- if (entry->mne_version > max_version) {
- CERROR("entry index(%lld) is over max_index(%lld)\n",
- entry->mne_version, max_version);
- break;
- }
-
- if (prev_version >= entry->mne_version) {
- CERROR("index unsorted, prev %lld, now %lld\n",
- prev_version, entry->mne_version);
- break;
- }
- prev_version = entry->mne_version;
-
- /*
- * Write a string with format "nid::instance" to
- * lustre/<osc|mdc>/<target>-<osc|mdc>-<instance>/import.
- */
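-		/*
-		 * For example (hypothetical values): with mne_index == 1 and
-		 * mne_instance == 17, the code below builds the obdname
-		 * "lustre-OST0001-osc-ffff8800cafef000" and the params
-		 * string "osc.import=connection=<conn UUID>::17".
-		 */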
-
- is_ost = entry->mne_type == LDD_F_SV_TYPE_OST;
- memset(buf, 0, bufsz);
- obdname = buf;
- pos = 0;
-
- /* lustre-OST0001-osc-<instance #> */
- strcpy(obdname, cld->cld_logname);
- cname = strrchr(obdname, '-');
- if (!cname) {
- CERROR("mgc %s: invalid logname %s\n",
- mgc->obd_name, obdname);
- break;
- }
-
- pos = cname - obdname;
- obdname[pos] = 0;
- pos += sprintf(obdname + pos, "-%s%04x",
- is_ost ? "OST" : "MDT", entry->mne_index);
-
- cname = is_ost ? "osc" : "mdc";
- pos += sprintf(obdname + pos, "-%s-%s", cname, inst);
- lustre_cfg_bufs_reset(&bufs, obdname);
-
- /* find the obd by obdname */
- obd = class_name2obd(obdname);
- if (!obd) {
- CDEBUG(D_INFO, "mgc %s: cannot find obdname %s\n",
- mgc->obd_name, obdname);
- rc = 0;
-			/* This is a benign race while the OST is starting up. */
- continue;
- }
-
- /* osc.import = "connection=<Conn UUID>::<target instance>" */
- ++pos;
- params = buf + pos;
- pos += sprintf(params, "%s.import=%s", cname, "connection=");
- uuid = buf + pos;
-
- down_read(&obd->u.cli.cl_sem);
- if (!obd->u.cli.cl_import) {
-			/* The client has not connected to the OST yet. */
- up_read(&obd->u.cli.cl_sem);
- rc = 0;
- continue;
- }
-
-		/* Iterate over all NIDs to find the connection UUID. */
- rc = -ENOENT;
- for (i = 0; i < entry->mne_nid_count; i++) {
- rc = client_import_find_conn(obd->u.cli.cl_import,
-						     entry->u.nids[i],
- (struct obd_uuid *)uuid);
- if (!rc)
- break;
- }
-
- up_read(&obd->u.cli.cl_sem);
- if (rc < 0) {
- CERROR("mgc: cannot find uuid by nid %s\n",
- libcfs_nid2str(entry->u.nids[0]));
- break;
- }
-
-		CDEBUG(D_INFO, "Found uuid %s by nid %s\n",
-		       uuid, libcfs_nid2str(entry->u.nids[i]));
-
- pos += strlen(uuid);
- pos += sprintf(buf + pos, "::%u", entry->mne_instance);
- LASSERT(pos < bufsz);
-
- lustre_cfg_bufs_set_string(&bufs, 1, params);
-
-		len = lustre_cfg_len(bufs.lcfg_bufcount, bufs.lcfg_buflen);
- lcfg = kzalloc(len, GFP_NOFS);
- if (!lcfg) {
- rc = -ENOMEM;
- break;
- }
- lustre_cfg_init(lcfg, LCFG_PARAM, &bufs);
-
- CDEBUG(D_INFO, "ir apply logs %lld/%lld for %s -> %s\n",
- prev_version, max_version, obdname, params);
-
- rc = class_process_config(lcfg);
- kfree(lcfg);
- if (rc)
- CDEBUG(D_INFO, "process config for %s error %d\n",
- obdname, rc);
-
-		/* Continue even if one entry fails. */
- }
-
- kfree(inst);
- return rc;
-}
-
-/**
- * This function is called when the MGS notifies this client that a target
- * has restarted. A CONFIG_READ RPC is sent to fetch the recovery logs.
- */
-static int mgc_process_recover_log(struct obd_device *obd,
- struct config_llog_data *cld)
-{
- struct ptlrpc_request *req = NULL;
- struct config_llog_instance *cfg = &cld->cld_cfg;
- struct mgs_config_body *body;
- struct mgs_config_res *res;
- struct ptlrpc_bulk_desc *desc;
- struct page **pages;
- int nrpages;
- bool eof = true;
- bool mne_swab;
- int i;
- int ealen;
- int rc;
-
-	/* Allocate buffers for the bulk transfer.
-	 * The first time this MGC reads the logs, CONFIG_READ_NRPAGES_INIT
-	 * is used, since all logs are read at once; afterwards only the
-	 * increment is read, which should be small, so CONFIG_READ_NRPAGES
-	 * is used.
-	 */
- nrpages = CONFIG_READ_NRPAGES;
- if (cfg->cfg_last_idx == 0) /* the first time */
- nrpages = CONFIG_READ_NRPAGES_INIT;
-
- pages = kcalloc(nrpages, sizeof(*pages), GFP_KERNEL);
- if (!pages) {
- rc = -ENOMEM;
- goto out;
- }
-
- for (i = 0; i < nrpages; i++) {
- pages[i] = alloc_page(GFP_KERNEL);
- if (!pages[i]) {
- rc = -ENOMEM;
- goto out;
- }
- }
-
-again:
- LASSERT(cld_is_recover(cld));
- LASSERT(mutex_is_locked(&cld->cld_lock));
- req = ptlrpc_request_alloc(class_exp2cliimp(cld->cld_mgcexp),
- &RQF_MGS_CONFIG_READ);
- if (!req) {
- rc = -ENOMEM;
- goto out;
- }
-
- rc = ptlrpc_request_pack(req, LUSTRE_MGS_VERSION, MGS_CONFIG_READ);
- if (rc)
- goto out;
-
- /* pack request */
- body = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY);
- LASSERT(sizeof(body->mcb_name) > strlen(cld->cld_logname));
- if (strlcpy(body->mcb_name, cld->cld_logname, sizeof(body->mcb_name))
- >= sizeof(body->mcb_name)) {
- rc = -E2BIG;
- goto out;
- }
- body->mcb_offset = cfg->cfg_last_idx + 1;
- body->mcb_type = cld->cld_type;
- body->mcb_bits = PAGE_SHIFT;
- body->mcb_units = nrpages;
-
- /* allocate bulk transfer descriptor */
- desc = ptlrpc_prep_bulk_imp(req, nrpages, 1,
- PTLRPC_BULK_PUT_SINK | PTLRPC_BULK_BUF_KIOV,
- MGS_BULK_PORTAL,
- &ptlrpc_bulk_kiov_pin_ops);
- if (!desc) {
- rc = -ENOMEM;
- goto out;
- }
-
- for (i = 0; i < nrpages; i++)
- desc->bd_frag_ops->add_kiov_frag(desc, pages[i], 0, PAGE_SIZE);
-
- ptlrpc_request_set_replen(req);
- rc = ptlrpc_queue_wait(req);
- if (rc)
- goto out;
-
- res = req_capsule_server_get(&req->rq_pill, &RMF_MGS_CONFIG_RES);
- if (res->mcr_size < res->mcr_offset) {
- rc = -EINVAL;
- goto out;
- }
-
-	/* Always update the index, even if errors occurred while handling
-	 * the recovery logs.
-	 */
- cfg->cfg_last_idx = res->mcr_offset;
- eof = res->mcr_offset == res->mcr_size;
-
- CDEBUG(D_INFO, "Latest version %lld, more %d.\n",
-	       res->mcr_offset, !eof);
-
- ealen = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk, 0);
- if (ealen < 0) {
- rc = ealen;
- goto out;
- }
-
- if (ealen > nrpages << PAGE_SHIFT) {
- rc = -EINVAL;
- goto out;
- }
-
- if (ealen == 0) { /* no logs transferred */
- if (!eof)
- rc = -EINVAL;
- goto out;
- }
-
- mne_swab = !!ptlrpc_rep_need_swab(req);
-#if OBD_OCD_VERSION(3, 0, 53, 0) > LUSTRE_VERSION_CODE
- /* This import flag means the server did an extra swab of IR MNE
- * records (fixed in LU-1252), reverse it here if needed. LU-1644
- */
- if (unlikely(req->rq_import->imp_need_mne_swab))
- mne_swab = !mne_swab;
-#endif
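-	/*
-	 * Net effect: an XOR -- the records are swabbed when exactly one of
-	 * "the reply needs swabbing" and "the server double-swabbed the MNE
-	 * records" is true.
-	 */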
-
- for (i = 0; i < nrpages && ealen > 0; i++) {
- int rc2;
- void *ptr;
-
- ptr = kmap(pages[i]);
- rc2 = mgc_apply_recover_logs(obd, cld, res->mcr_offset, ptr,
- min_t(int, ealen, PAGE_SIZE),
- mne_swab);
- kunmap(pages[i]);
- if (rc2 < 0) {
- CWARN("Process recover log %s error %d\n",
- cld->cld_logname, rc2);
- break;
- }
-
- ealen -= PAGE_SIZE;
- }
-
-out:
- if (req)
- ptlrpc_req_finished(req);
-
- if (rc == 0 && !eof)
- goto again;
-
- if (pages) {
- for (i = 0; i < nrpages; i++) {
- if (!pages[i])
- break;
- __free_page(pages[i]);
- }
- kfree(pages);
- }
- return rc;
-}
-
-/* local_only means remote llogs cannot be fetched */
-static int mgc_process_cfg_log(struct obd_device *mgc,
- struct config_llog_data *cld, int local_only)
-{
- struct llog_ctxt *ctxt;
- struct lustre_sb_info *lsi = NULL;
- int rc = 0;
- bool sptlrpc_started = false;
- struct lu_env *env;
-
- LASSERT(cld);
- LASSERT(mutex_is_locked(&cld->cld_lock));
-
-	/*
-	 * The local copy of the sptlrpc log is managed elsewhere; don't
-	 * try to read it here.
-	 */
- if (cld_is_sptlrpc(cld) && local_only)
- return 0;
-
- if (cld->cld_cfg.cfg_sb)
- lsi = s2lsi(cld->cld_cfg.cfg_sb);
-
- env = kzalloc(sizeof(*env), GFP_KERNEL);
- if (!env)
- return -ENOMEM;
-
- rc = lu_env_init(env, LCT_MG_THREAD);
- if (rc)
- goto out_free;
-
- ctxt = llog_get_context(mgc, LLOG_CONFIG_REPL_CTXT);
- LASSERT(ctxt);
-
-	if (local_only) { /* no local log at the client side */
- rc = -EIO;
- goto out_pop;
- }
-
- if (cld_is_sptlrpc(cld)) {
- sptlrpc_conf_log_update_begin(cld->cld_logname);
- sptlrpc_started = true;
- }
-
- /* logname and instance info should be the same, so use our
- * copy of the instance for the update. The cfg_last_idx will
- * be updated here.
- */
- rc = class_config_parse_llog(env, ctxt, cld->cld_logname,
- &cld->cld_cfg);
-
-out_pop:
- __llog_ctxt_put(env, ctxt);
-
-	/*
-	 * Update settings on existing OBDs. This is done inside
-	 * llog_process_lock so that no device is attaching/detaching
-	 * in parallel.
-	 * The logname must be <fsname>-sptlrpc.
-	 */
- if (sptlrpc_started) {
- LASSERT(cld_is_sptlrpc(cld));
- sptlrpc_conf_log_update_end(cld->cld_logname);
- class_notify_sptlrpc_conf(cld->cld_logname,
- strlen(cld->cld_logname) -
- strlen("-sptlrpc"));
- }
-
- lu_env_fini(env);
-out_free:
- kfree(env);
- return rc;
-}
-
-static bool mgc_import_in_recovery(struct obd_import *imp)
-{
- bool in_recovery = true;
-
- spin_lock(&imp->imp_lock);
- if (imp->imp_state == LUSTRE_IMP_FULL ||
- imp->imp_state == LUSTRE_IMP_CLOSED)
- in_recovery = false;
- spin_unlock(&imp->imp_lock);
-
- return in_recovery;
-}
-
-/**
- * Get a configuration log from the MGS and process it.
- *
- * This function is called for both clients and servers to process the
- * configuration log from the MGS. The MGC enqueues a DLM lock on the
- * log from the MGS, and if the lock gets revoked the MGC will be notified
- * by the lock cancellation callback that the config log has changed,
- * and will enqueue another MGS lock on it, and then continue processing
- * the new additions to the end of the log.
- *
- * Since the MGC import is not replayable, if the import is being evicted
- * (rcl == -ESHUTDOWN, \see ptlrpc_import_delay_req()), retry processing
- * the log until recovery finishes or the import is closed.
- *
- * Make a local copy of the log before parsing it if appropriate (non-MGS
- * server) so that the server can start even when the MGS is down.
- *
- * There shouldn't be multiple processes running process_log at once --
- * sounds like badness. It actually might be fine, as long as they're not
- * trying to update from the same log simultaneously, in which case we
- * should use a per-log semaphore instead of cld_lock.
- *
- * \param[in] mgc MGC device by which to fetch the configuration log
- * \param[in] cld log processing state (stored in lock callback data)
- *
- * \retval 0 on success
- * \retval negative errno on failure
- */
-int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld)
-{
- struct lustre_handle lockh = { 0 };
- __u64 flags = LDLM_FL_NO_LRU;
- bool retry = false;
- int rc = 0, rcl;
-
- LASSERT(cld);
-
- /* I don't want multiple processes running process_log at once --
- * sounds like badness. It actually might be fine, as long as
- * we're not trying to update from the same log
- * simultaneously (in which case we should use a per-log sem.)
- */
-restart:
- mutex_lock(&cld->cld_lock);
- if (cld->cld_stopping) {
- mutex_unlock(&cld->cld_lock);
- return 0;
- }
-
- OBD_FAIL_TIMEOUT(OBD_FAIL_MGC_PAUSE_PROCESS_LOG, 20);
-
- CDEBUG(D_MGC, "Process log %s:%p from %d\n", cld->cld_logname,
- cld->cld_cfg.cfg_instance, cld->cld_cfg.cfg_last_idx + 1);
-
- /* Get the cfg lock on the llog */
- rcl = mgc_enqueue(mgc->u.cli.cl_mgc_mgsexp, LDLM_PLAIN, NULL,
- LCK_CR, &flags, NULL, NULL, NULL,
- cld, 0, NULL, &lockh);
- if (rcl == 0) {
- /* Get the cld, it will be released in mgc_blocking_ast. */
- config_log_get(cld);
- rc = ldlm_lock_set_data(&lockh, (void *)cld);
- LASSERT(rc == 0);
- } else {
- CDEBUG(D_MGC, "Can't get cfg lock: %d\n", rcl);
-
- if (rcl == -ESHUTDOWN &&
- atomic_read(&mgc->u.cli.cl_mgc_refcount) > 0 && !retry) {
- struct obd_import *imp;
-
- mutex_unlock(&cld->cld_lock);
- imp = class_exp2cliimp(mgc->u.cli.cl_mgc_mgsexp);
-
-			/*
-			 * Force the pinger and wait for the import to
-			 * connect. Note: since the MGC import is
-			 * non-replayable, a disconnected import state does
-			 * not mean "recovery" has stopped, so keep waiting
-			 * until the timeout expires or the import state
-			 * becomes FULL or CLOSED.
-			 */
- ptlrpc_pinger_force(imp);
-
- wait_event_idle_timeout(imp->imp_recovery_waitq,
- !mgc_import_in_recovery(imp),
- obd_timeout * HZ);
-
- if (imp->imp_state == LUSTRE_IMP_FULL) {
- retry = true;
- goto restart;
- } else {
- mutex_lock(&cld->cld_lock);
- spin_lock(&config_list_lock);
- cld->cld_lostlock = 1;
- spin_unlock(&config_list_lock);
- }
- } else {
- /* mark cld_lostlock so that it will requeue
- * after MGC becomes available.
- */
- spin_lock(&config_list_lock);
- cld->cld_lostlock = 1;
- spin_unlock(&config_list_lock);
- }
- }
-
- if (cld_is_recover(cld)) {
- rc = 0; /* this is not a fatal error for recover log */
- if (!rcl) {
- rc = mgc_process_recover_log(mgc, cld);
- if (rc) {
-				CERROR("%s: recover log %s failed, not fatal: rc = %d\n",
- mgc->obd_name, cld->cld_logname, rc);
- rc = 0;
- spin_lock(&config_list_lock);
- cld->cld_lostlock = 1;
- spin_unlock(&config_list_lock);
- }
- }
- } else {
- rc = mgc_process_cfg_log(mgc, cld, rcl != 0);
- }
-
- CDEBUG(D_MGC, "%s: configuration from log '%s' %sed (%d).\n",
- mgc->obd_name, cld->cld_logname, rc ? "fail" : "succeed", rc);
-
- mutex_unlock(&cld->cld_lock);
-
- /* Now drop the lock so MGS can revoke it */
- if (!rcl)
- ldlm_lock_decref(&lockh, LCK_CR);
-
- return rc;
-}
-
-/** Called from lustre_process_log.
- * LCFG_LOG_START gets the config log from the MGS, processes it to start
- * any services, and adds it to the list of logs to watch (follow).
- */
-static int mgc_process_config(struct obd_device *obd, u32 len, void *buf)
-{
- struct lustre_cfg *lcfg = buf;
- struct config_llog_instance *cfg = NULL;
- char *logname;
- int rc = 0;
-
- switch (lcfg->lcfg_command) {
- case LCFG_LOV_ADD_OBD: {
- /* Overloading this cfg command: register a new target */
- struct mgs_target_info *mti;
-
- if (LUSTRE_CFG_BUFLEN(lcfg, 1) !=
- sizeof(struct mgs_target_info)) {
- rc = -EINVAL;
- goto out;
- }
-
- mti = (struct mgs_target_info *)lustre_cfg_buf(lcfg, 1);
- CDEBUG(D_MGC, "add_target %s %#x\n",
- mti->mti_svname, mti->mti_flags);
- rc = mgc_target_register(obd->u.cli.cl_mgc_mgsexp, mti);
- break;
- }
- case LCFG_LOV_DEL_OBD:
- /* Unregister has no meaning at the moment. */
- CERROR("lov_del_obd unimplemented\n");
- rc = -ENOSYS;
- break;
- case LCFG_SPTLRPC_CONF: {
- rc = sptlrpc_process_config(lcfg);
- break;
- }
- case LCFG_LOG_START: {
- struct config_llog_data *cld;
- struct super_block *sb;
-
- logname = lustre_cfg_string(lcfg, 1);
- cfg = (struct config_llog_instance *)lustre_cfg_buf(lcfg, 2);
- sb = *(struct super_block **)lustre_cfg_buf(lcfg, 3);
-
- CDEBUG(D_MGC, "parse_log %s from %d\n", logname,
- cfg->cfg_last_idx);
-
- /* We're only called through here on the initial mount */
- cld = config_log_add(obd, logname, cfg, sb);
- if (IS_ERR(cld)) {
- rc = PTR_ERR(cld);
- break;
- }
-
- /* COMPAT_146 */
- /* FIXME only set this for old logs! Right now this forces
- * us to always skip the "inside markers" check
- */
- cld->cld_cfg.cfg_flags |= CFG_F_COMPAT146;
-
- rc = mgc_process_log(obd, cld);
- if (rc == 0 && cld->cld_recover) {
-			if (OCD_HAS_FLAG(&obd->u.cli.cl_import->imp_connect_data,
-					 IMP_RECOV)) {
- rc = mgc_process_log(obd, cld->cld_recover);
- } else {
- struct config_llog_data *cir;
-
- mutex_lock(&cld->cld_lock);
- cir = cld->cld_recover;
- cld->cld_recover = NULL;
- mutex_unlock(&cld->cld_lock);
- config_log_put(cir);
- }
-
- if (rc)
-				CERROR("Cannot process recover llog: rc = %d\n", rc);
- }
-
- if (rc == 0 && cld->cld_params) {
- rc = mgc_process_log(obd, cld->cld_params);
- if (rc == -ENOENT) {
- CDEBUG(D_MGC,
- "There is no params config file yet\n");
- rc = 0;
- }
- /* params log is optional */
- if (rc)
-				CERROR("%s: can't process params llog: rc = %d\n",
- obd->obd_name, rc);
- }
-
- break;
- }
- case LCFG_LOG_END: {
- logname = lustre_cfg_string(lcfg, 1);
-
- if (lcfg->lcfg_bufcount >= 2)
-			cfg = (struct config_llog_instance *)
-				lustre_cfg_buf(lcfg, 2);
- rc = config_log_end(logname, cfg);
- break;
- }
- default: {
- CERROR("Unknown command: %d\n", lcfg->lcfg_command);
- rc = -EINVAL;
- goto out;
- }
- }
-out:
- return rc;
-}
-
-static struct obd_ops mgc_obd_ops = {
- .owner = THIS_MODULE,
- .setup = mgc_setup,
- .precleanup = mgc_precleanup,
- .cleanup = mgc_cleanup,
- .add_conn = client_import_add_conn,
- .del_conn = client_import_del_conn,
- .connect = client_connect_import,
- .disconnect = client_disconnect_export,
- .set_info_async = mgc_set_info_async,
- .get_info = mgc_get_info,
- .import_event = mgc_import_event,
- .process_config = mgc_process_config,
-};
-
-static int __init mgc_init(void)
-{
- int rc;
-
- rc = libcfs_setup();
- if (rc)
- return rc;
-
- return class_register_type(&mgc_obd_ops, NULL,
- LUSTRE_MGC_NAME, NULL);
-}
-
-static void /*__exit*/ mgc_exit(void)
-{
- class_unregister_type(LUSTRE_MGC_NAME);
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Management Client");
-MODULE_VERSION(LUSTRE_VERSION_STRING);
-MODULE_LICENSE("GPL");
-
-module_init(mgc_init);
-module_exit(mgc_exit);
diff --git a/drivers/staging/lustre/lustre/obdclass/Makefile b/drivers/staging/lustre/lustre/obdclass/Makefile
deleted file mode 100644
index e3fa9acff4c4..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LUSTRE_FS) += obdclass.o
-
-obdclass-y := linux/linux-module.o linux/linux-sysctl.o \
- llog.o llog_cat.o llog_obd.o llog_swab.o class_obd.o debug.o \
- genops.o uuid.o lprocfs_status.o lprocfs_counters.o \
- lustre_handles.o lustre_peer.o statfs_pack.o linkea.o \
- obdo.o obd_config.o obd_mount.o lu_object.o lu_ref.o \
- cl_object.o cl_page.o cl_lock.o cl_io.o kernelcomm.o
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_internal.h b/drivers/staging/lustre/lustre/obdclass/cl_internal.h
deleted file mode 100644
index a0db830ca841..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/cl_internal.h
+++ /dev/null
@@ -1,95 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Internal cl interfaces.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-#ifndef _CL_INTERNAL_H
-#define _CL_INTERNAL_H
-
-#define CLT_PVEC_SIZE (14)
-
-/**
- * Possible nesting levels. Currently there are two: "top" entities
- * (files, extent locks) and "sub" entities (stripes and stripe locks).
- * This is used only for debugging counters right now.
- */
-enum clt_nesting_level {
- CNL_TOP,
- CNL_SUB,
- CNL_NR
-};
-
-/**
- * Thread local state internal for generic cl-code.
- */
-struct cl_thread_info {
- /*
- * Common fields.
- */
- struct cl_io clt_io;
- struct cl_2queue clt_queue;
-
- /*
- * Fields used by cl_lock.c
- */
- struct cl_lock_descr clt_descr;
- struct cl_page_list clt_list;
-
- /*
- * Fields used by cl_page.c
- */
- struct cl_page *clt_pvec[CLT_PVEC_SIZE];
-
- /*
- * Fields used by cl_io.c
- */
- /**
- * Pointer to the topmost ongoing IO in this thread.
- */
- struct cl_io *clt_current_io;
- /**
- * Used for submitting a sync io.
- */
- struct cl_sync_io clt_anchor;
- /**
- * Fields used by cl_lock_discard_pages().
- */
- pgoff_t clt_next_index;
- pgoff_t clt_fn_index; /* first non-overlapped index */
-};
-
-struct cl_thread_info *cl_env_info(const struct lu_env *env);
-
-#endif /* _CL_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_io.c b/drivers/staging/lustre/lustre/obdclass/cl_io.c
deleted file mode 100644
index fcdae6029258..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/cl_io.c
+++ /dev/null
@@ -1,1151 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Client IO.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@intel.com>
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <obd_class.h>
-#include <obd_support.h>
-#include <lustre_fid.h>
-#include <linux/list.h>
-#include <linux/sched.h>
-#include <cl_object.h>
-#include "cl_internal.h"
-
-/*****************************************************************************
- *
- * cl_io interface.
- *
- */
-
-#define cl_io_for_each(slice, io) \
- list_for_each_entry((slice), &io->ci_layers, cis_linkage)
-#define cl_io_for_each_reverse(slice, io) \
- list_for_each_entry_reverse((slice), &io->ci_layers, cis_linkage)
-
-static inline int cl_io_type_is_valid(enum cl_io_type type)
-{
- return CIT_READ <= type && type < CIT_OP_NR;
-}
-
-static inline int cl_io_is_loopable(const struct cl_io *io)
-{
- return cl_io_type_is_valid(io->ci_type) && io->ci_type != CIT_MISC;
-}
-
-/**
- * Returns true iff there is an IO ongoing in the given environment.
- */
-int cl_io_is_going(const struct lu_env *env)
-{
- return cl_env_info(env)->clt_current_io != NULL;
-}
-
-/**
- * cl_io invariant that holds at all times when exported cl_io_*() functions
- * are entered and left.
- */
-static int cl_io_invariant(const struct cl_io *io)
-{
- struct cl_io *up;
-
- up = io->ci_parent;
- return
- /*
- * io can own pages only when it is ongoing. Sub-io might
- * still be in CIS_LOCKED state when top-io is in
- * CIS_IO_GOING.
- */
- ergo(io->ci_owned_nr > 0, io->ci_state == CIS_IO_GOING ||
- (io->ci_state == CIS_LOCKED && up));
-}
-
-/**
- * Finalize \a io, by calling cl_io_operations::cio_fini() bottom-to-top.
- */
-void cl_io_fini(const struct lu_env *env, struct cl_io *io)
-{
- struct cl_io_slice *slice;
- struct cl_thread_info *info;
-
- LINVRNT(cl_io_type_is_valid(io->ci_type));
- LINVRNT(cl_io_invariant(io));
-
- while (!list_empty(&io->ci_layers)) {
- slice = container_of(io->ci_layers.prev, struct cl_io_slice,
- cis_linkage);
- list_del_init(&slice->cis_linkage);
- if (slice->cis_iop->op[io->ci_type].cio_fini)
- slice->cis_iop->op[io->ci_type].cio_fini(env, slice);
- /*
- * Invalidate slice to catch use after free. This assumes that
- * slices are allocated within session and can be touched
- * after ->cio_fini() returns.
- */
- slice->cis_io = NULL;
- }
- io->ci_state = CIS_FINI;
- info = cl_env_info(env);
- if (info->clt_current_io == io)
- info->clt_current_io = NULL;
-
- /* sanity check for layout change */
- switch (io->ci_type) {
- case CIT_READ:
- case CIT_WRITE:
- case CIT_DATA_VERSION:
- break;
- case CIT_FAULT:
- break;
- case CIT_FSYNC:
- LASSERT(!io->ci_need_restart);
- break;
- case CIT_SETATTR:
- case CIT_MISC:
- /* Check ignore layout change conf */
- LASSERT(ergo(io->ci_ignore_layout || !io->ci_verify_layout,
- !io->ci_need_restart));
- break;
- default:
- LBUG();
- }
-}
-EXPORT_SYMBOL(cl_io_fini);
-
-static int cl_io_init0(const struct lu_env *env, struct cl_io *io,
- enum cl_io_type iot, struct cl_object *obj)
-{
- struct cl_object *scan;
- int result;
-
- LINVRNT(io->ci_state == CIS_ZERO || io->ci_state == CIS_FINI);
- LINVRNT(cl_io_type_is_valid(iot));
- LINVRNT(cl_io_invariant(io));
-
- io->ci_type = iot;
- INIT_LIST_HEAD(&io->ci_lockset.cls_todo);
- INIT_LIST_HEAD(&io->ci_lockset.cls_done);
- INIT_LIST_HEAD(&io->ci_layers);
-
- result = 0;
- cl_object_for_each(scan, obj) {
- if (scan->co_ops->coo_io_init) {
- result = scan->co_ops->coo_io_init(env, scan, io);
- if (result != 0)
- break;
- }
- }
- if (result == 0)
- io->ci_state = CIS_INIT;
- return result;
-}
-
-/**
- * Initialize sub-io, by calling cl_io_operations::cio_init() top-to-bottom.
- *
- * \pre obj != cl_object_top(obj)
- */
-int cl_io_sub_init(const struct lu_env *env, struct cl_io *io,
- enum cl_io_type iot, struct cl_object *obj)
-{
- struct cl_thread_info *info = cl_env_info(env);
-
- LASSERT(obj != cl_object_top(obj));
- if (!info->clt_current_io)
- info->clt_current_io = io;
- return cl_io_init0(env, io, iot, obj);
-}
-EXPORT_SYMBOL(cl_io_sub_init);
-
-/**
- * Initialize \a io, by calling cl_io_operations::cio_init() top-to-bottom.
- *
- * Caller has to call cl_io_fini() after a call to cl_io_init(), no matter
- * what the latter returned.
- *
- * \pre obj == cl_object_top(obj)
- * \pre cl_io_type_is_valid(iot)
- * \post cl_io_type_is_valid(io->ci_type) && io->ci_type == iot
- */
-int cl_io_init(const struct lu_env *env, struct cl_io *io,
- enum cl_io_type iot, struct cl_object *obj)
-{
- struct cl_thread_info *info = cl_env_info(env);
-
- LASSERT(obj == cl_object_top(obj));
- LASSERT(!info->clt_current_io);
-
- info->clt_current_io = io;
- return cl_io_init0(env, io, iot, obj);
-}
-EXPORT_SYMBOL(cl_io_init);
-
-/**
- * Initialize read or write io.
- *
- * \pre iot == CIT_READ || iot == CIT_WRITE
- */
-int cl_io_rw_init(const struct lu_env *env, struct cl_io *io,
- enum cl_io_type iot, loff_t pos, size_t count)
-{
- LINVRNT(iot == CIT_READ || iot == CIT_WRITE);
- LINVRNT(io->ci_obj);
-
- LU_OBJECT_HEADER(D_VFSTRACE, env, &io->ci_obj->co_lu,
- "io range: %u [%llu, %llu) %u %u\n",
- iot, (__u64)pos, (__u64)pos + count,
- io->u.ci_rw.crw_nonblock, io->u.ci_wr.wr_append);
- io->u.ci_rw.crw_pos = pos;
- io->u.ci_rw.crw_count = count;
- return cl_io_init(env, io, iot, io->ci_obj);
-}
-EXPORT_SYMBOL(cl_io_rw_init);
-
-static int cl_lock_descr_sort(const struct cl_lock_descr *d0,
- const struct cl_lock_descr *d1)
-{
- return lu_fid_cmp(lu_object_fid(&d0->cld_obj->co_lu),
- lu_object_fid(&d1->cld_obj->co_lu));
-}
-
-/*
- * Sort locks by the fid of the object they protect, so that all IOs
- * acquire locks in a consistent order.
- */
-static void cl_io_locks_sort(struct cl_io *io)
-{
- int done = 0;
-
- /* hidden treasure: bubble sort for now. */
- do {
- struct cl_io_lock_link *curr;
- struct cl_io_lock_link *prev;
- struct cl_io_lock_link *temp;
-
- done = 1;
- prev = NULL;
-
- list_for_each_entry_safe(curr, temp,
- &io->ci_lockset.cls_todo,
- cill_linkage) {
- if (prev) {
- switch (cl_lock_descr_sort(&prev->cill_descr,
- &curr->cill_descr)) {
- case 0:
- /*
-					 * IMPOSSIBLE: identical locks have
-					 * already been removed at this point.
- */
- default:
- LBUG();
- case 1:
- list_move_tail(&curr->cill_linkage,
- &prev->cill_linkage);
- done = 0;
- continue; /* don't change prev: it's
- * still "previous"
- */
- case -1: /* already in order */
- break;
- }
- }
- prev = curr;
- }
- } while (!done);
-}
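-
-/*
- * The O(n^2) sort above is presumably acceptable because an io only holds
- * a handful of locks; should lock sets ever grow large, a list_sort()
- * based implementation would be the natural replacement.
- */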
-
-static void cl_lock_descr_merge(struct cl_lock_descr *d0,
- const struct cl_lock_descr *d1)
-{
- d0->cld_start = min(d0->cld_start, d1->cld_start);
- d0->cld_end = max(d0->cld_end, d1->cld_end);
-
- if (d1->cld_mode == CLM_WRITE && d0->cld_mode != CLM_WRITE)
- d0->cld_mode = CLM_WRITE;
-
- if (d1->cld_mode == CLM_GROUP && d0->cld_mode != CLM_GROUP)
- d0->cld_mode = CLM_GROUP;
-}
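-
-/*
- * For example (hypothetical values): merging d0 = [0, 100] CLM_READ with
- * d1 = [50, 200] CLM_WRITE yields d0 = [0, 200] CLM_WRITE -- the union of
- * the extents with the strongest of the two modes.
- */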
-
-static int cl_lockset_merge(const struct cl_lockset *set,
- const struct cl_lock_descr *need)
-{
- struct cl_io_lock_link *scan;
-
- list_for_each_entry(scan, &set->cls_todo, cill_linkage) {
- if (!cl_object_same(scan->cill_descr.cld_obj, need->cld_obj))
- continue;
-
-		/* Merge locks for the same object, because the ldlm lock
-		 * server may expand the lock extent; otherwise there is a
-		 * deadlock if two conflicting locks are queued for the same
-		 * object and the server expands one lock to overlap the
-		 * other. The side effect is that this can generate a
-		 * multi-stripe lock that may cause a cascading problem.
-		 */
- cl_lock_descr_merge(&scan->cill_descr, need);
- CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n",
- scan->cill_descr.cld_mode, scan->cill_descr.cld_start,
- scan->cill_descr.cld_end);
- return 1;
- }
- return 0;
-}
-
-static int cl_lockset_lock(const struct lu_env *env, struct cl_io *io,
- struct cl_lockset *set)
-{
- struct cl_io_lock_link *link;
- struct cl_io_lock_link *temp;
- int result;
-
- result = 0;
- list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) {
- result = cl_lock_request(env, io, &link->cill_lock);
- if (result < 0)
- break;
-
- list_move(&link->cill_linkage, &set->cls_done);
- }
- return result;
-}
-
-/**
- * Takes locks necessary for the current iteration of io.
- *
- * Calls cl_io_operations::cio_lock() top-to-bottom to collect locks required
- * by layers for the current iteration, then sorts the locks (to avoid
- * deadlocks) and acquires them.
- */
-int cl_io_lock(const struct lu_env *env, struct cl_io *io)
-{
- const struct cl_io_slice *scan;
- int result = 0;
-
- LINVRNT(cl_io_is_loopable(io));
- LINVRNT(io->ci_state == CIS_IT_STARTED);
- LINVRNT(cl_io_invariant(io));
-
- cl_io_for_each(scan, io) {
- if (!scan->cis_iop->op[io->ci_type].cio_lock)
- continue;
- result = scan->cis_iop->op[io->ci_type].cio_lock(env, scan);
- if (result != 0)
- break;
- }
- if (result == 0) {
- cl_io_locks_sort(io);
- result = cl_lockset_lock(env, io, &io->ci_lockset);
- }
- if (result != 0)
- cl_io_unlock(env, io);
- else
- io->ci_state = CIS_LOCKED;
- return result;
-}
-EXPORT_SYMBOL(cl_io_lock);
-
-/**
- * Releases locks taken by io.
- */
-void cl_io_unlock(const struct lu_env *env, struct cl_io *io)
-{
- struct cl_lockset *set;
- struct cl_io_lock_link *link;
- struct cl_io_lock_link *temp;
- const struct cl_io_slice *scan;
-
- LASSERT(cl_io_is_loopable(io));
- LASSERT(CIS_IT_STARTED <= io->ci_state && io->ci_state < CIS_UNLOCKED);
- LINVRNT(cl_io_invariant(io));
-
- set = &io->ci_lockset;
-
- list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) {
- list_del_init(&link->cill_linkage);
- if (link->cill_fini)
- link->cill_fini(env, link);
- }
-
- list_for_each_entry_safe(link, temp, &set->cls_done, cill_linkage) {
- list_del_init(&link->cill_linkage);
- cl_lock_release(env, &link->cill_lock);
- if (link->cill_fini)
- link->cill_fini(env, link);
- }
-
- cl_io_for_each_reverse(scan, io) {
- if (scan->cis_iop->op[io->ci_type].cio_unlock)
- scan->cis_iop->op[io->ci_type].cio_unlock(env, scan);
- }
- io->ci_state = CIS_UNLOCKED;
-}
-EXPORT_SYMBOL(cl_io_unlock);
-
-/**
- * Prepares next iteration of io.
- *
- * Calls cl_io_operations::cio_iter_init() top-to-bottom. This exists to give
- * layers a chance to modify io parameters, e.g., so that lov can restrict io
- * to a single stripe.
- */
-int cl_io_iter_init(const struct lu_env *env, struct cl_io *io)
-{
- const struct cl_io_slice *scan;
- int result;
-
- LINVRNT(cl_io_is_loopable(io));
- LINVRNT(io->ci_state == CIS_INIT || io->ci_state == CIS_IT_ENDED);
- LINVRNT(cl_io_invariant(io));
-
- result = 0;
- cl_io_for_each(scan, io) {
- if (!scan->cis_iop->op[io->ci_type].cio_iter_init)
- continue;
- result = scan->cis_iop->op[io->ci_type].cio_iter_init(env,
- scan);
- if (result != 0)
- break;
- }
- if (result == 0)
- io->ci_state = CIS_IT_STARTED;
- return result;
-}
-EXPORT_SYMBOL(cl_io_iter_init);
-
-/**
- * Finalizes io iteration.
- *
- * Calls cl_io_operations::cio_iter_fini() bottom-to-top.
- */
-void cl_io_iter_fini(const struct lu_env *env, struct cl_io *io)
-{
- const struct cl_io_slice *scan;
-
- LINVRNT(cl_io_is_loopable(io));
- LINVRNT(io->ci_state == CIS_UNLOCKED);
- LINVRNT(cl_io_invariant(io));
-
- cl_io_for_each_reverse(scan, io) {
- if (scan->cis_iop->op[io->ci_type].cio_iter_fini)
- scan->cis_iop->op[io->ci_type].cio_iter_fini(env, scan);
- }
- io->ci_state = CIS_IT_ENDED;
-}
-EXPORT_SYMBOL(cl_io_iter_fini);
-
-/**
- * Records that read or write io progressed \a nob bytes forward.
- */
-static void cl_io_rw_advance(const struct lu_env *env, struct cl_io *io,
- size_t nob)
-{
- const struct cl_io_slice *scan;
-
- LINVRNT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE ||
- nob == 0);
- LINVRNT(cl_io_is_loopable(io));
- LINVRNT(cl_io_invariant(io));
-
- io->u.ci_rw.crw_pos += nob;
- io->u.ci_rw.crw_count -= nob;
-
- /* layers have to be notified. */
- cl_io_for_each_reverse(scan, io) {
- if (scan->cis_iop->op[io->ci_type].cio_advance)
- scan->cis_iop->op[io->ci_type].cio_advance(env, scan,
- nob);
- }
-}
-
-/**
- * Adds a lock to a lockset.
- */
-int cl_io_lock_add(const struct lu_env *env, struct cl_io *io,
- struct cl_io_lock_link *link)
-{
- int result;
-
- if (cl_lockset_merge(&io->ci_lockset, &link->cill_descr)) {
- result = 1;
- } else {
- list_add(&link->cill_linkage, &io->ci_lockset.cls_todo);
- result = 0;
- }
- return result;
-}
-EXPORT_SYMBOL(cl_io_lock_add);
-
-static void cl_free_io_lock_link(const struct lu_env *env,
- struct cl_io_lock_link *link)
-{
- kfree(link);
-}
-
-/**
- * Allocates a new lock link and uses it to add a lock to a lockset.
- */
-int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io,
- struct cl_lock_descr *descr)
-{
- struct cl_io_lock_link *link;
- int result;
-
- link = kzalloc(sizeof(*link), GFP_NOFS);
- if (link) {
- link->cill_descr = *descr;
- link->cill_fini = cl_free_io_lock_link;
- result = cl_io_lock_add(env, io, link);
- if (result) /* lock match */
- link->cill_fini(env, link);
- } else {
- result = -ENOMEM;
- }
-
- return result;
-}
-EXPORT_SYMBOL(cl_io_lock_alloc_add);
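-
-/*
- * A minimal usage sketch (hypothetical caller, with illustrative values):
- * a layer's ->cio_lock() method typically fills a descriptor for the
- * range it needs and hands it to cl_io_lock_alloc_add():
- *
- *	struct cl_lock_descr descr = {
- *		.cld_obj   = io->ci_obj,
- *		.cld_mode  = CLM_READ,
- *		.cld_start = start,
- *		.cld_end   = end,
- *	};
- *
- *	return cl_io_lock_alloc_add(env, io, &descr);
- */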
-
-/**
- * Starts io by calling cl_io_operations::cio_start() top-to-bottom.
- */
-int cl_io_start(const struct lu_env *env, struct cl_io *io)
-{
- const struct cl_io_slice *scan;
- int result = 0;
-
- LINVRNT(cl_io_is_loopable(io));
- LINVRNT(io->ci_state == CIS_LOCKED);
- LINVRNT(cl_io_invariant(io));
-
- io->ci_state = CIS_IO_GOING;
- cl_io_for_each(scan, io) {
- if (!scan->cis_iop->op[io->ci_type].cio_start)
- continue;
- result = scan->cis_iop->op[io->ci_type].cio_start(env, scan);
- if (result != 0)
- break;
- }
- if (result >= 0)
- result = 0;
- return result;
-}
-EXPORT_SYMBOL(cl_io_start);
-
-/**
- * Wait until current io iteration is finished by calling
- * cl_io_operations::cio_end() bottom-to-top.
- */
-void cl_io_end(const struct lu_env *env, struct cl_io *io)
-{
- const struct cl_io_slice *scan;
-
- LINVRNT(cl_io_is_loopable(io));
- LINVRNT(io->ci_state == CIS_IO_GOING);
- LINVRNT(cl_io_invariant(io));
-
- cl_io_for_each_reverse(scan, io) {
- if (scan->cis_iop->op[io->ci_type].cio_end)
- scan->cis_iop->op[io->ci_type].cio_end(env, scan);
- /* TODO: error handling. */
- }
- io->ci_state = CIS_IO_FINISHED;
-}
-EXPORT_SYMBOL(cl_io_end);
-
-/**
- * Called by read io to decide the readahead extent.
- *
- * \see cl_io_operations::cio_read_ahead()
- */
-int cl_io_read_ahead(const struct lu_env *env, struct cl_io *io,
- pgoff_t start, struct cl_read_ahead *ra)
-{
- const struct cl_io_slice *scan;
- int result = 0;
-
- LINVRNT(io->ci_type == CIT_READ || io->ci_type == CIT_FAULT);
- LINVRNT(cl_io_invariant(io));
-
- cl_io_for_each(scan, io) {
- if (!scan->cis_iop->cio_read_ahead)
- continue;
-
- result = scan->cis_iop->cio_read_ahead(env, scan, start, ra);
- if (result)
- break;
- }
- return result > 0 ? 0 : result;
-}
-EXPORT_SYMBOL(cl_io_read_ahead);
-
-/**
- * Commit a list of contiguous pages into writeback cache.
- *
- * \returns 0 if all pages committed, or errcode if error occurred.
- * \see cl_io_operations::cio_commit_async()
- */
-int cl_io_commit_async(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue, int from, int to,
- cl_commit_cbt cb)
-{
- const struct cl_io_slice *scan;
- int result = 0;
-
- cl_io_for_each(scan, io) {
- if (!scan->cis_iop->cio_commit_async)
- continue;
- result = scan->cis_iop->cio_commit_async(env, scan, queue,
- from, to, cb);
- if (result != 0)
- break;
- }
- return result;
-}
-EXPORT_SYMBOL(cl_io_commit_async);
-
-/**
- * Submits a list of pages for immediate io.
- *
- * On return, the submitted pages have been moved to queue->c2_qout, and
- * queue->c2_qin contains both the pages that did not need to be submitted
- * and the pages that failed to submit.
- *
- * \returns 0 if at least one page was submitted, error code otherwise.
- * \see cl_io_operations::cio_submit()
- */
-int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
- enum cl_req_type crt, struct cl_2queue *queue)
-{
- const struct cl_io_slice *scan;
- int result = 0;
-
- cl_io_for_each(scan, io) {
- if (!scan->cis_iop->cio_submit)
- continue;
- result = scan->cis_iop->cio_submit(env, scan, crt, queue);
- if (result != 0)
- break;
- }
- /*
- * If ->cio_submit() failed, no pages were sent.
- */
- LASSERT(ergo(result != 0, list_empty(&queue->c2_qout.pl_pages)));
- return result;
-}
-EXPORT_SYMBOL(cl_io_submit_rw);
-
-static void cl_page_list_assume(const struct lu_env *env,
- struct cl_io *io, struct cl_page_list *plist);
-
-/**
- * Submits a sync io and waits until the IO finishes or an error occurs.
- * If \a timeout is zero, wait for the IO unconditionally.
- */
-int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
- enum cl_req_type iot, struct cl_2queue *queue,
- long timeout)
-{
- struct cl_sync_io *anchor = &cl_env_info(env)->clt_anchor;
- struct cl_page *pg;
- int rc;
-
- cl_page_list_for_each(pg, &queue->c2_qin) {
- LASSERT(!pg->cp_sync_io);
- pg->cp_sync_io = anchor;
- }
-
- cl_sync_io_init(anchor, queue->c2_qin.pl_nr, &cl_sync_io_end);
- rc = cl_io_submit_rw(env, io, iot, queue);
- if (rc == 0) {
- /*
- * If some pages weren't sent for any reason (e.g.,
- * read found up-to-date pages in the cache, or write found
- * clean pages), count them as completed to avoid infinite
- * wait.
- */
- cl_page_list_for_each(pg, &queue->c2_qin) {
- pg->cp_sync_io = NULL;
- cl_sync_io_note(env, anchor, 1);
- }
-
- /* wait for the IO to be finished. */
- rc = cl_sync_io_wait(env, anchor, timeout);
- cl_page_list_assume(env, io, &queue->c2_qout);
- } else {
- LASSERT(list_empty(&queue->c2_qout.pl_pages));
- cl_page_list_for_each(pg, &queue->c2_qin)
- pg->cp_sync_io = NULL;
- }
- return rc;
-}
-EXPORT_SYMBOL(cl_io_submit_sync);
-
-/**
- * Main io loop.
- *
- * Pumps io through iterations calling
- *
- * - cl_io_iter_init()
- *
- * - cl_io_lock()
- *
- * - cl_io_start()
- *
- * - cl_io_end()
- *
- * - cl_io_unlock()
- *
- * - cl_io_iter_fini()
- *
- * repeatedly until there is no more io to do.
- */
-int cl_io_loop(const struct lu_env *env, struct cl_io *io)
-{
- int result = 0;
-
- LINVRNT(cl_io_is_loopable(io));
-
- do {
- size_t nob;
-
- io->ci_continue = 0;
- result = cl_io_iter_init(env, io);
- if (result == 0) {
- nob = io->ci_nob;
- result = cl_io_lock(env, io);
- if (result == 0) {
- /*
- * Notify layers that locks has been taken,
- * and do actual i/o.
- *
- * - llite: kms, short read;
- * - llite: generic_file_read();
- */
- result = cl_io_start(env, io);
- /*
- * Send any remaining pending
- * io, etc.
- *
- * - llite: ll_rw_stats_tally.
- */
- cl_io_end(env, io);
- cl_io_unlock(env, io);
- cl_io_rw_advance(env, io, io->ci_nob - nob);
- }
- }
- cl_io_iter_fini(env, io);
- } while (result == 0 && io->ci_continue);
- if (result == 0)
- result = io->ci_result;
- return result < 0 ? result : 0;
-}
-EXPORT_SYMBOL(cl_io_loop);
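-
-/*
- * A condensed caller sketch (hypothetical; modeled on the llite read
- * path, with names other than the cl_io_*() calls being illustrative):
- *
- *	io = vvp_env_thread_io(env);
- *	rc = cl_io_rw_init(env, io, CIT_READ, pos, count);
- *	if (rc == 0)
- *		rc = cl_io_loop(env, io);
- *	cl_io_fini(env, io);	// always, per the cl_io_init() contract
- */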
-
-/**
- * Adds io slice to the cl_io.
- *
- * This is called by cl_object_operations::coo_io_init() methods to add a
- * per-layer state to the io. New state is added at the end of
- * cl_io::ci_layers list, that is, it is at the bottom of the stack.
- *
- * \see cl_lock_slice_add(), cl_req_slice_add(), cl_page_slice_add()
- */
-void cl_io_slice_add(struct cl_io *io, struct cl_io_slice *slice,
- struct cl_object *obj,
- const struct cl_io_operations *ops)
-{
- struct list_head *linkage = &slice->cis_linkage;
-
- LASSERT((!linkage->prev && !linkage->next) ||
- list_empty(linkage));
-
- list_add_tail(linkage, &io->ci_layers);
- slice->cis_io = io;
- slice->cis_obj = obj;
- slice->cis_iop = ops;
-}
-EXPORT_SYMBOL(cl_io_slice_add);
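-
-/*
- * A minimal sketch of a layer registering its slice (hypothetical layer
- * "foo"; only cl_io_slice_add() itself is from this file):
- *
- *	static int foo_io_init(const struct lu_env *env,
- *			       struct cl_object *obj, struct cl_io *io)
- *	{
- *		struct foo_io *fio = foo_env_io(env);
- *
- *		cl_io_slice_add(io, &fio->fi_cl, obj, &foo_io_ops);
- *		return 0;
- *	}
- */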
-
-/**
- * Initializes page list.
- */
-void cl_page_list_init(struct cl_page_list *plist)
-{
- plist->pl_nr = 0;
- INIT_LIST_HEAD(&plist->pl_pages);
- plist->pl_owner = current;
-}
-EXPORT_SYMBOL(cl_page_list_init);
-
-/**
- * Adds a page to a page list.
- */
-void cl_page_list_add(struct cl_page_list *plist, struct cl_page *page)
-{
- /* it would be better to check that page is owned by "current" io, but
- * it is not passed here.
- */
- LASSERT(page->cp_owner);
- LINVRNT(plist->pl_owner == current);
-
- LASSERT(list_empty(&page->cp_batch));
- list_add_tail(&page->cp_batch, &plist->pl_pages);
- ++plist->pl_nr;
- lu_ref_add_at(&page->cp_reference, &page->cp_queue_ref, "queue", plist);
- cl_page_get(page);
-}
-EXPORT_SYMBOL(cl_page_list_add);
-
-/**
- * Removes a page from a page list.
- */
-void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist,
- struct cl_page *page)
-{
- LASSERT(plist->pl_nr > 0);
- LASSERT(cl_page_is_vmlocked(env, page));
- LINVRNT(plist->pl_owner == current);
-
- list_del_init(&page->cp_batch);
- --plist->pl_nr;
- lu_ref_del_at(&page->cp_reference, &page->cp_queue_ref, "queue", plist);
- cl_page_put(env, page);
-}
-EXPORT_SYMBOL(cl_page_list_del);
-
-/**
- * Moves a page from one page list to another.
- */
-void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src,
- struct cl_page *page)
-{
- LASSERT(src->pl_nr > 0);
- LINVRNT(dst->pl_owner == current);
- LINVRNT(src->pl_owner == current);
-
- list_move_tail(&page->cp_batch, &dst->pl_pages);
- --src->pl_nr;
- ++dst->pl_nr;
- lu_ref_set_at(&page->cp_reference, &page->cp_queue_ref, "queue",
- src, dst);
-}
-EXPORT_SYMBOL(cl_page_list_move);
-
-/**
- * Moves a page from one page list to the head of another list.
- */
-void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src,
- struct cl_page *page)
-{
- LASSERT(src->pl_nr > 0);
- LINVRNT(dst->pl_owner == current);
- LINVRNT(src->pl_owner == current);
-
- list_move(&page->cp_batch, &dst->pl_pages);
- --src->pl_nr;
- ++dst->pl_nr;
- lu_ref_set_at(&page->cp_reference, &page->cp_queue_ref, "queue",
- src, dst);
-}
-EXPORT_SYMBOL(cl_page_list_move_head);
-
-/**
- * Splice the cl_page_list, just as list_splice() does for a regular
- * list head.
- */
-void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head)
-{
- struct cl_page *page;
- struct cl_page *tmp;
-
- LINVRNT(list->pl_owner == current);
- LINVRNT(head->pl_owner == current);
-
- cl_page_list_for_each_safe(page, tmp, list)
- cl_page_list_move(head, list, page);
-}
-EXPORT_SYMBOL(cl_page_list_splice);
-
-/**
- * Disowns pages in a queue.
- */
-void cl_page_list_disown(const struct lu_env *env,
- struct cl_io *io, struct cl_page_list *plist)
-{
- struct cl_page *page;
- struct cl_page *temp;
-
- LINVRNT(plist->pl_owner == current);
-
- cl_page_list_for_each_safe(page, temp, plist) {
- LASSERT(plist->pl_nr > 0);
-
- list_del_init(&page->cp_batch);
- --plist->pl_nr;
- /*
- * cl_page_disown0() rather than the usual cl_page_disown() is
- * used here, because pages are possibly in CPS_FREEING state
- * already due to the call to cl_page_list_discard().
- *
- * XXX cl_page_disown0() will fail if the page is not locked.
- */
- cl_page_disown0(env, io, page);
- lu_ref_del_at(&page->cp_reference, &page->cp_queue_ref, "queue",
- plist);
- cl_page_put(env, page);
- }
-}
-EXPORT_SYMBOL(cl_page_list_disown);
-
-/**
- * Releases pages from queue.
- */
-void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist)
-{
- struct cl_page *page;
- struct cl_page *temp;
-
- LINVRNT(plist->pl_owner == current);
-
- cl_page_list_for_each_safe(page, temp, plist)
- cl_page_list_del(env, plist, page);
- LASSERT(plist->pl_nr == 0);
-}
-EXPORT_SYMBOL(cl_page_list_fini);
-
-/**
- * Assumes all pages in a queue.
- */
-static void cl_page_list_assume(const struct lu_env *env,
- struct cl_io *io, struct cl_page_list *plist)
-{
- struct cl_page *page;
-
- LINVRNT(plist->pl_owner == current);
-
- cl_page_list_for_each(page, plist)
- cl_page_assume(env, io, page);
-}
-
-/**
- * Discards all pages in a queue.
- */
-static void cl_page_list_discard(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *plist)
-{
- struct cl_page *page;
-
- LINVRNT(plist->pl_owner == current);
- cl_page_list_for_each(page, plist)
- cl_page_discard(env, io, page);
-}
-
-/**
- * Initialize dual page queue.
- */
-void cl_2queue_init(struct cl_2queue *queue)
-{
- cl_page_list_init(&queue->c2_qin);
- cl_page_list_init(&queue->c2_qout);
-}
-EXPORT_SYMBOL(cl_2queue_init);
-
-/**
- * Disown pages in both lists of a 2-queue.
- */
-void cl_2queue_disown(const struct lu_env *env,
- struct cl_io *io, struct cl_2queue *queue)
-{
- cl_page_list_disown(env, io, &queue->c2_qin);
- cl_page_list_disown(env, io, &queue->c2_qout);
-}
-EXPORT_SYMBOL(cl_2queue_disown);
-
-/**
- * Discard (truncate) pages in both lists of a 2-queue.
- */
-void cl_2queue_discard(const struct lu_env *env,
- struct cl_io *io, struct cl_2queue *queue)
-{
- cl_page_list_discard(env, io, &queue->c2_qin);
- cl_page_list_discard(env, io, &queue->c2_qout);
-}
-EXPORT_SYMBOL(cl_2queue_discard);
-
-/**
- * Finalize both page lists of a 2-queue.
- */
-void cl_2queue_fini(const struct lu_env *env, struct cl_2queue *queue)
-{
- cl_page_list_fini(env, &queue->c2_qout);
- cl_page_list_fini(env, &queue->c2_qin);
-}
-EXPORT_SYMBOL(cl_2queue_fini);
-
-/**
- * Initialize a 2-queue to contain \a page in its incoming page list.
- */
-void cl_2queue_init_page(struct cl_2queue *queue, struct cl_page *page)
-{
- cl_2queue_init(queue);
- /*
- * Add a page to the incoming page list of 2-queue.
- */
- cl_page_list_add(&queue->c2_qin, page);
-}
-EXPORT_SYMBOL(cl_2queue_init_page);
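-
-/*
- * A hedged sketch, not part of the original file: the usual single-page
- * 2-queue round trip, loosely following llite's direct-IO path. It
- * assumes "page" is already owned by "io"; the name is illustrative.
- */
-static int example_write_one(const struct lu_env *env, struct cl_io *io,
-                             struct cl_page *page)
-{
-        struct cl_2queue queue;
-        int rc;
-
-        cl_2queue_init_page(&queue, page);      /* page goes into c2_qin */
-        rc = cl_io_submit_sync(env, io, CRT_WRITE, &queue, 0);
-        if (rc != 0)
-                cl_2queue_discard(env, io, &queue); /* truncate unsent pages */
-        cl_2queue_disown(env, io, &queue);
-        cl_2queue_fini(env, &queue);
-        return rc;
-}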
-
-/**
- * Returns top-level io.
- *
- * \see cl_object_top()
- */
-struct cl_io *cl_io_top(struct cl_io *io)
-{
- while (io->ci_parent)
- io = io->ci_parent;
- return io;
-}
-EXPORT_SYMBOL(cl_io_top);
-
-/**
- * Fills in attributes that are passed to the server together with the
- * transfer. Only attributes listed in \a attr->cra_flags may be touched.
- * This can be called multiple times for the same request.
- */
-void cl_req_attr_set(const struct lu_env *env, struct cl_object *obj,
- struct cl_req_attr *attr)
-{
- struct cl_object *scan;
-
- cl_object_for_each(scan, obj) {
- if (scan->co_ops->coo_req_attr_set)
- scan->co_ops->coo_req_attr_set(env, scan, attr);
- }
-}
-EXPORT_SYMBOL(cl_req_attr_set);
-
-/* cl_sync_io_end() assumes the caller will call cl_sync_io_wait() to
- * wait for the IO to finish.
- */
-void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor)
-{
- wake_up_all(&anchor->csi_waitq);
-
- /* it's safe to nuke or reuse anchor now */
- atomic_set(&anchor->csi_barrier, 0);
-}
-EXPORT_SYMBOL(cl_sync_io_end);
-
-/**
- * Initialize synchronous io wait anchor
- */
-void cl_sync_io_init(struct cl_sync_io *anchor, int nr,
- void (*end)(const struct lu_env *, struct cl_sync_io *))
-{
- memset(anchor, 0, sizeof(*anchor));
- init_waitqueue_head(&anchor->csi_waitq);
- atomic_set(&anchor->csi_sync_nr, nr);
- atomic_set(&anchor->csi_barrier, nr > 0);
- anchor->csi_sync_rc = 0;
- anchor->csi_end_io = end;
- LASSERT(end);
-}
-EXPORT_SYMBOL(cl_sync_io_init);
-
-/**
- * Wait until all IO completes. Transfer completion routine has to call
- * cl_sync_io_note() for every entity.
- */
-int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor,
- long timeout)
-{
- int rc = 1;
-
- LASSERT(timeout >= 0);
-
- if (timeout == 0)
- wait_event_idle(anchor->csi_waitq,
- atomic_read(&anchor->csi_sync_nr) == 0);
- else
- rc = wait_event_idle_timeout(anchor->csi_waitq,
- atomic_read(&anchor->csi_sync_nr) == 0,
- timeout * HZ);
- if (rc == 0) {
- rc = -ETIMEDOUT;
- CERROR("IO failed: %d, still wait for %d remaining entries\n",
- rc, atomic_read(&anchor->csi_sync_nr));
-
- wait_event_idle(anchor->csi_waitq,
- atomic_read(&anchor->csi_sync_nr) == 0);
- } else {
- rc = anchor->csi_sync_rc;
- }
- LASSERT(atomic_read(&anchor->csi_sync_nr) == 0);
-
- /* wait until cl_sync_io_note() has done wakeup */
- while (unlikely(atomic_read(&anchor->csi_barrier) != 0))
- cpu_relax();
-
- return rc;
-}
-EXPORT_SYMBOL(cl_sync_io_wait);
-
-/**
- * Indicate that transfer of a single page completed.
- */
-void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor,
- int ioret)
-{
- if (anchor->csi_sync_rc == 0 && ioret < 0)
- anchor->csi_sync_rc = ioret;
- /*
- * Synchronous IO done without releasing page lock (e.g., as a part of
- * ->{prepare,commit}_write()). Completion is used to signal the end of
- * IO.
- */
- LASSERT(atomic_read(&anchor->csi_sync_nr) > 0);
- if (atomic_dec_and_test(&anchor->csi_sync_nr)) {
- LASSERT(anchor->csi_end_io);
- anchor->csi_end_io(env, anchor);
- /* Can't access anchor any more */
- }
-}
-EXPORT_SYMBOL(cl_sync_io_note);
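-
-/*
- * A hedged sketch, not part of the original file, of the anchor protocol
- * implemented above: the submitter initializes the anchor with the number
- * of in-flight units, every completion handler calls cl_sync_io_note(),
- * and the submitter blocks in cl_sync_io_wait(). The function name is
- * illustrative.
- */
-static int example_wait_two(const struct lu_env *env)
-{
-        struct cl_sync_io anchor;
-
-        cl_sync_io_init(&anchor, 2, cl_sync_io_end);
-        /*
-         * ... start two transfers whose completion handlers each call
-         * cl_sync_io_note(env, &anchor, ioret) ...
-         */
-        return cl_sync_io_wait(env, &anchor, 0);        /* 0 == wait forever */
-}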
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_lock.c b/drivers/staging/lustre/lustre/obdclass/cl_lock.c
deleted file mode 100644
index 9ca29a26a38b..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/cl_lock.c
+++ /dev/null
@@ -1,275 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Client Extent Lock.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@intel.com>
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <obd_class.h>
-#include <obd_support.h>
-#include <lustre_fid.h>
-#include <linux/list.h>
-#include <cl_object.h>
-#include "cl_internal.h"
-
-static void cl_lock_trace0(int level, const struct lu_env *env,
- const char *prefix, const struct cl_lock *lock,
- const char *func, const int line)
-{
- struct cl_object_header *h = cl_object_header(lock->cll_descr.cld_obj);
-
- CDEBUG(level, "%s: %p (%p/%d) at %s():%d\n",
- prefix, lock, env, h->coh_nesting, func, line);
-}
-#define cl_lock_trace(level, env, prefix, lock) \
- cl_lock_trace0(level, env, prefix, lock, __func__, __LINE__)
-
-/**
- * Adds lock slice to the compound lock.
- *
- * This is called by cl_object_operations::coo_lock_init() methods to add a
- * per-layer state to the lock. New state is added at the end of
- * cl_lock::cll_layers list, that is, it is at the bottom of the stack.
- *
- * \see cl_req_slice_add(), cl_page_slice_add(), cl_io_slice_add()
- */
-void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
- struct cl_object *obj,
- const struct cl_lock_operations *ops)
-{
- slice->cls_lock = lock;
- list_add_tail(&slice->cls_linkage, &lock->cll_layers);
- slice->cls_obj = obj;
- slice->cls_ops = ops;
-}
-EXPORT_SYMBOL(cl_lock_slice_add);
-
-void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock)
-{
- struct cl_lock_slice *slice;
-
- cl_lock_trace(D_DLMTRACE, env, "destroy lock", lock);
-
- while ((slice = list_first_entry_or_null(&lock->cll_layers,
- struct cl_lock_slice,
- cls_linkage)) != NULL) {
- list_del_init(lock->cll_layers.next);
- slice->cls_ops->clo_fini(env, slice);
- }
- POISON(lock, 0x5a, sizeof(*lock));
-}
-EXPORT_SYMBOL(cl_lock_fini);
-
-int cl_lock_init(const struct lu_env *env, struct cl_lock *lock,
- const struct cl_io *io)
-{
- struct cl_object *obj = lock->cll_descr.cld_obj;
- struct cl_object *scan;
- int result = 0;
-
- /* Make sure cl_lock::cll_descr is initialized. */
- LASSERT(obj);
-
- INIT_LIST_HEAD(&lock->cll_layers);
- list_for_each_entry(scan, &obj->co_lu.lo_header->loh_layers,
- co_lu.lo_linkage) {
- result = scan->co_ops->coo_lock_init(env, scan, lock, io);
- if (result != 0) {
- cl_lock_fini(env, lock);
- break;
- }
- }
-
- return result;
-}
-EXPORT_SYMBOL(cl_lock_init);
-
-/**
- * Returns a slice with a lock, corresponding to the given layer in the
- * device stack.
- *
- * \see cl_page_at()
- */
-const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
- const struct lu_device_type *dtype)
-{
- const struct cl_lock_slice *slice;
-
- list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
- if (slice->cls_obj->co_lu.lo_dev->ld_type == dtype)
- return slice;
- }
- return NULL;
-}
-EXPORT_SYMBOL(cl_lock_at);
-
-void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock)
-{
- const struct cl_lock_slice *slice;
-
- cl_lock_trace(D_DLMTRACE, env, "cancel lock", lock);
- list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) {
- if (slice->cls_ops->clo_cancel)
- slice->cls_ops->clo_cancel(env, slice);
- }
-}
-EXPORT_SYMBOL(cl_lock_cancel);
-
-/**
- * Enqueue a lock.
- * \param anchor if the enqueue has to wait for resources, \a anchor is
- * used to wait for them.
- * \retval 0 enqueued successfully
- * \retval <0 error code
- */
-int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io,
- struct cl_lock *lock, struct cl_sync_io *anchor)
-{
- const struct cl_lock_slice *slice;
- int rc = -ENOSYS;
-
- list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
- if (!slice->cls_ops->clo_enqueue)
- continue;
-
- rc = slice->cls_ops->clo_enqueue(env, slice, io, anchor);
- if (rc != 0)
- break;
- }
- return rc;
-}
-EXPORT_SYMBOL(cl_lock_enqueue);
-
-/**
- * Main high-level entry point of cl_lock interface that finds existing or
- * enqueues new lock matching given description.
- */
-int cl_lock_request(const struct lu_env *env, struct cl_io *io,
- struct cl_lock *lock)
-{
- struct cl_sync_io *anchor = NULL;
- __u32 enq_flags = lock->cll_descr.cld_enq_flags;
- int rc;
-
- rc = cl_lock_init(env, lock, io);
- if (rc < 0)
- return rc;
-
- if ((enq_flags & CEF_ASYNC) && !(enq_flags & CEF_AGL)) {
- anchor = &cl_env_info(env)->clt_anchor;
- cl_sync_io_init(anchor, 1, cl_sync_io_end);
- }
-
- rc = cl_lock_enqueue(env, io, lock, anchor);
-
- if (anchor) {
- int rc2;
-
- /* drop the reference count held at initialization time */
- cl_sync_io_note(env, anchor, 0);
- rc2 = cl_sync_io_wait(env, anchor, 0);
- if (rc2 < 0 && rc == 0)
- rc = rc2;
- }
-
- if (rc < 0)
- cl_lock_release(env, lock);
-
- return rc;
-}
-EXPORT_SYMBOL(cl_lock_request);
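-
-/*
- * A hedged sketch, not part of the original file: taking and dropping a
- * whole-file read lock through the entry points above. The descriptor
- * values and the function name are illustrative assumptions.
- */
-static int example_lock_file(const struct lu_env *env, struct cl_io *io,
-                             struct cl_object *obj)
-{
-        struct cl_lock lock;
-        int rc;
-
-        memset(&lock, 0, sizeof(lock));
-        lock.cll_descr.cld_obj = obj;
-        lock.cll_descr.cld_start = 0;           /* first page ... */
-        lock.cll_descr.cld_end = CL_PAGE_EOF;   /* ... through EOF */
-        lock.cll_descr.cld_mode = CLM_READ;
-
-        rc = cl_lock_request(env, io, &lock);   /* init + enqueue */
-        if (rc == 0)
-                cl_lock_release(env, &lock);    /* cancel + fini */
-        return rc;
-}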
-
-/**
- * Releases a hold and a reference on a lock, obtained by cl_lock_hold().
- */
-void cl_lock_release(const struct lu_env *env, struct cl_lock *lock)
-{
- cl_lock_trace(D_DLMTRACE, env, "release lock", lock);
- cl_lock_cancel(env, lock);
- cl_lock_fini(env, lock);
-}
-EXPORT_SYMBOL(cl_lock_release);
-
-const char *cl_lock_mode_name(const enum cl_lock_mode mode)
-{
- static const char * const names[] = {
- [CLM_READ] = "R",
- [CLM_WRITE] = "W",
- [CLM_GROUP] = "G"
- };
-
- if (0 <= mode && mode < ARRAY_SIZE(names))
- return names[mode];
- else
- return "U";
-}
-EXPORT_SYMBOL(cl_lock_mode_name);
-
-/**
- * Prints human readable representation of a lock description.
- */
-void cl_lock_descr_print(const struct lu_env *env, void *cookie,
- lu_printer_t printer,
- const struct cl_lock_descr *descr)
-{
- const struct lu_fid *fid;
-
- fid = lu_object_fid(&descr->cld_obj->co_lu);
- (*printer)(env, cookie, DDESCR "@" DFID, PDESCR(descr), PFID(fid));
-}
-EXPORT_SYMBOL(cl_lock_descr_print);
-
-/**
- * Prints human readable representation of \a lock to the \a f.
- */
-void cl_lock_print(const struct lu_env *env, void *cookie,
- lu_printer_t printer, const struct cl_lock *lock)
-{
- const struct cl_lock_slice *slice;
-
- (*printer)(env, cookie, "lock@%p", lock);
- cl_lock_descr_print(env, cookie, printer, &lock->cll_descr);
- (*printer)(env, cookie, " {\n");
-
- list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
- (*printer)(env, cookie, " %s@%p: ",
- slice->cls_obj->co_lu.lo_dev->ld_type->ldt_name,
- slice);
- if (slice->cls_ops->clo_print)
- slice->cls_ops->clo_print(env, cookie, printer, slice);
- (*printer)(env, cookie, "\n");
- }
- (*printer)(env, cookie, "} lock@%p\n", lock);
-}
-EXPORT_SYMBOL(cl_lock_print);
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_object.c b/drivers/staging/lustre/lustre/obdclass/cl_object.c
deleted file mode 100644
index 42cce2dc5a45..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/cl_object.c
+++ /dev/null
@@ -1,1059 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Client Lustre Object.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@intel.com>
- */
-
-/*
- * Locking.
- *
- * i_mutex
- * PG_locked
- * ->coh_attr_guard
- * ->ls_guard
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-/* class_put_type() */
-#include <obd_class.h>
-#include <obd_support.h>
-#include <lustre_fid.h>
-#include <linux/list.h>
-#include <cl_object.h>
-#include <lu_object.h>
-#include "cl_internal.h"
-
-static struct kmem_cache *cl_env_kmem;
-
-/** Lock class of cl_object_header::coh_attr_guard */
-static struct lock_class_key cl_attr_guard_class;
-
-/**
- * Initialize cl_object_header.
- */
-int cl_object_header_init(struct cl_object_header *h)
-{
- int result;
-
- result = lu_object_header_init(&h->coh_lu);
- if (result == 0) {
- spin_lock_init(&h->coh_attr_guard);
- lockdep_set_class(&h->coh_attr_guard, &cl_attr_guard_class);
- h->coh_page_bufsize = 0;
- }
- return result;
-}
-EXPORT_SYMBOL(cl_object_header_init);
-
-/**
- * Returns a cl_object with a given \a fid.
- *
- * Returns either a cached or a newly created object. An additional
- * reference on the returned object is acquired.
- *
- * \see lu_object_find(), cl_page_find(), cl_lock_find()
- */
-struct cl_object *cl_object_find(const struct lu_env *env,
- struct cl_device *cd, const struct lu_fid *fid,
- const struct cl_object_conf *c)
-{
- might_sleep();
- return lu2cl(lu_object_find_slice(env, cl2lu_dev(cd), fid, &c->coc_lu));
-}
-EXPORT_SYMBOL(cl_object_find);
-
-/**
- * Releases a reference on \a o.
- *
- * When the last reference is released, the object is returned to the cache,
- * unless the lu_object_header_flags::LU_OBJECT_HEARD_BANSHEE bit is set in
- * its header.
- *
- * \see cl_page_put(), cl_lock_put().
- */
-void cl_object_put(const struct lu_env *env, struct cl_object *o)
-{
- lu_object_put(env, &o->co_lu);
-}
-EXPORT_SYMBOL(cl_object_put);
-
-/**
- * Acquire an additional reference to the object \a o.
- *
- * This can only be used to acquire an _additional_ reference, i.e., the caller
- * already has to possess at least one reference to \a o before calling this.
- *
- * \see cl_page_get(), cl_lock_get().
- */
-void cl_object_get(struct cl_object *o)
-{
- lu_object_get(&o->co_lu);
-}
-EXPORT_SYMBOL(cl_object_get);
-
-/**
- * Returns the top-object for a given \a o.
- *
- * \see cl_io_top()
- */
-struct cl_object *cl_object_top(struct cl_object *o)
-{
- struct cl_object_header *hdr = cl_object_header(o);
- struct cl_object *top;
-
- while (hdr->coh_parent)
- hdr = hdr->coh_parent;
-
- top = lu2cl(lu_object_top(&hdr->coh_lu));
- CDEBUG(D_TRACE, "%p -> %p\n", o, top);
- return top;
-}
-EXPORT_SYMBOL(cl_object_top);
-
-/**
- * Returns pointer to the lock protecting data-attributes for the given object
- * \a o.
- *
- * Data-attributes are protected by the cl_object_header::coh_attr_guard
- * spin-lock in the top-object.
- *
- * \see cl_attr, cl_object_attr_lock(), cl_object_operations::coo_attr_get().
- */
-static spinlock_t *cl_object_attr_guard(struct cl_object *o)
-{
- return &cl_object_header(cl_object_top(o))->coh_attr_guard;
-}
-
-/**
- * Locks data-attributes.
- *
- * Prevents data-attributes from changing, until lock is released by
- * cl_object_attr_unlock(). This has to be called before calls to
- * cl_object_attr_get(), cl_object_attr_update().
- */
-void cl_object_attr_lock(struct cl_object *o)
- __acquires(cl_object_attr_guard(o))
-{
- spin_lock(cl_object_attr_guard(o));
-}
-EXPORT_SYMBOL(cl_object_attr_lock);
-
-/**
- * Releases data-attributes lock, acquired by cl_object_attr_lock().
- */
-void cl_object_attr_unlock(struct cl_object *o)
- __releases(cl_object_attr_guard(o))
-{
- spin_unlock(cl_object_attr_guard(o));
-}
-EXPORT_SYMBOL(cl_object_attr_unlock);
-
-/**
- * Returns data-attributes of an object \a obj.
- *
- * Every layer is asked (by calling cl_object_operations::coo_attr_get())
- * top-to-bottom to fill in parts of \a attr that this layer is responsible
- * for.
- */
-int cl_object_attr_get(const struct lu_env *env, struct cl_object *obj,
- struct cl_attr *attr)
-{
- struct lu_object_header *top;
- int result;
-
- assert_spin_locked(cl_object_attr_guard(obj));
-
- top = obj->co_lu.lo_header;
- result = 0;
- list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
- if (obj->co_ops->coo_attr_get) {
- result = obj->co_ops->coo_attr_get(env, obj, attr);
- if (result != 0) {
- if (result > 0)
- result = 0;
- break;
- }
- }
- }
- return result;
-}
-EXPORT_SYMBOL(cl_object_attr_get);
-
-/**
- * Updates data-attributes of an object \a obj.
- *
- * Only attributes mentioned in the validity bit-mask \a v are
- * updated. Calls cl_object_operations::coo_attr_update() on every layer,
- * bottom to top.
- */
-int cl_object_attr_update(const struct lu_env *env, struct cl_object *obj,
- const struct cl_attr *attr, unsigned int v)
-{
- struct lu_object_header *top;
- int result;
-
- assert_spin_locked(cl_object_attr_guard(obj));
-
- top = obj->co_lu.lo_header;
- result = 0;
- list_for_each_entry_reverse(obj, &top->loh_layers, co_lu.lo_linkage) {
- if (obj->co_ops->coo_attr_update) {
- result = obj->co_ops->coo_attr_update(env, obj, attr,
- v);
- if (result != 0) {
- if (result > 0)
- result = 0;
- break;
- }
- }
- }
- return result;
-}
-EXPORT_SYMBOL(cl_object_attr_update);
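-
-/*
- * A hedged sketch, not part of the original file, of the attribute
- * protocol above: the guard must be held across the get/update calls.
- * The function name is illustrative.
- */
-static int example_read_size(const struct lu_env *env, struct cl_object *obj,
-                             loff_t *size)
-{
-        struct cl_attr attr;
-        int rc;
-
-        cl_object_attr_lock(obj);
-        rc = cl_object_attr_get(env, obj, &attr);       /* top-to-bottom */
-        cl_object_attr_unlock(obj);
-        if (rc == 0)
-                *size = attr.cat_size;
-        return rc;
-}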
-
-/**
- * Notifies layers (bottom-to-top) that glimpse AST was received.
- *
- * Layers have to fill \a lvb fields with information that will be shipped
- * back to glimpse issuer.
- *
- * \see cl_lock_operations::clo_glimpse()
- */
-int cl_object_glimpse(const struct lu_env *env, struct cl_object *obj,
- struct ost_lvb *lvb)
-{
- struct lu_object_header *top;
- int result;
-
- top = obj->co_lu.lo_header;
- result = 0;
- list_for_each_entry_reverse(obj, &top->loh_layers, co_lu.lo_linkage) {
- if (obj->co_ops->coo_glimpse) {
- result = obj->co_ops->coo_glimpse(env, obj, lvb);
- if (result != 0)
- break;
- }
- }
- LU_OBJECT_HEADER(D_DLMTRACE, env, lu_object_top(top),
- "size: %llu mtime: %llu atime: %llu ctime: %llu blocks: %llu\n",
- lvb->lvb_size, lvb->lvb_mtime, lvb->lvb_atime,
- lvb->lvb_ctime, lvb->lvb_blocks);
- return result;
-}
-EXPORT_SYMBOL(cl_object_glimpse);
-
-/**
- * Updates a configuration of an object \a obj.
- */
-int cl_conf_set(const struct lu_env *env, struct cl_object *obj,
- const struct cl_object_conf *conf)
-{
- struct lu_object_header *top;
- int result;
-
- top = obj->co_lu.lo_header;
- result = 0;
- list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
- if (obj->co_ops->coo_conf_set) {
- result = obj->co_ops->coo_conf_set(env, obj, conf);
- if (result != 0)
- break;
- }
- }
- return result;
-}
-EXPORT_SYMBOL(cl_conf_set);
-
-/**
- * Prunes caches of pages and locks for this object.
- */
-int cl_object_prune(const struct lu_env *env, struct cl_object *obj)
-{
- struct lu_object_header *top;
- struct cl_object *o;
- int result;
-
- top = obj->co_lu.lo_header;
- result = 0;
- list_for_each_entry(o, &top->loh_layers, co_lu.lo_linkage) {
- if (o->co_ops->coo_prune) {
- result = o->co_ops->coo_prune(env, o);
- if (result != 0)
- break;
- }
- }
-
- return result;
-}
-EXPORT_SYMBOL(cl_object_prune);
-
-/**
- * Get stripe information of this object.
- */
-int cl_object_getstripe(const struct lu_env *env, struct cl_object *obj,
- struct lov_user_md __user *uarg)
-{
- struct lu_object_header *top;
- int result = 0;
-
- top = obj->co_lu.lo_header;
- list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
- if (obj->co_ops->coo_getstripe) {
- result = obj->co_ops->coo_getstripe(env, obj, uarg);
- if (result)
- break;
- }
- }
- return result;
-}
-EXPORT_SYMBOL(cl_object_getstripe);
-
-/**
- * Get fiemap extents from file object.
- *
- * \param env [in] lustre environment
- * \param obj [in] file object
- * \param key [in] fiemap request argument
- * \param fiemap [out] fiemap extents mapping retrieved
- * \param buflen [in] max buffer length of @fiemap
- *
- * \retval 0 success
- * \retval < 0 error
- */
-int cl_object_fiemap(const struct lu_env *env, struct cl_object *obj,
- struct ll_fiemap_info_key *key,
- struct fiemap *fiemap, size_t *buflen)
-{
- struct lu_object_header *top;
- int result = 0;
-
- top = obj->co_lu.lo_header;
- list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
- if (obj->co_ops->coo_fiemap) {
- result = obj->co_ops->coo_fiemap(env, obj, key, fiemap,
- buflen);
- if (result)
- break;
- }
- }
- return result;
-}
-EXPORT_SYMBOL(cl_object_fiemap);
-
-int cl_object_layout_get(const struct lu_env *env, struct cl_object *obj,
- struct cl_layout *cl)
-{
- struct lu_object_header *top = obj->co_lu.lo_header;
-
- list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
- if (obj->co_ops->coo_layout_get)
- return obj->co_ops->coo_layout_get(env, obj, cl);
- }
-
- return -EOPNOTSUPP;
-}
-EXPORT_SYMBOL(cl_object_layout_get);
-
-loff_t cl_object_maxbytes(struct cl_object *obj)
-{
- struct lu_object_header *top = obj->co_lu.lo_header;
- loff_t maxbytes = LLONG_MAX;
-
- list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
- if (obj->co_ops->coo_maxbytes)
- maxbytes = min_t(loff_t, obj->co_ops->coo_maxbytes(obj),
- maxbytes);
- }
-
- return maxbytes;
-}
-EXPORT_SYMBOL(cl_object_maxbytes);
-
-/**
- * Helper function removing all object locks, and marking object for
- * deletion. All object pages must have been deleted at this point.
- *
- * This is called by cl_inode_fini() and lov_object_delete() to destroy top-
- * and sub- objects respectively.
- */
-void cl_object_kill(const struct lu_env *env, struct cl_object *obj)
-{
- struct cl_object_header *hdr = cl_object_header(obj);
-
- set_bit(LU_OBJECT_HEARD_BANSHEE, &hdr->coh_lu.loh_flags);
-}
-EXPORT_SYMBOL(cl_object_kill);
-
-void cache_stats_init(struct cache_stats *cs, const char *name)
-{
- int i;
-
- cs->cs_name = name;
- for (i = 0; i < CS_NR; i++)
- atomic_set(&cs->cs_stats[i], 0);
-}
-
-static int cache_stats_print(const struct cache_stats *cs,
- struct seq_file *m, int h)
-{
- int i;
- /*
- * lookup hit total cached create
- * env: ...... ...... ...... ...... ......
- */
- if (h) {
- const char *names[CS_NR] = CS_NAMES;
-
- seq_printf(m, "%6s", " ");
- for (i = 0; i < CS_NR; i++)
- seq_printf(m, "%8s", names[i]);
- seq_printf(m, "\n");
- }
-
- seq_printf(m, "%5.5s:", cs->cs_name);
- for (i = 0; i < CS_NR; i++)
- seq_printf(m, "%8u", atomic_read(&cs->cs_stats[i]));
- return 0;
-}
-
-static void cl_env_percpu_refill(void);
-
-/**
- * Initialize client site.
- *
- * Perform common initialization (lu_site_init()), and initialize statistical
- * counters. Also perform global initializations on the first call.
- */
-int cl_site_init(struct cl_site *s, struct cl_device *d)
-{
- size_t i;
- int result;
-
- result = lu_site_init(&s->cs_lu, &d->cd_lu_dev);
- if (result == 0) {
- cache_stats_init(&s->cs_pages, "pages");
- for (i = 0; i < ARRAY_SIZE(s->cs_pages_state); ++i)
- atomic_set(&s->cs_pages_state[i], 0);
- cl_env_percpu_refill();
- }
- return result;
-}
-EXPORT_SYMBOL(cl_site_init);
-
-/**
- * Finalize client site. Dual to cl_site_init().
- */
-void cl_site_fini(struct cl_site *s)
-{
- lu_site_fini(&s->cs_lu);
-}
-EXPORT_SYMBOL(cl_site_fini);
-
-static struct cache_stats cl_env_stats = {
- .cs_name = "envs",
- .cs_stats = { ATOMIC_INIT(0), }
-};
-
-/**
- * Outputs client site statistical counters into a buffer. Suitable for
- * ll_rd_*()-style functions.
- */
-int cl_site_stats_print(const struct cl_site *site, struct seq_file *m)
-{
- size_t i;
- static const char * const pstate[] = {
- [CPS_CACHED] = "c",
- [CPS_OWNED] = "o",
- [CPS_PAGEOUT] = "w",
- [CPS_PAGEIN] = "r",
- [CPS_FREEING] = "f"
- };
-/*
- lookup hit total busy create
-pages: ...... ...... ...... ...... ...... [...... ...... ...... ......]
-locks: ...... ...... ...... ...... ...... [...... ...... ...... ...... ......]
- env: ...... ...... ...... ...... ......
- */
- lu_site_stats_print(&site->cs_lu, m);
- cache_stats_print(&site->cs_pages, m, 1);
- seq_puts(m, " [");
- for (i = 0; i < ARRAY_SIZE(site->cs_pages_state); ++i)
- seq_printf(m, "%s: %u ", pstate[i],
- atomic_read(&site->cs_pages_state[i]));
- seq_puts(m, "]\n");
- cache_stats_print(&cl_env_stats, m, 0);
- seq_puts(m, "\n");
- return 0;
-}
-EXPORT_SYMBOL(cl_site_stats_print);
-
-/*****************************************************************************
- *
- * lu_env handling on client.
- *
- */
-
-/**
- * The most efficient way is to store the cl_env pointer in task-specific
- * structures. On Linux, it won't be easy to use task_struct->journal_info,
- * because Lustre code may call into other filesystems, which have their own
- * assumptions about journal_info. Currently the following fields in
- * task_struct have been identified as usable for this purpose:
- *  - tux_info: only on RedHat kernels.
- *  - ...
- * \note As long as we use task_struct to store the cl_env, we assume that,
- * once called into Lustre, we never call into other parts of the kernel
- * that use those task_struct fields without explicitly exiting Lustre.
- *
- * If no space in task_struct is available, a hash will be used.
- * bz20044, bz22683.
- */
-
-static unsigned int cl_envs_cached_max = 32; /* XXX: prototype: arbitrary limit
- * for now.
- */
-static struct cl_env_cache {
- rwlock_t cec_guard;
- unsigned int cec_count;
- struct list_head cec_envs;
-} *cl_envs;
-
-struct cl_env {
- void *ce_magic;
- struct lu_env ce_lu;
- struct lu_context ce_ses;
-
- /*
- * Linkage into global list of all client environments. Used for
- * garbage collection.
- */
- struct list_head ce_linkage;
- /*
- * Reference counter: number of current users of this environment.
- */
- int ce_ref;
- /*
- * Debugging field: address of the caller who made original
- * allocation.
- */
- void *ce_debug;
-};
-
-#define CL_ENV_INC(counter)
-#define CL_ENV_DEC(counter)
-
-static void cl_env_init0(struct cl_env *cle, void *debug)
-{
- LASSERT(cle->ce_ref == 0);
- LASSERT(cle->ce_magic == &cl_env_init0);
- LASSERT(!cle->ce_debug);
-
- cle->ce_ref = 1;
- cle->ce_debug = debug;
- CL_ENV_INC(busy);
-}
-
-static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug)
-{
- struct lu_env *env;
- struct cl_env *cle;
-
- cle = kmem_cache_zalloc(cl_env_kmem, GFP_NOFS);
- if (cle) {
- int rc;
-
- INIT_LIST_HEAD(&cle->ce_linkage);
- cle->ce_magic = &cl_env_init0;
- env = &cle->ce_lu;
- rc = lu_env_init(env, ctx_tags | LCT_CL_THREAD);
- if (rc == 0) {
- rc = lu_context_init(&cle->ce_ses,
- ses_tags | LCT_SESSION);
- if (rc == 0) {
- lu_context_enter(&cle->ce_ses);
- env->le_ses = &cle->ce_ses;
- cl_env_init0(cle, debug);
- } else {
- lu_env_fini(env);
- }
- }
- if (rc != 0) {
- kmem_cache_free(cl_env_kmem, cle);
- env = ERR_PTR(rc);
- } else {
- CL_ENV_INC(create);
- CL_ENV_INC(total);
- }
- } else {
- env = ERR_PTR(-ENOMEM);
- }
- return env;
-}
-
-static void cl_env_fini(struct cl_env *cle)
-{
- CL_ENV_DEC(total);
- lu_context_fini(&cle->ce_lu.le_ctx);
- lu_context_fini(&cle->ce_ses);
- kmem_cache_free(cl_env_kmem, cle);
-}
-
-static struct lu_env *cl_env_obtain(void *debug)
-{
- struct cl_env *cle;
- struct lu_env *env;
- int cpu = get_cpu();
-
- read_lock(&cl_envs[cpu].cec_guard);
- LASSERT(equi(cl_envs[cpu].cec_count == 0,
- list_empty(&cl_envs[cpu].cec_envs)));
- if (cl_envs[cpu].cec_count > 0) {
- int rc;
-
- cle = container_of(cl_envs[cpu].cec_envs.next, struct cl_env,
- ce_linkage);
- list_del_init(&cle->ce_linkage);
- cl_envs[cpu].cec_count--;
- read_unlock(&cl_envs[cpu].cec_guard);
- put_cpu();
-
- env = &cle->ce_lu;
- rc = lu_env_refill(env);
- if (rc == 0) {
- cl_env_init0(cle, debug);
- lu_context_enter(&env->le_ctx);
- lu_context_enter(&cle->ce_ses);
- } else {
- cl_env_fini(cle);
- env = ERR_PTR(rc);
- }
- } else {
- read_unlock(&cl_envs[cpu].cec_guard);
- put_cpu();
- env = cl_env_new(lu_context_tags_default,
- lu_session_tags_default, debug);
- }
- return env;
-}
-
-static inline struct cl_env *cl_env_container(struct lu_env *env)
-{
- return container_of(env, struct cl_env, ce_lu);
-}
-
-/**
- * Returns lu_env: if there already is an environment associated with the
- * current thread, it is returned; otherwise, a new environment is allocated.
- *
- * Allocations are amortized through the global cache of environments.
- *
- * \param refcheck pointer to a counter used to detect environment leaks. In
- * the usual case cl_env_get() and cl_env_put() are called in the same lexical
- * scope and a pointer to the same integer is passed as \a refcheck. This is
- * used to detect missed cl_env_put().
- *
- * \see cl_env_put()
- */
-struct lu_env *cl_env_get(u16 *refcheck)
-{
- struct lu_env *env;
-
- env = cl_env_obtain(__builtin_return_address(0));
- if (!IS_ERR(env)) {
- struct cl_env *cle;
-
- cle = cl_env_container(env);
- *refcheck = cle->ce_ref;
- CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle);
- }
- return env;
-}
-EXPORT_SYMBOL(cl_env_get);
-
-/**
- * Forces an allocation of a fresh environment with given tags.
- *
- * \see cl_env_get()
- */
-struct lu_env *cl_env_alloc(u16 *refcheck, u32 tags)
-{
- struct lu_env *env;
-
- env = cl_env_new(tags, tags, __builtin_return_address(0));
- if (!IS_ERR(env)) {
- struct cl_env *cle;
-
- cle = cl_env_container(env);
- *refcheck = cle->ce_ref;
- CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle);
- }
- return env;
-}
-EXPORT_SYMBOL(cl_env_alloc);
-
-static void cl_env_exit(struct cl_env *cle)
-{
- lu_context_exit(&cle->ce_lu.le_ctx);
- lu_context_exit(&cle->ce_ses);
-}
-
-/**
- * Finalizes and frees a given number of cached environments. This is done to
- * (1) free some memory (not currently hooked into VM), or (2) release
- * references to modules.
- */
-unsigned int cl_env_cache_purge(unsigned int nr)
-{
- struct cl_env *cle;
- unsigned int i;
-
- for_each_possible_cpu(i) {
- write_lock(&cl_envs[i].cec_guard);
- for (; !list_empty(&cl_envs[i].cec_envs) && nr > 0; --nr) {
- cle = container_of(cl_envs[i].cec_envs.next,
- struct cl_env, ce_linkage);
- list_del_init(&cle->ce_linkage);
- LASSERT(cl_envs[i].cec_count > 0);
- cl_envs[i].cec_count--;
- write_unlock(&cl_envs[i].cec_guard);
-
- cl_env_fini(cle);
- write_lock(&cl_envs[i].cec_guard);
- }
- LASSERT(equi(cl_envs[i].cec_count == 0,
- list_empty(&cl_envs[i].cec_envs)));
- write_unlock(&cl_envs[i].cec_guard);
- }
- return nr;
-}
-EXPORT_SYMBOL(cl_env_cache_purge);
-
-/**
- * Release an environment.
- *
- * Decrement \a env reference counter. When counter drops to 0, nothing in
- * this thread is using environment and it is returned to the allocation
- * cache, or freed straight away, if cache is large enough.
- */
-void cl_env_put(struct lu_env *env, u16 *refcheck)
-{
- struct cl_env *cle;
-
- cle = cl_env_container(env);
-
- LASSERT(cle->ce_ref > 0);
- LASSERT(ergo(refcheck, cle->ce_ref == *refcheck));
-
- CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle);
- if (--cle->ce_ref == 0) {
- int cpu = get_cpu();
-
- CL_ENV_DEC(busy);
- cle->ce_debug = NULL;
- cl_env_exit(cle);
- /*
- * Don't bother to take a lock here.
- *
- * Return environment to the cache only when it was allocated
- * with the standard tags.
- */
- if (cl_envs[cpu].cec_count < cl_envs_cached_max &&
- (env->le_ctx.lc_tags & ~LCT_HAS_EXIT) == LCT_CL_THREAD &&
- (env->le_ses->lc_tags & ~LCT_HAS_EXIT) == LCT_SESSION) {
- read_lock(&cl_envs[cpu].cec_guard);
- list_add(&cle->ce_linkage, &cl_envs[cpu].cec_envs);
- cl_envs[cpu].cec_count++;
- read_unlock(&cl_envs[cpu].cec_guard);
- } else {
- cl_env_fini(cle);
- }
- put_cpu();
- }
-}
-EXPORT_SYMBOL(cl_env_put);
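-
-/*
- * A hedged sketch, not part of the original file, of the refcheck
- * protocol above: get and put happen in the same lexical scope with the
- * same counter, so a missed put trips the assertion in cl_env_put().
- * The function name is illustrative.
- */
-static int example_with_env(void)
-{
-        struct lu_env *env;
-        u16 refcheck;
-
-        env = cl_env_get(&refcheck);
-        if (IS_ERR(env))
-                return PTR_ERR(env);
-        /* ... use env for cl_* calls ... */
-        cl_env_put(env, &refcheck);
-        return 0;
-}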
-
-/**
- * Converts struct ost_lvb to struct cl_attr.
- *
- * \see cl_attr2lvb
- */
-void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb)
-{
- attr->cat_size = lvb->lvb_size;
- attr->cat_mtime = lvb->lvb_mtime;
- attr->cat_atime = lvb->lvb_atime;
- attr->cat_ctime = lvb->lvb_ctime;
- attr->cat_blocks = lvb->lvb_blocks;
-}
-EXPORT_SYMBOL(cl_lvb2attr);
-
-static struct cl_env cl_env_percpu[NR_CPUS];
-
-static int cl_env_percpu_init(void)
-{
- struct cl_env *cle;
- int tags = LCT_REMEMBER | LCT_NOREF;
- int i, j;
- int rc = 0;
-
- for_each_possible_cpu(i) {
- struct lu_env *env;
-
- rwlock_init(&cl_envs[i].cec_guard);
- INIT_LIST_HEAD(&cl_envs[i].cec_envs);
- cl_envs[i].cec_count = 0;
-
- cle = &cl_env_percpu[i];
- env = &cle->ce_lu;
-
- INIT_LIST_HEAD(&cle->ce_linkage);
- cle->ce_magic = &cl_env_init0;
- rc = lu_env_init(env, LCT_CL_THREAD | tags);
- if (rc == 0) {
- rc = lu_context_init(&cle->ce_ses, LCT_SESSION | tags);
- if (rc == 0) {
- lu_context_enter(&cle->ce_ses);
- env->le_ses = &cle->ce_ses;
- } else {
- lu_env_fini(env);
- }
- }
- if (rc != 0)
- break;
- }
- if (rc != 0) {
- /* Indices 0 to i (excluding i) were correctly initialized,
- * thus we must uninitialize up to i, the rest are undefined.
- */
- for (j = 0; j < i; j++) {
- cle = &cl_env_percpu[j];
- lu_context_exit(&cle->ce_ses);
- lu_context_fini(&cle->ce_ses);
- lu_env_fini(&cle->ce_lu);
- }
- }
-
- return rc;
-}
-
-static void cl_env_percpu_fini(void)
-{
- int i;
-
- for_each_possible_cpu(i) {
- struct cl_env *cle = &cl_env_percpu[i];
-
- lu_context_exit(&cle->ce_ses);
- lu_context_fini(&cle->ce_ses);
- lu_env_fini(&cle->ce_lu);
- }
-}
-
-static void cl_env_percpu_refill(void)
-{
- int i;
-
- for_each_possible_cpu(i)
- lu_env_refill(&cl_env_percpu[i].ce_lu);
-}
-
-void cl_env_percpu_put(struct lu_env *env)
-{
- struct cl_env *cle;
- int cpu;
-
- cpu = smp_processor_id();
- cle = cl_env_container(env);
- LASSERT(cle == &cl_env_percpu[cpu]);
-
- cle->ce_ref--;
- LASSERT(cle->ce_ref == 0);
-
- CL_ENV_DEC(busy);
- cle->ce_debug = NULL;
-
- put_cpu();
-}
-EXPORT_SYMBOL(cl_env_percpu_put);
-
-struct lu_env *cl_env_percpu_get(void)
-{
- struct cl_env *cle;
-
- cle = &cl_env_percpu[get_cpu()];
- cl_env_init0(cle, __builtin_return_address(0));
-
- return &cle->ce_lu;
-}
-EXPORT_SYMBOL(cl_env_percpu_get);
-
-/*****************************************************************************
- *
- * Temporary prototype thing: mirror obd-devices into cl devices.
- *
- */
-
-struct cl_device *cl_type_setup(const struct lu_env *env, struct lu_site *site,
- struct lu_device_type *ldt,
- struct lu_device *next)
-{
- const char *typename;
- struct lu_device *d;
-
- typename = ldt->ldt_name;
- d = ldt->ldt_ops->ldto_device_alloc(env, ldt, NULL);
- if (!IS_ERR(d)) {
- int rc;
-
- if (site)
- d->ld_site = site;
- rc = ldt->ldt_ops->ldto_device_init(env, d, typename, next);
- if (rc == 0) {
- lu_device_get(d);
- lu_ref_add(&d->ld_reference,
- "lu-stack", &lu_site_init);
- } else {
- ldt->ldt_ops->ldto_device_free(env, d);
- CERROR("can't init device '%s', %d\n", typename, rc);
- d = ERR_PTR(rc);
- }
- } else {
- CERROR("Cannot allocate device: '%s'\n", typename);
- }
- return lu2cl_dev(d);
-}
-EXPORT_SYMBOL(cl_type_setup);
-
-/**
- * Finalize device stack by calling lu_stack_fini().
- */
-void cl_stack_fini(const struct lu_env *env, struct cl_device *cl)
-{
- lu_stack_fini(env, cl2lu_dev(cl));
-}
-EXPORT_SYMBOL(cl_stack_fini);
-
-static struct lu_context_key cl_key;
-
-struct cl_thread_info *cl_env_info(const struct lu_env *env)
-{
- return lu_context_key_get(&env->le_ctx, &cl_key);
-}
-
-/* defines cl0_key_{init,fini}() */
-LU_KEY_INIT_FINI(cl0, struct cl_thread_info);
-
-static void *cl_key_init(const struct lu_context *ctx,
- struct lu_context_key *key)
-{
- return cl0_key_init(ctx, key);
-}
-
-static void cl_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
- cl0_key_fini(ctx, key, data);
-}
-
-static struct lu_context_key cl_key = {
- .lct_tags = LCT_CL_THREAD,
- .lct_init = cl_key_init,
- .lct_fini = cl_key_fini,
-};
-
-static struct lu_kmem_descr cl_object_caches[] = {
- {
- .ckd_cache = &cl_env_kmem,
- .ckd_name = "cl_env_kmem",
- .ckd_size = sizeof(struct cl_env)
- },
- {
- .ckd_cache = NULL
- }
-};
-
-/**
- * Global initialization of cl-data. Create kmem caches, register
- * lu_context_key's, etc.
- *
- * \see cl_global_fini()
- */
-int cl_global_init(void)
-{
- int result;
-
- cl_envs = kcalloc(num_possible_cpus(), sizeof(*cl_envs), GFP_KERNEL);
- if (!cl_envs) {
- result = -ENOMEM;
- goto out;
- }
-
- result = lu_kmem_init(cl_object_caches);
- if (result)
- goto out_envs;
-
- LU_CONTEXT_KEY_INIT(&cl_key);
- result = lu_context_key_register(&cl_key);
- if (result)
- goto out_kmem;
-
- result = cl_env_percpu_init();
- if (result)
- /* no cl_env_percpu_fini on error */
- goto out_keys;
-
- return 0;
-
-out_keys:
- lu_context_key_degister(&cl_key);
-out_kmem:
- lu_kmem_fini(cl_object_caches);
-out_envs:
- kfree(cl_envs);
-out:
- return result;
-}
-
-/**
- * Finalization of global cl-data. Dual to cl_global_init().
- */
-void cl_global_fini(void)
-{
- cl_env_percpu_fini();
- lu_context_key_degister(&cl_key);
- lu_kmem_fini(cl_object_caches);
- kfree(cl_envs);
-}
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_page.c b/drivers/staging/lustre/lustre/obdclass/cl_page.c
deleted file mode 100644
index 916cf81c5997..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/cl_page.c
+++ /dev/null
@@ -1,1045 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Client Lustre Page.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@intel.com>
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <obd_class.h>
-#include <obd_support.h>
-#include <linux/list.h>
-
-#include <cl_object.h>
-#include "cl_internal.h"
-
-static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg);
-
-# define PASSERT(env, page, expr) \
- do { \
- if (unlikely(!(expr))) { \
- CL_PAGE_DEBUG(D_ERROR, (env), (page), #expr "\n"); \
- LASSERT(0); \
- } \
- } while (0)
-
-# define PINVRNT(env, page, exp) \
- ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
-
-/**
- * Internal version of cl_page_get().
- *
- * This function can be used to obtain an initial reference to a previously
- * unreferenced cached object. It can be called only if concurrent page
- * reclamation is somehow prevented, e.g., by keeping a lock on a VM page,
- * associated with \a page.
- *
- * Use with care! Not exported.
- */
-static void cl_page_get_trust(struct cl_page *page)
-{
- LASSERT(atomic_read(&page->cp_ref) > 0);
- atomic_inc(&page->cp_ref);
-}
-
-/**
- * Returns a slice within a page, corresponding to the given layer in the
- * device stack.
- *
- * \see cl_lock_at()
- */
-static const struct cl_page_slice *
-cl_page_at_trusted(const struct cl_page *page,
- const struct lu_device_type *dtype)
-{
- const struct cl_page_slice *slice;
-
- list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
- if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
- return slice;
- }
- return NULL;
-}
-
-static void cl_page_free(const struct lu_env *env, struct cl_page *page)
-{
- struct cl_object *obj = page->cp_obj;
-
- PASSERT(env, page, list_empty(&page->cp_batch));
- PASSERT(env, page, !page->cp_owner);
- PASSERT(env, page, page->cp_state == CPS_FREEING);
-
- while (!list_empty(&page->cp_layers)) {
- struct cl_page_slice *slice;
-
- slice = list_entry(page->cp_layers.next,
- struct cl_page_slice, cpl_linkage);
- list_del_init(page->cp_layers.next);
- if (unlikely(slice->cpl_ops->cpo_fini))
- slice->cpl_ops->cpo_fini(env, slice);
- }
- lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page);
- cl_object_put(env, obj);
- lu_ref_fini(&page->cp_reference);
- kfree(page);
-}
-
-/**
- * Helper function updating page state. This is the only place in the code
- * where the cl_page::cp_state field is mutated.
- */
-static inline void cl_page_state_set_trust(struct cl_page *page,
- enum cl_page_state state)
-{
- /* bypass const. */
- *(enum cl_page_state *)&page->cp_state = state;
-}
-
-struct cl_page *cl_page_alloc(const struct lu_env *env,
- struct cl_object *o, pgoff_t ind,
- struct page *vmpage,
- enum cl_page_type type)
-{
- struct cl_page *page;
- struct lu_object_header *head;
-
- page = kzalloc(cl_object_header(o)->coh_page_bufsize, GFP_NOFS);
- if (page) {
- int result = 0;
-
- atomic_set(&page->cp_ref, 1);
- page->cp_obj = o;
- cl_object_get(o);
- lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page",
- page);
- page->cp_vmpage = vmpage;
- cl_page_state_set_trust(page, CPS_CACHED);
- page->cp_type = type;
- INIT_LIST_HEAD(&page->cp_layers);
- INIT_LIST_HEAD(&page->cp_batch);
- lu_ref_init(&page->cp_reference);
- head = o->co_lu.lo_header;
- list_for_each_entry(o, &head->loh_layers, co_lu.lo_linkage) {
- if (o->co_ops->coo_page_init) {
- result = o->co_ops->coo_page_init(env, o, page,
- ind);
- if (result != 0) {
- cl_page_delete0(env, page);
- cl_page_free(env, page);
- page = ERR_PTR(result);
- break;
- }
- }
- }
- } else {
- page = ERR_PTR(-ENOMEM);
- }
- return page;
-}
-
-/**
- * Returns a cl_page with index \a idx at the object \a o, and associated with
- * the VM page \a vmpage.
- *
- * This is the main entry point into the cl_page caching interface. First, a
- * cache (implemented as a per-object radix tree) is consulted. If the page
- * is found there, it is returned immediately. Otherwise a new page is
- * allocated and returned. In any case, an additional reference to the page
- * is acquired.
- *
- * \see cl_object_find(), cl_lock_find()
- */
-struct cl_page *cl_page_find(const struct lu_env *env,
- struct cl_object *o,
- pgoff_t idx, struct page *vmpage,
- enum cl_page_type type)
-{
- struct cl_page *page = NULL;
- struct cl_object_header *hdr;
-
- LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
- might_sleep();
-
- hdr = cl_object_header(o);
-
- CDEBUG(D_PAGE, "%lu@" DFID " %p %lx %d\n",
- idx, PFID(&hdr->coh_lu.loh_fid), vmpage, vmpage->private, type);
- /* fast path. */
- if (type == CPT_CACHEABLE) {
- /*
- * vmpage lock is used to protect the child/parent
- * relationship
- */
- LASSERT(PageLocked(vmpage));
- /*
- * cl_vmpage_page() can be called here without any locks as
- *
- * - "vmpage" is locked (which prevents ->private from
- * concurrent updates), and
- *
- * - "o" cannot be destroyed while current thread holds a
- * reference on it.
- */
- page = cl_vmpage_page(vmpage, o);
-
- if (page)
- return page;
- }
-
- /* allocate and initialize cl_page */
- page = cl_page_alloc(env, o, idx, vmpage, type);
- return page;
-}
-EXPORT_SYMBOL(cl_page_find);
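-
-/*
- * A hedged sketch, not part of the original file: the usual lookup
- * pattern around cl_page_find(). The VM page must be locked for
- * CPT_CACHEABLE lookups, and the returned reference is dropped with
- * cl_page_put(). The function name is illustrative.
- */
-static int example_find(const struct lu_env *env, struct cl_object *obj,
-                        pgoff_t idx, struct page *vmpage)
-{
-        struct cl_page *page;
-
-        LASSERT(PageLocked(vmpage));
-        page = cl_page_find(env, obj, idx, vmpage, CPT_CACHEABLE);
-        if (IS_ERR(page))
-                return PTR_ERR(page);
-        /* ... own and use the page ... */
-        cl_page_put(env, page);
-        return 0;
-}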
-
-static inline int cl_page_invariant(const struct cl_page *pg)
-{
- return cl_page_in_use_noref(pg);
-}
-
-static void cl_page_state_set0(const struct lu_env *env,
- struct cl_page *page, enum cl_page_state state)
-{
- enum cl_page_state old;
-
- /*
- * Matrix of allowed state transitions [old][new], for sanity
- * checking.
- */
- static const int allowed_transitions[CPS_NR][CPS_NR] = {
- [CPS_CACHED] = {
- [CPS_CACHED] = 0,
- [CPS_OWNED] = 1, /* io finds existing cached page */
- [CPS_PAGEIN] = 0,
- [CPS_PAGEOUT] = 1, /* write-out from the cache */
- [CPS_FREEING] = 1, /* eviction on the memory pressure */
- },
- [CPS_OWNED] = {
- [CPS_CACHED] = 1, /* release to the cache */
- [CPS_OWNED] = 0,
- [CPS_PAGEIN] = 1, /* start read immediately */
- [CPS_PAGEOUT] = 1, /* start write immediately */
- [CPS_FREEING] = 1, /* lock invalidation or truncate */
- },
- [CPS_PAGEIN] = {
- [CPS_CACHED] = 1, /* io completion */
- [CPS_OWNED] = 0,
- [CPS_PAGEIN] = 0,
- [CPS_PAGEOUT] = 0,
- [CPS_FREEING] = 0,
- },
- [CPS_PAGEOUT] = {
- [CPS_CACHED] = 1, /* io completion */
- [CPS_OWNED] = 0,
- [CPS_PAGEIN] = 0,
- [CPS_PAGEOUT] = 0,
- [CPS_FREEING] = 0,
- },
- [CPS_FREEING] = {
- [CPS_CACHED] = 0,
- [CPS_OWNED] = 0,
- [CPS_PAGEIN] = 0,
- [CPS_PAGEOUT] = 0,
- [CPS_FREEING] = 0,
- }
- };
-
- old = page->cp_state;
- PASSERT(env, page, allowed_transitions[old][state]);
- CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state);
- PASSERT(env, page, page->cp_state == old);
- PASSERT(env, page, equi(state == CPS_OWNED, page->cp_owner));
- cl_page_state_set_trust(page, state);
-}
-
-static void cl_page_state_set(const struct lu_env *env,
- struct cl_page *page, enum cl_page_state state)
-{
- cl_page_state_set0(env, page, state);
-}
-
-/**
- * Acquires an additional reference to a page.
- *
- * This can be called only by caller already possessing a reference to \a
- * page.
- *
- * \see cl_object_get(), cl_lock_get().
- */
-void cl_page_get(struct cl_page *page)
-{
- cl_page_get_trust(page);
-}
-EXPORT_SYMBOL(cl_page_get);
-
-/**
- * Releases a reference to a page.
- *
- * When last reference is released, page is returned to the cache, unless it
- * is in cl_page_state::CPS_FREEING state, in which case it is immediately
- * destroyed.
- *
- * \see cl_object_put(), cl_lock_put().
- */
-void cl_page_put(const struct lu_env *env, struct cl_page *page)
-{
- CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
- atomic_read(&page->cp_ref));
-
- if (atomic_dec_and_test(&page->cp_ref)) {
- LASSERT(page->cp_state == CPS_FREEING);
-
- LASSERT(atomic_read(&page->cp_ref) == 0);
- PASSERT(env, page, !page->cp_owner);
- PASSERT(env, page, list_empty(&page->cp_batch));
- /*
- * Page is no longer reachable by other threads. Tear
- * it down.
- */
- cl_page_free(env, page);
- }
-}
-EXPORT_SYMBOL(cl_page_put);
-
-/**
- * Returns a cl_page associated with a VM page, and given cl_object.
- */
-struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
-{
- struct cl_page *page;
-
- LASSERT(PageLocked(vmpage));
-
- /*
- * NOTE: absence of races and liveness of data are guaranteed by page
- * lock on a "vmpage". That works because object destruction has
- * bottom-to-top pass.
- */
-
- page = (struct cl_page *)vmpage->private;
- if (page) {
- cl_page_get_trust(page);
- LASSERT(page->cp_type == CPT_CACHEABLE);
- }
- return page;
-}
-EXPORT_SYMBOL(cl_vmpage_page);
-
-const struct cl_page_slice *cl_page_at(const struct cl_page *page,
- const struct lu_device_type *dtype)
-{
- return cl_page_at_trusted(page, dtype);
-}
-EXPORT_SYMBOL(cl_page_at);
-
-#define CL_PAGE_OP(opname) offsetof(struct cl_page_operations, opname)
-
-#define CL_PAGE_INVOKE(_env, _page, _op, _proto, ...) \
-({ \
- const struct lu_env *__env = (_env); \
- struct cl_page *__page = (_page); \
- const struct cl_page_slice *__scan; \
- int __result; \
- ptrdiff_t __op = (_op); \
- int (*__method)_proto; \
- \
- __result = 0; \
- list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) { \
- __method = *(void **)((char *)__scan->cpl_ops + __op); \
- if (__method) { \
- __result = (*__method)(__env, __scan, ## __VA_ARGS__); \
- if (__result != 0) \
- break; \
- } \
- } \
- if (__result > 0) \
- __result = 0; \
- __result; \
-})
-
-#define CL_PAGE_INVOID(_env, _page, _op, _proto, ...) \
-do { \
- const struct lu_env *__env = (_env); \
- struct cl_page *__page = (_page); \
- const struct cl_page_slice *__scan; \
- ptrdiff_t __op = (_op); \
- void (*__method)_proto; \
- \
- list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) { \
- __method = *(void **)((char *)__scan->cpl_ops + __op); \
- if (__method) \
- (*__method)(__env, __scan, ## __VA_ARGS__); \
- } \
-} while (0)
-
-#define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...) \
-do { \
- const struct lu_env *__env = (_env); \
- struct cl_page *__page = (_page); \
- const struct cl_page_slice *__scan; \
- ptrdiff_t __op = (_op); \
- void (*__method)_proto; \
- \
- list_for_each_entry_reverse(__scan, &__page->cp_layers, cpl_linkage) { \
- __method = *(void **)((char *)__scan->cpl_ops + __op); \
- if (__method) \
- (*__method)(__env, __scan, ## __VA_ARGS__); \
- } \
-} while (0)
-
-static int cl_page_invoke(const struct lu_env *env,
- struct cl_io *io, struct cl_page *page, ptrdiff_t op)
-{
- PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
- return CL_PAGE_INVOKE(env, page, op,
- (const struct lu_env *,
- const struct cl_page_slice *, struct cl_io *),
- io);
-}
-
-static void cl_page_invoid(const struct lu_env *env,
- struct cl_io *io, struct cl_page *page, ptrdiff_t op)
-{
- PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
- CL_PAGE_INVOID(env, page, op,
- (const struct lu_env *,
- const struct cl_page_slice *, struct cl_io *), io);
-}
-
-static void cl_page_owner_clear(struct cl_page *page)
-{
- if (page->cp_owner) {
- LASSERT(page->cp_owner->ci_owned_nr > 0);
- page->cp_owner->ci_owned_nr--;
- page->cp_owner = NULL;
- }
-}
-
-static void cl_page_owner_set(struct cl_page *page)
-{
- page->cp_owner->ci_owned_nr++;
-}
-
-void cl_page_disown0(const struct lu_env *env,
- struct cl_io *io, struct cl_page *pg)
-{
- enum cl_page_state state;
-
- state = pg->cp_state;
- PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING);
- PINVRNT(env, pg, cl_page_invariant(pg) || state == CPS_FREEING);
- cl_page_owner_clear(pg);
-
- if (state == CPS_OWNED)
- cl_page_state_set(env, pg, CPS_CACHED);
- /*
- * Completion call-backs are executed in the bottom-up order, so that
- * uppermost layer (llite), responsible for VFS/VM interaction, runs
- * last and can release locks safely.
- */
- CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_disown),
- (const struct lu_env *,
- const struct cl_page_slice *, struct cl_io *),
- io);
-}
-
-/**
- * Returns true iff the page is owned by the given io.
- */
-int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io)
-{
- struct cl_io *top = cl_io_top((struct cl_io *)io);
-
- LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj));
- return pg->cp_state == CPS_OWNED && pg->cp_owner == top;
-}
-EXPORT_SYMBOL(cl_page_is_owned);
-
-/**
- * Try to own a page by IO.
- *
- * Waits until the page is in cl_page_state::CPS_CACHED state, and then
- * switches it into cl_page_state::CPS_OWNED state.
- *
- * \pre !cl_page_is_owned(pg, io)
- * \post result == 0 iff cl_page_is_owned(pg, io)
- *
- * \retval 0 success
- *
- * \retval -ve failure, e.g., the page was destroyed (and landed in
- * cl_page_state::CPS_FREEING instead of cl_page_state::CPS_CACHED),
- * or the page was owned by another thread, or is under IO.
- *
- * \see cl_page_disown()
- * \see cl_page_operations::cpo_own()
- * \see cl_page_own_try()
- * \see cl_page_own
- */
-static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
- struct cl_page *pg, int nonblock)
-{
- int result;
-
- PINVRNT(env, pg, !cl_page_is_owned(pg, io));
-
- io = cl_io_top(io);
-
- if (pg->cp_state == CPS_FREEING) {
- result = -ENOENT;
- } else {
- result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(cpo_own),
- (const struct lu_env *,
- const struct cl_page_slice *,
- struct cl_io *, int),
- io, nonblock);
- if (result == 0) {
- PASSERT(env, pg, !pg->cp_owner);
- pg->cp_owner = cl_io_top(io);
- cl_page_owner_set(pg);
- if (pg->cp_state != CPS_FREEING) {
- cl_page_state_set(env, pg, CPS_OWNED);
- } else {
- cl_page_disown0(env, io, pg);
- result = -ENOENT;
- }
- }
- }
- PINVRNT(env, pg, ergo(result == 0, cl_page_invariant(pg)));
- return result;
-}
-
-/**
- * Own a page, might be blocked.
- *
- * \see cl_page_own0()
- */
-int cl_page_own(const struct lu_env *env, struct cl_io *io, struct cl_page *pg)
-{
- return cl_page_own0(env, io, pg, 0);
-}
-EXPORT_SYMBOL(cl_page_own);
-
-/**
- * Nonblock version of cl_page_own().
- *
- * \see cl_page_own0()
- */
-int cl_page_own_try(const struct lu_env *env, struct cl_io *io,
- struct cl_page *pg)
-{
- return cl_page_own0(env, io, pg, 1);
-}
-EXPORT_SYMBOL(cl_page_own_try);
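-
-/*
- * Usage sketch (editorial note, not part of the original file): callers
- * bracket exclusive access to a page with an own/disown pair.  Assuming
- * valid env, io and pg pointers:
- *
- *	if (cl_page_own(env, io, pg) == 0) {
- *		... operate on the CPS_OWNED page ...
- *		cl_page_disown(env, io, pg);
- *	}
- *
- * cl_page_own_try() is the variant for contexts that must not sleep; it
- * fails instead of waiting for the page to become CPS_CACHED.
- */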
-
-/**
- * Assume page ownership.
- *
- * Called when page is already locked by the hosting VM.
- *
- * \pre !cl_page_is_owned(pg, io)
- * \post cl_page_is_owned(pg, io)
- *
- * \see cl_page_operations::cpo_assume()
- */
-void cl_page_assume(const struct lu_env *env,
- struct cl_io *io, struct cl_page *pg)
-{
- PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj));
-
- io = cl_io_top(io);
-
- cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
- PASSERT(env, pg, !pg->cp_owner);
- pg->cp_owner = cl_io_top(io);
- cl_page_owner_set(pg);
- cl_page_state_set(env, pg, CPS_OWNED);
-}
-EXPORT_SYMBOL(cl_page_assume);
-
-/**
- * Releases page ownership without unlocking the page.
- *
- * Moves page into cl_page_state::CPS_CACHED without releasing a lock on the
- * underlying VM page (as VM is supposed to do this itself).
- *
- * \pre cl_page_is_owned(pg, io)
- * \post !cl_page_is_owned(pg, io)
- *
- * \see cl_page_assume()
- */
-void cl_page_unassume(const struct lu_env *env,
- struct cl_io *io, struct cl_page *pg)
-{
- PINVRNT(env, pg, cl_page_is_owned(pg, io));
- PINVRNT(env, pg, cl_page_invariant(pg));
-
- io = cl_io_top(io);
- cl_page_owner_clear(pg);
- cl_page_state_set(env, pg, CPS_CACHED);
- CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_unassume),
- (const struct lu_env *,
- const struct cl_page_slice *, struct cl_io *),
- io);
-}
-EXPORT_SYMBOL(cl_page_unassume);
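-
-/*
- * Usage sketch (editorial note, not part of the original file):
- * assume/unassume form the ownership bracket for paths where the VM has
- * already locked the page, e.g. writeback entered from the kernel:
- *
- *	cl_page_assume(env, io, pg);    take cl-level ownership
- *	... inspect or queue the VM-locked page ...
- *	cl_page_unassume(env, io, pg);  drop ownership, VM lock kept
- */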
-
-/**
- * Releases page ownership.
- *
- * Moves page into cl_page_state::CPS_CACHED.
- *
- * \pre cl_page_is_owned(pg, io)
- * \post !cl_page_is_owned(pg, io)
- *
- * \see cl_page_own()
- * \see cl_page_operations::cpo_disown()
- */
-void cl_page_disown(const struct lu_env *env,
- struct cl_io *io, struct cl_page *pg)
-{
- PINVRNT(env, pg, cl_page_is_owned(pg, io) ||
- pg->cp_state == CPS_FREEING);
-
- io = cl_io_top(io);
- cl_page_disown0(env, io, pg);
-}
-EXPORT_SYMBOL(cl_page_disown);
-
-/**
- * Called when page is to be removed from the object, e.g., as a result of
- * truncate.
- *
- * Calls cl_page_operations::cpo_discard() top-to-bottom.
- *
- * \pre cl_page_is_owned(pg, io)
- *
- * \see cl_page_operations::cpo_discard()
- */
-void cl_page_discard(const struct lu_env *env,
- struct cl_io *io, struct cl_page *pg)
-{
- PINVRNT(env, pg, cl_page_is_owned(pg, io));
- PINVRNT(env, pg, cl_page_invariant(pg));
-
- cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_discard));
-}
-EXPORT_SYMBOL(cl_page_discard);
-
-/**
- * Version of cl_page_delete() that can be called for pages that are not fully
- * constructed, e.g., in the error-handling cl_page_find()->cl_page_delete0()
- * path. Doesn't check the page invariant.
- */
-static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg)
-{
- PASSERT(env, pg, pg->cp_state != CPS_FREEING);
-
- /*
- * Sever all ways to obtain new pointers to @pg.
- */
- cl_page_owner_clear(pg);
-
- cl_page_state_set0(env, pg, CPS_FREEING);
-
- CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_delete),
- (const struct lu_env *,
- const struct cl_page_slice *));
-}
-
-/**
- * Called when a decision is made to throw page out of memory.
- *
- * Notifies all layers about page destruction by calling
- * cl_page_operations::cpo_delete() method top-to-bottom.
- *
- * Moves page into cl_page_state::CPS_FREEING state (this is the only place
- * where transition to this state happens).
- *
- * Eliminates all venues through which new references to the page can be
- * obtained:
- *
- * - removes page from the radix trees,
- *
- * - breaks linkage from VM page to cl_page.
- *
- * Once page reaches cl_page_state::CPS_FREEING, all remaining references will
- * drain after some time, at which point page will be recycled.
- *
- * \pre VM page is locked
- * \post pg->cp_state == CPS_FREEING
- *
- * \see cl_page_operations::cpo_delete()
- */
-void cl_page_delete(const struct lu_env *env, struct cl_page *pg)
-{
- PINVRNT(env, pg, cl_page_invariant(pg));
- cl_page_delete0(env, pg);
-}
-EXPORT_SYMBOL(cl_page_delete);
-
-/**
- * Marks page up-to-date.
- *
- * Call cl_page_operations::cpo_export() through all layers top-to-bottom. The
- * layer responsible for VM interaction has to mark/clear the page as
- * up-to-date according to the \a uptodate argument.
- *
- * \see cl_page_operations::cpo_export()
- */
-void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate)
-{
- PINVRNT(env, pg, cl_page_invariant(pg));
- CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_export),
- (const struct lu_env *,
- const struct cl_page_slice *, int), uptodate);
-}
-EXPORT_SYMBOL(cl_page_export);
-
-/**
- * Returns true iff \a pg is VM-locked in a suitable sense by the calling
- * thread.
- */
-int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg)
-{
- int result;
- const struct cl_page_slice *slice;
-
- slice = container_of(pg->cp_layers.next,
- const struct cl_page_slice, cpl_linkage);
- PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked);
- /*
- * Call ->cpo_is_vmlocked() directly instead of going through
- * CL_PAGE_INVOKE(), because cl_page_is_vmlocked() is used by
- * cl_page_invariant().
- */
- result = slice->cpl_ops->cpo_is_vmlocked(env, slice);
- PASSERT(env, pg, result == -EBUSY || result == -ENODATA);
- return result == -EBUSY;
-}
-EXPORT_SYMBOL(cl_page_is_vmlocked);
-
-static enum cl_page_state cl_req_type_state(enum cl_req_type crt)
-{
- return crt == CRT_WRITE ? CPS_PAGEOUT : CPS_PAGEIN;
-}
-
-static void cl_page_io_start(const struct lu_env *env,
- struct cl_page *pg, enum cl_req_type crt)
-{
- /*
- * Page is queued for IO, change its state.
- */
- cl_page_owner_clear(pg);
- cl_page_state_set(env, pg, cl_req_type_state(crt));
-}
-
-/**
- * Prepares page for immediate transfer. cl_page_operations::cpo_prep() is
- * called top-to-bottom. Every layer either agrees to submit this page (by
- * returning 0), or requests to omit this page (by returning -EALREADY). The
- * layer handling interactions with the VM also has to inform the VM that the
- * page is now under transfer.
- */
-int cl_page_prep(const struct lu_env *env, struct cl_io *io,
- struct cl_page *pg, enum cl_req_type crt)
-{
- int result;
-
- PINVRNT(env, pg, cl_page_is_owned(pg, io));
- PINVRNT(env, pg, cl_page_invariant(pg));
- PINVRNT(env, pg, crt < CRT_NR);
-
- /*
- * XXX this has to be called bottom-to-top, so that llite can set up
- * PG_writeback without risking other layers deciding to skip this
- * page.
- */
- if (crt >= CRT_NR)
- return -EINVAL;
- result = cl_page_invoke(env, io, pg, CL_PAGE_OP(io[crt].cpo_prep));
- if (result == 0)
- cl_page_io_start(env, pg, crt);
-
- CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
- return result;
-}
-EXPORT_SYMBOL(cl_page_prep);
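-
-/*
- * Usage sketch (editorial note, not part of the original file): a
- * submission path owns the page and then calls cl_page_prep();
- * -EALREADY from a layer means "omit this page", not a hard error:
- *
- *	rc = cl_page_prep(env, io, pg, CRT_WRITE);
- *	if (rc == 0)
- *		... page is CPS_PAGEOUT, add it to the transfer ...
- *	else if (rc == -EALREADY)
- *		... skip the page, continue with the rest ...
- */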
-
-/**
- * Notify layers about transfer completion.
- *
- * Invoked by the transfer sub-system (which is a part of osc) to notify layers
- * that a transfer of which this page is a part has completed.
- *
- * Completion call-backs are executed in bottom-up order, so that the
- * uppermost layer (llite), responsible for the VFS/VM interaction, runs last
- * and can release locks safely.
- *
- * \pre pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
- * \post pg->cp_state == CPS_CACHED
- *
- * \see cl_page_operations::cpo_completion()
- */
-void cl_page_completion(const struct lu_env *env,
- struct cl_page *pg, enum cl_req_type crt, int ioret)
-{
- struct cl_sync_io *anchor = pg->cp_sync_io;
-
- PASSERT(env, pg, crt < CRT_NR);
- PASSERT(env, pg, pg->cp_state == cl_req_type_state(crt));
-
- CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret);
-
- cl_page_state_set(env, pg, CPS_CACHED);
- if (crt >= CRT_NR)
- return;
- CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(io[crt].cpo_completion),
- (const struct lu_env *,
- const struct cl_page_slice *, int), ioret);
- if (anchor) {
- LASSERT(pg->cp_sync_io == anchor);
- pg->cp_sync_io = NULL;
- cl_sync_io_note(env, anchor, ioret);
- }
-}
-EXPORT_SYMBOL(cl_page_completion);
-
-/**
- * Notify layers that transfer formation engine decided to yank this page from
- * the cache and to make it a part of a transfer.
- *
- * \pre pg->cp_state == CPS_CACHED
- * \post pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
- *
- * \see cl_page_operations::cpo_make_ready()
- */
-int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
- enum cl_req_type crt)
-{
- int result;
-
- PINVRNT(env, pg, crt < CRT_NR);
-
- if (crt >= CRT_NR)
- return -EINVAL;
- result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(io[crt].cpo_make_ready),
- (const struct lu_env *,
- const struct cl_page_slice *));
- if (result == 0) {
- PASSERT(env, pg, pg->cp_state == CPS_CACHED);
- cl_page_io_start(env, pg, crt);
- }
- CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
- return result;
-}
-EXPORT_SYMBOL(cl_page_make_ready);
-
-/**
- * Called when a page is being written back at the kernel's request.
- *
- * \pre cl_page_is_owned(pg, io)
- * \post ergo(result == 0, pg->cp_state == CPS_PAGEOUT)
- *
- * \see cl_page_operations::cpo_flush()
- */
-int cl_page_flush(const struct lu_env *env, struct cl_io *io,
- struct cl_page *pg)
-{
- int result;
-
- PINVRNT(env, pg, cl_page_is_owned(pg, io));
- PINVRNT(env, pg, cl_page_invariant(pg));
-
- result = cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_flush));
-
- CL_PAGE_HEADER(D_TRACE, env, pg, "%d\n", result);
- return result;
-}
-EXPORT_SYMBOL(cl_page_flush);
-
-/**
- * Tells transfer engine that only part of a page is to be transmitted.
- *
- * \see cl_page_operations::cpo_clip()
- */
-void cl_page_clip(const struct lu_env *env, struct cl_page *pg,
- int from, int to)
-{
- PINVRNT(env, pg, cl_page_invariant(pg));
-
- CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", from, to);
- CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_clip),
- (const struct lu_env *,
- const struct cl_page_slice *, int, int),
- from, to);
-}
-EXPORT_SYMBOL(cl_page_clip);
-
-/**
- * Prints a human-readable representation of \a pg via \a printer.
- */
-void cl_page_header_print(const struct lu_env *env, void *cookie,
- lu_printer_t printer, const struct cl_page *pg)
-{
- (*printer)(env, cookie,
- "page@%p[%d %p %d %d %p]\n",
- pg, atomic_read(&pg->cp_ref), pg->cp_obj,
- pg->cp_state, pg->cp_type,
- pg->cp_owner);
-}
-EXPORT_SYMBOL(cl_page_header_print);
-
-/**
- * Prints a human-readable representation of \a pg via \a printer.
- */
-void cl_page_print(const struct lu_env *env, void *cookie,
- lu_printer_t printer, const struct cl_page *pg)
-{
- cl_page_header_print(env, cookie, printer, pg);
- CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print),
- (const struct lu_env *env,
- const struct cl_page_slice *slice,
- void *cookie, lu_printer_t p), cookie, printer);
- (*printer)(env, cookie, "end page@%p\n", pg);
-}
-EXPORT_SYMBOL(cl_page_print);
-
-/**
- * Cancel a page which is still in a transfer.
- */
-int cl_page_cancel(const struct lu_env *env, struct cl_page *page)
-{
- return CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_cancel),
- (const struct lu_env *,
- const struct cl_page_slice *));
-}
-
-/**
- * Converts a page index within object \a obj into a byte offset.
- */
-loff_t cl_offset(const struct cl_object *obj, pgoff_t idx)
-{
- /*
- * XXX for now.
- */
- return (loff_t)idx << PAGE_SHIFT;
-}
-EXPORT_SYMBOL(cl_offset);
-
-/**
- * Converts a byte offset within object \a obj into a page index.
- */
-pgoff_t cl_index(const struct cl_object *obj, loff_t offset)
-{
- /*
- * XXX for now.
- */
- return offset >> PAGE_SHIFT;
-}
-EXPORT_SYMBOL(cl_index);
-
-size_t cl_page_size(const struct cl_object *obj)
-{
- return 1UL << PAGE_SHIFT;
-}
-EXPORT_SYMBOL(cl_page_size);
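-
-/*
- * Worked example (editorial note, not part of the original file): with
- * the PAGE_SHIFT-based implementations above, the two conversions are
- * exact inverses on page boundaries.  For a 4 KiB page (PAGE_SHIFT == 12):
- *
- *	cl_offset(obj, 3)      == 3 << 12      == 0x3000
- *	cl_index(obj, 0x3000)  == 0x3000 >> 12 == 3
- *
- * For offsets inside a page, cl_index() rounds down to the index of the
- * containing page.
- */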
-
-/**
- * Adds page slice to the compound page.
- *
- * This is called by cl_object_operations::coo_page_init() methods to add
- * per-layer state to the page. The new state is added at the end of the
- * cl_page::cp_layers list, that is, at the bottom of the stack.
- *
- * \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add()
- */
-void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
- struct cl_object *obj, pgoff_t index,
- const struct cl_page_operations *ops)
-{
- list_add_tail(&slice->cpl_linkage, &page->cp_layers);
- slice->cpl_obj = obj;
- slice->cpl_index = index;
- slice->cpl_ops = ops;
- slice->cpl_page = page;
-}
-EXPORT_SYMBOL(cl_page_slice_add);
-
-/**
- * Allocate and initialize cl_cache, called by ll_init_sbi().
- */
-struct cl_client_cache *cl_cache_init(unsigned long lru_page_max)
-{
- struct cl_client_cache *cache = NULL;
-
- cache = kzalloc(sizeof(*cache), GFP_KERNEL);
- if (!cache)
- return NULL;
-
- /* Initialize cache data */
- atomic_set(&cache->ccc_users, 1);
- cache->ccc_lru_max = lru_page_max;
- atomic_long_set(&cache->ccc_lru_left, lru_page_max);
- spin_lock_init(&cache->ccc_lru_lock);
- INIT_LIST_HEAD(&cache->ccc_lru);
-
- atomic_long_set(&cache->ccc_unstable_nr, 0);
- init_waitqueue_head(&cache->ccc_unstable_waitq);
-
- return cache;
-}
-EXPORT_SYMBOL(cl_cache_init);
-
-/**
- * Increase cl_cache refcount
- */
-void cl_cache_incref(struct cl_client_cache *cache)
-{
- atomic_inc(&cache->ccc_users);
-}
-EXPORT_SYMBOL(cl_cache_incref);
-
-/**
- * Decrease the cl_cache refcount and free the cache when it drops to 0.
- * Since llite, lov and osc each hold a cl_cache reference,
- * the free cannot race with active users. (LU-6173)
- */
-void cl_cache_decref(struct cl_client_cache *cache)
-{
- if (atomic_dec_and_test(&cache->ccc_users))
- kfree(cache);
-}
-EXPORT_SYMBOL(cl_cache_decref);
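-
-/*
- * Usage sketch (editorial note, not part of the original file): every
- * subsystem sharing the cache takes its own reference, so teardown
- * order does not matter:
- *
- *	cache = cl_cache_init(lru_page_max);	refcount == 1 (owner)
- *	cl_cache_incref(cache);			e.g. osc takes a reference
- *	...
- *	cl_cache_decref(cache);			osc drops its reference
- *	cl_cache_decref(cache);			owner drops; cache is freed
- */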
diff --git a/drivers/staging/lustre/lustre/obdclass/class_obd.c b/drivers/staging/lustre/lustre/obdclass/class_obd.c
deleted file mode 100644
index d6c46858941b..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/class_obd.c
+++ /dev/null
@@ -1,544 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <linux/atomic.h>
-#include <linux/miscdevice.h>
-#include <linux/libcfs/libcfs.h>
-
-#include <obd_support.h>
-#include <obd_class.h>
-#include <uapi/linux/lnet/lnetctl.h>
-#include <lustre_debug.h>
-#include <lprocfs_status.h>
-#include <linux/list.h>
-#include <cl_object.h>
-#include <uapi/linux/lustre/lustre_ioctl.h>
-#include <uapi/linux/lnet/libcfs_ioctl.h>
-#include "llog_internal.h"
-
-struct obd_device *obd_devs[MAX_OBD_DEVICES];
-struct list_head obd_types;
-DEFINE_RWLOCK(obd_dev_lock);
-
-/* The following are visible and mutable through /sys/fs/lustre. */
-unsigned int obd_debug_peer_on_timeout;
-EXPORT_SYMBOL(obd_debug_peer_on_timeout);
-unsigned int obd_dump_on_timeout;
-EXPORT_SYMBOL(obd_dump_on_timeout);
-unsigned int obd_dump_on_eviction;
-EXPORT_SYMBOL(obd_dump_on_eviction);
-unsigned long obd_max_dirty_pages;
-EXPORT_SYMBOL(obd_max_dirty_pages);
-atomic_long_t obd_dirty_pages;
-EXPORT_SYMBOL(obd_dirty_pages);
-unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT; /* seconds */
-EXPORT_SYMBOL(obd_timeout);
-unsigned int obd_timeout_set;
-EXPORT_SYMBOL(obd_timeout_set);
-/* Adaptive timeout defs here instead of ptlrpc module for /sys/fs/ access */
-unsigned int at_min;
-EXPORT_SYMBOL(at_min);
-unsigned int at_max = 600;
-EXPORT_SYMBOL(at_max);
-unsigned int at_history = 600;
-EXPORT_SYMBOL(at_history);
-int at_early_margin = 5;
-EXPORT_SYMBOL(at_early_margin);
-int at_extra = 30;
-EXPORT_SYMBOL(at_extra);
-
-atomic_long_t obd_dirty_transit_pages;
-EXPORT_SYMBOL(obd_dirty_transit_pages);
-
-char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE;
-char obd_jobid_node[LUSTRE_JOBID_SIZE + 1];
-
-/* Get the jobid of the current process from the stored variable, or build
- * it from the process name and fsuid.
- *
- * Historically this was also done by reading the environment variable
- * stored in between the "env_start" & "env_end" of task struct.
- * This is now deprecated.
- */
-int lustre_get_jobid(char *jobid)
-{
- memset(jobid, 0, LUSTRE_JOBID_SIZE);
- /* Jobstats isn't enabled */
- if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0)
- return 0;
-
- /* Use process name + fsuid as jobid */
- if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) {
- snprintf(jobid, LUSTRE_JOBID_SIZE, "%s.%u",
- current->comm,
- from_kuid(&init_user_ns, current_fsuid()));
- return 0;
- }
-
- /* Whole node dedicated to single job */
- if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) {
- strcpy(jobid, obd_jobid_node);
- return 0;
- }
-
- return -ENOENT;
-}
-EXPORT_SYMBOL(lustre_get_jobid);
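-
-/*
- * Usage sketch (editorial note, not part of the original file): callers
- * pass a buffer of at least LUSTRE_JOBID_SIZE bytes; an empty string
- * after a 0 return means jobstats is disabled:
- *
- *	char jobid[LUSTRE_JOBID_SIZE];
- *
- *	if (lustre_get_jobid(jobid) == 0 && jobid[0] != '\0')
- *		... tag the request with jobid ...
- */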
-
-static int class_resolve_dev_name(__u32 len, const char *name)
-{
- int rc;
- int dev;
-
- if (!len || !name) {
- CERROR("No name passed,!\n");
- rc = -EINVAL;
- goto out;
- }
- if (name[len - 1] != 0) {
- CERROR("Name not nul terminated!\n");
- rc = -EINVAL;
- goto out;
- }
-
- CDEBUG(D_IOCTL, "device name %s\n", name);
- dev = class_name2dev(name);
- if (dev == -1) {
- CDEBUG(D_IOCTL, "No device for name %s!\n", name);
- rc = -EINVAL;
- goto out;
- }
-
- CDEBUG(D_IOCTL, "device name %s, dev %d\n", name, dev);
- rc = dev;
-
-out:
- return rc;
-}
-
-int class_handle_ioctl(unsigned int cmd, unsigned long arg)
-{
- char *buf = NULL;
- struct obd_ioctl_data *data;
- struct libcfs_debug_ioctl_data *debug_data;
- struct obd_device *obd = NULL;
- int err = 0, len = 0;
-
- /* only for debugging */
- if (cmd == LIBCFS_IOC_DEBUG_MASK) {
- debug_data = (struct libcfs_debug_ioctl_data *)arg;
- libcfs_subsystem_debug = debug_data->subs;
- libcfs_debug = debug_data->debug;
- return 0;
- }
-
- CDEBUG(D_IOCTL, "cmd = %x\n", cmd);
- if (obd_ioctl_getdata(&buf, &len, (void __user *)arg)) {
- CERROR("OBD ioctl: data error\n");
- return -EINVAL;
- }
- data = (struct obd_ioctl_data *)buf;
-
- switch (cmd) {
- case OBD_IOC_PROCESS_CFG: {
- struct lustre_cfg *lcfg;
-
- if (!data->ioc_plen1 || !data->ioc_pbuf1) {
- CERROR("No config buffer passed!\n");
- err = -EINVAL;
- goto out;
- }
- lcfg = kzalloc(data->ioc_plen1, GFP_NOFS);
- if (!lcfg) {
- err = -ENOMEM;
- goto out;
- }
- if (copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1))
- err = -EFAULT;
- if (!err)
- err = lustre_cfg_sanity_check(lcfg, data->ioc_plen1);
- if (!err)
- err = class_process_config(lcfg);
-
- kfree(lcfg);
- goto out;
- }
-
- case OBD_GET_VERSION:
- if (!data->ioc_inlbuf1) {
- CERROR("No buffer passed in ioctl\n");
- err = -EINVAL;
- goto out;
- }
-
- if (strlen(LUSTRE_VERSION_STRING) + 1 > data->ioc_inllen1) {
- CERROR("ioctl buffer too small to hold version\n");
- err = -EINVAL;
- goto out;
- }
-
- memcpy(data->ioc_bulk, LUSTRE_VERSION_STRING,
- strlen(LUSTRE_VERSION_STRING) + 1);
-
- if (copy_to_user((void __user *)arg, data, len))
- err = -EFAULT;
- goto out;
-
- case OBD_IOC_NAME2DEV: {
- /* Resolve a device name. This does not change the
- * currently selected device.
- */
- int dev;
-
- dev = class_resolve_dev_name(data->ioc_inllen1,
- data->ioc_inlbuf1);
- data->ioc_dev = dev;
- if (dev < 0) {
- err = -EINVAL;
- goto out;
- }
-
- if (copy_to_user((void __user *)arg, data, sizeof(*data)))
- err = -EFAULT;
- goto out;
- }
-
- case OBD_IOC_UUID2DEV: {
- /* Resolve a device uuid. This does not change the
- * currently selected device.
- */
- int dev;
- struct obd_uuid uuid;
-
- if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
- CERROR("No UUID passed!\n");
- err = -EINVAL;
- goto out;
- }
- if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) {
- CERROR("UUID not NUL terminated!\n");
- err = -EINVAL;
- goto out;
- }
-
- CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1);
- obd_str2uuid(&uuid, data->ioc_inlbuf1);
- dev = class_uuid2dev(&uuid);
- data->ioc_dev = dev;
- if (dev == -1) {
- CDEBUG(D_IOCTL, "No device for UUID %s!\n",
- data->ioc_inlbuf1);
- err = -EINVAL;
- goto out;
- }
-
- CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1,
- dev);
-
- if (copy_to_user((void __user *)arg, data, sizeof(*data)))
- err = -EFAULT;
- goto out;
- }
-
- case OBD_IOC_GETDEVICE: {
- int index = data->ioc_count;
- char *status, *str;
-
- if (!data->ioc_inlbuf1) {
- CERROR("No buffer passed in ioctl\n");
- err = -EINVAL;
- goto out;
- }
- if (data->ioc_inllen1 < 128) {
- CERROR("ioctl buffer too small to hold version\n");
- err = -EINVAL;
- goto out;
- }
-
- obd = class_num2obd(index);
- if (!obd) {
- err = -ENOENT;
- goto out;
- }
-
- if (obd->obd_stopping)
- status = "ST";
- else if (obd->obd_set_up)
- status = "UP";
- else if (obd->obd_attached)
- status = "AT";
- else
- status = "--";
- str = (char *)data->ioc_bulk;
- snprintf(str, len - sizeof(*data), "%3d %s %s %s %s %d",
- (int)index, status, obd->obd_type->typ_name,
- obd->obd_name, obd->obd_uuid.uuid,
- atomic_read(&obd->obd_refcount));
-
- if (copy_to_user((void __user *)arg, data, len))
- err = -EFAULT;
- goto out;
- }
- }
-
- if (data->ioc_dev == OBD_DEV_BY_DEVNAME) {
- if (data->ioc_inllen4 <= 0 || !data->ioc_inlbuf4) {
- err = -EINVAL;
- goto out;
- }
- if (strnlen(data->ioc_inlbuf4, MAX_OBD_NAME) >= MAX_OBD_NAME) {
- err = -EINVAL;
- goto out;
- }
- obd = class_name2obd(data->ioc_inlbuf4);
- } else if (data->ioc_dev < class_devno_max()) {
- obd = class_num2obd(data->ioc_dev);
- } else {
- CERROR("OBD ioctl: No device\n");
- err = -EINVAL;
- goto out;
- }
-
- if (!obd) {
- CERROR("OBD ioctl : No Device %d\n", data->ioc_dev);
- err = -EINVAL;
- goto out;
- }
- LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
-
- if (!obd->obd_set_up || obd->obd_stopping) {
- CERROR("OBD ioctl: device not setup %d\n", data->ioc_dev);
- err = -EINVAL;
- goto out;
- }
-
- switch (cmd) {
- case OBD_IOC_NO_TRANSNO: {
- if (!obd->obd_attached) {
- CERROR("Device %d not attached\n", obd->obd_minor);
- err = -ENODEV;
- goto out;
- }
- CDEBUG(D_HA, "%s: disabling committed-transno notification\n",
- obd->obd_name);
- obd->obd_no_transno = 1;
- err = 0;
- goto out;
- }
-
- default: {
- err = obd_iocontrol(cmd, obd->obd_self_export, len, data, NULL);
- if (err)
- goto out;
-
- if (copy_to_user((void __user *)arg, data, len))
- err = -EFAULT;
- goto out;
- }
- }
-
- out:
- kvfree(buf);
- return err;
-} /* class_handle_ioctl */
-
-#define OBD_INIT_CHECK
-static int obd_init_checks(void)
-{
- __u64 u64val, div64val;
- char buf[64];
- int len, ret = 0;
-
- CDEBUG(D_INFO, "LPU64=%s, LPD64=%s, LPX64=%s\n", "%llu", "%lld",
- "%#llx");
-
- CDEBUG(D_INFO, "OBD_OBJECT_EOF = %#llx\n", (__u64)OBD_OBJECT_EOF);
-
- u64val = OBD_OBJECT_EOF;
- CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = %#llx\n", u64val);
- if (u64val != OBD_OBJECT_EOF) {
- CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n",
- u64val, (int)sizeof(u64val));
- ret = -EINVAL;
- }
- len = snprintf(buf, sizeof(buf), "%#llx", u64val);
- if (len != 18) {
- CWARN("LPX64 wrong length! strlen(%s)=%d != 18\n", buf, len);
- ret = -EINVAL;
- }
-
- div64val = OBD_OBJECT_EOF;
- CDEBUG(D_INFO, "div64val OBD_OBJECT_EOF = %#llx\n", div64val);
- if (div64val != OBD_OBJECT_EOF) {
- CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n",
- div64val, (int)sizeof(div64val));
- ret = -EOVERFLOW;
- }
- if (u64val >> 8 != OBD_OBJECT_EOF >> 8) {
- CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n",
- u64val, (int)sizeof(u64val));
- return -EOVERFLOW;
- }
- if (do_div(div64val, 256) != (u64val & 255)) {
- CERROR("do_div(%#llx,256) != %llu\n", u64val, u64val & 255);
- return -EOVERFLOW;
- }
- if (u64val >> 8 != div64val) {
- CERROR("do_div(%#llx,256) %llu != %llu\n",
- u64val, div64val, u64val >> 8);
- return -EOVERFLOW;
- }
- len = snprintf(buf, sizeof(buf), "%#llx", u64val);
- if (len != 18) {
- CWARN("LPX64 wrong length! strlen(%s)=%d != 18\n", buf, len);
- ret = -EINVAL;
- }
- len = snprintf(buf, sizeof(buf), "%llu", u64val);
- if (len != 20) {
- CWARN("LPU64 wrong length! strlen(%s)=%d != 20\n", buf, len);
- ret = -EINVAL;
- }
- len = snprintf(buf, sizeof(buf), "%lld", u64val);
- if (len != 2) {
- CWARN("LPD64 wrong length! strlen(%s)=%d != 2\n", buf, len);
- ret = -EINVAL;
- }
- if ((u64val & ~PAGE_MASK) >= PAGE_SIZE) {
- CWARN("mask failed: u64val %llu >= %llu\n", u64val,
- (__u64)PAGE_SIZE);
- ret = -EINVAL;
- }
-
- return ret;
-}
-
-static int __init obdclass_init(void)
-{
- int i, err;
-
- LCONSOLE_INFO("Lustre: Build Version: " LUSTRE_VERSION_STRING "\n");
-
- spin_lock_init(&obd_types_lock);
-
- err = libcfs_setup();
- if (err)
- return err;
-
- obd_zombie_impexp_init();
-
- err = obd_init_checks();
- if (err)
- return err;
-
- class_init_uuidlist();
- err = class_handle_init();
- if (err)
- return err;
-
- INIT_LIST_HEAD(&obd_types);
-
- err = misc_register(&obd_psdev);
- if (err) {
- CERROR("cannot register OBD miscdevices: err %d\n", err);
- return err;
- }
-
- /* The obd_devs array is a static global and thus already zeroed */
- for (i = 0; i < class_devno_max(); i++)
- obd_devs[i] = NULL;
-
- /* Default the dirty page cache cap to 1/2 of system memory.
- * For clients with less memory, a larger fraction is needed
- * for other purposes (mostly for BGL).
- */
- if (totalram_pages <= 512 << (20 - PAGE_SHIFT))
- obd_max_dirty_pages = totalram_pages / 4;
- else
- obd_max_dirty_pages = totalram_pages / 2;
-
- err = obd_init_caches();
- if (err)
- return err;
-
- err = class_procfs_init();
- if (err)
- return err;
-
- err = obd_sysctl_init();
- if (err)
- return err;
-
- err = lu_global_init();
- if (err)
- return err;
-
- err = cl_global_init();
- if (err != 0)
- return err;
-
- err = llog_info_init();
- if (err)
- return err;
-
- err = lustre_register_fs();
-
- return err;
-}
-
-static void obdclass_exit(void)
-{
- lustre_unregister_fs();
-
- misc_deregister(&obd_psdev);
- llog_info_fini();
- cl_global_fini();
- lu_global_fini();
-
- obd_cleanup_caches();
-
- class_procfs_clean();
-
- class_handle_cleanup();
- class_exit_uuidlist();
- obd_zombie_impexp_stop();
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Class Driver");
-MODULE_VERSION(LUSTRE_VERSION_STRING);
-MODULE_LICENSE("GPL");
-
-module_init(obdclass_init);
-module_exit(obdclass_exit);
diff --git a/drivers/staging/lustre/lustre/obdclass/debug.c b/drivers/staging/lustre/lustre/obdclass/debug.c
deleted file mode 100644
index 2156a82a613a..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/debug.c
+++ /dev/null
@@ -1,96 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdclass/debug.c
- *
- * Helper routines for dumping data structs for debugging.
- */
-
-#define DEBUG_SUBSYSTEM D_OTHER
-
-#include <asm/unaligned.h>
-
-#include <obd_support.h>
-#include <lustre_debug.h>
-#include <lustre_net.h>
-
-#define LPDS sizeof(__u64)
-int block_debug_setup(void *addr, int len, __u64 off, __u64 id)
-{
- LASSERT(addr);
-
- put_unaligned_le64(off, addr);
- put_unaligned_le64(id, addr + LPDS);
- addr += len - LPDS - LPDS;
- put_unaligned_le64(off, addr);
- put_unaligned_le64(id, addr + LPDS);
-
- return 0;
-}
-EXPORT_SYMBOL(block_debug_setup);
-
-int block_debug_check(char *who, void *addr, int end, __u64 off, __u64 id)
-{
- __u64 ne_off;
- int err = 0;
-
- LASSERT(addr);
-
- ne_off = le64_to_cpu(off);
- id = le64_to_cpu(id);
- if (memcmp(addr, (char *)&ne_off, LPDS)) {
- CDEBUG(D_ERROR, "%s: id %#llx offset %llu off: %#llx != %#llx\n",
- who, id, off, *(__u64 *)addr, ne_off);
- err = -EINVAL;
- }
- if (memcmp(addr + LPDS, (char *)&id, LPDS)) {
- CDEBUG(D_ERROR, "%s: id %#llx offset %llu id: %#llx != %#llx\n",
- who, id, off, *(__u64 *)(addr + LPDS), id);
- err = -EINVAL;
- }
-
- addr += end - LPDS - LPDS;
- if (memcmp(addr, (char *)&ne_off, LPDS)) {
- CDEBUG(D_ERROR, "%s: id %#llx offset %llu end off: %#llx != %#llx\n",
- who, id, off, *(__u64 *)addr, ne_off);
- err = -EINVAL;
- }
- if (memcmp(addr + LPDS, (char *)&id, LPDS)) {
- CDEBUG(D_ERROR, "%s: id %#llx offset %llu end id: %#llx != %#llx\n",
- who, id, off, *(__u64 *)(addr + LPDS), id);
- err = -EINVAL;
- }
-
- return err;
-}
-EXPORT_SYMBOL(block_debug_check);
-#undef LPDS
diff --git a/drivers/staging/lustre/lustre/obdclass/genops.c b/drivers/staging/lustre/lustre/obdclass/genops.c
deleted file mode 100644
index 234f383ce6d9..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/genops.c
+++ /dev/null
@@ -1,1480 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdclass/genops.c
- *
- * These are the only exported functions, they provide some generic
- * infrastructure for managing object devices
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-#include <obd_class.h>
-#include <lprocfs_status.h>
-#include <lustre_kernelcomm.h>
-
-spinlock_t obd_types_lock;
-
-static struct kmem_cache *obd_device_cachep;
-struct kmem_cache *obdo_cachep;
-EXPORT_SYMBOL(obdo_cachep);
-static struct kmem_cache *import_cachep;
-
-static struct workqueue_struct *zombie_wq;
-static void obd_zombie_export_add(struct obd_export *exp);
-static void obd_zombie_import_add(struct obd_import *imp);
-
-int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
-EXPORT_SYMBOL(ptlrpc_put_connection_superhack);
-
-/*
- * support functions: we could use inter-module communication, but this
- * is more portable to other OSes
- */
-static struct obd_device *obd_device_alloc(void)
-{
- struct obd_device *obd;
-
- obd = kmem_cache_zalloc(obd_device_cachep, GFP_NOFS);
- if (obd)
- obd->obd_magic = OBD_DEVICE_MAGIC;
- return obd;
-}
-
-static void obd_device_free(struct obd_device *obd)
-{
- LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, "obd %p obd_magic %08x != %08x\n",
- obd, obd->obd_magic, OBD_DEVICE_MAGIC);
- if (obd->obd_namespace) {
- CERROR("obd %p: namespace %p was not properly cleaned up (obd_force=%d)!\n",
- obd, obd->obd_namespace, obd->obd_force);
- LBUG();
- }
- lu_ref_fini(&obd->obd_reference);
- kmem_cache_free(obd_device_cachep, obd);
-}
-
-static struct obd_type *class_search_type(const char *name)
-{
- struct list_head *tmp;
- struct obd_type *type;
-
- spin_lock(&obd_types_lock);
- list_for_each(tmp, &obd_types) {
- type = list_entry(tmp, struct obd_type, typ_chain);
- if (strcmp(type->typ_name, name) == 0) {
- spin_unlock(&obd_types_lock);
- return type;
- }
- }
- spin_unlock(&obd_types_lock);
- return NULL;
-}
-
-static struct obd_type *class_get_type(const char *name)
-{
- struct obd_type *type = class_search_type(name);
-
- if (!type) {
- const char *modname = name;
-
- if (!request_module("%s", modname)) {
- CDEBUG(D_INFO, "Loaded module '%s'\n", modname);
- type = class_search_type(name);
- } else {
- LCONSOLE_ERROR_MSG(0x158, "Can't load module '%s'\n",
- modname);
- }
- }
- if (type) {
- spin_lock(&type->obd_type_lock);
- type->typ_refcnt++;
- try_module_get(type->typ_dt_ops->owner);
- spin_unlock(&type->obd_type_lock);
- }
- return type;
-}
-
-void class_put_type(struct obd_type *type)
-{
- LASSERT(type);
- spin_lock(&type->obd_type_lock);
- type->typ_refcnt--;
- module_put(type->typ_dt_ops->owner);
- spin_unlock(&type->obd_type_lock);
-}
-
-#define CLASS_MAX_NAME 1024
-
-int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops,
- const char *name,
- struct lu_device_type *ldt)
-{
- struct obd_type *type;
- int rc;
-
- /* sanity check */
- LASSERT(strnlen(name, CLASS_MAX_NAME) < CLASS_MAX_NAME);
-
- if (class_search_type(name)) {
- CDEBUG(D_IOCTL, "Type %s already registered\n", name);
- return -EEXIST;
- }
-
- rc = -ENOMEM;
- type = kzalloc(sizeof(*type), GFP_NOFS);
- if (!type)
- return rc;
-
- type->typ_dt_ops = kzalloc(sizeof(*type->typ_dt_ops), GFP_NOFS);
- type->typ_md_ops = kzalloc(sizeof(*type->typ_md_ops), GFP_NOFS);
- type->typ_name = kzalloc(strlen(name) + 1, GFP_NOFS);
-
- if (!type->typ_dt_ops ||
- !type->typ_md_ops ||
- !type->typ_name)
- goto failed;
-
- *type->typ_dt_ops = *dt_ops;
- /* md_ops is optional */
- if (md_ops)
- *type->typ_md_ops = *md_ops;
- strcpy(type->typ_name, name);
- spin_lock_init(&type->obd_type_lock);
-
- type->typ_debugfs_entry = debugfs_create_dir(type->typ_name,
- debugfs_lustre_root);
-
- type->typ_kobj = kobject_create_and_add(type->typ_name, lustre_kobj);
- if (!type->typ_kobj) {
- rc = -ENOMEM;
- goto failed;
- }
-
- if (ldt) {
- type->typ_lu = ldt;
- rc = lu_device_type_init(ldt);
- if (rc != 0)
- goto failed;
- }
-
- spin_lock(&obd_types_lock);
- list_add(&type->typ_chain, &obd_types);
- spin_unlock(&obd_types_lock);
-
- return 0;
-
- failed:
- if (type->typ_kobj)
- kobject_put(type->typ_kobj);
- kfree(type->typ_name);
- kfree(type->typ_md_ops);
- kfree(type->typ_dt_ops);
- kfree(type);
- return rc;
-}
-EXPORT_SYMBOL(class_register_type);
-
-int class_unregister_type(const char *name)
-{
- struct obd_type *type = class_search_type(name);
-
- if (!type) {
- CERROR("unknown obd type\n");
- return -EINVAL;
- }
-
- if (type->typ_refcnt) {
- CERROR("type %s has refcount (%d)\n", name, type->typ_refcnt);
- /* This is a bad situation, let's make the best of it */
- /* Remove ops, but leave the name for debugging */
- kfree(type->typ_dt_ops);
- kfree(type->typ_md_ops);
- return -EBUSY;
- }
-
- if (type->typ_kobj)
- kobject_put(type->typ_kobj);
-
- debugfs_remove_recursive(type->typ_debugfs_entry);
-
- if (type->typ_lu)
- lu_device_type_fini(type->typ_lu);
-
- spin_lock(&obd_types_lock);
- list_del(&type->typ_chain);
- spin_unlock(&obd_types_lock);
- kfree(type->typ_name);
- kfree(type->typ_dt_ops);
- kfree(type->typ_md_ops);
- kfree(type);
- return 0;
-} /* class_unregister_type */
-EXPORT_SYMBOL(class_unregister_type);
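-
-/*
- * Usage sketch (editorial note; "foo" and foo_obd_ops are hypothetical
- * names): an obd driver module pairs the two calls in its init/exit
- * hooks:
- *
- *	static int __init foo_init(void)
- *	{
- *		return class_register_type(&foo_obd_ops, NULL, "foo", NULL);
- *	}
- *
- *	static void __exit foo_exit(void)
- *	{
- *		class_unregister_type("foo");
- *	}
- */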
-
-/**
- * Create a new obd device.
- *
- * Find an empty slot in ::obd_devs[], create a new obd device in it.
- *
- * \param[in] type_name obd device type string.
- * \param[in] name obd device name.
- *
- * \retval NULL if create fails, otherwise return the obd device
- * pointer created.
- */
-struct obd_device *class_newdev(const char *type_name, const char *name)
-{
- struct obd_device *result = NULL;
- struct obd_device *newdev;
- struct obd_type *type = NULL;
- int i;
- int new_obd_minor = 0;
-
- if (strlen(name) >= MAX_OBD_NAME) {
- CERROR("name/uuid must be < %u bytes long\n", MAX_OBD_NAME);
- return ERR_PTR(-EINVAL);
- }
-
- type = class_get_type(type_name);
- if (!type) {
- CERROR("OBD: unknown type: %s\n", type_name);
- return ERR_PTR(-ENODEV);
- }
-
- newdev = obd_device_alloc();
- if (!newdev) {
- result = ERR_PTR(-ENOMEM);
- goto out_type;
- }
-
- LASSERT(newdev->obd_magic == OBD_DEVICE_MAGIC);
-
- write_lock(&obd_dev_lock);
- for (i = 0; i < class_devno_max(); i++) {
- struct obd_device *obd = class_num2obd(i);
-
- if (obd && (strcmp(name, obd->obd_name) == 0)) {
- CERROR("Device %s already exists at %d, won't add\n",
- name, i);
- if (result) {
- LASSERTF(result->obd_magic == OBD_DEVICE_MAGIC,
- "%p obd_magic %08x != %08x\n", result,
- result->obd_magic, OBD_DEVICE_MAGIC);
- LASSERTF(result->obd_minor == new_obd_minor,
- "%p obd_minor %d != %d\n", result,
- result->obd_minor, new_obd_minor);
-
- obd_devs[result->obd_minor] = NULL;
- result->obd_name[0] = '\0';
- }
- result = ERR_PTR(-EEXIST);
- break;
- }
- if (!result && !obd) {
- result = newdev;
- result->obd_minor = i;
- new_obd_minor = i;
- result->obd_type = type;
- strncpy(result->obd_name, name,
- sizeof(result->obd_name) - 1);
- obd_devs[i] = result;
- }
- }
- write_unlock(&obd_dev_lock);
-
- if (!result && i >= class_devno_max()) {
- CERROR("all %u OBD devices used, increase MAX_OBD_DEVICES\n",
- class_devno_max());
- result = ERR_PTR(-EOVERFLOW);
- goto out;
- }
-
- if (IS_ERR(result))
- goto out;
-
- CDEBUG(D_IOCTL, "Adding new device %s (%p)\n",
- result->obd_name, result);
-
- return result;
-out:
- obd_device_free(newdev);
-out_type:
- class_put_type(type);
- return result;
-}
-
-void class_release_dev(struct obd_device *obd)
-{
- struct obd_type *obd_type = obd->obd_type;
-
- LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, "%p obd_magic %08x != %08x\n",
- obd, obd->obd_magic, OBD_DEVICE_MAGIC);
- LASSERTF(obd == obd_devs[obd->obd_minor], "obd %p != obd_devs[%d] %p\n",
- obd, obd->obd_minor, obd_devs[obd->obd_minor]);
- LASSERT(obd_type);
-
- CDEBUG(D_INFO, "Release obd device %s at %d obd_type name =%s\n",
- obd->obd_name, obd->obd_minor, obd->obd_type->typ_name);
-
- write_lock(&obd_dev_lock);
- obd_devs[obd->obd_minor] = NULL;
- write_unlock(&obd_dev_lock);
- obd_device_free(obd);
-
- class_put_type(obd_type);
-}
-
-int class_name2dev(const char *name)
-{
- int i;
-
- if (!name)
- return -1;
-
- read_lock(&obd_dev_lock);
- for (i = 0; i < class_devno_max(); i++) {
- struct obd_device *obd = class_num2obd(i);
-
- if (obd && strcmp(name, obd->obd_name) == 0) {
- /* Make sure we finished attaching before we give
- * out any references
- */
- LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
- if (obd->obd_attached) {
- read_unlock(&obd_dev_lock);
- return i;
- }
- break;
- }
- }
- read_unlock(&obd_dev_lock);
-
- return -1;
-}
-
-struct obd_device *class_name2obd(const char *name)
-{
- int dev = class_name2dev(name);
-
- if (dev < 0 || dev > class_devno_max())
- return NULL;
- return class_num2obd(dev);
-}
-EXPORT_SYMBOL(class_name2obd);
-
-int class_uuid2dev(struct obd_uuid *uuid)
-{
- int i;
-
- read_lock(&obd_dev_lock);
- for (i = 0; i < class_devno_max(); i++) {
- struct obd_device *obd = class_num2obd(i);
-
- if (obd && obd_uuid_equals(uuid, &obd->obd_uuid)) {
- LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
- read_unlock(&obd_dev_lock);
- return i;
- }
- }
- read_unlock(&obd_dev_lock);
-
- return -1;
-}
-
-/**
- * Get obd device from ::obd_devs[]
- *
- * \param num [in] array index
- *
- * \retval NULL if ::obd_devs[\a num] does not contain an obd device,
- * otherwise return the obd device there.
- */
-struct obd_device *class_num2obd(int num)
-{
- struct obd_device *obd = NULL;
-
- if (num < class_devno_max()) {
- obd = obd_devs[num];
- if (!obd)
- return NULL;
-
- LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC,
- "%p obd_magic %08x != %08x\n",
- obd, obd->obd_magic, OBD_DEVICE_MAGIC);
- LASSERTF(obd->obd_minor == num,
- "%p obd_minor %0d != %0d\n",
- obd, obd->obd_minor, num);
- }
-
- return obd;
-}
-
-/* Search for a client OBD connected to tgt_uuid. If grp_uuid is
- * specified, then only the client with that uuid is returned,
- * otherwise any client connected to the tgt is returned.
- */
-struct obd_device *class_find_client_obd(struct obd_uuid *tgt_uuid,
- const char *typ_name,
- struct obd_uuid *grp_uuid)
-{
- int i;
-
- read_lock(&obd_dev_lock);
- for (i = 0; i < class_devno_max(); i++) {
- struct obd_device *obd = class_num2obd(i);
-
- if (!obd)
- continue;
- if ((strncmp(obd->obd_type->typ_name, typ_name,
- strlen(typ_name)) == 0)) {
- if (obd_uuid_equals(tgt_uuid,
- &obd->u.cli.cl_target_uuid) &&
- ((grp_uuid) ? obd_uuid_equals(grp_uuid,
- &obd->obd_uuid) : 1)) {
- read_unlock(&obd_dev_lock);
- return obd;
- }
- }
- }
- read_unlock(&obd_dev_lock);
-
- return NULL;
-}
-EXPORT_SYMBOL(class_find_client_obd);
-
-/* Iterate the obd_device list looking for devices that have grp_uuid. Start
- * searching at *next, and if a device is found, the next index to look
- * at is saved in *next. If next is NULL, then the first matching device
- * will always be returned.
- */
-struct obd_device *class_devices_in_group(struct obd_uuid *grp_uuid, int *next)
-{
- int i;
-
- if (!next)
- i = 0;
- else if (*next >= 0 && *next < class_devno_max())
- i = *next;
- else
- return NULL;
-
- read_lock(&obd_dev_lock);
- for (; i < class_devno_max(); i++) {
- struct obd_device *obd = class_num2obd(i);
-
- if (!obd)
- continue;
- if (obd_uuid_equals(grp_uuid, &obd->obd_uuid)) {
- if (next)
- *next = i + 1;
- read_unlock(&obd_dev_lock);
- return obd;
- }
- }
- read_unlock(&obd_dev_lock);
-
- return NULL;
-}
-EXPORT_SYMBOL(class_devices_in_group);
-
-/**
- * Notify every relevant OBD that the sptlrpc log for \a fsname has changed,
- * so it can adjust its sptlrpc settings accordingly.
- */
-int class_notify_sptlrpc_conf(const char *fsname, int namelen)
-{
- struct obd_device *obd;
- const char *type;
- int i, rc = 0, rc2;
-
- LASSERT(namelen > 0);
-
- read_lock(&obd_dev_lock);
- for (i = 0; i < class_devno_max(); i++) {
- obd = class_num2obd(i);
-
- if (!obd || obd->obd_set_up == 0 || obd->obd_stopping)
- continue;
-
- /* only notify mdc, osc, mdt, ost */
- type = obd->obd_type->typ_name;
- if (strcmp(type, LUSTRE_MDC_NAME) != 0 &&
- strcmp(type, LUSTRE_OSC_NAME) != 0 &&
- strcmp(type, LUSTRE_MDT_NAME) != 0 &&
- strcmp(type, LUSTRE_OST_NAME) != 0)
- continue;
-
- if (strncmp(obd->obd_name, fsname, namelen))
- continue;
-
- class_incref(obd, __func__, obd);
- read_unlock(&obd_dev_lock);
- rc2 = obd_set_info_async(NULL, obd->obd_self_export,
- sizeof(KEY_SPTLRPC_CONF),
- KEY_SPTLRPC_CONF, 0, NULL, NULL);
- rc = rc ? rc : rc2;
- class_decref(obd, __func__, obd);
- read_lock(&obd_dev_lock);
- }
- read_unlock(&obd_dev_lock);
- return rc;
-}
-EXPORT_SYMBOL(class_notify_sptlrpc_conf);
-
-void obd_cleanup_caches(void)
-{
- kmem_cache_destroy(obd_device_cachep);
- obd_device_cachep = NULL;
- kmem_cache_destroy(obdo_cachep);
- obdo_cachep = NULL;
- kmem_cache_destroy(import_cachep);
- import_cachep = NULL;
-}
-
-int obd_init_caches(void)
-{
- LASSERT(!obd_device_cachep);
- obd_device_cachep = kmem_cache_create("ll_obd_dev_cache",
- sizeof(struct obd_device),
- 0, 0, NULL);
- if (!obd_device_cachep)
- goto out;
-
- LASSERT(!obdo_cachep);
- obdo_cachep = kmem_cache_create("ll_obdo_cache", sizeof(struct obdo),
- 0, 0, NULL);
- if (!obdo_cachep)
- goto out;
-
- LASSERT(!import_cachep);
- import_cachep = kmem_cache_create("ll_import_cache",
- sizeof(struct obd_import),
- 0, 0, NULL);
- if (!import_cachep)
- goto out;
-
- return 0;
- out:
- obd_cleanup_caches();
- return -ENOMEM;
-}
-
-/* map connection to client */
-struct obd_export *class_conn2export(struct lustre_handle *conn)
-{
- struct obd_export *export;
-
- if (!conn) {
- CDEBUG(D_CACHE, "looking for null handle\n");
- return NULL;
- }
-
- if (conn->cookie == -1) { /* this means assign a new connection */
- CDEBUG(D_CACHE, "want a new connection\n");
- return NULL;
- }
-
- CDEBUG(D_INFO, "looking for export cookie %#llx\n", conn->cookie);
- export = class_handle2object(conn->cookie, NULL);
- return export;
-}
-EXPORT_SYMBOL(class_conn2export);
-
-struct obd_device *class_exp2obd(struct obd_export *exp)
-{
- if (exp)
- return exp->exp_obd;
- return NULL;
-}
-EXPORT_SYMBOL(class_exp2obd);
-
-struct obd_import *class_exp2cliimp(struct obd_export *exp)
-{
- struct obd_device *obd = exp->exp_obd;
-
- if (!obd)
- return NULL;
- return obd->u.cli.cl_import;
-}
-EXPORT_SYMBOL(class_exp2cliimp);
-
-/* Export management functions */
-static void class_export_destroy(struct obd_export *exp)
-{
- struct obd_device *obd = exp->exp_obd;
-
- LASSERT_ATOMIC_ZERO(&exp->exp_refcount);
- LASSERT(obd);
-
- CDEBUG(D_IOCTL, "destroying export %p/%s for %s\n", exp,
- exp->exp_client_uuid.uuid, obd->obd_name);
-
- /* "Local" exports (lctl, LOV->{mdc,osc}) have no connection. */
- if (exp->exp_connection)
- ptlrpc_put_connection_superhack(exp->exp_connection);
-
- LASSERT(list_empty(&exp->exp_outstanding_replies));
- LASSERT(list_empty(&exp->exp_uncommitted_replies));
- LASSERT(list_empty(&exp->exp_req_replay_queue));
- LASSERT(list_empty(&exp->exp_hp_rpcs));
- obd_destroy_export(exp);
- class_decref(obd, "export", exp);
-
- OBD_FREE_RCU(exp, sizeof(*exp), &exp->exp_handle);
-}
-
-static void export_handle_addref(void *export)
-{
- class_export_get(export);
-}
-
-static struct portals_handle_ops export_handle_ops = {
- .hop_addref = export_handle_addref,
- .hop_free = NULL,
-};
-
-struct obd_export *class_export_get(struct obd_export *exp)
-{
- atomic_inc(&exp->exp_refcount);
- CDEBUG(D_INFO, "GETting export %p : new refcount %d\n", exp,
- atomic_read(&exp->exp_refcount));
- return exp;
-}
-EXPORT_SYMBOL(class_export_get);
-
-void class_export_put(struct obd_export *exp)
-{
- LASSERT_ATOMIC_GT_LT(&exp->exp_refcount, 0, LI_POISON);
- CDEBUG(D_INFO, "PUTting export %p : new refcount %d\n", exp,
- atomic_read(&exp->exp_refcount) - 1);
-
- if (atomic_dec_and_test(&exp->exp_refcount)) {
- LASSERT(!list_empty(&exp->exp_obd_chain));
- CDEBUG(D_IOCTL, "final put %p/%s\n",
- exp, exp->exp_client_uuid.uuid);
-
- /* release nid stat reference */
- lprocfs_exp_cleanup(exp);
-
- obd_zombie_export_add(exp);
- }
-}
-EXPORT_SYMBOL(class_export_put);
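-
-/*
- * Usage sketch (editorial note, not part of the original file): holders
- * pin an export across a potentially blocking region with a get/put
- * pair; the final put defers destruction to the zombie workqueue:
- *
- *	class_export_get(exp);
- *	... use exp, possibly sleeping ...
- *	class_export_put(exp);
- */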
-
-static void obd_zombie_exp_cull(struct work_struct *ws)
-{
- struct obd_export *export = container_of(ws, struct obd_export, exp_zombie_work);
-
- class_export_destroy(export);
-}
-
-/* Creates a new export, adds it to the hash table, and returns a
- * pointer to it. The refcount is 2: one for the hash reference, and
- * one for the pointer returned by this function.
- */
-struct obd_export *class_new_export(struct obd_device *obd,
- struct obd_uuid *cluuid)
-{
- struct obd_export *export;
- int rc = 0;
-
- export = kzalloc(sizeof(*export), GFP_NOFS);
- if (!export)
- return ERR_PTR(-ENOMEM);
-
- export->exp_conn_cnt = 0;
- atomic_set(&export->exp_refcount, 2);
- atomic_set(&export->exp_rpc_count, 0);
- atomic_set(&export->exp_cb_count, 0);
- atomic_set(&export->exp_locks_count, 0);
-#if LUSTRE_TRACKS_LOCK_EXP_REFS
- INIT_LIST_HEAD(&export->exp_locks_list);
- spin_lock_init(&export->exp_locks_list_guard);
-#endif
- atomic_set(&export->exp_replay_count, 0);
- export->exp_obd = obd;
- INIT_LIST_HEAD(&export->exp_outstanding_replies);
- spin_lock_init(&export->exp_uncommitted_replies_lock);
- INIT_LIST_HEAD(&export->exp_uncommitted_replies);
- INIT_LIST_HEAD(&export->exp_req_replay_queue);
- INIT_LIST_HEAD(&export->exp_handle.h_link);
- INIT_LIST_HEAD(&export->exp_hp_rpcs);
- class_handle_hash(&export->exp_handle, &export_handle_ops);
- spin_lock_init(&export->exp_lock);
- spin_lock_init(&export->exp_rpc_lock);
- spin_lock_init(&export->exp_bl_list_lock);
- INIT_LIST_HEAD(&export->exp_bl_list);
- INIT_WORK(&export->exp_zombie_work, obd_zombie_exp_cull);
-
- export->exp_sp_peer = LUSTRE_SP_ANY;
- export->exp_flvr.sf_rpc = SPTLRPC_FLVR_INVALID;
- export->exp_client_uuid = *cluuid;
- obd_init_export(export);
-
- spin_lock(&obd->obd_dev_lock);
- /* shouldn't happen, but might race */
- if (obd->obd_stopping) {
- rc = -ENODEV;
- goto exit_unlock;
- }
-
- if (!obd_uuid_equals(cluuid, &obd->obd_uuid)) {
- rc = obd_uuid_add(obd, export);
- if (rc) {
- LCONSOLE_WARN("%s: denying duplicate export for %s, %d\n",
- obd->obd_name, cluuid->uuid, rc);
- goto exit_unlock;
- }
- }
-
- class_incref(obd, "export", export);
- list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports);
- export->exp_obd->obd_num_exports++;
- spin_unlock(&obd->obd_dev_lock);
- return export;
-
-exit_unlock:
- spin_unlock(&obd->obd_dev_lock);
- class_handle_unhash(&export->exp_handle);
- obd_destroy_export(export);
- kfree(export);
- return ERR_PTR(rc);
-}
-EXPORT_SYMBOL(class_new_export);
-
-void class_unlink_export(struct obd_export *exp)
-{
- class_handle_unhash(&exp->exp_handle);
-
- spin_lock(&exp->exp_obd->obd_dev_lock);
- /* delete the uuid-export hash item from the hash tables */
- if (exp != exp->exp_obd->obd_self_export)
- obd_uuid_del(exp->exp_obd, exp);
-
- list_move(&exp->exp_obd_chain, &exp->exp_obd->obd_unlinked_exports);
- exp->exp_obd->obd_num_exports--;
- spin_unlock(&exp->exp_obd->obd_dev_lock);
- class_export_put(exp);
-}
-
-/* Import management functions */
-static void class_import_destroy(struct obd_import *imp)
-{
- CDEBUG(D_IOCTL, "destroying import %p for %s\n", imp,
- imp->imp_obd->obd_name);
-
- LASSERT_ATOMIC_ZERO(&imp->imp_refcount);
-
- ptlrpc_put_connection_superhack(imp->imp_connection);
-
- while (!list_empty(&imp->imp_conn_list)) {
- struct obd_import_conn *imp_conn;
-
- imp_conn = list_entry(imp->imp_conn_list.next,
- struct obd_import_conn, oic_item);
- list_del_init(&imp_conn->oic_item);
- ptlrpc_put_connection_superhack(imp_conn->oic_conn);
- kfree(imp_conn);
- }
-
- LASSERT(!imp->imp_sec);
- class_decref(imp->imp_obd, "import", imp);
- OBD_FREE_RCU(imp, sizeof(*imp), &imp->imp_handle);
-}
-
-static void import_handle_addref(void *import)
-{
- class_import_get(import);
-}
-
-static struct portals_handle_ops import_handle_ops = {
- .hop_addref = import_handle_addref,
- .hop_free = NULL,
-};
-
-struct obd_import *class_import_get(struct obd_import *import)
-{
- atomic_inc(&import->imp_refcount);
- CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", import,
- atomic_read(&import->imp_refcount),
- import->imp_obd->obd_name);
- return import;
-}
-EXPORT_SYMBOL(class_import_get);
-
-void class_import_put(struct obd_import *imp)
-{
- LASSERT_ATOMIC_GT_LT(&imp->imp_refcount, 0, LI_POISON);
-
- CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", imp,
- atomic_read(&imp->imp_refcount) - 1,
- imp->imp_obd->obd_name);
-
- if (atomic_dec_and_test(&imp->imp_refcount)) {
- CDEBUG(D_INFO, "final put import %p\n", imp);
- obd_zombie_import_add(imp);
- }
-
- /* catch possible import put race */
- LASSERT_ATOMIC_GE_LT(&imp->imp_refcount, 0, LI_POISON);
-}
-EXPORT_SYMBOL(class_import_put);
-
-static void init_imp_at(struct imp_at *at)
-{
- int i;
-
- at_init(&at->iat_net_latency, 0, 0);
- for (i = 0; i < IMP_AT_MAX_PORTALS; i++) {
- /* max service estimates are tracked on the server side, so
- * don't use the AT history here, just use the last reported
- * value (but keep the history for the proc histogram and worst_ever).
- */
- at_init(&at->iat_service_estimate[i], INITIAL_CONNECT_TIMEOUT,
- AT_FLG_NOHIST);
- }
-}
-
-static void obd_zombie_imp_cull(struct work_struct *ws)
-{
- struct obd_import *import = container_of(ws, struct obd_import, imp_zombie_work);
-
- class_import_destroy(import);
-}
-
-struct obd_import *class_new_import(struct obd_device *obd)
-{
- struct obd_import *imp;
-
- imp = kzalloc(sizeof(*imp), GFP_NOFS);
- if (!imp)
- return NULL;
-
- INIT_LIST_HEAD(&imp->imp_pinger_chain);
- INIT_LIST_HEAD(&imp->imp_replay_list);
- INIT_LIST_HEAD(&imp->imp_sending_list);
- INIT_LIST_HEAD(&imp->imp_delayed_list);
- INIT_LIST_HEAD(&imp->imp_committed_list);
- INIT_LIST_HEAD(&imp->imp_unreplied_list);
- imp->imp_known_replied_xid = 0;
- imp->imp_replay_cursor = &imp->imp_committed_list;
- spin_lock_init(&imp->imp_lock);
- imp->imp_last_success_conn = 0;
- imp->imp_state = LUSTRE_IMP_NEW;
- imp->imp_obd = class_incref(obd, "import", imp);
- mutex_init(&imp->imp_sec_mutex);
- init_waitqueue_head(&imp->imp_recovery_waitq);
- INIT_WORK(&imp->imp_zombie_work, obd_zombie_imp_cull);
-
- atomic_set(&imp->imp_refcount, 2);
- atomic_set(&imp->imp_unregistering, 0);
- atomic_set(&imp->imp_inflight, 0);
- atomic_set(&imp->imp_replay_inflight, 0);
- atomic_set(&imp->imp_inval_count, 0);
- INIT_LIST_HEAD(&imp->imp_conn_list);
- INIT_LIST_HEAD(&imp->imp_handle.h_link);
- class_handle_hash(&imp->imp_handle, &import_handle_ops);
- init_imp_at(&imp->imp_at);
-
- /* the default magic is V2, will be used in connect RPC, and
- * then adjusted according to the flags in request/reply.
- */
- imp->imp_msg_magic = LUSTRE_MSG_MAGIC_V2;
-
- return imp;
-}
-EXPORT_SYMBOL(class_new_import);
-
-void class_destroy_import(struct obd_import *import)
-{
- LASSERT(import);
- LASSERT(import != LP_POISON);
-
- class_handle_unhash(&import->imp_handle);
-
- spin_lock(&import->imp_lock);
- import->imp_generation++;
- spin_unlock(&import->imp_lock);
- class_import_put(import);
-}
-EXPORT_SYMBOL(class_destroy_import);
-
-#if LUSTRE_TRACKS_LOCK_EXP_REFS
-
-void __class_export_add_lock_ref(struct obd_export *exp, struct ldlm_lock *lock)
-{
- spin_lock(&exp->exp_locks_list_guard);
-
- LASSERT(lock->l_exp_refs_nr >= 0);
-
- if (lock->l_exp_refs_target && lock->l_exp_refs_target != exp) {
- LCONSOLE_WARN("setting export %p for lock %p which already has export %p\n",
- exp, lock, lock->l_exp_refs_target);
- }
- if ((lock->l_exp_refs_nr++) == 0) {
- list_add(&lock->l_exp_refs_link, &exp->exp_locks_list);
- lock->l_exp_refs_target = exp;
- }
- CDEBUG(D_INFO, "lock = %p, export = %p, refs = %u\n",
- lock, exp, lock->l_exp_refs_nr);
- spin_unlock(&exp->exp_locks_list_guard);
-}
-
-void __class_export_del_lock_ref(struct obd_export *exp, struct ldlm_lock *lock)
-{
- spin_lock(&exp->exp_locks_list_guard);
- LASSERT(lock->l_exp_refs_nr > 0);
- if (lock->l_exp_refs_target != exp) {
- LCONSOLE_WARN("lock %p, mismatching export pointers: %p, %p\n",
- lock, lock->l_exp_refs_target, exp);
- }
- if (--lock->l_exp_refs_nr == 0) {
- list_del_init(&lock->l_exp_refs_link);
- lock->l_exp_refs_target = NULL;
- }
- CDEBUG(D_INFO, "lock = %p, export = %p, refs = %u\n",
- lock, exp, lock->l_exp_refs_nr);
- spin_unlock(&exp->exp_locks_list_guard);
-}
-#endif
-
-/* A connection defines an export context in which preallocation can
- * be managed. This releases the export pointer reference, and returns
- * the export handle, so the export refcount is 1 when this function
- * returns.
- */
-int class_connect(struct lustre_handle *conn, struct obd_device *obd,
- struct obd_uuid *cluuid)
-{
- struct obd_export *export;
-
- LASSERT(conn);
- LASSERT(obd);
- LASSERT(cluuid);
-
- export = class_new_export(obd, cluuid);
- if (IS_ERR(export))
- return PTR_ERR(export);
-
- conn->cookie = export->exp_handle.h_cookie;
- class_export_put(export);
-
- CDEBUG(D_IOCTL, "connect: client %s, cookie %#llx\n",
- cluuid->uuid, conn->cookie);
- return 0;
-}
-EXPORT_SYMBOL(class_connect);
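-
-/*
- * Usage sketch (editorial note, not part of the original file): the
- * returned handle is turned back into a pinned export with
- * class_conn2export(), whose reference must eventually be dropped:
- *
- *	struct lustre_handle conn;
- *	struct obd_export *exp;
- *
- *	if (class_connect(&conn, obd, cluuid) == 0) {
- *		exp = class_conn2export(&conn);
- *		... use exp ...
- *		class_export_put(exp);
- *	}
- */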
-
-/* This function removes 1-3 references from the export:
- * 1 - for the export pointer passed in,
- * and, if the disconnect is really needed,
- * 2 - for the removal from the hash,
- * 3 - in class_unlink_export().
- * The export pointer passed to this function can be destroyed.
- */
-int class_disconnect(struct obd_export *export)
-{
- int already_disconnected;
-
- if (!export) {
- CWARN("attempting to free NULL export %p\n", export);
- return -EINVAL;
- }
-
- spin_lock(&export->exp_lock);
- already_disconnected = export->exp_disconnected;
- export->exp_disconnected = 1;
- spin_unlock(&export->exp_lock);
-
- /* class_cleanup(), abort_recovery(), and class_fail_export()
- * all end up in here, and if any of them race we shouldn't
- * call extra class_export_puts().
- */
- if (already_disconnected)
- goto no_disconn;
-
- CDEBUG(D_IOCTL, "disconnect: cookie %#llx\n",
- export->exp_handle.h_cookie);
-
- class_unlink_export(export);
-no_disconn:
- class_export_put(export);
- return 0;
-}
-EXPORT_SYMBOL(class_disconnect);
-
-void class_fail_export(struct obd_export *exp)
-{
- int rc, already_failed;
-
- spin_lock(&exp->exp_lock);
- already_failed = exp->exp_failed;
- exp->exp_failed = 1;
- spin_unlock(&exp->exp_lock);
-
- if (already_failed) {
- CDEBUG(D_HA, "disconnecting dead export %p/%s; skipping\n",
- exp, exp->exp_client_uuid.uuid);
- return;
- }
-
- CDEBUG(D_HA, "disconnecting export %p/%s\n",
- exp, exp->exp_client_uuid.uuid);
-
- if (obd_dump_on_timeout)
- libcfs_debug_dumplog();
-
- /* take a reference so CDEBUG can be called safely after obd_disconnect */
- class_export_get(exp);
-
- /* Most callers into obd_disconnect are removing their own reference
- * (request, for example) in addition to the one from the hash table.
- * We don't have such a reference here, so make one.
- */
- class_export_get(exp);
- rc = obd_disconnect(exp);
- if (rc)
- CERROR("disconnecting export %p failed: %d\n", exp, rc);
- else
- CDEBUG(D_HA, "disconnected export %p/%s\n",
- exp, exp->exp_client_uuid.uuid);
- class_export_put(exp);
-}
-EXPORT_SYMBOL(class_fail_export);
-
-#if LUSTRE_TRACKS_LOCK_EXP_REFS
-void (*class_export_dump_hook)(struct obd_export *) = NULL;
-#endif
-
-/**
- * Add export to the obd_zombie thread and notify it.
- */
-static void obd_zombie_export_add(struct obd_export *exp)
-{
- spin_lock(&exp->exp_obd->obd_dev_lock);
- LASSERT(!list_empty(&exp->exp_obd_chain));
- list_del_init(&exp->exp_obd_chain);
- spin_unlock(&exp->exp_obd->obd_dev_lock);
- queue_work(zombie_wq, &exp->exp_zombie_work);
-}
-
-/**
- * Add import to the obd_zombie thread and notify it.
- */
-static void obd_zombie_import_add(struct obd_import *imp)
-{
- LASSERT(!imp->imp_sec);
- queue_work(zombie_wq, &imp->imp_zombie_work);
-}
-
-/**
- * Wait until the obd_zombie import/export queues become empty.
- */
-void obd_zombie_barrier(void)
-{
- flush_workqueue(zombie_wq);
-}
-EXPORT_SYMBOL(obd_zombie_barrier);
-
-/**
- * Start the zombie import/export destruction workqueue.
- */
-int obd_zombie_impexp_init(void)
-{
- zombie_wq = alloc_workqueue("obd_zombid", 0, 0);
- if (!zombie_wq)
- return -ENOMEM;
-
- return 0;
-}
-
-/**
- * Stop the zombie import/export destruction workqueue.
- */
-void obd_zombie_impexp_stop(void)
-{
- destroy_workqueue(zombie_wq);
-}
-
-struct obd_request_slot_waiter {
- struct list_head orsw_entry;
- wait_queue_head_t orsw_waitq;
- bool orsw_signaled;
-};
-
-static bool obd_request_slot_avail(struct client_obd *cli,
- struct obd_request_slot_waiter *orsw)
-{
- bool avail;
-
- spin_lock(&cli->cl_loi_list_lock);
- avail = !!list_empty(&orsw->orsw_entry);
- spin_unlock(&cli->cl_loi_list_lock);
-
- return avail;
-}
-
-/*
- * For network flow control, an RPC sender needs to acquire a credit
- * before sending the RPC. The number of credits for a connection is
- * defined by "cl_max_rpcs_in_flight". If all the credits are occupied,
- * subsequent RPC senders need to wait until others release their
- * credits or the administrator increases "cl_max_rpcs_in_flight".
- */
-int obd_get_request_slot(struct client_obd *cli)
-{
- struct obd_request_slot_waiter orsw;
- int rc;
-
- spin_lock(&cli->cl_loi_list_lock);
- if (cli->cl_r_in_flight < cli->cl_max_rpcs_in_flight) {
- cli->cl_r_in_flight++;
- spin_unlock(&cli->cl_loi_list_lock);
- return 0;
- }
-
- init_waitqueue_head(&orsw.orsw_waitq);
- list_add_tail(&orsw.orsw_entry, &cli->cl_loi_read_list);
- orsw.orsw_signaled = false;
- spin_unlock(&cli->cl_loi_list_lock);
-
- rc = l_wait_event_abortable(orsw.orsw_waitq,
- obd_request_slot_avail(cli, &orsw) ||
- orsw.orsw_signaled);
-
- /*
- * Take the lock here to avoid the on-stack 'orsw' being freed while
- * another thread (such as obd_put_request_slot) is still using it.
- */
- spin_lock(&cli->cl_loi_list_lock);
- if (rc) {
- if (!orsw.orsw_signaled) {
- if (list_empty(&orsw.orsw_entry))
- cli->cl_r_in_flight--;
- else
- list_del(&orsw.orsw_entry);
- }
- }
-
- if (orsw.orsw_signaled) {
- LASSERT(list_empty(&orsw.orsw_entry));
-
- rc = -EINTR;
- }
- spin_unlock(&cli->cl_loi_list_lock);
-
- return rc;
-}
-EXPORT_SYMBOL(obd_get_request_slot);
-
-void obd_put_request_slot(struct client_obd *cli)
-{
- struct obd_request_slot_waiter *orsw;
-
- spin_lock(&cli->cl_loi_list_lock);
- cli->cl_r_in_flight--;
-
- /* If there is a free slot, wake up the first waiter. */
- if (!list_empty(&cli->cl_loi_read_list) &&
- likely(cli->cl_r_in_flight < cli->cl_max_rpcs_in_flight)) {
- orsw = list_entry(cli->cl_loi_read_list.next,
- struct obd_request_slot_waiter, orsw_entry);
- list_del_init(&orsw->orsw_entry);
- cli->cl_r_in_flight++;
- wake_up(&orsw->orsw_waitq);
- }
- spin_unlock(&cli->cl_loi_list_lock);
-}
-EXPORT_SYMBOL(obd_put_request_slot);
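
[Editorial note] The get/put pair above is a counting-semaphore pattern built from a spinlock and a waitqueue. As a rough illustration, the same credit logic can be sketched in userspace C, with a mutex and condition variable standing in for cl_loi_list_lock and the waitqueue; all names here are invented, and unlike the kernel code a condition variable does not guarantee the FIFO wakeup order that cl_loi_read_list provides:

    /* Minimal userspace sketch of the request-slot credit pattern. */
    #include <pthread.h>

    struct slot_pool {
            pthread_mutex_t lock;
            pthread_cond_t  freed;
            unsigned int    in_flight;
            unsigned int    max_in_flight;
    };

    #define SLOT_POOL_INIT(max) \
            { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, (max) }

    /* Block until a credit is available, then take it
     * (compare obd_get_request_slot()). */
    static void slot_get(struct slot_pool *p)
    {
            pthread_mutex_lock(&p->lock);
            while (p->in_flight >= p->max_in_flight)
                    pthread_cond_wait(&p->freed, &p->lock);
            p->in_flight++;
            pthread_mutex_unlock(&p->lock);
    }

    /* Return a credit and wake one waiter (compare obd_put_request_slot()). */
    static void slot_put(struct slot_pool *p)
    {
            pthread_mutex_lock(&p->lock);
            p->in_flight--;
            pthread_cond_signal(&p->freed);
            pthread_mutex_unlock(&p->lock);
    }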
-
-__u32 obd_get_max_rpcs_in_flight(struct client_obd *cli)
-{
- return cli->cl_max_rpcs_in_flight;
-}
-EXPORT_SYMBOL(obd_get_max_rpcs_in_flight);
-
-int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max)
-{
- struct obd_request_slot_waiter *orsw;
- const char *typ_name;
- __u32 old;
- int diff;
- int rc;
- int i;
-
- if (max > OBD_MAX_RIF_MAX || max < 1)
- return -ERANGE;
-
- typ_name = cli->cl_import->imp_obd->obd_type->typ_name;
- if (!strcmp(typ_name, LUSTRE_MDC_NAME)) {
- /*
- * adjust max_mod_rpcs_in_flight to ensure it is always
- * strictly lower than max_rpcs_in_flight
- */
- if (max < 2) {
- CERROR("%s: cannot set max_rpcs_in_flight to 1 because it must be higher than max_mod_rpcs_in_flight value\n",
- cli->cl_import->imp_obd->obd_name);
- return -ERANGE;
- }
- if (max <= cli->cl_max_mod_rpcs_in_flight) {
- rc = obd_set_max_mod_rpcs_in_flight(cli, max - 1);
- if (rc)
- return rc;
- }
- }
-
- spin_lock(&cli->cl_loi_list_lock);
- old = cli->cl_max_rpcs_in_flight;
- cli->cl_max_rpcs_in_flight = max;
- diff = max - old;
-
- /* If max_rpcs_in_flight was increased, wake up some waiters. */
- for (i = 0; i < diff; i++) {
- if (list_empty(&cli->cl_loi_read_list))
- break;
-
- orsw = list_entry(cli->cl_loi_read_list.next,
- struct obd_request_slot_waiter, orsw_entry);
- list_del_init(&orsw->orsw_entry);
- cli->cl_r_in_flight++;
- wake_up(&orsw->orsw_waitq);
- }
- spin_unlock(&cli->cl_loi_list_lock);
-
- return 0;
-}
-EXPORT_SYMBOL(obd_set_max_rpcs_in_flight);
-
-int obd_set_max_mod_rpcs_in_flight(struct client_obd *cli, __u16 max)
-{
- struct obd_connect_data *ocd;
- u16 maxmodrpcs;
- u16 prev;
-
- if (max > OBD_MAX_RIF_MAX || max < 1)
- return -ERANGE;
-
- /* cannot exceed or equal max_rpcs_in_flight */
- if (max >= cli->cl_max_rpcs_in_flight) {
- CERROR("%s: can't set max_mod_rpcs_in_flight to a value (%hu) higher or equal to max_rpcs_in_flight value (%u)\n",
- cli->cl_import->imp_obd->obd_name,
- max, cli->cl_max_rpcs_in_flight);
- return -ERANGE;
- }
-
- /* cannot exceed max modify RPCs in flight supported by the server */
- ocd = &cli->cl_import->imp_connect_data;
- if (ocd->ocd_connect_flags & OBD_CONNECT_MULTIMODRPCS)
- maxmodrpcs = ocd->ocd_maxmodrpcs;
- else
- maxmodrpcs = 1;
- if (max > maxmodrpcs) {
- CERROR("%s: can't set max_mod_rpcs_in_flight to a value (%hu) higher than max_mod_rpcs_per_client value (%hu) returned by the server at connection\n",
- cli->cl_import->imp_obd->obd_name,
- max, maxmodrpcs);
- return -ERANGE;
- }
-
- spin_lock(&cli->cl_mod_rpcs_lock);
-
- prev = cli->cl_max_mod_rpcs_in_flight;
- cli->cl_max_mod_rpcs_in_flight = max;
-
- /* wakeup waiters if limit has been increased */
- if (cli->cl_max_mod_rpcs_in_flight > prev)
- wake_up(&cli->cl_mod_rpcs_waitq);
-
- spin_unlock(&cli->cl_mod_rpcs_lock);
-
- return 0;
-}
-EXPORT_SYMBOL(obd_set_max_mod_rpcs_in_flight);
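
[Editorial note] Taken together, the two setters maintain the invariant 1 <= max_mod_rpcs_in_flight < max_rpcs_in_flight <= OBD_MAX_RIF_MAX on MDC devices. A minimal sketch of the combined check, with an illustrative stand-in value for OBD_MAX_RIF_MAX (not the kernel's actual constant):

    #include <stdbool.h>

    #define OBD_MAX_RIF_MAX 512     /* illustrative value only */

    /* Check the invariant enforced by the two setters above. */
    static bool rif_limits_valid(unsigned int max_rpcs, unsigned int max_mod)
    {
            return max_rpcs >= 1 && max_rpcs <= OBD_MAX_RIF_MAX &&
                   max_mod  >= 1 && max_mod  <  max_rpcs;
    }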
-
-#define pct(a, b) ((b) ? ((a) * 100) / (b) : 0)
-
-int obd_mod_rpc_stats_seq_show(struct client_obd *cli, struct seq_file *seq)
-{
- unsigned long mod_tot = 0, mod_cum;
- struct timespec64 now;
- int i;
-
- ktime_get_real_ts64(&now);
-
- spin_lock(&cli->cl_mod_rpcs_lock);
-
- seq_printf(seq, "snapshot_time: %llu.%9lu (secs.nsecs)\n",
- (s64)now.tv_sec, (unsigned long)now.tv_nsec);
- seq_printf(seq, "modify_RPCs_in_flight: %hu\n",
- cli->cl_mod_rpcs_in_flight);
-
- seq_puts(seq, "\n\t\t\tmodify\n");
- seq_puts(seq, "rpcs in flight rpcs %% cum %%\n");
-
- mod_tot = lprocfs_oh_sum(&cli->cl_mod_rpcs_hist);
-
- mod_cum = 0;
- for (i = 0; i < OBD_HIST_MAX; i++) {
- unsigned long mod = cli->cl_mod_rpcs_hist.oh_buckets[i];
-
- mod_cum += mod;
- seq_printf(seq, "%d:\t\t%10lu %3lu %3lu\n",
- i, mod, pct(mod, mod_tot),
- pct(mod_cum, mod_tot));
- if (mod_cum == mod_tot)
- break;
- }
-
- spin_unlock(&cli->cl_mod_rpcs_lock);
-
- return 0;
-}
-EXPORT_SYMBOL(obd_mod_rpc_stats_seq_show);
-#undef pct
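
[Editorial note] The stats output above is a cumulative histogram: each row shows a bucket's share of the total and the running share so far, and the loop stops early once the cumulative count reaches the total. A self-contained userspace rendering of the same arithmetic, with made-up bucket values:

    #include <stdio.h>

    #define pct(a, b) ((b) ? ((a) * 100) / (b) : 0)

    int main(void)
    {
            unsigned long buckets[] = { 5, 12, 3, 0, 1 };
            unsigned long tot = 0, cum = 0;
            size_t i, n = sizeof(buckets) / sizeof(buckets[0]);

            for (i = 0; i < n; i++)
                    tot += buckets[i];
            for (i = 0; i < n; i++) {
                    cum += buckets[i];
                    printf("%zu:\t%10lu %3lu %3lu\n", i, buckets[i],
                           pct(buckets[i], tot), pct(cum, tot));
                    if (cum == tot) /* everything accounted for */
                            break;
            }
            return 0;
    }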
-
-/*
- * The number of modify RPCs sent in parallel is limited
- * because the server has a finite number of slots per client in
- * which to store request results and ensure reply reconstruction
- * when needed. On the client, this limit is stored in
- * cl_max_mod_rpcs_in_flight, which takes into account the server
- * limit and the cl_max_rpcs_in_flight value.
- * On the MDC client, to avoid a potential deadlock (see Bugzilla 3462),
- * one close request is allowed above the maximum.
- */
-static inline bool obd_mod_rpc_slot_avail_locked(struct client_obd *cli,
- bool close_req)
-{
- bool avail;
-
- /* A slot is available if
- * - number of modify RPCs in flight is less than the max
- * - it's a close RPC and no other close request is in flight
- */
- avail = cli->cl_mod_rpcs_in_flight < cli->cl_max_mod_rpcs_in_flight ||
- (close_req && !cli->cl_close_rpcs_in_flight);
-
- return avail;
-}
-
-static inline bool obd_mod_rpc_slot_avail(struct client_obd *cli,
- bool close_req)
-{
- bool avail;
-
- spin_lock(&cli->cl_mod_rpcs_lock);
- avail = obd_mod_rpc_slot_avail_locked(cli, close_req);
- spin_unlock(&cli->cl_mod_rpcs_lock);
- return avail;
-}
-
-/* Get a modify RPC slot from the obd client @cli according
- * to the kind of operation @opc that is going to be sent
- * and the intent @it of the operation if it applies.
- * If the maximum number of modify RPCs in flight is reached
- * the thread is put to sleep.
- * Returns the tag to be set in the request message. Tag 0
- * is reserved for non-modifying requests.
- */
-u16 obd_get_mod_rpc_slot(struct client_obd *cli, __u32 opc,
- struct lookup_intent *it)
-{
- bool close_req = false;
- u16 i, max;
-
- /* read-only metadata RPCs don't consume a slot on MDT
- * for reply reconstruction
- */
- if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
- it->it_op == IT_LAYOUT || it->it_op == IT_READDIR))
- return 0;
-
- if (opc == MDS_CLOSE)
- close_req = true;
-
- do {
- spin_lock(&cli->cl_mod_rpcs_lock);
- max = cli->cl_max_mod_rpcs_in_flight;
- if (obd_mod_rpc_slot_avail_locked(cli, close_req)) {
- /* there is a slot available */
- cli->cl_mod_rpcs_in_flight++;
- if (close_req)
- cli->cl_close_rpcs_in_flight++;
- lprocfs_oh_tally(&cli->cl_mod_rpcs_hist,
- cli->cl_mod_rpcs_in_flight);
- /* find a free tag */
- i = find_first_zero_bit(cli->cl_mod_tag_bitmap,
- max + 1);
- LASSERT(i < OBD_MAX_RIF_MAX);
- LASSERT(!test_and_set_bit(i, cli->cl_mod_tag_bitmap));
- spin_unlock(&cli->cl_mod_rpcs_lock);
- /* tag 0 is reserved for non-modify RPCs */
- return i + 1;
- }
- spin_unlock(&cli->cl_mod_rpcs_lock);
-
- CDEBUG(D_RPCTRACE, "%s: sleeping for a modify RPC slot opc %u, max %hu\n",
- cli->cl_import->imp_obd->obd_name, opc, max);
-
- wait_event_idle(cli->cl_mod_rpcs_waitq,
- obd_mod_rpc_slot_avail(cli, close_req));
- } while (true);
-}
-EXPORT_SYMBOL(obd_get_mod_rpc_slot);
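
[Editorial note] Tag allocation above is a find-first-zero-bit scan over cl_mod_tag_bitmap, shifted by one because tag 0 is reserved for non-modifying RPCs. A standalone sketch of the same scheme, using a single 64-bit word in place of the kernel bitmap:

    #include <stdint.h>
    #include <stdio.h>

    /* Find the first clear bit, set it, and return bit + 1 as the tag. */
    static unsigned int tag_alloc(uint64_t *bitmap, unsigned int max)
    {
            unsigned int i;

            for (i = 0; i < max + 1 && i < 64; i++) {
                    if (!(*bitmap & (UINT64_C(1) << i))) {
                            *bitmap |= UINT64_C(1) << i;
                            return i + 1;   /* tag 0 is reserved */
                    }
            }
            return 0;       /* no free tag; the kernel code cannot hit this */
    }

    static void tag_free(uint64_t *bitmap, unsigned int tag)
    {
            *bitmap &= ~(UINT64_C(1) << (tag - 1));
    }

    int main(void)
    {
            uint64_t bitmap = 0;
            unsigned int t1 = tag_alloc(&bitmap, 8);
            unsigned int t2 = tag_alloc(&bitmap, 8);

            printf("t1=%u t2=%u\n", t1, t2);                /* t1=1 t2=2 */
            tag_free(&bitmap, t1);
            printf("reused=%u\n", tag_alloc(&bitmap, 8));   /* reused=1 */
            return 0;
    }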
-
-/*
- * Release a modify RPC slot obtained from the obd client @cli for the
- * kind of operation @opc that has been sent and the intent @it of the
- * operation, if it applies.
- */
-void obd_put_mod_rpc_slot(struct client_obd *cli, u32 opc,
- struct lookup_intent *it, u16 tag)
-{
- bool close_req = false;
-
- if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
- it->it_op == IT_LAYOUT || it->it_op == IT_READDIR))
- return;
-
- if (opc == MDS_CLOSE)
- close_req = true;
-
- spin_lock(&cli->cl_mod_rpcs_lock);
- cli->cl_mod_rpcs_in_flight--;
- if (close_req)
- cli->cl_close_rpcs_in_flight--;
- /* release the tag in the bitmap */
- LASSERT(tag - 1 < OBD_MAX_RIF_MAX);
- LASSERT(test_and_clear_bit(tag - 1, cli->cl_mod_tag_bitmap) != 0);
- spin_unlock(&cli->cl_mod_rpcs_lock);
- wake_up(&cli->cl_mod_rpcs_waitq);
-}
-EXPORT_SYMBOL(obd_put_mod_rpc_slot);
diff --git a/drivers/staging/lustre/lustre/obdclass/kernelcomm.c b/drivers/staging/lustre/lustre/obdclass/kernelcomm.c
deleted file mode 100644
index 63067a7f1e19..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/kernelcomm.c
+++ /dev/null
@@ -1,240 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Author: Nathan Rutman <nathan.rutman@sun.com>
- *
- * Kernel <-> userspace communication routines.
- * Using pipes for all arches.
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-#define D_KUC D_OTHER
-
-#include <linux/file.h>
-#include <linux/libcfs/libcfs.h>
-#include <obd_support.h>
-#include <lustre_kernelcomm.h>
-
-/**
- * libcfs_kkuc_msg_put - send a message from kernel to userspace
- * @param filp file to send the message to
- * @param payload Payload data. The first field of the payload is always
- * a struct kuc_hdr
- */
-int libcfs_kkuc_msg_put(struct file *filp, void *payload)
-{
- struct kuc_hdr *kuch = (struct kuc_hdr *)payload;
- ssize_t count = kuch->kuc_msglen;
- loff_t offset = 0;
- int rc = -ENXIO;
-
- if (IS_ERR_OR_NULL(filp))
- return -EBADF;
-
- if (kuch->kuc_magic != KUC_MAGIC) {
- CERROR("KernelComm: bad magic %x\n", kuch->kuc_magic);
- return rc;
- }
-
- while (count > 0) {
- rc = kernel_write(filp, payload, count, &offset);
- if (rc < 0)
- break;
- count -= rc;
- payload += rc;
- rc = 0;
- }
-
- if (rc < 0)
- CWARN("message send failed (%d)\n", rc);
- else
- CDEBUG(D_KUC, "Sent message rc=%d, fp=%p\n", rc, filp);
-
- return rc;
-}
-EXPORT_SYMBOL(libcfs_kkuc_msg_put);
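
[Editorial note] kernel_write() may write fewer bytes than requested, so the function loops, advancing the payload pointer until everything is sent. The same short-write loop in userspace terms, using write(2) in place of kernel_write():

    #include <unistd.h>
    #include <errno.h>

    /* Keep writing until the whole payload is consumed or an error occurs. */
    static int write_full(int fd, const char *payload, size_t count)
    {
            while (count > 0) {
                    ssize_t rc = write(fd, payload, count);

                    if (rc < 0) {
                            if (errno == EINTR)
                                    continue;       /* retry interrupted writes */
                            return -errno;
                    }
                    count -= rc;
                    payload += rc;
            }
            return 0;
    }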
-
-/*
- * Broadcast groups are global across all mounted filesystems;
- * i.e. registering for a group on one filesystem will get messages
- * for that group from any filesystem
- */
-/** A single group registration has a uid and a file pointer */
-struct kkuc_reg {
- struct list_head kr_chain;
- int kr_uid;
- struct file *kr_fp;
- char kr_data[];
-};
-
-static struct list_head kkuc_groups[KUC_GRP_MAX + 1] = {};
-/* Protect message sending against concurrent adds and removes */
-static DECLARE_RWSEM(kg_sem);
-
-/** Add a receiver to a broadcast group
- * @param filp pipe to write into
- * @param uid identifier for this receiver
- * @param group group number
- * @param data user data
- */
-int libcfs_kkuc_group_add(struct file *filp, int uid, unsigned int group,
- void *data, size_t data_len)
-{
- struct kkuc_reg *reg;
-
- if (group > KUC_GRP_MAX) {
- CDEBUG(D_WARNING, "Kernelcomm: bad group %d\n", group);
- return -EINVAL;
- }
-
- /* fput in group_rem */
- if (!filp)
- return -EBADF;
-
- /* freed in group_rem */
- reg = kmalloc(sizeof(*reg) + data_len, GFP_KERNEL);
- if (!reg)
- return -ENOMEM;
-
- reg->kr_fp = filp;
- reg->kr_uid = uid;
- memcpy(reg->kr_data, data, data_len);
-
- down_write(&kg_sem);
- if (!kkuc_groups[group].next)
- INIT_LIST_HEAD(&kkuc_groups[group]);
- list_add(&reg->kr_chain, &kkuc_groups[group]);
- up_write(&kg_sem);
-
- CDEBUG(D_KUC, "Added uid=%d fp=%p to group %d\n", uid, filp, group);
-
- return 0;
-}
-EXPORT_SYMBOL(libcfs_kkuc_group_add);
-
-int libcfs_kkuc_group_rem(int uid, unsigned int group)
-{
- struct kkuc_reg *reg, *next;
-
- if (!kkuc_groups[group].next)
- return 0;
-
- if (!uid) {
- /* Broadcast a shutdown message */
- struct kuc_hdr lh;
-
- lh.kuc_magic = KUC_MAGIC;
- lh.kuc_transport = KUC_TRANSPORT_GENERIC;
- lh.kuc_msgtype = KUC_MSG_SHUTDOWN;
- lh.kuc_msglen = sizeof(lh);
- libcfs_kkuc_group_put(group, &lh);
- }
-
- down_write(&kg_sem);
- list_for_each_entry_safe(reg, next, &kkuc_groups[group], kr_chain) {
- if (!uid || (uid == reg->kr_uid)) {
- list_del(&reg->kr_chain);
- CDEBUG(D_KUC, "Removed uid=%d fp=%p from group %d\n",
- reg->kr_uid, reg->kr_fp, group);
- if (reg->kr_fp)
- fput(reg->kr_fp);
- kfree(reg);
- }
- }
- up_write(&kg_sem);
-
- return 0;
-}
-EXPORT_SYMBOL(libcfs_kkuc_group_rem);
-
-int libcfs_kkuc_group_put(unsigned int group, void *payload)
-{
- struct kkuc_reg *reg;
- int rc = 0;
- int one_success = 0;
-
- down_write(&kg_sem);
- list_for_each_entry(reg, &kkuc_groups[group], kr_chain) {
- if (reg->kr_fp) {
- rc = libcfs_kkuc_msg_put(reg->kr_fp, payload);
- if (!rc) {
- one_success = 1;
- } else if (rc == -EPIPE) {
- fput(reg->kr_fp);
- reg->kr_fp = NULL;
- }
- }
- }
- up_write(&kg_sem);
-
- /*
- * don't return an error if the message has been delivered
- * to at least one agent
- */
- if (one_success)
- rc = 0;
-
- return rc;
-}
-EXPORT_SYMBOL(libcfs_kkuc_group_put);
-
-/**
- * Calls a callback function for each link of the given kuc group.
- * @param group the group to call the function on.
- * @param cb_func the function to be called.
- * @param cb_arg extra argument to be passed to the callback function.
- */
-int libcfs_kkuc_group_foreach(unsigned int group, libcfs_kkuc_cb_t cb_func,
- void *cb_arg)
-{
- struct kkuc_reg *reg;
- int rc = 0;
-
- if (group > KUC_GRP_MAX) {
- CDEBUG(D_WARNING, "Kernelcomm: bad group %d\n", group);
- return -EINVAL;
- }
-
- /* no link for this group */
- if (!kkuc_groups[group].next)
- return 0;
-
- down_read(&kg_sem);
- list_for_each_entry(reg, &kkuc_groups[group], kr_chain) {
- if (reg->kr_fp)
- rc = cb_func(reg->kr_data, cb_arg);
- }
- up_read(&kg_sem);
-
- return rc;
-}
-EXPORT_SYMBOL(libcfs_kkuc_group_foreach);
diff --git a/drivers/staging/lustre/lustre/obdclass/linkea.c b/drivers/staging/lustre/lustre/obdclass/linkea.c
deleted file mode 100644
index 74c99ee216bb..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/linkea.c
+++ /dev/null
@@ -1,249 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2013, 2014, Intel Corporation.
- * Use is subject to license terms.
- *
- * Author: Di Wang <di.wang@intel.com>
- */
-
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <obd.h>
-#include <lustre_linkea.h>
-
-int linkea_data_new(struct linkea_data *ldata, struct lu_buf *buf)
-{
- buf->lb_buf = kzalloc(PAGE_SIZE, GFP_NOFS);
- if (!buf->lb_buf)
- return -ENOMEM;
- buf->lb_len = PAGE_SIZE;
- ldata->ld_buf = buf;
- ldata->ld_leh = ldata->ld_buf->lb_buf;
- ldata->ld_leh->leh_magic = LINK_EA_MAGIC;
- ldata->ld_leh->leh_len = sizeof(struct link_ea_header);
- ldata->ld_leh->leh_reccount = 0;
- ldata->ld_leh->leh_overflow_time = 0;
- ldata->ld_leh->leh_padding = 0;
- return 0;
-}
-EXPORT_SYMBOL(linkea_data_new);
-
-int linkea_init(struct linkea_data *ldata)
-{
- struct link_ea_header *leh;
-
- LASSERT(ldata->ld_buf);
- leh = ldata->ld_buf->lb_buf;
- if (leh->leh_magic == __swab32(LINK_EA_MAGIC)) {
- leh->leh_magic = LINK_EA_MAGIC;
- leh->leh_reccount = __swab32(leh->leh_reccount);
- leh->leh_len = __swab64(leh->leh_len);
- leh->leh_overflow_time = __swab32(leh->leh_overflow_time);
- leh->leh_padding = __swab32(leh->leh_padding);
- /* individual entries are swabbed by linkea_entry_unpack() */
- }
-
- if (leh->leh_magic != LINK_EA_MAGIC)
- return -EINVAL;
-
- if (leh->leh_reccount == 0 && leh->leh_overflow_time == 0)
- return -ENODATA;
-
- ldata->ld_leh = leh;
- return 0;
-}
-EXPORT_SYMBOL(linkea_init);
-
-int linkea_init_with_rec(struct linkea_data *ldata)
-{
- int rc;
-
- rc = linkea_init(ldata);
- if (!rc && ldata->ld_leh->leh_reccount == 0)
- rc = -ENODATA;
-
- return rc;
-}
-EXPORT_SYMBOL(linkea_init_with_rec);
-
-/**
- * Pack a link_ea_entry.
- * All elements are stored as chars to avoid alignment issues.
- * Numbers are always big-endian
- * \retval record length
- */
-int linkea_entry_pack(struct link_ea_entry *lee, const struct lu_name *lname,
- const struct lu_fid *pfid)
-{
- struct lu_fid tmpfid;
- int reclen;
-
- tmpfid = *pfid;
- if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LINKEA_CRASH))
- tmpfid.f_ver = ~0;
- fid_cpu_to_be(&tmpfid, &tmpfid);
- memcpy(&lee->lee_parent_fid, &tmpfid, sizeof(tmpfid));
- memcpy(lee->lee_name, lname->ln_name, lname->ln_namelen);
- reclen = sizeof(struct link_ea_entry) + lname->ln_namelen;
-
- lee->lee_reclen[0] = (reclen >> 8) & 0xff;
- lee->lee_reclen[1] = reclen & 0xff;
- return reclen;
-}
-EXPORT_SYMBOL(linkea_entry_pack);
-
-void linkea_entry_unpack(const struct link_ea_entry *lee, int *reclen,
- struct lu_name *lname, struct lu_fid *pfid)
-{
- LASSERT(lee);
-
- *reclen = (lee->lee_reclen[0] << 8) | lee->lee_reclen[1];
- memcpy(pfid, &lee->lee_parent_fid, sizeof(*pfid));
- fid_be_to_cpu(pfid, pfid);
- if (lname) {
- lname->ln_name = lee->lee_name;
- lname->ln_namelen = *reclen - sizeof(struct link_ea_entry);
- }
-}
-EXPORT_SYMBOL(linkea_entry_unpack);
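
[Editorial note] The record length is stored as two separate chars in big-endian order, so the on-disk format has no alignment or host-endianness dependence. A standalone demo of that encoding, mirroring the shift/mask logic of the pack/unpack pair above:

    #include <assert.h>
    #include <stdio.h>

    static void reclen_pack(unsigned char out[2], int reclen)
    {
            out[0] = (reclen >> 8) & 0xff;  /* high byte first: big-endian */
            out[1] = reclen & 0xff;
    }

    static int reclen_unpack(const unsigned char in[2])
    {
            return (in[0] << 8) | in[1];
    }

    int main(void)
    {
            unsigned char buf[2];

            reclen_pack(buf, 300);
            assert(reclen_unpack(buf) == 300);
            printf("bytes: %02x %02x -> %d\n", buf[0], buf[1],
                   reclen_unpack(buf));
            return 0;
    }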
-
-/**
- * Add a record to the end of the link ea buf
- */
-int linkea_add_buf(struct linkea_data *ldata, const struct lu_name *lname,
- const struct lu_fid *pfid)
-{
- struct link_ea_header *leh = ldata->ld_leh;
- int reclen;
-
- LASSERT(leh);
-
- if (!lname || !pfid)
- return -EINVAL;
-
- reclen = lname->ln_namelen + sizeof(struct link_ea_entry);
- if (unlikely(leh->leh_len + reclen > MAX_LINKEA_SIZE)) {
- /*
- * Use 32 bits to store the overflow time. Although this shrinks
- * the 64-bit value returned by ktime_get_real_seconds() down to
- * 32 bits, the range is still quite large and good for about 140
- * years. That is enough.
- */
- leh->leh_overflow_time = ktime_get_real_seconds();
- if (unlikely(leh->leh_overflow_time == 0))
- leh->leh_overflow_time++;
-
- CDEBUG(D_INODE, "No enough space to hold linkea entry '" DFID ": %.*s' at %u\n",
- PFID(pfid), lname->ln_namelen,
- lname->ln_name, leh->leh_overflow_time);
- return 0;
- }
-
- if (leh->leh_len + reclen > ldata->ld_buf->lb_len) {
- /* Note: this never happens as MAX_LINKEA_SIZE is 4096, while
- * the initial allocation is PAGE_SIZE.
- */
- void *b = krealloc(ldata->ld_buf->lb_buf, leh->leh_len + reclen, GFP_NOFS);
- if (!b)
- return -ENOMEM;
-
- ldata->ld_buf->lb_len = leh->leh_len + reclen;
- leh = ldata->ld_leh = ldata->ld_buf->lb_buf = b;
- }
-
- ldata->ld_lee = ldata->ld_buf->lb_buf + leh->leh_len;
- ldata->ld_reclen = linkea_entry_pack(ldata->ld_lee, lname, pfid);
- leh->leh_len += ldata->ld_reclen;
- leh->leh_reccount++;
- CDEBUG(D_INODE, "New link_ea name '" DFID ":%.*s' is added\n",
- PFID(pfid), lname->ln_namelen, lname->ln_name);
- return 0;
-}
-EXPORT_SYMBOL(linkea_add_buf);
-
-/** Del the current record from the link ea buf */
-void linkea_del_buf(struct linkea_data *ldata, const struct lu_name *lname)
-{
- LASSERT(ldata->ld_leh && ldata->ld_lee);
- LASSERT(ldata->ld_leh->leh_reccount > 0);
-
- ldata->ld_leh->leh_reccount--;
- ldata->ld_leh->leh_len -= ldata->ld_reclen;
- memmove(ldata->ld_lee, (char *)ldata->ld_lee + ldata->ld_reclen,
- (char *)ldata->ld_leh + ldata->ld_leh->leh_len -
- (char *)ldata->ld_lee);
- CDEBUG(D_INODE, "Old link_ea name '%.*s' is removed\n",
- lname->ln_namelen, lname->ln_name);
-
- if ((char *)ldata->ld_lee >= ((char *)ldata->ld_leh +
- ldata->ld_leh->leh_len))
- ldata->ld_lee = NULL;
-}
-EXPORT_SYMBOL(linkea_del_buf);
-
-/**
- * Check if such a link exists in linkEA.
- *
- * \param ldata link data the search to be done on
- * \param lname name in the parent's directory entry pointing to this object
- * \param pfid parent fid the link to be found for
- *
- * \retval 0 success
- * \retval -ENOENT link does not exist
- * \retval -ve on error
- */
-int linkea_links_find(struct linkea_data *ldata, const struct lu_name *lname,
- const struct lu_fid *pfid)
-{
- struct lu_name tmpname;
- struct lu_fid tmpfid;
- int count;
-
- LASSERT(ldata->ld_leh);
-
- /* link #0, if leh_reccount == 0 we skip the loop and return -ENOENT */
- if (likely(ldata->ld_leh->leh_reccount > 0))
- ldata->ld_lee = (struct link_ea_entry *)(ldata->ld_leh + 1);
-
- for (count = 0; count < ldata->ld_leh->leh_reccount; count++) {
- linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen,
- &tmpname, &tmpfid);
- if (tmpname.ln_namelen == lname->ln_namelen &&
- lu_fid_eq(&tmpfid, pfid) &&
- (strncmp(tmpname.ln_name, lname->ln_name,
- tmpname.ln_namelen) == 0))
- break;
- ldata->ld_lee = (struct link_ea_entry *)((char *)ldata->ld_lee +
- ldata->ld_reclen);
- }
-
- if (count == ldata->ld_leh->leh_reccount) {
- CDEBUG(D_INODE, "Old link_ea name '%.*s' not found\n",
- lname->ln_namelen, lname->ln_name);
- ldata->ld_lee = NULL;
- ldata->ld_reclen = 0;
- return -ENOENT;
- }
- return 0;
-}
-EXPORT_SYMBOL(linkea_links_find);
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
deleted file mode 100644
index 9c800580053b..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
+++ /dev/null
@@ -1,514 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdclass/linux/linux-module.c
- *
- * Object Devices Class Driver
- * These are the only exported functions; they provide some generic
- * infrastructure for managing object devices
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/lp.h>
-#include <linux/slab.h>
-#include <linux/ioport.h>
-#include <linux/fcntl.h>
-#include <linux/delay.h>
-#include <linux/skbuff.h>
-#include <linux/fs.h>
-#include <linux/poll.h>
-#include <linux/list.h>
-#include <linux/highmem.h>
-#include <linux/io.h>
-#include <asm/ioctls.h>
-#include <linux/uaccess.h>
-#include <linux/miscdevice.h>
-#include <linux/seq_file.h>
-#include <linux/kobject.h>
-
-#include <uapi/linux/lnet/lnetctl.h>
-#include <obd_support.h>
-#include <obd_class.h>
-#include <lprocfs_status.h>
-#include <uapi/linux/lustre/lustre_ioctl.h>
-#include <uapi/linux/lustre/lustre_ver.h>
-
-#define OBD_MAX_IOCTL_BUFFER 8192
-
-static int obd_ioctl_is_invalid(struct obd_ioctl_data *data)
-{
- if (data->ioc_len > BIT(30)) {
- CERROR("OBD ioctl: ioc_len larger than 1<<30\n");
- return 1;
- }
-
- if (data->ioc_inllen1 > BIT(30)) {
- CERROR("OBD ioctl: ioc_inllen1 larger than 1<<30\n");
- return 1;
- }
-
- if (data->ioc_inllen2 > BIT(30)) {
- CERROR("OBD ioctl: ioc_inllen2 larger than 1<<30\n");
- return 1;
- }
-
- if (data->ioc_inllen3 > BIT(30)) {
- CERROR("OBD ioctl: ioc_inllen3 larger than 1<<30\n");
- return 1;
- }
-
- if (data->ioc_inllen4 > BIT(30)) {
- CERROR("OBD ioctl: ioc_inllen4 larger than 1<<30\n");
- return 1;
- }
-
- if (data->ioc_inlbuf1 && data->ioc_inllen1 == 0) {
- CERROR("OBD ioctl: inlbuf1 pointer but 0 length\n");
- return 1;
- }
-
- if (data->ioc_inlbuf2 && data->ioc_inllen2 == 0) {
- CERROR("OBD ioctl: inlbuf2 pointer but 0 length\n");
- return 1;
- }
-
- if (data->ioc_inlbuf3 && data->ioc_inllen3 == 0) {
- CERROR("OBD ioctl: inlbuf3 pointer but 0 length\n");
- return 1;
- }
-
- if (data->ioc_inlbuf4 && data->ioc_inllen4 == 0) {
- CERROR("OBD ioctl: inlbuf4 pointer but 0 length\n");
- return 1;
- }
-
- if (data->ioc_pbuf1 && data->ioc_plen1 == 0) {
- CERROR("OBD ioctl: pbuf1 pointer but 0 length\n");
- return 1;
- }
-
- if (data->ioc_pbuf2 && data->ioc_plen2 == 0) {
- CERROR("OBD ioctl: pbuf2 pointer but 0 length\n");
- return 1;
- }
-
- if (!data->ioc_pbuf1 && data->ioc_plen1 != 0) {
- CERROR("OBD ioctl: plen1 set but NULL pointer\n");
- return 1;
- }
-
- if (!data->ioc_pbuf2 && data->ioc_plen2 != 0) {
- CERROR("OBD ioctl: plen2 set but NULL pointer\n");
- return 1;
- }
-
- if (obd_ioctl_packlen(data) > data->ioc_len) {
- CERROR("OBD ioctl: packlen exceeds ioc_len (%d > %d)\n",
- obd_ioctl_packlen(data), data->ioc_len);
- return 1;
- }
-
- return 0;
-}
-
-/* buffer MUST be at least the size of obd_ioctl_hdr */
-int obd_ioctl_getdata(char **buf, int *len, void __user *arg)
-{
- struct obd_ioctl_hdr hdr;
- struct obd_ioctl_data *data;
- int err;
- int offset = 0;
-
- if (copy_from_user(&hdr, arg, sizeof(hdr)))
- return -EFAULT;
-
- if (hdr.ioc_version != OBD_IOCTL_VERSION) {
- CERROR("Version mismatch kernel (%x) vs application (%x)\n",
- OBD_IOCTL_VERSION, hdr.ioc_version);
- return -EINVAL;
- }
-
- if (hdr.ioc_len > OBD_MAX_IOCTL_BUFFER) {
- CERROR("User buffer len %d exceeds %d max buffer\n",
- hdr.ioc_len, OBD_MAX_IOCTL_BUFFER);
- return -EINVAL;
- }
-
- if (hdr.ioc_len < sizeof(struct obd_ioctl_data)) {
- CERROR("User buffer too small for ioctl (%d)\n", hdr.ioc_len);
- return -EINVAL;
- }
-
- /* When many processes call vmalloc on a multi-core system, the
- * high lock contention hurts performance badly; obdfilter-survey,
- * which relies on ioctl, is one example. So we'd better avoid
- * vmalloc on the ioctl path. LU-66
- */
- *buf = kvzalloc(hdr.ioc_len, GFP_KERNEL);
- if (!*buf) {
- CERROR("Cannot allocate control buffer of len %d\n",
- hdr.ioc_len);
- return -EINVAL;
- }
- *len = hdr.ioc_len;
- data = (struct obd_ioctl_data *)*buf;
-
- if (copy_from_user(*buf, arg, hdr.ioc_len)) {
- err = -EFAULT;
- goto free_buf;
- }
- if (hdr.ioc_len != data->ioc_len) {
- err = -EINVAL;
- goto free_buf;
- }
-
- if (obd_ioctl_is_invalid(data)) {
- CERROR("ioctl not correctly formatted\n");
- err = -EINVAL;
- goto free_buf;
- }
-
- if (data->ioc_inllen1) {
- data->ioc_inlbuf1 = &data->ioc_bulk[0];
- offset += cfs_size_round(data->ioc_inllen1);
- }
-
- if (data->ioc_inllen2) {
- data->ioc_inlbuf2 = &data->ioc_bulk[0] + offset;
- offset += cfs_size_round(data->ioc_inllen2);
- }
-
- if (data->ioc_inllen3) {
- data->ioc_inlbuf3 = &data->ioc_bulk[0] + offset;
- offset += cfs_size_round(data->ioc_inllen3);
- }
-
- if (data->ioc_inllen4)
- data->ioc_inlbuf4 = &data->ioc_bulk[0] + offset;
-
- return 0;
-
-free_buf:
- kvfree(*buf);
- return err;
-}
-EXPORT_SYMBOL(obd_ioctl_getdata);
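
[Editorial note] The function reads the fixed-size header first, validates the version and length, copies the full buffer, and then re-checks that the embedded length still matches the header, since a racing caller could change it between the two copies. A hedged userspace sketch of that two-phase pattern; struct and constant names are invented, and memcpy stands in for copy_from_user:

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>
    #include <errno.h>

    #define DEMO_VERSION 0x10       /* illustrative, not OBD_IOCTL_VERSION */
    #define DEMO_MAX_LEN 8192

    struct demo_hdr {
            uint32_t version;
            uint32_t len;           /* total length including this header */
    };

    static int demo_getdata(char **buf, const void *user, size_t user_len)
    {
            struct demo_hdr hdr;

            /* phase 1: copy and validate just the header */
            if (user_len < sizeof(hdr))
                    return -EFAULT;
            memcpy(&hdr, user, sizeof(hdr));
            if (hdr.version != DEMO_VERSION)
                    return -EINVAL;
            if (hdr.len < sizeof(hdr) || hdr.len > DEMO_MAX_LEN ||
                hdr.len > user_len)
                    return -EINVAL;

            /* phase 2: copy the full buffer */
            *buf = calloc(1, hdr.len);
            if (!*buf)
                    return -ENOMEM;
            memcpy(*buf, user, hdr.len);

            /* re-check: in the kernel a racing caller could have changed
             * the length between the two copy_from_user() calls */
            if (((struct demo_hdr *)*buf)->len != hdr.len) {
                    free(*buf);
                    return -EINVAL;
            }
            return 0;
    }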
-
-/* opening /dev/obd */
-static int obd_class_open(struct inode *inode, struct file *file)
-{
- try_module_get(THIS_MODULE);
- return 0;
-}
-
-/* closing /dev/obd */
-static int obd_class_release(struct inode *inode, struct file *file)
-{
- module_put(THIS_MODULE);
- return 0;
-}
-
-/* to control /dev/obd */
-static long obd_class_ioctl(struct file *filp, unsigned int cmd,
- unsigned long arg)
-{
- int err = 0;
-
- /* Allow non-root access for OBD_IOC_PING_TARGET - used by lfs check */
- if (!capable(CAP_SYS_ADMIN) && (cmd != OBD_IOC_PING_TARGET))
- return -EACCES;
- if ((cmd & 0xffffff00) == ((int)'T') << 8) /* ignore all tty ioctls */
- return -ENOTTY;
-
- err = class_handle_ioctl(cmd, (unsigned long)arg);
-
- return err;
-}
-
-/* declare character device */
-static const struct file_operations obd_psdev_fops = {
- .owner = THIS_MODULE,
- .unlocked_ioctl = obd_class_ioctl,
- .open = obd_class_open,
- .release = obd_class_release,
-};
-
-/* modules setup */
-struct miscdevice obd_psdev = {
- .minor = MISC_DYNAMIC_MINOR,
- .name = OBD_DEV_NAME,
- .fops = &obd_psdev_fops,
-};
-
-static ssize_t version_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- return sprintf(buf, "%s\n", LUSTRE_VERSION_STRING);
-}
-
-static ssize_t pinger_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- return sprintf(buf, "%s\n", "on");
-}
-
-static ssize_t
-health_check_show(struct kobject *kobj, struct attribute *attr, char *buf)
-{
- bool healthy = true;
- int i;
- size_t len = 0;
-
- if (libcfs_catastrophe)
- return sprintf(buf, "LBUG\n");
-
- read_lock(&obd_dev_lock);
- for (i = 0; i < class_devno_max(); i++) {
- struct obd_device *obd;
-
- obd = class_num2obd(i);
- if (!obd || !obd->obd_attached || !obd->obd_set_up)
- continue;
-
- LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
- if (obd->obd_stopping)
- continue;
-
- class_incref(obd, __func__, current);
- read_unlock(&obd_dev_lock);
-
- if (obd_health_check(NULL, obd))
- healthy = false;
- class_decref(obd, __func__, current);
- read_lock(&obd_dev_lock);
- }
- read_unlock(&obd_dev_lock);
-
- if (healthy)
- len = sprintf(buf, "healthy\n");
- else
- len = sprintf(buf, "NOT HEALTHY\n");
-
- return len;
-}
-
-static ssize_t jobid_var_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%s\n", obd_jobid_var);
-}
-
-static ssize_t jobid_var_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- if (!count || count > JOBSTATS_JOBID_VAR_MAX_LEN)
- return -EINVAL;
-
- memset(obd_jobid_var, 0, JOBSTATS_JOBID_VAR_MAX_LEN + 1);
-
- memcpy(obd_jobid_var, buffer, count);
-
- /* Trim the trailing '\n' if any */
- if (obd_jobid_var[count - 1] == '\n')
- obd_jobid_var[count - 1] = 0;
-
- return count;
-}
-
-static ssize_t jobid_name_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%s\n", obd_jobid_node);
-}
-
-static ssize_t jobid_name_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- if (!count || count > LUSTRE_JOBID_SIZE)
- return -EINVAL;
-
- memcpy(obd_jobid_node, buffer, count);
-
- obd_jobid_node[count] = 0;
-
- /* Trim the trailing '\n' if any */
- if (obd_jobid_node[count - 1] == '\n')
- obd_jobid_node[count - 1] = 0;
-
- return count;
-}
-
-/* Root for /sys/kernel/debug/lustre */
-struct dentry *debugfs_lustre_root;
-EXPORT_SYMBOL_GPL(debugfs_lustre_root);
-
-LUSTRE_RO_ATTR(version);
-LUSTRE_RO_ATTR(pinger);
-LUSTRE_RO_ATTR(health_check);
-LUSTRE_RW_ATTR(jobid_var);
-LUSTRE_RW_ATTR(jobid_name);
-
-static struct attribute *lustre_attrs[] = {
- &lustre_attr_version.attr,
- &lustre_attr_pinger.attr,
- &lustre_attr_health_check.attr,
- &lustre_attr_jobid_name.attr,
- &lustre_attr_jobid_var.attr,
- NULL,
-};
-
-static void *obd_device_list_seq_start(struct seq_file *p, loff_t *pos)
-{
- if (*pos >= class_devno_max())
- return NULL;
-
- return pos;
-}
-
-static void obd_device_list_seq_stop(struct seq_file *p, void *v)
-{
-}
-
-static void *obd_device_list_seq_next(struct seq_file *p, void *v, loff_t *pos)
-{
- ++*pos;
- if (*pos >= class_devno_max())
- return NULL;
-
- return pos;
-}
-
-static int obd_device_list_seq_show(struct seq_file *p, void *v)
-{
- loff_t index = *(loff_t *)v;
- struct obd_device *obd = class_num2obd((int)index);
- char *status;
-
- if (!obd)
- return 0;
-
- LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
- if (obd->obd_stopping)
- status = "ST";
- else if (obd->obd_inactive)
- status = "IN";
- else if (obd->obd_set_up)
- status = "UP";
- else if (obd->obd_attached)
- status = "AT";
- else
- status = "--";
-
- seq_printf(p, "%3d %s %s %s %s %d\n",
- (int)index, status, obd->obd_type->typ_name,
- obd->obd_name, obd->obd_uuid.uuid,
- atomic_read(&obd->obd_refcount));
- return 0;
-}
-
-static const struct seq_operations obd_device_list_sops = {
- .start = obd_device_list_seq_start,
- .stop = obd_device_list_seq_stop,
- .next = obd_device_list_seq_next,
- .show = obd_device_list_seq_show,
-};
-
-static int obd_device_list_open(struct inode *inode, struct file *file)
-{
- struct seq_file *seq;
- int rc = seq_open(file, &obd_device_list_sops);
-
- if (rc)
- return rc;
-
- seq = file->private_data;
- seq->private = inode->i_private;
-
- return 0;
-}
-
-static const struct file_operations obd_device_list_fops = {
- .owner = THIS_MODULE,
- .open = obd_device_list_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-struct kobject *lustre_kobj;
-EXPORT_SYMBOL_GPL(lustre_kobj);
-
-static const struct attribute_group lustre_attr_group = {
- .attrs = lustre_attrs,
-};
-
-int class_procfs_init(void)
-{
- int rc = -ENOMEM;
-
- lustre_kobj = kobject_create_and_add("lustre", fs_kobj);
- if (!lustre_kobj)
- goto out;
-
- /* Create the files associated with this kobject */
- rc = sysfs_create_group(lustre_kobj, &lustre_attr_group);
- if (rc) {
- kobject_put(lustre_kobj);
- goto out;
- }
-
- debugfs_lustre_root = debugfs_create_dir("lustre", NULL);
-
- debugfs_create_file("devices", 0444, debugfs_lustre_root, NULL,
- &obd_device_list_fops);
-out:
- return rc;
-}
-
-int class_procfs_clean(void)
-{
- debugfs_remove_recursive(debugfs_lustre_root);
-
- debugfs_lustre_root = NULL;
-
- sysfs_remove_group(lustre_kobj, &lustre_attr_group);
- kobject_put(lustre_kobj);
-
- return 0;
-}
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
deleted file mode 100644
index e5e8687784ee..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
+++ /dev/null
@@ -1,162 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/module.h>
-#include <linux/sysctl.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/stat.h>
-#include <linux/ctype.h>
-#include <linux/bitops.h>
-#include <linux/uaccess.h>
-#include <linux/utsname.h>
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <obd_support.h>
-#include <lprocfs_status.h>
-#include <obd_class.h>
-
-struct static_lustre_uintvalue_attr {
- struct {
- struct attribute attr;
- ssize_t (*show)(struct kobject *kobj, struct attribute *attr,
- char *buf);
- ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
- const char *buf, size_t len);
- } u;
- int *value;
-};
-
-static ssize_t static_uintvalue_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct static_lustre_uintvalue_attr *lattr = (void *)attr;
-
- return sprintf(buf, "%d\n", *lattr->value);
-}
-
-static ssize_t static_uintvalue_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer, size_t count)
-{
- struct static_lustre_uintvalue_attr *lattr = (void *)attr;
- int rc;
- unsigned int val;
-
- rc = kstrtouint(buffer, 10, &val);
- if (rc)
- return rc;
-
- *lattr->value = val;
-
- return count;
-}
-
-#define LUSTRE_STATIC_UINT_ATTR(name, value) \
-static struct static_lustre_uintvalue_attr lustre_sattr_##name = \
- {__ATTR(name, 0644, \
- static_uintvalue_show, \
- static_uintvalue_store),\
- value }
-
-LUSTRE_STATIC_UINT_ATTR(timeout, &obd_timeout);
-
-static ssize_t max_dirty_mb_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- return sprintf(buf, "%lu\n",
- obd_max_dirty_pages / (1 << (20 - PAGE_SHIFT)));
-}
-
-static ssize_t max_dirty_mb_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer, size_t count)
-{
- int rc;
- unsigned long val;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- val *= 1 << (20 - PAGE_SHIFT); /* convert to pages */
-
- if (val > ((totalram_pages / 10) * 9)) {
- /* Somebody wants to assign too much memory to dirty pages */
- return -EINVAL;
- }
-
- if (val < 4 << (20 - PAGE_SHIFT)) {
- /* Less than 4 MB for the dirty cache is also bad */
- return -EINVAL;
- }
-
- obd_max_dirty_pages = val;
-
- return count;
-}
-LUSTRE_RW_ATTR(max_dirty_mb);
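
[Editorial note] The show/store pair converts between megabytes and pages with shifts: a MiB is 2^20 bytes and a page is 2^PAGE_SHIFT bytes, so there are 1 << (20 - PAGE_SHIFT) pages per MiB. A quick demo of the arithmetic, assuming 4 KiB pages:

    #include <stdio.h>

    #define PAGE_SHIFT 12   /* assumption: 4 KiB pages */

    int main(void)
    {
            unsigned long mb = 64;
            unsigned long pages = mb * (1UL << (20 - PAGE_SHIFT));

            /* 2^20 bytes/MiB over 2^12 bytes/page = 256 pages per MiB */
            printf("%lu MiB = %lu pages\n", mb, pages);
            printf("%lu pages = %lu MiB\n", pages,
                   pages / (1UL << (20 - PAGE_SHIFT)));
            return 0;
    }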
-
-LUSTRE_STATIC_UINT_ATTR(debug_peer_on_timeout, &obd_debug_peer_on_timeout);
-LUSTRE_STATIC_UINT_ATTR(dump_on_timeout, &obd_dump_on_timeout);
-LUSTRE_STATIC_UINT_ATTR(dump_on_eviction, &obd_dump_on_eviction);
-LUSTRE_STATIC_UINT_ATTR(at_min, &at_min);
-LUSTRE_STATIC_UINT_ATTR(at_max, &at_max);
-LUSTRE_STATIC_UINT_ATTR(at_extra, &at_extra);
-LUSTRE_STATIC_UINT_ATTR(at_early_margin, &at_early_margin);
-LUSTRE_STATIC_UINT_ATTR(at_history, &at_history);
-
-static struct attribute *lustre_attrs[] = {
- &lustre_sattr_timeout.u.attr,
- &lustre_attr_max_dirty_mb.attr,
- &lustre_sattr_debug_peer_on_timeout.u.attr,
- &lustre_sattr_dump_on_timeout.u.attr,
- &lustre_sattr_dump_on_eviction.u.attr,
- &lustre_sattr_at_min.u.attr,
- &lustre_sattr_at_max.u.attr,
- &lustre_sattr_at_extra.u.attr,
- &lustre_sattr_at_early_margin.u.attr,
- &lustre_sattr_at_history.u.attr,
- NULL,
-};
-
-static const struct attribute_group lustre_attr_group = {
- .attrs = lustre_attrs,
-};
-
-int obd_sysctl_init(void)
-{
- return sysfs_create_group(lustre_kobj, &lustre_attr_group);
-}
diff --git a/drivers/staging/lustre/lustre/obdclass/llog.c b/drivers/staging/lustre/lustre/obdclass/llog.c
deleted file mode 100644
index bba84eae1e19..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/llog.c
+++ /dev/null
@@ -1,524 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdclass/llog.c
- *
- * OST<->MDS recovery logging infrastructure.
- * Invariants in implementation:
- * - we do not share logs among different OST<->MDS connections, so that
- * if an OST or MDS fails it need only look at log(s) relevant to itself
- *
- * Author: Andreas Dilger <adilger@clusterfs.com>
- * Author: Alex Zhuravlev <bzzz@whamcloud.com>
- * Author: Mikhail Pershin <tappro@whamcloud.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LOG
-
-#include <linux/kthread.h>
-#include <llog_swab.h>
-#include <lustre_log.h>
-#include <obd_class.h>
-#include "llog_internal.h"
-
-/*
- * Allocate a new log or catalog handle
- * Used inside llog_open().
- */
-static struct llog_handle *llog_alloc_handle(void)
-{
- struct llog_handle *loghandle;
-
- loghandle = kzalloc(sizeof(*loghandle), GFP_NOFS);
- if (!loghandle)
- return NULL;
-
- init_rwsem(&loghandle->lgh_lock);
- spin_lock_init(&loghandle->lgh_hdr_lock);
- INIT_LIST_HEAD(&loghandle->u.phd.phd_entry);
- atomic_set(&loghandle->lgh_refcount, 1);
-
- return loghandle;
-}
-
-/*
- * Free the llog handle and its header data, if any. Used only in llog_close()
- */
-static void llog_free_handle(struct llog_handle *loghandle)
-{
- /* failed llog_init_handle */
- if (!loghandle->lgh_hdr)
- goto out;
-
- if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN)
- LASSERT(list_empty(&loghandle->u.phd.phd_entry));
- else if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_CAT)
- LASSERT(list_empty(&loghandle->u.chd.chd_head));
- kvfree(loghandle->lgh_hdr);
-out:
- kfree(loghandle);
-}
-
-void llog_handle_get(struct llog_handle *loghandle)
-{
- atomic_inc(&loghandle->lgh_refcount);
-}
-
-void llog_handle_put(struct llog_handle *loghandle)
-{
- LASSERT(atomic_read(&loghandle->lgh_refcount) > 0);
- if (atomic_dec_and_test(&loghandle->lgh_refcount))
- llog_free_handle(loghandle);
-}
-
-static int llog_read_header(const struct lu_env *env,
- struct llog_handle *handle,
- struct obd_uuid *uuid)
-{
- struct llog_operations *lop;
- int rc;
-
- rc = llog_handle2ops(handle, &lop);
- if (rc)
- return rc;
-
- if (!lop->lop_read_header)
- return -EOPNOTSUPP;
-
- rc = lop->lop_read_header(env, handle);
- if (rc == LLOG_EEMPTY) {
- struct llog_log_hdr *llh = handle->lgh_hdr;
- size_t len;
-
- /* lrh_len should be initialized in llog_init_handle */
- handle->lgh_last_idx = 0; /* header is record with index 0 */
- llh->llh_count = 1; /* for the header record */
- llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC;
- LASSERT(handle->lgh_ctxt->loc_chunk_size >= LLOG_MIN_CHUNK_SIZE);
- llh->llh_hdr.lrh_len = handle->lgh_ctxt->loc_chunk_size;
- llh->llh_hdr.lrh_index = 0;
- llh->llh_timestamp = ktime_get_real_seconds();
- if (uuid)
- memcpy(&llh->llh_tgtuuid, uuid,
- sizeof(llh->llh_tgtuuid));
- llh->llh_bitmap_offset = offsetof(typeof(*llh), llh_bitmap);
- /*
- * Since updating the llog header might also call this function,
- * reset the bitmap to 0 here
- */
- len = llh->llh_hdr.lrh_len - llh->llh_bitmap_offset;
- memset(LLOG_HDR_BITMAP(llh), 0, len - sizeof(llh->llh_tail));
- ext2_set_bit(0, LLOG_HDR_BITMAP(llh));
- LLOG_HDR_TAIL(llh)->lrt_len = llh->llh_hdr.lrh_len;
- LLOG_HDR_TAIL(llh)->lrt_index = llh->llh_hdr.lrh_index;
- rc = 0;
- }
- return rc;
-}
-
-int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
- int flags, struct obd_uuid *uuid)
-{
- int chunk_size = handle->lgh_ctxt->loc_chunk_size;
- enum llog_flag fmt = flags & LLOG_F_EXT_MASK;
- struct llog_log_hdr *llh;
- int rc;
-
- LASSERT(!handle->lgh_hdr);
-
- LASSERT(chunk_size >= LLOG_MIN_CHUNK_SIZE);
- llh = kvzalloc(sizeof(*llh), GFP_KERNEL);
- if (!llh)
- return -ENOMEM;
- handle->lgh_hdr = llh;
- handle->lgh_hdr_size = chunk_size;
- /* first assign flags to use llog_client_ops */
- llh->llh_flags = flags;
- rc = llog_read_header(env, handle, uuid);
- if (rc == 0) {
- if (unlikely((llh->llh_flags & LLOG_F_IS_PLAIN &&
- flags & LLOG_F_IS_CAT) ||
- (llh->llh_flags & LLOG_F_IS_CAT &&
- flags & LLOG_F_IS_PLAIN))) {
- CERROR("%s: llog type is %s but initializing %s\n",
- handle->lgh_ctxt->loc_obd->obd_name,
- llh->llh_flags & LLOG_F_IS_CAT ?
- "catalog" : "plain",
- flags & LLOG_F_IS_CAT ? "catalog" : "plain");
- rc = -EINVAL;
- goto out;
- } else if (llh->llh_flags &
- (LLOG_F_IS_PLAIN | LLOG_F_IS_CAT)) {
- /*
- * it is possible to open a llog without specifying its
- * type, in which case the type is taken from llh_flags
- */
- flags = llh->llh_flags;
- } else {
- /* for some reason llh_flags has no type set */
- CERROR("llog type is not specified!\n");
- rc = -EINVAL;
- goto out;
- }
- if (unlikely(uuid &&
- !obd_uuid_equals(uuid, &llh->llh_tgtuuid))) {
- CERROR("%s: llog uuid mismatch: %s/%s\n",
- handle->lgh_ctxt->loc_obd->obd_name,
- (char *)uuid->uuid,
- (char *)llh->llh_tgtuuid.uuid);
- rc = -EEXIST;
- goto out;
- }
- }
- if (flags & LLOG_F_IS_CAT) {
- LASSERT(list_empty(&handle->u.chd.chd_head));
- INIT_LIST_HEAD(&handle->u.chd.chd_head);
- llh->llh_size = sizeof(struct llog_logid_rec);
- llh->llh_flags |= LLOG_F_IS_FIXSIZE;
- } else if (!(flags & LLOG_F_IS_PLAIN)) {
- CERROR("%s: unknown flags: %#x (expected %#x or %#x)\n",
- handle->lgh_ctxt->loc_obd->obd_name,
- flags, LLOG_F_IS_CAT, LLOG_F_IS_PLAIN);
- rc = -EINVAL;
- }
- llh->llh_flags |= fmt;
-out:
- if (rc) {
- kvfree(llh);
- handle->lgh_hdr = NULL;
- }
- return rc;
-}
-EXPORT_SYMBOL(llog_init_handle);
-
-static int llog_process_thread(void *arg)
-{
- struct llog_process_info *lpi = arg;
- struct llog_handle *loghandle = lpi->lpi_loghandle;
- struct llog_log_hdr *llh = loghandle->lgh_hdr;
- struct llog_process_cat_data *cd = lpi->lpi_catdata;
- char *buf;
- u64 cur_offset, tmp_offset;
- int chunk_size;
- int rc = 0, index = 1, last_index;
- int saved_index = 0;
- int last_called_index = 0;
-
- if (!llh)
- return -EINVAL;
-
- cur_offset = llh->llh_hdr.lrh_len;
- chunk_size = llh->llh_hdr.lrh_len;
- /* expect chunk_size to be power of two */
- LASSERT(is_power_of_2(chunk_size));
-
- buf = kvzalloc(chunk_size, GFP_NOFS);
- if (!buf) {
- lpi->lpi_rc = -ENOMEM;
- return 0;
- }
-
- if (cd) {
- last_called_index = cd->lpcd_first_idx;
- index = cd->lpcd_first_idx + 1;
- }
- if (cd && cd->lpcd_last_idx)
- last_index = cd->lpcd_last_idx;
- else
- last_index = LLOG_HDR_BITMAP_SIZE(llh) - 1;
-
- while (rc == 0) {
- unsigned int buf_offset = 0;
- struct llog_rec_hdr *rec;
- bool partial_chunk;
- off_t chunk_offset;
-
- /* skip records not set in bitmap */
- while (index <= last_index &&
- !ext2_test_bit(index, LLOG_HDR_BITMAP(llh)))
- ++index;
-
- if (index > last_index)
- break;
-
- CDEBUG(D_OTHER, "index: %d last_index %d\n",
- index, last_index);
-repeat:
- /* get the buf with our target record; avoid old garbage */
- memset(buf, 0, chunk_size);
- rc = llog_next_block(lpi->lpi_env, loghandle, &saved_index,
- index, &cur_offset, buf, chunk_size);
- if (rc)
- goto out;
-
- /*
- * NB: after the llog_next_block() call, cur_offset is the
- * offset of the block following the one just read.
- * The absolute offset of the current chunk is calculated
- * from the cur_offset value and stored in the chunk_offset
- * variable.
- */
- tmp_offset = cur_offset;
- if (do_div(tmp_offset, chunk_size)) {
- partial_chunk = true;
- chunk_offset = cur_offset & ~(chunk_size - 1);
- } else {
- partial_chunk = false;
- chunk_offset = cur_offset - chunk_size;
- }
-
- /* NB: when rec->lrh_len is accessed it is already swabbed
- * since it is used at the "end" of the loop and the rec
- * swabbing is done at the beginning of the loop.
- */
- for (rec = (struct llog_rec_hdr *)(buf + buf_offset);
- (char *)rec < buf + chunk_size;
- rec = llog_rec_hdr_next(rec)) {
- CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n",
- rec, rec->lrh_type);
-
- if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
- lustre_swab_llog_rec(rec);
-
- CDEBUG(D_OTHER, "after swabbing, type=%#x idx=%d\n",
- rec->lrh_type, rec->lrh_index);
-
- /*
- * For a partial chunk the end is zeroed; check for
- * index 0 to detect it.
- */
- if (partial_chunk && !rec->lrh_index) {
- /* A concurrent llog_add() might append new records
- * while we are processing; check whether that is the
- * case and re-read the current chunk if so.
- */
- if (index > loghandle->lgh_last_idx) {
- rc = 0;
- goto out;
- }
- CDEBUG(D_OTHER, "Re-read last llog buffer for new records, index %u, last %u\n",
- index, loghandle->lgh_last_idx);
- /* save offset inside buffer for the re-read */
- buf_offset = (char *)rec - (char *)buf;
- cur_offset = chunk_offset;
- goto repeat;
- }
-
- if (!rec->lrh_len || rec->lrh_len > chunk_size) {
- CWARN("invalid length %d in llog record for index %d/%d\n",
- rec->lrh_len,
- rec->lrh_index, index);
- rc = -EINVAL;
- goto out;
- }
-
- if (rec->lrh_index < index) {
- CDEBUG(D_OTHER, "skipping lrh_index %d\n",
- rec->lrh_index);
- continue;
- }
-
- if (rec->lrh_index != index) {
- CERROR("%s: Invalid record: index %u but expected %u\n",
- loghandle->lgh_ctxt->loc_obd->obd_name,
- rec->lrh_index, index);
- rc = -ERANGE;
- goto out;
- }
-
- CDEBUG(D_OTHER,
- "lrh_index: %d lrh_len: %d (%d remains)\n",
- rec->lrh_index, rec->lrh_len,
- (int)(buf + chunk_size - (char *)rec));
-
- loghandle->lgh_cur_idx = rec->lrh_index;
- loghandle->lgh_cur_offset = (char *)rec - (char *)buf +
- chunk_offset;
-
- /* if set, process the callback on this record */
- if (ext2_test_bit(index, LLOG_HDR_BITMAP(llh))) {
- rc = lpi->lpi_cb(lpi->lpi_env, loghandle, rec,
- lpi->lpi_cbdata);
- last_called_index = index;
- if (rc)
- goto out;
- }
-
- /* exit if the last index is reached */
- if (index >= last_index) {
- rc = 0;
- goto out;
- }
- index++;
- }
- }
-
-out:
- if (cd)
- cd->lpcd_last_idx = last_called_index;
-
- kvfree(buf);
- lpi->lpi_rc = rc;
- return 0;
-}
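
[Editorial note] At its core the processing loop walks the index range, skipping indices whose bitmap bit is clear and invoking the callback for those that are set; the chunk re-reading above only exists to cope with records appended concurrently. A stripped-down userspace sketch of that bitmap-driven iteration (record payloads omitted):

    #include <stdio.h>

    static int bit_set(const unsigned long *map, int idx)
    {
            return (map[idx / (8 * sizeof(long))] >>
                    (idx % (8 * sizeof(long)))) & 1;
    }

    int main(void)
    {
            unsigned long bitmap[1] = { 0 };
            int index, last_index = 10;

            bitmap[0] |= 1UL << 2;  /* records exist at indices 2, 3 and 7 */
            bitmap[0] |= 1UL << 3;
            bitmap[0] |= 1UL << 7;

            for (index = 1; index <= last_index; index++) {
                    if (!bit_set(bitmap, index))
                            continue;       /* skip records not in bitmap */
                    printf("processing record %d\n", index);
            }
            return 0;
    }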
-
-static int llog_process_thread_daemonize(void *arg)
-{
- struct llog_process_info *lpi = arg;
- struct lu_env env;
- int rc;
-
- unshare_fs_struct();
-
- /* client env has no keys, tags is just 0 */
- rc = lu_env_init(&env, LCT_LOCAL | LCT_MG_THREAD);
- if (rc)
- goto out;
- lpi->lpi_env = &env;
-
- rc = llog_process_thread(arg);
-
- lu_env_fini(&env);
-out:
- complete(&lpi->lpi_completion);
- return rc;
-}
-
-int llog_process_or_fork(const struct lu_env *env,
- struct llog_handle *loghandle,
- llog_cb_t cb, void *data, void *catdata, bool fork)
-{
- struct llog_process_info *lpi;
- int rc;
-
- lpi = kzalloc(sizeof(*lpi), GFP_NOFS);
- if (!lpi)
- return -ENOMEM;
- lpi->lpi_loghandle = loghandle;
- lpi->lpi_cb = cb;
- lpi->lpi_cbdata = data;
- lpi->lpi_catdata = catdata;
-
- if (fork) {
- struct task_struct *task;
-
- /* The new thread can't use the parent's env; a new one
- * is initialized in llog_process_thread_daemonize.
- */
- lpi->lpi_env = NULL;
- init_completion(&lpi->lpi_completion);
- task = kthread_run(llog_process_thread_daemonize, lpi,
- "llog_process_thread");
- if (IS_ERR(task)) {
- rc = PTR_ERR(task);
- CERROR("%s: cannot start thread: rc = %d\n",
- loghandle->lgh_ctxt->loc_obd->obd_name, rc);
- goto out_lpi;
- }
- wait_for_completion(&lpi->lpi_completion);
- } else {
- lpi->lpi_env = env;
- llog_process_thread(lpi);
- }
- rc = lpi->lpi_rc;
-out_lpi:
- kfree(lpi);
- return rc;
-}
-EXPORT_SYMBOL(llog_process_or_fork);
-
-int llog_process(const struct lu_env *env, struct llog_handle *loghandle,
- llog_cb_t cb, void *data, void *catdata)
-{
- return llog_process_or_fork(env, loghandle, cb, data, catdata, true);
-}
-EXPORT_SYMBOL(llog_process);
-
-int llog_open(const struct lu_env *env, struct llog_ctxt *ctxt,
- struct llog_handle **lgh, struct llog_logid *logid,
- char *name, enum llog_open_param open_param)
-{
- const struct cred *old_cred = NULL;
- int rc;
-
- LASSERT(ctxt);
- LASSERT(ctxt->loc_logops);
-
- if (!ctxt->loc_logops->lop_open) {
- *lgh = NULL;
- return -EOPNOTSUPP;
- }
-
- *lgh = llog_alloc_handle();
- if (!*lgh)
- return -ENOMEM;
- (*lgh)->lgh_ctxt = ctxt;
- (*lgh)->lgh_logops = ctxt->loc_logops;
-
- if (cap_raised(current_cap(), CAP_SYS_RESOURCE)) {
- struct cred *cred = prepare_creds();
-
- if (cred) {
- cap_raise(cred->cap_effective, CAP_SYS_RESOURCE);
- old_cred = override_creds(cred);
- }
- }
- rc = ctxt->loc_logops->lop_open(env, *lgh, logid, name, open_param);
- if (old_cred)
- revert_creds(old_cred);
-
- if (rc) {
- llog_free_handle(*lgh);
- *lgh = NULL;
- }
- return rc;
-}
-EXPORT_SYMBOL(llog_open);
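
The credential juggling in llog_open() is the standard kernel pattern for temporarily raising one effective capability around a single operation. In isolation it looks like the sketch below; note that the canonical pattern also drops the prepare_creds() reference with put_cred() once reverted:

static int with_cap_sys_resource(int (*op)(void))
{
	const struct cred *old_cred = NULL;
	struct cred *cred = prepare_creds();
	int rc;

	if (cred) {
		cap_raise(cred->cap_effective, CAP_SYS_RESOURCE);
		old_cred = override_creds(cred);  /* takes its own ref */
	}
	rc = op();				/* privileged operation */
	if (old_cred) {
		revert_creds(old_cred);
		put_cred(cred);		/* drop the prepare_creds() ref */
	}
	return rc;
}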
-
-int llog_close(const struct lu_env *env, struct llog_handle *loghandle)
-{
- struct llog_operations *lop;
- int rc;
-
- rc = llog_handle2ops(loghandle, &lop);
- if (rc)
- goto out;
- if (!lop->lop_close) {
- rc = -EOPNOTSUPP;
- goto out;
- }
- rc = lop->lop_close(env, loghandle);
-out:
- llog_handle_put(loghandle);
- return rc;
-}
-EXPORT_SYMBOL(llog_close);
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_cat.c b/drivers/staging/lustre/lustre/obdclass/llog_cat.c
deleted file mode 100644
index d9c63adff206..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/llog_cat.c
+++ /dev/null
@@ -1,236 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdclass/llog_cat.c
- *
- * OST<->MDS recovery logging infrastructure.
- *
- * Invariants in implementation:
- * - we do not share logs among different OST<->MDS connections, so that
- * if an OST or MDS fails it need only look at log(s) relevant to itself
- *
- * Author: Andreas Dilger <adilger@clusterfs.com>
- * Author: Alexey Zhuravlev <alexey.zhuravlev@intel.com>
- * Author: Mikhail Pershin <mike.pershin@intel.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LOG
-
-#include <obd_class.h>
-
-#include "llog_internal.h"
-
-/* Open an existing log handle and add it to the open list.
- * This log handle will be closed when all of the records in it are removed.
- *
- * Assumes caller has already pushed us into the kernel context and is locking.
- * We take a reference on the handle so that nobody can yank it from us.
- *
- * This takes an extra reference on the llog_handle via llog_handle_get();
- * the caller must drop that reference using llog_handle_put().
- */
-static int llog_cat_id2handle(const struct lu_env *env,
- struct llog_handle *cathandle,
- struct llog_handle **res,
- struct llog_logid *logid)
-{
- struct llog_handle *loghandle;
- enum llog_flag fmt;
- int rc = 0;
-
- if (!cathandle)
- return -EBADF;
-
- fmt = cathandle->lgh_hdr->llh_flags & LLOG_F_EXT_MASK;
- down_write(&cathandle->lgh_lock);
- list_for_each_entry(loghandle, &cathandle->u.chd.chd_head,
- u.phd.phd_entry) {
- struct llog_logid *cgl = &loghandle->lgh_id;
-
- if (ostid_id(&cgl->lgl_oi) == ostid_id(&logid->lgl_oi) &&
- ostid_seq(&cgl->lgl_oi) == ostid_seq(&logid->lgl_oi)) {
- if (cgl->lgl_ogen != logid->lgl_ogen) {
- CERROR("%s: log " DOSTID " generation %x != %x\n",
- loghandle->lgh_ctxt->loc_obd->obd_name,
- POSTID(&logid->lgl_oi), cgl->lgl_ogen,
- logid->lgl_ogen);
- continue;
- }
- loghandle->u.phd.phd_cat_handle = cathandle;
- up_write(&cathandle->lgh_lock);
- rc = 0;
- goto out;
- }
- }
- up_write(&cathandle->lgh_lock);
-
- rc = llog_open(env, cathandle->lgh_ctxt, &loghandle, logid, NULL,
- LLOG_OPEN_EXISTS);
- if (rc < 0) {
- CERROR("%s: error opening log id " DOSTID ":%x: rc = %d\n",
- cathandle->lgh_ctxt->loc_obd->obd_name,
- POSTID(&logid->lgl_oi), logid->lgl_ogen, rc);
- return rc;
- }
-
- rc = llog_init_handle(env, loghandle, fmt | LLOG_F_IS_PLAIN, NULL);
- if (rc < 0) {
- llog_close(env, loghandle);
- loghandle = NULL;
- return rc;
- }
-
- down_write(&cathandle->lgh_lock);
- list_add_tail(&loghandle->u.phd.phd_entry, &cathandle->u.chd.chd_head);
- up_write(&cathandle->lgh_lock);
-
- loghandle->u.phd.phd_cat_handle = cathandle;
- loghandle->u.phd.phd_cookie.lgc_lgl = cathandle->lgh_id;
- loghandle->u.phd.phd_cookie.lgc_index =
- loghandle->lgh_hdr->llh_cat_idx;
-out:
- llog_handle_get(loghandle);
- *res = loghandle;
- return 0;
-}
-
-int llog_cat_close(const struct lu_env *env, struct llog_handle *cathandle)
-{
- struct llog_handle *loghandle, *n;
-
- list_for_each_entry_safe(loghandle, n, &cathandle->u.chd.chd_head,
- u.phd.phd_entry) {
- /* unlink open-not-created llogs */
- list_del_init(&loghandle->u.phd.phd_entry);
- llog_close(env, loghandle);
- }
- /* if handle was stored in ctxt, remove it too */
- if (cathandle->lgh_ctxt->loc_handle == cathandle)
- cathandle->lgh_ctxt->loc_handle = NULL;
- return llog_close(env, cathandle);
-}
-EXPORT_SYMBOL(llog_cat_close);
-
-static int llog_cat_process_cb(const struct lu_env *env,
- struct llog_handle *cat_llh,
- struct llog_rec_hdr *rec, void *data)
-{
- struct llog_process_data *d = data;
- struct llog_logid_rec *lir = (struct llog_logid_rec *)rec;
- struct llog_handle *llh;
- int rc;
-
- if (rec->lrh_type != LLOG_LOGID_MAGIC) {
- CERROR("invalid record in catalog\n");
- return -EINVAL;
- }
- CDEBUG(D_HA, "processing log " DOSTID ":%x at index %u of catalog "
- DOSTID "\n", POSTID(&lir->lid_id.lgl_oi), lir->lid_id.lgl_ogen,
- rec->lrh_index, POSTID(&cat_llh->lgh_id.lgl_oi));
-
- rc = llog_cat_id2handle(env, cat_llh, &llh, &lir->lid_id);
- if (rc) {
- CERROR("%s: cannot find handle for llog " DOSTID ": %d\n",
- cat_llh->lgh_ctxt->loc_obd->obd_name,
- POSTID(&lir->lid_id.lgl_oi), rc);
- return rc;
- }
-
- if (rec->lrh_index < d->lpd_startcat)
- /* Skip processing of the logs until startcat */
- rc = 0;
- else if (d->lpd_startidx > 0) {
- struct llog_process_cat_data cd;
-
- cd.lpcd_first_idx = d->lpd_startidx;
- cd.lpcd_last_idx = 0;
- rc = llog_process_or_fork(env, llh, d->lpd_cb, d->lpd_data,
- &cd, false);
- /* Continue processing the next log from idx 0 */
- d->lpd_startidx = 0;
- } else {
- rc = llog_process_or_fork(env, llh, d->lpd_cb, d->lpd_data,
- NULL, false);
- }
-
- llog_handle_put(llh);
-
- return rc;
-}
-
-static int llog_cat_process_or_fork(const struct lu_env *env,
- struct llog_handle *cat_llh,
- llog_cb_t cb, void *data, int startcat,
- int startidx, bool fork)
-{
- struct llog_process_data d;
- struct llog_log_hdr *llh = cat_llh->lgh_hdr;
- int rc;
-
- LASSERT(llh->llh_flags & LLOG_F_IS_CAT);
- d.lpd_data = data;
- d.lpd_cb = cb;
- d.lpd_startcat = startcat;
- d.lpd_startidx = startidx;
-
- if (llh->llh_cat_idx > cat_llh->lgh_last_idx) {
- struct llog_process_cat_data cd;
-
- CWARN("catlog " DOSTID " crosses index zero\n",
- POSTID(&cat_llh->lgh_id.lgl_oi));
-
- cd.lpcd_first_idx = llh->llh_cat_idx;
- cd.lpcd_last_idx = 0;
- rc = llog_process_or_fork(env, cat_llh, llog_cat_process_cb,
- &d, &cd, fork);
- if (rc != 0)
- return rc;
-
- cd.lpcd_first_idx = 0;
- cd.lpcd_last_idx = cat_llh->lgh_last_idx;
- rc = llog_process_or_fork(env, cat_llh, llog_cat_process_cb,
- &d, &cd, fork);
- } else {
- rc = llog_process_or_fork(env, cat_llh, llog_cat_process_cb,
- &d, NULL, fork);
- }
-
- return rc;
-}
-
-int llog_cat_process(const struct lu_env *env, struct llog_handle *cat_llh,
- llog_cb_t cb, void *data, int startcat, int startidx)
-{
- return llog_cat_process_or_fork(env, cat_llh, cb, data, startcat,
- startidx, false);
-}
-EXPORT_SYMBOL(llog_cat_process);
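
When a catalog has wrapped (llh_cat_idx > lgh_last_idx), llog_cat_process_or_fork() above splits the scan into two passes: from the oldest live index to the end of the bitmap, then from index 0 up to the newest. The same split for a plain circular index range, sketched with integers (names are illustrative):

static void process_circular(unsigned int first, unsigned int last,
			     unsigned int max_idx, void (*fn)(unsigned int))
{
	unsigned int i;

	if (first > last) {
		for (i = first; i <= max_idx; i++)	/* tail: oldest..end */
			fn(i);
		first = 0;				/* then wrap to 0 */
	}
	for (i = first; i <= last; i++)			/* head: 0..newest */
		fn(i);
}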
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_internal.h b/drivers/staging/lustre/lustre/obdclass/llog_internal.h
deleted file mode 100644
index 4991d4e589dc..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/llog_internal.h
+++ /dev/null
@@ -1,79 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __LLOG_INTERNAL_H__
-#define __LLOG_INTERNAL_H__
-
-#include <lustre_log.h>
-
-struct llog_process_info {
- struct llog_handle *lpi_loghandle;
- llog_cb_t lpi_cb;
- void *lpi_cbdata;
- void *lpi_catdata;
- int lpi_rc;
- struct completion lpi_completion;
- const struct lu_env *lpi_env;
-
-};
-
-struct llog_thread_info {
- struct lu_attr lgi_attr;
- struct lu_fid lgi_fid;
- struct lu_buf lgi_buf;
- loff_t lgi_off;
- struct llog_rec_hdr lgi_lrh;
- struct llog_rec_tail lgi_tail;
-};
-
-extern struct lu_context_key llog_thread_key;
-
-int llog_info_init(void);
-void llog_info_fini(void);
-
-void llog_handle_get(struct llog_handle *loghandle);
-void llog_handle_put(struct llog_handle *loghandle);
-int class_config_dump_handler(const struct lu_env *env,
- struct llog_handle *handle,
- struct llog_rec_hdr *rec, void *data);
-int llog_process_or_fork(const struct lu_env *env,
- struct llog_handle *loghandle,
- llog_cb_t cb, void *data, void *catdata, bool fork);
-int llog_cat_cleanup(const struct lu_env *env, struct llog_handle *cathandle,
- struct llog_handle *loghandle, int index);
-
-static inline struct llog_rec_hdr *llog_rec_hdr_next(struct llog_rec_hdr *rec)
-{
- return (struct llog_rec_hdr *)((char *)rec + rec->lrh_len);
-}
-#endif
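
llog_rec_hdr_next() advances by the on-disk lrh_len, so variable-size records can be walked in one pass. A sketch of walking one chunk with it; real callers such as llog_process_thread() earlier in this patch must validate lrh_len because it comes from disk:

static unsigned int count_chunk_recs(char *buf, unsigned int chunk_size)
{
	struct llog_rec_hdr *rec = (struct llog_rec_hdr *)buf;
	unsigned int n = 0;

	while ((char *)rec + sizeof(*rec) <= buf + chunk_size &&
	       rec->lrh_len >= sizeof(*rec) &&
	       (char *)rec + rec->lrh_len <= buf + chunk_size) {
		n++;
		rec = llog_rec_hdr_next(rec);
	}
	return n;
}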
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_obd.c b/drivers/staging/lustre/lustre/obdclass/llog_obd.c
deleted file mode 100644
index 26aea114a29b..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/llog_obd.c
+++ /dev/null
@@ -1,225 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LOG
-
-#include <obd_class.h>
-#include <lustre_log.h>
-#include "llog_internal.h"
-
-/* helper functions for calling the llog obd methods */
-static struct llog_ctxt *llog_new_ctxt(struct obd_device *obd)
-{
- struct llog_ctxt *ctxt;
-
- ctxt = kzalloc(sizeof(*ctxt), GFP_NOFS);
- if (!ctxt)
- return NULL;
-
- ctxt->loc_obd = obd;
- atomic_set(&ctxt->loc_refcount, 1);
-
- return ctxt;
-}
-
-static void llog_ctxt_destroy(struct llog_ctxt *ctxt)
-{
- if (ctxt->loc_exp) {
- class_export_put(ctxt->loc_exp);
- ctxt->loc_exp = NULL;
- }
- if (ctxt->loc_imp) {
- class_import_put(ctxt->loc_imp);
- ctxt->loc_imp = NULL;
- }
- kfree(ctxt);
-}
-
-int __llog_ctxt_put(const struct lu_env *env, struct llog_ctxt *ctxt)
-{
- struct obd_llog_group *olg = ctxt->loc_olg;
- struct obd_device *obd;
- int rc = 0;
-
- spin_lock(&olg->olg_lock);
- if (!atomic_dec_and_test(&ctxt->loc_refcount)) {
- spin_unlock(&olg->olg_lock);
- return rc;
- }
- olg->olg_ctxts[ctxt->loc_idx] = NULL;
- spin_unlock(&olg->olg_lock);
-
- obd = ctxt->loc_obd;
- spin_lock(&obd->obd_dev_lock);
- /* sync with llog ctxt user thread */
- spin_unlock(&obd->obd_dev_lock);
-
- /* obd->obd_starting is needed for the case of cleanup
- * in error case while obd is starting up.
- */
- LASSERTF(obd->obd_starting == 1 ||
- obd->obd_stopping == 1 || obd->obd_set_up == 0,
- "wrong obd state: %d/%d/%d\n", !!obd->obd_starting,
- !!obd->obd_stopping, !!obd->obd_set_up);
-
- /* cleanup the llog ctxt here */
- if (CTXTP(ctxt, cleanup))
- rc = CTXTP(ctxt, cleanup)(env, ctxt);
-
- llog_ctxt_destroy(ctxt);
- wake_up(&olg->olg_waitq);
- return rc;
-}
-EXPORT_SYMBOL(__llog_ctxt_put);
-
-int llog_cleanup(const struct lu_env *env, struct llog_ctxt *ctxt)
-{
- struct obd_llog_group *olg;
- int rc, idx;
-
- olg = ctxt->loc_olg;
- LASSERT(olg);
- LASSERT(olg != LP_POISON);
-
- idx = ctxt->loc_idx;
-
- /*
-	 * Balance the ctxt get when calling llog_cleanup()
- */
- LASSERT(atomic_read(&ctxt->loc_refcount) < LI_POISON);
- LASSERT(atomic_read(&ctxt->loc_refcount) > 1);
- llog_ctxt_put(ctxt);
-
- /*
- * Try to free the ctxt.
- */
- rc = __llog_ctxt_put(env, ctxt);
- if (rc)
- CERROR("Error %d while cleaning up ctxt %p\n",
- rc, ctxt);
-
- l_wait_event_abortable(olg->olg_waitq,
- llog_group_ctxt_null(olg, idx));
-
- return rc;
-}
-EXPORT_SYMBOL(llog_cleanup);
-
-int llog_setup(const struct lu_env *env, struct obd_device *obd,
- struct obd_llog_group *olg, int index,
- struct obd_device *disk_obd, struct llog_operations *op)
-{
- struct llog_ctxt *ctxt;
- int rc = 0;
-
- if (index < 0 || index >= LLOG_MAX_CTXTS)
- return -EINVAL;
-
- LASSERT(olg);
-
- ctxt = llog_new_ctxt(obd);
- if (!ctxt)
- return -ENOMEM;
-
- ctxt->loc_obd = obd;
- ctxt->loc_olg = olg;
- ctxt->loc_idx = index;
- ctxt->loc_logops = op;
- mutex_init(&ctxt->loc_mutex);
- ctxt->loc_exp = class_export_get(disk_obd->obd_self_export);
- ctxt->loc_flags = LLOG_CTXT_FLAG_UNINITIALIZED;
- ctxt->loc_chunk_size = LLOG_MIN_CHUNK_SIZE;
-
- rc = llog_group_set_ctxt(olg, ctxt, index);
- if (rc) {
- llog_ctxt_destroy(ctxt);
- if (rc == -EEXIST) {
- ctxt = llog_group_get_ctxt(olg, index);
- if (ctxt) {
- /*
- * mds_lov_update_desc() might call here multiple
- * times. So if the llog is already set up then
-				 * don't do it again.
- */
- CDEBUG(D_CONFIG, "obd %s ctxt %d already set up\n",
- obd->obd_name, index);
- LASSERT(ctxt->loc_olg == olg);
- LASSERT(ctxt->loc_obd == obd);
- LASSERT(ctxt->loc_exp == disk_obd->obd_self_export);
- LASSERT(ctxt->loc_logops == op);
- llog_ctxt_put(ctxt);
- }
- rc = 0;
- }
- return rc;
- }
-
- if (op->lop_setup) {
- if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LLOG_SETUP))
- rc = -EOPNOTSUPP;
- else
- rc = op->lop_setup(env, obd, olg, index, disk_obd);
- }
-
- if (rc) {
- CERROR("%s: ctxt %d lop_setup=%p failed: rc = %d\n",
- obd->obd_name, index, op->lop_setup, rc);
- llog_group_clear_ctxt(olg, index);
- llog_ctxt_destroy(ctxt);
- } else {
- CDEBUG(D_CONFIG, "obd %s ctxt %d is initialized\n",
- obd->obd_name, index);
- ctxt->loc_flags &= ~LLOG_CTXT_FLAG_UNINITIALIZED;
- }
-
- return rc;
-}
-EXPORT_SYMBOL(llog_setup);
-
-/* context key constructor/destructor: llog_key_init, llog_key_fini */
-LU_KEY_INIT_FINI(llog, struct llog_thread_info);
-/* context key: llog_thread_key */
-LU_CONTEXT_KEY_DEFINE(llog, LCT_MD_THREAD | LCT_MG_THREAD | LCT_LOCAL);
-LU_KEY_INIT_GENERIC(llog);
-
-int llog_info_init(void)
-{
- llog_key_init_generic(&llog_thread_key, NULL);
- lu_context_key_register(&llog_thread_key);
- return 0;
-}
-
-void llog_info_fini(void)
-{
- lu_context_key_degister(&llog_thread_key);
-}
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_swab.c b/drivers/staging/lustre/lustre/obdclass/llog_swab.c
deleted file mode 100644
index b431c3408fe4..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/llog_swab.c
+++ /dev/null
@@ -1,412 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdclass/llog_swab.c
- *
- * Swabbing of llog datatypes (from disk or over the wire).
- *
- * Author: jacob berkman <jacob@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LOG
-
-#include <llog_swab.h>
-#include <lustre_log.h>
-
-static void print_llogd_body(struct llogd_body *d)
-{
- CDEBUG(D_OTHER, "llogd body: %p\n", d);
- CDEBUG(D_OTHER, "\tlgd_logid.lgl_oi: " DOSTID "\n",
- POSTID(&d->lgd_logid.lgl_oi));
- CDEBUG(D_OTHER, "\tlgd_logid.lgl_ogen: %#x\n", d->lgd_logid.lgl_ogen);
- CDEBUG(D_OTHER, "\tlgd_ctxt_idx: %#x\n", d->lgd_ctxt_idx);
- CDEBUG(D_OTHER, "\tlgd_llh_flags: %#x\n", d->lgd_llh_flags);
- CDEBUG(D_OTHER, "\tlgd_index: %#x\n", d->lgd_index);
- CDEBUG(D_OTHER, "\tlgd_saved_index: %#x\n", d->lgd_saved_index);
- CDEBUG(D_OTHER, "\tlgd_len: %#x\n", d->lgd_len);
- CDEBUG(D_OTHER, "\tlgd_cur_offset: %#llx\n", d->lgd_cur_offset);
-}
-
-void lustre_swab_lu_fid(struct lu_fid *fid)
-{
- __swab64s(&fid->f_seq);
- __swab32s(&fid->f_oid);
- __swab32s(&fid->f_ver);
-}
-EXPORT_SYMBOL(lustre_swab_lu_fid);
-
-void lustre_swab_ost_id(struct ost_id *oid)
-{
- if (fid_seq_is_mdt0(oid->oi.oi_seq)) {
- __swab64s(&oid->oi.oi_id);
- __swab64s(&oid->oi.oi_seq);
- } else {
- lustre_swab_lu_fid(&oid->oi_fid);
- }
-}
-EXPORT_SYMBOL(lustre_swab_ost_id);
-
-static void lustre_swab_llog_id(struct llog_logid *log_id)
-{
- __swab64s(&log_id->lgl_oi.oi.oi_id);
- __swab64s(&log_id->lgl_oi.oi.oi_seq);
- __swab32s(&log_id->lgl_ogen);
-}
-
-void lustre_swab_llogd_body(struct llogd_body *d)
-{
- print_llogd_body(d);
- lustre_swab_llog_id(&d->lgd_logid);
- __swab32s(&d->lgd_ctxt_idx);
- __swab32s(&d->lgd_llh_flags);
- __swab32s(&d->lgd_index);
- __swab32s(&d->lgd_saved_index);
- __swab32s(&d->lgd_len);
- __swab64s(&d->lgd_cur_offset);
- print_llogd_body(d);
-}
-EXPORT_SYMBOL(lustre_swab_llogd_body);
-
-void lustre_swab_llogd_conn_body(struct llogd_conn_body *d)
-{
- __swab64s(&d->lgdc_gen.mnt_cnt);
- __swab64s(&d->lgdc_gen.conn_cnt);
- lustre_swab_llog_id(&d->lgdc_logid);
- __swab32s(&d->lgdc_ctxt_idx);
-}
-EXPORT_SYMBOL(lustre_swab_llogd_conn_body);
-
-static void lustre_swab_ll_fid(struct ll_fid *fid)
-{
- __swab64s(&fid->id);
- __swab32s(&fid->generation);
- __swab32s(&fid->f_type);
-}
-
-void lustre_swab_lu_seq_range(struct lu_seq_range *range)
-{
- __swab64s(&range->lsr_start);
- __swab64s(&range->lsr_end);
- __swab32s(&range->lsr_index);
- __swab32s(&range->lsr_flags);
-}
-EXPORT_SYMBOL(lustre_swab_lu_seq_range);
-
-void lustre_swab_llog_rec(struct llog_rec_hdr *rec)
-{
- struct llog_rec_tail *tail = NULL;
-
- __swab32s(&rec->lrh_len);
- __swab32s(&rec->lrh_index);
- __swab32s(&rec->lrh_type);
- __swab32s(&rec->lrh_id);
-
- switch (rec->lrh_type) {
- case OST_SZ_REC:
- {
- struct llog_size_change_rec *lsc =
- (struct llog_size_change_rec *)rec;
-
- lustre_swab_ll_fid(&lsc->lsc_fid);
- __swab32s(&lsc->lsc_ioepoch);
- tail = &lsc->lsc_tail;
- break;
- }
- case MDS_UNLINK_REC:
- {
- struct llog_unlink_rec *lur = (struct llog_unlink_rec *)rec;
-
- __swab64s(&lur->lur_oid);
- __swab32s(&lur->lur_oseq);
- __swab32s(&lur->lur_count);
- tail = &lur->lur_tail;
- break;
- }
- case MDS_UNLINK64_REC:
- {
- struct llog_unlink64_rec *lur =
- (struct llog_unlink64_rec *)rec;
-
- lustre_swab_lu_fid(&lur->lur_fid);
- __swab32s(&lur->lur_count);
- tail = &lur->lur_tail;
- break;
- }
- case CHANGELOG_REC:
- {
- struct llog_changelog_rec *cr =
- (struct llog_changelog_rec *)rec;
-
- __swab16s(&cr->cr.cr_namelen);
- __swab16s(&cr->cr.cr_flags);
- __swab32s(&cr->cr.cr_type);
- __swab64s(&cr->cr.cr_index);
- __swab64s(&cr->cr.cr_prev);
- __swab64s(&cr->cr.cr_time);
- lustre_swab_lu_fid(&cr->cr.cr_tfid);
- lustre_swab_lu_fid(&cr->cr.cr_pfid);
- if (cr->cr.cr_flags & CLF_RENAME) {
- struct changelog_ext_rename *rnm =
- changelog_rec_rename(&cr->cr);
-
- lustre_swab_lu_fid(&rnm->cr_sfid);
- lustre_swab_lu_fid(&rnm->cr_spfid);
- }
- /*
- * Because the tail follows a variable-length structure we need
- * to compute its location at runtime
- */
- tail = (struct llog_rec_tail *)((char *)&cr->cr +
- changelog_rec_size(&cr->cr) +
- cr->cr.cr_namelen);
- break;
- }
-
- case CHANGELOG_USER_REC:
- {
- struct llog_changelog_user_rec *cur =
- (struct llog_changelog_user_rec *)rec;
-
- __swab32s(&cur->cur_id);
- __swab64s(&cur->cur_endrec);
- tail = &cur->cur_tail;
- break;
- }
-
- case HSM_AGENT_REC: {
- struct llog_agent_req_rec *arr =
- (struct llog_agent_req_rec *)rec;
-
- __swab32s(&arr->arr_hai.hai_len);
- __swab32s(&arr->arr_hai.hai_action);
- lustre_swab_lu_fid(&arr->arr_hai.hai_fid);
- lustre_swab_lu_fid(&arr->arr_hai.hai_dfid);
- __swab64s(&arr->arr_hai.hai_cookie);
- __swab64s(&arr->arr_hai.hai_extent.offset);
- __swab64s(&arr->arr_hai.hai_extent.length);
- __swab64s(&arr->arr_hai.hai_gid);
-		/* no swabbing for opaque data */
- /* hai_data[0]; */
- break;
- }
-
- case MDS_SETATTR64_REC:
- {
- struct llog_setattr64_rec *lsr =
- (struct llog_setattr64_rec *)rec;
-
- lustre_swab_ost_id(&lsr->lsr_oi);
- __swab32s(&lsr->lsr_uid);
- __swab32s(&lsr->lsr_uid_h);
- __swab32s(&lsr->lsr_gid);
- __swab32s(&lsr->lsr_gid_h);
- __swab64s(&lsr->lsr_valid);
- tail = &lsr->lsr_tail;
- break;
- }
- case OBD_CFG_REC:
- /* these are swabbed as they are consumed */
- break;
- case LLOG_HDR_MAGIC:
- {
- struct llog_log_hdr *llh = (struct llog_log_hdr *)rec;
-
- __swab64s(&llh->llh_timestamp);
- __swab32s(&llh->llh_count);
- __swab32s(&llh->llh_bitmap_offset);
- __swab32s(&llh->llh_flags);
- __swab32s(&llh->llh_size);
- __swab32s(&llh->llh_cat_idx);
- tail = LLOG_HDR_TAIL(llh);
- break;
- }
- case LLOG_LOGID_MAGIC:
- {
- struct llog_logid_rec *lid = (struct llog_logid_rec *)rec;
-
- lustre_swab_llog_id(&lid->lid_id);
- tail = &lid->lid_tail;
- break;
- }
- case LLOG_GEN_REC:
- {
- struct llog_gen_rec *lgr = (struct llog_gen_rec *)rec;
-
- __swab64s(&lgr->lgr_gen.mnt_cnt);
- __swab64s(&lgr->lgr_gen.conn_cnt);
- tail = &lgr->lgr_tail;
- break;
- }
- case LLOG_PAD_MAGIC:
- break;
- default:
- CERROR("Unknown llog rec type %#x swabbing rec %p\n",
- rec->lrh_type, rec);
- }
-
- if (tail) {
- __swab32s(&tail->lrt_len);
- __swab32s(&tail->lrt_index);
- }
-}
-EXPORT_SYMBOL(lustre_swab_llog_rec);
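
All of these routines swab in place and are only invoked when the reader detects a foreign byte order. One common detection idiom (a sketch, not the exact Lustre check) compares a magic field against its byte-swapped value:

static void maybe_swab_rec(struct llog_rec_hdr *rec)
{
	/* a natively written header shows the magic as-is; a foreign
	 * one shows it byte-swapped, so swab everything back */
	if (rec->lrh_type == __swab32(LLOG_HDR_MAGIC))
		lustre_swab_llog_rec(rec);
}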
-
-static void print_llog_hdr(struct llog_log_hdr *h)
-{
- CDEBUG(D_OTHER, "llog header: %p\n", h);
- CDEBUG(D_OTHER, "\tllh_hdr.lrh_index: %#x\n", h->llh_hdr.lrh_index);
- CDEBUG(D_OTHER, "\tllh_hdr.lrh_len: %#x\n", h->llh_hdr.lrh_len);
- CDEBUG(D_OTHER, "\tllh_hdr.lrh_type: %#x\n", h->llh_hdr.lrh_type);
- CDEBUG(D_OTHER, "\tllh_timestamp: %#llx\n", h->llh_timestamp);
- CDEBUG(D_OTHER, "\tllh_count: %#x\n", h->llh_count);
- CDEBUG(D_OTHER, "\tllh_bitmap_offset: %#x\n", h->llh_bitmap_offset);
- CDEBUG(D_OTHER, "\tllh_flags: %#x\n", h->llh_flags);
- CDEBUG(D_OTHER, "\tllh_size: %#x\n", h->llh_size);
- CDEBUG(D_OTHER, "\tllh_cat_idx: %#x\n", h->llh_cat_idx);
- CDEBUG(D_OTHER, "\tllh_tail.lrt_index: %#x\n",
- LLOG_HDR_TAIL(h)->lrt_index);
- CDEBUG(D_OTHER, "\tllh_tail.lrt_len: %#x\n",
- LLOG_HDR_TAIL(h)->lrt_len);
-}
-
-void lustre_swab_llog_hdr(struct llog_log_hdr *h)
-{
- print_llog_hdr(h);
-
- lustre_swab_llog_rec(&h->llh_hdr);
-
- print_llog_hdr(h);
-}
-EXPORT_SYMBOL(lustre_swab_llog_hdr);
-
-static void print_lustre_cfg(struct lustre_cfg *lcfg)
-{
- int i;
-
- if (!(libcfs_debug & D_OTHER)) /* don't loop on nothing */
- return;
- CDEBUG(D_OTHER, "lustre_cfg: %p\n", lcfg);
- CDEBUG(D_OTHER, "\tlcfg->lcfg_version: %#x\n", lcfg->lcfg_version);
-
- CDEBUG(D_OTHER, "\tlcfg->lcfg_command: %#x\n", lcfg->lcfg_command);
- CDEBUG(D_OTHER, "\tlcfg->lcfg_num: %#x\n", lcfg->lcfg_num);
- CDEBUG(D_OTHER, "\tlcfg->lcfg_flags: %#x\n", lcfg->lcfg_flags);
- CDEBUG(D_OTHER, "\tlcfg->lcfg_nid: %s\n", libcfs_nid2str(lcfg->lcfg_nid));
-
- CDEBUG(D_OTHER, "\tlcfg->lcfg_bufcount: %d\n", lcfg->lcfg_bufcount);
- if (lcfg->lcfg_bufcount < LUSTRE_CFG_MAX_BUFCOUNT)
- for (i = 0; i < lcfg->lcfg_bufcount; i++)
- CDEBUG(D_OTHER, "\tlcfg->lcfg_buflens[%d]: %d\n",
- i, lcfg->lcfg_buflens[i]);
-}
-
-void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg)
-{
- int i;
-
- __swab32s(&lcfg->lcfg_version);
-
- if (lcfg->lcfg_version != LUSTRE_CFG_VERSION) {
- CERROR("not swabbing lustre_cfg version %#x (expecting %#x)\n",
- lcfg->lcfg_version, LUSTRE_CFG_VERSION);
- return;
- }
-
- __swab32s(&lcfg->lcfg_command);
- __swab32s(&lcfg->lcfg_num);
- __swab32s(&lcfg->lcfg_flags);
- __swab64s(&lcfg->lcfg_nid);
- __swab32s(&lcfg->lcfg_bufcount);
- for (i = 0; i < lcfg->lcfg_bufcount && i < LUSTRE_CFG_MAX_BUFCOUNT; i++)
- __swab32s(&lcfg->lcfg_buflens[i]);
-
- print_lustre_cfg(lcfg);
-}
-
-/* used only for compatibility with old on-disk cfg_marker data */
-struct cfg_marker32 {
- __u32 cm_step;
- __u32 cm_flags;
- __u32 cm_vers;
- __u32 padding;
- __u32 cm_createtime;
- __u32 cm_canceltime;
- char cm_tgtname[MTI_NAME_MAXLEN];
- char cm_comment[MTI_NAME_MAXLEN];
-};
-
-#define MTI_NAMELEN32 (MTI_NAME_MAXLEN - \
- (sizeof(struct cfg_marker) - sizeof(struct cfg_marker32)))
-
-void lustre_swab_cfg_marker(struct cfg_marker *marker, int swab, int size)
-{
- struct cfg_marker32 *cm32 = (struct cfg_marker32 *)marker;
-
- if (swab) {
- __swab32s(&marker->cm_step);
- __swab32s(&marker->cm_flags);
- __swab32s(&marker->cm_vers);
- }
- if (size == sizeof(*cm32)) {
- __u32 createtime, canceltime;
- /* There was a problem with the original declaration of
-		 * cfg_marker on 32-bit systems because it used time_t in
-		 * a wire protocol structure, and didn't verify this in
- * wirecheck. We now have to convert the offsets of the
- * later fields in order to work on 32- and 64-bit systems.
- *
- * Fortunately, the cm_comment field has no functional use
- * so can be sacrificed when converting the timestamp size.
- *
- * Overwrite fields from the end first, so they are not
- * clobbered, and use memmove() instead of memcpy() because
- * the source and target buffers overlap. bug 16771
- */
- createtime = cm32->cm_createtime;
- canceltime = cm32->cm_canceltime;
- memmove(marker->cm_comment, cm32->cm_comment, MTI_NAMELEN32);
- marker->cm_comment[MTI_NAMELEN32 - 1] = '\0';
- memmove(marker->cm_tgtname, cm32->cm_tgtname,
- sizeof(marker->cm_tgtname));
- if (swab) {
- __swab32s(&createtime);
- __swab32s(&canceltime);
- }
- marker->cm_createtime = createtime;
- marker->cm_canceltime = canceltime;
- CDEBUG(D_CONFIG, "Find old cfg_marker(Srv32b,Clt64b) for target %s, converting\n",
- marker->cm_tgtname);
- } else if (swab) {
- __swab64s(&marker->cm_createtime);
- __swab64s(&marker->cm_canceltime);
- }
-}
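
The conversion above follows a general rule for widening fields inside a shared buffer: save or move the trailing fields first, from the highest address down, and use memmove() because source and destination overlap. A self-contained illustration with hypothetical layouts, assuming the buffer already has room for the wide form:

#include <string.h>

struct rec32 { unsigned int stamp; char name[8]; };
struct rec64 { unsigned long long stamp; char name[8]; };

static void widen_rec(void *buf)
{
	struct rec32 *o = buf;
	struct rec64 *n = buf;		/* same storage, wider layout */
	unsigned int stamp = o->stamp;	/* save before it is clobbered */

	memmove(n->name, o->name, sizeof(o->name));	/* tail first */
	n->stamp = stamp;		/* now safe to widen in place */
}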
diff --git a/drivers/staging/lustre/lustre/obdclass/lprocfs_counters.c b/drivers/staging/lustre/lustre/obdclass/lprocfs_counters.c
deleted file mode 100644
index 85f09aff6e83..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/lprocfs_counters.c
+++ /dev/null
@@ -1,134 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- *
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2013, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdclass/lprocfs_counters.c
- *
- * Lustre lprocfs counter routines
- *
- * Author: Andreas Dilger <andreas.dilger@intel.com>
- */
-
-#include <linux/module.h>
-#include <lprocfs_status.h>
-#include <obd_support.h>
-#include <linux/libcfs/libcfs.h>
-
-void lprocfs_counter_add(struct lprocfs_stats *stats, int idx, long amount)
-{
- struct lprocfs_counter *percpu_cntr;
- struct lprocfs_counter_header *header;
- int smp_id;
- unsigned long flags = 0;
-
- if (!stats)
- return;
-
- LASSERTF(0 <= idx && idx < stats->ls_num,
- "idx %d, ls_num %hu\n", idx, stats->ls_num);
-
-	/* With per-client stats, statistics are allocated in a
-	 * single-CPU area, so smp_id should always be 0.
- */
- smp_id = lprocfs_stats_lock(stats, LPROCFS_GET_SMP_ID, &flags);
- if (smp_id < 0)
- return;
-
- header = &stats->ls_cnt_header[idx];
- percpu_cntr = lprocfs_stats_counter_get(stats, smp_id, idx);
- percpu_cntr->lc_count++;
-
- if (header->lc_config & LPROCFS_CNTR_AVGMINMAX) {
- /*
- * lprocfs_counter_add() can be called in interrupt context,
-		 * as memory allocation could trigger the memory shrinker
-		 * ldlm_pool_shrink(), which calls lprocfs_counter_add().
-		 * LU-1727.
-		 */
- if (in_interrupt() &&
- (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
- percpu_cntr->lc_sum_irq += amount;
- else
- percpu_cntr->lc_sum += amount;
-
- if (header->lc_config & LPROCFS_CNTR_STDDEV)
- percpu_cntr->lc_sumsquare += (__s64)amount * amount;
- if (amount < percpu_cntr->lc_min)
- percpu_cntr->lc_min = amount;
- if (amount > percpu_cntr->lc_max)
- percpu_cntr->lc_max = amount;
- }
- lprocfs_stats_unlock(stats, LPROCFS_GET_SMP_ID, &flags);
-}
-EXPORT_SYMBOL(lprocfs_counter_add);
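
A typical call-site shape for the counter above; the stats object and index values are whatever the subsystem registered (the names here are hypothetical):

enum { MY_READ_BYTES = 0, MY_READ_OPS = 1 };

static void account_read(struct lprocfs_stats *stats, long nbytes)
{
	lprocfs_counter_add(stats, MY_READ_BYTES, nbytes); /* sum/min/max */
	lprocfs_counter_add(stats, MY_READ_OPS, 1);	   /* event count */
}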
-
-void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx, long amount)
-{
- struct lprocfs_counter *percpu_cntr;
- struct lprocfs_counter_header *header;
- int smp_id;
- unsigned long flags = 0;
-
- if (!stats)
- return;
-
- LASSERTF(0 <= idx && idx < stats->ls_num,
- "idx %d, ls_num %hu\n", idx, stats->ls_num);
-
-	/* With per-client stats, statistics are allocated in a
-	 * single-CPU area, so smp_id should always be 0.
- */
- smp_id = lprocfs_stats_lock(stats, LPROCFS_GET_SMP_ID, &flags);
- if (smp_id < 0)
- return;
-
- header = &stats->ls_cnt_header[idx];
- percpu_cntr = lprocfs_stats_counter_get(stats, smp_id, idx);
- if (header->lc_config & LPROCFS_CNTR_AVGMINMAX) {
- /*
-		 * Sometimes we use RCU callbacks to free memory, and those
-		 * callbacks call lprocfs_counter_sub(); RCU callbacks may
-		 * execute in softirq context. Right now that is the only
-		 * case in which we run in softirq context here, so use a
-		 * separate counter for it. bz20650.
- */
- if (in_interrupt() &&
- (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
- percpu_cntr->lc_sum_irq -= amount;
- else
- percpu_cntr->lc_sum -= amount;
- }
- lprocfs_stats_unlock(stats, LPROCFS_GET_SMP_ID, &flags);
-}
-EXPORT_SYMBOL(lprocfs_counter_sub);
diff --git a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
deleted file mode 100644
index bdbe6f52031a..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
+++ /dev/null
@@ -1,1698 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdclass/lprocfs_status.c
- *
- * Author: Hariharan Thantry <thantry@users.sourceforge.net>
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <obd_class.h>
-#include <lprocfs_status.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <linux/seq_file.h>
-#include <linux/ctype.h>
-
-static const char * const obd_connect_names[] = {
- "read_only",
- "lov_index",
- "connect_from_mds",
- "write_grant",
- "server_lock",
- "version",
- "request_portal",
- "acl",
- "xattr",
- "create_on_write",
- "truncate_lock",
- "initial_transno",
- "inode_bit_locks",
- "join_file(obsolete)",
- "getattr_by_fid",
- "no_oh_for_devices",
- "remote_client",
- "remote_client_by_force",
- "max_byte_per_rpc",
- "64bit_qdata",
- "mds_capability",
- "oss_capability",
- "early_lock_cancel",
- "som",
- "adaptive_timeouts",
- "lru_resize",
- "mds_mds_connection",
- "real_conn",
- "change_qunit_size",
- "alt_checksum_algorithm",
- "fid_is_enabled",
- "version_recovery",
- "pools",
- "grant_shrink",
- "skip_orphan",
- "large_ea",
- "full20",
- "layout_lock",
- "64bithash",
- "object_max_bytes",
- "imp_recov",
- "jobstats",
- "umask",
- "einprogress",
- "grant_param",
- "flock_owner",
- "lvb_type",
- "nanoseconds_times",
- "lightweight_conn",
- "short_io",
- "pingless",
- "flock_deadlock",
- "disp_stripe",
- "open_by_fid",
- "lfsck",
- "unknown",
- "unlink_close",
- "multi_mod_rpcs",
- "dir_stripe",
- "subtree",
- "lock_ahead",
- "bulk_mbits",
- "compact_obdo",
- "second_flags",
- NULL
-};
-
-int obd_connect_flags2str(char *page, int count, __u64 flags, char *sep)
-{
- __u64 mask = 1;
- int i, ret = 0;
-
- for (i = 0; obd_connect_names[i]; i++, mask <<= 1) {
- if (flags & mask)
- ret += snprintf(page + ret, count - ret, "%s%s",
- ret ? sep : "", obd_connect_names[i]);
- }
- if (flags & ~(mask - 1))
- ret += snprintf(page + ret, count - ret,
- "%sunknown flags %#llx",
- ret ? sep : "", flags & ~(mask - 1));
- return ret;
-}
-EXPORT_SYMBOL(obd_connect_flags2str);
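
Bit i of the flags word maps to obd_connect_names[i], so for example OBD_CONNECT_RDONLY (bit 0) and OBD_CONNECT_VERSION (bit 5) render as shown in this sketch:

static void flags2str_demo(void)
{
	char buf[64];
	int len;

	len = obd_connect_flags2str(buf, sizeof(buf),
				    OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION,
				    ",");
	/* buf now holds "read_only,version"; len == strlen(buf) */
}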
-
-static void obd_connect_data_seqprint(struct seq_file *m,
- struct obd_connect_data *ocd)
-{
- u64 flags;
-
- LASSERT(ocd);
- flags = ocd->ocd_connect_flags;
-
- seq_printf(m, " connect_data:\n"
- " flags: %llx\n"
- " instance: %u\n",
- ocd->ocd_connect_flags,
- ocd->ocd_instance);
- if (flags & OBD_CONNECT_VERSION)
- seq_printf(m, " target_version: %u.%u.%u.%u\n",
- OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
- OBD_OCD_VERSION_MINOR(ocd->ocd_version),
- OBD_OCD_VERSION_PATCH(ocd->ocd_version),
- OBD_OCD_VERSION_FIX(ocd->ocd_version));
- if (flags & OBD_CONNECT_MDS)
- seq_printf(m, " mdt_index: %d\n", ocd->ocd_group);
- if (flags & OBD_CONNECT_GRANT)
- seq_printf(m, " initial_grant: %d\n", ocd->ocd_grant);
- if (flags & OBD_CONNECT_INDEX)
- seq_printf(m, " target_index: %u\n", ocd->ocd_index);
- if (flags & OBD_CONNECT_BRW_SIZE)
- seq_printf(m, " max_brw_size: %d\n", ocd->ocd_brw_size);
- if (flags & OBD_CONNECT_IBITS)
- seq_printf(m, " ibits_known: %llx\n",
- ocd->ocd_ibits_known);
- if (flags & OBD_CONNECT_GRANT_PARAM)
- seq_printf(m, " grant_block_size: %d\n"
- " grant_inode_size: %d\n"
- " grant_extent_overhead: %d\n",
- ocd->ocd_blocksize,
- ocd->ocd_inodespace,
- ocd->ocd_grant_extent);
- if (flags & OBD_CONNECT_TRANSNO)
- seq_printf(m, " first_transno: %llx\n",
- ocd->ocd_transno);
- if (flags & OBD_CONNECT_CKSUM)
- seq_printf(m, " cksum_types: %#x\n",
- ocd->ocd_cksum_types);
- if (flags & OBD_CONNECT_MAX_EASIZE)
- seq_printf(m, " max_easize: %d\n", ocd->ocd_max_easize);
- if (flags & OBD_CONNECT_MAXBYTES)
- seq_printf(m, " max_object_bytes: %llx\n",
- ocd->ocd_maxbytes);
- if (flags & OBD_CONNECT_MULTIMODRPCS)
- seq_printf(m, " max_mod_rpcs: %hu\n",
- ocd->ocd_maxmodrpcs);
-}
-
-int lprocfs_read_frac_helper(char *buffer, unsigned long count, long val,
- int mult)
-{
- long decimal_val, frac_val;
- int prtn;
-
- if (count < 10)
- return -EINVAL;
-
- decimal_val = val / mult;
- prtn = snprintf(buffer, count, "%ld", decimal_val);
- frac_val = val % mult;
-
- if (prtn < (count - 4) && frac_val > 0) {
- long temp_frac;
- int i, temp_mult = 1, frac_bits = 0;
-
- temp_frac = frac_val * 10;
- buffer[prtn++] = '.';
- while (frac_bits < 2 && (temp_frac / mult) < 1) {
-			/* emit at most two leading zeros of the fraction */
- buffer[prtn++] = '0';
- temp_frac *= 10;
- frac_bits++;
- }
-		/*
-		 * Cases to consider:
-		 * 1. #echo x.00 > /sys/xxx output result : x
-		 * 2. #echo x.0x > /sys/xxx output result : x.0x
-		 * 3. #echo x.x0 > /sys/xxx output result : x.x
-		 * 4. #echo x.xx > /sys/xxx output result : x.xx
-		 * Only two fractional digits are kept.
-		 */
- for (i = 0; i < (5 - prtn); i++)
- temp_mult *= 10;
-
- frac_bits = min((int)count - prtn, 3 - frac_bits);
- prtn += snprintf(buffer + prtn, frac_bits, "%ld",
- frac_val * temp_mult / mult);
-
- prtn--;
- while (buffer[prtn] < '1' || buffer[prtn] > '9') {
- prtn--;
- if (buffer[prtn] == '.') {
- prtn--;
- break;
- }
- }
- prtn++;
- }
- buffer[prtn++] = '\n';
- return prtn;
-}
-EXPORT_SYMBOL(lprocfs_read_frac_helper);
-
-int lprocfs_write_frac_helper(const char __user *buffer, unsigned long count,
- int *val, int mult)
-{
- char kernbuf[20], *end, *pbuf;
-
- if (count > (sizeof(kernbuf) - 1))
- return -EINVAL;
-
- if (copy_from_user(kernbuf, buffer, count))
- return -EFAULT;
-
- kernbuf[count] = '\0';
- pbuf = kernbuf;
- if (*pbuf == '-') {
- mult = -mult;
- pbuf++;
- }
-
- *val = (int)simple_strtoul(pbuf, &end, 10) * mult;
- if (pbuf == end)
- return -EINVAL;
-
- if (end && *end == '.') {
- int temp_val, pow = 1;
- int i;
-
- pbuf = end + 1;
- if (strlen(pbuf) > 5)
-			pbuf[5] = '\0'; /* allow at most 5 fractional digits */
-
- temp_val = (int)simple_strtoul(pbuf, &end, 10) * mult;
-
- if (pbuf < end) {
- for (i = 0; i < (end - pbuf); i++)
- pow *= 10;
-
- *val += temp_val / pow;
- }
- }
- return 0;
-}
-EXPORT_SYMBOL(lprocfs_write_frac_helper);
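
Both helpers encode a decimal value as an integer scaled by mult. A worked example with mult = 100 (two decimal places): writing "3.25" stores 3 * 100 + 2500 / 100 = 325, and the reverse decomposition is:

static void frac_demo(void)
{
	int val = 325;			/* "3.25" parsed with mult = 100 */
	long whole = val / 100;		/* 3  */
	long frac = val % 100;		/* 25 */

	/* lprocfs_read_frac_helper(buf, len, 325, 100) prints "3.25\n" */
	(void)whole;
	(void)frac;
}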
-
-static int lprocfs_no_percpu_stats;
-module_param(lprocfs_no_percpu_stats, int, 0644);
-MODULE_PARM_DESC(lprocfs_no_percpu_stats, "Do not alloc percpu data for lprocfs stats");
-
-#define MAX_STRING_SIZE 128
-
-int lprocfs_single_release(struct inode *inode, struct file *file)
-{
- return single_release(inode, file);
-}
-EXPORT_SYMBOL(lprocfs_single_release);
-
-int lprocfs_seq_release(struct inode *inode, struct file *file)
-{
- return seq_release(inode, file);
-}
-EXPORT_SYMBOL(lprocfs_seq_release);
-
-/* lprocfs API calls */
-
-static const struct file_operations lprocfs_generic_fops = { };
-
-void ldebugfs_add_vars(struct dentry *parent, struct lprocfs_vars *list,
- void *data)
-{
- if (IS_ERR_OR_NULL(parent) || IS_ERR_OR_NULL(list))
- return;
-
- while (list->name) {
- umode_t mode = 0;
-
- if (list->proc_mode != 0000) {
- mode = list->proc_mode;
- } else if (list->fops) {
- if (list->fops->read)
- mode = 0444;
- if (list->fops->write)
- mode |= 0200;
- }
- debugfs_create_file(list->name, mode, parent,
- list->data ?: data,
- list->fops ?: &lprocfs_generic_fops);
- list++;
- }
-}
-EXPORT_SYMBOL_GPL(ldebugfs_add_vars);
-
-/* Generic callbacks */
-static ssize_t uuid_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
-
- return sprintf(buf, "%s\n", obd->obd_uuid.uuid);
-}
-LUSTRE_RO_ATTR(uuid);
-
-static ssize_t blocksize_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
- struct obd_statfs osfs;
- int rc = obd_statfs(NULL, obd->obd_self_export, &osfs,
- get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ,
- OBD_STATFS_NODELAY);
- if (!rc)
- return sprintf(buf, "%u\n", osfs.os_bsize);
-
- return rc;
-}
-LUSTRE_RO_ATTR(blocksize);
-
-static ssize_t kbytestotal_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
- struct obd_statfs osfs;
- int rc = obd_statfs(NULL, obd->obd_self_export, &osfs,
- get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ,
- OBD_STATFS_NODELAY);
- if (!rc) {
- __u32 blk_size = osfs.os_bsize >> 10;
- __u64 result = osfs.os_blocks;
-
- while (blk_size >>= 1)
- result <<= 1;
-
- return sprintf(buf, "%llu\n", result);
- }
-
- return rc;
-}
-LUSTRE_RO_ATTR(kbytestotal);
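
The shift loop in the *_show() handlers above computes blocks * (bsize / 1024) for power-of-two block sizes using only doubling, avoiding a 64-bit multiply helper. Factored out as a sketch:

static __u64 blocks_to_kbytes(__u64 blocks, __u32 bsize)
{
	__u32 blk_size = bsize >> 10;	/* block size in KiB units */
	__u64 result = blocks;

	while (blk_size >>= 1)		/* each halving doubles result */
		result <<= 1;
	return result;			/* e.g. bsize 4096 -> blocks * 4 */
}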
-
-static ssize_t kbytesfree_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
- struct obd_statfs osfs;
- int rc = obd_statfs(NULL, obd->obd_self_export, &osfs,
- get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ,
- OBD_STATFS_NODELAY);
- if (!rc) {
- __u32 blk_size = osfs.os_bsize >> 10;
- __u64 result = osfs.os_bfree;
-
- while (blk_size >>= 1)
- result <<= 1;
-
- return sprintf(buf, "%llu\n", result);
- }
-
- return rc;
-}
-LUSTRE_RO_ATTR(kbytesfree);
-
-static ssize_t kbytesavail_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
- struct obd_statfs osfs;
- int rc = obd_statfs(NULL, obd->obd_self_export, &osfs,
- get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ,
- OBD_STATFS_NODELAY);
- if (!rc) {
- __u32 blk_size = osfs.os_bsize >> 10;
- __u64 result = osfs.os_bavail;
-
- while (blk_size >>= 1)
- result <<= 1;
-
- return sprintf(buf, "%llu\n", result);
- }
-
- return rc;
-}
-LUSTRE_RO_ATTR(kbytesavail);
-
-static ssize_t filestotal_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
- struct obd_statfs osfs;
- int rc = obd_statfs(NULL, obd->obd_self_export, &osfs,
- get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ,
- OBD_STATFS_NODELAY);
- if (!rc)
- return sprintf(buf, "%llu\n", osfs.os_files);
-
- return rc;
-}
-LUSTRE_RO_ATTR(filestotal);
-
-static ssize_t filesfree_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
- struct obd_statfs osfs;
- int rc = obd_statfs(NULL, obd->obd_self_export, &osfs,
- get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ,
- OBD_STATFS_NODELAY);
- if (!rc)
- return sprintf(buf, "%llu\n", osfs.os_ffree);
-
- return rc;
-}
-LUSTRE_RO_ATTR(filesfree);
-
-int lprocfs_rd_server_uuid(struct seq_file *m, void *data)
-{
- struct obd_device *obd = data;
- struct obd_import *imp;
- char *imp_state_name = NULL;
- int rc;
-
- LASSERT(obd);
- rc = lprocfs_climp_check(obd);
- if (rc)
- return rc;
-
- imp = obd->u.cli.cl_import;
- imp_state_name = ptlrpc_import_state_name(imp->imp_state);
- seq_printf(m, "%s\t%s%s\n",
- obd2cli_tgt(obd), imp_state_name,
- imp->imp_deactive ? "\tDEACTIVATED" : "");
-
- up_read(&obd->u.cli.cl_sem);
-
- return 0;
-}
-EXPORT_SYMBOL(lprocfs_rd_server_uuid);
-
-int lprocfs_rd_conn_uuid(struct seq_file *m, void *data)
-{
- struct obd_device *obd = data;
- struct ptlrpc_connection *conn;
- int rc;
-
- LASSERT(obd);
-
- rc = lprocfs_climp_check(obd);
- if (rc)
- return rc;
-
- conn = obd->u.cli.cl_import->imp_connection;
- if (conn && obd->u.cli.cl_import)
- seq_printf(m, "%s\n", conn->c_remote_uuid.uuid);
- else
- seq_puts(m, "<none>\n");
-
- up_read(&obd->u.cli.cl_sem);
-
- return 0;
-}
-EXPORT_SYMBOL(lprocfs_rd_conn_uuid);
-
-/**
- * Lock statistics structure for access, possibly only on this CPU.
- *
- * The statistics struct may be allocated with per-CPU structures for
- * efficient concurrent update (usually only on server-wide stats), or
- * as a single global struct (e.g. for per-client or per-job statistics),
- * so the required locking depends on the type of structure allocated.
- *
- * For per-CPU statistics, pin the thread to the current cpuid so that it
- * will only access the statistics for that CPU. If the stats structure
- * for the current CPU has not been allocated (or previously freed),
- * allocate it now. The per-CPU statistics do not need locking since
- * the thread is pinned to the CPU during update.
- *
- * For global statistics, lock the stats structure to prevent concurrent update.
- *
- * \param[in] stats statistics structure to lock
- * \param[in] opc type of operation:
- * LPROCFS_GET_SMP_ID: "lock" and return current CPU index
- * for incrementing statistics for that CPU
- * LPROCFS_GET_NUM_CPU: "lock" and return number of used
- * CPU indices to iterate over all indices
- * \param[out] flags CPU interrupt saved state for IRQ-safe locking
- *
- * \retval cpuid of current thread or number of allocated structs
- * \retval negative on error (only for opc LPROCFS_GET_SMP_ID + per-CPU stats)
- */
-int lprocfs_stats_lock(struct lprocfs_stats *stats,
- enum lprocfs_stats_lock_ops opc,
- unsigned long *flags)
-{
- if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
- if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
- spin_lock_irqsave(&stats->ls_lock, *flags);
- else
- spin_lock(&stats->ls_lock);
- return opc == LPROCFS_GET_NUM_CPU ? 1 : 0;
- }
-
- switch (opc) {
- case LPROCFS_GET_SMP_ID: {
- unsigned int cpuid = get_cpu();
-
- if (unlikely(!stats->ls_percpu[cpuid])) {
- int rc = lprocfs_stats_alloc_one(stats, cpuid);
-
- if (rc < 0) {
- put_cpu();
- return rc;
- }
- }
- return cpuid;
- }
- case LPROCFS_GET_NUM_CPU:
- return stats->ls_biggest_alloc_num;
- default:
- LBUG();
- }
-}
-
-/**
- * Unlock statistics structure after access.
- *
- * Unlock the lock acquired via lprocfs_stats_lock() for global statistics,
- * or unpin this thread from the current cpuid for per-CPU statistics.
- *
- * This function must be called using the same arguments as used when calling
- * lprocfs_stats_lock() so that the correct operation can be performed.
- *
- * \param[in] stats statistics structure to unlock
- * \param[in] opc type of operation (current cpuid or number of structs)
- * \param[in] flags CPU interrupt saved state for IRQ-safe locking
- */
-void lprocfs_stats_unlock(struct lprocfs_stats *stats,
- enum lprocfs_stats_lock_ops opc,
- unsigned long *flags)
-{
- if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
- if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
- spin_unlock_irqrestore(&stats->ls_lock, *flags);
- else
- spin_unlock(&stats->ls_lock);
- } else if (opc == LPROCFS_GET_SMP_ID) {
- put_cpu();
- }
-}
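
Lock and unlock must be paired with the same stats, opc, and flags pointer on both sides. The canonical update sequence, sketched below (lprocfs_counter_add() earlier in this patch is the real user):

static void stats_update_one(struct lprocfs_stats *stats, int idx, long amt)
{
	unsigned long flags = 0;
	int cpuid = lprocfs_stats_lock(stats, LPROCFS_GET_SMP_ID, &flags);

	if (cpuid < 0)
		return;		/* per-CPU allocation failed */

	lprocfs_stats_counter_get(stats, cpuid, idx)->lc_sum += amt;
	lprocfs_stats_unlock(stats, LPROCFS_GET_SMP_ID, &flags);
}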
-
-/** add up per-cpu counters */
-void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx,
- struct lprocfs_counter *cnt)
-{
- unsigned int num_entry;
- struct lprocfs_counter *percpu_cntr;
- int i;
- unsigned long flags = 0;
-
- memset(cnt, 0, sizeof(*cnt));
-
- if (!stats) {
- /* set count to 1 to avoid divide-by-zero errs in callers */
- cnt->lc_count = 1;
- return;
- }
-
- cnt->lc_min = LC_MIN_INIT;
-
- num_entry = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags);
-
- for (i = 0; i < num_entry; i++) {
- if (!stats->ls_percpu[i])
- continue;
- percpu_cntr = lprocfs_stats_counter_get(stats, i, idx);
-
- cnt->lc_count += percpu_cntr->lc_count;
- cnt->lc_sum += percpu_cntr->lc_sum;
- if (percpu_cntr->lc_min < cnt->lc_min)
- cnt->lc_min = percpu_cntr->lc_min;
- if (percpu_cntr->lc_max > cnt->lc_max)
- cnt->lc_max = percpu_cntr->lc_max;
- cnt->lc_sumsquare += percpu_cntr->lc_sumsquare;
- }
-
- lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
-}
-
-/**
- * Append a space separated list of current set flags to str.
- */
-#define flag2str(flag, first) \
- do { \
- if (imp->imp_##flag) \
- seq_printf(m, "%s" #flag, first ? "" : ", "); \
- } while (0)
-static int obd_import_flags2str(struct obd_import *imp, struct seq_file *m)
-{
- bool first = true;
-
- if (imp->imp_obd->obd_no_recov) {
- seq_puts(m, "no_recov");
- first = false;
- }
-
- flag2str(invalid, first);
- first = false;
- flag2str(deactive, first);
- flag2str(replayable, first);
- flag2str(pingable, first);
- return 0;
-}
-
-#undef flag2str
-
-static void obd_connect_seq_flags2str(struct seq_file *m, __u64 flags, char *sep)
-{
- __u64 mask = 1;
- int i;
- bool first = true;
-
- for (i = 0; obd_connect_names[i]; i++, mask <<= 1) {
- if (flags & mask) {
- seq_printf(m, "%s%s",
- first ? sep : "", obd_connect_names[i]);
- first = false;
- }
- }
- if (flags & ~(mask - 1))
- seq_printf(m, "%sunknown flags %#llx",
- first ? sep : "", flags & ~(mask - 1));
-}
-
-int lprocfs_rd_import(struct seq_file *m, void *data)
-{
- char nidstr[LNET_NIDSTR_SIZE];
- struct lprocfs_counter ret;
- struct lprocfs_counter_header *header;
- struct obd_device *obd = data;
- struct obd_import *imp;
- struct obd_import_conn *conn;
- struct obd_connect_data *ocd;
- int j;
- int k;
- int rw = 0;
- int rc;
-
- LASSERT(obd);
- rc = lprocfs_climp_check(obd);
- if (rc)
- return rc;
-
- imp = obd->u.cli.cl_import;
- ocd = &imp->imp_connect_data;
-
- seq_printf(m, "import:\n"
- " name: %s\n"
- " target: %s\n"
- " state: %s\n"
- " instance: %u\n"
- " connect_flags: [ ",
- obd->obd_name,
- obd2cli_tgt(obd),
- ptlrpc_import_state_name(imp->imp_state),
- imp->imp_connect_data.ocd_instance);
- obd_connect_seq_flags2str(m, imp->imp_connect_data.ocd_connect_flags,
- ", ");
- seq_puts(m, " ]\n");
- obd_connect_data_seqprint(m, ocd);
- seq_puts(m, " import_flags: [ ");
- obd_import_flags2str(imp, m);
-
- seq_puts(m,
- " ]\n"
- " connection:\n"
- " failover_nids: [ ");
- spin_lock(&imp->imp_lock);
- j = 0;
- list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
- libcfs_nid2str_r(conn->oic_conn->c_peer.nid,
- nidstr, sizeof(nidstr));
- seq_printf(m, "%s%s", j ? ", " : "", nidstr);
- j++;
- }
- if (imp->imp_connection)
- libcfs_nid2str_r(imp->imp_connection->c_peer.nid,
- nidstr, sizeof(nidstr));
- else
- strncpy(nidstr, "<none>", sizeof(nidstr));
- seq_printf(m,
- " ]\n"
- " current_connection: %s\n"
- " connection_attempts: %u\n"
- " generation: %u\n"
- " in-progress_invalidations: %u\n",
- nidstr,
- imp->imp_conn_cnt,
- imp->imp_generation,
- atomic_read(&imp->imp_inval_count));
- spin_unlock(&imp->imp_lock);
-
- if (!obd->obd_svc_stats)
- goto out_climp;
-
- header = &obd->obd_svc_stats->ls_cnt_header[PTLRPC_REQWAIT_CNTR];
- lprocfs_stats_collect(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR, &ret);
- if (ret.lc_count != 0) {
- /* first argument to do_div MUST be __u64 */
- __u64 sum = ret.lc_sum;
-
- do_div(sum, ret.lc_count);
- ret.lc_sum = sum;
- } else {
- ret.lc_sum = 0;
- }
- seq_printf(m,
- " rpcs:\n"
- " inflight: %u\n"
- " unregistering: %u\n"
- " timeouts: %u\n"
- " avg_waittime: %llu %s\n",
- atomic_read(&imp->imp_inflight),
- atomic_read(&imp->imp_unregistering),
- atomic_read(&imp->imp_timeouts),
- ret.lc_sum, header->lc_units);
-
- k = 0;
- for (j = 0; j < IMP_AT_MAX_PORTALS; j++) {
- if (imp->imp_at.iat_portal[j] == 0)
- break;
- k = max_t(unsigned int, k,
- at_get(&imp->imp_at.iat_service_estimate[j]));
- }
- seq_printf(m,
- " service_estimates:\n"
- " services: %u sec\n"
- " network: %u sec\n",
- k,
- at_get(&imp->imp_at.iat_net_latency));
-
- seq_printf(m,
- " transactions:\n"
- " last_replay: %llu\n"
- " peer_committed: %llu\n"
- " last_checked: %llu\n",
- imp->imp_last_replay_transno,
- imp->imp_peer_committed_transno,
- imp->imp_last_transno_checked);
-
- /* avg data rates */
- for (rw = 0; rw <= 1; rw++) {
- lprocfs_stats_collect(obd->obd_svc_stats,
- PTLRPC_LAST_CNTR + BRW_READ_BYTES + rw,
- &ret);
- if (ret.lc_sum > 0 && ret.lc_count > 0) {
- /* first argument to do_div MUST be __u64 */
- __u64 sum = ret.lc_sum;
-
- do_div(sum, ret.lc_count);
- ret.lc_sum = sum;
- seq_printf(m,
- " %s_data_averages:\n"
- " bytes_per_rpc: %llu\n",
- rw ? "write" : "read",
- ret.lc_sum);
- }
- k = (int)ret.lc_sum;
- j = opcode_offset(OST_READ + rw) + EXTRA_MAX_OPCODES;
- header = &obd->obd_svc_stats->ls_cnt_header[j];
- lprocfs_stats_collect(obd->obd_svc_stats, j, &ret);
- if (ret.lc_sum > 0 && ret.lc_count != 0) {
- /* first argument to do_div MUST be __u64 */
- __u64 sum = ret.lc_sum;
-
- do_div(sum, ret.lc_count);
- ret.lc_sum = sum;
- seq_printf(m,
- " %s_per_rpc: %llu\n",
- header->lc_units, ret.lc_sum);
- j = (int)ret.lc_sum;
- if (j > 0)
- seq_printf(m,
- " MB_per_sec: %u.%.02u\n",
- k / j, (100 * k / j) % 100);
- }
- }
-
-out_climp:
- up_read(&obd->u.cli.cl_sem);
- return 0;
-}
-EXPORT_SYMBOL(lprocfs_rd_import);
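
The repeated "first argument to do_div MUST be __u64" dance above exists because do_div() divides a 64-bit lvalue in place (important on 32-bit hosts): it leaves the quotient in its first argument and returns the remainder. Factored out as a sketch:

static __u64 avg64(__u64 sum, __u32 count)
{
	if (!count)
		return 0;
	do_div(sum, count);	/* sum becomes the quotient */
	return sum;
}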
-
-int lprocfs_rd_state(struct seq_file *m, void *data)
-{
- struct obd_device *obd = data;
- struct obd_import *imp;
- int j, k, rc;
-
- LASSERT(obd);
- rc = lprocfs_climp_check(obd);
- if (rc)
- return rc;
-
- imp = obd->u.cli.cl_import;
-
- seq_printf(m, "current_state: %s\n",
- ptlrpc_import_state_name(imp->imp_state));
- seq_puts(m, "state_history:\n");
- k = imp->imp_state_hist_idx;
- for (j = 0; j < IMP_STATE_HIST_LEN; j++) {
- struct import_state_hist *ish =
- &imp->imp_state_hist[(k + j) % IMP_STATE_HIST_LEN];
- if (ish->ish_state == 0)
- continue;
- seq_printf(m, " - [ %lld, %s ]\n", (s64)ish->ish_time,
- ptlrpc_import_state_name(ish->ish_state));
- }
-
- up_read(&obd->u.cli.cl_sem);
- return 0;
-}
-EXPORT_SYMBOL(lprocfs_rd_state);
-
-int lprocfs_at_hist_helper(struct seq_file *m, struct adaptive_timeout *at)
-{
- int i;
-
- for (i = 0; i < AT_BINS; i++)
- seq_printf(m, "%3u ", at->at_hist[i]);
- seq_puts(m, "\n");
- return 0;
-}
-EXPORT_SYMBOL(lprocfs_at_hist_helper);
-
-/* See also ptlrpc_lprocfs_rd_timeouts */
-int lprocfs_rd_timeouts(struct seq_file *m, void *data)
-{
- struct obd_device *obd = data;
- struct obd_import *imp;
- unsigned int cur, worst;
- time64_t now, worstt;
- struct dhms ts;
- int i, rc;
-
- LASSERT(obd);
- rc = lprocfs_climp_check(obd);
- if (rc)
- return rc;
-
- imp = obd->u.cli.cl_import;
-
- now = ktime_get_real_seconds();
-
- /* Some network health info for kicks */
- s2dhms(&ts, now - imp->imp_last_reply_time);
- seq_printf(m, "%-10s : %lld, " DHMS_FMT " ago\n",
- "last reply", (s64)imp->imp_last_reply_time, DHMS_VARS(&ts));
-
- cur = at_get(&imp->imp_at.iat_net_latency);
- worst = imp->imp_at.iat_net_latency.at_worst_ever;
- worstt = imp->imp_at.iat_net_latency.at_worst_time;
- s2dhms(&ts, now - worstt);
- seq_printf(m, "%-10s : cur %3u worst %3u (at %lld, " DHMS_FMT " ago) ",
- "network", cur, worst, (s64)worstt, DHMS_VARS(&ts));
- lprocfs_at_hist_helper(m, &imp->imp_at.iat_net_latency);
-
- for (i = 0; i < IMP_AT_MAX_PORTALS; i++) {
- if (imp->imp_at.iat_portal[i] == 0)
- break;
- cur = at_get(&imp->imp_at.iat_service_estimate[i]);
- worst = imp->imp_at.iat_service_estimate[i].at_worst_ever;
- worstt = imp->imp_at.iat_service_estimate[i].at_worst_time;
- s2dhms(&ts, now - worstt);
- seq_printf(m, "portal %-2d : cur %3u worst %3u (at %lld, "
- DHMS_FMT " ago) ", imp->imp_at.iat_portal[i],
- cur, worst, (s64)worstt, DHMS_VARS(&ts));
- lprocfs_at_hist_helper(m, &imp->imp_at.iat_service_estimate[i]);
- }
-
- up_read(&obd->u.cli.cl_sem);
- return 0;
-}
-EXPORT_SYMBOL(lprocfs_rd_timeouts);
-
-int lprocfs_rd_connect_flags(struct seq_file *m, void *data)
-{
- struct obd_device *obd = data;
- __u64 flags;
- int rc;
-
- rc = lprocfs_climp_check(obd);
- if (rc)
- return rc;
-
- flags = obd->u.cli.cl_import->imp_connect_data.ocd_connect_flags;
- seq_printf(m, "flags=%#llx\n", flags);
- obd_connect_seq_flags2str(m, flags, "\n");
- seq_puts(m, "\n");
- up_read(&obd->u.cli.cl_sem);
- return 0;
-}
-EXPORT_SYMBOL(lprocfs_rd_connect_flags);
-
-static struct attribute *obd_def_attrs[] = {
- &lustre_attr_blocksize.attr,
- &lustre_attr_kbytestotal.attr,
- &lustre_attr_kbytesfree.attr,
- &lustre_attr_kbytesavail.attr,
- &lustre_attr_filestotal.attr,
- &lustre_attr_filesfree.attr,
- &lustre_attr_uuid.attr,
- NULL,
-};
-
-static void obd_sysfs_release(struct kobject *kobj)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
-
- complete(&obd->obd_kobj_unregister);
-}
-
-static struct kobj_type obd_ktype = {
- .default_attrs = obd_def_attrs,
- .sysfs_ops = &lustre_sysfs_ops,
- .release = obd_sysfs_release,
-};
-
-int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list,
- const struct attribute_group *attrs)
-{
- int rc = 0;
-
- init_completion(&obd->obd_kobj_unregister);
- rc = kobject_init_and_add(&obd->obd_kobj, &obd_ktype,
- obd->obd_type->typ_kobj,
- "%s", obd->obd_name);
- if (rc)
- return rc;
-
- if (attrs) {
- rc = sysfs_create_group(&obd->obd_kobj, attrs);
- if (rc) {
- kobject_put(&obd->obd_kobj);
- return rc;
- }
- }
-
- obd->obd_debugfs_entry = debugfs_create_dir(obd->obd_name,
- obd->obd_type->typ_debugfs_entry);
- ldebugfs_add_vars(obd->obd_debugfs_entry, list, obd);
-
- return rc;
-}
-EXPORT_SYMBOL_GPL(lprocfs_obd_setup);
-
-int lprocfs_obd_cleanup(struct obd_device *obd)
-{
- if (!obd)
- return -EINVAL;
-
- debugfs_remove_recursive(obd->obd_debugfs_entry);
-
- kobject_put(&obd->obd_kobj);
- wait_for_completion(&obd->obd_kobj_unregister);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(lprocfs_obd_cleanup);
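-
-/*
- * Setup and cleanup are paired. A minimal sketch for an obd type (the
- * vars table and attribute group names are hypothetical):
- *
- *	rc = lprocfs_obd_setup(obd, lprocfs_foo_obd_vars, &foo_attr_group);
- *	if (rc)
- *		return rc;
- *	...
- *	lprocfs_obd_cleanup(obd);
- *
- * The kobject_put() in cleanup drops the reference taken at setup time;
- * the release callback then completes obd_kobj_unregister, which is what
- * the wait_for_completion() above waits for.
- */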
-
-int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, unsigned int cpuid)
-{
- struct lprocfs_counter *cntr;
- unsigned int percpusize;
- int rc = -ENOMEM;
- unsigned long flags = 0;
- int i;
-
- LASSERT(!stats->ls_percpu[cpuid]);
- LASSERT((stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) == 0);
-
- percpusize = lprocfs_stats_counter_size(stats);
- stats->ls_percpu[cpuid] = kzalloc(percpusize, GFP_ATOMIC);
- if (stats->ls_percpu[cpuid]) {
- rc = 0;
- if (unlikely(stats->ls_biggest_alloc_num <= cpuid)) {
- if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
- spin_lock_irqsave(&stats->ls_lock, flags);
- else
- spin_lock(&stats->ls_lock);
- if (stats->ls_biggest_alloc_num <= cpuid)
- stats->ls_biggest_alloc_num = cpuid + 1;
- if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
- spin_unlock_irqrestore(&stats->ls_lock, flags);
- else
- spin_unlock(&stats->ls_lock);
- }
-		/* initialize the counters whose initial value is non-zero (lc_min) */
- for (i = 0; i < stats->ls_num; ++i) {
- cntr = lprocfs_stats_counter_get(stats, cpuid, i);
- cntr->lc_min = LC_MIN_INIT;
- }
- }
- return rc;
-}
-
-struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num,
- enum lprocfs_stats_flags flags)
-{
- struct lprocfs_stats *stats;
- unsigned int num_entry;
- unsigned int percpusize = 0;
- int i;
-
- if (num == 0)
- return NULL;
-
- if (lprocfs_no_percpu_stats != 0)
- flags |= LPROCFS_STATS_FLAG_NOPERCPU;
-
- if (flags & LPROCFS_STATS_FLAG_NOPERCPU)
- num_entry = 1;
- else
- num_entry = num_possible_cpus();
-
- /* alloc percpu pointers for all possible cpu slots */
- stats = kvzalloc(offsetof(typeof(*stats), ls_percpu[num_entry]),
- GFP_KERNEL);
- if (!stats)
- return NULL;
-
- stats->ls_num = num;
- stats->ls_flags = flags;
- spin_lock_init(&stats->ls_lock);
-
- /* alloc num of counter headers */
- stats->ls_cnt_header = kvmalloc_array(stats->ls_num,
- sizeof(struct lprocfs_counter_header),
- GFP_KERNEL | __GFP_ZERO);
- if (!stats->ls_cnt_header)
- goto fail;
-
- if ((flags & LPROCFS_STATS_FLAG_NOPERCPU) != 0) {
-		/* contains only one set of counters */
- percpusize = lprocfs_stats_counter_size(stats);
- stats->ls_percpu[0] = kzalloc(percpusize, GFP_ATOMIC);
- if (!stats->ls_percpu[0])
- goto fail;
- stats->ls_biggest_alloc_num = 1;
- } else if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) {
- /* alloc all percpu data */
- for (i = 0; i < num_entry; ++i)
- if (lprocfs_stats_alloc_one(stats, i) < 0)
- goto fail;
- }
-
- return stats;
-
-fail:
- lprocfs_free_stats(&stats);
- return NULL;
-}
-EXPORT_SYMBOL(lprocfs_alloc_stats);
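-
-/*
- * A minimal usage sketch (hypothetical caller; counter names invented):
- * allocate a stats block, describe each counter exactly once, then free
- * it through the handle so the caller's pointer is cleared.
- *
- *	struct lprocfs_stats *st;
- *
- *	st = lprocfs_alloc_stats(2, 0);
- *	if (!st)
- *		return -ENOMEM;
- *	lprocfs_counter_init(st, 0, 0, "example_reads", "reqs");
- *	lprocfs_counter_init(st, 1, 0, "example_writes", "reqs");
- *	...
- *	lprocfs_free_stats(&st);
- */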
-
-void lprocfs_free_stats(struct lprocfs_stats **statsh)
-{
- struct lprocfs_stats *stats = *statsh;
- unsigned int num_entry;
- unsigned int percpusize;
- unsigned int i;
-
- if (!stats || stats->ls_num == 0)
- return;
- *statsh = NULL;
-
- if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU)
- num_entry = 1;
- else
- num_entry = num_possible_cpus();
-
- percpusize = lprocfs_stats_counter_size(stats);
- for (i = 0; i < num_entry; i++)
- kfree(stats->ls_percpu[i]);
- kvfree(stats->ls_cnt_header);
- kvfree(stats);
-}
-EXPORT_SYMBOL(lprocfs_free_stats);
-
-__u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx,
- enum lprocfs_fields_flags field)
-{
- unsigned int i;
- unsigned int num_cpu;
- unsigned long flags = 0;
- __u64 ret = 0;
-
- LASSERT(stats);
-
- num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags);
- for (i = 0; i < num_cpu; i++) {
- if (!stats->ls_percpu[i])
- continue;
- ret += lprocfs_read_helper(
- lprocfs_stats_counter_get(stats, i, idx),
- &stats->ls_cnt_header[idx], stats->ls_flags,
- field);
- }
- lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
- return ret;
-}
-EXPORT_SYMBOL(lprocfs_stats_collector);
-
-void lprocfs_clear_stats(struct lprocfs_stats *stats)
-{
- struct lprocfs_counter *percpu_cntr;
- int i;
- int j;
- unsigned int num_entry;
- unsigned long flags = 0;
-
- num_entry = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags);
-
- for (i = 0; i < num_entry; i++) {
- if (!stats->ls_percpu[i])
- continue;
- for (j = 0; j < stats->ls_num; j++) {
- percpu_cntr = lprocfs_stats_counter_get(stats, i, j);
- percpu_cntr->lc_count = 0;
- percpu_cntr->lc_min = LC_MIN_INIT;
- percpu_cntr->lc_max = 0;
- percpu_cntr->lc_sumsquare = 0;
- percpu_cntr->lc_sum = 0;
- if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
- percpu_cntr->lc_sum_irq = 0;
- }
- }
-
- lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
-}
-EXPORT_SYMBOL(lprocfs_clear_stats);
-
-static ssize_t lprocfs_stats_seq_write(struct file *file,
- const char __user *buf,
- size_t len, loff_t *off)
-{
- struct seq_file *seq = file->private_data;
- struct lprocfs_stats *stats = seq->private;
-
- lprocfs_clear_stats(stats);
-
- return len;
-}
-
-static void *lprocfs_stats_seq_start(struct seq_file *p, loff_t *pos)
-{
- struct lprocfs_stats *stats = p->private;
-
- return (*pos < stats->ls_num) ? pos : NULL;
-}
-
-static void lprocfs_stats_seq_stop(struct seq_file *p, void *v)
-{
-}
-
-static void *lprocfs_stats_seq_next(struct seq_file *p, void *v, loff_t *pos)
-{
- (*pos)++;
- return lprocfs_stats_seq_start(p, pos);
-}
-
-/* seq file export of one lprocfs counter */
-static int lprocfs_stats_seq_show(struct seq_file *p, void *v)
-{
- struct lprocfs_stats *stats = p->private;
- struct lprocfs_counter_header *hdr;
- struct lprocfs_counter ctr;
- int idx = *(loff_t *)v;
-
- if (idx == 0) {
- struct timespec64 now;
-
- ktime_get_real_ts64(&now);
-		seq_printf(p, "%-25s %lld.%09lu secs.usecs\n",
- "snapshot_time",
- (s64)now.tv_sec, (unsigned long)now.tv_nsec);
- }
-
- hdr = &stats->ls_cnt_header[idx];
- lprocfs_stats_collect(stats, idx, &ctr);
-
- if (ctr.lc_count != 0) {
- seq_printf(p, "%-25s %lld samples [%s]",
- hdr->lc_name, ctr.lc_count, hdr->lc_units);
-
- if ((hdr->lc_config & LPROCFS_CNTR_AVGMINMAX) &&
- (ctr.lc_count > 0)) {
- seq_printf(p, " %lld %lld %lld",
- ctr.lc_min, ctr.lc_max, ctr.lc_sum);
- if (hdr->lc_config & LPROCFS_CNTR_STDDEV)
- seq_printf(p, " %lld", ctr.lc_sumsquare);
- }
- seq_putc(p, '\n');
- }
-
- return 0;
-}
-
-static const struct seq_operations lprocfs_stats_seq_sops = {
- .start = lprocfs_stats_seq_start,
- .stop = lprocfs_stats_seq_stop,
- .next = lprocfs_stats_seq_next,
- .show = lprocfs_stats_seq_show,
-};
-
-static int lprocfs_stats_seq_open(struct inode *inode, struct file *file)
-{
- struct seq_file *seq;
- int rc;
-
- rc = seq_open(file, &lprocfs_stats_seq_sops);
- if (rc)
- return rc;
-
- seq = file->private_data;
- seq->private = inode->i_private;
-
- return 0;
-}
-
-const struct file_operations lprocfs_stats_seq_fops = {
- .owner = THIS_MODULE,
- .open = lprocfs_stats_seq_open,
- .read = seq_read,
- .write = lprocfs_stats_seq_write,
- .llseek = seq_lseek,
- .release = lprocfs_seq_release,
-};
-EXPORT_SYMBOL_GPL(lprocfs_stats_seq_fops);
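-
-/*
- * For reference, the seq_file above emits one line per counter with a
- * non-zero count, preceded by a snapshot timestamp. Illustrative output
- * (all values invented):
- *
- *	snapshot_time             1522745049.012345678 secs.usecs
- *	example_reads             42 samples [reqs] 1 8 120
- *
- * The trailing "min max sum" triple appears only for counters configured
- * with LPROCFS_CNTR_AVGMINMAX, and a sumsquare column is appended only
- * with LPROCFS_CNTR_STDDEV.
- */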
-
-void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
- unsigned int conf, const char *name,
- const char *units)
-{
- struct lprocfs_counter_header *header;
- struct lprocfs_counter *percpu_cntr;
- unsigned long flags = 0;
- unsigned int i;
- unsigned int num_cpu;
-
- header = &stats->ls_cnt_header[index];
- LASSERTF(header, "Failed to allocate stats header:[%d]%s/%s\n",
- index, name, units);
-
- header->lc_config = conf;
- header->lc_name = name;
- header->lc_units = units;
-
- num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags);
- for (i = 0; i < num_cpu; ++i) {
- if (!stats->ls_percpu[i])
- continue;
- percpu_cntr = lprocfs_stats_counter_get(stats, i, index);
- percpu_cntr->lc_count = 0;
- percpu_cntr->lc_min = LC_MIN_INIT;
- percpu_cntr->lc_max = 0;
- percpu_cntr->lc_sumsquare = 0;
- percpu_cntr->lc_sum = 0;
- if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
- percpu_cntr->lc_sum_irq = 0;
- }
- lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
-}
-EXPORT_SYMBOL(lprocfs_counter_init);
-
-int lprocfs_exp_cleanup(struct obd_export *exp)
-{
- return 0;
-}
-EXPORT_SYMBOL(lprocfs_exp_cleanup);
-
-__s64 lprocfs_read_helper(struct lprocfs_counter *lc,
- struct lprocfs_counter_header *header,
- enum lprocfs_stats_flags flags,
- enum lprocfs_fields_flags field)
-{
- __s64 ret = 0;
-
- if (!lc || !header)
- return 0;
-
- switch (field) {
- case LPROCFS_FIELDS_FLAGS_CONFIG:
- ret = header->lc_config;
- break;
- case LPROCFS_FIELDS_FLAGS_SUM:
- ret = lc->lc_sum;
- if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
- ret += lc->lc_sum_irq;
- break;
- case LPROCFS_FIELDS_FLAGS_MIN:
- ret = lc->lc_min;
- break;
- case LPROCFS_FIELDS_FLAGS_MAX:
- ret = lc->lc_max;
- break;
- case LPROCFS_FIELDS_FLAGS_AVG:
- ret = (lc->lc_max - lc->lc_min) / 2;
- break;
- case LPROCFS_FIELDS_FLAGS_SUMSQUARE:
- ret = lc->lc_sumsquare;
- break;
- case LPROCFS_FIELDS_FLAGS_COUNT:
- ret = lc->lc_count;
- break;
- default:
- break;
- }
-
-	return ret;
-}
-EXPORT_SYMBOL(lprocfs_read_helper);
-
-int lprocfs_write_helper(const char __user *buffer, unsigned long count,
- int *val)
-{
- return lprocfs_write_frac_helper(buffer, count, val, 1);
-}
-EXPORT_SYMBOL(lprocfs_write_helper);
-
-int lprocfs_write_u64_helper(const char __user *buffer, unsigned long count,
- __u64 *val)
-{
- return lprocfs_write_frac_u64_helper(buffer, count, val, 1);
-}
-EXPORT_SYMBOL(lprocfs_write_u64_helper);
-
-int lprocfs_write_frac_u64_helper(const char __user *buffer,
- unsigned long count, __u64 *val, int mult)
-{
- char kernbuf[22], *end, *pbuf;
- __u64 whole, frac = 0, units;
- unsigned int frac_d = 1;
- int sign = 1;
-
- if (count > (sizeof(kernbuf) - 1))
- return -EINVAL;
-
- if (copy_from_user(kernbuf, buffer, count))
- return -EFAULT;
-
- kernbuf[count] = '\0';
- pbuf = kernbuf;
- if (*pbuf == '-') {
- sign = -1;
- pbuf++;
- }
-
- whole = simple_strtoull(pbuf, &end, 10);
- if (pbuf == end)
- return -EINVAL;
-
- if (*end == '.') {
- int i;
-
- pbuf = end + 1;
-
- /* need to limit frac_d to a __u32 */
- if (strlen(pbuf) > 10)
- pbuf[10] = '\0';
-
- frac = simple_strtoull(pbuf, &end, 10);
- /* count decimal places */
- for (i = 0; i < (end - pbuf); i++)
- frac_d *= 10;
- }
-
- units = 1;
- if (end) {
- switch (tolower(*end)) {
- case 'p':
- units <<= 10;
- /* fall through */
- case 't':
- units <<= 10;
- /* fall through */
- case 'g':
- units <<= 10;
- /* fall through */
- case 'm':
- units <<= 10;
- /* fall through */
- case 'k':
- units <<= 10;
- }
- }
- /* Specified units override the multiplier */
- if (units > 1)
- mult = units;
-
- frac *= mult;
- do_div(frac, frac_d);
- *val = sign * (whole * mult + frac);
- return 0;
-}
-EXPORT_SYMBOL(lprocfs_write_frac_u64_helper);
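-
-/*
- * Worked example of the parsing above (input invented): writing "1.5m"
- * with mult == 1 yields whole = 1, frac = 5, frac_d = 10 and
- * units = 1 << 20, so
- *
- *	*val = 1 * 1048576 + (5 * 1048576) / 10 = 1572864
- *
- * i.e. a unit suffix overrides the caller-supplied multiplier, and the
- * fractional part is scaled by the same unit before the division.
- */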
-
-static char *lprocfs_strnstr(const char *s1, const char *s2, size_t len)
-{
- size_t l2;
-
- l2 = strlen(s2);
- if (!l2)
- return (char *)s1;
- while (len >= l2) {
- len--;
- if (!memcmp(s1, s2, l2))
- return (char *)s1;
- s1++;
- }
- return NULL;
-}
-
-/**
- * Find the string \a name in the input \a buffer, and return a pointer to the
- * value immediately following \a name, reducing \a count appropriately.
- * If \a name is not found the original \a buffer is returned.
- */
-char *lprocfs_find_named_value(const char *buffer, const char *name,
- size_t *count)
-{
- char *val;
- size_t buflen = *count;
-
- /* there is no strnstr() in rhel5 and ubuntu kernels */
- val = lprocfs_strnstr(buffer, name, buflen);
- if (!val)
- return (char *)buffer;
-
- val += strlen(name); /* skip prefix */
- while (val < buffer + buflen && isspace(*val)) /* skip separator */
- val++;
-
- *count = 0;
- while (val < buffer + buflen && isalnum(*val)) {
- ++*count;
- ++val;
- }
-
- return val - *count;
-}
-EXPORT_SYMBOL(lprocfs_find_named_value);
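-
-/*
- * Example (buffer contents invented): with buffer = "size=42 rc=0" and
- * name = "size=", the function returns a pointer to "42" and sets
- * *count = 2; the trailing " rc=0" is excluded because the scan stops at
- * the first non-alphanumeric character.
- */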
-
-void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value)
-{
- if (value >= OBD_HIST_MAX)
- value = OBD_HIST_MAX - 1;
-
- spin_lock(&oh->oh_lock);
- oh->oh_buckets[value]++;
- spin_unlock(&oh->oh_lock);
-}
-EXPORT_SYMBOL(lprocfs_oh_tally);
-
-void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value)
-{
- unsigned int val = 0;
-
- if (likely(value != 0))
- val = min(fls(value - 1), OBD_HIST_MAX);
-
- lprocfs_oh_tally(oh, val);
-}
-EXPORT_SYMBOL(lprocfs_oh_tally_log2);
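-
-/*
- * Bucket mapping sketch for the log2 tally above: value 1 lands in
- * bucket 0 (fls(0) == 0), 2 in bucket 1, 3-4 in bucket 2, 5-8 in
- * bucket 3; in general (2^(n-1), 2^n] maps to bucket n, capped by the
- * min() here and clamped to OBD_HIST_MAX - 1 in lprocfs_oh_tally().
- */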
-
-unsigned long lprocfs_oh_sum(struct obd_histogram *oh)
-{
- unsigned long ret = 0;
- int i;
-
- for (i = 0; i < OBD_HIST_MAX; i++)
- ret += oh->oh_buckets[i];
- return ret;
-}
-EXPORT_SYMBOL(lprocfs_oh_sum);
-
-void lprocfs_oh_clear(struct obd_histogram *oh)
-{
- spin_lock(&oh->oh_lock);
- memset(oh->oh_buckets, 0, sizeof(oh->oh_buckets));
- spin_unlock(&oh->oh_lock);
-}
-EXPORT_SYMBOL(lprocfs_oh_clear);
-
-int lprocfs_wr_root_squash(const char __user *buffer, unsigned long count,
- struct root_squash_info *squash, char *name)
-{
- char kernbuf[64], *tmp, *errmsg;
- unsigned long uid, gid;
- int rc;
-
- if (count >= sizeof(kernbuf)) {
- errmsg = "string too long";
- rc = -EINVAL;
- goto failed_noprint;
- }
- if (copy_from_user(kernbuf, buffer, count)) {
- errmsg = "bad address";
- rc = -EFAULT;
- goto failed_noprint;
- }
- kernbuf[count] = '\0';
-
- /* look for uid gid separator */
- tmp = strchr(kernbuf, ':');
- if (!tmp) {
- errmsg = "needs uid:gid format";
- rc = -EINVAL;
- goto failed;
- }
- *tmp = '\0';
- tmp++;
-
- /* parse uid */
- if (kstrtoul(kernbuf, 0, &uid) != 0) {
- errmsg = "bad uid";
- rc = -EINVAL;
- goto failed;
- }
- /* parse gid */
- if (kstrtoul(tmp, 0, &gid) != 0) {
- errmsg = "bad gid";
- rc = -EINVAL;
- goto failed;
- }
-
- squash->rsi_uid = uid;
- squash->rsi_gid = gid;
-
- LCONSOLE_INFO("%s: root_squash is set to %u:%u\n",
- name, squash->rsi_uid, squash->rsi_gid);
- return count;
-
-failed:
- if (tmp) {
- tmp--;
- *tmp = ':';
- }
- CWARN("%s: failed to set root_squash to \"%s\", %s, rc = %d\n",
- name, kernbuf, errmsg, rc);
- return rc;
-failed_noprint:
- CWARN("%s: failed to set root_squash due to %s, rc = %d\n",
- name, errmsg, rc);
- return rc;
-}
-EXPORT_SYMBOL(lprocfs_wr_root_squash);
-
-int lprocfs_wr_nosquash_nids(const char __user *buffer, unsigned long count,
- struct root_squash_info *squash, char *name)
-{
- char *kernbuf = NULL, *errmsg;
- struct list_head tmp;
- int len = count;
- int rc;
-
- if (count > 4096) {
- errmsg = "string too long";
- rc = -EINVAL;
- goto failed;
- }
-
- kernbuf = kzalloc(count + 1, GFP_NOFS);
- if (!kernbuf) {
- errmsg = "no memory";
- rc = -ENOMEM;
- goto failed;
- }
-
- if (copy_from_user(kernbuf, buffer, count)) {
- errmsg = "bad address";
- rc = -EFAULT;
- goto failed;
- }
- kernbuf[count] = '\0';
-
- if (count > 0 && kernbuf[count - 1] == '\n')
- len = count - 1;
-
- if ((len == 4 && !strncmp(kernbuf, "NONE", len)) ||
- (len == 5 && !strncmp(kernbuf, "clear", len))) {
-		/* "NONE" and "clear" both empty the current list */
- down_write(&squash->rsi_sem);
- if (!list_empty(&squash->rsi_nosquash_nids))
- cfs_free_nidlist(&squash->rsi_nosquash_nids);
- up_write(&squash->rsi_sem);
- LCONSOLE_INFO("%s: nosquash_nids is cleared\n", name);
- kfree(kernbuf);
- return count;
- }
-
- INIT_LIST_HEAD(&tmp);
- if (cfs_parse_nidlist(kernbuf, count, &tmp) <= 0) {
- errmsg = "can't parse";
- rc = -EINVAL;
- goto failed;
- }
- LCONSOLE_INFO("%s: nosquash_nids set to %s\n",
- name, kernbuf);
- kfree(kernbuf);
- kernbuf = NULL;
-
- down_write(&squash->rsi_sem);
- if (!list_empty(&squash->rsi_nosquash_nids))
- cfs_free_nidlist(&squash->rsi_nosquash_nids);
- list_splice(&tmp, &squash->rsi_nosquash_nids);
- up_write(&squash->rsi_sem);
-
- return count;
-
-failed:
- if (kernbuf) {
- CWARN("%s: failed to set nosquash_nids to \"%s\", %s rc = %d\n",
- name, kernbuf, errmsg, rc);
- kfree(kernbuf);
- kernbuf = NULL;
- } else {
- CWARN("%s: failed to set nosquash_nids due to %s rc = %d\n",
- name, errmsg, rc);
- }
- return rc;
-}
-EXPORT_SYMBOL(lprocfs_wr_nosquash_nids);
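-
-/*
- * Accepted inputs, sketched (NIDs invented): a NID list such as
- * "10.0.0.2@tcp 10.0.0.3@tcp", or the literal strings "NONE" / "clear"
- * to empty the current list; the length check for those special strings
- * deliberately ignores a trailing newline.
- */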
-
-static ssize_t lustre_attr_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
-{
- struct lustre_attr *a = container_of(attr, struct lustre_attr, attr);
-
- return a->show ? a->show(kobj, attr, buf) : 0;
-}
-
-static ssize_t lustre_attr_store(struct kobject *kobj, struct attribute *attr,
- const char *buf, size_t len)
-{
- struct lustre_attr *a = container_of(attr, struct lustre_attr, attr);
-
- return a->store ? a->store(kobj, attr, buf, len) : len;
-}
-
-const struct sysfs_ops lustre_sysfs_ops = {
- .show = lustre_attr_show,
- .store = lustre_attr_store,
-};
-EXPORT_SYMBOL_GPL(lustre_sysfs_ops);
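-
-/*
- * A minimal sketch of an attribute dispatched through these ops (the
- * attribute and its show routine are hypothetical):
- *
- *	static ssize_t foo_show(struct kobject *kobj, struct attribute *attr,
- *				char *buf)
- *	{
- *		return sprintf(buf, "%d\n", 42);
- *	}
- *
- *	static struct lustre_attr lustre_attr_foo =
- *		__ATTR(foo, 0444, foo_show, NULL);
- *
- * lustre_attr_show() then routes sysfs reads of the "foo" file to
- * foo_show() via the container_of() above.
- */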
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
deleted file mode 100644
index aa9d74e087f4..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ /dev/null
@@ -1,2056 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdclass/lu_object.c
- *
- * Lustre Object.
- * These are the only exported functions; they provide generic
- * infrastructure for managing object devices.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <linux/module.h>
-
-/* hash_long() */
-#include <linux/libcfs/libcfs_hash.h>
-#include <obd_class.h>
-#include <obd_support.h>
-#include <lustre_disk.h>
-#include <lustre_fid.h>
-#include <lu_object.h>
-#include <cl_object.h>
-#include <lu_ref.h>
-#include <linux/list.h>
-
-struct lu_site_bkt_data {
- /**
- * LRU list, updated on each access to object. Protected by
- * bucket lock of lu_site::ls_obj_hash.
- *
-	 * The "cold" end of the LRU is lu_site::ls_lru.next. Accessed objects
-	 * are moved to lu_site::ls_lru.prev (this is due to the non-existence
- * of list_for_each_entry_safe_reverse()).
- */
- struct list_head lsb_lru;
- /**
- * Wait-queue signaled when an object in this site is ultimately
- * destroyed (lu_object_free()). It is used by lu_object_find() to
-	 * wait before re-trying when an object in the process of destruction
-	 * is found in the hash table.
- *
- * \see htable_lookup().
- */
- wait_queue_head_t lsb_marche_funebre;
-};
-
-enum {
- LU_CACHE_PERCENT_MAX = 50,
- LU_CACHE_PERCENT_DEFAULT = 20
-};
-
-#define LU_CACHE_NR_MAX_ADJUST 512
-#define LU_CACHE_NR_UNLIMITED -1
-#define LU_CACHE_NR_DEFAULT LU_CACHE_NR_UNLIMITED
-#define LU_CACHE_NR_LDISKFS_LIMIT LU_CACHE_NR_UNLIMITED
-#define LU_CACHE_NR_ZFS_LIMIT 256
-
-#define LU_SITE_BITS_MIN 12
-#define LU_SITE_BITS_MAX 24
-#define LU_SITE_BITS_MAX_CL 19
-/**
- * 256 buckets in total; we don't want too many buckets because they:
- * - consume too much memory
- * - lead to unbalanced (overly short) per-bucket LRU lists
- */
-#define LU_SITE_BKT_BITS 8
-
-static unsigned int lu_cache_percent = LU_CACHE_PERCENT_DEFAULT;
-module_param(lu_cache_percent, int, 0644);
-MODULE_PARM_DESC(lu_cache_percent, "Percentage of memory to be used as lu_object cache");
-
-static long lu_cache_nr = LU_CACHE_NR_DEFAULT;
-module_param(lu_cache_nr, long, 0644);
-MODULE_PARM_DESC(lu_cache_nr, "Maximum number of objects in lu_object cache");
-
-static void lu_object_free(const struct lu_env *env, struct lu_object *o);
-static __u32 ls_stats_read(struct lprocfs_stats *stats, int idx);
-
-wait_queue_head_t *
-lu_site_wq_from_fid(struct lu_site *site, struct lu_fid *fid)
-{
- struct cfs_hash_bd bd;
- struct lu_site_bkt_data *bkt;
-
- cfs_hash_bd_get(site->ls_obj_hash, fid, &bd);
- bkt = cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
- return &bkt->lsb_marche_funebre;
-}
-EXPORT_SYMBOL(lu_site_wq_from_fid);
-
-/**
- * Decrease reference counter on object. If last reference is freed, return
- * object to the cache, unless lu_object_is_dying(o) holds. In the latter
- * case, free object immediately.
- */
-void lu_object_put(const struct lu_env *env, struct lu_object *o)
-{
- struct lu_site_bkt_data *bkt;
- struct lu_object_header *top;
- struct lu_site *site;
- struct lu_object *orig;
- struct cfs_hash_bd bd;
- const struct lu_fid *fid;
-
- top = o->lo_header;
- site = o->lo_dev->ld_site;
- orig = o;
-
- /*
-	 * Until fids-on-OST is fully implemented, anonymous objects are
-	 * possible in OSP. Such an object isn't listed in the site hash,
-	 * so we should not remove it from the site.
- */
- fid = lu_object_fid(o);
- if (fid_is_zero(fid)) {
- LASSERT(!top->loh_hash.next && !top->loh_hash.pprev);
- LASSERT(list_empty(&top->loh_lru));
- if (!atomic_dec_and_test(&top->loh_ref))
- return;
- list_for_each_entry_reverse(o, &top->loh_layers, lo_linkage) {
- if (o->lo_ops->loo_object_release)
- o->lo_ops->loo_object_release(env, o);
- }
- lu_object_free(env, orig);
- return;
- }
-
- cfs_hash_bd_get(site->ls_obj_hash, &top->loh_fid, &bd);
- bkt = cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
-
- if (!cfs_hash_bd_dec_and_lock(site->ls_obj_hash, &bd, &top->loh_ref)) {
- if (lu_object_is_dying(top)) {
- /*
- * somebody may be waiting for this, currently only
- * used for cl_object, see cl_object_put_last().
- */
- wake_up_all(&bkt->lsb_marche_funebre);
- }
- return;
- }
-
- /*
- * When last reference is released, iterate over object
- * layers, and notify them that object is no longer busy.
- */
- list_for_each_entry_reverse(o, &top->loh_layers, lo_linkage) {
- if (o->lo_ops->loo_object_release)
- o->lo_ops->loo_object_release(env, o);
- }
-
- if (!lu_object_is_dying(top)) {
- LASSERT(list_empty(&top->loh_lru));
- list_add_tail(&top->loh_lru, &bkt->lsb_lru);
- percpu_counter_inc(&site->ls_lru_len_counter);
- CDEBUG(D_INODE, "Add %p to site lru. hash: %p, bkt: %p\n",
- o, site->ls_obj_hash, bkt);
- cfs_hash_bd_unlock(site->ls_obj_hash, &bd, 1);
- return;
- }
-
- /*
-	 * If the object is dying (will not be cached), then remove it
-	 * from the hash table and the LRU.
- *
- * This is done with hash table and LRU lists locked. As the only
- * way to acquire first reference to previously unreferenced
- * object is through hash-table lookup (lu_object_find()),
- * or LRU scanning (lu_site_purge()), that are done under hash-table
- * and LRU lock, no race with concurrent object lookup is possible
- * and we can safely destroy object below.
- */
- if (!test_and_set_bit(LU_OBJECT_UNHASHED, &top->loh_flags))
- cfs_hash_bd_del_locked(site->ls_obj_hash, &bd, &top->loh_hash);
- cfs_hash_bd_unlock(site->ls_obj_hash, &bd, 1);
- /*
- * Object was already removed from hash and lru above, can
- * kill it.
- */
- lu_object_free(env, orig);
-}
-EXPORT_SYMBOL(lu_object_put);
-
-/**
- * Kill the object and take it out of LRU cache.
- * Currently used by client code for layout change.
- */
-void lu_object_unhash(const struct lu_env *env, struct lu_object *o)
-{
- struct lu_object_header *top;
-
- top = o->lo_header;
- set_bit(LU_OBJECT_HEARD_BANSHEE, &top->loh_flags);
- if (!test_and_set_bit(LU_OBJECT_UNHASHED, &top->loh_flags)) {
- struct lu_site *site = o->lo_dev->ld_site;
- struct cfs_hash *obj_hash = site->ls_obj_hash;
- struct cfs_hash_bd bd;
-
- cfs_hash_bd_get_and_lock(obj_hash, &top->loh_fid, &bd, 1);
- if (!list_empty(&top->loh_lru)) {
- struct lu_site_bkt_data *bkt;
-
- list_del_init(&top->loh_lru);
- bkt = cfs_hash_bd_extra_get(obj_hash, &bd);
- percpu_counter_dec(&site->ls_lru_len_counter);
- }
- cfs_hash_bd_del_locked(obj_hash, &bd, &top->loh_hash);
- cfs_hash_bd_unlock(obj_hash, &bd, 1);
- }
-}
-EXPORT_SYMBOL(lu_object_unhash);
-
-/**
- * Allocate new object.
- *
- * This follows object creation protocol, described in the comment within
- * struct lu_device_operations definition.
- */
-static struct lu_object *lu_object_alloc(const struct lu_env *env,
- struct lu_device *dev,
- const struct lu_fid *f,
- const struct lu_object_conf *conf)
-{
- struct lu_object *scan;
- struct lu_object *top;
- struct list_head *layers;
- unsigned int init_mask = 0;
- unsigned int init_flag;
- int clean;
- int result;
-
- /*
- * Create top-level object slice. This will also create
- * lu_object_header.
- */
- top = dev->ld_ops->ldo_object_alloc(env, NULL, dev);
- if (!top)
- return ERR_PTR(-ENOMEM);
- if (IS_ERR(top))
- return top;
- /*
- * This is the only place where object fid is assigned. It's constant
- * after this point.
- */
- top->lo_header->loh_fid = *f;
- layers = &top->lo_header->loh_layers;
-
- do {
- /*
- * Call ->loo_object_init() repeatedly, until no more new
- * object slices are created.
- */
- clean = 1;
- init_flag = 1;
- list_for_each_entry(scan, layers, lo_linkage) {
- if (init_mask & init_flag)
- goto next;
- clean = 0;
- scan->lo_header = top->lo_header;
- result = scan->lo_ops->loo_object_init(env, scan, conf);
- if (result != 0) {
- lu_object_free(env, top);
- return ERR_PTR(result);
- }
- init_mask |= init_flag;
-next:
- init_flag <<= 1;
- }
- } while (!clean);
-
- list_for_each_entry_reverse(scan, layers, lo_linkage) {
- if (scan->lo_ops->loo_object_start) {
- result = scan->lo_ops->loo_object_start(env, scan);
- if (result != 0) {
- lu_object_free(env, top);
- return ERR_PTR(result);
- }
- }
- }
-
- lprocfs_counter_incr(dev->ld_site->ls_stats, LU_SS_CREATED);
- return top;
-}
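-
-/*
- * Illustration of the init_mask loop above (layer count invented): for a
- * three-layer stack the first pass sets bits 0x1, 0x2 and 0x4 as each
- * slice's ->loo_object_init() runs. If an init call appends a new slice
- * to loh_layers, that pass ends with clean == 0 and the list is walked
- * again; already-initialized slices are skipped via their init_mask bits,
- * until a pass completes without creating anything new.
- */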
-
-/**
- * Free an object.
- */
-static void lu_object_free(const struct lu_env *env, struct lu_object *o)
-{
- wait_queue_head_t *wq;
- struct lu_site *site;
- struct lu_object *scan;
- struct list_head *layers;
- struct list_head splice;
-
- site = o->lo_dev->ld_site;
- layers = &o->lo_header->loh_layers;
- wq = lu_site_wq_from_fid(site, &o->lo_header->loh_fid);
- /*
- * First call ->loo_object_delete() method to release all resources.
- */
- list_for_each_entry_reverse(scan, layers, lo_linkage) {
- if (scan->lo_ops->loo_object_delete)
- scan->lo_ops->loo_object_delete(env, scan);
- }
-
- /*
- * Then, splice object layers into stand-alone list, and call
- * ->loo_object_free() on all layers to free memory. Splice is
- * necessary, because lu_object_header is freed together with the
- * top-level slice.
- */
- INIT_LIST_HEAD(&splice);
- list_splice_init(layers, &splice);
- while (!list_empty(&splice)) {
- /*
- * Free layers in bottom-to-top order, so that object header
- * lives as long as possible and ->loo_object_free() methods
- * can look at its contents.
- */
- o = container_of(splice.prev, struct lu_object, lo_linkage);
- list_del_init(&o->lo_linkage);
- o->lo_ops->loo_object_free(env, o);
- }
-
- if (waitqueue_active(wq))
- wake_up_all(wq);
-}
-
-/**
- * Free \a nr objects from the cold end of the site LRU list.
- * If \a canblock is false, don't block waiting for another
- * instance of lu_site_purge() to complete.
- */
-int lu_site_purge_objects(const struct lu_env *env, struct lu_site *s,
- int nr, bool canblock)
-{
- struct lu_object_header *h;
- struct lu_object_header *temp;
- struct lu_site_bkt_data *bkt;
- struct cfs_hash_bd bd;
- struct cfs_hash_bd bd2;
- struct list_head dispose;
- int did_sth;
- unsigned int start = 0;
- int count;
- int bnr;
- unsigned int i;
-
- if (OBD_FAIL_CHECK(OBD_FAIL_OBD_NO_LRU))
- return 0;
-
- INIT_LIST_HEAD(&dispose);
- /*
- * Under LRU list lock, scan LRU list and move unreferenced objects to
- * the dispose list, removing them from LRU and hash table.
- */
- if (nr != ~0)
- start = s->ls_purge_start;
- bnr = (nr == ~0) ? -1 : nr / (int)CFS_HASH_NBKT(s->ls_obj_hash) + 1;
- again:
- /*
-	 * It doesn't make any sense to run purge threads in parallel; that
-	 * would only bring us trouble. See LU-5331.
- */
- if (canblock)
- mutex_lock(&s->ls_purge_mutex);
- else if (!mutex_trylock(&s->ls_purge_mutex))
- goto out;
-
- did_sth = 0;
- cfs_hash_for_each_bucket(s->ls_obj_hash, &bd, i) {
- if (i < start)
- continue;
- count = bnr;
- cfs_hash_bd_lock(s->ls_obj_hash, &bd, 1);
- bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, &bd);
-
- list_for_each_entry_safe(h, temp, &bkt->lsb_lru, loh_lru) {
- LASSERT(atomic_read(&h->loh_ref) == 0);
-
- cfs_hash_bd_get(s->ls_obj_hash, &h->loh_fid, &bd2);
- LASSERT(bd.bd_bucket == bd2.bd_bucket);
-
- cfs_hash_bd_del_locked(s->ls_obj_hash,
- &bd2, &h->loh_hash);
- list_move(&h->loh_lru, &dispose);
- percpu_counter_dec(&s->ls_lru_len_counter);
- if (did_sth == 0)
- did_sth = 1;
-
- if (nr != ~0 && --nr == 0)
- break;
-
- if (count > 0 && --count == 0)
- break;
- }
- cfs_hash_bd_unlock(s->ls_obj_hash, &bd, 1);
- cond_resched();
- /*
- * Free everything on the dispose list. This is safe against
- * races due to the reasons described in lu_object_put().
- */
- while (!list_empty(&dispose)) {
- h = container_of(dispose.next,
- struct lu_object_header, loh_lru);
- list_del_init(&h->loh_lru);
- lu_object_free(env, lu_object_top(h));
- lprocfs_counter_incr(s->ls_stats, LU_SS_LRU_PURGED);
- }
-
- if (nr == 0)
- break;
- }
- mutex_unlock(&s->ls_purge_mutex);
-
- if (nr != 0 && did_sth && start != 0) {
- start = 0; /* restart from the first bucket */
- goto again;
- }
- /* race on s->ls_purge_start, but nobody cares */
- s->ls_purge_start = i % CFS_HASH_NBKT(s->ls_obj_hash);
-out:
- return nr;
-}
-EXPORT_SYMBOL(lu_site_purge_objects);
-
-/*
- * Object printing.
- *
- * The code below has to jump through hoops to output an object description
- * into libcfs_debug_msg-based log. The problem is that lu_object_print()
- * composes object description from strings that are parts of _lines_ of
- * output (i.e., strings that are not terminated by newline). This doesn't fit
- * very well into libcfs_debug_msg() interface that assumes that each message
- * supplied to it is a self-contained output line.
- *
- * To work around this, strings are collected in a temporary buffer
- * (implemented as a value of lu_cdebug_key key), until terminating newline
- * character is detected.
- *
- */
-
-enum {
- /**
- * Maximal line size.
- *
- * XXX overflow is not handled correctly.
- */
- LU_CDEBUG_LINE = 512
-};
-
-struct lu_cdebug_data {
- /**
- * Temporary buffer.
- */
- char lck_area[LU_CDEBUG_LINE];
-};
-
-/* context key constructor/destructor: lu_global_key_init, lu_global_key_fini */
-LU_KEY_INIT_FINI(lu_global, struct lu_cdebug_data);
-
-/**
- * Key, holding temporary buffer. This key is registered very early by
- * lu_global_init().
- */
-static struct lu_context_key lu_global_key = {
- .lct_tags = LCT_MD_THREAD | LCT_DT_THREAD |
- LCT_MG_THREAD | LCT_CL_THREAD | LCT_LOCAL,
- .lct_init = lu_global_key_init,
- .lct_fini = lu_global_key_fini
-};
-
-/**
- * Printer function emitting messages through libcfs_debug_msg().
- */
-int lu_cdebug_printer(const struct lu_env *env,
- void *cookie, const char *format, ...)
-{
- struct libcfs_debug_msg_data *msgdata = cookie;
- struct lu_cdebug_data *key;
- int used;
- int complete;
- va_list args;
-
- va_start(args, format);
-
- key = lu_context_key_get(&env->le_ctx, &lu_global_key);
-
- used = strlen(key->lck_area);
- complete = format[strlen(format) - 1] == '\n';
- /*
- * Append new chunk to the buffer.
- */
- vsnprintf(key->lck_area + used,
- ARRAY_SIZE(key->lck_area) - used, format, args);
- if (complete) {
- if (cfs_cdebug_show(msgdata->msg_mask, msgdata->msg_subsys))
- libcfs_debug_msg(msgdata, "%s\n", key->lck_area);
- key->lck_area[0] = 0;
- }
- va_end(args);
- return 0;
-}
-EXPORT_SYMBOL(lu_cdebug_printer);
-
-/**
- * Print object header.
- */
-void lu_object_header_print(const struct lu_env *env, void *cookie,
- lu_printer_t printer,
- const struct lu_object_header *hdr)
-{
- (*printer)(env, cookie, "header@%p[%#lx, %d, " DFID "%s%s%s]",
- hdr, hdr->loh_flags, atomic_read(&hdr->loh_ref),
- PFID(&hdr->loh_fid),
- hlist_unhashed(&hdr->loh_hash) ? "" : " hash",
-		   list_empty((struct list_head *)&hdr->loh_lru) ?
- "" : " lru",
- hdr->loh_attr & LOHA_EXISTS ? " exist":"");
-}
-EXPORT_SYMBOL(lu_object_header_print);
-
-/**
- * Print human readable representation of the \a o to the \a printer.
- */
-void lu_object_print(const struct lu_env *env, void *cookie,
- lu_printer_t printer, const struct lu_object *o)
-{
- static const char ruler[] = "........................................";
- struct lu_object_header *top;
- int depth = 4;
-
- top = o->lo_header;
- lu_object_header_print(env, cookie, printer, top);
- (*printer)(env, cookie, "{\n");
-
- list_for_each_entry(o, &top->loh_layers, lo_linkage) {
- /*
- * print `.' \a depth times followed by type name and address
- */
- (*printer)(env, cookie, "%*.*s%s@%p", depth, depth, ruler,
- o->lo_dev->ld_type->ldt_name, o);
-
- if (o->lo_ops->loo_object_print)
- (*o->lo_ops->loo_object_print)(env, cookie, printer, o);
-
- (*printer)(env, cookie, "\n");
- }
-
- (*printer)(env, cookie, "} header@%p\n", top);
-}
-EXPORT_SYMBOL(lu_object_print);
-
-/*
- * NOTE: htable_lookup() is called with the relevant
- * hash bucket locked, but might drop and re-acquire the lock.
- */
-static struct lu_object *htable_lookup(struct lu_site *s,
- struct cfs_hash_bd *bd,
- const struct lu_fid *f,
- __u64 *version)
-{
- struct lu_site_bkt_data *bkt;
- struct lu_object_header *h;
- struct hlist_node *hnode;
- u64 ver = cfs_hash_bd_version_get(bd);
-
- if (*version == ver)
- return ERR_PTR(-ENOENT);
-
- *version = ver;
- bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, bd);
-	/* cfs_hash_bd_peek_locked() is a somewhat "internal" function
-	 * of cfs_hash; it doesn't take a refcount on the object.
- */
- hnode = cfs_hash_bd_peek_locked(s->ls_obj_hash, bd, (void *)f);
- if (!hnode) {
- lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_MISS);
- return ERR_PTR(-ENOENT);
- }
-
- h = container_of(hnode, struct lu_object_header, loh_hash);
- cfs_hash_get(s->ls_obj_hash, hnode);
- lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT);
- if (!list_empty(&h->loh_lru)) {
- list_del_init(&h->loh_lru);
- percpu_counter_dec(&s->ls_lru_len_counter);
- }
- return lu_object_top(h);
-}
-
-/**
- * Search cache for an object with the fid \a f. If such object is found,
- * return it. Otherwise, create new object, insert it into cache and return
- * it. In any case, additional reference is acquired on the returned object.
- */
-static struct lu_object *lu_object_find(const struct lu_env *env,
- struct lu_device *dev,
- const struct lu_fid *f,
- const struct lu_object_conf *conf)
-{
- return lu_object_find_at(env, dev->ld_site->ls_top_dev, f, conf);
-}
-
-/*
- * Limit the lu_object cache to a maximum of lu_cache_nr objects. Because
- * the calculation for the number of objects to reclaim is not covered by
- * a lock, the maximum number of objects is capped by LU_CACHE_NR_MAX_ADJUST.
- * This ensures that many concurrent threads will not accidentally purge
- * the entire cache.
- */
-static void lu_object_limit(const struct lu_env *env, struct lu_device *dev)
-{
- __u64 size, nr;
-
- if (lu_cache_nr == LU_CACHE_NR_UNLIMITED)
- return;
-
- size = cfs_hash_size_get(dev->ld_site->ls_obj_hash);
- nr = (__u64)lu_cache_nr;
- if (size <= nr)
- return;
-
- lu_site_purge_objects(env, dev->ld_site,
- min_t(__u64, size - nr, LU_CACHE_NR_MAX_ADJUST),
- false);
-}
-
-/**
- * Core logic of lu_object_find*() functions.
- *
- * Much like lu_object_find(), but top level device of object is specifically
- * \a dev rather than top level device of the site. This interface allows
- * objects of different "stacking" to be created within the same site.
- */
-struct lu_object *lu_object_find_at(const struct lu_env *env,
- struct lu_device *dev,
- const struct lu_fid *f,
- const struct lu_object_conf *conf)
-{
- struct lu_object *o;
- struct lu_object *shadow;
- struct lu_site *s;
- struct cfs_hash *hs;
- struct cfs_hash_bd bd;
- __u64 version = 0;
-
- /*
- * This uses standard index maintenance protocol:
- *
- * - search index under lock, and return object if found;
- * - otherwise, unlock index, allocate new object;
- * - lock index and search again;
- * - if nothing is found (usual case), insert newly created
- * object into index;
- * - otherwise (race: other thread inserted object), free
- * object just allocated.
- * - unlock index;
- * - return object.
- *
-	 * In the "LOC_F_NEW" case, we are sure the object is newly created.
-	 * It is unnecessary to perform lookup-alloc-lookup-insert; instead,
-	 * just allocate and insert directly.
- *
- */
- s = dev->ld_site;
- hs = s->ls_obj_hash;
-
- cfs_hash_bd_get(hs, f, &bd);
- if (!(conf && conf->loc_flags & LOC_F_NEW)) {
- cfs_hash_bd_lock(hs, &bd, 1);
- o = htable_lookup(s, &bd, f, &version);
- cfs_hash_bd_unlock(hs, &bd, 1);
-
- if (!IS_ERR(o) || PTR_ERR(o) != -ENOENT)
- return o;
- }
- /*
- * Allocate new object. This may result in rather complicated
- * operations, including fld queries, inode loading, etc.
- */
- o = lu_object_alloc(env, dev, f, conf);
- if (IS_ERR(o))
- return o;
-
- LASSERT(lu_fid_eq(lu_object_fid(o), f));
-
- cfs_hash_bd_lock(hs, &bd, 1);
-
- if (conf && conf->loc_flags & LOC_F_NEW)
- shadow = ERR_PTR(-ENOENT);
- else
- shadow = htable_lookup(s, &bd, f, &version);
- if (likely(PTR_ERR(shadow) == -ENOENT)) {
- cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
- cfs_hash_bd_unlock(hs, &bd, 1);
-
- lu_object_limit(env, dev);
-
- return o;
- }
-
- lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_RACE);
- cfs_hash_bd_unlock(hs, &bd, 1);
- lu_object_free(env, o);
- return shadow;
-}
-EXPORT_SYMBOL(lu_object_find_at);
-
-/**
- * Find object with given fid, and return its slice belonging to given device.
- */
-struct lu_object *lu_object_find_slice(const struct lu_env *env,
- struct lu_device *dev,
- const struct lu_fid *f,
- const struct lu_object_conf *conf)
-{
- struct lu_object *top;
- struct lu_object *obj;
-
- top = lu_object_find(env, dev, f, conf);
- if (IS_ERR(top))
- return top;
-
- obj = lu_object_locate(top->lo_header, dev->ld_type);
- if (unlikely(!obj)) {
- lu_object_put(env, top);
- obj = ERR_PTR(-ENOENT);
- }
-
- return obj;
-}
-EXPORT_SYMBOL(lu_object_find_slice);
-
-/**
- * Global list of all device types.
- */
-static LIST_HEAD(lu_device_types);
-
-int lu_device_type_init(struct lu_device_type *ldt)
-{
- int result = 0;
-
- atomic_set(&ldt->ldt_device_nr, 0);
- INIT_LIST_HEAD(&ldt->ldt_linkage);
- if (ldt->ldt_ops->ldto_init)
- result = ldt->ldt_ops->ldto_init(ldt);
-
- if (!result) {
- spin_lock(&obd_types_lock);
- list_add(&ldt->ldt_linkage, &lu_device_types);
- spin_unlock(&obd_types_lock);
- }
-
- return result;
-}
-EXPORT_SYMBOL(lu_device_type_init);
-
-void lu_device_type_fini(struct lu_device_type *ldt)
-{
- spin_lock(&obd_types_lock);
- list_del_init(&ldt->ldt_linkage);
- spin_unlock(&obd_types_lock);
- if (ldt->ldt_ops->ldto_fini)
- ldt->ldt_ops->ldto_fini(ldt);
-}
-EXPORT_SYMBOL(lu_device_type_fini);
-
-/**
- * Global list of all sites on this node
- */
-static LIST_HEAD(lu_sites);
-static DECLARE_RWSEM(lu_sites_guard);
-
-/**
- * Global environment used by site shrinker.
- */
-static struct lu_env lu_shrink_env;
-
-struct lu_site_print_arg {
- struct lu_env *lsp_env;
- void *lsp_cookie;
- lu_printer_t lsp_printer;
-};
-
-static int
-lu_site_obj_print(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode, void *data)
-{
- struct lu_site_print_arg *arg = (struct lu_site_print_arg *)data;
- struct lu_object_header *h;
-
- h = hlist_entry(hnode, struct lu_object_header, loh_hash);
- if (!list_empty(&h->loh_layers)) {
- const struct lu_object *o;
-
- o = lu_object_top(h);
- lu_object_print(arg->lsp_env, arg->lsp_cookie,
- arg->lsp_printer, o);
- } else {
- lu_object_header_print(arg->lsp_env, arg->lsp_cookie,
- arg->lsp_printer, h);
- }
- return 0;
-}
-
-/**
- * Print all objects in \a s.
- */
-void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie,
- lu_printer_t printer)
-{
- struct lu_site_print_arg arg = {
- .lsp_env = (struct lu_env *)env,
- .lsp_cookie = cookie,
- .lsp_printer = printer,
- };
-
- cfs_hash_for_each(s->ls_obj_hash, lu_site_obj_print, &arg);
-}
-EXPORT_SYMBOL(lu_site_print);
-
-/**
- * Return desired hash table order.
- */
-static unsigned long lu_htable_order(struct lu_device *top)
-{
- unsigned long bits_max = LU_SITE_BITS_MAX;
- unsigned long cache_size;
- unsigned long bits;
-
- if (!strcmp(top->ld_type->ldt_name, LUSTRE_VVP_NAME))
- bits_max = LU_SITE_BITS_MAX_CL;
-
- /*
- * Calculate hash table size, assuming that we want reasonable
- * performance when 20% of total memory is occupied by cache of
- * lu_objects.
- *
- * The size of an lu_object is (arbitrarily) taken as 1K (inode included).
- */
- cache_size = totalram_pages;
-
-#if BITS_PER_LONG == 32
- /* limit hashtable size for lowmem systems to low RAM */
- if (cache_size > 1 << (30 - PAGE_SHIFT))
-		cache_size = (1 << (30 - PAGE_SHIFT)) * 3 / 4;
-#endif
-
-	/* reset an unreasonable cache setting to the default */
- if (lu_cache_percent == 0 || lu_cache_percent > LU_CACHE_PERCENT_MAX) {
- CWARN("obdclass: invalid lu_cache_percent: %u, it must be in the range of (0, %u]. Will use default value: %u.\n",
- lu_cache_percent, LU_CACHE_PERCENT_MAX,
- LU_CACHE_PERCENT_DEFAULT);
-
- lu_cache_percent = LU_CACHE_PERCENT_DEFAULT;
- }
- cache_size = cache_size / 100 * lu_cache_percent *
- (PAGE_SIZE / 1024);
-
- for (bits = 1; (1 << bits) < cache_size; ++bits)
- ;
- return clamp_t(typeof(bits), bits, LU_SITE_BITS_MIN, bits_max);
-}
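-
-/*
- * Worked example of the sizing above (memory size invented): with 8 GiB
- * of RAM and 4 KiB pages, totalram_pages is 2097152, so at the default
- * lu_cache_percent of 20:
- *
- *	cache_size = 2097152 / 100 * 20 * (4096 / 1024) = 1677680
- *
- * The smallest bits with (1 << bits) >= 1677680 is 21, which is then
- * clamped into [LU_SITE_BITS_MIN, bits_max], i.e. down to 19 for a VVP
- * client stack and 21 otherwise.
- */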
-
-static unsigned int lu_obj_hop_hash(struct cfs_hash *hs,
- const void *key, unsigned int mask)
-{
- struct lu_fid *fid = (struct lu_fid *)key;
- __u32 hash;
-
- hash = fid_flatten32(fid);
- hash += (hash >> 4) + (hash << 12); /* mixing oid and seq */
- hash = hash_long(hash, hs->hs_bkt_bits);
-
- /* give me another random factor */
- hash -= hash_long((unsigned long)hs, fid_oid(fid) % 11 + 3);
-
- hash <<= hs->hs_cur_bits - hs->hs_bkt_bits;
- hash |= (fid_seq(fid) + fid_oid(fid)) & (CFS_HASH_NBKT(hs) - 1);
-
- return hash & mask;
-}
-
-static void *lu_obj_hop_object(struct hlist_node *hnode)
-{
- return hlist_entry(hnode, struct lu_object_header, loh_hash);
-}
-
-static void *lu_obj_hop_key(struct hlist_node *hnode)
-{
- struct lu_object_header *h;
-
- h = hlist_entry(hnode, struct lu_object_header, loh_hash);
- return &h->loh_fid;
-}
-
-static int lu_obj_hop_keycmp(const void *key, struct hlist_node *hnode)
-{
- struct lu_object_header *h;
-
- h = hlist_entry(hnode, struct lu_object_header, loh_hash);
- return lu_fid_eq(&h->loh_fid, (struct lu_fid *)key);
-}
-
-static void lu_obj_hop_get(struct cfs_hash *hs, struct hlist_node *hnode)
-{
- struct lu_object_header *h;
-
- h = hlist_entry(hnode, struct lu_object_header, loh_hash);
- atomic_inc(&h->loh_ref);
-}
-
-static void lu_obj_hop_put_locked(struct cfs_hash *hs, struct hlist_node *hnode)
-{
-	LBUG(); /* we should never call this */
-}
-
-static struct cfs_hash_ops lu_site_hash_ops = {
- .hs_hash = lu_obj_hop_hash,
- .hs_key = lu_obj_hop_key,
- .hs_keycmp = lu_obj_hop_keycmp,
- .hs_object = lu_obj_hop_object,
- .hs_get = lu_obj_hop_get,
- .hs_put_locked = lu_obj_hop_put_locked,
-};
-
-static void lu_dev_add_linkage(struct lu_site *s, struct lu_device *d)
-{
- spin_lock(&s->ls_ld_lock);
- if (list_empty(&d->ld_linkage))
- list_add(&d->ld_linkage, &s->ls_ld_linkage);
- spin_unlock(&s->ls_ld_lock);
-}
-
-/**
- * Initialize site \a s, with \a d as the top level device.
- */
-int lu_site_init(struct lu_site *s, struct lu_device *top)
-{
- struct lu_site_bkt_data *bkt;
- struct cfs_hash_bd bd;
- unsigned long bits;
- unsigned long i;
- char name[16];
- int rc;
-
- memset(s, 0, sizeof(*s));
- mutex_init(&s->ls_purge_mutex);
-
- rc = percpu_counter_init(&s->ls_lru_len_counter, 0, GFP_NOFS);
- if (rc)
- return -ENOMEM;
-
- snprintf(name, sizeof(name), "lu_site_%s", top->ld_type->ldt_name);
- for (bits = lu_htable_order(top); bits >= LU_SITE_BITS_MIN; bits--) {
- s->ls_obj_hash = cfs_hash_create(name, bits, bits,
- bits - LU_SITE_BKT_BITS,
- sizeof(*bkt), 0, 0,
- &lu_site_hash_ops,
- CFS_HASH_SPIN_BKTLOCK |
- CFS_HASH_NO_ITEMREF |
- CFS_HASH_DEPTH |
- CFS_HASH_ASSERT_EMPTY |
- CFS_HASH_COUNTER);
- if (s->ls_obj_hash)
- break;
- }
-
- if (!s->ls_obj_hash) {
- CERROR("failed to create lu_site hash with bits: %lu\n", bits);
- return -ENOMEM;
- }
-
- cfs_hash_for_each_bucket(s->ls_obj_hash, &bd, i) {
- bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, &bd);
- INIT_LIST_HEAD(&bkt->lsb_lru);
- init_waitqueue_head(&bkt->lsb_marche_funebre);
- }
-
- s->ls_stats = lprocfs_alloc_stats(LU_SS_LAST_STAT, 0);
- if (!s->ls_stats) {
- cfs_hash_putref(s->ls_obj_hash);
- s->ls_obj_hash = NULL;
- return -ENOMEM;
- }
-
- lprocfs_counter_init(s->ls_stats, LU_SS_CREATED,
- 0, "created", "created");
- lprocfs_counter_init(s->ls_stats, LU_SS_CACHE_HIT,
- 0, "cache_hit", "cache_hit");
- lprocfs_counter_init(s->ls_stats, LU_SS_CACHE_MISS,
- 0, "cache_miss", "cache_miss");
- lprocfs_counter_init(s->ls_stats, LU_SS_CACHE_RACE,
- 0, "cache_race", "cache_race");
- lprocfs_counter_init(s->ls_stats, LU_SS_CACHE_DEATH_RACE,
- 0, "cache_death_race", "cache_death_race");
- lprocfs_counter_init(s->ls_stats, LU_SS_LRU_PURGED,
- 0, "lru_purged", "lru_purged");
-
- INIT_LIST_HEAD(&s->ls_linkage);
- s->ls_top_dev = top;
- top->ld_site = s;
- lu_device_get(top);
- lu_ref_add(&top->ld_reference, "site-top", s);
-
- INIT_LIST_HEAD(&s->ls_ld_linkage);
- spin_lock_init(&s->ls_ld_lock);
-
- lu_dev_add_linkage(s, top);
-
- return 0;
-}
-EXPORT_SYMBOL(lu_site_init);
-
-/**
- * Finalize \a s and release its resources.
- */
-void lu_site_fini(struct lu_site *s)
-{
- down_write(&lu_sites_guard);
- list_del_init(&s->ls_linkage);
- up_write(&lu_sites_guard);
-
- percpu_counter_destroy(&s->ls_lru_len_counter);
-
- if (s->ls_obj_hash) {
- cfs_hash_putref(s->ls_obj_hash);
- s->ls_obj_hash = NULL;
- }
-
- if (s->ls_top_dev) {
- s->ls_top_dev->ld_site = NULL;
- lu_ref_del(&s->ls_top_dev->ld_reference, "site-top", s);
- lu_device_put(s->ls_top_dev);
- s->ls_top_dev = NULL;
- }
-
- if (s->ls_stats)
- lprocfs_free_stats(&s->ls_stats);
-}
-EXPORT_SYMBOL(lu_site_fini);
-
-/**
- * Called when initialization of stack for this site is completed.
- */
-int lu_site_init_finish(struct lu_site *s)
-{
- int result;
-
- down_write(&lu_sites_guard);
- result = lu_context_refill(&lu_shrink_env.le_ctx);
- if (result == 0)
- list_add(&s->ls_linkage, &lu_sites);
- up_write(&lu_sites_guard);
- return result;
-}
-EXPORT_SYMBOL(lu_site_init_finish);
-
-/**
- * Acquire additional reference on device \a d
- */
-void lu_device_get(struct lu_device *d)
-{
- atomic_inc(&d->ld_ref);
-}
-EXPORT_SYMBOL(lu_device_get);
-
-/**
- * Release reference on device \a d.
- */
-void lu_device_put(struct lu_device *d)
-{
- LASSERT(atomic_read(&d->ld_ref) > 0);
- atomic_dec(&d->ld_ref);
-}
-EXPORT_SYMBOL(lu_device_put);
-
-/**
- * Initialize device \a d of type \a t.
- */
-int lu_device_init(struct lu_device *d, struct lu_device_type *t)
-{
- if (atomic_inc_return(&t->ldt_device_nr) == 1 &&
- t->ldt_ops->ldto_start)
- t->ldt_ops->ldto_start(t);
-
- memset(d, 0, sizeof(*d));
- atomic_set(&d->ld_ref, 0);
- d->ld_type = t;
- lu_ref_init(&d->ld_reference);
- INIT_LIST_HEAD(&d->ld_linkage);
- return 0;
-}
-EXPORT_SYMBOL(lu_device_init);
-
-/**
- * Finalize device \a d.
- */
-void lu_device_fini(struct lu_device *d)
-{
- struct lu_device_type *t = d->ld_type;
-
- if (d->ld_obd) {
- d->ld_obd->obd_lu_dev = NULL;
- d->ld_obd = NULL;
- }
-
- lu_ref_fini(&d->ld_reference);
- LASSERTF(atomic_read(&d->ld_ref) == 0,
- "Refcount is %u\n", atomic_read(&d->ld_ref));
- LASSERT(atomic_read(&t->ldt_device_nr) > 0);
-
- if (atomic_dec_and_test(&t->ldt_device_nr) &&
- t->ldt_ops->ldto_stop)
- t->ldt_ops->ldto_stop(t);
-}
-EXPORT_SYMBOL(lu_device_fini);
-
-/**
- * Initialize object \a o that is part of compound object \a h and was created
- * by device \a d.
- */
-int lu_object_init(struct lu_object *o, struct lu_object_header *h,
- struct lu_device *d)
-{
- memset(o, 0, sizeof(*o));
- o->lo_header = h;
- o->lo_dev = d;
- lu_device_get(d);
- lu_ref_add_at(&d->ld_reference, &o->lo_dev_ref, "lu_object", o);
- INIT_LIST_HEAD(&o->lo_linkage);
-
- return 0;
-}
-EXPORT_SYMBOL(lu_object_init);
-
-/**
- * Finalize object and release its resources.
- */
-void lu_object_fini(struct lu_object *o)
-{
- struct lu_device *dev = o->lo_dev;
-
- LASSERT(list_empty(&o->lo_linkage));
-
- if (dev) {
- lu_ref_del_at(&dev->ld_reference, &o->lo_dev_ref,
- "lu_object", o);
- lu_device_put(dev);
- o->lo_dev = NULL;
- }
-}
-EXPORT_SYMBOL(lu_object_fini);
-
-/**
- * Add object \a o as first layer of compound object \a h
- *
- * This is typically called by the ->ldo_object_alloc() method of top-level
- * device.
- */
-void lu_object_add_top(struct lu_object_header *h, struct lu_object *o)
-{
- list_move(&o->lo_linkage, &h->loh_layers);
-}
-EXPORT_SYMBOL(lu_object_add_top);
-
-/**
- * Add object \a o as a layer of compound object, going after \a before.
- *
- * This is typically called by the ->ldo_object_alloc() method of \a
- * before->lo_dev.
- */
-void lu_object_add(struct lu_object *before, struct lu_object *o)
-{
- list_move(&o->lo_linkage, &before->lo_linkage);
-}
-EXPORT_SYMBOL(lu_object_add);
-
-/**
- * Initialize compound object.
- */
-int lu_object_header_init(struct lu_object_header *h)
-{
- memset(h, 0, sizeof(*h));
- atomic_set(&h->loh_ref, 1);
- INIT_HLIST_NODE(&h->loh_hash);
- INIT_LIST_HEAD(&h->loh_lru);
- INIT_LIST_HEAD(&h->loh_layers);
- lu_ref_init(&h->loh_reference);
- return 0;
-}
-EXPORT_SYMBOL(lu_object_header_init);
-
-/**
- * Finalize compound object.
- */
-void lu_object_header_fini(struct lu_object_header *h)
-{
- LASSERT(list_empty(&h->loh_layers));
- LASSERT(list_empty(&h->loh_lru));
- LASSERT(hlist_unhashed(&h->loh_hash));
- lu_ref_fini(&h->loh_reference);
-}
-EXPORT_SYMBOL(lu_object_header_fini);
-
-/**
- * Given a compound object, find its slice, corresponding to the device type
- * \a dtype.
- */
-struct lu_object *lu_object_locate(struct lu_object_header *h,
- const struct lu_device_type *dtype)
-{
- struct lu_object *o;
-
- list_for_each_entry(o, &h->loh_layers, lo_linkage) {
- if (o->lo_dev->ld_type == dtype)
- return o;
- }
- return NULL;
-}
-EXPORT_SYMBOL(lu_object_locate);
-
-/**
- * Finalize and free devices in the device stack.
- *
- * Finalize device stack by purging object cache, and calling
- * lu_device_type_operations::ldto_device_fini() and
- * lu_device_type_operations::ldto_device_free() on all devices in the stack.
- */
-void lu_stack_fini(const struct lu_env *env, struct lu_device *top)
-{
- struct lu_site *site = top->ld_site;
- struct lu_device *scan;
- struct lu_device *next;
-
- lu_site_purge(env, site, ~0);
- for (scan = top; scan; scan = next) {
- next = scan->ld_type->ldt_ops->ldto_device_fini(env, scan);
- lu_ref_del(&scan->ld_reference, "lu-stack", &lu_site_init);
- lu_device_put(scan);
- }
-
- /* purge again. */
- lu_site_purge(env, site, ~0);
-
- for (scan = top; scan; scan = next) {
- const struct lu_device_type *ldt = scan->ld_type;
- struct obd_type *type;
-
- next = ldt->ldt_ops->ldto_device_free(env, scan);
- type = ldt->ldt_obd_type;
- if (type) {
- type->typ_refcnt--;
- class_put_type(type);
- }
- }
-}
-
-enum {
- /**
- * Maximal number of tld slots.
- */
- LU_CONTEXT_KEY_NR = 40
-};
-
-static struct lu_context_key *lu_keys[LU_CONTEXT_KEY_NR] = { NULL, };
-
-static DEFINE_RWLOCK(lu_keys_guard);
-static atomic_t lu_key_initing_cnt = ATOMIC_INIT(0);
-
-/**
- * Global counter incremented whenever key is registered, unregistered,
- * revived or quiesced. This is used to avoid unnecessary calls to
- * lu_context_refill(). No locking is provided, as initialization and shutdown
- * are supposed to be externally serialized.
- */
-static unsigned int key_set_version;
-
-/**
- * Register new key.
- */
-int lu_context_key_register(struct lu_context_key *key)
-{
- int result;
- unsigned int i;
-
- LASSERT(key->lct_init);
- LASSERT(key->lct_fini);
- LASSERT(key->lct_tags != 0);
-
- result = -ENFILE;
- write_lock(&lu_keys_guard);
- for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) {
- if (!lu_keys[i]) {
- key->lct_index = i;
- atomic_set(&key->lct_used, 1);
- lu_keys[i] = key;
- lu_ref_init(&key->lct_reference);
- result = 0;
- ++key_set_version;
- break;
- }
- }
- write_unlock(&lu_keys_guard);
- return result;
-}
-EXPORT_SYMBOL(lu_context_key_register);
-
-static void key_fini(struct lu_context *ctx, int index)
-{
- if (ctx->lc_value && ctx->lc_value[index]) {
- struct lu_context_key *key;
-
- key = lu_keys[index];
- LASSERT(atomic_read(&key->lct_used) > 1);
-
- key->lct_fini(ctx, key, ctx->lc_value[index]);
- lu_ref_del(&key->lct_reference, "ctx", ctx);
- atomic_dec(&key->lct_used);
-
- if ((ctx->lc_tags & LCT_NOREF) == 0)
- module_put(key->lct_owner);
- ctx->lc_value[index] = NULL;
- }
-}
-
-/**
- * Deregister key.
- */
-void lu_context_key_degister(struct lu_context_key *key)
-{
- LASSERT(atomic_read(&key->lct_used) >= 1);
- LINVRNT(0 <= key->lct_index && key->lct_index < ARRAY_SIZE(lu_keys));
-
- lu_context_key_quiesce(key);
-
- write_lock(&lu_keys_guard);
- ++key_set_version;
- key_fini(&lu_shrink_env.le_ctx, key->lct_index);
-
- /**
- * Wait until all transient contexts referencing this key have
- * run lu_context_key::lct_fini() method.
- */
- while (atomic_read(&key->lct_used) > 1) {
- write_unlock(&lu_keys_guard);
- CDEBUG(D_INFO, "%s: \"%s\" %p, %d\n",
- __func__, module_name(key->lct_owner),
- key, atomic_read(&key->lct_used));
- schedule();
- write_lock(&lu_keys_guard);
- }
- if (lu_keys[key->lct_index]) {
- lu_keys[key->lct_index] = NULL;
- lu_ref_fini(&key->lct_reference);
- }
- write_unlock(&lu_keys_guard);
-
- LASSERTF(atomic_read(&key->lct_used) == 1,
- "key has instances: %d\n",
- atomic_read(&key->lct_used));
-}
-EXPORT_SYMBOL(lu_context_key_degister);
-
-/**
- * Register a number of keys. This has to be called after all keys have been
- * initialized by a call to LU_CONTEXT_KEY_INIT().
- */
-int lu_context_key_register_many(struct lu_context_key *k, ...)
-{
- struct lu_context_key *key = k;
- va_list args;
- int result;
-
- va_start(args, k);
- do {
- result = lu_context_key_register(key);
- if (result)
- break;
- key = va_arg(args, struct lu_context_key *);
- } while (key);
- va_end(args);
-
- if (result != 0) {
- va_start(args, k);
- while (k != key) {
- lu_context_key_degister(k);
- k = va_arg(args, struct lu_context_key *);
- }
- va_end(args);
- }
-
- return result;
-}
-EXPORT_SYMBOL(lu_context_key_register_many);
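-
-/*
- * Editor's note: the vararg list must be NULL-terminated. An
- * illustrative call, with hypothetical keys:
- *
- *	rc = lu_context_key_register_many(&foo_thread_key,
- *					  &bar_thread_key, NULL);
- *
- * On failure the keys registered so far are degistered again, so the
- * call is all-or-nothing.
- */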
-
-/**
- * De-register a number of keys. This is a dual to
- * lu_context_key_register_many().
- */
-void lu_context_key_degister_many(struct lu_context_key *k, ...)
-{
- va_list args;
-
- va_start(args, k);
- do {
- lu_context_key_degister(k);
-		k = va_arg(args, struct lu_context_key *);
- } while (k);
- va_end(args);
-}
-EXPORT_SYMBOL(lu_context_key_degister_many);
-
-/**
- * Revive a number of keys.
- */
-void lu_context_key_revive_many(struct lu_context_key *k, ...)
-{
- va_list args;
-
- va_start(args, k);
- do {
- lu_context_key_revive(k);
-		k = va_arg(args, struct lu_context_key *);
- } while (k);
- va_end(args);
-}
-EXPORT_SYMBOL(lu_context_key_revive_many);
-
-/**
- * Quiesce a number of keys.
- */
-void lu_context_key_quiesce_many(struct lu_context_key *k, ...)
-{
- va_list args;
-
- va_start(args, k);
- do {
- lu_context_key_quiesce(k);
-		k = va_arg(args, struct lu_context_key *);
- } while (k);
- va_end(args);
-}
-EXPORT_SYMBOL(lu_context_key_quiesce_many);
-
-/**
- * Return value associated with key \a key in context \a ctx.
- */
-void *lu_context_key_get(const struct lu_context *ctx,
- const struct lu_context_key *key)
-{
- LINVRNT(ctx->lc_state == LCS_ENTERED);
- LINVRNT(0 <= key->lct_index && key->lct_index < ARRAY_SIZE(lu_keys));
- LASSERT(lu_keys[key->lct_index] == key);
- return ctx->lc_value[key->lct_index];
-}
-EXPORT_SYMBOL(lu_context_key_get);
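-
-/*
- * Editor's note: a typical lookup, using the hypothetical key from the
- * sketch above; the context must have been entered and filled:
- *
- *	struct foo_thread_info *info;
- *
- *	info = lu_context_key_get(&env->le_ctx, &foo_thread_key);
- *	LASSERT(info);
- *
- * The value itself was created by the key's lct_init() method when the
- * context was filled.
- */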
-
-/**
- * List of remembered contexts: contexts created with LCT_REMEMBER are
- * linked here so that lu_context_key_quiesce() can finalize their key
- * values when the module owning a key is unloaded.
- */
-static LIST_HEAD(lu_context_remembered);
-
-/**
- * Destroy \a key in all remembered contexts. This is used to destroy key
- * values in "shared" contexts (like service threads), when a module owning
- * the key is about to be unloaded.
- */
-void lu_context_key_quiesce(struct lu_context_key *key)
-{
- struct lu_context *ctx;
-
- if (!(key->lct_tags & LCT_QUIESCENT)) {
- /*
- * XXX memory barrier has to go here.
- */
- write_lock(&lu_keys_guard);
- key->lct_tags |= LCT_QUIESCENT;
-
- /**
- * Wait until all lu_context_key::lct_init() methods
- * have completed.
- */
- while (atomic_read(&lu_key_initing_cnt) > 0) {
- write_unlock(&lu_keys_guard);
- CDEBUG(D_INFO, "%s: \"%s\" %p, %d (%d)\n",
- __func__,
- module_name(key->lct_owner),
- key, atomic_read(&key->lct_used),
- atomic_read(&lu_key_initing_cnt));
- schedule();
- write_lock(&lu_keys_guard);
- }
-
- list_for_each_entry(ctx, &lu_context_remembered, lc_remember)
- key_fini(ctx, key->lct_index);
-
- ++key_set_version;
- write_unlock(&lu_keys_guard);
- }
-}
-
-void lu_context_key_revive(struct lu_context_key *key)
-{
- write_lock(&lu_keys_guard);
- key->lct_tags &= ~LCT_QUIESCENT;
- ++key_set_version;
- write_unlock(&lu_keys_guard);
-}
-
-static void keys_fini(struct lu_context *ctx)
-{
- unsigned int i;
-
- if (!ctx->lc_value)
- return;
-
- for (i = 0; i < ARRAY_SIZE(lu_keys); ++i)
- key_fini(ctx, i);
-
- kfree(ctx->lc_value);
- ctx->lc_value = NULL;
-}
-
-static int keys_fill(struct lu_context *ctx)
-{
- unsigned int pre_version;
- unsigned int i;
-
- /*
- * Serialisation with lu_context_key_quiesce() is needed, but some
- * key->lct_init() methods call kernel memory allocation routines and
- * cannot run while a spin_lock is held.
- * "lu_keys_guard" is held while incrementing "lu_key_initing_cnt"
- * to mark the start of the serialised section.
- * An atomic_t variable is still used so that the lock need not be
- * reacquired when decrementing the counter.
- */
- read_lock(&lu_keys_guard);
- atomic_inc(&lu_key_initing_cnt);
- pre_version = key_set_version;
- read_unlock(&lu_keys_guard);
-
-refill:
- LINVRNT(ctx->lc_value);
- for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) {
- struct lu_context_key *key;
-
- key = lu_keys[i];
- if (!ctx->lc_value[i] && key &&
- (key->lct_tags & ctx->lc_tags) &&
- /*
- * Don't create values for a LCT_QUIESCENT key, as this
- * will pin module owning a key.
- */
- !(key->lct_tags & LCT_QUIESCENT)) {
- void *value;
-
- LINVRNT(key->lct_init);
- LINVRNT(key->lct_index == i);
-
- if (!(ctx->lc_tags & LCT_NOREF) &&
- !try_module_get(key->lct_owner)) {
- /* module is unloading, skip this key */
- continue;
- }
-
- value = key->lct_init(ctx, key);
- if (unlikely(IS_ERR(value))) {
- atomic_dec(&lu_key_initing_cnt);
- return PTR_ERR(value);
- }
-
- lu_ref_add_atomic(&key->lct_reference, "ctx", ctx);
- atomic_inc(&key->lct_used);
- /*
- * This is the only place in the code where an element
- * of the ctx->lc_value[] array is set to a non-NULL value.
- */
- ctx->lc_value[i] = value;
- if (key->lct_exit)
- ctx->lc_tags |= LCT_HAS_EXIT;
- }
- }
-
- read_lock(&lu_keys_guard);
- if (pre_version != key_set_version) {
- pre_version = key_set_version;
- read_unlock(&lu_keys_guard);
- goto refill;
- }
- ctx->lc_version = key_set_version;
- atomic_dec(&lu_key_initing_cnt);
- read_unlock(&lu_keys_guard);
- return 0;
-}
-
-static int keys_init(struct lu_context *ctx)
-{
- ctx->lc_value = kcalloc(ARRAY_SIZE(lu_keys), sizeof(ctx->lc_value[0]),
- GFP_NOFS);
- if (likely(ctx->lc_value))
- return keys_fill(ctx);
-
- return -ENOMEM;
-}
-
-/**
- * Initialize context data-structure. Create values for all keys.
- */
-int lu_context_init(struct lu_context *ctx, __u32 tags)
-{
- int rc;
-
- memset(ctx, 0, sizeof(*ctx));
- ctx->lc_state = LCS_INITIALIZED;
- ctx->lc_tags = tags;
- if (tags & LCT_REMEMBER) {
- write_lock(&lu_keys_guard);
- list_add(&ctx->lc_remember, &lu_context_remembered);
- write_unlock(&lu_keys_guard);
- } else {
- INIT_LIST_HEAD(&ctx->lc_remember);
- }
-
- rc = keys_init(ctx);
- if (rc != 0)
- lu_context_fini(ctx);
-
- return rc;
-}
-EXPORT_SYMBOL(lu_context_init);
-
-/**
- * Finalize context data-structure. Destroy key values.
- */
-void lu_context_fini(struct lu_context *ctx)
-{
- LINVRNT(ctx->lc_state == LCS_INITIALIZED || ctx->lc_state == LCS_LEFT);
- ctx->lc_state = LCS_FINALIZED;
-
- if ((ctx->lc_tags & LCT_REMEMBER) == 0) {
- LASSERT(list_empty(&ctx->lc_remember));
- keys_fini(ctx);
-
- } else { /* could race with key degister */
- write_lock(&lu_keys_guard);
- keys_fini(ctx);
- list_del_init(&ctx->lc_remember);
- write_unlock(&lu_keys_guard);
- }
-}
-EXPORT_SYMBOL(lu_context_fini);
-
-/**
- * Called before entering context.
- */
-void lu_context_enter(struct lu_context *ctx)
-{
- LINVRNT(ctx->lc_state == LCS_INITIALIZED || ctx->lc_state == LCS_LEFT);
- ctx->lc_state = LCS_ENTERED;
-}
-EXPORT_SYMBOL(lu_context_enter);
-
-/**
- * Called after exiting from \a ctx
- */
-void lu_context_exit(struct lu_context *ctx)
-{
- unsigned int i;
-
- LINVRNT(ctx->lc_state == LCS_ENTERED);
- ctx->lc_state = LCS_LEFT;
- if (ctx->lc_tags & LCT_HAS_EXIT && ctx->lc_value) {
- /* could race with key quiescency */
- if (ctx->lc_tags & LCT_REMEMBER)
- read_lock(&lu_keys_guard);
-
- for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) {
- if (ctx->lc_value[i]) {
- struct lu_context_key *key;
-
- key = lu_keys[i];
- if (key->lct_exit)
- key->lct_exit(ctx,
- key, ctx->lc_value[i]);
- }
- }
-
- if (ctx->lc_tags & LCT_REMEMBER)
- read_unlock(&lu_keys_guard);
- }
-}
-EXPORT_SYMBOL(lu_context_exit);
-
-/**
- * Allocate values in a context for all missing keys that were registered
- * after the context was created. key_set_version only changes in the rare
- * cases when modules are loaded or removed.
- */
-int lu_context_refill(struct lu_context *ctx)
-{
- read_lock(&lu_keys_guard);
- if (likely(ctx->lc_version == key_set_version)) {
- read_unlock(&lu_keys_guard);
- return 0;
- }
-
- read_unlock(&lu_keys_guard);
- return keys_fill(ctx);
-}
-
-/**
- * lu_ctx_tags/lu_ses_tags will be updated when new types of obd are
- * added. Currently this is only used on the client side, specifically
- * for the echo device client. For other stacks (like ptlrpc threads),
- * contexts are predefined when the lu_device type is registered, during
- * the module probe phase.
- */
-__u32 lu_context_tags_default;
-__u32 lu_session_tags_default;
-
-int lu_env_init(struct lu_env *env, __u32 tags)
-{
- int result;
-
- env->le_ses = NULL;
- result = lu_context_init(&env->le_ctx, tags);
- if (likely(result == 0))
- lu_context_enter(&env->le_ctx);
- return result;
-}
-EXPORT_SYMBOL(lu_env_init);
-
-void lu_env_fini(struct lu_env *env)
-{
- lu_context_exit(&env->le_ctx);
- lu_context_fini(&env->le_ctx);
- env->le_ses = NULL;
-}
-EXPORT_SYMBOL(lu_env_fini);
-
-int lu_env_refill(struct lu_env *env)
-{
- int result;
-
- result = lu_context_refill(&env->le_ctx);
- if (result == 0 && env->le_ses)
- result = lu_context_refill(env->le_ses);
- return result;
-}
-EXPORT_SYMBOL(lu_env_refill);
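-
-/*
- * Editor's note: a sketch of the usual lu_env life-cycle around an
- * operation (error handling elided; LCT_LOCAL is one possible tag
- * choice, callers pass whatever tags their keys were registered with):
- *
- *	struct lu_env env;
- *	int rc;
- *
- *	rc = lu_env_init(&env, LCT_LOCAL);   initializes and enters le_ctx
- *	if (rc)
- *		return rc;
- *	... lu_context_key_get(&env.le_ctx, ...) ...
- *	lu_env_fini(&env);                   exits and finalizes le_ctx
- */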
-
-struct lu_site_stats {
- unsigned int lss_populated;
- unsigned int lss_max_search;
- unsigned int lss_total;
- unsigned int lss_busy;
-};
-
-static void lu_site_stats_get(const struct lu_site *s,
- struct lu_site_stats *stats, int populated)
-{
- struct cfs_hash *hs = s->ls_obj_hash;
- struct cfs_hash_bd bd;
- unsigned int i;
- /*
- * percpu_counter_sum_positive() won't accept a const pointer,
- * as it modifies the struct by taking a spinlock.
- */
- struct lu_site *s2 = (struct lu_site *)s;
-
- stats->lss_busy += cfs_hash_size_get(hs) -
- percpu_counter_sum_positive(&s2->ls_lru_len_counter);
- cfs_hash_for_each_bucket(hs, &bd, i) {
- struct hlist_head *hhead;
-
- cfs_hash_bd_lock(hs, &bd, 1);
- stats->lss_total += cfs_hash_bd_count_get(&bd);
- stats->lss_max_search = max((int)stats->lss_max_search,
- cfs_hash_bd_depmax_get(&bd));
- if (!populated) {
- cfs_hash_bd_unlock(hs, &bd, 1);
- continue;
- }
-
- cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
- if (!hlist_empty(hhead))
- stats->lss_populated++;
- }
- cfs_hash_bd_unlock(hs, &bd, 1);
- }
-}
-
-/*
- * lu_cache_shrink_count() returns an approximate number of cached objects
- * that can be freed by shrink_slab(). A counter, which tracks the
- * number of items in the site's lru, is maintained in a percpu_counter
- * for each site. The percpu values are incremented and decremented as
- * objects are added or removed from the lru. The percpu values are summed
- * and saved whenever a percpu value exceeds a threshold. Thus the saved,
- * summed value at any given time may not accurately reflect the current
- * lru length. But this value is sufficiently accurate for the needs of
- * a shrinker.
- *
- * Using a per-cpu counter is a compromise solution to concurrent access:
- * lu_object_put() can update the counter without locking the site and
- * lu_cache_shrink_count can sum the counters without locking each
- * ls_obj_hash bucket.
- */
-static unsigned long lu_cache_shrink_count(struct shrinker *sk,
- struct shrink_control *sc)
-{
- struct lu_site *s;
- struct lu_site *tmp;
- unsigned long cached = 0;
-
- if (!(sc->gfp_mask & __GFP_FS))
- return 0;
-
- down_read(&lu_sites_guard);
- list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage)
- cached += percpu_counter_read_positive(&s->ls_lru_len_counter);
- up_read(&lu_sites_guard);
-
- cached = (cached / 100) * sysctl_vfs_cache_pressure;
- CDEBUG(D_INODE, "%ld objects cached, cache pressure %d\n",
- cached, sysctl_vfs_cache_pressure);
-
- return cached;
-}
-
-static unsigned long lu_cache_shrink_scan(struct shrinker *sk,
- struct shrink_control *sc)
-{
- struct lu_site *s;
- struct lu_site *tmp;
- unsigned long remain = sc->nr_to_scan, freed = 0;
- LIST_HEAD(splice);
-
- if (!(sc->gfp_mask & __GFP_FS))
- /* We must not take the lu_sites_guard lock when
- * __GFP_FS is *not* set because of the deadlock
- * possibility detailed above. Additionally,
- * since we cannot determine the number of
- * objects in the cache without taking this
- * lock, we're in a particularly tough spot. As
- * a result, we'll just lie and say our cache is
- * empty. This _should_ be ok, as we can't
- * reclaim objects when __GFP_FS is *not* set
- * anyways.
- */
- return SHRINK_STOP;
-
- down_write(&lu_sites_guard);
- list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) {
- freed = lu_site_purge(&lu_shrink_env, s, remain);
- remain -= freed;
- /*
- * Move just shrunk site to the tail of site list to
- * assure shrinking fairness.
- */
- list_move_tail(&s->ls_linkage, &splice);
- }
- list_splice(&splice, lu_sites.prev);
- up_write(&lu_sites_guard);
-
- return sc->nr_to_scan - remain;
-}
-
-/**
- * Shrinker for the lu_object caches. Counting and scanning are
- * implemented by lu_cache_shrink_count() and lu_cache_shrink_scan()
- * above.
- */
-static struct shrinker lu_site_shrinker = {
- .count_objects = lu_cache_shrink_count,
- .scan_objects = lu_cache_shrink_scan,
- .seeks = DEFAULT_SEEKS,
-};
-
-/**
- * Initialization of global lu_* data.
- */
-int lu_global_init(void)
-{
- int result;
-
- CDEBUG(D_INFO, "Lustre LU module (%p).\n", &lu_keys);
-
- result = lu_ref_global_init();
- if (result != 0)
- return result;
-
- LU_CONTEXT_KEY_INIT(&lu_global_key);
- result = lu_context_key_register(&lu_global_key);
- if (result != 0) {
- lu_ref_global_fini();
- return result;
- }
-
- /*
- * At this level, we don't know what tags are needed, so allocate them
- * conservatively. This should not be too bad, because this
- * environment is global.
- */
- down_write(&lu_sites_guard);
- result = lu_env_init(&lu_shrink_env, LCT_SHRINKER);
- up_write(&lu_sites_guard);
- if (result != 0) {
- lu_context_key_degister(&lu_global_key);
- lu_ref_global_fini();
- return result;
- }
-
- /*
- * seeks estimation: 3 seeks to read a record from oi, one to read
- * inode, one for ea. Unfortunately, setting such a high value results in
- * the lu_object/inode cache consuming all the memory.
- */
- result = register_shrinker(&lu_site_shrinker);
- if (result != 0) {
- /* Order explained in lu_global_fini(). */
- lu_context_key_degister(&lu_global_key);
-
- down_write(&lu_sites_guard);
- lu_env_fini(&lu_shrink_env);
- up_write(&lu_sites_guard);
-
- lu_ref_global_fini();
- return result;
- }
-
- return 0;
-}
-
-/**
- * Dual to lu_global_init().
- */
-void lu_global_fini(void)
-{
- unregister_shrinker(&lu_site_shrinker);
- lu_context_key_degister(&lu_global_key);
-
- /*
- * Tear shrinker environment down _after_ de-registering
- * lu_global_key, because the latter has a value in the former.
- */
- down_write(&lu_sites_guard);
- lu_env_fini(&lu_shrink_env);
- up_write(&lu_sites_guard);
-
- lu_ref_global_fini();
-}
-
-static __u32 ls_stats_read(struct lprocfs_stats *stats, int idx)
-{
- struct lprocfs_counter ret;
-
- lprocfs_stats_collect(stats, idx, &ret);
- return (__u32)ret.lc_count;
-}
-
-/**
- * Output site statistical counters into a buffer. Suitable for
- * lprocfs_rd_*()-style functions.
- */
-int lu_site_stats_print(const struct lu_site *s, struct seq_file *m)
-{
- struct lu_site_stats stats;
-
- memset(&stats, 0, sizeof(stats));
- lu_site_stats_get(s, &stats, 1);
-
- seq_printf(m, "%d/%d %d/%ld %d %d %d %d %d %d %d\n",
- stats.lss_busy,
- stats.lss_total,
- stats.lss_populated,
- CFS_HASH_NHLIST(s->ls_obj_hash),
- stats.lss_max_search,
- ls_stats_read(s->ls_stats, LU_SS_CREATED),
- ls_stats_read(s->ls_stats, LU_SS_CACHE_HIT),
- ls_stats_read(s->ls_stats, LU_SS_CACHE_MISS),
- ls_stats_read(s->ls_stats, LU_SS_CACHE_RACE),
- ls_stats_read(s->ls_stats, LU_SS_CACHE_DEATH_RACE),
- ls_stats_read(s->ls_stats, LU_SS_LRU_PURGED));
- return 0;
-}
-EXPORT_SYMBOL(lu_site_stats_print);
-
-/**
- * Helper function to initialize a number of kmem slab caches at once.
- */
-int lu_kmem_init(struct lu_kmem_descr *caches)
-{
- int result;
- struct lu_kmem_descr *iter = caches;
-
- for (result = 0; iter->ckd_cache; ++iter) {
- *iter->ckd_cache = kmem_cache_create(iter->ckd_name,
- iter->ckd_size,
- 0, 0, NULL);
- if (!*iter->ckd_cache) {
- result = -ENOMEM;
- /* free all previously allocated caches */
- lu_kmem_fini(caches);
- break;
- }
- }
- return result;
-}
-EXPORT_SYMBOL(lu_kmem_init);
-
-/**
- * Helper function to finalize a number of kmem slab caches at once. Dual to
- * lu_kmem_init().
- */
-void lu_kmem_fini(struct lu_kmem_descr *caches)
-{
- for (; caches->ckd_cache; ++caches) {
- kmem_cache_destroy(*caches->ckd_cache);
- *caches->ckd_cache = NULL;
- }
-}
-EXPORT_SYMBOL(lu_kmem_fini);
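-
-/*
- * Editor's note: an illustrative lu_kmem_descr table, with hypothetical
- * foo_* names; the array is terminated by an entry whose ckd_cache is
- * NULL:
- */
-#if 0 /* example only */
-static struct kmem_cache *foo_object_kmem;
-
-static struct lu_kmem_descr foo_caches[] = {
-	{
-		.ckd_cache = &foo_object_kmem,
-		.ckd_name  = "foo_object_kmem",
-		.ckd_size  = sizeof(struct foo_object)
-	},
-	{
-		.ckd_cache = NULL	/* terminator */
-	}
-};
-
-/* module init: rc = lu_kmem_init(foo_caches);
- * module exit: lu_kmem_fini(foo_caches);
- */
-#endif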
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_ref.c b/drivers/staging/lustre/lustre/obdclass/lu_ref.c
deleted file mode 100644
index f67cb89ea0ba..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/lu_ref.c
+++ /dev/null
@@ -1,45 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdclass/lu_ref.c
- *
- * Lustre reference.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <obd.h>
-#include <obd_class.h>
-#include <obd_support.h>
-#include <lu_ref.h>
diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_handles.c b/drivers/staging/lustre/lustre/obdclass/lustre_handles.c
deleted file mode 100644
index cdc8dc10690d..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/lustre_handles.c
+++ /dev/null
@@ -1,241 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdclass/lustre_handles.c
- *
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <linux/random.h>
-#include <obd_support.h>
-#include <lustre_handles.h>
-#include <lustre_lib.h>
-
-static __u64 handle_base;
-#define HANDLE_INCR 7
-static spinlock_t handle_base_lock;
-
-static struct handle_bucket {
- spinlock_t lock;
- struct list_head head;
-} *handle_hash;
-
-#define HANDLE_HASH_SIZE (1 << 16)
-#define HANDLE_HASH_MASK (HANDLE_HASH_SIZE - 1)
-
-/*
- * Generate a unique 64-bit cookie (hash) for a handle and insert it into
- * the global (per-node) hash table.
- */
-void class_handle_hash(struct portals_handle *h,
- struct portals_handle_ops *ops)
-{
- struct handle_bucket *bucket;
-
- LASSERT(h);
- LASSERT(list_empty(&h->h_link));
-
- /*
- * This is a fast but simplistic cookie-generation algorithm; it will
- * need to be redone at some point in the future for security.
- */
- spin_lock(&handle_base_lock);
- handle_base += HANDLE_INCR;
-
- if (unlikely(handle_base == 0)) {
- /*
- * A cookie of zero is "dangerous", because in many places it's
- * assumed that 0 means "unassigned" handle, not bound to any
- * object.
- */
- CWARN("The universe has been exhausted: cookie wrap-around.\n");
- handle_base += HANDLE_INCR;
- }
- h->h_cookie = handle_base;
- spin_unlock(&handle_base_lock);
-
- h->h_ops = ops;
- spin_lock_init(&h->h_lock);
-
- bucket = &handle_hash[h->h_cookie & HANDLE_HASH_MASK];
- spin_lock(&bucket->lock);
- list_add_rcu(&h->h_link, &bucket->head);
- h->h_in = 1;
- spin_unlock(&bucket->lock);
-
- CDEBUG(D_INFO, "added object %p with handle %#llx to hash\n",
- h, h->h_cookie);
-}
-EXPORT_SYMBOL(class_handle_hash);
-
-static void class_handle_unhash_nolock(struct portals_handle *h)
-{
- if (list_empty(&h->h_link)) {
- CERROR("removing an already-removed handle (%#llx)\n",
- h->h_cookie);
- return;
- }
-
- CDEBUG(D_INFO, "removing object %p with handle %#llx from hash\n",
- h, h->h_cookie);
-
- spin_lock(&h->h_lock);
- if (h->h_in == 0) {
- spin_unlock(&h->h_lock);
- return;
- }
- h->h_in = 0;
- spin_unlock(&h->h_lock);
- list_del_rcu(&h->h_link);
-}
-
-void class_handle_unhash(struct portals_handle *h)
-{
- struct handle_bucket *bucket;
-
- bucket = handle_hash + (h->h_cookie & HANDLE_HASH_MASK);
-
- spin_lock(&bucket->lock);
- class_handle_unhash_nolock(h);
- spin_unlock(&bucket->lock);
-}
-EXPORT_SYMBOL(class_handle_unhash);
-
-void *class_handle2object(__u64 cookie, const void *owner)
-{
- struct handle_bucket *bucket;
- struct portals_handle *h;
- void *retval = NULL;
-
- LASSERT(handle_hash);
-
-	/* Be careful when you want to change this code. See the
-	 * rcu_read_lock() comment at the top of this file. - jxiong
- */
- bucket = handle_hash + (cookie & HANDLE_HASH_MASK);
-
- rcu_read_lock();
- list_for_each_entry_rcu(h, &bucket->head, h_link) {
- if (h->h_cookie != cookie || h->h_owner != owner)
- continue;
-
- spin_lock(&h->h_lock);
- if (likely(h->h_in != 0)) {
- h->h_ops->hop_addref(h);
- retval = h;
- }
- spin_unlock(&h->h_lock);
- break;
- }
- rcu_read_unlock();
-
- return retval;
-}
-EXPORT_SYMBOL(class_handle2object);
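-
-/*
- * Editor's note: a sketch of the cookie round-trip, with hypothetical
- * foo_* names. It assumes the portals_handle is embedded at the start
- * of struct foo, so the pointer returned by class_handle2object() can
- * be used as the object itself:
- *
- *	class_handle_hash(&foo->foo_handle, &foo_handle_ops);
- *	cookie = foo->foo_handle.h_cookie;
- *	...
- *	foo = class_handle2object(cookie, foo_owner);
- *	if (foo) {
- *		... use foo; hop_addref() already took a reference ...
- *		foo_put(foo);
- *	}
- *	class_handle_unhash(&foo->foo_handle);
- */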
-
-void class_handle_free_cb(struct rcu_head *rcu)
-{
- struct portals_handle *h;
- void *ptr;
-
- h = container_of(rcu, struct portals_handle, h_rcu);
- ptr = (void *)(unsigned long)h->h_cookie;
-
- if (h->h_ops->hop_free)
- h->h_ops->hop_free(ptr, h->h_size);
- else
- kfree(ptr);
-}
-EXPORT_SYMBOL(class_handle_free_cb);
-
-int class_handle_init(void)
-{
- struct handle_bucket *bucket;
-
- LASSERT(!handle_hash);
-
- handle_hash = kvzalloc(sizeof(*bucket) * HANDLE_HASH_SIZE,
- GFP_KERNEL);
- if (!handle_hash)
- return -ENOMEM;
-
- spin_lock_init(&handle_base_lock);
- for (bucket = handle_hash + HANDLE_HASH_SIZE - 1; bucket >= handle_hash;
- bucket--) {
- INIT_LIST_HEAD(&bucket->head);
- spin_lock_init(&bucket->lock);
- }
-
- get_random_bytes(&handle_base, sizeof(handle_base));
- LASSERT(handle_base != 0ULL);
-
- return 0;
-}
-
-static int cleanup_all_handles(void)
-{
- int rc;
- int i;
-
- for (rc = i = 0; i < HANDLE_HASH_SIZE; i++) {
- struct portals_handle *h;
-
- spin_lock(&handle_hash[i].lock);
- list_for_each_entry_rcu(h, &handle_hash[i].head, h_link) {
- CERROR("force clean handle %#llx addr %p ops %p\n",
- h->h_cookie, h, h->h_ops);
-
- class_handle_unhash_nolock(h);
- rc++;
- }
- spin_unlock(&handle_hash[i].lock);
- }
-
- return rc;
-}
-
-void class_handle_cleanup(void)
-{
- int count;
-
- LASSERT(handle_hash);
-
- count = cleanup_all_handles();
-
- kvfree(handle_hash);
- handle_hash = NULL;
-
- if (count != 0)
- CERROR("handle_count at cleanup: %d\n", count);
-}
diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
deleted file mode 100644
index e286a2665423..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
+++ /dev/null
@@ -1,214 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_RPC
-
-#include <obd.h>
-#include <obd_support.h>
-#include <obd_class.h>
-#include <lustre_lib.h>
-#include <lustre_ha.h>
-#include <lustre_net.h>
-#include <lprocfs_status.h>
-
-#define NIDS_MAX 32
-
-struct uuid_nid_data {
- struct list_head un_list;
- struct obd_uuid un_uuid;
- int un_nid_count;
- lnet_nid_t un_nids[NIDS_MAX];
-};
-
-/* FIXME: This should probably become more elegant than a global linked list */
-static struct list_head g_uuid_list;
-static spinlock_t g_uuid_lock;
-
-void class_init_uuidlist(void)
-{
- INIT_LIST_HEAD(&g_uuid_list);
- spin_lock_init(&g_uuid_lock);
-}
-
-void class_exit_uuidlist(void)
-{
- /* delete all */
- class_del_uuid(NULL);
-}
-
-int lustre_uuid_to_peer(const char *uuid, lnet_nid_t *peer_nid, int index)
-{
- struct uuid_nid_data *data;
- struct obd_uuid tmp;
- int rc = -ENOENT;
-
- obd_str2uuid(&tmp, uuid);
- spin_lock(&g_uuid_lock);
- list_for_each_entry(data, &g_uuid_list, un_list) {
- if (obd_uuid_equals(&data->un_uuid, &tmp)) {
- if (index >= data->un_nid_count)
- break;
-
- rc = 0;
- *peer_nid = data->un_nids[index];
- break;
- }
- }
- spin_unlock(&g_uuid_lock);
- return rc;
-}
-EXPORT_SYMBOL(lustre_uuid_to_peer);
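-
-/*
- * Editor's note: callers can walk all NIDs of a uuid by increasing the
- * index until -ENOENT is returned, e.g.:
- *
- *	lnet_nid_t nid;
- *	int i = 0;
- *
- *	while (lustre_uuid_to_peer(uuid, &nid, i++) == 0)
- *		CDEBUG(D_INFO, "nid %s\n", libcfs_nid2str(nid));
- */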
-
-/* Add a nid to a niduuid. Multiple nids can be added to a single uuid;
- * LNET will choose the best one.
- */
-int class_add_uuid(const char *uuid, __u64 nid)
-{
- struct uuid_nid_data *data, *entry;
- int found = 0;
-
- LASSERT(nid != 0); /* valid newconfig NID is never zero */
-
- if (strlen(uuid) > UUID_MAX - 1)
- return -EOVERFLOW;
-
- data = kzalloc(sizeof(*data), GFP_NOFS);
- if (!data)
- return -ENOMEM;
-
- obd_str2uuid(&data->un_uuid, uuid);
- data->un_nids[0] = nid;
- data->un_nid_count = 1;
-
- spin_lock(&g_uuid_lock);
- list_for_each_entry(entry, &g_uuid_list, un_list) {
- if (obd_uuid_equals(&entry->un_uuid, &data->un_uuid)) {
- int i;
-
- found = 1;
- for (i = 0; i < entry->un_nid_count; i++)
- if (nid == entry->un_nids[i])
- break;
-
- if (i == entry->un_nid_count) {
- LASSERT(entry->un_nid_count < NIDS_MAX);
- entry->un_nids[entry->un_nid_count++] = nid;
- }
- break;
- }
- }
- if (!found)
- list_add(&data->un_list, &g_uuid_list);
- spin_unlock(&g_uuid_lock);
-
- if (found) {
- CDEBUG(D_INFO, "found uuid %s %s cnt=%d\n", uuid,
- libcfs_nid2str(nid), entry->un_nid_count);
- kfree(data);
- } else {
- CDEBUG(D_INFO, "add uuid %s %s\n", uuid, libcfs_nid2str(nid));
- }
- return 0;
-}
-
-/* Delete the nids for one uuid if specified, otherwise delete all */
-int class_del_uuid(const char *uuid)
-{
- LIST_HEAD(deathrow);
- struct uuid_nid_data *data;
- struct uuid_nid_data *temp;
-
- spin_lock(&g_uuid_lock);
- if (uuid) {
- struct obd_uuid tmp;
-
- obd_str2uuid(&tmp, uuid);
- list_for_each_entry(data, &g_uuid_list, un_list) {
- if (obd_uuid_equals(&data->un_uuid, &tmp)) {
- list_move(&data->un_list, &deathrow);
- break;
- }
- }
- } else {
- list_splice_init(&g_uuid_list, &deathrow);
- }
- spin_unlock(&g_uuid_lock);
-
- if (uuid && list_empty(&deathrow)) {
- CDEBUG(D_INFO, "Try to delete a non-existent uuid %s\n", uuid);
- return -EINVAL;
- }
-
- list_for_each_entry_safe(data, temp, &deathrow, un_list) {
- list_del(&data->un_list);
-
- CDEBUG(D_INFO, "del uuid %s %s/%d\n",
- obd_uuid2str(&data->un_uuid),
- libcfs_nid2str(data->un_nids[0]),
- data->un_nid_count);
-
- kfree(data);
- }
-
- return 0;
-}
-
-/* check if @nid exists in nid list of @uuid */
-int class_check_uuid(struct obd_uuid *uuid, __u64 nid)
-{
- struct uuid_nid_data *entry;
- int found = 0;
-
- CDEBUG(D_INFO, "check if uuid %s has %s.\n",
- obd_uuid2str(uuid), libcfs_nid2str(nid));
-
- spin_lock(&g_uuid_lock);
- list_for_each_entry(entry, &g_uuid_list, un_list) {
- int i;
-
- if (!obd_uuid_equals(&entry->un_uuid, uuid))
- continue;
-
- /* found the uuid, check if it has @nid */
- for (i = 0; i < entry->un_nid_count; i++) {
- if (entry->un_nids[i] == nid) {
- found = 1;
- break;
- }
- }
- break;
- }
- spin_unlock(&g_uuid_lock);
- return found;
-}
-EXPORT_SYMBOL(class_check_uuid);
diff --git a/drivers/staging/lustre/lustre/obdclass/obd_config.c b/drivers/staging/lustre/lustre/obdclass/obd_config.c
deleted file mode 100644
index ffc1814398a5..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/obd_config.c
+++ /dev/null
@@ -1,1538 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdclass/obd_config.c
- *
- * Config API
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <linux/string.h>
-
-#include <uapi/linux/lustre/lustre_ioctl.h>
-#include <llog_swab.h>
-#include <lprocfs_status.h>
-#include <lustre_log.h>
-#include <uapi/linux/lustre/lustre_param.h>
-#include <obd_class.h>
-
-#include "llog_internal.h"
-
-/*
- * uuid<->export lustre hash operations
- */
-/*
- * NOTE: this function cannot find an export that is in the
- * failed state.
- */
-static int
-uuid_keycmp(struct rhashtable_compare_arg *arg, const void *obj)
-{
- const struct obd_uuid *uuid = arg->key;
- const struct obd_export *exp = obj;
-
- if (obd_uuid_equals(uuid, &exp->exp_client_uuid) &&
- !exp->exp_failed)
- return 0;
- return -ESRCH;
-}
-
-static void
-uuid_export_exit(void *vexport, void *data)
-{
- struct obd_export *exp = vexport;
-
- class_export_put(exp);
-}
-
-static const struct rhashtable_params uuid_hash_params = {
- .key_len = sizeof(struct obd_uuid),
- .key_offset = offsetof(struct obd_export, exp_client_uuid),
- .head_offset = offsetof(struct obd_export, exp_uuid_hash),
- .obj_cmpfn = uuid_keycmp,
- .automatic_shrinking = true,
-};
-
-int obd_uuid_add(struct obd_device *obd, struct obd_export *export)
-{
- int rc;
-
- rc = rhashtable_lookup_insert_fast(&obd->obd_uuid_hash,
- &export->exp_uuid_hash,
- uuid_hash_params);
- if (rc == 0)
- class_export_get(export);
- else if (rc == -EEXIST)
- rc = -EALREADY;
- else
- /* map obscure error codes to -ENOMEM */
- rc = -ENOMEM;
- return rc;
-}
-
-void obd_uuid_del(struct obd_device *obd, struct obd_export *export)
-{
- int rc;
-
- rc = rhashtable_remove_fast(&obd->obd_uuid_hash,
- &export->exp_uuid_hash,
- uuid_hash_params);
-
- if (rc == 0)
- class_export_put(export);
-}
-
-/*********** string parsing utils *********/
-
-/* returns 0 if we find this key in the buffer, else 1 */
-int class_find_param(char *buf, char *key, char **valp)
-{
- char *ptr;
-
- if (!buf)
- return 1;
-
- ptr = strstr(buf, key);
- if (!ptr)
- return 1;
-
- if (valp)
- *valp = ptr + strlen(key);
-
- return 0;
-}
-EXPORT_SYMBOL(class_find_param);
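-
-/*
- * Editor's note: with a hypothetical buffer "foo.bar.param=5",
- * class_find_param(buf, "param=", &val) returns 0 and leaves val
- * pointing at "5". Unlike class_match_param() below, the key may occur
- * anywhere in the buffer, not only at its start.
- */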
-
-/* returns 0 if the buffer starts with this key, else 1.
- * *valp points to the first char after the key.
- */
-static int class_match_param(char *buf, const char *key, char **valp)
-{
- if (!buf)
- return 1;
-
- if (memcmp(buf, key, strlen(key)) != 0)
- return 1;
-
- if (valp)
- *valp = buf + strlen(key);
-
- return 0;
-}
-
-static int parse_nid(char *buf, void *value, int quiet)
-{
- lnet_nid_t *nid = value;
-
- *nid = libcfs_str2nid(buf);
- if (*nid != LNET_NID_ANY)
- return 0;
-
- if (!quiet)
- LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", buf);
- return -EINVAL;
-}
-
-static int parse_net(char *buf, void *value)
-{
- __u32 *net = value;
-
- *net = libcfs_str2net(buf);
- CDEBUG(D_INFO, "Net %s\n", libcfs_net2str(*net));
- return 0;
-}
-
-enum {
- CLASS_PARSE_NID = 1,
- CLASS_PARSE_NET,
-};
-
-/* Returns 0 for a good NID,
- * 1 if not found,
- * < 0 on error.
- * endh is set to the next separator.
- */
-static int class_parse_value(char *buf, int opc, void *value, char **endh,
- int quiet)
-{
- char *endp;
- char tmp;
- int rc = 0;
-
- if (!buf)
- return 1;
- while (*buf == ',' || *buf == ':')
- buf++;
- if (*buf == ' ' || *buf == '/' || *buf == '\0')
- return 1;
-
- /* nid separators or end of nids */
- endp = strpbrk(buf, ",: /");
- if (!endp)
- endp = buf + strlen(buf);
-
- tmp = *endp;
- *endp = '\0';
- switch (opc) {
- default:
- LBUG();
- case CLASS_PARSE_NID:
- rc = parse_nid(buf, value, quiet);
- break;
- case CLASS_PARSE_NET:
- rc = parse_net(buf, value);
- break;
- }
- *endp = tmp;
- if (rc != 0)
- return rc;
- if (endh)
- *endh = endp;
- return 0;
-}
-
-int class_parse_nid(char *buf, lnet_nid_t *nid, char **endh)
-{
- return class_parse_value(buf, CLASS_PARSE_NID, (void *)nid, endh, 0);
-}
-EXPORT_SYMBOL(class_parse_nid);
-
-int class_parse_nid_quiet(char *buf, lnet_nid_t *nid, char **endh)
-{
- return class_parse_value(buf, CLASS_PARSE_NID, (void *)nid, endh, 1);
-}
-EXPORT_SYMBOL(class_parse_nid_quiet);
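-
-/*
- * Editor's note: a sketch of walking a NID list such as
- * "192.168.0.1@tcp,192.168.0.2@tcp" using the endh cursor:
- *
- *	lnet_nid_t nid;
- *	char *ptr = buf;
- *
- *	while (class_parse_nid(ptr, &nid, &ptr) == 0)
- *		... use nid ...
- *
- * class_parse_value() skips leading ',' and ':' separators, so the
- * cursor can be fed straight back in for the next NID.
- */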
-
-char *lustre_cfg_string(struct lustre_cfg *lcfg, u32 index)
-{
- char *s;
-
- if (!lcfg->lcfg_buflens[index])
- return NULL;
-
- s = lustre_cfg_buf(lcfg, index);
- if (!s)
- return NULL;
-
- /*
- * make sure it's NULL terminated, even if this kills a char
- * of data. Try to use the padding first though.
- */
- if (s[lcfg->lcfg_buflens[index] - 1] != '\0') {
- size_t last = ALIGN(lcfg->lcfg_buflens[index], 8) - 1;
- char lost;
-
- /* Use the smaller value */
- if (last > lcfg->lcfg_buflens[index])
- last = lcfg->lcfg_buflens[index];
-
- lost = s[last];
- s[last] = '\0';
- if (lost != '\0') {
- CWARN("Truncated buf %d to '%s' (lost '%c'...)\n",
- index, s, lost);
- }
- }
- return s;
-}
-EXPORT_SYMBOL(lustre_cfg_string);
-
-/********************** class fns **********************/
-
-/**
- * Create a new obd device and set the type, name and uuid. If successful,
- * the new device can be accessed by either name or uuid.
- */
-static int class_attach(struct lustre_cfg *lcfg)
-{
- struct obd_device *obd = NULL;
- char *typename, *name, *uuid;
- int rc, len;
-
- if (!LUSTRE_CFG_BUFLEN(lcfg, 1)) {
- CERROR("No type passed!\n");
- return -EINVAL;
- }
- typename = lustre_cfg_string(lcfg, 1);
-
- if (!LUSTRE_CFG_BUFLEN(lcfg, 0)) {
- CERROR("No name passed!\n");
- return -EINVAL;
- }
- name = lustre_cfg_string(lcfg, 0);
-
- if (!LUSTRE_CFG_BUFLEN(lcfg, 2)) {
- CERROR("No UUID passed!\n");
- return -EINVAL;
- }
- uuid = lustre_cfg_string(lcfg, 2);
-
- CDEBUG(D_IOCTL, "attach type %s name: %s uuid: %s\n",
- typename, name, uuid);
-
- obd = class_newdev(typename, name);
- if (IS_ERR(obd)) {
- /* Already exists or out of obds */
- rc = PTR_ERR(obd);
- obd = NULL;
- CERROR("Cannot create device %s of type %s : %d\n",
- name, typename, rc);
- goto out;
- }
- LASSERTF(obd, "Cannot get obd device %s of type %s\n",
- name, typename);
- LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC,
- "obd %p obd_magic %08X != %08X\n",
- obd, obd->obd_magic, OBD_DEVICE_MAGIC);
- LASSERTF(strncmp(obd->obd_name, name, strlen(name)) == 0,
- "%p obd_name %s != %s\n", obd, obd->obd_name, name);
-
- rwlock_init(&obd->obd_pool_lock);
- obd->obd_pool_limit = 0;
- obd->obd_pool_slv = 0;
-
- INIT_LIST_HEAD(&obd->obd_exports);
- INIT_LIST_HEAD(&obd->obd_unlinked_exports);
- INIT_LIST_HEAD(&obd->obd_delayed_exports);
- spin_lock_init(&obd->obd_nid_lock);
- spin_lock_init(&obd->obd_dev_lock);
- mutex_init(&obd->obd_dev_mutex);
- spin_lock_init(&obd->obd_osfs_lock);
- /* obd->obd_osfs_age must be set to a value in the distant
- * past to guarantee a fresh statfs is fetched on mount.
- */
- obd->obd_osfs_age = get_jiffies_64() - 1000 * HZ;
-
- /* XXX belongs in setup not attach */
- init_rwsem(&obd->obd_observer_link_sem);
- /* recovery data */
- init_waitqueue_head(&obd->obd_evict_inprogress_waitq);
-
- llog_group_init(&obd->obd_olg);
-
- obd->obd_conn_inprogress = 0;
-
- len = strlen(uuid);
- if (len >= sizeof(obd->obd_uuid)) {
- CERROR("uuid must be < %d bytes long\n",
- (int)sizeof(obd->obd_uuid));
- rc = -EINVAL;
- goto out;
- }
- memcpy(obd->obd_uuid.uuid, uuid, len);
-
- /* Detach drops this */
- spin_lock(&obd->obd_dev_lock);
- atomic_set(&obd->obd_refcount, 1);
- spin_unlock(&obd->obd_dev_lock);
- lu_ref_init(&obd->obd_reference);
- lu_ref_add(&obd->obd_reference, "attach", obd);
-
- obd->obd_attached = 1;
- CDEBUG(D_IOCTL, "OBD: dev %d attached type %s with refcount %d\n",
- obd->obd_minor, typename, atomic_read(&obd->obd_refcount));
- return 0;
- out:
- if (obd)
- class_release_dev(obd);
-
- return rc;
-}
-
-/** Create hashes, self-export, and call type-specific setup.
- * Setup is effectively the "start this obd" call.
- */
-static int class_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
-{
- int err = 0;
- struct obd_export *exp;
-
- LASSERT(obd);
- LASSERTF(obd == class_num2obd(obd->obd_minor),
- "obd %p != obd_devs[%d] %p\n",
- obd, obd->obd_minor, class_num2obd(obd->obd_minor));
- LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC,
- "obd %p obd_magic %08x != %08x\n",
- obd, obd->obd_magic, OBD_DEVICE_MAGIC);
-
- /* have we attached a type to this device? */
- if (!obd->obd_attached) {
- CERROR("Device %d not attached\n", obd->obd_minor);
- return -ENODEV;
- }
-
- if (obd->obd_set_up) {
- CERROR("Device %d already setup (type %s)\n",
- obd->obd_minor, obd->obd_type->typ_name);
- return -EEXIST;
- }
-
- /* is someone else setting us up right now? (attach inits spinlock) */
- spin_lock(&obd->obd_dev_lock);
- if (obd->obd_starting) {
- spin_unlock(&obd->obd_dev_lock);
- CERROR("Device %d setup in progress (type %s)\n",
- obd->obd_minor, obd->obd_type->typ_name);
- return -EEXIST;
- }
- /* just leave this on forever. I can't use obd_set_up here because
- * other fns check that status, and we're not actually set up yet.
- */
- obd->obd_starting = 1;
- spin_unlock(&obd->obd_dev_lock);
-
-	/* create a uuid-export lustre hash */
- err = rhashtable_init(&obd->obd_uuid_hash, &uuid_hash_params);
-
- if (err)
- goto err_hash;
-
- exp = class_new_export(obd, &obd->obd_uuid);
- if (IS_ERR(exp)) {
- err = PTR_ERR(exp);
- goto err_new;
- }
-
- obd->obd_self_export = exp;
- class_export_put(exp);
-
- err = obd_setup(obd, lcfg);
- if (err)
- goto err_exp;
-
- obd->obd_set_up = 1;
-
- spin_lock(&obd->obd_dev_lock);
- /* cleanup drops this */
- class_incref(obd, "setup", obd);
- spin_unlock(&obd->obd_dev_lock);
-
- CDEBUG(D_IOCTL, "finished setup of obd %s (uuid %s)\n",
- obd->obd_name, obd->obd_uuid.uuid);
-
- return 0;
-err_exp:
- if (obd->obd_self_export) {
- class_unlink_export(obd->obd_self_export);
- obd->obd_self_export = NULL;
- }
-err_new:
- rhashtable_destroy(&obd->obd_uuid_hash);
-err_hash:
- obd->obd_starting = 0;
- CERROR("setup %s failed (%d)\n", obd->obd_name, err);
- return err;
-}
-
-/** We have finished using this obd and are ready to destroy it.
- * There can be no more references to this obd.
- */
-static int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg)
-{
- if (obd->obd_set_up) {
- CERROR("OBD device %d still set up\n", obd->obd_minor);
- return -EBUSY;
- }
-
- spin_lock(&obd->obd_dev_lock);
- if (!obd->obd_attached) {
- spin_unlock(&obd->obd_dev_lock);
- CERROR("OBD device %d not attached\n", obd->obd_minor);
- return -ENODEV;
- }
- obd->obd_attached = 0;
- spin_unlock(&obd->obd_dev_lock);
-
- CDEBUG(D_IOCTL, "detach on obd %s (uuid %s)\n",
- obd->obd_name, obd->obd_uuid.uuid);
-
- class_decref(obd, "attach", obd);
- return 0;
-}
-
-/** Start shutting down the obd. There may be in-progress ops when
- * this is called. We tell them to start shutting down with a call
- * to class_disconnect_exports().
- */
-static int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg)
-{
- int err = 0;
- char *flag;
-
- OBD_RACE(OBD_FAIL_LDLM_RECOV_CLIENTS);
-
- if (!obd->obd_set_up) {
- CERROR("Device %d not setup\n", obd->obd_minor);
- return -ENODEV;
- }
-
- spin_lock(&obd->obd_dev_lock);
- if (obd->obd_stopping) {
- spin_unlock(&obd->obd_dev_lock);
- CERROR("OBD %d already stopping\n", obd->obd_minor);
- return -ENODEV;
- }
- /* Leave this on forever */
- obd->obd_stopping = 1;
- spin_unlock(&obd->obd_dev_lock);
-
- while (obd->obd_conn_inprogress > 0)
- cond_resched();
- smp_rmb();
-
- if (lcfg->lcfg_bufcount >= 2 && LUSTRE_CFG_BUFLEN(lcfg, 1) > 0) {
- for (flag = lustre_cfg_string(lcfg, 1); *flag != 0; flag++)
- switch (*flag) {
- case 'F':
- obd->obd_force = 1;
- break;
- case 'A':
- LCONSOLE_WARN("Failing over %s\n",
- obd->obd_name);
- obd->obd_fail = 1;
- obd->obd_no_transno = 1;
- obd->obd_no_recov = 1;
- if (OBP(obd, iocontrol)) {
- obd_iocontrol(OBD_IOC_SYNC,
- obd->obd_self_export,
- 0, NULL, NULL);
- }
- break;
- default:
- CERROR("Unrecognised flag '%c'\n", *flag);
- }
- }
-
- LASSERT(obd->obd_self_export);
-
- /* Precleanup, we must make sure all exports get destroyed. */
- err = obd_precleanup(obd);
- if (err)
- CERROR("Precleanup %s returned %d\n",
- obd->obd_name, err);
-
-	/* destroy the uuid-export hash */
- rhashtable_free_and_destroy(&obd->obd_uuid_hash, uuid_export_exit, NULL);
-
- class_decref(obd, "setup", obd);
- obd->obd_set_up = 0;
-
- return 0;
-}
-
-struct obd_device *class_incref(struct obd_device *obd,
- const char *scope, const void *source)
-{
- lu_ref_add_atomic(&obd->obd_reference, scope, source);
- atomic_inc(&obd->obd_refcount);
- CDEBUG(D_INFO, "incref %s (%p) now %d\n", obd->obd_name, obd,
- atomic_read(&obd->obd_refcount));
-
- return obd;
-}
-EXPORT_SYMBOL(class_incref);
-
-void class_decref(struct obd_device *obd, const char *scope, const void *source)
-{
- int err;
- int refs;
-
- spin_lock(&obd->obd_dev_lock);
- atomic_dec(&obd->obd_refcount);
- refs = atomic_read(&obd->obd_refcount);
- spin_unlock(&obd->obd_dev_lock);
- lu_ref_del(&obd->obd_reference, scope, source);
-
- CDEBUG(D_INFO, "Decref %s (%p) now %d\n", obd->obd_name, obd, refs);
-
- if ((refs == 1) && obd->obd_stopping) {
- /* All exports have been destroyed; there should
- * be no more in-progress ops by this point.
- */
-
- spin_lock(&obd->obd_self_export->exp_lock);
- obd->obd_self_export->exp_flags |= exp_flags_from_obd(obd);
- spin_unlock(&obd->obd_self_export->exp_lock);
-
- /* note that we'll recurse into class_decref again */
- class_unlink_export(obd->obd_self_export);
- return;
- }
-
- if (refs == 0) {
- CDEBUG(D_CONFIG, "finishing cleanup of obd %s (%s)\n",
- obd->obd_name, obd->obd_uuid.uuid);
- LASSERT(!obd->obd_attached);
- if (obd->obd_stopping) {
- /* If we're not stopping, we were never set up */
- err = obd_cleanup(obd);
- if (err)
- CERROR("Cleanup %s returned %d\n",
- obd->obd_name, err);
- }
- class_release_dev(obd);
- }
-}
-EXPORT_SYMBOL(class_decref);
-
-/** Add a failover nid location.
- * Client obd types contact server obd types using this nid list.
- */
-static int class_add_conn(struct obd_device *obd, struct lustre_cfg *lcfg)
-{
- struct obd_import *imp;
- struct obd_uuid uuid;
- int rc;
-
- if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1 ||
- LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(struct obd_uuid)) {
- CERROR("invalid conn_uuid\n");
- return -EINVAL;
- }
- if (strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) &&
- strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) &&
- strcmp(obd->obd_type->typ_name, LUSTRE_OSP_NAME) &&
- strcmp(obd->obd_type->typ_name, LUSTRE_LWP_NAME) &&
- strcmp(obd->obd_type->typ_name, LUSTRE_MGC_NAME)) {
- CERROR("can't add connection on non-client dev\n");
- return -EINVAL;
- }
-
- imp = obd->u.cli.cl_import;
- if (!imp) {
- CERROR("try to add conn on immature client dev\n");
- return -EINVAL;
- }
-
- obd_str2uuid(&uuid, lustre_cfg_string(lcfg, 1));
- rc = obd_add_conn(imp, &uuid, lcfg->lcfg_num);
-
- return rc;
-}
-
-/** Remove a failover nid location.
- */
-static int class_del_conn(struct obd_device *obd, struct lustre_cfg *lcfg)
-{
- struct obd_import *imp;
- struct obd_uuid uuid;
- int rc;
-
- if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1 ||
- LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(struct obd_uuid)) {
- CERROR("invalid conn_uuid\n");
- return -EINVAL;
- }
- if (strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) &&
- strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME)) {
- CERROR("can't del connection on non-client dev\n");
- return -EINVAL;
- }
-
- imp = obd->u.cli.cl_import;
- if (!imp) {
- CERROR("try to del conn on immature client dev\n");
- return -EINVAL;
- }
-
- obd_str2uuid(&uuid, lustre_cfg_string(lcfg, 1));
- rc = obd_del_conn(imp, &uuid);
-
- return rc;
-}
-
-static LIST_HEAD(lustre_profile_list);
-static DEFINE_SPINLOCK(lustre_profile_list_lock);
-
-struct lustre_profile *class_get_profile(const char *prof)
-{
- struct lustre_profile *lprof;
-
- spin_lock(&lustre_profile_list_lock);
- list_for_each_entry(lprof, &lustre_profile_list, lp_list) {
- if (!strcmp(lprof->lp_profile, prof)) {
- lprof->lp_refs++;
- spin_unlock(&lustre_profile_list_lock);
- return lprof;
- }
- }
- spin_unlock(&lustre_profile_list_lock);
- return NULL;
-}
-EXPORT_SYMBOL(class_get_profile);
-
-/** Create a named "profile".
- * This defines the mdc and osc names to use for a client.
- * It is also used to define the lov to be used by an mdt.
- */
-static int class_add_profile(int proflen, char *prof, int osclen, char *osc,
- int mdclen, char *mdc)
-{
- struct lustre_profile *lprof;
- int err = 0;
-
- CDEBUG(D_CONFIG, "Add profile %s\n", prof);
-
- lprof = kzalloc(sizeof(*lprof), GFP_NOFS);
- if (!lprof)
- return -ENOMEM;
- INIT_LIST_HEAD(&lprof->lp_list);
-
- LASSERT(proflen == (strlen(prof) + 1));
- lprof->lp_profile = kmemdup(prof, proflen, GFP_NOFS);
- if (!lprof->lp_profile) {
- err = -ENOMEM;
- goto free_lprof;
- }
-
- LASSERT(osclen == (strlen(osc) + 1));
- lprof->lp_dt = kmemdup(osc, osclen, GFP_NOFS);
- if (!lprof->lp_dt) {
- err = -ENOMEM;
- goto free_lp_profile;
- }
-
- if (mdclen > 0) {
- LASSERT(mdclen == (strlen(mdc) + 1));
- lprof->lp_md = kmemdup(mdc, mdclen, GFP_NOFS);
- if (!lprof->lp_md) {
- err = -ENOMEM;
- goto free_lp_dt;
- }
- }
-
- spin_lock(&lustre_profile_list_lock);
- lprof->lp_refs = 1;
- lprof->lp_list_deleted = false;
- list_add(&lprof->lp_list, &lustre_profile_list);
- spin_unlock(&lustre_profile_list_lock);
- return err;
-
-free_lp_dt:
- kfree(lprof->lp_dt);
-free_lp_profile:
- kfree(lprof->lp_profile);
-free_lprof:
- kfree(lprof);
- return err;
-}
-
-void class_del_profile(const char *prof)
-{
- struct lustre_profile *lprof;
-
- CDEBUG(D_CONFIG, "Del profile %s\n", prof);
-
- lprof = class_get_profile(prof);
- if (lprof) {
- spin_lock(&lustre_profile_list_lock);
- /* because get profile increments the ref counter */
- lprof->lp_refs--;
- list_del(&lprof->lp_list);
- lprof->lp_list_deleted = true;
- spin_unlock(&lustre_profile_list_lock);
-
- class_put_profile(lprof);
- }
-}
-EXPORT_SYMBOL(class_del_profile);
-
-void class_put_profile(struct lustre_profile *lprof)
-{
- spin_lock(&lustre_profile_list_lock);
- if (--lprof->lp_refs > 0) {
- LASSERT(lprof->lp_refs > 0);
- spin_unlock(&lustre_profile_list_lock);
- return;
- }
- spin_unlock(&lustre_profile_list_lock);
-
- /* confirm not a negative number */
- LASSERT(!lprof->lp_refs);
-
- /*
- * At least one class_del_profile/profiles call must be made on the
- * target profile or lustre_profile_list will become corrupted.
- */
- LASSERT(lprof->lp_list_deleted);
- kfree(lprof->lp_profile);
- kfree(lprof->lp_dt);
- kfree(lprof->lp_md);
- kfree(lprof);
-}
-EXPORT_SYMBOL(class_put_profile);
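-
-/*
- * Editor's note: class_get_profile() takes a reference that the caller
- * must drop with class_put_profile(); e.g., with a hypothetical profile
- * name:
- *
- *	struct lustre_profile *lprof;
- *
- *	lprof = class_get_profile("lustre-client");
- *	if (lprof) {
- *		... use lprof->lp_dt / lprof->lp_md ...
- *		class_put_profile(lprof);
- *	}
- */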
-
-/* COMPAT_146 */
-void class_del_profiles(void)
-{
- struct lustre_profile *lprof, *n;
-
- spin_lock(&lustre_profile_list_lock);
- list_for_each_entry_safe(lprof, n, &lustre_profile_list, lp_list) {
- list_del(&lprof->lp_list);
- lprof->lp_list_deleted = true;
- spin_unlock(&lustre_profile_list_lock);
-
- class_put_profile(lprof);
-
- spin_lock(&lustre_profile_list_lock);
- }
- spin_unlock(&lustre_profile_list_lock);
-}
-EXPORT_SYMBOL(class_del_profiles);
-
-static int class_set_global(char *ptr, int val, struct lustre_cfg *lcfg)
-{
- if (class_match_param(ptr, PARAM_AT_MIN, NULL) == 0)
- at_min = val;
- else if (class_match_param(ptr, PARAM_AT_MAX, NULL) == 0)
- at_max = val;
- else if (class_match_param(ptr, PARAM_AT_EXTRA, NULL) == 0)
- at_extra = val;
- else if (class_match_param(ptr, PARAM_AT_EARLY_MARGIN, NULL) == 0)
- at_early_margin = val;
- else if (class_match_param(ptr, PARAM_AT_HISTORY, NULL) == 0)
- at_history = val;
- else if (class_match_param(ptr, PARAM_JOBID_VAR, NULL) == 0)
- strlcpy(obd_jobid_var, lustre_cfg_string(lcfg, 2),
- JOBSTATS_JOBID_VAR_MAX_LEN + 1);
- else
- return -EINVAL;
-
- CDEBUG(D_IOCTL, "global %s = %d\n", ptr, val);
- return 0;
-}
-
-/* We can't call ll_process_config or lquota_process_config directly because
- * they live in modules that must be loaded after this one.
- */
-static int (*client_process_config)(struct lustre_cfg *lcfg);
-static int (*quota_process_config)(struct lustre_cfg *lcfg);
-
-void lustre_register_client_process_config(int (*cpc)(struct lustre_cfg *lcfg))
-{
- client_process_config = cpc;
-}
-EXPORT_SYMBOL(lustre_register_client_process_config);
-
-static int process_param2_config(struct lustre_cfg *lcfg)
-{
- char *param = lustre_cfg_string(lcfg, 1);
- char *upcall = lustre_cfg_string(lcfg, 2);
- char *argv[] = {
- [0] = "/usr/sbin/lctl",
- [1] = "set_param",
- [2] = param,
- [3] = NULL
- };
- ktime_t start;
- ktime_t end;
- int rc;
-
- /* Add upcall processing here. Now only lctl is supported */
- if (strcmp(upcall, LCTL_UPCALL) != 0) {
- CERROR("Unsupported upcall %s\n", upcall);
- return -EINVAL;
- }
-
- start = ktime_get();
- rc = call_usermodehelper(argv[0], argv, NULL, UMH_WAIT_PROC);
- end = ktime_get();
-
- if (rc < 0) {
- CERROR(
- "lctl: error invoking upcall %s %s %s: rc = %d; time %ldus\n",
- argv[0], argv[1], argv[2], rc,
- (long)ktime_us_delta(end, start));
- } else {
- CDEBUG(D_HA, "lctl: invoked upcall %s %s %s, time %ldus\n",
- argv[0], argv[1], argv[2],
- (long)ktime_us_delta(end, start));
- rc = 0;
- }
-
- return rc;
-}
-
-/** Process configuration commands given in lustre_cfg form.
- * These may come from direct calls (e.g. class_manual_cleanup)
- * or processing the config llog, or ioctl from lctl.
- */
-int class_process_config(struct lustre_cfg *lcfg)
-{
- struct obd_device *obd;
- int err;
-
- LASSERT(lcfg && !IS_ERR(lcfg));
- CDEBUG(D_IOCTL, "processing cmd: %x\n", lcfg->lcfg_command);
-
- /* Commands that don't need a device */
- switch (lcfg->lcfg_command) {
- case LCFG_ATTACH: {
- err = class_attach(lcfg);
- goto out;
- }
- case LCFG_ADD_UUID: {
- CDEBUG(D_IOCTL, "adding mapping from uuid %s to nid %#llx (%s)\n",
- lustre_cfg_string(lcfg, 1), lcfg->lcfg_nid,
- libcfs_nid2str(lcfg->lcfg_nid));
-
- err = class_add_uuid(lustre_cfg_string(lcfg, 1), lcfg->lcfg_nid);
- goto out;
- }
- case LCFG_DEL_UUID: {
- CDEBUG(D_IOCTL, "removing mappings for uuid %s\n",
- (lcfg->lcfg_bufcount < 2 || LUSTRE_CFG_BUFLEN(lcfg, 1) == 0)
- ? "<all uuids>" : lustre_cfg_string(lcfg, 1));
-
- err = class_del_uuid(lustre_cfg_string(lcfg, 1));
- goto out;
- }
- case LCFG_MOUNTOPT: {
- CDEBUG(D_IOCTL, "mountopt: profile %s osc %s mdc %s\n",
- lustre_cfg_string(lcfg, 1),
- lustre_cfg_string(lcfg, 2),
- lustre_cfg_string(lcfg, 3));
- /* set these mount options somewhere, so ll_fill_super
- * can find them.
- */
- err = class_add_profile(LUSTRE_CFG_BUFLEN(lcfg, 1),
- lustre_cfg_string(lcfg, 1),
- LUSTRE_CFG_BUFLEN(lcfg, 2),
- lustre_cfg_string(lcfg, 2),
- LUSTRE_CFG_BUFLEN(lcfg, 3),
- lustre_cfg_string(lcfg, 3));
- goto out;
- }
- case LCFG_DEL_MOUNTOPT: {
- CDEBUG(D_IOCTL, "mountopt: profile %s\n",
- lustre_cfg_string(lcfg, 1));
- class_del_profile(lustre_cfg_string(lcfg, 1));
- err = 0;
- goto out;
- }
- case LCFG_SET_TIMEOUT: {
- CDEBUG(D_IOCTL, "changing lustre timeout from %d to %d\n",
- obd_timeout, lcfg->lcfg_num);
- obd_timeout = max(lcfg->lcfg_num, 1U);
- obd_timeout_set = 1;
- err = 0;
- goto out;
- }
- case LCFG_SET_LDLM_TIMEOUT: {
- /* ldlm_timeout is not used on the client */
- err = 0;
- goto out;
- }
- case LCFG_SET_UPCALL: {
- LCONSOLE_ERROR_MSG(0x15a, "recovery upcall is deprecated\n");
- /* COMPAT_146 Don't fail on old configs */
- err = 0;
- goto out;
- }
- case LCFG_MARKER: {
- struct cfg_marker *marker;
-
- marker = lustre_cfg_buf(lcfg, 1);
- CDEBUG(D_IOCTL, "marker %d (%#x) %.16s %s\n", marker->cm_step,
- marker->cm_flags, marker->cm_tgtname, marker->cm_comment);
- err = 0;
- goto out;
- }
- case LCFG_PARAM: {
- char *tmp;
- /* llite has no obd */
- if ((class_match_param(lustre_cfg_string(lcfg, 1),
- PARAM_LLITE, NULL) == 0) &&
- client_process_config) {
- err = (*client_process_config)(lcfg);
- goto out;
- } else if ((class_match_param(lustre_cfg_string(lcfg, 1),
- PARAM_SYS, &tmp) == 0)) {
- /* Global param settings */
- err = class_set_global(tmp, lcfg->lcfg_num, lcfg);
- /*
- * Client or server should not fail to mount if
- * it hits an unknown configuration parameter.
- */
- if (err != 0)
- CWARN("Ignoring unknown param %s\n", tmp);
-
- err = 0;
- goto out;
- } else if ((class_match_param(lustre_cfg_string(lcfg, 1),
- PARAM_QUOTA, &tmp) == 0) &&
- quota_process_config) {
- err = (*quota_process_config)(lcfg);
- goto out;
- }
-
- break;
- }
- case LCFG_SET_PARAM: {
- err = process_param2_config(lcfg);
- goto out;
- }
- }
- /* Commands that require a device */
- obd = class_name2obd(lustre_cfg_string(lcfg, 0));
- if (!obd) {
- if (!LUSTRE_CFG_BUFLEN(lcfg, 0))
- CERROR("this lcfg command requires a device name\n");
- else
- CERROR("no device for: %s\n",
- lustre_cfg_string(lcfg, 0));
-
- err = -EINVAL;
- goto out;
- }
-
- switch (lcfg->lcfg_command) {
- case LCFG_SETUP: {
- err = class_setup(obd, lcfg);
- goto out;
- }
- case LCFG_DETACH: {
- err = class_detach(obd, lcfg);
- err = 0;
- goto out;
- }
- case LCFG_CLEANUP: {
- err = class_cleanup(obd, lcfg);
- err = 0;
- goto out;
- }
- case LCFG_ADD_CONN: {
- err = class_add_conn(obd, lcfg);
- err = 0;
- goto out;
- }
- case LCFG_DEL_CONN: {
- err = class_del_conn(obd, lcfg);
- err = 0;
- goto out;
- }
- case LCFG_POOL_NEW: {
- err = obd_pool_new(obd, lustre_cfg_string(lcfg, 2));
- err = 0;
- goto out;
- }
- case LCFG_POOL_ADD: {
- err = obd_pool_add(obd, lustre_cfg_string(lcfg, 2),
- lustre_cfg_string(lcfg, 3));
- err = 0;
- goto out;
- }
- case LCFG_POOL_REM: {
- err = obd_pool_rem(obd, lustre_cfg_string(lcfg, 2),
- lustre_cfg_string(lcfg, 3));
- err = 0;
- goto out;
- }
- case LCFG_POOL_DEL: {
- err = obd_pool_del(obd, lustre_cfg_string(lcfg, 2));
- err = 0;
- goto out;
- }
- default: {
- err = obd_process_config(obd, sizeof(*lcfg), lcfg);
- goto out;
- }
- }
-out:
- if ((err < 0) && !(lcfg->lcfg_command & LCFG_REQUIRED)) {
- CWARN("Ignoring error %d on optional command %#x\n", err,
- lcfg->lcfg_command);
- err = 0;
- }
- return err;
-}
-EXPORT_SYMBOL(class_process_config);
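-
-/*
- * Editor's note: a minimal sketch of handing one command to
- * class_process_config(). lustre_cfg_bufs_reset(), lustre_cfg_len() and
- * lustre_cfg_init() are assumed to be the helpers from lustre_cfg.h:
- *
- *	struct lustre_cfg_bufs bufs;
- *	struct lustre_cfg *lcfg;
- *
- *	lustre_cfg_bufs_reset(&bufs, obdname);
- *	lcfg = kzalloc(lustre_cfg_len(bufs.lcfg_bufcount, bufs.lcfg_buflen),
- *		       GFP_NOFS);
- *	if (lcfg) {
- *		lustre_cfg_init(lcfg, LCFG_CLEANUP, &bufs);
- *		rc = class_process_config(lcfg);
- *		kfree(lcfg);
- *	}
- */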
-
-int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
- struct lustre_cfg *lcfg, void *data)
-{
- struct lprocfs_vars *var;
- struct file fakefile;
- struct seq_file fake_seqfile;
- char *key, *sval;
- int i, keylen, vallen;
- int matched = 0, j = 0;
- int rc = 0;
- int skip = 0;
-
- if (lcfg->lcfg_command != LCFG_PARAM) {
- CERROR("Unknown command: %d\n", lcfg->lcfg_command);
- return -EINVAL;
- }
-
- /* fake a seq file so that var->fops->write can work... */
- fakefile.private_data = &fake_seqfile;
- fake_seqfile.private = data;
- /* e.g. tunefs.lustre --param mdt.group_upcall=foo /r/tmp/lustre-mdt
- * or lctl conf_param lustre-MDT0000.mdt.group_upcall=bar
- * or lctl conf_param lustre-OST0000.osc.max_dirty_mb=36
- */
- for (i = 1; i < lcfg->lcfg_bufcount; i++) {
- key = lustre_cfg_buf(lcfg, i);
- /* Strip off prefix */
- if (class_match_param(key, prefix, &key)) {
- /*
- * If the prefix doesn't match, return error so we
- * can pass it down the stack
- */
- return -ENOSYS;
- }
- sval = strchr(key, '=');
- if (!sval || (*(sval + 1) == 0)) {
- CERROR("Can't parse param %s (missing '=')\n", key);
- /* rc = -EINVAL; continue parsing other params */
- continue;
- }
- keylen = sval - key;
- sval++;
- vallen = strlen(sval);
- matched = 0;
- j = 0;
- /* Search proc entries */
- while (lvars[j].name) {
- var = &lvars[j];
- if (!class_match_param(key, var->name, NULL) &&
- keylen == strlen(var->name)) {
- matched++;
- rc = -EROFS;
- if (var->fops && var->fops->write) {
- mm_segment_t oldfs;
-
- oldfs = get_fs();
- set_fs(KERNEL_DS);
- rc = var->fops->write(&fakefile,
- (const char __user *)sval,
- vallen, NULL);
- set_fs(oldfs);
- }
- break;
- }
- j++;
- }
- if (!matched) {
- CERROR("%.*s: %s unknown param %s\n",
- (int)strlen(prefix) - 1, prefix,
- (char *)lustre_cfg_string(lcfg, 0), key);
- /* rc = -EINVAL; continue parsing other params */
- skip++;
- } else if (rc < 0) {
- CERROR("%s: error writing proc entry '%s': rc = %d\n",
- prefix, var->name, rc);
- rc = 0;
- } else {
- CDEBUG(D_CONFIG, "%s.%.*s: Set parameter %.*s=%s\n",
- lustre_cfg_string(lcfg, 0),
- (int)strlen(prefix) - 1, prefix,
- (int)(sval - key - 1), key, sval);
- }
- }
-
- if (rc > 0)
- rc = 0;
- if (!rc && skip)
- rc = skip;
- return rc;
-}
-EXPORT_SYMBOL(class_process_proc_param);
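
[Editor's note: class_process_proc_param() above walks each lcfg buffer as a "key=value" string: it strips the subsystem prefix, splits at '=', then matches the key against the proc vars table. A minimal user-space sketch of that split, not part of the kernel source; the parameter string is made up:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char param[] = "mdt.group_upcall=foo";
        const char *prefix = "mdt.";
        char *key = param, *sval;

        /* Strip the subsystem prefix, as class_match_param() does */
        if (strncmp(key, prefix, strlen(prefix)) == 0)
            key += strlen(prefix);          /* -> "group_upcall=foo" */

        /* Split at '='; an empty value is also rejected above */
        sval = strchr(key, '=');
        if (!sval || !sval[1]) {
            fprintf(stderr, "can't parse param %s (missing '=')\n", key);
            return 1;
        }
        printf("key=%.*s value=%s\n", (int)(sval - key), key, sval + 1);
        return 0;
    }
]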
-
-/** Parse a configuration llog, applying various manipulations to the
- * records (compatibility modifications, skipping obsolete records,
- * changing uuids, etc.), then hand the resulting records to
- * class_process_config().
- */
-int class_config_llog_handler(const struct lu_env *env,
- struct llog_handle *handle,
- struct llog_rec_hdr *rec, void *data)
-{
- struct config_llog_instance *clli = data;
- int cfg_len = rec->lrh_len;
- char *cfg_buf = (char *)(rec + 1);
- int rc = 0;
-
- switch (rec->lrh_type) {
- case OBD_CFG_REC: {
- struct lustre_cfg *lcfg, *lcfg_new;
- struct lustre_cfg_bufs bufs;
- char *inst_name = NULL;
- int inst_len = 0;
- size_t lcfg_len;
- int swab = 0;
-
- lcfg = (struct lustre_cfg *)cfg_buf;
- if (lcfg->lcfg_version == __swab32(LUSTRE_CFG_VERSION)) {
- lustre_swab_lustre_cfg(lcfg);
- swab = 1;
- }
-
- rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
- if (rc)
- goto out;
-
- /* Figure out config state info */
- if (lcfg->lcfg_command == LCFG_MARKER) {
- struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1);
-
- lustre_swab_cfg_marker(marker, swab,
- LUSTRE_CFG_BUFLEN(lcfg, 1));
- CDEBUG(D_CONFIG, "Marker, inst_flg=%#x mark_flg=%#x\n",
- clli->cfg_flags, marker->cm_flags);
- if (marker->cm_flags & CM_START) {
- /* all previous flags off */
- clli->cfg_flags = CFG_F_MARKER;
- if (marker->cm_flags & CM_SKIP) {
- clli->cfg_flags |= CFG_F_SKIP;
- CDEBUG(D_CONFIG, "SKIP #%d\n",
- marker->cm_step);
- } else if ((marker->cm_flags & CM_EXCLUDE) ||
- (clli->cfg_sb &&
- lustre_check_exclusion(clli->cfg_sb,
- marker->cm_tgtname))) {
- clli->cfg_flags |= CFG_F_EXCLUDE;
- CDEBUG(D_CONFIG, "EXCLUDE %d\n",
- marker->cm_step);
- }
- } else if (marker->cm_flags & CM_END) {
- clli->cfg_flags = 0;
- }
- }
- /* A config command without a start marker before it is
- * illegal (post 146)
- */
- if (!(clli->cfg_flags & CFG_F_COMPAT146) &&
- !(clli->cfg_flags & CFG_F_MARKER) &&
- (lcfg->lcfg_command != LCFG_MARKER)) {
- CWARN("Config not inside markers, ignoring! (inst: %p, uuid: %s, flags: %#x)\n",
- clli->cfg_instance,
- clli->cfg_uuid.uuid, clli->cfg_flags);
- clli->cfg_flags |= CFG_F_SKIP;
- }
- if (clli->cfg_flags & CFG_F_SKIP) {
- CDEBUG(D_CONFIG, "skipping %#x\n",
- clli->cfg_flags);
- rc = 0;
- /* No processing! */
- break;
- }
-
- /*
- * For interoperability between 1.8 and 2.0,
- * rename "mds" obd device type to "mdt".
- */
- {
- char *typename = lustre_cfg_string(lcfg, 1);
- char *index = lustre_cfg_string(lcfg, 2);
-
- if ((lcfg->lcfg_command == LCFG_ATTACH && typename &&
- strcmp(typename, "mds") == 0)) {
- CWARN("For 1.8 interoperability, rename obd type from mds to mdt\n");
- typename[2] = 't';
- }
- if ((lcfg->lcfg_command == LCFG_SETUP && index &&
- strcmp(index, "type") == 0)) {
- CDEBUG(D_INFO, "For 1.8 interoperability, set this index to '0'\n");
- index[0] = '0';
- index[1] = 0;
- }
- }
-
- if (clli->cfg_flags & CFG_F_EXCLUDE) {
- CDEBUG(D_CONFIG, "cmd: %x marked EXCLUDED\n",
- lcfg->lcfg_command);
- if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD)
- /* Add inactive instead */
- lcfg->lcfg_command = LCFG_LOV_ADD_INA;
- }
-
- lustre_cfg_bufs_init(&bufs, lcfg);
-
- if (clli && clli->cfg_instance &&
- LUSTRE_CFG_BUFLEN(lcfg, 0) > 0) {
- inst_len = LUSTRE_CFG_BUFLEN(lcfg, 0) +
- sizeof(clli->cfg_instance) * 2 + 4;
- inst_name = kasprintf(GFP_NOFS, "%s-%p",
- lustre_cfg_string(lcfg, 0),
- clli->cfg_instance);
- if (!inst_name) {
- rc = -ENOMEM;
- goto out;
- }
- lustre_cfg_bufs_set_string(&bufs, 0, inst_name);
- CDEBUG(D_CONFIG, "cmd %x, instance name: %s\n",
- lcfg->lcfg_command, inst_name);
- }
-
- /* we override the llog's uuid for clients, to ensure they
- * are unique
- */
- if (clli && clli->cfg_instance &&
- lcfg->lcfg_command == LCFG_ATTACH) {
- lustre_cfg_bufs_set_string(&bufs, 2,
- clli->cfg_uuid.uuid);
- }
- /*
- * For an sptlrpc config record we expect 2 data segments:
- * [0]: fs_name/target_name,
- * [1]: rule string
- * We move them to indices [1] and [2] and insert the MGC's
- * obdname at index [0].
- */
- if (clli && !clli->cfg_instance &&
- lcfg->lcfg_command == LCFG_SPTLRPC_CONF) {
- lustre_cfg_bufs_set(&bufs, 2, bufs.lcfg_buf[1],
- bufs.lcfg_buflen[1]);
- lustre_cfg_bufs_set(&bufs, 1, bufs.lcfg_buf[0],
- bufs.lcfg_buflen[0]);
- lustre_cfg_bufs_set_string(&bufs, 0,
- clli->cfg_obdname);
- }
-
- lcfg_len = lustre_cfg_len(bufs.lcfg_bufcount, bufs.lcfg_buflen);
- lcfg_new = kzalloc(lcfg_len, GFP_NOFS);
- if (!lcfg_new) {
- rc = -ENOMEM;
- goto out;
- }
-
- lustre_cfg_init(lcfg_new, lcfg->lcfg_command, &bufs);
- lcfg_new->lcfg_num = lcfg->lcfg_num;
- lcfg_new->lcfg_flags = lcfg->lcfg_flags;
-
- /* XXX Hack to try to remain binary compatible with
- * pre-newconfig logs
- */
- if (lcfg->lcfg_nal != 0 && /* pre-newconfig log? */
- (lcfg->lcfg_nid >> 32) == 0) {
- __u32 addr = (__u32)(lcfg->lcfg_nid & 0xffffffff);
-
- lcfg_new->lcfg_nid =
- LNET_MKNID(LNET_MKNET(lcfg->lcfg_nal, 0), addr);
- CWARN("Converted pre-newconfig NAL %d NID %x to %s\n",
- lcfg->lcfg_nal, addr,
- libcfs_nid2str(lcfg_new->lcfg_nid));
- } else {
- lcfg_new->lcfg_nid = lcfg->lcfg_nid;
- }
-
- lcfg_new->lcfg_nal = 0; /* illegal value for obsolete field */
-
- rc = class_process_config(lcfg_new);
- kfree(lcfg_new);
- kfree(inst_name);
- break;
- }
- default:
- CERROR("Unknown llog record type %#x encountered\n",
- rec->lrh_type);
- break;
- }
-out:
- if (rc) {
- CERROR("%s: cfg command failed: rc = %d\n",
- handle->lgh_ctxt->loc_obd->obd_name, rc);
- class_config_dump_handler(NULL, handle, rec, data);
- }
- return rc;
-}
-EXPORT_SYMBOL(class_config_llog_handler);
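
[Editor's note: the pre-newconfig compatibility hack above packs the old NAL number and a 32-bit address into an LNet NID, with the network in the high 32 bits and the address in the low 32 bits. A standalone illustration of that packing; the macros mirror LNET_MKNET/LNET_MKNID, and the NAL and address values are made up:

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors LNET_MKNET()/LNET_MKNID(): network in the high 32 bits
     * of the NID, address in the low 32 bits.
     */
    #define MKNET(type, num)  ((((uint32_t)(type)) << 16) | (num))
    #define MKNID(net, addr)  ((((uint64_t)(net)) << 32) | (addr))

    int main(void)
    {
        uint32_t nal = 2;               /* old-style NAL number (made up) */
        uint32_t addr = 0xc0a80001;     /* 192.168.0.1 as a u32 (made up) */
        uint64_t nid = MKNID(MKNET(nal, 0), addr);

        printf("nid = %#llx\n", (unsigned long long)nid);
        return 0;
    }
]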
-
-int class_config_parse_llog(const struct lu_env *env, struct llog_ctxt *ctxt,
- char *name, struct config_llog_instance *cfg)
-{
- struct llog_process_cat_data cd = {0, 0};
- struct llog_handle *llh;
- llog_cb_t callback;
- int rc;
-
- CDEBUG(D_INFO, "looking up llog %s\n", name);
- rc = llog_open(env, ctxt, &llh, NULL, name, LLOG_OPEN_EXISTS);
- if (rc)
- return rc;
-
- rc = llog_init_handle(env, llh, LLOG_F_IS_PLAIN, NULL);
- if (rc)
- goto parse_out;
-
- /* continue processing from where we last stopped to end-of-log */
- if (cfg) {
- cd.lpcd_first_idx = cfg->cfg_last_idx;
- callback = cfg->cfg_callback;
- LASSERT(callback);
- } else {
- callback = class_config_llog_handler;
- }
-
- cd.lpcd_last_idx = 0;
-
- rc = llog_process(env, llh, callback, cfg, &cd);
-
- CDEBUG(D_CONFIG, "Processed log %s gen %d-%d (rc=%d)\n", name,
- cd.lpcd_first_idx + 1, cd.lpcd_last_idx, rc);
- if (cfg)
- cfg->cfg_last_idx = cd.lpcd_last_idx;
-
-parse_out:
- llog_close(env, llh);
- return rc;
-}
-EXPORT_SYMBOL(class_config_parse_llog);
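
[Editor's note: because class_config_parse_llog() starts from cfg->cfg_last_idx and records the last index processed, a caller can re-invoke it to pick up only the records appended since the previous pass. A hedged kernel-context sketch; error handling is elided, and when cfg is non-NULL its cfg_callback must already be set (e.g. to class_config_llog_handler):

    static int replay_config_log(const struct lu_env *env,
                                 struct llog_ctxt *ctxt, char *logname,
                                 struct config_llog_instance *cfg)
    {
        /* cfg->cfg_last_idx was advanced by the previous pass, so only
         * records appended since then are processed this time.
         */
        return class_config_parse_llog(env, ctxt, logname, cfg);
    }
]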
-
-/**
- * Parse a config record and write a dump into the supplied buffer.
- * This is separate from class_config_dump_handler() so that it can
- * also serve ioctl needs.
- */
-static int class_config_parse_rec(struct llog_rec_hdr *rec, char *buf,
- int size)
-{
- struct lustre_cfg *lcfg = (struct lustre_cfg *)(rec + 1);
- char *ptr = buf;
- char *end = buf + size;
- int rc = 0;
-
- LASSERT(rec->lrh_type == OBD_CFG_REC);
- rc = lustre_cfg_sanity_check(lcfg, rec->lrh_len);
- if (rc < 0)
- return rc;
-
- ptr += snprintf(ptr, end - ptr, "cmd=%05x ", lcfg->lcfg_command);
- if (lcfg->lcfg_flags)
- ptr += snprintf(ptr, end - ptr, "flags=%#08x ",
- lcfg->lcfg_flags);
-
- if (lcfg->lcfg_num)
- ptr += snprintf(ptr, end - ptr, "num=%#08x ", lcfg->lcfg_num);
-
- if (lcfg->lcfg_nid) {
- char nidstr[LNET_NIDSTR_SIZE];
-
- libcfs_nid2str_r(lcfg->lcfg_nid, nidstr, sizeof(nidstr));
- ptr += snprintf(ptr, end - ptr, "nid=%s(%#llx)\n ",
- nidstr, lcfg->lcfg_nid);
- }
-
- if (lcfg->lcfg_command == LCFG_MARKER) {
- struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1);
-
- ptr += snprintf(ptr, end - ptr, "marker=%d(%#x)%s '%s'",
- marker->cm_step, marker->cm_flags,
- marker->cm_tgtname, marker->cm_comment);
- } else {
- int i;
-
- for (i = 0; i < lcfg->lcfg_bufcount; i++) {
- ptr += snprintf(ptr, end - ptr, "%d:%s ", i,
- lustre_cfg_string(lcfg, i));
- }
- }
- ptr += snprintf(ptr, end - ptr, "\n");
- /* return consumed bytes */
- rc = ptr - buf;
- return rc;
-}
-
-int class_config_dump_handler(const struct lu_env *env,
- struct llog_handle *handle,
- struct llog_rec_hdr *rec, void *data)
-{
- char *outstr;
- int rc = 0;
-
- outstr = kzalloc(256, GFP_NOFS);
- if (!outstr)
- return -ENOMEM;
-
- if (rec->lrh_type == OBD_CFG_REC) {
- class_config_parse_rec(rec, outstr, 256);
- LCONSOLE(D_WARNING, " %s", outstr);
- } else {
- LCONSOLE(D_WARNING, "unhandled lrh_type: %#x\n", rec->lrh_type);
- rc = -EINVAL;
- }
-
- kfree(outstr);
- return rc;
-}
-
-/** Call class_cleanup and class_detach.
- * "Manual" only in the sense that we're faking lcfg commands.
- */
-int class_manual_cleanup(struct obd_device *obd)
-{
- char flags[3] = "";
- struct lustre_cfg *lcfg;
- struct lustre_cfg_bufs bufs;
- int rc;
-
- if (!obd) {
- CERROR("empty cleanup\n");
- return -EALREADY;
- }
-
- if (obd->obd_force)
- strcat(flags, "F");
- if (obd->obd_fail)
- strcat(flags, "A");
-
- CDEBUG(D_CONFIG, "Manual cleanup of %s (flags='%s')\n",
- obd->obd_name, flags);
-
- lustre_cfg_bufs_reset(&bufs, obd->obd_name);
- lustre_cfg_bufs_set_string(&bufs, 1, flags);
- lcfg = kzalloc(lustre_cfg_len(bufs.lcfg_bufcount, bufs.lcfg_buflen),
- GFP_NOFS);
- if (!lcfg)
- return -ENOMEM;
- lustre_cfg_init(lcfg, LCFG_CLEANUP, &bufs);
-
- rc = class_process_config(lcfg);
- if (rc) {
- CERROR("cleanup failed %d: %s\n", rc, obd->obd_name);
- goto out;
- }
-
- /* the lcfg is almost the same for both ops */
- lcfg->lcfg_command = LCFG_DETACH;
- rc = class_process_config(lcfg);
- if (rc)
- CERROR("detach failed %d: %s\n", rc, obd->obd_name);
-out:
- kfree(lcfg);
- return rc;
-}
-EXPORT_SYMBOL(class_manual_cleanup);
diff --git a/drivers/staging/lustre/lustre/obdclass/obd_mount.c b/drivers/staging/lustre/lustre/obdclass/obd_mount.c
deleted file mode 100644
index 06c38fdef7ba..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/obd_mount.c
+++ /dev/null
@@ -1,1245 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdclass/obd_mount.c
- *
- * Client mount routines
- *
- * Author: Nathan Rutman <nathan@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-#define D_MOUNT (D_SUPER | D_CONFIG/*|D_WARNING */)
-#define PRINT_CMD CDEBUG
-
-#include <linux/random.h>
-#include <obd.h>
-#include <lustre_compat.h>
-#include <obd_class.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <lustre_log.h>
-#include <lustre_disk.h>
-#include <uapi/linux/lustre/lustre_param.h>
-
-static DEFINE_SPINLOCK(client_lock);
-static struct module *client_mod;
-static int (*client_fill_super)(struct super_block *sb);
-static void (*kill_super_cb)(struct super_block *sb);
-
-/**************** config llog ********************/
-
-/** Get a config log from the MGS and process it.
- * This function is called for both clients and servers.
- * Continue to process new statements appended to the logs
- * (whenever the config lock is revoked) until lustre_end_log
- * is called.
- * @param sb The superblock is used by the MGC to write to the local copy of
- * the config log
- * @param logname The name of the llog to replicate from the MGS
- * @param cfg Since the same mgc may be used to follow multiple config logs
- * (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
- * this log, and is added to the mgc's list of logs to follow.
- */
-int lustre_process_log(struct super_block *sb, char *logname,
- struct config_llog_instance *cfg)
-{
- struct lustre_cfg *lcfg;
- struct lustre_cfg_bufs *bufs;
- struct lustre_sb_info *lsi = s2lsi(sb);
- struct obd_device *mgc = lsi->lsi_mgc;
- int rc;
-
- LASSERT(mgc);
- LASSERT(cfg);
-
- bufs = kzalloc(sizeof(*bufs), GFP_NOFS);
- if (!bufs)
- return -ENOMEM;
-
- /* mgc_process_config */
- lustre_cfg_bufs_reset(bufs, mgc->obd_name);
- lustre_cfg_bufs_set_string(bufs, 1, logname);
- lustre_cfg_bufs_set(bufs, 2, cfg, sizeof(*cfg));
- lustre_cfg_bufs_set(bufs, 3, &sb, sizeof(sb));
- lcfg = kzalloc(lustre_cfg_len(bufs->lcfg_bufcount, bufs->lcfg_buflen),
- GFP_NOFS);
- if (!lcfg) {
- rc = -ENOMEM;
- goto out;
- }
- lustre_cfg_init(lcfg, LCFG_LOG_START, bufs);
-
- rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
- kfree(lcfg);
-out:
- kfree(bufs);
-
- if (rc == -EINVAL)
- LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s' failed from the MGS (%d). Make sure this client and the MGS are running compatible versions of Lustre.\n",
- mgc->obd_name, logname, rc);
-
- else if (rc)
- LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' failed (%d). This may be the result of communication errors between this node and the MGS, a bad configuration, or other errors. See the syslog for more information.\n",
- mgc->obd_name, logname,
- rc);
-
- /* class_obd_list(); */
- return rc;
-}
-EXPORT_SYMBOL(lustre_process_log);
-
-/* Stop watching this config log for updates */
-int lustre_end_log(struct super_block *sb, char *logname,
- struct config_llog_instance *cfg)
-{
- struct lustre_cfg *lcfg;
- struct lustre_cfg_bufs bufs;
- struct lustre_sb_info *lsi = s2lsi(sb);
- struct obd_device *mgc = lsi->lsi_mgc;
- int rc;
-
- if (!mgc)
- return -ENOENT;
-
- /* mgc_process_config */
- lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
- lustre_cfg_bufs_set_string(&bufs, 1, logname);
- if (cfg)
- lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
- lcfg = kzalloc(lustre_cfg_len(bufs.lcfg_bufcount, bufs.lcfg_buflen),
- GFP_NOFS);
- if (!lcfg)
- return -ENOMEM;
- lustre_cfg_init(lcfg, LCFG_LOG_END, &bufs);
-
- rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
- kfree(lcfg);
- return rc;
-}
-EXPORT_SYMBOL(lustre_end_log);
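
[Editor's note: taken together, lustre_process_log() and lustre_end_log() bracket the lifetime of a followed config log. A hedged kernel-context sketch of a client-side caller; the "lustre-client" log name assumes a filesystem named "lustre", and real callers derive it from lmd_profile:

    static int follow_client_log(struct super_block *sb,
                                 struct config_llog_instance *cfg)
    {
        int rc;

        /* Fetch the log from the MGS and keep following updates */
        rc = lustre_process_log(sb, "lustre-client", cfg);
        if (rc)
            return rc;
        /* ... filesystem in use ... */

        /* Stop watching the log at unmount time */
        return lustre_end_log(sb, "lustre-client", cfg);
    }
]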
-
-/**************** obd start *******************/
-
-/** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
- * lctl (and do, for the echo client/server).
- */
-static int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
- char *s1, char *s2, char *s3, char *s4)
-{
- struct lustre_cfg_bufs bufs;
- struct lustre_cfg *lcfg = NULL;
- int rc;
-
- CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
- cmd, s1, s2, s3, s4);
-
- lustre_cfg_bufs_reset(&bufs, cfgname);
- if (s1)
- lustre_cfg_bufs_set_string(&bufs, 1, s1);
- if (s2)
- lustre_cfg_bufs_set_string(&bufs, 2, s2);
- if (s3)
- lustre_cfg_bufs_set_string(&bufs, 3, s3);
- if (s4)
- lustre_cfg_bufs_set_string(&bufs, 4, s4);
-
- lcfg = kzalloc(lustre_cfg_len(bufs.lcfg_bufcount, bufs.lcfg_buflen),
- GFP_NOFS);
- if (!lcfg)
- return -ENOMEM;
- lustre_cfg_init(lcfg, cmd, &bufs);
- lcfg->lcfg_nid = nid;
- rc = class_process_config(lcfg);
- kfree(lcfg);
- return rc;
-}
-
-/** Call class_attach and class_setup. These methods in turn call
- * obd type-specific methods.
- */
-static int lustre_start_simple(char *obdname, char *type, char *uuid,
- char *s1, char *s2, char *s3, char *s4)
-{
- int rc;
-
- CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
-
- rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, NULL, NULL);
- if (rc) {
- CERROR("%s attach error %d\n", obdname, rc);
- return rc;
- }
- rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, s3, s4);
- if (rc) {
- CERROR("%s setup error %d\n", obdname, rc);
- do_lcfg(obdname, 0, LCFG_DETACH, NULL, NULL, NULL, NULL);
- }
- return rc;
-}
-
-static DEFINE_MUTEX(mgc_start_lock);
-
-/** Set up an mgc obd to process startup logs
- *
- * \param sb [in] super block of the mgc obd
- *
- * \retval 0 success, otherwise error code
- */
-int lustre_start_mgc(struct super_block *sb)
-{
- struct obd_connect_data *data = NULL;
- struct lustre_sb_info *lsi = s2lsi(sb);
- struct obd_device *obd;
- struct obd_export *exp;
- struct obd_uuid *uuid;
- class_uuid_t uuidc;
- lnet_nid_t nid;
- char nidstr[LNET_NIDSTR_SIZE];
- char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
- char *ptr;
- int rc = 0, i = 0, j;
-
- LASSERT(lsi->lsi_lmd);
-
- /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
- ptr = lsi->lsi_lmd->lmd_dev;
- if (class_parse_nid(ptr, &nid, &ptr) == 0)
- i++;
- if (i == 0) {
- CERROR("No valid MGS nids found.\n");
- return -EINVAL;
- }
-
- mutex_lock(&mgc_start_lock);
-
- libcfs_nid2str_r(nid, nidstr, sizeof(nidstr));
- mgcname = kasprintf(GFP_NOFS,
- "%s%s", LUSTRE_MGC_OBDNAME, nidstr);
- niduuid = kasprintf(GFP_NOFS, "%s_%x", mgcname, 0);
- if (!mgcname || !niduuid) {
- rc = -ENOMEM;
- goto out_free;
- }
-
- mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
-
- data = kzalloc(sizeof(*data), GFP_NOFS);
- if (!data) {
- rc = -ENOMEM;
- goto out_free;
- }
-
- obd = class_name2obd(mgcname);
- if (obd && !obd->obd_stopping) {
- int recov_bk;
-
- rc = obd_set_info_async(NULL, obd->obd_self_export,
- strlen(KEY_MGSSEC), KEY_MGSSEC,
- strlen(mgssec), mgssec, NULL);
- if (rc)
- goto out_free;
-
- /* Re-using an existing MGC */
- atomic_inc(&obd->u.cli.cl_mgc_refcount);
-
- /* IR compatibility check, only for clients */
- if (lmd_is_client(lsi->lsi_lmd)) {
- int has_ir;
- int vallen = sizeof(*data);
- __u32 *flags = &lsi->lsi_lmd->lmd_flags;
-
- rc = obd_get_info(NULL, obd->obd_self_export,
- strlen(KEY_CONN_DATA), KEY_CONN_DATA,
- &vallen, data);
- LASSERT(rc == 0);
- has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
- if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
- /* LMD_FLG_NOIR is for test purpose only */
- LCONSOLE_WARN(
- "Trying to mount a client with IR setting not compatible with current mgc. Force to use current mgc setting that is IR %s.\n",
- has_ir ? "enabled" : "disabled");
- if (has_ir)
- *flags &= ~LMD_FLG_NOIR;
- else
- *flags |= LMD_FLG_NOIR;
- }
- }
-
- recov_bk = 0;
-
- /* Try all connections, but only once (again).
- * We don't want to block another target from starting
- * (using its local copy of the log), but we do want to connect
- * if at all possible.
- */
- recov_bk++;
- CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,
- recov_bk);
- rc = obd_set_info_async(NULL, obd->obd_self_export,
- sizeof(KEY_INIT_RECOV_BACKUP),
- KEY_INIT_RECOV_BACKUP,
- sizeof(recov_bk), &recov_bk, NULL);
- rc = 0;
- goto out;
- }
-
- CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);
-
- /* Add the primary nids for the MGS */
- i = 0;
- /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
- ptr = lsi->lsi_lmd->lmd_dev;
- while (class_parse_nid(ptr, &nid, &ptr) == 0) {
- rc = do_lcfg(mgcname, nid,
- LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
- if (!rc)
- i++;
- /* Stop at the first failover nid */
- if (*ptr == ':')
- break;
- }
- if (i == 0) {
- CERROR("No valid MGS nids found.\n");
- rc = -EINVAL;
- goto out_free;
- }
- lsi->lsi_lmd->lmd_mgs_failnodes = 1;
-
- /* Random uuid for MGC allows easier reconnects */
- uuid = kzalloc(sizeof(*uuid), GFP_NOFS);
- if (!uuid) {
- rc = -ENOMEM;
- goto out_free;
- }
-
- ll_generate_random_uuid(uuidc);
- class_uuid_unparse(uuidc, uuid);
-
- /* Start the MGC */
- rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
- (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
- niduuid, NULL, NULL);
- kfree(uuid);
- if (rc)
- goto out_free;
-
- /* Add any failover MGS nids */
- i = 1;
- while (ptr && ((*ptr == ':' ||
- class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) {
- /* New failover node */
- sprintf(niduuid, "%s_%x", mgcname, i);
- j = 0;
- while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
- rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, niduuid,
- NULL, NULL, NULL);
- if (!rc)
- ++j;
- if (*ptr == ':')
- break;
- }
- if (j > 0) {
- rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
- niduuid, NULL, NULL, NULL);
- if (!rc)
- i++;
- } else {
- /* at ":/fsname" */
- break;
- }
- }
- lsi->lsi_lmd->lmd_mgs_failnodes = i;
-
- obd = class_name2obd(mgcname);
- if (!obd) {
- CERROR("Can't find mgcobd %s\n", mgcname);
- rc = -ENOTCONN;
- goto out_free;
- }
-
- rc = obd_set_info_async(NULL, obd->obd_self_export,
- strlen(KEY_MGSSEC), KEY_MGSSEC,
- strlen(mgssec), mgssec, NULL);
- if (rc)
- goto out_free;
-
- /* Keep a refcount of servers/clients who started with "mount",
- * so we know when we can get rid of the mgc.
- */
- atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
-
- /* We connect to the MGS at setup, and don't disconnect until cleanup */
- data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
- OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
- OBD_CONNECT_LVB_TYPE | OBD_CONNECT_BULK_MBITS;
-
-#if OBD_OCD_VERSION(3, 0, 53, 0) > LUSTRE_VERSION_CODE
- data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB;
-#endif
-
- if (lmd_is_client(lsi->lsi_lmd) &&
- lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
- data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
- data->ocd_version = LUSTRE_VERSION_CODE;
- rc = obd_connect(NULL, &exp, obd, &obd->obd_uuid, data, NULL);
- if (rc) {
- CERROR("connect failed %d\n", rc);
- goto out;
- }
-
- obd->u.cli.cl_mgc_mgsexp = exp;
-
-out:
- /* Keep the mgc info in the sb. Note that many lsi's can point
- * to the same mgc.
- */
- lsi->lsi_mgc = obd;
-out_free:
- mutex_unlock(&mgc_start_lock);
-
- kfree(data);
- kfree(mgcname);
- kfree(niduuid);
- return rc;
-}
-
-static int lustre_stop_mgc(struct super_block *sb)
-{
- struct lustre_sb_info *lsi = s2lsi(sb);
- struct obd_device *obd;
- char *niduuid = NULL, *ptr = NULL;
- int i, rc = 0, len = 0;
-
- if (!lsi)
- return -ENOENT;
- obd = lsi->lsi_mgc;
- if (!obd)
- return -ENOENT;
- lsi->lsi_mgc = NULL;
-
- mutex_lock(&mgc_start_lock);
- LASSERT(atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
- if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
- /* This is not fatal, every client that stops
- * will call in here.
- */
- CDEBUG(D_MOUNT, "mgc still has %d references.\n",
- atomic_read(&obd->u.cli.cl_mgc_refcount));
- rc = -EBUSY;
- goto out;
- }
-
- /* The MGC has no recoverable data in any case;
- * force shutdown is set in umount_begin.
- */
- obd->obd_no_recov = 1;
-
- if (obd->u.cli.cl_mgc_mgsexp) {
- /* An error is not fatal; if we are unable to send the
- * disconnect, the MGS ping evictor cleans up the export.
- */
- rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
- if (rc)
- CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
- }
-
- /* Save the obdname for cleaning the nid uuids, which are obdname_XX */
- len = strlen(obd->obd_name) + 6;
- niduuid = kzalloc(len, GFP_NOFS);
- if (niduuid) {
- strcpy(niduuid, obd->obd_name);
- ptr = niduuid + strlen(niduuid);
- }
-
- rc = class_manual_cleanup(obd);
- if (rc)
- goto out;
-
- /* Clean the nid uuids */
- if (!niduuid) {
- rc = -ENOMEM;
- goto out;
- }
-
- for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
- sprintf(ptr, "_%x", i);
- rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
- niduuid, NULL, NULL, NULL);
- if (rc)
- CERROR("del MDC UUID %s failed: rc = %d\n",
- niduuid, rc);
- }
-out:
- kfree(niduuid);
-
- /* class_import_put will get rid of the additional connections */
- mutex_unlock(&mgc_start_lock);
- return rc;
-}
-
-/***************** lustre superblock **************/
-
-static struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
-{
- struct lustre_sb_info *lsi;
-
- lsi = kzalloc(sizeof(*lsi), GFP_NOFS);
- if (!lsi)
- return NULL;
- lsi->lsi_lmd = kzalloc(sizeof(*lsi->lsi_lmd), GFP_NOFS);
- if (!lsi->lsi_lmd) {
- kfree(lsi);
- return NULL;
- }
-
- lsi->lsi_lmd->lmd_exclude_count = 0;
- lsi->lsi_lmd->lmd_recovery_time_soft = 0;
- lsi->lsi_lmd->lmd_recovery_time_hard = 0;
- s2lsi_nocast(sb) = lsi;
- /* we take 1 extra ref for our setup */
- atomic_set(&lsi->lsi_mounts, 1);
-
- /* Default umount style */
- lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
-
- return lsi;
-}
-
-static int lustre_free_lsi(struct super_block *sb)
-{
- struct lustre_sb_info *lsi = s2lsi(sb);
-
- CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
-
- /* someone didn't call server_put_mount. */
- LASSERT(atomic_read(&lsi->lsi_mounts) == 0);
-
- if (lsi->lsi_lmd) {
- kfree(lsi->lsi_lmd->lmd_dev);
- kfree(lsi->lsi_lmd->lmd_profile);
- kfree(lsi->lsi_lmd->lmd_mgssec);
- kfree(lsi->lsi_lmd->lmd_opts);
- if (lsi->lsi_lmd->lmd_exclude_count)
- kfree(lsi->lsi_lmd->lmd_exclude);
- kfree(lsi->lsi_lmd->lmd_mgs);
- kfree(lsi->lsi_lmd->lmd_osd_type);
- kfree(lsi->lsi_lmd->lmd_params);
-
- kfree(lsi->lsi_lmd);
- }
-
- LASSERT(!lsi->lsi_llsbi);
- kfree(lsi);
- s2lsi_nocast(sb) = NULL;
-
- return 0;
-}
-
-/* The lsi has one reference for every server that is using the disk -
- * e.g. MDT, MGS, and potentially MGC
- */
-static int lustre_put_lsi(struct super_block *sb)
-{
- struct lustre_sb_info *lsi = s2lsi(sb);
-
- CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));
- if (atomic_dec_and_test(&lsi->lsi_mounts)) {
- lustre_free_lsi(sb);
- return 1;
- }
- return 0;
-}
-
-/*** SERVER NAME ***
- * <FSNAME><SEPARATOR><TYPE><INDEX>
- * FSNAME is between 1 and 8 characters (inclusive).
- * Excluded characters are '/' and ':'
- * SEPARATOR is either ':' or '-'
- * TYPE: "OST", "MDT", etc.
- * INDEX: Hex representation of the index
- */
-
-/** Get the fsname ("lustre") from the server name ("lustre-OST003F").
- * @param [in] svname server name including type and index
- * @param [out] fsname Buffer to copy filesystem name prefix into.
- * Must have at least 'strlen(fsname) + 1' chars.
- * @param [out] endptr if endptr isn't NULL it is set to end of fsname
- * @return 0 on success, rc < 0 on error
- */
-static int server_name2fsname(const char *svname, char *fsname,
- const char **endptr)
-{
- const char *dash;
-
- dash = svname + strnlen(svname, 8); /* max fsname length is 8 */
- for (; dash > svname && *dash != '-' && *dash != ':'; dash--)
- ;
- if (dash == svname)
- return -EINVAL;
-
- if (fsname) {
- strncpy(fsname, svname, dash - svname);
- fsname[dash - svname] = '\0';
- }
-
- if (endptr)
- *endptr = dash;
-
- return 0;
-}
-
-/* Get the index from the obd name.
- * rc = server type, or
- * rc < 0 on error
- * if endptr isn't NULL it is set to end of name
- */
-static int server_name2index(const char *svname, __u32 *idx,
- const char **endptr)
-{
- unsigned long index;
- int rc;
- const char *dash;
-
- /* We use server_name2fsname() just for parsing */
- rc = server_name2fsname(svname, NULL, &dash);
- if (rc != 0)
- return rc;
-
- dash++;
-
- if (strncmp(dash, "MDT", 3) == 0)
- rc = LDD_F_SV_TYPE_MDT;
- else if (strncmp(dash, "OST", 3) == 0)
- rc = LDD_F_SV_TYPE_OST;
- else
- return -EINVAL;
-
- dash += 3;
-
- if (strncmp(dash, "all", 3) == 0) {
- if (endptr)
- *endptr = dash + 3;
- return rc | LDD_F_SV_ALL;
- }
-
- index = simple_strtoul(dash, (char **)endptr, 16);
- if (idx)
- *idx = index;
-
- /* Account for the -mdc suffix after the index, which is possible when specifying an mdt */
- if (endptr && strncmp(LUSTRE_MDC_NAME, *endptr + 1,
- sizeof(LUSTRE_MDC_NAME) - 1) == 0)
- *endptr += sizeof(LUSTRE_MDC_NAME);
-
- return rc;
-}
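
[Editor's note: a minimal user-space sketch of the name parsing that server_name2fsname() and server_name2index() perform above; it is not part of the kernel source and handles only the simple '-OST' case:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
        const char *svname = "lustre-OST003F";
        const char *dash = strchr(svname, '-');

        if (!dash || strncmp(dash + 1, "OST", 3) != 0)
            return 1;
        /* fsname is everything before the separator; the index is hex */
        printf("fsname=%.*s type=OST index=%#lx\n",
               (int)(dash - svname), svname,
               strtoul(dash + 4, NULL, 16));
        return 0;
    }
]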
-
-/*************** mount common between server and client ***************/
-
-/* Common umount */
-int lustre_common_put_super(struct super_block *sb)
-{
- int rc;
-
- CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
-
- /* Drop a ref to the MGC */
- rc = lustre_stop_mgc(sb);
- if (rc && (rc != -ENOENT)) {
- if (rc != -EBUSY) {
- CERROR("Can't stop MGC: %d\n", rc);
- return rc;
- }
- /* BUSY just means that there's some other obd that
- * needs the mgc. Let it do the cleanup.
- */
- CDEBUG(D_MOUNT, "MGC still in use\n");
- }
- /* Drop a ref to the mounted disk */
- lustre_put_lsi(sb);
- return rc;
-}
-EXPORT_SYMBOL(lustre_common_put_super);
-
-static void lmd_print(struct lustre_mount_data *lmd)
-{
- int i;
-
- PRINT_CMD(D_MOUNT, " mount data:\n");
- if (lmd_is_client(lmd))
- PRINT_CMD(D_MOUNT, "profile: %s\n", lmd->lmd_profile);
- PRINT_CMD(D_MOUNT, "device: %s\n", lmd->lmd_dev);
- PRINT_CMD(D_MOUNT, "flags: %x\n", lmd->lmd_flags);
-
- if (lmd->lmd_opts)
- PRINT_CMD(D_MOUNT, "options: %s\n", lmd->lmd_opts);
-
- if (lmd->lmd_recovery_time_soft)
- PRINT_CMD(D_MOUNT, "recovery time soft: %d\n",
- lmd->lmd_recovery_time_soft);
-
- if (lmd->lmd_recovery_time_hard)
- PRINT_CMD(D_MOUNT, "recovery time hard: %d\n",
- lmd->lmd_recovery_time_hard);
-
- for (i = 0; i < lmd->lmd_exclude_count; i++) {
- PRINT_CMD(D_MOUNT, "exclude %d: OST%04x\n", i,
- lmd->lmd_exclude[i]);
- }
-}
-
-/* Is this server on the exclusion list */
-int lustre_check_exclusion(struct super_block *sb, char *svname)
-{
- struct lustre_sb_info *lsi = s2lsi(sb);
- struct lustre_mount_data *lmd = lsi->lsi_lmd;
- __u32 index;
- int i, rc;
-
- rc = server_name2index(svname, &index, NULL);
- if (rc != LDD_F_SV_TYPE_OST)
- /* Only exclude OSTs */
- return 0;
-
- CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
- index, lmd->lmd_exclude_count, lmd->lmd_dev);
-
- for (i = 0; i < lmd->lmd_exclude_count; i++) {
- if (index == lmd->lmd_exclude[i]) {
- CWARN("Excluding %s (on exclusion list)\n", svname);
- return 1;
- }
- }
- return 0;
-}
-
-/* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
-static int lmd_make_exclusion(struct lustre_mount_data *lmd, const char *ptr)
-{
- const char *s1 = ptr, *s2;
- __u32 index = 0, *exclude_list;
- int rc = 0, devmax;
-
- /* The shortest an ost name can be is 8 chars: -OST0000.
- * We don't actually know the fsname at this time, so in fact
- * a user could specify any fsname.
- */
- devmax = strlen(ptr) / 8 + 1;
-
- /* temp storage until we figure out how many we have */
- exclude_list = kcalloc(devmax, sizeof(index), GFP_NOFS);
- if (!exclude_list)
- return -ENOMEM;
-
- /* we enter this fn pointing at the '=' */
- while (*s1 && *s1 != ' ' && *s1 != ',') {
- s1++;
- rc = server_name2index(s1, &index, &s2);
- if (rc < 0) {
- CERROR("Can't parse server name '%s': rc = %d\n",
- s1, rc);
- break;
- }
- if (rc == LDD_F_SV_TYPE_OST)
- exclude_list[lmd->lmd_exclude_count++] = index;
- else
- CDEBUG(D_MOUNT, "ignoring exclude %.*s: type = %#x\n",
- (uint)(s2 - s1), s1, rc);
- s1 = s2;
- /* now we are pointing at ':' (next exclude)
- * or ',' (end of excludes)
- */
- if (lmd->lmd_exclude_count >= devmax)
- break;
- }
- if (rc >= 0) /* non-err */
- rc = 0;
-
- if (lmd->lmd_exclude_count) {
- /* permanent, freed in lustre_free_lsi */
- lmd->lmd_exclude = kcalloc(lmd->lmd_exclude_count,
- sizeof(index), GFP_NOFS);
- if (lmd->lmd_exclude) {
- memcpy(lmd->lmd_exclude, exclude_list,
- sizeof(index) * lmd->lmd_exclude_count);
- } else {
- rc = -ENOMEM;
- lmd->lmd_exclude_count = 0;
- }
- }
- kfree(exclude_list);
- return rc;
-}
-
-static int lmd_parse_mgssec(struct lustre_mount_data *lmd, char *ptr)
-{
- char *tail;
- int length;
-
- kfree(lmd->lmd_mgssec);
- lmd->lmd_mgssec = NULL;
-
- tail = strchr(ptr, ',');
- if (!tail)
- length = strlen(ptr);
- else
- length = tail - ptr;
-
- lmd->lmd_mgssec = kzalloc(length + 1, GFP_NOFS);
- if (!lmd->lmd_mgssec)
- return -ENOMEM;
-
- memcpy(lmd->lmd_mgssec, ptr, length);
- lmd->lmd_mgssec[length] = '\0';
- return 0;
-}
-
-static int lmd_parse_string(char **handle, char *ptr)
-{
- char *tail;
- int length;
-
- if (!handle || !ptr)
- return -EINVAL;
-
- kfree(*handle);
- *handle = NULL;
-
- tail = strchr(ptr, ',');
- if (!tail)
- length = strlen(ptr);
- else
- length = tail - ptr;
-
- *handle = kzalloc(length + 1, GFP_NOFS);
- if (!*handle)
- return -ENOMEM;
-
- memcpy(*handle, ptr, length);
- (*handle)[length] = '\0';
-
- return 0;
-}
-
-/* Collect multiple values for mgsnid specifiers */
-static int lmd_parse_mgs(struct lustre_mount_data *lmd, char **ptr)
-{
- lnet_nid_t nid;
- char *tail = *ptr;
- char *mgsnid;
- int length;
- int oldlen = 0;
-
- /* Find end of nidlist */
- while (class_parse_nid_quiet(tail, &nid, &tail) == 0)
- ;
- length = tail - *ptr;
- if (length == 0) {
- LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", *ptr);
- return -EINVAL;
- }
-
- if (lmd->lmd_mgs)
- oldlen = strlen(lmd->lmd_mgs) + 1;
-
- mgsnid = kzalloc(oldlen + length + 1, GFP_NOFS);
- if (!mgsnid)
- return -ENOMEM;
-
- if (lmd->lmd_mgs) {
- /* Multiple mgsnid= are taken to mean failover locations */
- memcpy(mgsnid, lmd->lmd_mgs, oldlen);
- mgsnid[oldlen - 1] = ':';
- kfree(lmd->lmd_mgs);
- }
- memcpy(mgsnid + oldlen, *ptr, length);
- mgsnid[oldlen + length] = '\0';
- lmd->lmd_mgs = mgsnid;
- *ptr = tail;
-
- return 0;
-}
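
[Editor's note: lmd_parse_mgs() above accumulates repeated mgsnode= values into one colon-separated failover string. A standalone user-space sketch of that accumulation; the NID strings are made up:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static char *append_mgsnid(char *mgs, const char *nid)
    {
        size_t oldlen = mgs ? strlen(mgs) + 1 : 0;
        char *out = calloc(oldlen + strlen(nid) + 1, 1);

        if (!out)
            return mgs;
        if (mgs) {
            memcpy(out, mgs, oldlen);
            out[oldlen - 1] = ':';  /* ':' separates failover locations */
            free(mgs);
        }
        strcpy(out + oldlen, nid);
        return out;
    }

    int main(void)
    {
        char *mgs = append_mgsnid(NULL, "10.0.0.1@tcp");

        mgs = append_mgsnid(mgs, "10.0.0.2@tcp");
        printf("%s\n", mgs);    /* prints 10.0.0.1@tcp:10.0.0.2@tcp */
        free(mgs);
        return 0;
    }
]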
-
-/** Parse mount line options
- * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
- * dev is passed as device=uml1:/lustre by mount.lustre
- */
-static int lmd_parse(char *options, struct lustre_mount_data *lmd)
-{
- char *s1, *s2, *devname = NULL;
- struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
- int rc = 0;
-
- LASSERT(lmd);
- if (!options) {
- LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that /sbin/mount.lustre is installed.\n");
- return -EINVAL;
- }
-
- /* Options should be a string - try to detect old lmd data */
- if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
- LCONSOLE_ERROR_MSG(0x163, "You're using an old version of /sbin/mount.lustre. Please install version %s\n",
- LUSTRE_VERSION_STRING);
- return -EINVAL;
- }
- lmd->lmd_magic = LMD_MAGIC;
-
- lmd->lmd_params = kzalloc(LMD_PARAMS_MAXLEN, GFP_NOFS);
- if (!lmd->lmd_params)
- return -ENOMEM;
- lmd->lmd_params[0] = '\0';
-
- /* Set default flags here */
-
- s1 = options;
- while (*s1) {
- int clear = 0;
- int time_min = OBD_RECOVERY_TIME_MIN;
- char *s3;
-
- /* Skip whitespace and extra commas */
- while (*s1 == ' ' || *s1 == ',')
- s1++;
- s3 = s1;
-
- /* Client options are parsed in ll_options: e.g. flock,
- * user_xattr, acl
- */
-
- /* Parse non-ldiskfs options here. Rather than modifying
- * ldiskfs, we just zero these out here
- */
- if (strncmp(s1, "abort_recov", 11) == 0) {
- lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
- clear++;
- } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
- lmd->lmd_recovery_time_soft = max_t(int,
- simple_strtoul(s1 + 19, NULL, 10), time_min);
- clear++;
- } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
- lmd->lmd_recovery_time_hard = max_t(int,
- simple_strtoul(s1 + 19, NULL, 10), time_min);
- clear++;
- } else if (strncmp(s1, "noir", 4) == 0) {
- lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
- clear++;
- } else if (strncmp(s1, "nosvc", 5) == 0) {
- lmd->lmd_flags |= LMD_FLG_NOSVC;
- clear++;
- } else if (strncmp(s1, "nomgs", 5) == 0) {
- lmd->lmd_flags |= LMD_FLG_NOMGS;
- clear++;
- } else if (strncmp(s1, "noscrub", 7) == 0) {
- lmd->lmd_flags |= LMD_FLG_NOSCRUB;
- clear++;
- } else if (strncmp(s1, PARAM_MGSNODE,
- sizeof(PARAM_MGSNODE) - 1) == 0) {
- s2 = s1 + sizeof(PARAM_MGSNODE) - 1;
- /* Assume the next mount opt is the first
- * invalid nid we get to.
- */
- rc = lmd_parse_mgs(lmd, &s2);
- if (rc)
- goto invalid;
- clear++;
- } else if (strncmp(s1, "writeconf", 9) == 0) {
- lmd->lmd_flags |= LMD_FLG_WRITECONF;
- clear++;
- } else if (strncmp(s1, "update", 6) == 0) {
- lmd->lmd_flags |= LMD_FLG_UPDATE;
- clear++;
- } else if (strncmp(s1, "virgin", 6) == 0) {
- lmd->lmd_flags |= LMD_FLG_VIRGIN;
- clear++;
- } else if (strncmp(s1, "noprimnode", 10) == 0) {
- lmd->lmd_flags |= LMD_FLG_NO_PRIMNODE;
- clear++;
- } else if (strncmp(s1, "mgssec=", 7) == 0) {
- rc = lmd_parse_mgssec(lmd, s1 + 7);
- if (rc)
- goto invalid;
- s3 = s2;
- clear++;
- /* ost exclusion list */
- } else if (strncmp(s1, "exclude=", 8) == 0) {
- rc = lmd_make_exclusion(lmd, s1 + 7);
- if (rc)
- goto invalid;
- clear++;
- } else if (strncmp(s1, "mgs", 3) == 0) {
- /* We are an MGS */
- lmd->lmd_flags |= LMD_FLG_MGS;
- clear++;
- } else if (strncmp(s1, "svname=", 7) == 0) {
- rc = lmd_parse_string(&lmd->lmd_profile, s1 + 7);
- if (rc)
- goto invalid;
- clear++;
- } else if (strncmp(s1, "param=", 6) == 0) {
- size_t length, params_length;
- char *tail = strchr(s1 + 6, ',');
-
- if (!tail) {
- length = strlen(s1);
- } else {
- lnet_nid_t nid;
- char *param_str = tail + 1;
- int supplementary = 1;
-
- while (!class_parse_nid_quiet(param_str, &nid,
- &param_str)) {
- supplementary = 0;
- }
- length = param_str - s1 - supplementary;
- }
- length -= 6;
- params_length = strlen(lmd->lmd_params);
- if (params_length + length + 1 >= LMD_PARAMS_MAXLEN)
- return -E2BIG;
- strncat(lmd->lmd_params, s1 + 6, length);
- lmd->lmd_params[params_length + length] = '\0';
- strlcat(lmd->lmd_params, " ", LMD_PARAMS_MAXLEN);
- s3 = s1 + 6 + length;
- clear++;
- } else if (strncmp(s1, "osd=", 4) == 0) {
- rc = lmd_parse_string(&lmd->lmd_osd_type, s1 + 4);
- if (rc)
- goto invalid;
- clear++;
- }
- /* Linux 2.4 doesn't pass the device, so we stick it at the
- * end of the options.
- */
- else if (strncmp(s1, "device=", 7) == 0) {
- devname = s1 + 7;
- /* terminate options right before device. device
- * must be the last one.
- */
- *s1 = '\0';
- break;
- }
-
- /* Find next opt */
- s2 = strchr(s1, ',');
- if (!s2) {
- if (clear)
- *s1 = '\0';
- break;
- }
- s2++;
- if (clear)
- memmove(s1, s2, strlen(s2) + 1);
- else
- s1 = s2;
- }
-
- if (!devname) {
- LCONSOLE_ERROR_MSG(0x164, "Can't find the device name (need mount option 'device=...')\n");
- goto invalid;
- }
-
- s1 = strstr(devname, ":/");
- if (s1) {
- ++s1;
- lmd->lmd_flags |= LMD_FLG_CLIENT;
- /* Remove leading /s from fsname */
- while (*++s1 == '/')
- ;
- /* Freed in lustre_free_lsi */
- lmd->lmd_profile = kasprintf(GFP_NOFS, "%s-client", s1);
- if (!lmd->lmd_profile)
- return -ENOMEM;
- }
-
- /* Freed in lustre_free_lsi */
- lmd->lmd_dev = kzalloc(strlen(devname) + 1, GFP_NOFS);
- if (!lmd->lmd_dev)
- return -ENOMEM;
- strcpy(lmd->lmd_dev, devname);
-
- /* Save mount options */
- s1 = options + strlen(options) - 1;
- while (s1 >= options && (*s1 == ',' || *s1 == ' '))
- *s1-- = 0;
- if (*options != 0) {
- /* Freed in lustre_free_lsi */
- lmd->lmd_opts = kzalloc(strlen(options) + 1, GFP_NOFS);
- if (!lmd->lmd_opts)
- return -ENOMEM;
- strcpy(lmd->lmd_opts, options);
- }
-
- lmd_print(lmd);
- lmd->lmd_magic = LMD_MAGIC;
-
- return rc;
-
-invalid:
- CERROR("Bad mount options %s\n", options);
- return -EINVAL;
-}
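
[Editor's note: for client mounts, lmd_parse() derives the profile from the device string: everything after ":/" (minus extra slashes) is the fsname, and "-client" is appended. A standalone user-space sketch; the NID is made up:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        const char *devname = "192.168.0.1@tcp:/lustre";
        const char *s1 = strstr(devname, ":/");

        if (!s1)
            return 1;         /* no ":/", so not a client mount */
        s1 += 2;
        while (*s1 == '/')    /* strip extra leading slashes */
            s1++;
        printf("profile: %s-client\n", s1);
        return 0;
    }
]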
-
-/** This is the entry point for the mount call into Lustre.
- * This is called when a server or client is mounted,
- * and this is where we start setting things up.
- * @param data Mount options (e.g. -o flock,abort_recov)
- */
-static int lustre_fill_super(struct super_block *sb, void *lmd2_data, int silent)
-{
- struct lustre_mount_data *lmd;
- struct lustre_sb_info *lsi;
- int rc;
-
- CDEBUG(D_MOUNT | D_VFSTRACE, "VFS Op: sb %p\n", sb);
-
- lsi = lustre_init_lsi(sb);
- if (!lsi)
- return -ENOMEM;
- lmd = lsi->lsi_lmd;
-
- /*
- * Disable lockdep during mount, because mount locking patterns are
- * `special'.
- */
- lockdep_off();
-
- /*
- * LU-639: the obd cleanup of last mount may not finish yet, wait here.
- */
- obd_zombie_barrier();
-
- /* Figure out the lmd from the mount options */
- if (lmd_parse(lmd2_data, lmd)) {
- lustre_put_lsi(sb);
- rc = -EINVAL;
- goto out;
- }
-
- if (lmd_is_client(lmd)) {
- bool have_client = false;
- CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
- if (!client_fill_super)
- request_module("lustre");
- spin_lock(&client_lock);
- if (client_fill_super && try_module_get(client_mod))
- have_client = true;
- spin_unlock(&client_lock);
- if (!have_client) {
- LCONSOLE_ERROR_MSG(0x165, "Nothing registered for client mount! Is the 'lustre' module loaded?\n");
- lustre_put_lsi(sb);
- rc = -ENODEV;
- } else {
- rc = lustre_start_mgc(sb);
- if (rc) {
- lustre_common_put_super(sb);
- goto out;
- }
- /* Connect and start */
- /* (should always be ll_fill_super) */
- rc = (*client_fill_super)(sb);
- /* c_f_s will call lustre_common_put_super on failure, otherwise
- * c_f_s will have taken another reference to the module */
- module_put(client_mod);
- }
- } else {
- CERROR("This is client-side-only module, cannot handle server mount.\n");
- rc = -EINVAL;
- }
-
- /* If an error happens in the fill_super() call, @lsi is freed there.
- * This is why we do not put it here.
- */
- goto out;
-out:
- if (rc) {
- CERROR("Unable to mount %s (%d)\n",
- s2lsi(sb) ? lmd->lmd_dev : "", rc);
- } else {
- CDEBUG(D_SUPER, "Mount %s complete\n",
- lmd->lmd_dev);
- }
- lockdep_on();
- return rc;
-}
-
-/* We can't call ll_fill_super by name because it lives in a module that
- * must be loaded after this one.
- */
-void lustre_register_super_ops(struct module *mod,
- int (*cfs)(struct super_block *sb),
- void (*ksc)(struct super_block *sb))
-{
- spin_lock(&client_lock);
- client_mod = mod;
- client_fill_super = cfs;
- kill_super_cb = ksc;
- spin_unlock(&client_lock);
-}
-EXPORT_SYMBOL(lustre_register_super_ops);
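
[Editor's note: a hedged sketch of how the client module is expected to use lustre_register_super_ops() at init time. The ll_fill_super name comes from the comment in lustre_fill_super() above, while ll_kill_super is an assumed name for the kill callback:

    static int __init lustre_client_init(void)
    {
        /* Register callbacks so obdclass can start client mounts;
         * ll_kill_super here is an assumed callback name.
         */
        lustre_register_super_ops(THIS_MODULE, ll_fill_super,
                                  ll_kill_super);
        return 0;
    }
]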
-
-/***************** FS registration ******************/
-static struct dentry *lustre_mount(struct file_system_type *fs_type, int flags,
- const char *devname, void *data)
-{
- return mount_nodev(fs_type, flags, data, lustre_fill_super);
-}
-
-static void lustre_kill_super(struct super_block *sb)
-{
- struct lustre_sb_info *lsi = s2lsi(sb);
-
- if (kill_super_cb && lsi)
- (*kill_super_cb)(sb);
-
- kill_anon_super(sb);
-}
-
-/** Register the "lustre" fs type
- */
-static struct file_system_type lustre_fs_type = {
- .owner = THIS_MODULE,
- .name = "lustre",
- .mount = lustre_mount,
- .kill_sb = lustre_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE,
-};
-MODULE_ALIAS_FS("lustre");
-
-int lustre_register_fs(void)
-{
- return register_filesystem(&lustre_fs_type);
-}
-
-int lustre_unregister_fs(void)
-{
- return unregister_filesystem(&lustre_fs_type);
-}
diff --git a/drivers/staging/lustre/lustre/obdclass/obdo.c b/drivers/staging/lustre/lustre/obdclass/obdo.c
deleted file mode 100644
index c4503bc36591..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/obdo.c
+++ /dev/null
@@ -1,181 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdclass/obdo.c
- *
- * Object Devices Class Driver
- * These are the only exported functions; they provide some generic
- * infrastructure for managing object devices.
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <obd_class.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <lustre_obdo.h>
-
-void obdo_set_parent_fid(struct obdo *dst, const struct lu_fid *parent)
-{
- dst->o_parent_oid = fid_oid(parent);
- dst->o_parent_seq = fid_seq(parent);
- dst->o_parent_ver = fid_ver(parent);
- dst->o_valid |= OBD_MD_FLGENER | OBD_MD_FLFID;
-}
-EXPORT_SYMBOL(obdo_set_parent_fid);
-
-/* WARNING: the file systems must take care not to tinker with
- * attributes they don't manage (such as blocks).
- */
-void obdo_from_inode(struct obdo *dst, struct inode *src, u32 valid)
-{
- u32 newvalid = 0;
-
- if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
- CDEBUG(D_INODE, "valid %x, new time %lu/%lu\n",
- valid, LTIME_S(src->i_mtime),
- LTIME_S(src->i_ctime));
-
- if (valid & OBD_MD_FLATIME) {
- dst->o_atime = LTIME_S(src->i_atime);
- newvalid |= OBD_MD_FLATIME;
- }
- if (valid & OBD_MD_FLMTIME) {
- dst->o_mtime = LTIME_S(src->i_mtime);
- newvalid |= OBD_MD_FLMTIME;
- }
- if (valid & OBD_MD_FLCTIME) {
- dst->o_ctime = LTIME_S(src->i_ctime);
- newvalid |= OBD_MD_FLCTIME;
- }
- if (valid & OBD_MD_FLSIZE) {
- dst->o_size = i_size_read(src);
- newvalid |= OBD_MD_FLSIZE;
- }
- if (valid & OBD_MD_FLBLOCKS) { /* allocation of space (x512 bytes) */
- dst->o_blocks = src->i_blocks;
- newvalid |= OBD_MD_FLBLOCKS;
- }
- if (valid & OBD_MD_FLBLKSZ) { /* optimal block size */
- dst->o_blksize = 1 << src->i_blkbits;
- newvalid |= OBD_MD_FLBLKSZ;
- }
- if (valid & OBD_MD_FLTYPE) {
- dst->o_mode = (dst->o_mode & S_IALLUGO) |
- (src->i_mode & S_IFMT);
- newvalid |= OBD_MD_FLTYPE;
- }
- if (valid & OBD_MD_FLMODE) {
- dst->o_mode = (dst->o_mode & S_IFMT) |
- (src->i_mode & S_IALLUGO);
- newvalid |= OBD_MD_FLMODE;
- }
- if (valid & OBD_MD_FLUID) {
- dst->o_uid = from_kuid(&init_user_ns, src->i_uid);
- newvalid |= OBD_MD_FLUID;
- }
- if (valid & OBD_MD_FLGID) {
- dst->o_gid = from_kgid(&init_user_ns, src->i_gid);
- newvalid |= OBD_MD_FLGID;
- }
- if (valid & OBD_MD_FLFLAGS) {
- dst->o_flags = src->i_flags;
- newvalid |= OBD_MD_FLFLAGS;
- }
- dst->o_valid |= newvalid;
-}
-EXPORT_SYMBOL(obdo_from_inode);
-
-void obdo_to_ioobj(const struct obdo *oa, struct obd_ioobj *ioobj)
-{
- ioobj->ioo_oid = oa->o_oi;
- if (unlikely(!(oa->o_valid & OBD_MD_FLGROUP)))
- ostid_set_seq_mdt0(&ioobj->ioo_oid);
-
- /* Since 2.4 this does not contain o_mode in the low 16 bits.
- * Instead, it holds (bd_md_max_brw - 1) for multi-bulk BRW RPCs
- */
- ioobj->ioo_max_brw = 0;
-}
-EXPORT_SYMBOL(obdo_to_ioobj);
-
-/**
- * Create an obdo to send over the wire
- */
-void lustre_set_wire_obdo(const struct obd_connect_data *ocd,
- struct obdo *wobdo, const struct obdo *lobdo)
-{
- *wobdo = *lobdo;
- wobdo->o_flags &= ~OBD_FL_LOCAL_MASK;
- if (!ocd)
- return;
-
- if (unlikely(!(ocd->ocd_connect_flags & OBD_CONNECT_FID)) &&
- fid_seq_is_echo(ostid_seq(&lobdo->o_oi))) {
- /*
- * Currently OBD_FL_OSTID is only used when a 2.4 echo
- * client communicates with a pre-2.4 server
- */
- wobdo->o_oi.oi.oi_id = fid_oid(&lobdo->o_oi.oi_fid);
- wobdo->o_oi.oi.oi_seq = fid_seq(&lobdo->o_oi.oi_fid);
- }
-}
-EXPORT_SYMBOL(lustre_set_wire_obdo);
-
-/**
- * Create a local obdo from a wire-based obdo
- */
-void lustre_get_wire_obdo(const struct obd_connect_data *ocd,
- struct obdo *lobdo, const struct obdo *wobdo)
-{
- u32 local_flags = 0;
-
- if (lobdo->o_valid & OBD_MD_FLFLAGS)
- local_flags = lobdo->o_flags & OBD_FL_LOCAL_MASK;
-
- *lobdo = *wobdo;
- if (local_flags) {
- lobdo->o_valid |= OBD_MD_FLFLAGS;
- lobdo->o_flags &= ~OBD_FL_LOCAL_MASK;
- lobdo->o_flags |= local_flags;
- }
- if (!ocd)
- return;
-
- if (unlikely(!(ocd->ocd_connect_flags & OBD_CONNECT_FID)) &&
- fid_seq_is_echo(wobdo->o_oi.oi.oi_seq)) {
- /* see above */
- lobdo->o_oi.oi_fid.f_seq = wobdo->o_oi.oi.oi_seq;
- lobdo->o_oi.oi_fid.f_oid = wobdo->o_oi.oi.oi_id;
- lobdo->o_oi.oi_fid.f_ver = 0;
- }
-}
-EXPORT_SYMBOL(lustre_get_wire_obdo);
diff --git a/drivers/staging/lustre/lustre/obdclass/statfs_pack.c b/drivers/staging/lustre/lustre/obdclass/statfs_pack.c
deleted file mode 100644
index 355e888885f4..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/statfs_pack.c
+++ /dev/null
@@ -1,58 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdclass/statfs_pack.c
- *
- * (Un)packing of OST/MDS requests
- *
- * Author: Andreas Dilger <adilger@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <linux/statfs.h>
-#include <lustre_export.h>
-#include <lustre_net.h>
-#include <obd_support.h>
-#include <obd_class.h>
-
-void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs)
-{
- memset(sfs, 0, sizeof(*sfs));
- sfs->f_type = osfs->os_type;
- sfs->f_blocks = osfs->os_blocks;
- sfs->f_bfree = osfs->os_bfree;
- sfs->f_bavail = osfs->os_bavail;
- sfs->f_files = osfs->os_files;
- sfs->f_ffree = osfs->os_ffree;
- sfs->f_bsize = osfs->os_bsize;
- sfs->f_namelen = osfs->os_namelen;
-}
-EXPORT_SYMBOL(statfs_unpack);
diff --git a/drivers/staging/lustre/lustre/obdclass/uuid.c b/drivers/staging/lustre/lustre/obdclass/uuid.c
deleted file mode 100644
index ec8c6dc5c9a7..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/uuid.c
+++ /dev/null
@@ -1,45 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdclass/uuid.c
- *
- * Helper functions for the UUID library
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <obd_support.h>
-#include <obd_class.h>
-
-void class_uuid_unparse(class_uuid_t uu, struct obd_uuid *out)
-{
- sprintf(out->uuid, "%pU", uu);
-}
-EXPORT_SYMBOL(class_uuid_unparse);
diff --git a/drivers/staging/lustre/lustre/obdecho/Makefile b/drivers/staging/lustre/lustre/obdecho/Makefile
deleted file mode 100644
index 6be66fbab872..000000000000
--- a/drivers/staging/lustre/lustre/obdecho/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LUSTRE_FS) += obdecho.o
-obdecho-y := echo_client.o
diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c
deleted file mode 100644
index b692e76e7108..000000000000
--- a/drivers/staging/lustre/lustre/obdecho/echo_client.c
+++ /dev/null
@@ -1,1729 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_ECHO
-
-#include <obd.h>
-#include <obd_support.h>
-#include <obd_class.h>
-#include <lustre_debug.h>
-#include <lprocfs_status.h>
-#include <cl_object.h>
-#include <lustre_fid.h>
-#include <lustre_acl.h>
-#include <uapi/linux/lustre/lustre_ioctl.h>
-#include <lustre_net.h>
-
-#include "echo_internal.h"
-
-/** \defgroup echo_client Echo Client
- * @{
- */
-
-struct echo_device {
- struct cl_device ed_cl;
- struct echo_client_obd *ed_ec;
-
- struct cl_site ed_site_myself;
- struct lu_site *ed_site;
- struct lu_device *ed_next;
-};
-
-struct echo_object {
- struct cl_object eo_cl;
- struct cl_object_header eo_hdr;
-
- struct echo_device *eo_dev;
- struct list_head eo_obj_chain;
- struct lov_oinfo *eo_oinfo;
- atomic_t eo_npages;
- int eo_deleted;
-};
-
-struct echo_object_conf {
- struct cl_object_conf eoc_cl;
- struct lov_oinfo **eoc_oinfo;
-};
-
-struct echo_page {
- struct cl_page_slice ep_cl;
- struct mutex ep_lock;
-};
-
-struct echo_lock {
- struct cl_lock_slice el_cl;
- struct list_head el_chain;
- struct echo_object *el_object;
- __u64 el_cookie;
- atomic_t el_refcount;
-};
-
-static int echo_client_setup(const struct lu_env *env,
- struct obd_device *obddev,
- struct lustre_cfg *lcfg);
-static int echo_client_cleanup(struct obd_device *obddev);
-
-/** \defgroup echo_helpers Helper functions
- * @{
- */
-static inline struct echo_device *cl2echo_dev(const struct cl_device *dev)
-{
- return container_of_safe(dev, struct echo_device, ed_cl);
-}
-
-static inline struct cl_device *echo_dev2cl(struct echo_device *d)
-{
- return &d->ed_cl;
-}
-
-static inline struct echo_device *obd2echo_dev(const struct obd_device *obd)
-{
- return cl2echo_dev(lu2cl_dev(obd->obd_lu_dev));
-}
-
-static inline struct cl_object *echo_obj2cl(struct echo_object *eco)
-{
- return &eco->eo_cl;
-}
-
-static inline struct echo_object *cl2echo_obj(const struct cl_object *o)
-{
- return container_of(o, struct echo_object, eo_cl);
-}
-
-static inline struct echo_page *cl2echo_page(const struct cl_page_slice *s)
-{
- return container_of(s, struct echo_page, ep_cl);
-}
-
-static inline struct echo_lock *cl2echo_lock(const struct cl_lock_slice *s)
-{
- return container_of(s, struct echo_lock, el_cl);
-}
-
-static inline struct cl_lock *echo_lock2cl(const struct echo_lock *ecl)
-{
- return ecl->el_cl.cls_lock;
-}
-
-static struct lu_context_key echo_thread_key;
-static inline struct echo_thread_info *echo_env_info(const struct lu_env *env)
-{
- struct echo_thread_info *info;
-
- info = lu_context_key_get(&env->le_ctx, &echo_thread_key);
- LASSERT(info);
- return info;
-}
-
-static inline
-struct echo_object_conf *cl2echo_conf(const struct cl_object_conf *c)
-{
- return container_of(c, struct echo_object_conf, eoc_cl);
-}
-
-/** @} echo_helpers */
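
Every helper in this group is an instance of the container_of() idiom: given a pointer to an embedded member, recover the enclosing structure. A minimal standalone illustration of the idiom, using hypothetical types rather than the cl_/lu_ layers above:

	#include <linux/kernel.h>

	struct inner {
		int i_val;
	};

	struct outer {
		int o_val;
		struct inner o_member;	/* embedded slice */
	};

	/* recover the enclosing outer from its embedded inner */
	static inline struct outer *inner2outer(struct inner *in)
	{
		return container_of(in, struct outer, o_member);
	}
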
-static int cl_echo_object_put(struct echo_object *eco);
-static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
- struct page **pages, int npages, int async);
-
-struct echo_thread_info {
- struct echo_object_conf eti_conf;
- struct lustre_md eti_md;
-
- struct cl_2queue eti_queue;
- struct cl_io eti_io;
- struct cl_lock eti_lock;
- struct lu_fid eti_fid;
- struct lu_fid eti_fid2;
-};
-
-/* No session used right now */
-struct echo_session_info {
- unsigned long dummy;
-};
-
-static struct kmem_cache *echo_lock_kmem;
-static struct kmem_cache *echo_object_kmem;
-static struct kmem_cache *echo_thread_kmem;
-static struct kmem_cache *echo_session_kmem;
-
-static struct lu_kmem_descr echo_caches[] = {
- {
- .ckd_cache = &echo_lock_kmem,
- .ckd_name = "echo_lock_kmem",
- .ckd_size = sizeof(struct echo_lock)
- },
- {
- .ckd_cache = &echo_object_kmem,
- .ckd_name = "echo_object_kmem",
- .ckd_size = sizeof(struct echo_object)
- },
- {
- .ckd_cache = &echo_thread_kmem,
- .ckd_name = "echo_thread_kmem",
- .ckd_size = sizeof(struct echo_thread_info)
- },
- {
- .ckd_cache = &echo_session_kmem,
- .ckd_name = "echo_session_kmem",
- .ckd_size = sizeof(struct echo_session_info)
- },
- {
- .ckd_cache = NULL
- }
-};
-
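
The echo_caches[] table above is a NULL-terminated descriptor array that drives cache setup and teardown through lu_kmem_init()/lu_kmem_fini(). A minimal standalone sketch of that descriptor-table pattern, with a hypothetical kmem_descr type rather than the Lustre lu_kmem_* API:

	#include <linux/errno.h>
	#include <linux/slab.h>

	struct kmem_descr {
		struct kmem_cache **kd_cache;
		const char *kd_name;
		size_t kd_size;
	};

	static int kmem_table_init(struct kmem_descr *table)
	{
		struct kmem_descr *d;

		for (d = table; d->kd_cache; d++) {
			*d->kd_cache = kmem_cache_create(d->kd_name,
							 d->kd_size, 0, 0,
							 NULL);
			if (!*d->kd_cache)
				goto unwind;
		}
		return 0;

	unwind:
		/* destroy only the caches created before the failure */
		while (d-- > table) {
			kmem_cache_destroy(*d->kd_cache);
			*d->kd_cache = NULL;
		}
		return -ENOMEM;
	}
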
-/** \defgroup echo_page Page operations
- *
- * Echo page operations.
- *
- * @{
- */
-static int echo_page_own(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *io, int nonblock)
-{
- struct echo_page *ep = cl2echo_page(slice);
-
- if (!nonblock)
- mutex_lock(&ep->ep_lock);
- else if (!mutex_trylock(&ep->ep_lock))
- return -EAGAIN;
- return 0;
-}
-
-static void echo_page_disown(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *io)
-{
- struct echo_page *ep = cl2echo_page(slice);
-
- LASSERT(mutex_is_locked(&ep->ep_lock));
- mutex_unlock(&ep->ep_lock);
-}
-
-static void echo_page_discard(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- cl_page_delete(env, slice->cpl_page);
-}
-
-static int echo_page_is_vmlocked(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- if (mutex_is_locked(&cl2echo_page(slice)->ep_lock))
- return -EBUSY;
- return -ENODATA;
-}
-
-static void echo_page_completion(const struct lu_env *env,
- const struct cl_page_slice *slice,
- int ioret)
-{
- LASSERT(slice->cpl_page->cp_sync_io);
-}
-
-static void echo_page_fini(const struct lu_env *env,
- struct cl_page_slice *slice)
-{
- struct echo_object *eco = cl2echo_obj(slice->cpl_obj);
-
- atomic_dec(&eco->eo_npages);
- put_page(slice->cpl_page->cp_vmpage);
-}
-
-static int echo_page_prep(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- return 0;
-}
-
-static int echo_page_print(const struct lu_env *env,
- const struct cl_page_slice *slice,
- void *cookie, lu_printer_t printer)
-{
- struct echo_page *ep = cl2echo_page(slice);
-
- (*printer)(env, cookie, LUSTRE_ECHO_CLIENT_NAME "-page@%p %d vm@%p\n",
- ep, mutex_is_locked(&ep->ep_lock),
- slice->cpl_page->cp_vmpage);
- return 0;
-}
-
-static const struct cl_page_operations echo_page_ops = {
- .cpo_own = echo_page_own,
- .cpo_disown = echo_page_disown,
- .cpo_discard = echo_page_discard,
- .cpo_fini = echo_page_fini,
- .cpo_print = echo_page_print,
- .cpo_is_vmlocked = echo_page_is_vmlocked,
- .io = {
- [CRT_READ] = {
- .cpo_prep = echo_page_prep,
- .cpo_completion = echo_page_completion,
- },
- [CRT_WRITE] = {
- .cpo_prep = echo_page_prep,
- .cpo_completion = echo_page_completion,
- }
- }
-};
-
-/** @} echo_page */
-
-/** \defgroup echo_lock Locking
- *
- * echo lock operations
- *
- * @{
- */
-static void echo_lock_fini(const struct lu_env *env,
- struct cl_lock_slice *slice)
-{
- struct echo_lock *ecl = cl2echo_lock(slice);
-
- LASSERT(list_empty(&ecl->el_chain));
- kmem_cache_free(echo_lock_kmem, ecl);
-}
-
-static const struct cl_lock_operations echo_lock_ops = {
- .clo_fini = echo_lock_fini,
-};
-
-/** @} echo_lock */
-
-/** \defgroup echo_cl_ops cl_object operations
- *
- * operations for cl_object
- *
- * @{
- */
-static int echo_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index)
-{
- struct echo_page *ep = cl_object_page_slice(obj, page);
- struct echo_object *eco = cl2echo_obj(obj);
-
- get_page(page->cp_vmpage);
- mutex_init(&ep->ep_lock);
- cl_page_slice_add(page, &ep->ep_cl, obj, index, &echo_page_ops);
- atomic_inc(&eco->eo_npages);
- return 0;
-}
-
-static int echo_io_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io)
-{
- return 0;
-}
-
-static int echo_lock_init(const struct lu_env *env,
- struct cl_object *obj, struct cl_lock *lock,
- const struct cl_io *unused)
-{
- struct echo_lock *el;
-
- el = kmem_cache_zalloc(echo_lock_kmem, GFP_NOFS);
- if (el) {
- cl_lock_slice_add(lock, &el->el_cl, obj, &echo_lock_ops);
- el->el_object = cl2echo_obj(obj);
- INIT_LIST_HEAD(&el->el_chain);
- atomic_set(&el->el_refcount, 0);
- }
- return !el ? -ENOMEM : 0;
-}
-
-static int echo_conf_set(const struct lu_env *env, struct cl_object *obj,
- const struct cl_object_conf *conf)
-{
- return 0;
-}
-
-static const struct cl_object_operations echo_cl_obj_ops = {
- .coo_page_init = echo_page_init,
- .coo_lock_init = echo_lock_init,
- .coo_io_init = echo_io_init,
- .coo_conf_set = echo_conf_set
-};
-
-/** @} echo_cl_ops */
-
-/** \defgroup echo_lu_ops lu_object operations
- *
- * operations for echo lu object.
- *
- * @{
- */
-static int echo_object_init(const struct lu_env *env, struct lu_object *obj,
- const struct lu_object_conf *conf)
-{
- struct echo_device *ed = cl2echo_dev(lu2cl_dev(obj->lo_dev));
- struct echo_client_obd *ec = ed->ed_ec;
- struct echo_object *eco = cl2echo_obj(lu2cl(obj));
- const struct cl_object_conf *cconf;
- struct echo_object_conf *econf;
-
- if (ed->ed_next) {
- struct lu_object *below;
- struct lu_device *under;
-
- under = ed->ed_next;
- below = under->ld_ops->ldo_object_alloc(env, obj->lo_header,
- under);
- if (!below)
- return -ENOMEM;
- lu_object_add(obj, below);
- }
-
- cconf = lu2cl_conf(conf);
- econf = cl2echo_conf(cconf);
-
- LASSERT(econf->eoc_oinfo);
-	/*
-	 * Transfer the oinfo pointer to eco so that it won't be
-	 * freed.
-	 */
- eco->eo_oinfo = *econf->eoc_oinfo;
- *econf->eoc_oinfo = NULL;
-
- eco->eo_dev = ed;
- atomic_set(&eco->eo_npages, 0);
- cl_object_page_init(lu2cl(obj), sizeof(struct echo_page));
-
- spin_lock(&ec->ec_lock);
- list_add_tail(&eco->eo_obj_chain, &ec->ec_objects);
- spin_unlock(&ec->ec_lock);
-
- return 0;
-}
-
-static void echo_object_free(const struct lu_env *env, struct lu_object *obj)
-{
- struct echo_object *eco = cl2echo_obj(lu2cl(obj));
- struct echo_client_obd *ec = eco->eo_dev->ed_ec;
-
- LASSERT(atomic_read(&eco->eo_npages) == 0);
-
- spin_lock(&ec->ec_lock);
- list_del_init(&eco->eo_obj_chain);
- spin_unlock(&ec->ec_lock);
-
- lu_object_fini(obj);
- lu_object_header_fini(obj->lo_header);
-
- kfree(eco->eo_oinfo);
- kmem_cache_free(echo_object_kmem, eco);
-}
-
-static int echo_object_print(const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct lu_object *o)
-{
- struct echo_object *obj = cl2echo_obj(lu2cl(o));
-
- return (*p)(env, cookie, "echoclient-object@%p", obj);
-}
-
-static const struct lu_object_operations echo_lu_obj_ops = {
- .loo_object_init = echo_object_init,
- .loo_object_delete = NULL,
- .loo_object_release = NULL,
- .loo_object_free = echo_object_free,
- .loo_object_print = echo_object_print,
- .loo_object_invariant = NULL
-};
-
-/** @} echo_lu_ops */
-
-/** \defgroup echo_lu_dev_ops lu_device operations
- *
- * Operations for echo lu device.
- *
- * @{
- */
-static struct lu_object *echo_object_alloc(const struct lu_env *env,
- const struct lu_object_header *hdr,
- struct lu_device *dev)
-{
- struct echo_object *eco;
- struct lu_object *obj = NULL;
-
- /* we're the top dev. */
- LASSERT(!hdr);
- eco = kmem_cache_zalloc(echo_object_kmem, GFP_NOFS);
- if (eco) {
- struct cl_object_header *hdr = &eco->eo_hdr;
-
- obj = &echo_obj2cl(eco)->co_lu;
- cl_object_header_init(hdr);
- hdr->coh_page_bufsize = cfs_size_round(sizeof(struct cl_page));
-
- lu_object_init(obj, &hdr->coh_lu, dev);
- lu_object_add_top(&hdr->coh_lu, obj);
-
- eco->eo_cl.co_ops = &echo_cl_obj_ops;
- obj->lo_ops = &echo_lu_obj_ops;
- }
- return obj;
-}
-
-static const struct lu_device_operations echo_device_lu_ops = {
- .ldo_object_alloc = echo_object_alloc,
-};
-
-/** @} echo_lu_dev_ops */
-
-/** \defgroup echo_init Setup and teardown
- *
- * Init and fini functions for echo client.
- *
- * @{
- */
-static int echo_site_init(const struct lu_env *env, struct echo_device *ed)
-{
- struct cl_site *site = &ed->ed_site_myself;
- int rc;
-
- /* initialize site */
- rc = cl_site_init(site, &ed->ed_cl);
- if (rc) {
- CERROR("Cannot initialize site for echo client(%d)\n", rc);
- return rc;
- }
-
- rc = lu_site_init_finish(&site->cs_lu);
- if (rc) {
- cl_site_fini(site);
- return rc;
- }
-
- ed->ed_site = &site->cs_lu;
- return 0;
-}
-
-static void echo_site_fini(const struct lu_env *env, struct echo_device *ed)
-{
- if (ed->ed_site) {
- lu_site_fini(ed->ed_site);
- ed->ed_site = NULL;
- }
-}
-
-static void *echo_thread_key_init(const struct lu_context *ctx,
- struct lu_context_key *key)
-{
- struct echo_thread_info *info;
-
- info = kmem_cache_zalloc(echo_thread_kmem, GFP_NOFS);
- if (!info)
- info = ERR_PTR(-ENOMEM);
- return info;
-}
-
-static void echo_thread_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
- struct echo_thread_info *info = data;
-
- kmem_cache_free(echo_thread_kmem, info);
-}
-
-static struct lu_context_key echo_thread_key = {
- .lct_tags = LCT_CL_THREAD,
- .lct_init = echo_thread_key_init,
- .lct_fini = echo_thread_key_fini,
-};
-
-static void *echo_session_key_init(const struct lu_context *ctx,
- struct lu_context_key *key)
-{
- struct echo_session_info *session;
-
- session = kmem_cache_zalloc(echo_session_kmem, GFP_NOFS);
- if (!session)
- session = ERR_PTR(-ENOMEM);
- return session;
-}
-
-static void echo_session_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
- struct echo_session_info *session = data;
-
- kmem_cache_free(echo_session_kmem, session);
-}
-
-static struct lu_context_key echo_session_key = {
- .lct_tags = LCT_SESSION,
- .lct_init = echo_session_key_init,
- .lct_fini = echo_session_key_fini,
-};
-
-LU_TYPE_INIT_FINI(echo, &echo_thread_key, &echo_session_key);
-
-static struct lu_device *echo_device_alloc(const struct lu_env *env,
- struct lu_device_type *t,
- struct lustre_cfg *cfg)
-{
- struct lu_device *next;
- struct echo_device *ed;
- struct cl_device *cd;
- struct obd_device *obd = NULL; /* to keep compiler happy */
- struct obd_device *tgt;
- const char *tgt_type_name;
- int rc, err;
-
- ed = kzalloc(sizeof(*ed), GFP_NOFS);
- if (!ed) {
- rc = -ENOMEM;
- goto out;
- }
-
- cd = &ed->ed_cl;
- rc = cl_device_init(cd, t);
- if (rc)
- goto out_free;
-
- cd->cd_lu_dev.ld_ops = &echo_device_lu_ops;
-
- obd = class_name2obd(lustre_cfg_string(cfg, 0));
- LASSERT(obd);
- LASSERT(env);
-
- tgt = class_name2obd(lustre_cfg_string(cfg, 1));
- if (!tgt) {
- CERROR("Can not find tgt device %s\n",
- lustre_cfg_string(cfg, 1));
- rc = -ENODEV;
- goto out_device_fini;
- }
-
- next = tgt->obd_lu_dev;
- if (!strcmp(tgt->obd_type->typ_name, LUSTRE_MDT_NAME)) {
- CERROR("echo MDT client must be run on server\n");
- rc = -EOPNOTSUPP;
- goto out_device_fini;
- }
-
- rc = echo_site_init(env, ed);
- if (rc)
- goto out_device_fini;
-
- rc = echo_client_setup(env, obd, cfg);
- if (rc)
- goto out_site_fini;
-
- ed->ed_ec = &obd->u.echo_client;
-
-	/* If the echo client is stacked on an OST device, next is NULL,
-	 * since an OST is not a CLIO device so far.
-	 */
- if (next && !lu_device_is_cl(next))
- next = NULL;
-
- tgt_type_name = tgt->obd_type->typ_name;
- if (next) {
- if (next->ld_site) {
- rc = -EBUSY;
- goto out_cleanup;
- }
-
- next->ld_site = ed->ed_site;
- rc = next->ld_type->ldt_ops->ldto_device_init(env, next,
- next->ld_type->ldt_name,
- NULL);
- if (rc)
- goto out_cleanup;
-
- } else {
- LASSERT(strcmp(tgt_type_name, LUSTRE_OST_NAME) == 0);
- }
-
- ed->ed_next = next;
- return &cd->cd_lu_dev;
-
-out_cleanup:
- err = echo_client_cleanup(obd);
- if (err)
- CERROR("Cleanup obd device %s error(%d)\n",
- obd->obd_name, err);
-out_site_fini:
- echo_site_fini(env, ed);
-out_device_fini:
- cl_device_fini(&ed->ed_cl);
-out_free:
- kfree(ed);
-out:
- return ERR_PTR(rc);
-}
-
-static int echo_device_init(const struct lu_env *env, struct lu_device *d,
- const char *name, struct lu_device *next)
-{
- LBUG();
- return 0;
-}
-
-static struct lu_device *echo_device_fini(const struct lu_env *env,
- struct lu_device *d)
-{
- struct echo_device *ed = cl2echo_dev(lu2cl_dev(d));
- struct lu_device *next = ed->ed_next;
-
- while (next)
- next = next->ld_type->ldt_ops->ldto_device_fini(env, next);
- return NULL;
-}
-
-static void echo_lock_release(const struct lu_env *env,
- struct echo_lock *ecl,
- int still_used)
-{
- struct cl_lock *clk = echo_lock2cl(ecl);
-
- cl_lock_release(env, clk);
-}
-
-static struct lu_device *echo_device_free(const struct lu_env *env,
- struct lu_device *d)
-{
- struct echo_device *ed = cl2echo_dev(lu2cl_dev(d));
- struct echo_client_obd *ec = ed->ed_ec;
- struct echo_object *eco;
- struct lu_device *next = ed->ed_next;
-
- CDEBUG(D_INFO, "echo device:%p is going to be freed, next = %p\n",
- ed, next);
-
- lu_site_purge(env, ed->ed_site, -1);
-
-	/* Check whether any objects are still alive. There should be
-	 * none, because lu_site_purge() cleans up all cached objects;
-	 * if any remain, the echo device is probably being accessed
-	 * concurrently.
-	 */
- spin_lock(&ec->ec_lock);
- list_for_each_entry(eco, &ec->ec_objects, eo_obj_chain)
- eco->eo_deleted = 1;
- spin_unlock(&ec->ec_lock);
-
- /* purge again */
- lu_site_purge(env, ed->ed_site, -1);
-
- CDEBUG(D_INFO,
- "Waiting for the reference of echo object to be dropped\n");
-
- /* Wait for the last reference to be dropped. */
- spin_lock(&ec->ec_lock);
- while (!list_empty(&ec->ec_objects)) {
- spin_unlock(&ec->ec_lock);
-		CERROR("echo_client still has objects at cleanup time, waiting for 1 second\n");
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- lu_site_purge(env, ed->ed_site, -1);
- spin_lock(&ec->ec_lock);
- }
- spin_unlock(&ec->ec_lock);
-
- LASSERT(list_empty(&ec->ec_locks));
-
- CDEBUG(D_INFO, "No object exists, exiting...\n");
-
- echo_client_cleanup(d->ld_obd);
-
- while (next)
- next = next->ld_type->ldt_ops->ldto_device_free(env, next);
-
- LASSERT(ed->ed_site == d->ld_site);
- echo_site_fini(env, ed);
- cl_device_fini(&ed->ed_cl);
- kfree(ed);
-
- cl_env_cache_purge(~0);
-
- return NULL;
-}
-
-static const struct lu_device_type_operations echo_device_type_ops = {
- .ldto_init = echo_type_init,
- .ldto_fini = echo_type_fini,
-
- .ldto_start = echo_type_start,
- .ldto_stop = echo_type_stop,
-
- .ldto_device_alloc = echo_device_alloc,
- .ldto_device_free = echo_device_free,
- .ldto_device_init = echo_device_init,
- .ldto_device_fini = echo_device_fini
-};
-
-static struct lu_device_type echo_device_type = {
- .ldt_tags = LU_DEVICE_CL,
- .ldt_name = LUSTRE_ECHO_CLIENT_NAME,
- .ldt_ops = &echo_device_type_ops,
- .ldt_ctx_tags = LCT_CL_THREAD,
-};
-
-/** @} echo_init */
-
-/** \defgroup echo_exports Exported operations
- *
- * exporting functions to echo client
- *
- * @{
- */
-
-/* Interfaces to echo client obd device */
-static struct echo_object *
-cl_echo_object_find(struct echo_device *d, const struct ost_id *oi)
-{
- struct lu_env *env;
- struct echo_thread_info *info;
- struct echo_object_conf *conf;
- struct lov_oinfo *oinfo = NULL;
- struct echo_object *eco;
- struct cl_object *obj;
- struct lu_fid *fid;
- u16 refcheck;
- int rc;
-
- LASSERTF(ostid_id(oi), DOSTID "\n", POSTID(oi));
- LASSERTF(ostid_seq(oi) == FID_SEQ_ECHO, DOSTID "\n", POSTID(oi));
-
- /* Never return an object if the obd is to be freed. */
- if (echo_dev2cl(d)->cd_lu_dev.ld_obd->obd_stopping)
- return ERR_PTR(-ENODEV);
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return (void *)env;
-
- info = echo_env_info(env);
- conf = &info->eti_conf;
- if (d->ed_next) {
- oinfo = kzalloc(sizeof(*oinfo), GFP_NOFS);
- if (!oinfo) {
- eco = ERR_PTR(-ENOMEM);
- goto out;
- }
-
- oinfo->loi_oi = *oi;
- conf->eoc_cl.u.coc_oinfo = oinfo;
- }
-
- /*
- * If echo_object_init() is successful then ownership of oinfo
- * is transferred to the object.
- */
- conf->eoc_oinfo = &oinfo;
-
- fid = &info->eti_fid;
- rc = ostid_to_fid(fid, (struct ost_id *)oi, 0);
- if (rc != 0) {
- eco = ERR_PTR(rc);
- goto out;
- }
-
- /* In the function below, .hs_keycmp resolves to
- * lu_obj_hop_keycmp()
- */
- /* coverity[overrun-buffer-val] */
- obj = cl_object_find(env, echo_dev2cl(d), fid, &conf->eoc_cl);
- if (IS_ERR(obj)) {
- eco = (void *)obj;
- goto out;
- }
-
- eco = cl2echo_obj(obj);
- if (eco->eo_deleted) {
- cl_object_put(env, obj);
- eco = ERR_PTR(-EAGAIN);
- }
-
-out:
- kfree(oinfo);
- cl_env_put(env, &refcheck);
- return eco;
-}
-
-static int cl_echo_object_put(struct echo_object *eco)
-{
- struct lu_env *env;
- struct cl_object *obj = echo_obj2cl(eco);
- u16 refcheck;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
-	/* mark a deleted object so it is killed on last reference drop */
- if (eco->eo_deleted) {
- struct lu_object_header *loh = obj->co_lu.lo_header;
-
- LASSERT(&eco->eo_hdr == luh2coh(loh));
- set_bit(LU_OBJECT_HEARD_BANSHEE, &loh->loh_flags);
- }
-
- cl_object_put(env, obj);
- cl_env_put(env, &refcheck);
- return 0;
-}
-
-static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco,
- u64 start, u64 end, int mode,
- __u64 *cookie, __u32 enqflags)
-{
- struct cl_io *io;
- struct cl_lock *lck;
- struct cl_object *obj;
- struct cl_lock_descr *descr;
- struct echo_thread_info *info;
- int rc = -ENOMEM;
-
- info = echo_env_info(env);
- io = &info->eti_io;
- lck = &info->eti_lock;
- obj = echo_obj2cl(eco);
-
- memset(lck, 0, sizeof(*lck));
- descr = &lck->cll_descr;
- descr->cld_obj = obj;
- descr->cld_start = cl_index(obj, start);
- descr->cld_end = cl_index(obj, end);
- descr->cld_mode = mode == LCK_PW ? CLM_WRITE : CLM_READ;
- descr->cld_enq_flags = enqflags;
- io->ci_obj = obj;
-
- rc = cl_lock_request(env, io, lck);
- if (rc == 0) {
- struct echo_client_obd *ec = eco->eo_dev->ed_ec;
- struct echo_lock *el;
-
- el = cl2echo_lock(cl_lock_at(lck, &echo_device_type));
- spin_lock(&ec->ec_lock);
- if (list_empty(&el->el_chain)) {
- list_add(&el->el_chain, &ec->ec_locks);
- el->el_cookie = ++ec->ec_unique;
- }
- atomic_inc(&el->el_refcount);
- *cookie = el->el_cookie;
- spin_unlock(&ec->ec_lock);
- }
- return rc;
-}
-
-static int cl_echo_cancel0(struct lu_env *env, struct echo_device *ed,
- __u64 cookie)
-{
- struct echo_client_obd *ec = ed->ed_ec;
- struct echo_lock *ecl = NULL;
- struct list_head *el;
- int found = 0, still_used = 0;
-
- spin_lock(&ec->ec_lock);
- list_for_each(el, &ec->ec_locks) {
- ecl = list_entry(el, struct echo_lock, el_chain);
- CDEBUG(D_INFO, "ecl: %p, cookie: %#llx\n", ecl, ecl->el_cookie);
- found = (ecl->el_cookie == cookie);
- if (found) {
- if (atomic_dec_and_test(&ecl->el_refcount))
- list_del_init(&ecl->el_chain);
- else
- still_used = 1;
- break;
- }
- }
- spin_unlock(&ec->ec_lock);
-
- if (!found)
- return -ENOENT;
-
- echo_lock_release(env, ecl, still_used);
- return 0;
-}
-
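
Enqueued locks are handed back to the caller as opaque cookies drawn from ec_unique; cl_echo_cancel0() above resolves a cookie back to its echo_lock under ec_lock. The lookup shape, isolated as a hedged standalone sketch with hypothetical types:

	#include <linux/list.h>
	#include <linux/spinlock.h>
	#include <linux/types.h>

	struct cookie_lock {
		struct list_head cl_chain;
		u64 cl_cookie;
	};

	/* find the entry matching @cookie; NULL if it was never issued */
	static struct cookie_lock *cookie_find(spinlock_t *guard,
					       struct list_head *head,
					       u64 cookie)
	{
		struct cookie_lock *cl, *found = NULL;

		spin_lock(guard);
		list_for_each_entry(cl, head, cl_chain) {
			if (cl->cl_cookie == cookie) {
				found = cl;
				break;
			}
		}
		spin_unlock(guard);
		return found;
	}
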
-static void echo_commit_callback(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page)
-{
- struct echo_thread_info *info;
- struct cl_2queue *queue;
-
- info = echo_env_info(env);
- LASSERT(io == &info->eti_io);
-
- queue = &info->eti_queue;
- cl_page_list_add(&queue->c2_qout, page);
-}
-
-static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
- struct page **pages, int npages, int async)
-{
- struct lu_env *env;
- struct echo_thread_info *info;
- struct cl_object *obj = echo_obj2cl(eco);
- struct echo_device *ed = eco->eo_dev;
- struct cl_2queue *queue;
- struct cl_io *io;
- struct cl_page *clp;
- struct lustre_handle lh = { 0 };
- size_t page_size = cl_page_size(obj);
- u16 refcheck;
- int rc;
- int i;
-
- LASSERT((offset & ~PAGE_MASK) == 0);
- LASSERT(ed->ed_next);
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- info = echo_env_info(env);
- io = &info->eti_io;
- queue = &info->eti_queue;
-
- cl_2queue_init(queue);
-
- io->ci_ignore_layout = 1;
- rc = cl_io_init(env, io, CIT_MISC, obj);
- if (rc < 0)
- goto out;
- LASSERT(rc == 0);
-
- rc = cl_echo_enqueue0(env, eco, offset,
- offset + npages * PAGE_SIZE - 1,
- rw == READ ? LCK_PR : LCK_PW, &lh.cookie,
- CEF_NEVER);
- if (rc < 0)
- goto error_lock;
-
- for (i = 0; i < npages; i++) {
- LASSERT(pages[i]);
- clp = cl_page_find(env, obj, cl_index(obj, offset),
- pages[i], CPT_TRANSIENT);
- if (IS_ERR(clp)) {
- rc = PTR_ERR(clp);
- break;
- }
- LASSERT(clp->cp_type == CPT_TRANSIENT);
-
- rc = cl_page_own(env, io, clp);
- if (rc) {
- LASSERT(clp->cp_state == CPS_FREEING);
- cl_page_put(env, clp);
- break;
- }
-		/*
-		 * Add the page to the incoming list of the 2-queue.
-		 */
- cl_page_list_add(&queue->c2_qin, clp);
-
-		/* Drop the reference taken by cl_page_find() so that the
-		 * page will be freed by cl_2queue_fini().
-		 */
- cl_page_put(env, clp);
- cl_page_clip(env, clp, 0, page_size);
-
- offset += page_size;
- }
-
- if (rc == 0) {
- enum cl_req_type typ = rw == READ ? CRT_READ : CRT_WRITE;
-
- async = async && (typ == CRT_WRITE);
- if (async)
- rc = cl_io_commit_async(env, io, &queue->c2_qin,
- 0, PAGE_SIZE,
- echo_commit_callback);
- else
- rc = cl_io_submit_sync(env, io, typ, queue, 0);
-		CDEBUG(D_INFO, "echo_client %s io returns %d\n",
-		       async ? "async" : "sync", rc);
- }
-
- cl_echo_cancel0(env, ed, lh.cookie);
-error_lock:
- cl_2queue_discard(env, io, queue);
- cl_2queue_disown(env, io, queue);
- cl_2queue_fini(env, queue);
- cl_io_fini(env, io);
-out:
- cl_env_put(env, &refcheck);
- return rc;
-}
-
-/** @} echo_exports */
-
-static u64 last_object_id;
-
-static int echo_create_object(const struct lu_env *env, struct echo_device *ed,
- struct obdo *oa)
-{
- struct echo_object *eco;
- struct echo_client_obd *ec = ed->ed_ec;
- int rc;
- int created = 0;
-
- if (!(oa->o_valid & OBD_MD_FLID) ||
- !(oa->o_valid & OBD_MD_FLGROUP) ||
- !fid_seq_is_echo(ostid_seq(&oa->o_oi))) {
- CERROR("invalid oid " DOSTID "\n", POSTID(&oa->o_oi));
- return -EINVAL;
- }
-
- if (!ostid_id(&oa->o_oi)) {
- rc = ostid_set_id(&oa->o_oi, ++last_object_id);
- if (rc)
- goto failed;
- }
-
- rc = obd_create(env, ec->ec_exp, oa);
- if (rc != 0) {
- CERROR("Cannot create objects: rc = %d\n", rc);
- goto failed;
- }
- created = 1;
-
- oa->o_valid |= OBD_MD_FLID;
-
- eco = cl_echo_object_find(ed, &oa->o_oi);
- if (IS_ERR(eco)) {
- rc = PTR_ERR(eco);
- goto failed;
- }
- cl_echo_object_put(eco);
-
- CDEBUG(D_INFO, "oa oid " DOSTID "\n", POSTID(&oa->o_oi));
-
- failed:
- if (created && rc)
- obd_destroy(env, ec->ec_exp, oa);
- if (rc)
-		CERROR("create object failed: rc = %d\n", rc);
- return rc;
-}
-
-static int echo_get_object(struct echo_object **ecop, struct echo_device *ed,
- struct obdo *oa)
-{
- struct echo_object *eco;
- int rc;
-
- if (!(oa->o_valid & OBD_MD_FLID) || !(oa->o_valid & OBD_MD_FLGROUP) ||
- !ostid_id(&oa->o_oi)) {
- CERROR("invalid oid " DOSTID "\n", POSTID(&oa->o_oi));
- return -EINVAL;
- }
-
- rc = 0;
- eco = cl_echo_object_find(ed, &oa->o_oi);
- if (!IS_ERR(eco))
- *ecop = eco;
- else
- rc = PTR_ERR(eco);
- return rc;
-}
-
-static void echo_put_object(struct echo_object *eco)
-{
- int rc;
-
- rc = cl_echo_object_put(eco);
- if (rc)
- CERROR("%s: echo client drop an object failed: rc = %d\n",
- eco->eo_dev->ed_ec->ec_exp->exp_obd->obd_name, rc);
-}
-
-static void
-echo_client_page_debug_setup(struct page *page, int rw, u64 id,
- u64 offset, u64 count)
-{
- char *addr;
- u64 stripe_off;
- u64 stripe_id;
- int delta;
-
- /* no partial pages on the client */
- LASSERT(count == PAGE_SIZE);
-
- addr = kmap(page);
-
- for (delta = 0; delta < PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) {
- if (rw == OBD_BRW_WRITE) {
- stripe_off = offset + delta;
- stripe_id = id;
- } else {
- stripe_off = 0xdeadbeef00c0ffeeULL;
- stripe_id = 0xdeadbeef00c0ffeeULL;
- }
- block_debug_setup(addr + delta, OBD_ECHO_BLOCK_SIZE,
- stripe_off, stripe_id);
- }
-
- kunmap(page);
-}
-
-static int echo_client_page_debug_check(struct page *page, u64 id,
- u64 offset, u64 count)
-{
- u64 stripe_off;
- u64 stripe_id;
- char *addr;
- int delta;
- int rc;
- int rc2;
-
- /* no partial pages on the client */
- LASSERT(count == PAGE_SIZE);
-
- addr = kmap(page);
-
- for (rc = delta = 0; delta < PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) {
- stripe_off = offset + delta;
- stripe_id = id;
-
- rc2 = block_debug_check("test_brw",
- addr + delta, OBD_ECHO_BLOCK_SIZE,
- stripe_off, stripe_id);
- if (rc2 != 0) {
- CERROR("Error in echo object %#llx\n", id);
- rc = rc2;
- }
- }
-
- kunmap(page);
- return rc;
-}
-
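
The two debug helpers above stamp every OBD_ECHO_BLOCK_SIZE block with the (offset, id) pair it was written for, so a later read can detect misplaced or corrupted blocks. A minimal standalone sketch of the same idea; the block_stamp_* names are hypothetical helpers, not the Lustre block_debug_* API:

	#include <linux/errno.h>
	#include <linux/string.h>
	#include <linux/types.h>

	struct block_stamp {
		__u64 bs_off;	/* file offset the block was written at */
		__u64 bs_id;	/* object id the block belongs to */
	};

	static void block_stamp_setup(void *addr, __u64 off, __u64 id)
	{
		struct block_stamp bs = { .bs_off = off, .bs_id = id };

		memcpy(addr, &bs, sizeof(bs));
	}

	static int block_stamp_check(void *addr, __u64 off, __u64 id)
	{
		struct block_stamp bs;

		memcpy(&bs, addr, sizeof(bs));
		return (bs.bs_off == off && bs.bs_id == id) ? 0 : -EIO;
	}
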
-static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa,
- struct echo_object *eco, u64 offset,
- u64 count, int async)
-{
- u32 npages;
- struct brw_page *pga;
- struct brw_page *pgp;
- struct page **pages;
- u64 off;
- int i;
- int rc;
- int verify;
- gfp_t gfp_mask;
- int brw_flags = 0;
-
- verify = (ostid_id(&oa->o_oi) != ECHO_PERSISTENT_OBJID &&
- (oa->o_valid & OBD_MD_FLFLAGS) != 0 &&
- (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0);
-
- gfp_mask = ((ostid_id(&oa->o_oi) & 2) == 0) ? GFP_KERNEL : GFP_HIGHUSER;
-
- LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ);
-
- if (count <= 0 ||
- (count & (~PAGE_MASK)) != 0)
- return -EINVAL;
-
- /* XXX think again with misaligned I/O */
- npages = count >> PAGE_SHIFT;
-
- if (rw == OBD_BRW_WRITE)
- brw_flags = OBD_BRW_ASYNC;
-
- pga = kcalloc(npages, sizeof(*pga), GFP_NOFS);
- if (!pga)
- return -ENOMEM;
-
- pages = kcalloc(npages, sizeof(*pages), GFP_NOFS);
- if (!pages) {
- kfree(pga);
- return -ENOMEM;
- }
-
- for (i = 0, pgp = pga, off = offset;
- i < npages;
- i++, pgp++, off += PAGE_SIZE) {
- LASSERT(!pgp->pg); /* for cleanup */
-
- rc = -ENOMEM;
- pgp->pg = alloc_page(gfp_mask);
- if (!pgp->pg)
- goto out;
-
- pages[i] = pgp->pg;
- pgp->count = PAGE_SIZE;
- pgp->off = off;
- pgp->flag = brw_flags;
-
- if (verify)
- echo_client_page_debug_setup(pgp->pg, rw,
- ostid_id(&oa->o_oi), off,
- pgp->count);
- }
-
- /* brw mode can only be used at client */
- LASSERT(ed->ed_next);
- rc = cl_echo_object_brw(eco, rw, offset, pages, npages, async);
-
- out:
- if (rc != 0 || rw != OBD_BRW_READ)
- verify = 0;
-
- for (i = 0, pgp = pga; i < npages; i++, pgp++) {
- if (!pgp->pg)
- continue;
-
- if (verify) {
- int vrc;
-
- vrc = echo_client_page_debug_check(pgp->pg,
- ostid_id(&oa->o_oi),
- pgp->off, pgp->count);
- if (vrc != 0 && rc == 0)
- rc = vrc;
- }
- __free_page(pgp->pg);
- }
- kfree(pga);
- kfree(pages);
- return rc;
-}
-
-static int echo_client_prep_commit(const struct lu_env *env,
- struct obd_export *exp, int rw,
- struct obdo *oa, struct echo_object *eco,
- u64 offset, u64 count,
- u64 batch, int async)
-{
- struct obd_ioobj ioo;
- struct niobuf_local *lnb;
- struct niobuf_remote rnb;
- u64 off;
- u64 npages, tot_pages;
- int i, ret = 0, brw_flags = 0;
-
- if (count <= 0 || (count & (~PAGE_MASK)) != 0)
- return -EINVAL;
-
- npages = batch >> PAGE_SHIFT;
- tot_pages = count >> PAGE_SHIFT;
-
- lnb = kcalloc(npages, sizeof(struct niobuf_local), GFP_NOFS);
- if (!lnb) {
- ret = -ENOMEM;
- goto out;
- }
-
- if (rw == OBD_BRW_WRITE && async)
- brw_flags |= OBD_BRW_ASYNC;
-
- obdo_to_ioobj(oa, &ioo);
-
- off = offset;
-
- for (; tot_pages > 0; tot_pages -= npages) {
- int lpages;
-
- if (tot_pages < npages)
- npages = tot_pages;
-
- rnb.rnb_offset = off;
- rnb.rnb_len = npages * PAGE_SIZE;
- rnb.rnb_flags = brw_flags;
- ioo.ioo_bufcnt = 1;
- off += npages * PAGE_SIZE;
-
- lpages = npages;
- ret = obd_preprw(env, rw, exp, oa, 1, &ioo, &rnb, &lpages, lnb);
- if (ret != 0)
- goto out;
-
- for (i = 0; i < lpages; i++) {
- struct page *page = lnb[i].lnb_page;
-
- /* read past eof? */
- if (!page && lnb[i].lnb_rc == 0)
- continue;
-
- if (async)
- lnb[i].lnb_flags |= OBD_BRW_ASYNC;
-
- if (ostid_id(&oa->o_oi) == ECHO_PERSISTENT_OBJID ||
- (oa->o_valid & OBD_MD_FLFLAGS) == 0 ||
- (oa->o_flags & OBD_FL_DEBUG_CHECK) == 0)
- continue;
-
- if (rw == OBD_BRW_WRITE)
- echo_client_page_debug_setup(page, rw,
- ostid_id(&oa->o_oi),
- lnb[i].lnb_file_offset,
- lnb[i].lnb_len);
- else
- echo_client_page_debug_check(page,
- ostid_id(&oa->o_oi),
- lnb[i].lnb_file_offset,
- lnb[i].lnb_len);
- }
-
- ret = obd_commitrw(env, rw, exp, oa, 1, &ioo, &rnb, npages, lnb,
- ret);
- if (ret != 0)
- goto out;
-
- /* Reuse env context. */
- lu_context_exit((struct lu_context *)&env->le_ctx);
- lu_context_enter((struct lu_context *)&env->le_ctx);
- }
-
-out:
- kfree(lnb);
- return ret;
-}
-
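
echo_client_prep_commit() above walks count bytes in chunks of at most batch bytes, shortening the final chunk. The loop arithmetic, isolated as a sketch; do_batch() is a hypothetical stand-in for the obd_preprw()/obd_commitrw() pair:

	#include <linux/mm.h>
	#include <linux/types.h>

	static void for_each_batch(u64 offset, u64 count, u64 batch)
	{
		u64 npages = batch >> PAGE_SHIFT;
		u64 tot_pages = count >> PAGE_SHIFT;

		for (; tot_pages > 0; tot_pages -= npages) {
			if (tot_pages < npages)
				npages = tot_pages;	/* short final batch */
			/* do_batch(offset, npages); */
			offset += npages * PAGE_SIZE;
		}
	}
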
-static int echo_client_brw_ioctl(const struct lu_env *env, int rw,
- struct obd_export *exp,
- struct obd_ioctl_data *data)
-{
- struct obd_device *obd = class_exp2obd(exp);
- struct echo_device *ed = obd2echo_dev(obd);
- struct echo_client_obd *ec = ed->ed_ec;
- struct obdo *oa = &data->ioc_obdo1;
- struct echo_object *eco;
- int rc;
- int async = 1;
- long test_mode;
-
- LASSERT(oa->o_valid & OBD_MD_FLGROUP);
-
- rc = echo_get_object(&eco, ed, oa);
- if (rc)
- return rc;
-
- oa->o_valid &= ~OBD_MD_FLHANDLE;
-
- /* OFD/obdfilter works only via prep/commit */
- test_mode = (long)data->ioc_pbuf1;
- if (test_mode == 1)
- async = 0;
-
- if (!ed->ed_next && test_mode != 3) {
- test_mode = 3;
- data->ioc_plen1 = data->ioc_count;
- }
-
- /* Truncate batch size to maximum */
- if (data->ioc_plen1 > PTLRPC_MAX_BRW_SIZE)
- data->ioc_plen1 = PTLRPC_MAX_BRW_SIZE;
-
- switch (test_mode) {
- case 1:
- /* fall through */
- case 2:
- rc = echo_client_kbrw(ed, rw, oa, eco, data->ioc_offset,
- data->ioc_count, async);
- break;
- case 3:
- rc = echo_client_prep_commit(env, ec->ec_exp, rw, oa, eco,
- data->ioc_offset, data->ioc_count,
- data->ioc_plen1, async);
- break;
- default:
- rc = -EINVAL;
- }
- echo_put_object(eco);
- return rc;
-}
-
-static int
-echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
- void *karg, void __user *uarg)
-{
- struct obd_device *obd = exp->exp_obd;
- struct echo_device *ed = obd2echo_dev(obd);
- struct echo_client_obd *ec = ed->ed_ec;
- struct echo_object *eco;
- struct obd_ioctl_data *data = karg;
- struct lu_env *env;
- struct obdo *oa;
- struct lu_fid fid;
- int rw = OBD_BRW_READ;
- int rc = 0;
-
- oa = &data->ioc_obdo1;
- if (!(oa->o_valid & OBD_MD_FLGROUP)) {
- oa->o_valid |= OBD_MD_FLGROUP;
- ostid_set_seq_echo(&oa->o_oi);
- }
-
- /* This FID is unpacked just for validation at this point */
- rc = ostid_to_fid(&fid, &oa->o_oi, 0);
- if (rc < 0)
- return rc;
-
- env = kzalloc(sizeof(*env), GFP_NOFS);
- if (!env)
- return -ENOMEM;
-
- rc = lu_env_init(env, LCT_DT_THREAD);
- if (rc) {
- rc = -ENOMEM;
- goto out;
- }
-
- switch (cmd) {
- case OBD_IOC_CREATE: /* may create echo object */
- if (!capable(CAP_SYS_ADMIN)) {
- rc = -EPERM;
- goto out;
- }
-
- rc = echo_create_object(env, ed, oa);
- goto out;
-
- case OBD_IOC_DESTROY:
- if (!capable(CAP_SYS_ADMIN)) {
- rc = -EPERM;
- goto out;
- }
-
- rc = echo_get_object(&eco, ed, oa);
- if (rc == 0) {
- rc = obd_destroy(env, ec->ec_exp, oa);
- if (rc == 0)
- eco->eo_deleted = 1;
- echo_put_object(eco);
- }
- goto out;
-
- case OBD_IOC_GETATTR:
- rc = echo_get_object(&eco, ed, oa);
- if (rc == 0) {
- rc = obd_getattr(env, ec->ec_exp, oa);
- echo_put_object(eco);
- }
- goto out;
-
- case OBD_IOC_SETATTR:
- if (!capable(CAP_SYS_ADMIN)) {
- rc = -EPERM;
- goto out;
- }
-
- rc = echo_get_object(&eco, ed, oa);
- if (rc == 0) {
- rc = obd_setattr(env, ec->ec_exp, oa);
- echo_put_object(eco);
- }
- goto out;
-
- case OBD_IOC_BRW_WRITE:
- if (!capable(CAP_SYS_ADMIN)) {
- rc = -EPERM;
- goto out;
- }
-
- rw = OBD_BRW_WRITE;
- /* fall through */
- case OBD_IOC_BRW_READ:
- rc = echo_client_brw_ioctl(env, rw, exp, data);
- goto out;
-
- default:
- CERROR("echo_ioctl(): unrecognised ioctl %#x\n", cmd);
- rc = -ENOTTY;
- goto out;
- }
-
-out:
- lu_env_fini(env);
- kfree(env);
-
- return rc;
-}
-
-static int echo_client_setup(const struct lu_env *env,
- struct obd_device *obddev, struct lustre_cfg *lcfg)
-{
- struct echo_client_obd *ec = &obddev->u.echo_client;
- struct obd_device *tgt;
- struct obd_uuid echo_uuid = { "ECHO_UUID" };
- struct obd_connect_data *ocd = NULL;
- int rc;
-
- if (lcfg->lcfg_bufcount < 2 || LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
- CERROR("requires a TARGET OBD name\n");
- return -EINVAL;
- }
-
- tgt = class_name2obd(lustre_cfg_string(lcfg, 1));
- if (!tgt || !tgt->obd_attached || !tgt->obd_set_up) {
- CERROR("device not attached or not set up (%s)\n",
- lustre_cfg_string(lcfg, 1));
- return -EINVAL;
- }
-
- spin_lock_init(&ec->ec_lock);
- INIT_LIST_HEAD(&ec->ec_objects);
- INIT_LIST_HEAD(&ec->ec_locks);
- ec->ec_unique = 0;
-
- ocd = kzalloc(sizeof(*ocd), GFP_NOFS);
- if (!ocd)
- return -ENOMEM;
-
- ocd->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_REQPORTAL |
- OBD_CONNECT_BRW_SIZE |
- OBD_CONNECT_GRANT | OBD_CONNECT_FULL20 |
- OBD_CONNECT_64BITHASH | OBD_CONNECT_LVB_TYPE |
- OBD_CONNECT_FID;
- ocd->ocd_brw_size = DT_MAX_BRW_SIZE;
- ocd->ocd_version = LUSTRE_VERSION_CODE;
- ocd->ocd_group = FID_SEQ_ECHO;
-
- rc = obd_connect(env, &ec->ec_exp, tgt, &echo_uuid, ocd, NULL);
-
- kfree(ocd);
-
- if (rc != 0) {
-		CERROR("failed to connect to device %s\n",
-		       lustre_cfg_string(lcfg, 1));
- return rc;
- }
-
- return rc;
-}
-
-static int echo_client_cleanup(struct obd_device *obddev)
-{
- struct echo_client_obd *ec = &obddev->u.echo_client;
- int rc;
-
- if (!list_empty(&obddev->obd_exports)) {
- CERROR("still has clients!\n");
- return -EBUSY;
- }
-
- LASSERT(atomic_read(&ec->ec_exp->exp_refcount) > 0);
- rc = obd_disconnect(ec->ec_exp);
- if (rc != 0)
-		CERROR("failed to disconnect device: %d\n", rc);
-
- return rc;
-}
-
-static int echo_client_connect(const struct lu_env *env,
- struct obd_export **exp,
- struct obd_device *src, struct obd_uuid *cluuid,
- struct obd_connect_data *data, void *localdata)
-{
- int rc;
- struct lustre_handle conn = { 0 };
-
- rc = class_connect(&conn, src, cluuid);
- if (rc == 0)
- *exp = class_conn2export(&conn);
-
- return rc;
-}
-
-static int echo_client_disconnect(struct obd_export *exp)
-{
-	if (!exp)
-		return -EINVAL;
-
-	return class_disconnect(exp);
-}
-
-static struct obd_ops echo_client_obd_ops = {
- .owner = THIS_MODULE,
- .iocontrol = echo_client_iocontrol,
- .connect = echo_client_connect,
- .disconnect = echo_client_disconnect
-};
-
-static int echo_client_init(void)
-{
- int rc;
-
- rc = lu_kmem_init(echo_caches);
- if (rc == 0) {
- rc = class_register_type(&echo_client_obd_ops, NULL,
- LUSTRE_ECHO_CLIENT_NAME,
- &echo_device_type);
- if (rc)
- lu_kmem_fini(echo_caches);
- }
- return rc;
-}
-
-static void echo_client_exit(void)
-{
- class_unregister_type(LUSTRE_ECHO_CLIENT_NAME);
- lu_kmem_fini(echo_caches);
-}
-
-static int __init obdecho_init(void)
-{
- int rc;
-
- LCONSOLE_INFO("Echo OBD driver; http://www.lustre.org/\n");
-
- LASSERT(PAGE_SIZE % OBD_ECHO_BLOCK_SIZE == 0);
-
- rc = libcfs_setup();
- if (rc)
- return rc;
-
- return echo_client_init();
-}
-
-static void /*__exit*/ obdecho_exit(void)
-{
- echo_client_exit();
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Echo Client test driver");
-MODULE_VERSION(LUSTRE_VERSION_STRING);
-MODULE_LICENSE("GPL");
-
-module_init(obdecho_init);
-module_exit(obdecho_exit);
-
-/** @} echo_client */
diff --git a/drivers/staging/lustre/lustre/obdecho/echo_internal.h b/drivers/staging/lustre/lustre/obdecho/echo_internal.h
deleted file mode 100644
index 42faa164fabb..000000000000
--- a/drivers/staging/lustre/lustre/obdecho/echo_internal.h
+++ /dev/null
@@ -1,42 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Whamcloud, Inc.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdecho/echo_internal.h
- */
-
-#ifndef _ECHO_INTERNAL_H
-#define _ECHO_INTERNAL_H
-
-/* The persistent object (i.e. actually stores stuff!) */
-#define ECHO_PERSISTENT_OBJID 1ULL
-#define ECHO_PERSISTENT_SIZE ((__u64)(1 << 20))
-
-/* block size to use for data verification */
-#define OBD_ECHO_BLOCK_SIZE (4 << 10)
-
-#endif
diff --git a/drivers/staging/lustre/lustre/osc/Makefile b/drivers/staging/lustre/lustre/osc/Makefile
deleted file mode 100644
index 30dec90e64e8..000000000000
--- a/drivers/staging/lustre/lustre/osc/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LUSTRE_FS) += osc.o
-osc-y := osc_request.o osc_dev.o osc_object.o \
- osc_page.o osc_lock.o osc_io.o osc_quota.o osc_cache.o lproc_osc.o
diff --git a/drivers/staging/lustre/lustre/osc/lproc_osc.c b/drivers/staging/lustre/lustre/osc/lproc_osc.c
deleted file mode 100644
index 6a705bc5420c..000000000000
--- a/drivers/staging/lustre/lustre/osc/lproc_osc.c
+++ /dev/null
@@ -1,838 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <linux/statfs.h>
-#include <obd_cksum.h>
-#include <obd_class.h>
-#include <lprocfs_status.h>
-#include <linux/seq_file.h>
-#include "osc_internal.h"
-
-static ssize_t active_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
-
- return sprintf(buf, "%d\n", !dev->u.cli.cl_import->imp_deactive);
-}
-
-static ssize_t active_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- int rc;
- unsigned long val;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
- if (val > 1)
- return -ERANGE;
-
-	/* imp_deactive has the opposite sense of "active" */
- if (dev->u.cli.cl_import->imp_deactive == val)
- rc = ptlrpc_set_import_active(dev->u.cli.cl_import, val);
- else
- CDEBUG(D_CONFIG, "activate %ld: ignoring repeat request\n",
- val);
-
- return count;
-}
-LUSTRE_RW_ATTR(active);
-
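
active_show()/active_store() follow the usual sysfs attribute shape: print the current value, or parse with kstrto*(), range-check, apply, and return count to consume the write. A generic hedged sketch of that pattern (the demo_* names are hypothetical, not OSC tunables):

	#include <linux/kernel.h>
	#include <linux/kobject.h>
	#include <linux/sysfs.h>

	static unsigned long demo_val;

	static ssize_t demo_show(struct kobject *kobj,
				 struct kobj_attribute *attr, char *buf)
	{
		return sprintf(buf, "%lu\n", demo_val);
	}

	static ssize_t demo_store(struct kobject *kobj,
				  struct kobj_attribute *attr,
				  const char *buf, size_t count)
	{
		unsigned long val;
		int rc;

		rc = kstrtoul(buf, 10, &val);
		if (rc)
			return rc;
		if (val > 1)		/* boolean-style attribute */
			return -ERANGE;

		demo_val = val;
		return count;		/* whole write consumed */
	}

	static struct kobj_attribute demo_attr = __ATTR_RW(demo);
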
-static ssize_t max_rpcs_in_flight_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- struct client_obd *cli = &dev->u.cli;
-
- return sprintf(buf, "%u\n", cli->cl_max_rpcs_in_flight);
-}
-
-static ssize_t max_rpcs_in_flight_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- struct client_obd *cli = &dev->u.cli;
- int rc;
- unsigned long val;
- int adding, added, req_count;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- if (val < 1 || val > OSC_MAX_RIF_MAX)
- return -ERANGE;
-
- adding = val - cli->cl_max_rpcs_in_flight;
- req_count = atomic_read(&osc_pool_req_count);
- if (adding > 0 && req_count < osc_reqpool_maxreqcount) {
-		/*
-		 * A race here may push the allocation slightly over the
-		 * limit; that is harmless.
-		 */
- if (req_count + adding > osc_reqpool_maxreqcount)
- adding = osc_reqpool_maxreqcount - req_count;
-
- added = osc_rq_pool->prp_populate(osc_rq_pool, adding);
- atomic_add(added, &osc_pool_req_count);
- }
-
- spin_lock(&cli->cl_loi_list_lock);
- cli->cl_max_rpcs_in_flight = val;
- client_adjust_max_dirty(cli);
- spin_unlock(&cli->cl_loi_list_lock);
-
- return count;
-}
-LUSTRE_RW_ATTR(max_rpcs_in_flight);
-
-static ssize_t max_dirty_mb_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- struct client_obd *cli = &dev->u.cli;
- long val;
- int mult;
-
- spin_lock(&cli->cl_loi_list_lock);
- val = cli->cl_dirty_max_pages;
- spin_unlock(&cli->cl_loi_list_lock);
-
- mult = 1 << (20 - PAGE_SHIFT);
- return lprocfs_read_frac_helper(buf, PAGE_SIZE, val, mult);
-}
-
-static ssize_t max_dirty_mb_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- struct client_obd *cli = &dev->u.cli;
- int rc;
- unsigned long pages_number;
-
- rc = kstrtoul(buffer, 10, &pages_number);
- if (rc)
- return rc;
-
- pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */
-
-	if (!pages_number ||
- pages_number >= OSC_MAX_DIRTY_MB_MAX << (20 - PAGE_SHIFT) ||
- pages_number > totalram_pages / 4) /* 1/4 of RAM */
- return -ERANGE;
-
- spin_lock(&cli->cl_loi_list_lock);
- cli->cl_dirty_max_pages = pages_number;
- osc_wake_cache_waiters(cli);
- spin_unlock(&cli->cl_loi_list_lock);
-
- return count;
-}
-LUSTRE_RW_ATTR(max_dirty_mb);
-
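
max_dirty_mb_show()/_store() above, like several other handlers in this file, convert between megabytes and pages by shifting by (20 - PAGE_SHIFT), since 1 MiB is 2^20 bytes and a page is 2^PAGE_SHIFT bytes. The conversion isolated as a sketch with hypothetical helper names:

	#include <linux/mm.h>

	static inline unsigned long mb_to_pages(unsigned long mb)
	{
		return mb << (20 - PAGE_SHIFT);
	}

	static inline unsigned long pages_to_mb(unsigned long pages)
	{
		return pages >> (20 - PAGE_SHIFT);
	}
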
-static int osc_cached_mb_seq_show(struct seq_file *m, void *v)
-{
- struct obd_device *dev = m->private;
- struct client_obd *cli = &dev->u.cli;
- int shift = 20 - PAGE_SHIFT;
-
- seq_printf(m,
- "used_mb: %ld\n"
- "busy_cnt: %ld\n"
- "reclaim: %llu\n",
- (atomic_long_read(&cli->cl_lru_in_list) +
- atomic_long_read(&cli->cl_lru_busy)) >> shift,
- atomic_long_read(&cli->cl_lru_busy),
- cli->cl_lru_reclaim);
-
- return 0;
-}
-
-/* shrink the number of cached pages to a specific number */
-static ssize_t osc_cached_mb_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct obd_device *dev = ((struct seq_file *)file->private_data)->private;
- struct client_obd *cli = &dev->u.cli;
- long pages_number, rc;
- char kernbuf[128];
- int mult;
- u64 val;
-
- if (count >= sizeof(kernbuf))
- return -EINVAL;
-
- if (copy_from_user(kernbuf, buffer, count))
- return -EFAULT;
- kernbuf[count] = 0;
-
- mult = 1 << (20 - PAGE_SHIFT);
- buffer += lprocfs_find_named_value(kernbuf, "used_mb:", &count) -
- kernbuf;
- rc = lprocfs_write_frac_u64_helper(buffer, count, &val, mult);
- if (rc)
- return rc;
-
- if (val > LONG_MAX)
- return -ERANGE;
- pages_number = (long)val;
-
- if (pages_number < 0)
- return -ERANGE;
-
- rc = atomic_long_read(&cli->cl_lru_in_list) - pages_number;
- if (rc > 0) {
- struct lu_env *env;
- u16 refcheck;
-
- env = cl_env_get(&refcheck);
- if (!IS_ERR(env)) {
- (void)osc_lru_shrink(env, cli, rc, true);
- cl_env_put(env, &refcheck);
- }
- }
-
- return count;
-}
-
-LPROC_SEQ_FOPS(osc_cached_mb);
-
-static ssize_t cur_dirty_bytes_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- struct client_obd *cli = &dev->u.cli;
- int len;
-
- spin_lock(&cli->cl_loi_list_lock);
- len = sprintf(buf, "%lu\n", cli->cl_dirty_pages << PAGE_SHIFT);
- spin_unlock(&cli->cl_loi_list_lock);
-
- return len;
-}
-LUSTRE_RO_ATTR(cur_dirty_bytes);
-
-static ssize_t cur_grant_bytes_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- struct client_obd *cli = &dev->u.cli;
- int len;
-
- spin_lock(&cli->cl_loi_list_lock);
- len = sprintf(buf, "%lu\n", cli->cl_avail_grant);
- spin_unlock(&cli->cl_loi_list_lock);
-
- return len;
-}
-
-static ssize_t cur_grant_bytes_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
- struct client_obd *cli = &obd->u.cli;
- int rc;
- unsigned long long val;
-
- rc = kstrtoull(buffer, 10, &val);
- if (rc)
- return rc;
-
- /* this is only for shrinking grant */
- spin_lock(&cli->cl_loi_list_lock);
- if (val >= cli->cl_avail_grant) {
- spin_unlock(&cli->cl_loi_list_lock);
- return -EINVAL;
- }
- spin_unlock(&cli->cl_loi_list_lock);
-
- if (cli->cl_import->imp_state == LUSTRE_IMP_FULL)
- rc = osc_shrink_grant_to_target(cli, val);
- if (rc)
- return rc;
- return count;
-}
-LUSTRE_RW_ATTR(cur_grant_bytes);
-
-static ssize_t cur_lost_grant_bytes_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- struct client_obd *cli = &dev->u.cli;
- int len;
-
- spin_lock(&cli->cl_loi_list_lock);
- len = sprintf(buf, "%lu\n", cli->cl_lost_grant);
- spin_unlock(&cli->cl_loi_list_lock);
-
- return len;
-}
-LUSTRE_RO_ATTR(cur_lost_grant_bytes);
-
-static ssize_t grant_shrink_interval_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
-
- return sprintf(buf, "%d\n", obd->u.cli.cl_grant_shrink_interval);
-}
-
-static ssize_t grant_shrink_interval_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
- int rc;
- unsigned long val;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- if (val <= 0)
- return -ERANGE;
-
- obd->u.cli.cl_grant_shrink_interval = val;
-
- return count;
-}
-LUSTRE_RW_ATTR(grant_shrink_interval);
-
-static ssize_t checksums_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
-
- return sprintf(buf, "%d\n", obd->u.cli.cl_checksum ? 1 : 0);
-}
-
-static ssize_t checksums_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
- int rc;
- unsigned long val;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- obd->u.cli.cl_checksum = (val ? 1 : 0);
-
- return count;
-}
-LUSTRE_RW_ATTR(checksums);
-
-static int osc_checksum_type_seq_show(struct seq_file *m, void *v)
-{
- struct obd_device *obd = m->private;
- int i;
-
- DECLARE_CKSUM_NAME;
-
- if (!obd)
- return 0;
-
- for (i = 0; i < ARRAY_SIZE(cksum_name); i++) {
- if (((1 << i) & obd->u.cli.cl_supp_cksum_types) == 0)
- continue;
- if (obd->u.cli.cl_cksum_type == (1 << i))
- seq_printf(m, "[%s] ", cksum_name[i]);
- else
- seq_printf(m, "%s ", cksum_name[i]);
- }
- seq_putc(m, '\n');
- return 0;
-}
-
-static ssize_t osc_checksum_type_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
- int i;
-
- DECLARE_CKSUM_NAME;
- char kernbuf[10];
-
- if (!obd)
- return 0;
-
- if (count > sizeof(kernbuf) - 1)
- return -EINVAL;
- if (copy_from_user(kernbuf, buffer, count))
- return -EFAULT;
- if (count > 0 && kernbuf[count - 1] == '\n')
- kernbuf[count - 1] = '\0';
- else
- kernbuf[count] = '\0';
-
- for (i = 0; i < ARRAY_SIZE(cksum_name); i++) {
- if (((1 << i) & obd->u.cli.cl_supp_cksum_types) == 0)
- continue;
- if (!strcmp(kernbuf, cksum_name[i])) {
- obd->u.cli.cl_cksum_type = 1 << i;
- return count;
- }
- }
- return -EINVAL;
-}
-
-LPROC_SEQ_FOPS(osc_checksum_type);
-
-static ssize_t resend_count_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
-
- return sprintf(buf, "%u\n", atomic_read(&obd->u.cli.cl_resends));
-}
-
-static ssize_t resend_count_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
- int rc;
- unsigned long val;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- atomic_set(&obd->u.cli.cl_resends, val);
-
- return count;
-}
-LUSTRE_RW_ATTR(resend_count);
-
-static ssize_t contention_seconds_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
- struct osc_device *od = obd2osc_dev(obd);
-
- return sprintf(buf, "%u\n", od->od_contention_time);
-}
-
-static ssize_t contention_seconds_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
- struct osc_device *od = obd2osc_dev(obd);
- int rc;
- int val;
-
- rc = kstrtoint(buffer, 10, &val);
- if (rc)
- return rc;
-
- if (val < 0)
- return -EINVAL;
-
- od->od_contention_time = val;
-
- return count;
-}
-LUSTRE_RW_ATTR(contention_seconds);
-
-static ssize_t lockless_truncate_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
- struct osc_device *od = obd2osc_dev(obd);
-
- return sprintf(buf, "%u\n", od->od_lockless_truncate);
-}
-
-static ssize_t lockless_truncate_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
- struct osc_device *od = obd2osc_dev(obd);
- int rc;
- unsigned int val;
-
- rc = kstrtouint(buffer, 10, &val);
- if (rc)
- return rc;
-
- od->od_lockless_truncate = val;
-
- return count;
-}
-LUSTRE_RW_ATTR(lockless_truncate);
-
-static ssize_t destroys_in_flight_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kobj);
-
- return sprintf(buf, "%u\n",
- atomic_read(&obd->u.cli.cl_destroy_in_flight));
-}
-LUSTRE_RO_ATTR(destroys_in_flight);
-
-static ssize_t max_pages_per_rpc_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- struct client_obd *cli = &dev->u.cli;
-
- return sprintf(buf, "%d\n", cli->cl_max_pages_per_rpc);
-}
-
-static ssize_t max_pages_per_rpc_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- struct client_obd *cli = &dev->u.cli;
- struct obd_connect_data *ocd = &cli->cl_import->imp_connect_data;
- int chunk_mask, rc;
- unsigned long long val;
-
- rc = kstrtoull(buffer, 10, &val);
- if (rc)
- return rc;
-
- /* if the max_pages is specified in bytes, convert to pages */
- if (val >= ONE_MB_BRW_SIZE)
- val >>= PAGE_SHIFT;
-
- chunk_mask = ~((1 << (cli->cl_chunkbits - PAGE_SHIFT)) - 1);
- /* max_pages_per_rpc must be chunk aligned */
- val = (val + ~chunk_mask) & chunk_mask;
- if (!val || (ocd->ocd_brw_size &&
- val > ocd->ocd_brw_size >> PAGE_SHIFT)) {
- return -ERANGE;
- }
- spin_lock(&cli->cl_loi_list_lock);
- cli->cl_max_pages_per_rpc = val;
- client_adjust_max_dirty(cli);
- spin_unlock(&cli->cl_loi_list_lock);
-
- return count;
-}
-LUSTRE_RW_ATTR(max_pages_per_rpc);
-
-static ssize_t unstable_stats_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct obd_device *dev = container_of(kobj, struct obd_device,
- obd_kobj);
- struct client_obd *cli = &dev->u.cli;
- long pages;
- int mb;
-
- pages = atomic_long_read(&cli->cl_unstable_count);
- mb = (pages * PAGE_SIZE) >> 20;
-
- return sprintf(buf, "unstable_pages: %20ld\n"
- "unstable_mb: %10d\n", pages, mb);
-}
-LUSTRE_RO_ATTR(unstable_stats);
-
-LPROC_SEQ_FOPS_RO_TYPE(osc, connect_flags);
-LPROC_SEQ_FOPS_RO_TYPE(osc, server_uuid);
-LPROC_SEQ_FOPS_RO_TYPE(osc, conn_uuid);
-LPROC_SEQ_FOPS_RO_TYPE(osc, timeouts);
-LPROC_SEQ_FOPS_RO_TYPE(osc, state);
-
-LPROC_SEQ_FOPS_WR_ONLY(osc, ping);
-
-LPROC_SEQ_FOPS_RW_TYPE(osc, import);
-LPROC_SEQ_FOPS_RW_TYPE(osc, pinger_recov);
-
-static struct lprocfs_vars lprocfs_osc_obd_vars[] = {
- { "ping", &osc_ping_fops, NULL, 0222 },
- { "connect_flags", &osc_connect_flags_fops, NULL, 0 },
- /*{ "filegroups", lprocfs_rd_filegroups, NULL, 0 },*/
- { "ost_server_uuid", &osc_server_uuid_fops, NULL, 0 },
- { "ost_conn_uuid", &osc_conn_uuid_fops, NULL, 0 },
- { "osc_cached_mb", &osc_cached_mb_fops, NULL },
- { "checksum_type", &osc_checksum_type_fops, NULL },
- { "timeouts", &osc_timeouts_fops, NULL, 0 },
- { "import", &osc_import_fops, NULL },
- { "state", &osc_state_fops, NULL, 0 },
- { "pinger_recov", &osc_pinger_recov_fops, NULL },
- { NULL }
-};
-
-#define pct(a, b) ((b) ? (a) * 100 / (b) : 0)
-
-static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v)
-{
- struct timespec64 now;
- struct obd_device *dev = seq->private;
- struct client_obd *cli = &dev->u.cli;
- unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum;
- int i;
-
- ktime_get_real_ts64(&now);
-
- spin_lock(&cli->cl_loi_list_lock);
-
-	seq_printf(seq, "snapshot_time: %llu.%9lu (secs.nsecs)\n",
-		   (s64)now.tv_sec, (unsigned long)now.tv_nsec);
- seq_printf(seq, "read RPCs in flight: %d\n",
- cli->cl_r_in_flight);
- seq_printf(seq, "write RPCs in flight: %d\n",
- cli->cl_w_in_flight);
- seq_printf(seq, "pending write pages: %d\n",
- atomic_read(&cli->cl_pending_w_pages));
- seq_printf(seq, "pending read pages: %d\n",
- atomic_read(&cli->cl_pending_r_pages));
-
- seq_puts(seq, "\n\t\t\tread\t\t\twrite\n");
- seq_puts(seq, "pages per rpc rpcs % cum % |");
- seq_puts(seq, " rpcs % cum %\n");
-
- read_tot = lprocfs_oh_sum(&cli->cl_read_page_hist);
- write_tot = lprocfs_oh_sum(&cli->cl_write_page_hist);
-
- read_cum = 0;
- write_cum = 0;
- for (i = 0; i < OBD_HIST_MAX; i++) {
- unsigned long r = cli->cl_read_page_hist.oh_buckets[i];
- unsigned long w = cli->cl_write_page_hist.oh_buckets[i];
-
- read_cum += r;
- write_cum += w;
- seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n",
- 1 << i, r, pct(r, read_tot),
- pct(read_cum, read_tot), w,
- pct(w, write_tot),
- pct(write_cum, write_tot));
- if (read_cum == read_tot && write_cum == write_tot)
- break;
- }
-
- seq_puts(seq, "\n\t\t\tread\t\t\twrite\n");
- seq_puts(seq, "rpcs in flight rpcs % cum % |");
- seq_puts(seq, " rpcs % cum %\n");
-
- read_tot = lprocfs_oh_sum(&cli->cl_read_rpc_hist);
- write_tot = lprocfs_oh_sum(&cli->cl_write_rpc_hist);
-
- read_cum = 0;
- write_cum = 0;
- for (i = 0; i < OBD_HIST_MAX; i++) {
- unsigned long r = cli->cl_read_rpc_hist.oh_buckets[i];
- unsigned long w = cli->cl_write_rpc_hist.oh_buckets[i];
-
- read_cum += r;
- write_cum += w;
- seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n",
- i, r, pct(r, read_tot),
- pct(read_cum, read_tot), w,
- pct(w, write_tot),
- pct(write_cum, write_tot));
- if (read_cum == read_tot && write_cum == write_tot)
- break;
- }
-
- seq_puts(seq, "\n\t\t\tread\t\t\twrite\n");
- seq_puts(seq, "offset rpcs % cum % |");
- seq_puts(seq, " rpcs % cum %\n");
-
- read_tot = lprocfs_oh_sum(&cli->cl_read_offset_hist);
- write_tot = lprocfs_oh_sum(&cli->cl_write_offset_hist);
-
- read_cum = 0;
- write_cum = 0;
- for (i = 0; i < OBD_HIST_MAX; i++) {
- unsigned long r = cli->cl_read_offset_hist.oh_buckets[i];
- unsigned long w = cli->cl_write_offset_hist.oh_buckets[i];
-
- read_cum += r;
- write_cum += w;
- seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n",
- (i == 0) ? 0 : 1 << (i - 1),
- r, pct(r, read_tot), pct(read_cum, read_tot),
- w, pct(w, write_tot), pct(write_cum, write_tot));
- if (read_cum == read_tot && write_cum == write_tot)
- break;
- }
-
- spin_unlock(&cli->cl_loi_list_lock);
-
- return 0;
-}
-
-#undef pct
-
-static ssize_t osc_rpc_stats_seq_write(struct file *file,
- const char __user *buf,
- size_t len, loff_t *off)
-{
- struct seq_file *seq = file->private_data;
- struct obd_device *dev = seq->private;
- struct client_obd *cli = &dev->u.cli;
-
- lprocfs_oh_clear(&cli->cl_read_rpc_hist);
- lprocfs_oh_clear(&cli->cl_write_rpc_hist);
- lprocfs_oh_clear(&cli->cl_read_page_hist);
- lprocfs_oh_clear(&cli->cl_write_page_hist);
- lprocfs_oh_clear(&cli->cl_read_offset_hist);
- lprocfs_oh_clear(&cli->cl_write_offset_hist);
-
- return len;
-}
-
-LPROC_SEQ_FOPS(osc_rpc_stats);
-
-static int osc_stats_seq_show(struct seq_file *seq, void *v)
-{
- struct timespec64 now;
- struct obd_device *dev = seq->private;
- struct osc_stats *stats = &obd2osc_dev(dev)->od_stats;
-
- ktime_get_real_ts64(&now);
-
- seq_printf(seq, "snapshot_time: %llu.%9lu (secs.usecs)\n",
- (s64)now.tv_sec, (unsigned long)now.tv_nsec);
- seq_printf(seq, "lockless_write_bytes\t\t%llu\n",
- stats->os_lockless_writes);
- seq_printf(seq, "lockless_read_bytes\t\t%llu\n",
- stats->os_lockless_reads);
- seq_printf(seq, "lockless_truncate\t\t%llu\n",
- stats->os_lockless_truncates);
- return 0;
-}
-
-static ssize_t osc_stats_seq_write(struct file *file,
- const char __user *buf,
- size_t len, loff_t *off)
-{
- struct seq_file *seq = file->private_data;
- struct obd_device *dev = seq->private;
- struct osc_stats *stats = &obd2osc_dev(dev)->od_stats;
-
- memset(stats, 0, sizeof(*stats));
- return len;
-}
-
-LPROC_SEQ_FOPS(osc_stats);
-
-void lproc_osc_attach_seqstat(struct obd_device *dev)
-{
- debugfs_create_file("osc_stats", 0644, dev->obd_debugfs_entry, dev,
- &osc_stats_fops);
- debugfs_create_file("rpc_stats", 0644, dev->obd_debugfs_entry, dev,
- &osc_rpc_stats_fops);
-}
-
-static struct attribute *osc_attrs[] = {
- &lustre_attr_active.attr,
- &lustre_attr_checksums.attr,
- &lustre_attr_contention_seconds.attr,
- &lustre_attr_cur_dirty_bytes.attr,
- &lustre_attr_cur_grant_bytes.attr,
- &lustre_attr_cur_lost_grant_bytes.attr,
- &lustre_attr_destroys_in_flight.attr,
- &lustre_attr_grant_shrink_interval.attr,
- &lustre_attr_lockless_truncate.attr,
- &lustre_attr_max_dirty_mb.attr,
- &lustre_attr_max_pages_per_rpc.attr,
- &lustre_attr_max_rpcs_in_flight.attr,
- &lustre_attr_resend_count.attr,
- &lustre_attr_unstable_stats.attr,
- NULL,
-};
-
-static const struct attribute_group osc_attr_group = {
- .attrs = osc_attrs,
-};
-
-void lprocfs_osc_init_vars(struct lprocfs_static_vars *lvars)
-{
- lvars->sysfs_vars = &osc_attr_group;
- lvars->obd_vars = lprocfs_osc_obd_vars;
-}
diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c
deleted file mode 100644
index f26983004843..000000000000
--- a/drivers/staging/lustre/lustre/osc/osc_cache.c
+++ /dev/null
@@ -1,3306 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- *
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * osc cache management.
- *
- * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
- */
-
-#define DEBUG_SUBSYSTEM S_OSC
-
-#include "osc_cl_internal.h"
-#include "osc_internal.h"
-
-static int extent_debug; /* set to non-zero for more debugging */
-
-static void osc_update_pending(struct osc_object *obj, int cmd, int delta);
-static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext,
- enum osc_extent_state state);
-static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
- struct osc_async_page *oap, int sent, int rc);
-static int osc_make_ready(const struct lu_env *env, struct osc_async_page *oap,
- int cmd);
-static int osc_refresh_count(const struct lu_env *env,
- struct osc_async_page *oap, int cmd);
-static int osc_io_unplug_async(const struct lu_env *env,
- struct client_obd *cli, struct osc_object *osc);
-static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
- unsigned int lost_grant);
-
-static void osc_extent_tree_dump0(int level, struct osc_object *obj,
- const char *func, int line);
-#define osc_extent_tree_dump(lvl, obj) \
- osc_extent_tree_dump0(lvl, obj, __func__, __LINE__)
-
-/** \addtogroup osc
- * @{
- */
-
-/* ------------------ osc extent ------------------ */
-static inline char *ext_flags(struct osc_extent *ext, char *flags)
-{
- char *buf = flags;
- *buf++ = ext->oe_rw ? 'r' : 'w';
- if (ext->oe_intree)
- *buf++ = 'i';
- if (ext->oe_sync)
- *buf++ = 'S';
- if (ext->oe_srvlock)
- *buf++ = 's';
- if (ext->oe_hp)
- *buf++ = 'h';
- if (ext->oe_urgent)
- *buf++ = 'u';
- if (ext->oe_memalloc)
- *buf++ = 'm';
- if (ext->oe_trunc_pending)
- *buf++ = 't';
- if (ext->oe_fsync_wait)
- *buf++ = 'Y';
- *buf = 0;
- return flags;
-}
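-
-/*
- * For illustration: an in-tree urgent write extent renders as "wiu";
- * a sync read extent waiting for fsync renders as "rSY".
- */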
-
-static inline char list_empty_marker(struct list_head *list)
-{
- return list_empty(list) ? '-' : '+';
-}
-
-#define EXTSTR "[%lu -> %lu/%lu]"
-#define EXTPARA(ext) (ext)->oe_start, (ext)->oe_end, (ext)->oe_max_end
-static const char *oes_strings[] = {
- "inv", "active", "cache", "locking", "lockdone", "rpc", "trunc", NULL };
-
-#define OSC_EXTENT_DUMP(lvl, extent, fmt, ...) do { \
- struct osc_extent *__ext = (extent); \
- char __buf[16]; \
- \
- CDEBUG(lvl, \
- "extent %p@{" EXTSTR ", " \
- "[%d|%d|%c|%s|%s|%p], [%d|%d|%c|%c|%p|%u|%p]} " fmt, \
- /* ----- extent part 0 ----- */ \
- __ext, EXTPARA(__ext), \
- /* ----- part 1 ----- */ \
- atomic_read(&__ext->oe_refc), \
- atomic_read(&__ext->oe_users), \
- list_empty_marker(&__ext->oe_link), \
- oes_strings[__ext->oe_state], ext_flags(__ext, __buf), \
- __ext->oe_obj, \
- /* ----- part 2 ----- */ \
- __ext->oe_grants, __ext->oe_nr_pages, \
- list_empty_marker(&__ext->oe_pages), \
- waitqueue_active(&__ext->oe_waitq) ? '+' : '-', \
- __ext->oe_dlmlock, __ext->oe_mppr, __ext->oe_owner, \
- /* ----- part 3 ----- */ \
- ## __VA_ARGS__); \
- if (lvl == D_ERROR && __ext->oe_dlmlock) \
- LDLM_ERROR(__ext->oe_dlmlock, "extent: %p", __ext); \
- else \
- LDLM_DEBUG(__ext->oe_dlmlock, "extent: %p", __ext); \
-} while (0)
-
-#undef EASSERTF
-#define EASSERTF(expr, ext, fmt, args...) do { \
- if (!(expr)) { \
- OSC_EXTENT_DUMP(D_ERROR, (ext), fmt, ##args); \
- osc_extent_tree_dump(D_ERROR, (ext)->oe_obj); \
- LASSERT(expr); \
- } \
-} while (0)
-
-#undef EASSERT
-#define EASSERT(expr, ext) EASSERTF(expr, ext, "\n")
-
-static inline struct osc_extent *rb_extent(struct rb_node *n)
-{
- return rb_entry_safe(n, struct osc_extent, oe_node);
-}
-
-static inline struct osc_extent *next_extent(struct osc_extent *ext)
-{
- if (!ext)
- return NULL;
-
- LASSERT(ext->oe_intree);
- return rb_extent(rb_next(&ext->oe_node));
-}
-
-static inline struct osc_extent *prev_extent(struct osc_extent *ext)
-{
- if (!ext)
- return NULL;
-
- LASSERT(ext->oe_intree);
- return rb_extent(rb_prev(&ext->oe_node));
-}
-
-static inline struct osc_extent *first_extent(struct osc_object *obj)
-{
- return rb_extent(rb_first(&obj->oo_root));
-}
-
-/* object must be locked by caller. */
-static int osc_extent_sanity_check0(struct osc_extent *ext,
- const char *func, const int line)
-{
- struct osc_object *obj = ext->oe_obj;
- struct osc_async_page *oap;
- size_t page_count;
- int rc = 0;
-
- if (!osc_object_is_locked(obj)) {
- rc = 9;
- goto out;
- }
-
- if (ext->oe_state >= OES_STATE_MAX) {
- rc = 10;
- goto out;
- }
-
- if (atomic_read(&ext->oe_refc) <= 0) {
- rc = 20;
- goto out;
- }
-
- if (atomic_read(&ext->oe_refc) < atomic_read(&ext->oe_users)) {
- rc = 30;
- goto out;
- }
-
- switch (ext->oe_state) {
- case OES_INV:
- if (ext->oe_nr_pages > 0 || !list_empty(&ext->oe_pages))
- rc = 35;
- else
- rc = 0;
- goto out;
- case OES_ACTIVE:
- if (atomic_read(&ext->oe_users) == 0) {
- rc = 40;
- goto out;
- }
- if (ext->oe_hp) {
- rc = 50;
- goto out;
- }
- if (ext->oe_fsync_wait && !ext->oe_urgent) {
- rc = 55;
- goto out;
- }
- break;
- case OES_CACHE:
- if (ext->oe_grants == 0) {
- rc = 60;
- goto out;
- }
- if (ext->oe_fsync_wait && !ext->oe_urgent && !ext->oe_hp) {
- rc = 65;
- goto out;
- }
- /* fall through */
- default:
- if (atomic_read(&ext->oe_users) > 0) {
- rc = 70;
- goto out;
- }
- }
-
- if (ext->oe_max_end < ext->oe_end || ext->oe_end < ext->oe_start) {
- rc = 80;
- goto out;
- }
-
- if (ext->oe_sync && ext->oe_grants > 0) {
- rc = 90;
- goto out;
- }
-
- if (ext->oe_dlmlock && !ldlm_is_failed(ext->oe_dlmlock)) {
- struct ldlm_extent *extent;
-
- extent = &ext->oe_dlmlock->l_policy_data.l_extent;
- if (!(extent->start <= cl_offset(osc2cl(obj), ext->oe_start) &&
- extent->end >= cl_offset(osc2cl(obj), ext->oe_max_end))) {
- rc = 100;
- goto out;
- }
-
- if (!(ext->oe_dlmlock->l_granted_mode & (LCK_PW | LCK_GROUP))) {
- rc = 102;
- goto out;
- }
- }
-
- if (ext->oe_nr_pages > ext->oe_mppr) {
- rc = 105;
- goto out;
- }
-
- /* Do not verify page list if extent is in RPC. This is because an
- * in-RPC extent is supposed to be exclusively accessible w/o lock.
- */
- if (ext->oe_state > OES_CACHE) {
- rc = 0;
- goto out;
- }
-
- if (!extent_debug) {
- rc = 0;
- goto out;
- }
-
- page_count = 0;
- list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
- pgoff_t index = osc_index(oap2osc(oap));
- ++page_count;
- if (index > ext->oe_end || index < ext->oe_start) {
- rc = 110;
- goto out;
- }
- }
- if (page_count != ext->oe_nr_pages) {
- rc = 120;
- goto out;
- }
-
-out:
- if (rc != 0)
- OSC_EXTENT_DUMP(D_ERROR, ext,
- "%s:%d sanity check %p failed with rc = %d\n",
- func, line, ext, rc);
- return rc;
-}
-
-#define sanity_check_nolock(ext) \
- osc_extent_sanity_check0(ext, __func__, __LINE__)
-
-#define sanity_check(ext) ({ \
- int __res; \
- osc_object_lock((ext)->oe_obj); \
- __res = sanity_check_nolock(ext); \
- osc_object_unlock((ext)->oe_obj); \
- __res; \
-})
-
-/**
- * Sanity check - make sure there are no overlapping extents in the tree.
- */
-static int osc_extent_is_overlapped(struct osc_object *obj,
- struct osc_extent *ext)
-{
- struct osc_extent *tmp;
-
- LASSERT(osc_object_is_locked(obj));
-
- if (!extent_debug)
- return 0;
-
- for (tmp = first_extent(obj); tmp; tmp = next_extent(tmp)) {
- if (tmp == ext)
- continue;
- if (tmp->oe_end >= ext->oe_start &&
- tmp->oe_start <= ext->oe_end)
- return 1;
- }
- return 0;
-}
-
-static void osc_extent_state_set(struct osc_extent *ext, int state)
-{
- LASSERT(osc_object_is_locked(ext->oe_obj));
- LASSERT(state >= OES_INV && state < OES_STATE_MAX);
-
- /* Never try to sanity check a state changing extent :-) */
- /* LASSERT(sanity_check_nolock(ext) == 0); */
-
- /* TODO: validate the state machine */
- ext->oe_state = state;
- wake_up_all(&ext->oe_waitq);
-}
-
-static struct osc_extent *osc_extent_alloc(struct osc_object *obj)
-{
- struct osc_extent *ext;
-
- ext = kmem_cache_zalloc(osc_extent_kmem, GFP_NOFS);
- if (!ext)
- return NULL;
-
- RB_CLEAR_NODE(&ext->oe_node);
- ext->oe_obj = obj;
- cl_object_get(osc2cl(obj));
- atomic_set(&ext->oe_refc, 1);
- atomic_set(&ext->oe_users, 0);
- INIT_LIST_HEAD(&ext->oe_link);
- ext->oe_state = OES_INV;
- INIT_LIST_HEAD(&ext->oe_pages);
- init_waitqueue_head(&ext->oe_waitq);
- ext->oe_dlmlock = NULL;
-
- return ext;
-}
-
-static void osc_extent_free(struct osc_extent *ext)
-{
- kmem_cache_free(osc_extent_kmem, ext);
-}
-
-static struct osc_extent *osc_extent_get(struct osc_extent *ext)
-{
- LASSERT(atomic_read(&ext->oe_refc) >= 0);
- atomic_inc(&ext->oe_refc);
- return ext;
-}
-
-static void osc_extent_put(const struct lu_env *env, struct osc_extent *ext)
-{
- LASSERT(atomic_read(&ext->oe_refc) > 0);
- if (atomic_dec_and_test(&ext->oe_refc)) {
- LASSERT(list_empty(&ext->oe_link));
- LASSERT(atomic_read(&ext->oe_users) == 0);
- LASSERT(ext->oe_state == OES_INV);
- LASSERT(!ext->oe_intree);
-
- if (ext->oe_dlmlock) {
- lu_ref_add(&ext->oe_dlmlock->l_reference,
- "osc_extent", ext);
- LDLM_LOCK_PUT(ext->oe_dlmlock);
- ext->oe_dlmlock = NULL;
- }
- cl_object_put(env, osc2cl(ext->oe_obj));
- osc_extent_free(ext);
- }
-}
-
-/**
- * osc_extent_put_trust() is a special version of osc_extent_put(), used
- * when it's known that the caller is not the last user. It addresses the
- * problem of lacking a lu_env ;-).
- */
-static void osc_extent_put_trust(struct osc_extent *ext)
-{
- LASSERT(atomic_read(&ext->oe_refc) > 1);
- LASSERT(osc_object_is_locked(ext->oe_obj));
- atomic_dec(&ext->oe_refc);
-}
-
-/**
- * Return the extent which includes pgoff @index, or return the greatest
- * previous extent in the tree.
- */
-static struct osc_extent *osc_extent_search(struct osc_object *obj,
- pgoff_t index)
-{
- struct rb_node *n = obj->oo_root.rb_node;
- struct osc_extent *tmp, *p = NULL;
-
- LASSERT(osc_object_is_locked(obj));
- while (n) {
- tmp = rb_extent(n);
- if (index < tmp->oe_start) {
- n = n->rb_left;
- } else if (index > tmp->oe_end) {
- p = rb_extent(n);
- n = n->rb_right;
- } else {
- return tmp;
- }
- }
- return p;
-}
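-
-/*
- * For illustration with hypothetical extents: given [0 -> 15] and
- * [32 -> 47] in the tree, osc_extent_search(obj, 40) returns [32 -> 47]
- * (the covering extent) and osc_extent_search(obj, 20) returns [0 -> 15],
- * the greatest extent preceding the index.
- */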
-
-/*
- * Return the extent covering @index, otherwise return NULL.
- * The caller must hold the object lock.
- */
-static struct osc_extent *osc_extent_lookup(struct osc_object *obj,
- pgoff_t index)
-{
- struct osc_extent *ext;
-
- ext = osc_extent_search(obj, index);
- if (ext && ext->oe_start <= index && index <= ext->oe_end)
- return osc_extent_get(ext);
- return NULL;
-}
-
-/* caller must have held object lock. */
-static void osc_extent_insert(struct osc_object *obj, struct osc_extent *ext)
-{
- struct rb_node **n = &obj->oo_root.rb_node;
- struct rb_node *parent = NULL;
- struct osc_extent *tmp;
-
- LASSERT(ext->oe_intree == 0);
- LASSERT(ext->oe_obj == obj);
- LASSERT(osc_object_is_locked(obj));
- while (*n) {
- tmp = rb_extent(*n);
- parent = *n;
-
- if (ext->oe_end < tmp->oe_start)
- n = &(*n)->rb_left;
- else if (ext->oe_start > tmp->oe_end)
- n = &(*n)->rb_right;
- else
- EASSERTF(0, tmp, EXTSTR "\n", EXTPARA(ext));
- }
- rb_link_node(&ext->oe_node, parent, n);
- rb_insert_color(&ext->oe_node, &obj->oo_root);
- osc_extent_get(ext);
- ext->oe_intree = 1;
-}
-
-/* caller must have held object lock. */
-static void osc_extent_erase(struct osc_extent *ext)
-{
- struct osc_object *obj = ext->oe_obj;
-
- LASSERT(osc_object_is_locked(obj));
- if (ext->oe_intree) {
- rb_erase(&ext->oe_node, &obj->oo_root);
- ext->oe_intree = 0;
- /* rbtree held a refcount */
- osc_extent_put_trust(ext);
- }
-}
-
-static struct osc_extent *osc_extent_hold(struct osc_extent *ext)
-{
- struct osc_object *obj = ext->oe_obj;
-
- LASSERT(osc_object_is_locked(obj));
- LASSERT(ext->oe_state == OES_ACTIVE || ext->oe_state == OES_CACHE);
- if (ext->oe_state == OES_CACHE) {
- osc_extent_state_set(ext, OES_ACTIVE);
- osc_update_pending(obj, OBD_BRW_WRITE, -ext->oe_nr_pages);
- }
- atomic_inc(&ext->oe_users);
- list_del_init(&ext->oe_link);
- return osc_extent_get(ext);
-}
-
-static void __osc_extent_remove(struct osc_extent *ext)
-{
- LASSERT(osc_object_is_locked(ext->oe_obj));
- LASSERT(list_empty(&ext->oe_pages));
- osc_extent_erase(ext);
- list_del_init(&ext->oe_link);
- osc_extent_state_set(ext, OES_INV);
- OSC_EXTENT_DUMP(D_CACHE, ext, "destroyed.\n");
-}
-
-static void osc_extent_remove(struct osc_extent *ext)
-{
- struct osc_object *obj = ext->oe_obj;
-
- osc_object_lock(obj);
- __osc_extent_remove(ext);
- osc_object_unlock(obj);
-}
-
-/**
- * This function is used to merge extents to get better performance. It checks
- * if @cur and @victim are contiguous at chunk level.
- */
-static int osc_extent_merge(const struct lu_env *env, struct osc_extent *cur,
- struct osc_extent *victim)
-{
- struct osc_object *obj = cur->oe_obj;
- pgoff_t chunk_start;
- pgoff_t chunk_end;
- int ppc_bits;
-
- LASSERT(cur->oe_state == OES_CACHE);
- LASSERT(osc_object_is_locked(obj));
- if (!victim)
- return -EINVAL;
-
- if (victim->oe_state != OES_CACHE || victim->oe_fsync_wait)
- return -EBUSY;
-
- if (cur->oe_max_end != victim->oe_max_end)
- return -ERANGE;
-
- LASSERT(cur->oe_dlmlock == victim->oe_dlmlock);
- ppc_bits = osc_cli(obj)->cl_chunkbits - PAGE_SHIFT;
- chunk_start = cur->oe_start >> ppc_bits;
- chunk_end = cur->oe_end >> ppc_bits;
- if (chunk_start != (victim->oe_end >> ppc_bits) + 1 &&
- chunk_end + 1 != victim->oe_start >> ppc_bits)
- return -ERANGE;
-
- OSC_EXTENT_DUMP(D_CACHE, victim, "will be merged by %p.\n", cur);
-
- cur->oe_start = min(cur->oe_start, victim->oe_start);
- cur->oe_end = max(cur->oe_end, victim->oe_end);
- cur->oe_grants += victim->oe_grants;
- cur->oe_nr_pages += victim->oe_nr_pages;
- /* only the following bits are needed to merge */
- cur->oe_urgent |= victim->oe_urgent;
- cur->oe_memalloc |= victim->oe_memalloc;
- list_splice_init(&victim->oe_pages, &cur->oe_pages);
- list_del_init(&victim->oe_link);
- victim->oe_nr_pages = 0;
-
- osc_extent_get(victim);
- __osc_extent_remove(victim);
- osc_extent_put(env, victim);
-
- OSC_EXTENT_DUMP(D_CACHE, cur, "after merging %p.\n", victim);
- return 0;
-}
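-
-/*
- * For illustration with hypothetical extents and 16 pages per chunk:
- * cur = [32 -> 47] sits in chunk 2 and victim = [16 -> 31] in chunk 1,
- * so chunk_start (2) == victim's end chunk (1) + 1 and they merge into
- * [16 -> 47]; a victim in chunk 4 or beyond would fail the contiguity
- * check with -ERANGE.
- */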
-
-/**
- * Drop user count of osc_extent, and unplug IO asynchronously.
- */
-void osc_extent_release(const struct lu_env *env, struct osc_extent *ext)
-{
- struct osc_object *obj = ext->oe_obj;
-
- LASSERT(atomic_read(&ext->oe_users) > 0);
- LASSERT(sanity_check(ext) == 0);
- LASSERT(ext->oe_grants > 0);
-
- if (atomic_dec_and_lock(&ext->oe_users, &obj->oo_lock)) {
- LASSERT(ext->oe_state == OES_ACTIVE);
- if (ext->oe_trunc_pending) {
- /* a truncate process is waiting for this extent.
- * This may happen due to a race, check
- * osc_cache_truncate_start().
- */
- osc_extent_state_set(ext, OES_TRUNC);
- ext->oe_trunc_pending = 0;
- } else {
- osc_extent_state_set(ext, OES_CACHE);
- osc_update_pending(obj, OBD_BRW_WRITE,
- ext->oe_nr_pages);
-
- /* try to merge the previous and next extent. */
- osc_extent_merge(env, ext, prev_extent(ext));
- osc_extent_merge(env, ext, next_extent(ext));
-
- if (ext->oe_urgent)
- list_move_tail(&ext->oe_link,
- &obj->oo_urgent_exts);
- }
- osc_object_unlock(obj);
-
- osc_io_unplug_async(env, osc_cli(obj), obj);
- }
- osc_extent_put(env, ext);
-}
-
-static inline int overlapped(struct osc_extent *ex1, struct osc_extent *ex2)
-{
- return !(ex1->oe_end < ex2->oe_start || ex2->oe_end < ex1->oe_start);
-}
-
-/**
- * Find or create an extent which includes @index; this is the core
- * function for managing the extent tree.
- */
-static struct osc_extent *osc_extent_find(const struct lu_env *env,
- struct osc_object *obj, pgoff_t index,
- unsigned int *grants)
-{
- struct client_obd *cli = osc_cli(obj);
- struct osc_lock *olck;
- struct cl_lock_descr *descr;
- struct osc_extent *cur;
- struct osc_extent *ext;
- struct osc_extent *conflict = NULL;
- struct osc_extent *found = NULL;
- pgoff_t chunk;
- pgoff_t max_end;
- unsigned int max_pages; /* max_pages_per_rpc */
- unsigned int chunksize;
- int ppc_bits; /* pages per chunk bits */
- pgoff_t chunk_mask;
- int rc;
-
- cur = osc_extent_alloc(obj);
- if (!cur)
- return ERR_PTR(-ENOMEM);
-
- olck = osc_env_io(env)->oi_write_osclock;
- LASSERTF(olck, "page %lu is not covered by lock\n", index);
- LASSERT(olck->ols_state == OLS_GRANTED);
-
- descr = &olck->ols_cl.cls_lock->cll_descr;
- LASSERT(descr->cld_mode >= CLM_WRITE);
-
- LASSERT(cli->cl_chunkbits >= PAGE_SHIFT);
- ppc_bits = cli->cl_chunkbits - PAGE_SHIFT;
- chunk_mask = ~((1 << ppc_bits) - 1);
- chunksize = 1 << cli->cl_chunkbits;
- chunk = index >> ppc_bits;
-
- /* align end to the RPC boundary; the RPC size may not be a power of 2. */
- max_pages = cli->cl_max_pages_per_rpc;
- LASSERT((max_pages & ~chunk_mask) == 0);
- max_end = index - (index % max_pages) + max_pages - 1;
- max_end = min_t(pgoff_t, max_end, descr->cld_end);
-
- /* initialize new extent by parameters so far */
- cur->oe_max_end = max_end;
- cur->oe_start = index & chunk_mask;
- cur->oe_end = ((index + ~chunk_mask + 1) & chunk_mask) - 1;
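- /*
- * For illustration with hypothetical sizes: PAGE_SHIFT = 12 and
- * cl_chunkbits = 16 give ppc_bits = 4 (16 pages per chunk) and
- * chunk_mask = ~15. For index = 37 with max_pages = 256 this yields
- * max_end = 255, oe_start = 32 and oe_end = 47, i.e. the chunk-aligned
- * span [32, 47] around page 37.
- */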
- if (cur->oe_start < descr->cld_start)
- cur->oe_start = descr->cld_start;
- if (cur->oe_end > max_end)
- cur->oe_end = max_end;
- cur->oe_grants = 0;
- cur->oe_mppr = max_pages;
- if (olck->ols_dlmlock) {
- LASSERT(olck->ols_hold);
- cur->oe_dlmlock = LDLM_LOCK_GET(olck->ols_dlmlock);
- lu_ref_add(&olck->ols_dlmlock->l_reference, "osc_extent", cur);
- }
-
- /* grants have been allocated by the caller */
- LASSERTF(*grants >= chunksize + cli->cl_extent_tax,
- "%u/%u/%u.\n", *grants, chunksize, cli->cl_extent_tax);
- LASSERTF((max_end - cur->oe_start) < max_pages, EXTSTR "\n",
- EXTPARA(cur));
-
-restart:
- osc_object_lock(obj);
- ext = osc_extent_search(obj, cur->oe_start);
- if (!ext)
- ext = first_extent(obj);
- while (ext) {
- pgoff_t ext_chk_start = ext->oe_start >> ppc_bits;
- pgoff_t ext_chk_end = ext->oe_end >> ppc_bits;
-
- LASSERT(sanity_check_nolock(ext) == 0);
- if (chunk > ext_chk_end + 1)
- break;
-
- /* if covering by different locks, no chance to match */
- if (olck->ols_dlmlock != ext->oe_dlmlock) {
- EASSERTF(!overlapped(ext, cur), ext,
- EXTSTR "\n", EXTPARA(cur));
-
- ext = next_extent(ext);
- continue;
- }
-
- /* discontiguous chunks? */
- if (chunk + 1 < ext_chk_start) {
- ext = next_extent(ext);
- continue;
- }
-
- /* ok, from now on, ext and cur have these attrs:
- * 1. covered by the same lock
- * 2. contiguous at chunk level or overlapping.
- */
-
- if (overlapped(ext, cur)) {
- /* cur is the minimum unit, so overlapping means
- * full containment.
- */
- EASSERTF((ext->oe_start <= cur->oe_start &&
- ext->oe_end >= cur->oe_end),
- ext, EXTSTR "\n", EXTPARA(cur));
-
- if (ext->oe_state > OES_CACHE || ext->oe_fsync_wait) {
- /* for simplicity, we wait for this extent to
- * finish before going forward.
- */
- conflict = osc_extent_get(ext);
- break;
- }
-
- found = osc_extent_hold(ext);
- break;
- }
-
- /* non-overlapped extent */
- if (ext->oe_state != OES_CACHE || ext->oe_fsync_wait) {
- /* we can't do anything with a non-OES_CACHE extent, or
- * with an extent someone is waiting on to be flushed, so
- * try the next one.
- */
- ext = next_extent(ext);
- continue;
- }
-
- /* check whether they belong to the same rpc slot before trying
- * to merge; to get here the extents must be non-overlapping and
- * contiguous at the chunk level.
- */
- if (ext->oe_max_end != max_end) {
- /* if they don't belong to the same RPC slot or
- * max_pages_per_rpc has ever changed, do not merge.
- */
- ext = next_extent(ext);
- continue;
- }
-
- /* an extent must be contiguous at the chunk level so that we
- * know the whole extent is covered by grant (the pages in the
- * extent are NOT required to be contiguous). Otherwise it would
- * be too difficult to know which chunks have grants allocated.
- */
-
- /* try to do front merge - extend ext's start */
- if (chunk + 1 == ext_chk_start) {
- /* ext must be chunk size aligned */
- EASSERT((ext->oe_start & ~chunk_mask) == 0, ext);
-
- /* pull ext's start back to cover cur */
- ext->oe_start = cur->oe_start;
- ext->oe_grants += chunksize;
- LASSERT(*grants >= chunksize);
- *grants -= chunksize;
-
- found = osc_extent_hold(ext);
- } else if (chunk == ext_chk_end + 1) {
- /* rear merge */
- ext->oe_end = cur->oe_end;
- ext->oe_grants += chunksize;
- LASSERT(*grants >= chunksize);
- *grants -= chunksize;
-
- /* try to merge with the next one because we just filled
- * in a gap
- */
- if (osc_extent_merge(env, ext, next_extent(ext)) == 0)
- /* we can save extent tax from next extent */
- *grants += cli->cl_extent_tax;
-
- found = osc_extent_hold(ext);
- }
- if (found)
- break;
-
- ext = next_extent(ext);
- }
-
- osc_extent_tree_dump(D_CACHE, obj);
- if (found) {
- LASSERT(!conflict);
- if (!IS_ERR(found)) {
- LASSERT(found->oe_dlmlock == cur->oe_dlmlock);
- OSC_EXTENT_DUMP(D_CACHE, found,
- "found caching ext for %lu.\n", index);
- }
- } else if (!conflict) {
- /* create a new extent */
- EASSERT(osc_extent_is_overlapped(obj, cur) == 0, cur);
- cur->oe_grants = chunksize + cli->cl_extent_tax;
- LASSERT(*grants >= cur->oe_grants);
- *grants -= cur->oe_grants;
-
- cur->oe_state = OES_CACHE;
- found = osc_extent_hold(cur);
- osc_extent_insert(obj, cur);
- OSC_EXTENT_DUMP(D_CACHE, cur, "add into tree %lu/%lu.\n",
- index, descr->cld_end);
- }
- osc_object_unlock(obj);
-
- if (conflict) {
- LASSERT(!found);
-
- /* wait for IO to finish. Note that it cannot be an
- * OES_TRUNC extent.
- */
- rc = osc_extent_wait(env, conflict, OES_INV);
- osc_extent_put(env, conflict);
- conflict = NULL;
- if (rc < 0) {
- found = ERR_PTR(rc);
- goto out;
- }
-
- goto restart;
- }
-
-out:
- osc_extent_put(env, cur);
- return found;
-}
-
-/**
- * Called when IO to an extent has finished.
- */
-int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
- int sent, int rc)
-{
- struct client_obd *cli = osc_cli(ext->oe_obj);
- struct osc_async_page *oap;
- struct osc_async_page *tmp;
- int nr_pages = ext->oe_nr_pages;
- int lost_grant = 0;
- int blocksize = cli->cl_import->imp_obd->obd_osfs.os_bsize ? : 4096;
- __u64 last_off = 0;
- int last_count = -1;
-
- OSC_EXTENT_DUMP(D_CACHE, ext, "extent finished.\n");
-
- ext->oe_rc = rc ?: ext->oe_nr_pages;
- EASSERT(ergo(rc == 0, ext->oe_state == OES_RPC), ext);
-
- osc_lru_add_batch(cli, &ext->oe_pages);
- list_for_each_entry_safe(oap, tmp, &ext->oe_pages, oap_pending_item) {
- list_del_init(&oap->oap_rpc_item);
- list_del_init(&oap->oap_pending_item);
- if (last_off <= oap->oap_obj_off) {
- last_off = oap->oap_obj_off;
- last_count = oap->oap_count;
- }
-
- --ext->oe_nr_pages;
- osc_ap_completion(env, cli, oap, sent, rc);
- }
- EASSERT(ext->oe_nr_pages == 0, ext);
-
- if (!sent) {
- lost_grant = ext->oe_grants;
- } else if (blocksize < PAGE_SIZE &&
- last_count != PAGE_SIZE) {
- /* For short writes we shouldn't count parts of pages that
- * span a whole chunk on the OST side, or our accounting goes
- * wrong. Should match the code in filter_grant_check.
- */
- int offset = last_off & ~PAGE_MASK;
- int count = last_count + (offset & (blocksize - 1));
- int end = (offset + last_count) & (blocksize - 1);
-
- if (end)
- count += blocksize - end;
-
- lost_grant = PAGE_SIZE - count;
- }
- if (ext->oe_grants > 0)
- osc_free_grant(cli, nr_pages, lost_grant);
-
- osc_extent_remove(ext);
- /* put the refcount for RPC */
- osc_extent_put(env, ext);
- return 0;
-}
-
-static int extent_wait_cb(struct osc_extent *ext, enum osc_extent_state state)
-{
- int ret;
-
- osc_object_lock(ext->oe_obj);
- ret = ext->oe_state == state;
- osc_object_unlock(ext->oe_obj);
-
- return ret;
-}
-
-/**
- * Wait for the extent's state to become @state.
- */
-static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext,
- enum osc_extent_state state)
-{
- struct osc_object *obj = ext->oe_obj;
- int rc = 0;
-
- osc_object_lock(obj);
- LASSERT(sanity_check_nolock(ext) == 0);
- /* `Kick' this extent only if the caller is waiting for it to be
- * written out.
- */
- if (state == OES_INV && !ext->oe_urgent && !ext->oe_hp &&
- !ext->oe_trunc_pending) {
- if (ext->oe_state == OES_ACTIVE) {
- ext->oe_urgent = 1;
- } else if (ext->oe_state == OES_CACHE) {
- ext->oe_urgent = 1;
- osc_extent_hold(ext);
- rc = 1;
- }
- }
- osc_object_unlock(obj);
- if (rc == 1)
- osc_extent_release(env, ext);
-
- /* wait for the extent until its state becomes @state */
- rc = wait_event_idle_timeout(ext->oe_waitq,
- extent_wait_cb(ext, state), 600 * HZ);
- if (rc == 0) {
- OSC_EXTENT_DUMP(D_ERROR, ext,
- "%s: wait ext to %u timedout, recovery in progress?\n",
- cli_name(osc_cli(obj)), state);
-
- wait_event_idle(ext->oe_waitq, extent_wait_cb(ext, state));
- }
- if (ext->oe_rc < 0)
- rc = ext->oe_rc;
- else
- rc = 0;
- return rc;
-}
-
-/**
- * Discard pages with index greater than @trunc_index. If @ext overlaps
- * @trunc_index, a partial truncate happens.
- */
-static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index,
- bool partial)
-{
- struct lu_env *env;
- struct cl_io *io;
- struct osc_object *obj = ext->oe_obj;
- struct client_obd *cli = osc_cli(obj);
- struct osc_async_page *oap;
- struct osc_async_page *tmp;
- int pages_in_chunk = 0;
- int ppc_bits = cli->cl_chunkbits - PAGE_SHIFT;
- __u64 trunc_chunk = trunc_index >> ppc_bits;
- int grants = 0;
- int nr_pages = 0;
- int rc = 0;
- u16 refcheck;
-
- LASSERT(sanity_check(ext) == 0);
- EASSERT(ext->oe_state == OES_TRUNC, ext);
- EASSERT(!ext->oe_urgent, ext);
-
- /* Request new lu_env.
- * We can't use that env from osc_cache_truncate_start() because
- * it's from lov_io_sub and not fully initialized.
- */
- env = cl_env_get(&refcheck);
- io = &osc_env_info(env)->oti_io;
- io->ci_obj = cl_object_top(osc2cl(obj));
- io->ci_ignore_layout = 1;
- rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
- if (rc < 0)
- goto out;
-
- /* discard all pages with index greater than trunc_index */
- list_for_each_entry_safe(oap, tmp, &ext->oe_pages, oap_pending_item) {
- pgoff_t index = osc_index(oap2osc(oap));
- struct cl_page *page = oap2cl_page(oap);
-
- LASSERT(list_empty(&oap->oap_rpc_item));
-
- /* only discard the pages with their index greater than
- * trunc_index, and ...
- */
- if (index < trunc_index ||
- (index == trunc_index && partial)) {
- /* account for how many pages remain in the chunk
- * so that we can calculate grants correctly.
- */
- if (index >> ppc_bits == trunc_chunk)
- ++pages_in_chunk;
- continue;
- }
-
- list_del_init(&oap->oap_pending_item);
-
- cl_page_get(page);
- lu_ref_add(&page->cp_reference, "truncate", current);
-
- if (cl_page_own(env, io, page) == 0) {
- cl_page_discard(env, io, page);
- cl_page_disown(env, io, page);
- } else {
- LASSERT(page->cp_state == CPS_FREEING);
- LASSERT(0);
- }
-
- lu_ref_del(&page->cp_reference, "truncate", current);
- cl_page_put(env, page);
-
- --ext->oe_nr_pages;
- ++nr_pages;
- }
- EASSERTF(ergo(ext->oe_start >= trunc_index + !!partial,
- ext->oe_nr_pages == 0),
- ext, "trunc_index %lu, partial %d\n", trunc_index, partial);
-
- osc_object_lock(obj);
- if (ext->oe_nr_pages == 0) {
- LASSERT(pages_in_chunk == 0);
- grants = ext->oe_grants;
- ext->oe_grants = 0;
- } else { /* calculate how many grants we can free */
- int chunks = (ext->oe_end >> ppc_bits) - trunc_chunk;
- pgoff_t last_index;
-
- /* if there are no pages in this chunk, we can also free grants
- * for the last chunk
- */
- if (pages_in_chunk == 0) {
- /* if this is the 1st chunk and there are no pages in it,
- * ext->oe_nr_pages must be zero, so we should be in
- * the other if-clause.
- */
- LASSERT(trunc_chunk > 0);
- --trunc_chunk;
- ++chunks;
- }
-
- /* this is what we can free from this extent */
- grants = chunks << cli->cl_chunkbits;
- ext->oe_grants -= grants;
- last_index = ((trunc_chunk + 1) << ppc_bits) - 1;
- ext->oe_end = min(last_index, ext->oe_max_end);
- LASSERT(ext->oe_end >= ext->oe_start);
- LASSERT(ext->oe_grants > 0);
- }
- osc_object_unlock(obj);
-
- if (grants > 0 || nr_pages > 0)
- osc_free_grant(cli, nr_pages, grants);
-
-out:
- cl_io_fini(env, io);
- cl_env_put(env, &refcheck);
- return rc;
-}
-
-/**
- * This function prepares an extent for transfer.
- * A race with page flushing - ll_writepage() - has to be handled cautiously.
- */
-static int osc_extent_make_ready(const struct lu_env *env,
- struct osc_extent *ext)
-{
- struct osc_async_page *oap;
- struct osc_async_page *last = NULL;
- struct osc_object *obj = ext->oe_obj;
- unsigned int page_count = 0;
- int rc;
-
- /* we're going to grab page lock, so object lock must not be taken. */
- LASSERT(sanity_check(ext) == 0);
- /* in locking state, any process should not touch this extent. */
- EASSERT(ext->oe_state == OES_LOCKING, ext);
- EASSERT(ext->oe_owner, ext);
-
- OSC_EXTENT_DUMP(D_CACHE, ext, "make ready\n");
-
- list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
- ++page_count;
- if (!last || last->oap_obj_off < oap->oap_obj_off)
- last = oap;
-
- /* checking ASYNC_READY is race safe */
- if ((oap->oap_async_flags & ASYNC_READY) != 0)
- continue;
-
- rc = osc_make_ready(env, oap, OBD_BRW_WRITE);
- switch (rc) {
- case 0:
- spin_lock(&oap->oap_lock);
- oap->oap_async_flags |= ASYNC_READY;
- spin_unlock(&oap->oap_lock);
- break;
- case -EALREADY:
- LASSERT((oap->oap_async_flags & ASYNC_READY) != 0);
- break;
- default:
- LASSERTF(0, "unknown return code: %d\n", rc);
- }
- }
-
- LASSERT(page_count == ext->oe_nr_pages);
- LASSERT(last);
- /* the last page is the only one whose count may need to be
- * refreshed against the size of the file.
- */
- if (!(last->oap_async_flags & ASYNC_COUNT_STABLE)) {
- int last_oap_count = osc_refresh_count(env, last, OBD_BRW_WRITE);
-
- LASSERT(last_oap_count > 0);
- LASSERT(last->oap_page_off + last_oap_count <= PAGE_SIZE);
- last->oap_count = last_oap_count;
- spin_lock(&last->oap_lock);
- last->oap_async_flags |= ASYNC_COUNT_STABLE;
- spin_unlock(&last->oap_lock);
- }
-
- /* for the rest of the pages we don't need to call
- * osc_refresh_count() because they are known not to be the last page
- */
- list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
- if (!(oap->oap_async_flags & ASYNC_COUNT_STABLE)) {
- oap->oap_count = PAGE_SIZE - oap->oap_page_off;
- spin_lock(&oap->oap_lock);
- oap->oap_async_flags |= ASYNC_COUNT_STABLE;
- spin_unlock(&oap->oap_lock);
- }
- }
-
- osc_object_lock(obj);
- osc_extent_state_set(ext, OES_RPC);
- osc_object_unlock(obj);
- /* get a refcount for RPC. */
- osc_extent_get(ext);
-
- return 0;
-}
-
-/**
- * Quick and simple version of osc_extent_find(). This function is frequently
- * called to expand the extent for the same IO. To expand the extent, the
- * page index must be in the same chunk as ext->oe_end or in the next one.
- */
-static int osc_extent_expand(struct osc_extent *ext, pgoff_t index,
- unsigned int *grants)
-{
- struct osc_object *obj = ext->oe_obj;
- struct client_obd *cli = osc_cli(obj);
- struct osc_extent *next;
- int ppc_bits = cli->cl_chunkbits - PAGE_SHIFT;
- pgoff_t chunk = index >> ppc_bits;
- pgoff_t end_chunk;
- pgoff_t end_index;
- unsigned int chunksize = 1 << cli->cl_chunkbits;
- int rc = 0;
-
- LASSERT(ext->oe_max_end >= index && ext->oe_start <= index);
- osc_object_lock(obj);
- LASSERT(sanity_check_nolock(ext) == 0);
- end_chunk = ext->oe_end >> ppc_bits;
- if (chunk > end_chunk + 1) {
- rc = -ERANGE;
- goto out;
- }
-
- if (end_chunk >= chunk) {
- rc = 0;
- goto out;
- }
-
- LASSERT(end_chunk + 1 == chunk);
- /* try to expand this extent to cover @index */
- end_index = min(ext->oe_max_end, ((chunk + 1) << ppc_bits) - 1);
-
- next = next_extent(ext);
- if (next && next->oe_start <= end_index) {
- /* complex mode - overlapped with the next extent,
- * this case will be handled by osc_extent_find()
- */
- rc = -EAGAIN;
- goto out;
- }
-
- ext->oe_end = end_index;
- ext->oe_grants += chunksize;
- LASSERT(*grants >= chunksize);
- *grants -= chunksize;
- EASSERTF(osc_extent_is_overlapped(obj, ext) == 0, ext,
- "overlapped after expanding for %lu.\n", index);
-
-out:
- osc_object_unlock(obj);
- return rc;
-}
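-
-/*
- * For illustration with hypothetical values and 16 pages per chunk: an
- * extent [32 -> 47] (end_chunk = 2) can absorb index 50 (chunk 3, the
- * next chunk), growing oe_end to min(oe_max_end, 63) for one more chunk
- * of grant; index 80 (chunk 5) returns -ERANGE, and index 40 (already
- * covered) returns 0 without consuming any grant.
- */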
-
-static void osc_extent_tree_dump0(int level, struct osc_object *obj,
- const char *func, int line)
-{
- struct osc_extent *ext;
- int cnt;
-
- CDEBUG(level, "Dump object %p extents at %s:%d, mppr: %u.\n",
- obj, func, line, osc_cli(obj)->cl_max_pages_per_rpc);
-
- /* osc_object_lock(obj); */
- cnt = 1;
- for (ext = first_extent(obj); ext; ext = next_extent(ext))
- OSC_EXTENT_DUMP(level, ext, "in tree %d.\n", cnt++);
-
- cnt = 1;
- list_for_each_entry(ext, &obj->oo_hp_exts, oe_link)
- OSC_EXTENT_DUMP(level, ext, "hp %d.\n", cnt++);
-
- cnt = 1;
- list_for_each_entry(ext, &obj->oo_urgent_exts, oe_link)
- OSC_EXTENT_DUMP(level, ext, "urgent %d.\n", cnt++);
-
- cnt = 1;
- list_for_each_entry(ext, &obj->oo_reading_exts, oe_link)
- OSC_EXTENT_DUMP(level, ext, "reading %d.\n", cnt++);
- /* osc_object_unlock(obj); */
-}
-
-/* ------------------ osc extent end ------------------ */
-
-static inline int osc_is_ready(struct osc_object *osc)
-{
- return !list_empty(&osc->oo_ready_item) ||
- !list_empty(&osc->oo_hp_ready_item);
-}
-
-#define OSC_IO_DEBUG(OSC, STR, args...) \
- CDEBUG(D_CACHE, "obj %p ready %d|%c|%c wr %d|%c|%c rd %d|%c " STR, \
- (OSC), osc_is_ready(OSC), \
- list_empty_marker(&(OSC)->oo_hp_ready_item), \
- list_empty_marker(&(OSC)->oo_ready_item), \
- atomic_read(&(OSC)->oo_nr_writes), \
- list_empty_marker(&(OSC)->oo_hp_exts), \
- list_empty_marker(&(OSC)->oo_urgent_exts), \
- atomic_read(&(OSC)->oo_nr_reads), \
- list_empty_marker(&(OSC)->oo_reading_exts), \
- ##args)
-
-static int osc_make_ready(const struct lu_env *env, struct osc_async_page *oap,
- int cmd)
-{
- struct osc_page *opg = oap2osc_page(oap);
- struct cl_page *page = oap2cl_page(oap);
- int result;
-
- LASSERT(cmd == OBD_BRW_WRITE); /* no cached reads */
-
- result = cl_page_make_ready(env, page, CRT_WRITE);
- if (result == 0)
- opg->ops_submit_time = jiffies;
- return result;
-}
-
-static int osc_refresh_count(const struct lu_env *env,
- struct osc_async_page *oap, int cmd)
-{
- struct osc_page *opg = oap2osc_page(oap);
- pgoff_t index = osc_index(oap2osc(oap));
- struct cl_object *obj;
- struct cl_attr *attr = &osc_env_info(env)->oti_attr;
-
- int result;
- loff_t kms;
-
- /* readpage queues with _COUNT_STABLE, shouldn't get here. */
- LASSERT(!(cmd & OBD_BRW_READ));
- obj = opg->ops_cl.cpl_obj;
-
- cl_object_attr_lock(obj);
- result = cl_object_attr_get(env, obj, attr);
- cl_object_attr_unlock(obj);
- if (result < 0)
- return result;
- kms = attr->cat_kms;
- if (cl_offset(obj, index) >= kms)
- /* catch race with truncate */
- return 0;
- else if (cl_offset(obj, index + 1) > kms)
- /* catch sub-page write at end of file */
- return kms % PAGE_SIZE;
- else
- return PAGE_SIZE;
-}
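-
-/*
- * For illustration with hypothetical values: with PAGE_SIZE = 4096 and
- * kms = 10000, a write to page index 2 (file bytes 8192-12287) returns
- * kms % PAGE_SIZE = 1808, the number of valid bytes in the sub-page
- * tail; a write to page index 3 returns 0, catching a race with
- * truncate.
- */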
-
-static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
- int cmd, int rc)
-{
- struct osc_page *opg = oap2osc_page(oap);
- struct cl_page *page = oap2cl_page(oap);
- enum cl_req_type crt;
- int srvlock;
-
- cmd &= ~OBD_BRW_NOQUOTA;
- LASSERTF(equi(page->cp_state == CPS_PAGEIN, cmd == OBD_BRW_READ),
- "cp_state:%u, cmd:%d\n", page->cp_state, cmd);
- LASSERTF(equi(page->cp_state == CPS_PAGEOUT, cmd == OBD_BRW_WRITE),
- "cp_state:%u, cmd:%d\n", page->cp_state, cmd);
- LASSERT(opg->ops_transfer_pinned);
-
- crt = cmd == OBD_BRW_READ ? CRT_READ : CRT_WRITE;
- /* Clear opg->ops_transfer_pinned before VM lock is released. */
- opg->ops_transfer_pinned = 0;
-
- opg->ops_submit_time = 0;
- srvlock = oap->oap_brw_flags & OBD_BRW_SRVLOCK;
-
- /* statistics */
- if (rc == 0 && srvlock) {
- struct lu_device *ld = opg->ops_cl.cpl_obj->co_lu.lo_dev;
- struct osc_stats *stats = &lu2osc_dev(ld)->od_stats;
- size_t bytes = oap->oap_count;
-
- if (crt == CRT_READ)
- stats->os_lockless_reads += bytes;
- else
- stats->os_lockless_writes += bytes;
- }
-
- /*
- * This has to be the last operation with the page, as locks are
- * released in cl_page_completion() and nothing except for the
- * reference counter protects page from concurrent reclaim.
- */
- lu_ref_del(&page->cp_reference, "transfer", page);
-
- cl_page_completion(env, page, crt, rc);
- cl_page_put(env, page);
-
- return 0;
-}
-
-#define OSC_DUMP_GRANT(lvl, cli, fmt, args...) do { \
- struct client_obd *__tmp = (cli); \
- CDEBUG(lvl, "%s: grant { dirty: %lu/%lu dirty_pages: %ld/%lu " \
- "dropped: %ld avail: %ld, reserved: %ld, flight: %d }" \
- "lru {in list: %ld, left: %ld, waiters: %d }" fmt "\n", \
- cli_name(__tmp), \
- __tmp->cl_dirty_pages, __tmp->cl_dirty_max_pages, \
- atomic_long_read(&obd_dirty_pages), obd_max_dirty_pages, \
- __tmp->cl_lost_grant, __tmp->cl_avail_grant, \
- __tmp->cl_reserved_grant, __tmp->cl_w_in_flight, \
- atomic_long_read(&__tmp->cl_lru_in_list), \
- atomic_long_read(&__tmp->cl_lru_busy), \
- atomic_read(&__tmp->cl_lru_shrinkers), ##args); \
-} while (0)
-
-/* caller must hold loi_list_lock */
-static void osc_consume_write_grant(struct client_obd *cli,
- struct brw_page *pga)
-{
- assert_spin_locked(&cli->cl_loi_list_lock);
- LASSERT(!(pga->flag & OBD_BRW_FROM_GRANT));
- atomic_long_inc(&obd_dirty_pages);
- cli->cl_dirty_pages++;
- pga->flag |= OBD_BRW_FROM_GRANT;
- CDEBUG(D_CACHE, "using %lu grant credits for brw %p page %p\n",
- PAGE_SIZE, pga, pga->pg);
- osc_update_next_shrink(cli);
-}
-
-/* the companion to osc_consume_write_grant, called when a brw has completed.
- * must be called with the loi lock held.
- */
-static void osc_release_write_grant(struct client_obd *cli,
- struct brw_page *pga)
-{
- assert_spin_locked(&cli->cl_loi_list_lock);
- if (!(pga->flag & OBD_BRW_FROM_GRANT))
- return;
-
- pga->flag &= ~OBD_BRW_FROM_GRANT;
- atomic_long_dec(&obd_dirty_pages);
- cli->cl_dirty_pages--;
- if (pga->flag & OBD_BRW_NOCACHE) {
- pga->flag &= ~OBD_BRW_NOCACHE;
- atomic_long_dec(&obd_dirty_transit_pages);
- cli->cl_dirty_transit--;
- }
-}
-
-/**
- * To avoid sleeping with the object lock held, it's best to allocate
- * enough grants before entering the critical section.
- *
- * The spin_lock is held by the caller.
- */
-static int osc_reserve_grant(struct client_obd *cli, unsigned int bytes)
-{
- int rc = -EDQUOT;
-
- if (cli->cl_avail_grant >= bytes) {
- cli->cl_avail_grant -= bytes;
- cli->cl_reserved_grant += bytes;
- rc = 0;
- }
- return rc;
-}
-
-static void __osc_unreserve_grant(struct client_obd *cli,
- unsigned int reserved, unsigned int unused)
-{
- /* it's quite normal for us to get more grant than reserved.
- * Consider the case where two extents are merged by adding a new
- * chunk: we save one extent tax. If the extent tax is greater than
- * one chunk, adding a new chunk saves even more grant.
- */
- cli->cl_reserved_grant -= reserved;
- if (unused > reserved) {
- cli->cl_avail_grant += reserved;
- cli->cl_lost_grant += unused - reserved;
- } else {
- cli->cl_avail_grant += unused;
- }
-}
-
-static void osc_unreserve_grant(struct client_obd *cli,
- unsigned int reserved, unsigned int unused)
-{
- spin_lock(&cli->cl_loi_list_lock);
- __osc_unreserve_grant(cli, reserved, unused);
- if (unused > 0)
- osc_wake_cache_waiters(cli);
- spin_unlock(&cli->cl_loi_list_lock);
-}
-
-/**
- * Free grant after IO is finished or canceled.
- *
- * @lost_grant is used to remember how many grants we have allocated but
- * not used; we should return these grants to the OST. There are two cases
- * where grants can be lost:
- * 1. truncate;
- * 2. the blocksize at the OST is less than PAGE_SIZE and a partial page
- *    was written. In this case the OST may use fewer chunks to serve the
- *    partial write. OSTs don't actually know the page size on the client
- *    side, so clients have to calculate the lost grant from the blocksize
- *    on the OST. See filter_grant_check() for details, and the worked
- *    example after this function.
- */
-static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
- unsigned int lost_grant)
-{
- unsigned long grant = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
-
- spin_lock(&cli->cl_loi_list_lock);
- atomic_long_sub(nr_pages, &obd_dirty_pages);
- cli->cl_dirty_pages -= nr_pages;
- cli->cl_lost_grant += lost_grant;
- if (cli->cl_avail_grant < grant && cli->cl_lost_grant >= grant) {
- /* borrow some grant from truncate to avoid the case that
- * truncate uses up all avail grant
- */
- cli->cl_lost_grant -= grant;
- cli->cl_avail_grant += grant;
- }
- osc_wake_cache_waiters(cli);
- spin_unlock(&cli->cl_loi_list_lock);
- CDEBUG(D_CACHE, "lost %u grant: %lu avail: %lu dirty: %lu\n",
- lost_grant, cli->cl_lost_grant,
- cli->cl_avail_grant, cli->cl_dirty_pages << PAGE_SHIFT);
-}
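-
-/*
- * A worked example of case 2 above, with hypothetical sizes: with
- * PAGE_SIZE = 4096 and an OST blocksize of 1024, a final write of 2000
- * bytes at page offset 0 is rounded up to two 1024-byte blocks (2048
- * bytes) by the lost_grant computation in osc_extent_finish(), so
- * lost_grant = 4096 - 2048 = 2048 bytes of the page-sized grant are
- * returned to the OST.
- */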
-
-/**
- * The companion to osc_enter_cache(), called when @oap is no longer part of
- * the dirty accounting due to error.
- */
-static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap)
-{
- spin_lock(&cli->cl_loi_list_lock);
- osc_release_write_grant(cli, &oap->oap_brw_page);
- spin_unlock(&cli->cl_loi_list_lock);
-}
-
-/**
- * Non-blocking version of osc_enter_cache() that consumes grant only when it
- * is available.
- */
-static int osc_enter_cache_try(struct client_obd *cli,
- struct osc_async_page *oap,
- int bytes, int transient)
-{
- int rc;
-
- OSC_DUMP_GRANT(D_CACHE, cli, "need:%d\n", bytes);
-
- rc = osc_reserve_grant(cli, bytes);
- if (rc < 0)
- return 0;
-
- if (cli->cl_dirty_pages < cli->cl_dirty_max_pages &&
- atomic_long_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) {
- osc_consume_write_grant(cli, &oap->oap_brw_page);
- if (transient) {
- cli->cl_dirty_transit++;
- atomic_long_inc(&obd_dirty_transit_pages);
- oap->oap_brw_flags |= OBD_BRW_NOCACHE;
- }
- rc = 1;
- } else {
- __osc_unreserve_grant(cli, bytes, bytes);
- rc = 0;
- }
- return rc;
-}
-
-static int ocw_granted(struct client_obd *cli, struct osc_cache_waiter *ocw)
-{
- int rc;
-
- spin_lock(&cli->cl_loi_list_lock);
- rc = list_empty(&ocw->ocw_entry);
- spin_unlock(&cli->cl_loi_list_lock);
- return rc;
-}
-
-/**
- * The main entry point to reserve dirty page accounting. Usually the grant
- * reserved in this function is freed in bulk in osc_free_grant(), unless
- * adding the page to the osc cache fails, in which case it is freed in
- * osc_exit_cache().
- *
- * The process is put to sleep if it has already run out of grant.
- */
-static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
- struct osc_async_page *oap, int bytes)
-{
- struct osc_object *osc = oap->oap_obj;
- struct lov_oinfo *loi = osc->oo_oinfo;
- struct osc_cache_waiter ocw;
- unsigned long timeout = (AT_OFF ? obd_timeout : at_max) * HZ;
- int rc = -EDQUOT;
-
- OSC_DUMP_GRANT(D_CACHE, cli, "need:%d\n", bytes);
-
- spin_lock(&cli->cl_loi_list_lock);
-
- /* force the caller to try sync io. this can jump the list
- * of queued writes and create a discontiguous rpc stream
- */
- if (OBD_FAIL_CHECK(OBD_FAIL_OSC_NO_GRANT) ||
- !cli->cl_dirty_max_pages || cli->cl_ar.ar_force_sync ||
- loi->loi_ar.ar_force_sync) {
- OSC_DUMP_GRANT(D_CACHE, cli, "forced sync i/o\n");
- rc = -EDQUOT;
- goto out;
- }
-
- /* Hopefully normal case - cache space and write credits available */
- if (osc_enter_cache_try(cli, oap, bytes, 0)) {
- OSC_DUMP_GRANT(D_CACHE, cli, "granted from cache\n");
- rc = 0;
- goto out;
- }
-
- /* We can get here for two reasons: too many dirty pages in the cache,
- * or we have run out of grants. In both cases we should write dirty
- * pages out. Adding a cache waiter will trigger urgent write-out no
- * matter what the RPC size is.
- * The exit condition is no available grant and no cached dirty pages,
- * which really means there is no space left on the OST.
- */
- init_waitqueue_head(&ocw.ocw_waitq);
- ocw.ocw_oap = oap;
- ocw.ocw_grant = bytes;
- while (cli->cl_dirty_pages > 0 || cli->cl_w_in_flight > 0) {
- list_add_tail(&ocw.ocw_entry, &cli->cl_cache_waiters);
- ocw.ocw_rc = 0;
- spin_unlock(&cli->cl_loi_list_lock);
-
- osc_io_unplug_async(env, cli, NULL);
-
- CDEBUG(D_CACHE, "%s: sleeping for cache space @ %p for %p\n",
- cli_name(cli), &ocw, oap);
-
- rc = wait_event_idle_timeout(ocw.ocw_waitq,
- ocw_granted(cli, &ocw), timeout);
-
- spin_lock(&cli->cl_loi_list_lock);
-
- if (rc == 0) {
- /* the wait timed out without being granted */
- list_del_init(&ocw.ocw_entry);
- rc = -ETIMEDOUT;
- break;
- }
- LASSERT(list_empty(&ocw.ocw_entry));
- rc = ocw.ocw_rc;
-
- if (rc != -EDQUOT)
- break;
- if (osc_enter_cache_try(cli, oap, bytes, 0)) {
- rc = 0;
- break;
- }
- }
-
- switch (rc) {
- case 0:
- OSC_DUMP_GRANT(D_CACHE, cli, "finally got grant space\n");
- break;
- case -ETIMEDOUT:
- OSC_DUMP_GRANT(D_CACHE, cli,
- "timeout, fall back to sync i/o\n");
- osc_extent_tree_dump(D_CACHE, osc);
- /* fall back to synchronous I/O */
- rc = -EDQUOT;
- break;
- case -EINTR:
- /* Ensures restartability - LU-3581 */
- OSC_DUMP_GRANT(D_CACHE, cli, "interrupted\n");
- rc = -ERESTARTSYS;
- break;
- case -EDQUOT:
- OSC_DUMP_GRANT(D_CACHE, cli,
- "no grant space, fall back to sync i/o\n");
- break;
- default:
- CDEBUG(D_CACHE, "%s: event for cache space @ %p never arrived due to %d, fall back to sync i/o\n",
- cli_name(cli), &ocw, rc);
- break;
- }
-out:
- spin_unlock(&cli->cl_loi_list_lock);
- return rc;
-}
-
-/* caller must hold loi_list_lock */
-void osc_wake_cache_waiters(struct client_obd *cli)
-{
- struct list_head *l, *tmp;
- struct osc_cache_waiter *ocw;
-
- list_for_each_safe(l, tmp, &cli->cl_cache_waiters) {
- ocw = list_entry(l, struct osc_cache_waiter, ocw_entry);
- list_del_init(&ocw->ocw_entry);
-
- ocw->ocw_rc = -EDQUOT;
- /* we can't dirty more */
- if ((cli->cl_dirty_pages > cli->cl_dirty_max_pages) ||
- (atomic_long_read(&obd_dirty_pages) + 1 >
- obd_max_dirty_pages)) {
- CDEBUG(D_CACHE, "no dirty room: dirty: %ld osc max %ld, sys max %ld\n",
- cli->cl_dirty_pages, cli->cl_dirty_max_pages,
- obd_max_dirty_pages);
- goto wakeup;
- }
-
- if (osc_enter_cache_try(cli, ocw->ocw_oap, ocw->ocw_grant, 0))
- ocw->ocw_rc = 0;
-wakeup:
- CDEBUG(D_CACHE, "wake up %p for oap %p, avail grant %ld, %d\n",
- ocw, ocw->ocw_oap, cli->cl_avail_grant, ocw->ocw_rc);
-
- wake_up(&ocw->ocw_waitq);
- }
-}
-
-static int osc_max_rpc_in_flight(struct client_obd *cli, struct osc_object *osc)
-{
- int hprpc = !!list_empty(&osc->oo_hp_exts);
-
- return rpcs_in_flight(cli) >= cli->cl_max_rpcs_in_flight + hprpc;
-}
-
-/* This maintains the lists of pending pages to read/write for a given object
- * (lop). This is used by osc_check_rpcs->osc_next_obj() and osc_list_maint()
- * to quickly find objects that are ready to send an RPC.
- */
-static int osc_makes_rpc(struct client_obd *cli, struct osc_object *osc,
- int cmd)
-{
- int invalid_import = 0;
-
- /* if we have an invalid import we want to drain the queued pages
- * by forcing them through rpcs that immediately fail and complete
- * the pages. Recovery relies on this to empty the queued pages
- * before canceling the locks and evicting the llite pages.
- */
- if (!cli->cl_import || cli->cl_import->imp_invalid)
- invalid_import = 1;
-
- if (cmd & OBD_BRW_WRITE) {
- if (atomic_read(&osc->oo_nr_writes) == 0)
- return 0;
- if (invalid_import) {
- CDEBUG(D_CACHE, "invalid import forcing RPC\n");
- return 1;
- }
- if (!list_empty(&osc->oo_hp_exts)) {
- CDEBUG(D_CACHE, "high prio request forcing RPC\n");
- return 1;
- }
- if (!list_empty(&osc->oo_urgent_exts)) {
- CDEBUG(D_CACHE, "urgent request forcing RPC\n");
- return 1;
- }
- /* trigger a write rpc stream as long as there are dirtiers
- * waiting for space. As they're waiting, they're not going to
- * create more pages to coalesce with what's already waiting.
- */
- if (!list_empty(&cli->cl_cache_waiters)) {
- CDEBUG(D_CACHE, "cache waiters forcing RPC\n");
- return 1;
- }
- if (atomic_read(&osc->oo_nr_writes) >=
- cli->cl_max_pages_per_rpc)
- return 1;
- } else {
- if (atomic_read(&osc->oo_nr_reads) == 0)
- return 0;
- if (invalid_import) {
- CDEBUG(D_CACHE, "invalid import forcing RPC\n");
- return 1;
- }
- /* all reads are urgent. */
- if (!list_empty(&osc->oo_reading_exts))
- return 1;
- }
-
- return 0;
-}
-
-static void osc_update_pending(struct osc_object *obj, int cmd, int delta)
-{
- struct client_obd *cli = osc_cli(obj);
-
- if (cmd & OBD_BRW_WRITE) {
- atomic_add(delta, &obj->oo_nr_writes);
- atomic_add(delta, &cli->cl_pending_w_pages);
- LASSERT(atomic_read(&obj->oo_nr_writes) >= 0);
- } else {
- atomic_add(delta, &obj->oo_nr_reads);
- atomic_add(delta, &cli->cl_pending_r_pages);
- LASSERT(atomic_read(&obj->oo_nr_reads) >= 0);
- }
- OSC_IO_DEBUG(obj, "update pending cmd %d delta %d.\n", cmd, delta);
-}
-
-static int osc_makes_hprpc(struct osc_object *obj)
-{
- return !list_empty(&obj->oo_hp_exts);
-}
-
-static void on_list(struct list_head *item, struct list_head *list, int should_be_on)
-{
- if (list_empty(item) && should_be_on)
- list_add_tail(item, list);
- else if (!list_empty(item) && !should_be_on)
- list_del_init(item);
-}
-
-/* maintain the osc's cli list membership invariants so that osc_send_oap_rpc
- * can find pages to build into rpcs quickly
- */
-static int __osc_list_maint(struct client_obd *cli, struct osc_object *osc)
-{
- if (osc_makes_hprpc(osc)) {
- /* HP rpc */
- on_list(&osc->oo_ready_item, &cli->cl_loi_ready_list, 0);
- on_list(&osc->oo_hp_ready_item, &cli->cl_loi_hp_ready_list, 1);
- } else {
- on_list(&osc->oo_hp_ready_item, &cli->cl_loi_hp_ready_list, 0);
- on_list(&osc->oo_ready_item, &cli->cl_loi_ready_list,
- osc_makes_rpc(cli, osc, OBD_BRW_WRITE) ||
- osc_makes_rpc(cli, osc, OBD_BRW_READ));
- }
-
- on_list(&osc->oo_write_item, &cli->cl_loi_write_list,
- atomic_read(&osc->oo_nr_writes) > 0);
-
- on_list(&osc->oo_read_item, &cli->cl_loi_read_list,
- atomic_read(&osc->oo_nr_reads) > 0);
-
- return osc_is_ready(osc);
-}
-
-static int osc_list_maint(struct client_obd *cli, struct osc_object *osc)
-{
- int is_ready;
-
- spin_lock(&cli->cl_loi_list_lock);
- is_ready = __osc_list_maint(cli, osc);
- spin_unlock(&cli->cl_loi_list_lock);
-
- return is_ready;
-}
-
-/* This tries to propagate async writeback errors back up to the
- * application. When an async write fails we record the error code in case
- * the app later does an fsync. As long as errors persist we force future
- * rpcs to be sync so that the app can get a sync error and break the cycle
- * of queueing pages for which writeback will fail.
- */
-static void osc_process_ar(struct osc_async_rc *ar, __u64 xid,
- int rc)
-{
- if (rc) {
- if (!ar->ar_rc)
- ar->ar_rc = rc;
-
- ar->ar_force_sync = 1;
- ar->ar_min_xid = ptlrpc_sample_next_xid();
- return;
- }
-
- if (ar->ar_force_sync && (xid >= ar->ar_min_xid))
- ar->ar_force_sync = 0;
-}
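-
-/*
- * For illustration with hypothetical xids: if the write with xid 100
- * fails, ar_min_xid is set to the next sampled xid (say 101) and
- * ar_force_sync stays set until a write with xid >= 101 completes
- * without error, at which point async writeback resumes.
- */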
-
-/* this must be called holding the loi list lock to give coverage to exit_cache,
- * async_flag maintenance, and oap_request
- */
-static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
- struct osc_async_page *oap, int sent, int rc)
-{
- struct osc_object *osc = oap->oap_obj;
- struct lov_oinfo *loi = osc->oo_oinfo;
- __u64 xid = 0;
-
- if (oap->oap_request) {
- xid = ptlrpc_req_xid(oap->oap_request);
- ptlrpc_req_finished(oap->oap_request);
- oap->oap_request = NULL;
- }
-
- /* As the transfer for this page is being done, clear the flags */
- spin_lock(&oap->oap_lock);
- oap->oap_async_flags = 0;
- spin_unlock(&oap->oap_lock);
- oap->oap_interrupted = 0;
-
- if (oap->oap_cmd & OBD_BRW_WRITE && xid > 0) {
- spin_lock(&cli->cl_loi_list_lock);
- osc_process_ar(&cli->cl_ar, xid, rc);
- osc_process_ar(&loi->loi_ar, xid, rc);
- spin_unlock(&cli->cl_loi_list_lock);
- }
-
- rc = osc_completion(env, oap, oap->oap_cmd, rc);
- if (rc)
- CERROR("completion on oap %p obj %p returns %d.\n",
- oap, osc, rc);
-}
-
-struct extent_rpc_data {
- struct list_head *erd_rpc_list;
- unsigned int erd_page_count;
- unsigned int erd_max_pages;
- unsigned int erd_max_chunks;
- unsigned int erd_max_extents;
-};
-
-static inline unsigned int osc_extent_chunks(const struct osc_extent *ext)
-{
- struct client_obd *cli = osc_cli(ext->oe_obj);
- unsigned int ppc_bits = cli->cl_chunkbits - PAGE_SHIFT;
-
- return (ext->oe_end >> ppc_bits) - (ext->oe_start >> ppc_bits) + 1;
-}
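-
-/*
- * For illustration with 16 pages per chunk (ppc_bits = 4): an extent
- * [32 -> 47] spans (47 >> 4) - (32 >> 4) + 1 = 1 chunk, while
- * [32 -> 63] spans 3 - 2 + 1 = 2 chunks.
- */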
-
-/**
- * Try to add an extent to one RPC. We need to consider the following:
- * - the number of pages must not exceed max_pages_per_rpc;
- * - the extent must be compatible with previous ones.
- */
-static int try_to_add_extent_for_io(struct client_obd *cli,
- struct osc_extent *ext,
- struct extent_rpc_data *data)
-{
- struct osc_extent *tmp;
- unsigned int chunk_count;
- struct osc_async_page *oap = list_first_entry(&ext->oe_pages,
- struct osc_async_page,
- oap_pending_item);
-
- EASSERT((ext->oe_state == OES_CACHE || ext->oe_state == OES_LOCK_DONE),
- ext);
-
- if (!data->erd_max_extents)
- return 0;
-
- chunk_count = osc_extent_chunks(ext);
- EASSERTF(data->erd_page_count != 0 ||
- chunk_count <= data->erd_max_chunks, ext,
- "The first extent to be fit in a RPC contains %u chunks, which is over the limit %u.\n",
- chunk_count, data->erd_max_chunks);
-
- if (chunk_count > data->erd_max_chunks)
- return 0;
-
- data->erd_max_pages = max(ext->oe_mppr, data->erd_max_pages);
- EASSERTF(data->erd_page_count != 0 ||
- ext->oe_nr_pages <= data->erd_max_pages, ext,
- "The first extent to be fit in a RPC contains %u pages, which is over the limit %u.\n",
- ext->oe_nr_pages, data->erd_max_pages);
- if (data->erd_page_count + ext->oe_nr_pages > data->erd_max_pages)
- return 0;
-
- list_for_each_entry(tmp, data->erd_rpc_list, oe_link) {
- struct osc_async_page *oap2;
-
- oap2 = list_first_entry(&tmp->oe_pages, struct osc_async_page,
- oap_pending_item);
- EASSERT(tmp->oe_owner == current, tmp);
- if (oap2cl_page(oap)->cp_type != oap2cl_page(oap2)->cp_type) {
- CDEBUG(D_CACHE, "Do not permit different types of IO in one RPC\n");
- return 0;
- }
-
- if (tmp->oe_srvlock != ext->oe_srvlock ||
- !tmp->oe_grants != !ext->oe_grants ||
- tmp->oe_no_merge || ext->oe_no_merge)
- return 0;
-
- /* remove break for strict check */
- break;
- }
-
- data->erd_max_extents--;
- data->erd_max_chunks -= chunk_count;
- data->erd_page_count += ext->oe_nr_pages;
- list_move_tail(&ext->oe_link, data->erd_rpc_list);
- ext->oe_owner = current;
- return 1;
-}
-
-static inline unsigned int osc_max_write_chunks(const struct client_obd *cli)
-{
- /*
- * LU-8135:
- *
- * The maximum size of a single transaction is about 64MB in ZFS.
- * #define DMU_MAX_ACCESS (64 * 1024 * 1024)
- *
- * Since ZFS is a copy-on-write file system, a single dirty page in
- * a chunk will result in the rewrite of the whole chunk, therefore
- * an RPC shouldn't be allowed to contain too many chunks otherwise
- * it will make transaction size much bigger than 64MB, especially
- * with big block size for ZFS.
- *
- * This piece of code is to make sure that OSC won't send write RPCs
- * with too many chunks. The maximum chunk size that an RPC can cover
- * is set to PTLRPC_MAX_BRW_SIZE, which is defined to 16MB. Ideally
- * OST should tell the client what the biggest transaction size is,
- * but it's good enough for now.
- *
- * This limitation doesn't apply to ldiskfs, which allows as many
- * chunks in one RPC as we want. However, there is no benefit to
- * having too many discontiguous pages in one RPC.
- *
- * An osc_extent won't cover more than an RPC-sized region, so the
- * chunks in an osc_extent won't exceed PTLRPC_MAX_BRW_SIZE >> chunkbits.
- */
- return PTLRPC_MAX_BRW_SIZE >> cli->cl_chunkbits;
-}
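-
-/* Illustrative numbers: with PTLRPC_MAX_BRW_SIZE at 16MB and a
- * hypothetical cl_chunkbits of 20 (1MB chunks, e.g. a 1MB ZFS
- * recordsize), a write RPC is capped at 16MB >> 20 = 16 chunks, keeping
- * the worst-case copy-on-write rewrite well under the ~64MB DMU
- * transaction limit quoted above.
- */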
-
-/**
- * In order to prevent multiple ptlrpcd threads from breaking contiguous
- * extents, get_write_extents() takes all appropriate extents atomically.
- *
- * The following policy is used to collect extents for IO:
- * 1. Add as many HP extents as possible;
- * 2. Add the first urgent extent in the urgent extent list and take it
- * out of the urgent list;
- * 3. Add subsequent extents of this urgent extent;
- * 4. If the urgent list is not empty, goto 2;
- * 5. Traverse the extent tree from the 1st extent;
- * 6. The steps above stop as soon as there is no space left in this RPC.
- */
-static unsigned int get_write_extents(struct osc_object *obj,
- struct list_head *rpclist)
-{
- struct client_obd *cli = osc_cli(obj);
- struct osc_extent *ext;
- struct osc_extent *temp;
- struct extent_rpc_data data = {
- .erd_rpc_list = rpclist,
- .erd_page_count = 0,
- .erd_max_pages = cli->cl_max_pages_per_rpc,
- .erd_max_chunks = osc_max_write_chunks(cli),
- .erd_max_extents = 256,
- };
-
- LASSERT(osc_object_is_locked(obj));
- list_for_each_entry_safe(ext, temp, &obj->oo_hp_exts, oe_link) {
- LASSERT(ext->oe_state == OES_CACHE);
- if (!try_to_add_extent_for_io(cli, ext, &data))
- return data.erd_page_count;
- EASSERT(ext->oe_nr_pages <= data.erd_max_pages, ext);
- }
- if (data.erd_page_count == data.erd_max_pages)
- return data.erd_page_count;
-
- while (!list_empty(&obj->oo_urgent_exts)) {
- ext = list_entry(obj->oo_urgent_exts.next,
- struct osc_extent, oe_link);
- if (!try_to_add_extent_for_io(cli, ext, &data))
- return data.erd_page_count;
-
- if (!ext->oe_intree)
- continue;
-
- while ((ext = next_extent(ext)) != NULL) {
- if ((ext->oe_state != OES_CACHE) ||
- (!list_empty(&ext->oe_link) &&
- ext->oe_owner))
- continue;
-
- if (!try_to_add_extent_for_io(cli, ext, &data))
- return data.erd_page_count;
- }
- }
- if (data.erd_page_count == data.erd_max_pages)
- return data.erd_page_count;
-
- ext = first_extent(obj);
- while (ext) {
- if ((ext->oe_state != OES_CACHE) ||
- /* this extent may already be in the current rpclist */
- (!list_empty(&ext->oe_link) && ext->oe_owner)) {
- ext = next_extent(ext);
- continue;
- }
-
- if (!try_to_add_extent_for_io(cli, ext, &data))
- return data.erd_page_count;
-
- ext = next_extent(ext);
- }
- return data.erd_page_count;
-}
-
-static int
-osc_send_write_rpc(const struct lu_env *env, struct client_obd *cli,
- struct osc_object *osc)
- __must_hold(osc)
-{
- LIST_HEAD(rpclist);
- struct osc_extent *ext;
- struct osc_extent *tmp;
- struct osc_extent *first = NULL;
- u32 page_count = 0;
- int srvlock = 0;
- int rc = 0;
-
- LASSERT(osc_object_is_locked(osc));
-
- page_count = get_write_extents(osc, &rpclist);
- LASSERT(equi(page_count == 0, list_empty(&rpclist)));
-
- if (list_empty(&rpclist))
- return 0;
-
- osc_update_pending(osc, OBD_BRW_WRITE, -page_count);
-
- list_for_each_entry(ext, &rpclist, oe_link) {
- LASSERT(ext->oe_state == OES_CACHE ||
- ext->oe_state == OES_LOCK_DONE);
- if (ext->oe_state == OES_CACHE)
- osc_extent_state_set(ext, OES_LOCKING);
- else
- osc_extent_state_set(ext, OES_RPC);
- }
-
- /* we're going to grab page lock, so release object lock because
- * lock order is page lock -> object lock.
- */
- osc_object_unlock(osc);
-
- list_for_each_entry_safe(ext, tmp, &rpclist, oe_link) {
- if (ext->oe_state == OES_LOCKING) {
- rc = osc_extent_make_ready(env, ext);
- if (unlikely(rc < 0)) {
- list_del_init(&ext->oe_link);
- osc_extent_finish(env, ext, 0, rc);
- continue;
- }
- }
- if (!first) {
- first = ext;
- srvlock = ext->oe_srvlock;
- } else {
- LASSERT(srvlock == ext->oe_srvlock);
- }
- }
-
- if (!list_empty(&rpclist)) {
- LASSERT(page_count > 0);
- rc = osc_build_rpc(env, cli, &rpclist, OBD_BRW_WRITE);
- LASSERT(list_empty(&rpclist));
- }
-
- osc_object_lock(osc);
- return rc;
-}
-
-/**
- * Prepare pages for ASYNC io and put pages in the send queue.
- *
- * \param env execution environment
- * \param cli client OBD to send the RPC on
- * \param osc object whose pending read extents are scanned
- *
- * \return zero if no page was added to the send queue.
- * \return 1 if pages were successfully added to the send queue.
- * \return negative on errors.
- */
-static int
-osc_send_read_rpc(const struct lu_env *env, struct client_obd *cli,
- struct osc_object *osc)
- __must_hold(osc)
-{
- struct osc_extent *ext;
- struct osc_extent *next;
- LIST_HEAD(rpclist);
- struct extent_rpc_data data = {
- .erd_rpc_list = &rpclist,
- .erd_page_count = 0,
- .erd_max_pages = cli->cl_max_pages_per_rpc,
- .erd_max_chunks = UINT_MAX,
- .erd_max_extents = UINT_MAX,
- };
- int rc = 0;
-
- LASSERT(osc_object_is_locked(osc));
- list_for_each_entry_safe(ext, next, &osc->oo_reading_exts, oe_link) {
- EASSERT(ext->oe_state == OES_LOCK_DONE, ext);
- if (!try_to_add_extent_for_io(cli, ext, &data))
- break;
- osc_extent_state_set(ext, OES_RPC);
- EASSERT(ext->oe_nr_pages <= data.erd_max_pages, ext);
- }
- LASSERT(data.erd_page_count <= data.erd_max_pages);
-
- osc_update_pending(osc, OBD_BRW_READ, -data.erd_page_count);
-
- if (!list_empty(&rpclist)) {
- osc_object_unlock(osc);
-
- rc = osc_build_rpc(env, cli, &rpclist, OBD_BRW_READ);
- LASSERT(list_empty(&rpclist));
-
- osc_object_lock(osc);
- }
- return rc;
-}
-
-#define list_to_obj(list, item) ({ \
- struct list_head *__tmp = (list)->next; \
- list_del_init(__tmp); \
- list_entry(__tmp, struct osc_object, oo_##item); \
-})
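-
-/* Usage sketch: list_to_obj(&cli->cl_loi_ready_list, ready_item) pops
- * the first entry off the ready list and maps it back to its osc_object
- * through the oo_ready_item member - in effect a destructive
- * list_first_entry().
- */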
-
-/* This is called by osc_check_rpcs() to find which objects have pages that
- * we could be sending. These lists are maintained by osc_makes_rpc().
- */
-static struct osc_object *osc_next_obj(struct client_obd *cli)
-{
- /* First return objects that have blocked locks so that they
- * will be flushed quickly and other clients can get the lock,
- * then objects which have pages ready to be stuffed into RPCs
- */
- if (!list_empty(&cli->cl_loi_hp_ready_list))
- return list_to_obj(&cli->cl_loi_hp_ready_list, hp_ready_item);
- if (!list_empty(&cli->cl_loi_ready_list))
- return list_to_obj(&cli->cl_loi_ready_list, ready_item);
-
- /* then if we have cache waiters, return all objects with queued
- * writes. This is especially important when many small files
- * have filled up the cache but have not been fired into rpcs
- * because they don't pass the per-object nr_pending threshold.
- */
- if (!list_empty(&cli->cl_cache_waiters) &&
- !list_empty(&cli->cl_loi_write_list))
- return list_to_obj(&cli->cl_loi_write_list, write_item);
-
- /* then return all queued objects when we have an invalid import
- * so that they get flushed
- */
- if (!cli->cl_import || cli->cl_import->imp_invalid) {
- if (!list_empty(&cli->cl_loi_write_list))
- return list_to_obj(&cli->cl_loi_write_list, write_item);
- if (!list_empty(&cli->cl_loi_read_list))
- return list_to_obj(&cli->cl_loi_read_list, read_item);
- }
- return NULL;
-}
-
-/* called with the loi list lock held */
-static void osc_check_rpcs(const struct lu_env *env, struct client_obd *cli)
- __must_hold(&cli->cl_loi_list_lock)
-{
- struct osc_object *osc;
- int rc = 0;
-
- while ((osc = osc_next_obj(cli)) != NULL) {
- struct cl_object *obj = osc2cl(osc);
- struct lu_ref_link link;
-
- OSC_IO_DEBUG(osc, "%lu in flight\n", rpcs_in_flight(cli));
-
- if (osc_max_rpc_in_flight(cli, osc)) {
- __osc_list_maint(cli, osc);
- break;
- }
-
- cl_object_get(obj);
- spin_unlock(&cli->cl_loi_list_lock);
- lu_object_ref_add_at(&obj->co_lu, &link, "check", current);
-
- /* attempt some read/write balancing by alternating between
- * reads and writes in an object. The makes_rpc checks here
- * would be redundant if we were getting read/write work items
- * instead of objects. We don't want send_oap_rpc to drain a
- * partially filled read pending queue when we're handed this
- * object to do write io while there are cache waiters.
- */
- osc_object_lock(osc);
- if (osc_makes_rpc(cli, osc, OBD_BRW_WRITE)) {
- rc = osc_send_write_rpc(env, cli, osc);
- if (rc < 0) {
- CERROR("Write request failed with %d\n", rc);
-
- /* osc_send_write_rpc failed, mostly because of
- * memory pressure.
- *
- * We can't break out here, because if:
- * - a page was submitted by osc_io_submit, so
- * it is locked;
- * - there is no request in flight;
- * - no subsequent request will be generated;
- * then the system ends up in a live-lock,
- * because there is no further chance to call
- * osc_io_unplug() and osc_check_rpcs().
- * pdflush can't help in this case either,
- * because it might block grabbing the page
- * lock as mentioned above.
- *
- * Anyway, continue to drain pages.
- */
- /* break; */
- }
- }
- if (osc_makes_rpc(cli, osc, OBD_BRW_READ)) {
- rc = osc_send_read_rpc(env, cli, osc);
- if (rc < 0)
- CERROR("Read request failed with %d\n", rc);
- }
- osc_object_unlock(osc);
-
- osc_list_maint(cli, osc);
- lu_object_ref_del_at(&obj->co_lu, &link, "check", current);
- cl_object_put(env, obj);
-
- spin_lock(&cli->cl_loi_list_lock);
- }
-}
-
-static int osc_io_unplug0(const struct lu_env *env, struct client_obd *cli,
- struct osc_object *osc, int async)
-{
- int rc = 0;
-
- if (osc && osc_list_maint(cli, osc) == 0)
- return 0;
-
- if (!async) {
- spin_lock(&cli->cl_loi_list_lock);
- osc_check_rpcs(env, cli);
- spin_unlock(&cli->cl_loi_list_lock);
- } else {
- CDEBUG(D_CACHE, "Queue writeback work for client %p.\n", cli);
- LASSERT(cli->cl_writeback_work);
- rc = ptlrpcd_queue_work(cli->cl_writeback_work);
- }
- return rc;
-}
-
-static int osc_io_unplug_async(const struct lu_env *env,
- struct client_obd *cli, struct osc_object *osc)
-{
- return osc_io_unplug0(env, cli, osc, 1);
-}
-
-void osc_io_unplug(const struct lu_env *env, struct client_obd *cli,
- struct osc_object *osc)
-{
- (void)osc_io_unplug0(env, cli, osc, 0);
-}
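-
-/* The two wrappers above differ only in where RPC formation runs: the
- * sync variant calls osc_check_rpcs() inline under cl_loi_list_lock,
- * while the async variant queues cl_writeback_work so that a ptlrpcd
- * thread does the same work later - presumably why the async form is
- * used from contexts that already hold page locks, such as
- * osc_flush_async_page() below.
- */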
-
-int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
- struct page *page, loff_t offset)
-{
- struct obd_export *exp = osc_export(osc);
- struct osc_async_page *oap = &ops->ops_oap;
-
- if (!page)
- return cfs_size_round(sizeof(*oap));
-
- oap->oap_magic = OAP_MAGIC;
- oap->oap_cli = &exp->exp_obd->u.cli;
- oap->oap_obj = osc;
-
- oap->oap_page = page;
- oap->oap_obj_off = offset;
- LASSERT(!(offset & ~PAGE_MASK));
-
- if (capable(CAP_SYS_RESOURCE))
- oap->oap_brw_flags = OBD_BRW_NOQUOTA;
-
- INIT_LIST_HEAD(&oap->oap_pending_item);
- INIT_LIST_HEAD(&oap->oap_rpc_item);
-
- spin_lock_init(&oap->oap_lock);
- CDEBUG(D_INFO, "oap %p page %p obj off %llu\n",
- oap, page, oap->oap_obj_off);
- return 0;
-}
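-
-/* Note the NULL-page early return above: callers can invoke
- * osc_prep_async_page(osc, ops, NULL, 0) purely to learn the rounded
- * size of an osc_async_page, presumably for sizing the enclosing
- * per-page state.
- */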
-
-int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
- struct osc_page *ops)
-{
- struct osc_io *oio = osc_env_io(env);
- struct osc_extent *ext = NULL;
- struct osc_async_page *oap = &ops->ops_oap;
- struct client_obd *cli = oap->oap_cli;
- struct osc_object *osc = oap->oap_obj;
- pgoff_t index;
- unsigned int grants = 0, tmp;
- int brw_flags = OBD_BRW_ASYNC;
- int cmd = OBD_BRW_WRITE;
- int need_release = 0;
- int rc = 0;
-
- if (oap->oap_magic != OAP_MAGIC)
- return -EINVAL;
-
- if (!cli->cl_import || cli->cl_import->imp_invalid)
- return -EIO;
-
- if (!list_empty(&oap->oap_pending_item) ||
- !list_empty(&oap->oap_rpc_item))
- return -EBUSY;
-
- /* Set the OBD_BRW_SRVLOCK before the page is queued. */
- brw_flags |= ops->ops_srvlock ? OBD_BRW_SRVLOCK : 0;
- if (capable(CAP_SYS_RESOURCE)) {
- brw_flags |= OBD_BRW_NOQUOTA;
- cmd |= OBD_BRW_NOQUOTA;
- }
-
- /* check if the file's owner/group is over quota */
- if (!(cmd & OBD_BRW_NOQUOTA)) {
- struct cl_object *obj;
- struct cl_attr *attr;
- unsigned int qid[MAXQUOTAS];
-
- obj = cl_object_top(&osc->oo_cl);
- attr = &osc_env_info(env)->oti_attr;
-
- cl_object_attr_lock(obj);
- rc = cl_object_attr_get(env, obj, attr);
- cl_object_attr_unlock(obj);
-
- qid[USRQUOTA] = attr->cat_uid;
- qid[GRPQUOTA] = attr->cat_gid;
- if (rc == 0 && osc_quota_chkdq(cli, qid) == NO_QUOTA)
- rc = -EDQUOT;
- if (rc)
- return rc;
- }
-
- oap->oap_cmd = cmd;
- oap->oap_page_off = ops->ops_from;
- oap->oap_count = ops->ops_to - ops->ops_from;
- /*
- * No need to hold a lock here,
- * since this page is not in any list yet.
- */
- oap->oap_async_flags = 0;
- oap->oap_brw_flags = brw_flags;
-
- OSC_IO_DEBUG(osc, "oap %p page %p added for cmd %d\n",
- oap, oap->oap_page, oap->oap_cmd & OBD_BRW_RWMASK);
-
- index = osc_index(oap2osc(oap));
-
- /* Add this page into an extent by the following steps:
- * 1. if there exists an active extent for this IO, usually this
- * page can be added to it, though sometimes the extent must be
- * expanded to accommodate the page;
- * 2. otherwise, a new extent will be allocated.
- */
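-
- /* Grant arithmetic for the fast path below: the code asks for one
- * chunk (1 << cl_chunkbits bytes) plus cl_extent_tax of grant; if
- * the page already falls inside the extent's dirty range
- * (oe_end >= index) no grant is needed at all, and when the extent
- * is merely expanded the surplus reservation is handed back via
- * osc_unreserve_grant().
- */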
-
- ext = oio->oi_active;
- if (ext && ext->oe_start <= index && ext->oe_max_end >= index) {
- /* one chunk plus extent overhead must be enough to write this
- * page
- */
- grants = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
- if (ext->oe_end >= index)
- grants = 0;
-
- /* it doesn't need any grant to dirty this page */
- spin_lock(&cli->cl_loi_list_lock);
- rc = osc_enter_cache_try(cli, oap, grants, 0);
- spin_unlock(&cli->cl_loi_list_lock);
- if (rc == 0) { /* try failed */
- grants = 0;
- need_release = 1;
- } else if (ext->oe_end < index) {
- tmp = grants;
- /* try to expand this extent */
- rc = osc_extent_expand(ext, index, &tmp);
- if (rc < 0) {
- need_release = 1;
- /* don't free reserved grant */
- } else {
- OSC_EXTENT_DUMP(D_CACHE, ext,
- "expanded for %lu.\n", index);
- osc_unreserve_grant(cli, grants, tmp);
- grants = 0;
- }
- }
- rc = 0;
- } else if (ext) {
- /* index is located outside of active extent */
- need_release = 1;
- }
- if (need_release) {
- osc_extent_release(env, ext);
- oio->oi_active = NULL;
- ext = NULL;
- }
-
- if (!ext) {
- tmp = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
-
- /* try to find new extent to cover this page */
- LASSERT(!oio->oi_active);
- /* we may have allocated grant for this page if we failed
- * to expand the previous active extent.
- */
- LASSERT(ergo(grants > 0, grants >= tmp));
-
- rc = 0;
- if (grants == 0) {
- /* we haven't allocated grant for this page. */
- rc = osc_enter_cache(env, cli, oap, tmp);
- if (rc == 0)
- grants = tmp;
- }
-
- tmp = grants;
- if (rc == 0) {
- ext = osc_extent_find(env, osc, index, &tmp);
- if (IS_ERR(ext)) {
- LASSERT(tmp == grants);
- osc_exit_cache(cli, oap);
- rc = PTR_ERR(ext);
- ext = NULL;
- } else {
- oio->oi_active = ext;
- }
- }
- if (grants > 0)
- osc_unreserve_grant(cli, grants, tmp);
- }
-
- LASSERT(ergo(rc == 0, ext));
- if (ext) {
- EASSERTF(ext->oe_end >= index && ext->oe_start <= index,
- ext, "index = %lu.\n", index);
- LASSERT((oap->oap_brw_flags & OBD_BRW_FROM_GRANT) != 0);
-
- osc_object_lock(osc);
- if (ext->oe_nr_pages == 0)
- ext->oe_srvlock = ops->ops_srvlock;
- else
- LASSERT(ext->oe_srvlock == ops->ops_srvlock);
- ++ext->oe_nr_pages;
- list_add_tail(&oap->oap_pending_item, &ext->oe_pages);
- osc_object_unlock(osc);
- }
- return rc;
-}
-
-int osc_teardown_async_page(const struct lu_env *env,
- struct osc_object *obj, struct osc_page *ops)
-{
- struct osc_async_page *oap = &ops->ops_oap;
- int rc = 0;
-
- LASSERT(oap->oap_magic == OAP_MAGIC);
-
- CDEBUG(D_INFO, "teardown oap %p page %p at index %lu.\n",
- oap, ops, osc_index(oap2osc(oap)));
-
- if (!list_empty(&oap->oap_rpc_item)) {
- CDEBUG(D_CACHE, "oap %p is not in cache.\n", oap);
- rc = -EBUSY;
- } else if (!list_empty(&oap->oap_pending_item)) {
- struct osc_extent *ext = NULL;
-
- osc_object_lock(obj);
- ext = osc_extent_lookup(obj, osc_index(oap2osc(oap)));
- osc_object_unlock(obj);
- /* only truncated pages are allowed to be taken out.
- * See osc_extent_truncate() and osc_cache_truncate_start()
- * for details.
- */
- if (ext && ext->oe_state != OES_TRUNC) {
- OSC_EXTENT_DUMP(D_ERROR, ext, "trunc at %lu.\n",
- osc_index(oap2osc(oap)));
- rc = -EBUSY;
- }
- if (ext)
- osc_extent_put(env, ext);
- }
- return rc;
-}
-
-/**
- * This is called when a page is picked up by kernel to write out.
- *
- * We should find out the corresponding extent and add the whole extent
- * into urgent list. The extent may be being truncated or used, handle it
- * carefully.
- */
-int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
- struct osc_page *ops)
-{
- struct osc_extent *ext = NULL;
- struct osc_object *obj = cl2osc(ops->ops_cl.cpl_obj);
- struct cl_page *cp = ops->ops_cl.cpl_page;
- pgoff_t index = osc_index(ops);
- struct osc_async_page *oap = &ops->ops_oap;
- bool unplug = false;
- int rc = 0;
-
- osc_object_lock(obj);
- ext = osc_extent_lookup(obj, index);
- if (!ext) {
- osc_extent_tree_dump(D_ERROR, obj);
- LASSERTF(0, "page index %lu is NOT covered.\n", index);
- }
-
- switch (ext->oe_state) {
- case OES_RPC:
- case OES_LOCK_DONE:
- CL_PAGE_DEBUG(D_ERROR, env, cp, "flush an in-rpc page?\n");
- LASSERT(0);
- break;
- case OES_LOCKING:
- /* If we know this extent is being written out, we should abort
- * so that the writer can make this page ready. Otherwise there
- * is a deadlock: another process can wait for the page writeback
- * bit while holding the page lock, and meanwhile
- * vvp_page_make_ready() needs to grab the page lock before
- * really sending the RPC.
- */
- case OES_TRUNC:
- /* race with truncate, page will be redirtied */
- case OES_ACTIVE:
- /* The extent is active so we need to abort and let the caller
- * re-dirty the page. If we continued on here, and we were the
- * one making the extent active, we could deadlock waiting for
- * the page writeback to clear but it won't because the extent
- * is active and won't be written out.
- */
- rc = -EAGAIN;
- goto out;
- default:
- break;
- }
-
- rc = cl_page_prep(env, io, cp, CRT_WRITE);
- if (rc)
- goto out;
-
- spin_lock(&oap->oap_lock);
- oap->oap_async_flags |= ASYNC_READY | ASYNC_URGENT;
- spin_unlock(&oap->oap_lock);
-
- if (current->flags & PF_MEMALLOC)
- ext->oe_memalloc = 1;
-
- ext->oe_urgent = 1;
- if (ext->oe_state == OES_CACHE) {
- OSC_EXTENT_DUMP(D_CACHE, ext,
- "flush page %p make it urgent.\n", oap);
- if (list_empty(&ext->oe_link))
- list_add_tail(&ext->oe_link, &obj->oo_urgent_exts);
- unplug = true;
- }
- rc = 0;
-
-out:
- osc_object_unlock(obj);
- osc_extent_put(env, ext);
- if (unplug)
- osc_io_unplug_async(env, osc_cli(obj), obj);
- return rc;
-}
-
-/**
- * This is called when a sync waiter receives an interruption. Its job is
- * to get the caller woken as soon as possible. If its page hasn't been
- * put in an rpc yet it can dequeue immediately. Otherwise it has to mark
- * the rpc as desiring interruption, which will forcefully complete the
- * rpc once it has timed out.
- */
-int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops)
-{
- struct osc_async_page *oap = &ops->ops_oap;
- struct osc_object *obj = oap->oap_obj;
- struct client_obd *cli = osc_cli(obj);
- struct osc_extent *ext;
- struct osc_extent *found = NULL;
- struct list_head *plist;
- pgoff_t index = osc_index(ops);
- int rc = -EBUSY;
- int cmd;
-
- LASSERT(!oap->oap_interrupted);
- oap->oap_interrupted = 1;
-
- /* Find out the caching extent */
- osc_object_lock(obj);
- if (oap->oap_cmd & OBD_BRW_WRITE) {
- plist = &obj->oo_urgent_exts;
- cmd = OBD_BRW_WRITE;
- } else {
- plist = &obj->oo_reading_exts;
- cmd = OBD_BRW_READ;
- }
- list_for_each_entry(ext, plist, oe_link) {
- if (ext->oe_start <= index && ext->oe_end >= index) {
- LASSERT(ext->oe_state == OES_LOCK_DONE);
- /* For OES_LOCK_DONE state extent, it has already held
- * a refcount for RPC.
- */
- found = osc_extent_get(ext);
- break;
- }
- }
- if (found) {
- list_del_init(&found->oe_link);
- osc_update_pending(obj, cmd, -found->oe_nr_pages);
- osc_object_unlock(obj);
-
- osc_extent_finish(env, found, 0, -EINTR);
- osc_extent_put(env, found);
- rc = 0;
- } else {
- osc_object_unlock(obj);
- /* ok, it's been put in an rpc. only one oap gets a request
- * reference
- */
- if (oap->oap_request) {
- ptlrpc_mark_interrupted(oap->oap_request);
- ptlrpcd_wake(oap->oap_request);
- ptlrpc_req_finished(oap->oap_request);
- oap->oap_request = NULL;
- }
- }
-
- osc_list_maint(cli, obj);
- return rc;
-}
-
-int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
- struct list_head *list, int cmd, int brw_flags)
-{
- struct client_obd *cli = osc_cli(obj);
- struct osc_extent *ext;
- struct osc_async_page *oap, *tmp;
- int page_count = 0;
- int mppr = cli->cl_max_pages_per_rpc;
- bool can_merge = true;
- pgoff_t start = CL_PAGE_EOF;
- pgoff_t end = 0;
-
- list_for_each_entry(oap, list, oap_pending_item) {
- struct osc_page *opg = oap2osc_page(oap);
- pgoff_t index = osc_index(opg);
-
- if (index > end)
- end = index;
- if (index < start)
- start = index;
- ++page_count;
- mppr <<= (page_count > mppr);
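- /* The shift above doubles mppr whenever the running page count
- * overtakes it, so oe_mppr ends up as the smallest power-of-two
- * multiple of cl_max_pages_per_rpc that is >= page_count, without
- * a branch in the loop.
- */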
-
- if (unlikely(opg->ops_from > 0 || opg->ops_to < PAGE_SIZE))
- can_merge = false;
- }
-
- ext = osc_extent_alloc(obj);
- if (!ext) {
- list_for_each_entry_safe(oap, tmp, list, oap_pending_item) {
- list_del_init(&oap->oap_pending_item);
- osc_ap_completion(env, cli, oap, 0, -ENOMEM);
- }
- return -ENOMEM;
- }
-
- ext->oe_rw = !!(cmd & OBD_BRW_READ);
- ext->oe_sync = 1;
- ext->oe_no_merge = !can_merge;
- ext->oe_urgent = 1;
- ext->oe_start = start;
- ext->oe_end = end;
- ext->oe_max_end = end;
- ext->oe_obj = obj;
- ext->oe_srvlock = !!(brw_flags & OBD_BRW_SRVLOCK);
- ext->oe_nr_pages = page_count;
- ext->oe_mppr = mppr;
- list_splice_init(list, &ext->oe_pages);
-
- osc_object_lock(obj);
- /* Reuse the initial refcount for RPC, don't drop it */
- osc_extent_state_set(ext, OES_LOCK_DONE);
- if (cmd & OBD_BRW_WRITE) {
- list_add_tail(&ext->oe_link, &obj->oo_urgent_exts);
- osc_update_pending(obj, OBD_BRW_WRITE, page_count);
- } else {
- list_add_tail(&ext->oe_link, &obj->oo_reading_exts);
- osc_update_pending(obj, OBD_BRW_READ, page_count);
- }
- osc_object_unlock(obj);
-
- osc_io_unplug_async(env, cli, obj);
- return 0;
-}
-
-/**
- * Called by osc_io_setattr_start() to freeze and destroy covering extents.
- */
-int osc_cache_truncate_start(const struct lu_env *env, struct osc_object *obj,
- u64 size, struct osc_extent **extp)
-{
- struct client_obd *cli = osc_cli(obj);
- struct osc_extent *ext;
- struct osc_extent *temp;
- struct osc_extent *waiting = NULL;
- pgoff_t index;
- LIST_HEAD(list);
- int result = 0;
- bool partial;
-
- /* pages with index greater or equal to index will be truncated. */
- index = cl_index(osc2cl(obj), size);
- partial = size > cl_offset(osc2cl(obj), index);
-
-again:
- osc_object_lock(obj);
- ext = osc_extent_search(obj, index);
- if (!ext)
- ext = first_extent(obj);
- else if (ext->oe_end < index)
- ext = next_extent(ext);
- while (ext) {
- EASSERT(ext->oe_state != OES_TRUNC, ext);
-
- if (ext->oe_state > OES_CACHE || ext->oe_urgent) {
- /* if ext is in urgent state, a page in it must already
- * have been flushed by write_page(). We have to wait
- * for this extent because we can't truncate that page.
- */
- OSC_EXTENT_DUMP(D_CACHE, ext,
- "waiting for busy extent\n");
- waiting = osc_extent_get(ext);
- break;
- }
-
- OSC_EXTENT_DUMP(D_CACHE, ext, "try to trunc:%llu.\n", size);
-
- osc_extent_get(ext);
- if (ext->oe_state == OES_ACTIVE) {
- /* Though we grab the inode mutex on the write path, we
- * release it before releasing the extent (in
- * osc_io_end()), so there is a race window in which an
- * extent is still in OES_ACTIVE when truncate starts.
- */
- LASSERT(!ext->oe_trunc_pending);
- ext->oe_trunc_pending = 1;
- } else {
- EASSERT(ext->oe_state == OES_CACHE, ext);
- osc_extent_state_set(ext, OES_TRUNC);
- osc_update_pending(obj, OBD_BRW_WRITE,
- -ext->oe_nr_pages);
- }
- EASSERT(list_empty(&ext->oe_link), ext);
- list_add_tail(&ext->oe_link, &list);
-
- ext = next_extent(ext);
- }
- osc_object_unlock(obj);
-
- osc_list_maint(cli, obj);
-
- list_for_each_entry_safe(ext, temp, &list, oe_link) {
- int rc;
-
- list_del_init(&ext->oe_link);
-
- /* extent may be in OES_ACTIVE state because inode mutex
- * is released before osc_io_end() in file write case
- */
- if (ext->oe_state != OES_TRUNC)
- osc_extent_wait(env, ext, OES_TRUNC);
-
- rc = osc_extent_truncate(ext, index, partial);
- if (rc < 0) {
- if (result == 0)
- result = rc;
-
- OSC_EXTENT_DUMP(D_ERROR, ext,
- "truncate error %d\n", rc);
- } else if (ext->oe_nr_pages == 0) {
- osc_extent_remove(ext);
- } else {
- /* this must be an overlapping extent, which means only
- * part of the pages in this extent have been truncated.
- */
- EASSERTF(ext->oe_start <= index, ext,
- "trunc index = %lu/%d.\n", index, partial);
- /* fix index to skip this partially truncated extent */
- index = ext->oe_end + 1;
- partial = false;
-
- /* we need to hold this extent in OES_TRUNC state so
- * that no writeback will happen. This is to avoid
- * BUG 17397.
- * Only partial truncate can reach here; if @size is
- * not zero, the caller should provide a valid @extp.
- */
- LASSERT(!*extp);
- *extp = osc_extent_get(ext);
- OSC_EXTENT_DUMP(D_CACHE, ext,
- "trunc at %llu\n", size);
- }
- osc_extent_put(env, ext);
- }
- if (waiting) {
- int rc;
-
- /* Ignore the result of osc_extent_wait; the write initiator
- * should take care of it.
- */
- rc = osc_extent_wait(env, waiting, OES_INV);
- if (rc < 0)
- OSC_EXTENT_DUMP(D_CACHE, waiting, "error: %d.\n", rc);
-
- osc_extent_put(env, waiting);
- waiting = NULL;
- goto again;
- }
- return result;
-}
-
-/**
- * Called after osc_io_setattr_end to add oio->oi_trunc back to cache.
- */
-void osc_cache_truncate_end(const struct lu_env *env, struct osc_extent *ext)
-{
- if (ext) {
- struct osc_object *obj = ext->oe_obj;
- bool unplug = false;
-
- EASSERT(ext->oe_nr_pages > 0, ext);
- EASSERT(ext->oe_state == OES_TRUNC, ext);
- EASSERT(!ext->oe_urgent, ext);
-
- OSC_EXTENT_DUMP(D_CACHE, ext, "trunc -> cache.\n");
- osc_object_lock(obj);
- osc_extent_state_set(ext, OES_CACHE);
- if (ext->oe_fsync_wait && !ext->oe_urgent) {
- ext->oe_urgent = 1;
- list_move_tail(&ext->oe_link, &obj->oo_urgent_exts);
- unplug = true;
- }
- osc_update_pending(obj, OBD_BRW_WRITE, ext->oe_nr_pages);
- osc_object_unlock(obj);
- osc_extent_put(env, ext);
-
- if (unplug)
- osc_io_unplug_async(env, osc_cli(obj), obj);
- }
-}
-
-/**
- * Wait for extents in a specific range to be written out.
- * The caller must have called osc_cache_writeback_range() to issue IO,
- * otherwise it will take a long time for this function to finish.
- *
- * The caller must hold the inode mutex, or cancel the exclusive dlm
- * lock, so that nobody else can dirty this range of the file while
- * we're waiting for extents to be written.
- */
-int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj,
- pgoff_t start, pgoff_t end)
-{
- struct osc_extent *ext;
- pgoff_t index = start;
- int result = 0;
-
-again:
- osc_object_lock(obj);
- ext = osc_extent_search(obj, index);
- if (!ext)
- ext = first_extent(obj);
- else if (ext->oe_end < index)
- ext = next_extent(ext);
- while (ext) {
- int rc;
-
- if (ext->oe_start > end)
- break;
-
- if (!ext->oe_fsync_wait) {
- ext = next_extent(ext);
- continue;
- }
-
- EASSERT(ergo(ext->oe_state == OES_CACHE,
- ext->oe_hp || ext->oe_urgent), ext);
- EASSERT(ergo(ext->oe_state == OES_ACTIVE,
- !ext->oe_hp && ext->oe_urgent), ext);
-
- index = ext->oe_end + 1;
- osc_extent_get(ext);
- osc_object_unlock(obj);
-
- rc = osc_extent_wait(env, ext, OES_INV);
- if (result == 0)
- result = rc;
- osc_extent_put(env, ext);
- goto again;
- }
- osc_object_unlock(obj);
-
- OSC_IO_DEBUG(obj, "sync file range.\n");
- return result;
-}
-
-/**
- * Called to write out a range of an osc object.
- *
- * @hp: should be set if this is caused by lock cancellation;
- * @discard: is set if dirty pages should be dropped - the file will be
- * deleted or truncated; this implies there are no partially discarded
- * extents.
- *
- * Return how many pages will be issued, or an error code if an error
- * occurred.
- */
-int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj,
- pgoff_t start, pgoff_t end, int hp, int discard)
-{
- struct osc_extent *ext;
- LIST_HEAD(discard_list);
- bool unplug = false;
- int result = 0;
-
- osc_object_lock(obj);
- ext = osc_extent_search(obj, start);
- if (!ext)
- ext = first_extent(obj);
- else if (ext->oe_end < start)
- ext = next_extent(ext);
- while (ext) {
- if (ext->oe_start > end)
- break;
-
- ext->oe_fsync_wait = 1;
- switch (ext->oe_state) {
- case OES_CACHE:
- result += ext->oe_nr_pages;
- if (!discard) {
- struct list_head *list = NULL;
-
- if (hp) {
- EASSERT(!ext->oe_hp, ext);
- ext->oe_hp = 1;
- list = &obj->oo_hp_exts;
- } else if (!ext->oe_urgent) {
- ext->oe_urgent = 1;
- list = &obj->oo_urgent_exts;
- }
- if (list)
- list_move_tail(&ext->oe_link, list);
- unplug = true;
- } else {
- /* the only discarder is lock cancellation, so
- * [start, end] must contain this extent
- */
- EASSERT(ext->oe_start >= start &&
- ext->oe_max_end <= end, ext);
- osc_extent_state_set(ext, OES_LOCKING);
- ext->oe_owner = current;
- list_move_tail(&ext->oe_link, &discard_list);
- osc_update_pending(obj, OBD_BRW_WRITE,
- -ext->oe_nr_pages);
- }
- break;
- case OES_ACTIVE:
- /* It's pretty bad to wait for ACTIVE extents, because
- * we don't know how long we would wait for them to be
- * flushed, since they may be blocked awaiting more
- * grants. We do this for the correctness of fsync.
- */
- LASSERT(hp == 0 && discard == 0);
- ext->oe_urgent = 1;
- break;
- case OES_TRUNC:
- /* this extent is being truncated, can't do anything
- * for it now. it will be set to urgent after truncate
- * is finished in osc_cache_truncate_end().
- */
- default:
- break;
- }
- ext = next_extent(ext);
- }
- osc_object_unlock(obj);
-
- LASSERT(ergo(!discard, list_empty(&discard_list)));
- if (!list_empty(&discard_list)) {
- struct osc_extent *tmp;
- int rc;
-
- osc_list_maint(osc_cli(obj), obj);
- list_for_each_entry_safe(ext, tmp, &discard_list, oe_link) {
- list_del_init(&ext->oe_link);
- EASSERT(ext->oe_state == OES_LOCKING, ext);
-
- /* Discard cached pages. We don't actually write this
- * extent out but we complete it as if we did.
- */
- rc = osc_extent_make_ready(env, ext);
- if (unlikely(rc < 0)) {
- OSC_EXTENT_DUMP(D_ERROR, ext,
- "make_ready returned %d\n", rc);
- if (result >= 0)
- result = rc;
- }
-
- /* finish the extent as if the pages were sent */
- osc_extent_finish(env, ext, 0, 0);
- }
- }
-
- if (unplug)
- osc_io_unplug(env, osc_cli(obj), obj);
-
- if (hp || discard) {
- int rc;
-
- rc = osc_cache_wait_range(env, obj, start, end);
- if (result >= 0 && rc < 0)
- result = rc;
- }
-
- OSC_IO_DEBUG(obj, "pageout [%lu, %lu], %d.\n", start, end, result);
- return result;
-}
-
-/**
- * Invokes \a cb on the pages covered by a given [start, end] range of
- * \a obj.
- *
- * Rather than hogging the CPU, the scan gives up and returns
- * CLP_GANG_RESCHED when a reschedule is pending; the caller is expected
- * to cond_resched() and retry.
- *
- * Gang tree lookup (radix_tree_gang_lookup()) optimization is absolutely
- * crucial in the face of [offset, EOF] locks.
- *
- * At least one covered page is visited per call unless no page in the
- * range is covered.
- */
-int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io,
- struct osc_object *osc, pgoff_t start, pgoff_t end,
- osc_page_gang_cbt cb, void *cbdata)
-{
- struct osc_page *ops;
- void **pvec;
- pgoff_t idx;
- unsigned int nr;
- unsigned int i;
- unsigned int j;
- int res = CLP_GANG_OKAY;
- bool tree_lock = true;
-
- idx = start;
- pvec = osc_env_info(env)->oti_pvec;
- spin_lock(&osc->oo_tree_lock);
- while ((nr = radix_tree_gang_lookup(&osc->oo_tree, pvec,
- idx, OTI_PVEC_SIZE)) > 0) {
- struct cl_page *page;
- bool end_of_region = false;
-
- for (i = 0, j = 0; i < nr; ++i) {
- ops = pvec[i];
- pvec[i] = NULL;
-
- idx = osc_index(ops);
- if (idx > end) {
- end_of_region = true;
- break;
- }
-
- page = ops->ops_cl.cpl_page;
- LASSERT(page->cp_type == CPT_CACHEABLE);
- if (page->cp_state == CPS_FREEING)
- continue;
-
- cl_page_get(page);
- lu_ref_add_atomic(&page->cp_reference,
- "gang_lookup", current);
- pvec[j++] = ops;
- }
- ++idx;
-
- /*
- * Here a delicate locking dance is performed. The current
- * thread holds a reference to a page, but has to own it before
- * it can be placed into the queue. Owning implies waiting, so
- * the radix-tree lock is to be released. After the wait one has
- * to check that the pages weren't truncated (cl_page_own()
- * returns an error in that case).
- */
- spin_unlock(&osc->oo_tree_lock);
- tree_lock = false;
-
- for (i = 0; i < j; ++i) {
- ops = pvec[i];
- if (res == CLP_GANG_OKAY)
- res = (*cb)(env, io, ops, cbdata);
-
- page = ops->ops_cl.cpl_page;
- lu_ref_del(&page->cp_reference, "gang_lookup", current);
- cl_page_put(env, page);
- }
- if (nr < OTI_PVEC_SIZE || end_of_region)
- break;
-
- if (res == CLP_GANG_OKAY && need_resched())
- res = CLP_GANG_RESCHED;
- if (res != CLP_GANG_OKAY)
- break;
-
- spin_lock(&osc->oo_tree_lock);
- tree_lock = true;
- }
- if (tree_lock)
- spin_unlock(&osc->oo_tree_lock);
- return res;
-}
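-
-/* Callback contract sketch: while the cb returns CLP_GANG_OKAY the scan
- * keeps going; CLP_GANG_RESCHED, raised here once need_resched() fires,
- * tells callers such as osc_lock_discard_pages() below to cond_resched()
- * and retry from where they left off. Pages already referenced in the
- * current batch are still visited either way so their references get
- * dropped.
- */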
-
-/**
- * Check if page @page is covered by an extra lock or discard it.
- */
-static int check_and_discard_cb(const struct lu_env *env, struct cl_io *io,
- struct osc_page *ops, void *cbdata)
-{
- struct osc_thread_info *info = osc_env_info(env);
- struct osc_object *osc = cbdata;
- pgoff_t index;
-
- index = osc_index(ops);
- if (index >= info->oti_fn_index) {
- struct ldlm_lock *tmp;
- struct cl_page *page = ops->ops_cl.cpl_page;
-
- /* refresh non-overlapped index */
- tmp = osc_dlmlock_at_pgoff(env, osc, index,
- OSC_DAP_FL_TEST_LOCK);
- if (tmp) {
- __u64 end = tmp->l_policy_data.l_extent.end;
- /* Cache the first-non-overlapped index so as to skip
- * all pages within [index, oti_fn_index). This is safe
- * because if tmp lock is canceled, it will discard
- * these pages.
- */
- info->oti_fn_index = cl_index(osc2cl(osc), end + 1);
- if (end == OBD_OBJECT_EOF)
- info->oti_fn_index = CL_PAGE_EOF;
- LDLM_LOCK_PUT(tmp);
- } else if (cl_page_own(env, io, page) == 0) {
- /* discard the page */
- cl_page_discard(env, io, page);
- cl_page_disown(env, io, page);
- } else {
- LASSERT(page->cp_state == CPS_FREEING);
- }
- }
-
- info->oti_next_index = index + 1;
- return CLP_GANG_OKAY;
-}
-
-static int discard_cb(const struct lu_env *env, struct cl_io *io,
- struct osc_page *ops, void *cbdata)
-{
- struct osc_thread_info *info = osc_env_info(env);
- struct cl_page *page = ops->ops_cl.cpl_page;
-
- /* page is top page. */
- info->oti_next_index = osc_index(ops) + 1;
- if (cl_page_own(env, io, page) == 0) {
- if (page->cp_type == CPT_CACHEABLE &&
- PageDirty(cl_page_vmpage(page)))
- CL_PAGE_DEBUG(D_ERROR, env, page,
- "discard dirty page?\n");
-
- /* discard the page */
- cl_page_discard(env, io, page);
- cl_page_disown(env, io, page);
- } else {
- LASSERT(page->cp_state == CPS_FREEING);
- }
-
- return CLP_GANG_OKAY;
-}
-
-/**
- * Discard pages protected by the given lock. This function traverses the
- * radix tree to find all covering pages and discards them. If a page is
- * covered by another lock, it should remain in cache.
- *
- * If an error happens at any step, the process continues anyway (the
- * reasoning behind this being that lock cancellation cannot be delayed
- * indefinitely).
- */
-int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc,
- pgoff_t start, pgoff_t end, enum cl_lock_mode mode)
-{
- struct osc_thread_info *info = osc_env_info(env);
- struct cl_io *io = &info->oti_io;
- osc_page_gang_cbt cb;
- int res;
- int result;
-
- io->ci_obj = cl_object_top(osc2cl(osc));
- io->ci_ignore_layout = 1;
- result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
- if (result != 0)
- goto out;
-
- cb = mode == CLM_READ ? check_and_discard_cb : discard_cb;
- info->oti_fn_index = start;
- info->oti_next_index = start;
- do {
- res = osc_page_gang_lookup(env, io, osc,
- info->oti_next_index, end, cb, osc);
- if (info->oti_next_index > end)
- break;
-
- if (res == CLP_GANG_RESCHED)
- cond_resched();
- } while (res != CLP_GANG_OKAY);
-out:
- cl_io_fini(env, io);
- return result;
-}
-
-/** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
deleted file mode 100644
index 2d3cba16ef34..000000000000
--- a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
+++ /dev/null
@@ -1,681 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Internal interfaces of OSC layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
- */
-
-#ifndef OSC_CL_INTERNAL_H
-#define OSC_CL_INTERNAL_H
-
-#include <obd.h>
-/* osc_build_res_name() */
-#include <cl_object.h>
-#include "osc_internal.h"
-
-/** \defgroup osc osc
- * @{
- */
-
-struct osc_extent;
-
-/**
- * State maintained by osc layer for each IO context.
- */
-struct osc_io {
- /** super class */
- struct cl_io_slice oi_cl;
- /** true if this io is lockless. */
- unsigned int oi_lockless:1,
- /** true if this io is counted as active IO */
- oi_is_active:1;
- /** how many LRU pages are reserved for this IO */
- unsigned long oi_lru_reserved;
-
- /** active extent: we know how many bytes are going to be written,
- * so having an active extent will prevent it from being fragmented
- */
- struct osc_extent *oi_active;
- /** partially truncated extent, we need to hold this extent to prevent
- * page writeback from happening.
- */
- struct osc_extent *oi_trunc;
-
- /** write osc_lock for this IO, used by osc_extent_find(). */
- struct osc_lock *oi_write_osclock;
- struct obdo oi_oa;
- struct osc_async_cbargs {
- bool opc_rpc_sent;
- int opc_rc;
- struct completion opc_sync;
- } oi_cbarg;
-};
-
-/**
- * State maintained by osc layer for the duration of a system call.
- */
-struct osc_session {
- struct osc_io os_io;
-};
-
-#define OTI_PVEC_SIZE 256
-struct osc_thread_info {
- struct ldlm_res_id oti_resname;
- union ldlm_policy_data oti_policy;
- struct cl_lock_descr oti_descr;
- struct cl_attr oti_attr;
- struct lustre_handle oti_handle;
- struct cl_page_list oti_plist;
- struct cl_io oti_io;
- void *oti_pvec[OTI_PVEC_SIZE];
- /**
- * Fields used by cl_lock_discard_pages().
- */
- pgoff_t oti_next_index;
- pgoff_t oti_fn_index; /* first non-overlapped index */
- struct cl_sync_io oti_anchor;
- struct cl_req_attr oti_req_attr;
-};
-
-struct osc_object {
- struct cl_object oo_cl;
- struct lov_oinfo *oo_oinfo;
- /**
- * True if locking against this stripe got -EUSERS.
- */
- int oo_contended;
- unsigned long oo_contention_time;
- /**
- * used by the osc to keep track of what objects to build into rpcs.
- * Protected by client_obd->cli_loi_list_lock.
- */
- struct list_head oo_ready_item;
- struct list_head oo_hp_ready_item;
- struct list_head oo_write_item;
- struct list_head oo_read_item;
-
- /**
- * extent is a red black tree to manage (async) dirty pages.
- */
- struct rb_root oo_root;
- /**
- * Manage write(dirty) extents.
- */
- struct list_head oo_hp_exts; /* list of hp extents */
- struct list_head oo_urgent_exts; /* list of writeback extents */
- struct list_head oo_rpc_exts;
-
- struct list_head oo_reading_exts;
-
- atomic_t oo_nr_reads;
- atomic_t oo_nr_writes;
-
- /** Protect extent tree. Will be used to protect
- * oo_{read|write}_pages soon.
- */
- spinlock_t oo_lock;
-
- /**
- * Radix tree for caching pages
- */
- struct radix_tree_root oo_tree;
- spinlock_t oo_tree_lock;
- unsigned long oo_npages;
-
- /* Protect osc_lock this osc_object has */
- spinlock_t oo_ol_spin;
- struct list_head oo_ol_list;
-
- /** number of active IOs of this object */
- atomic_t oo_nr_ios;
- wait_queue_head_t oo_io_waitq;
-};
-
-static inline void osc_object_lock(struct osc_object *obj)
-{
- spin_lock(&obj->oo_lock);
-}
-
-static inline int osc_object_trylock(struct osc_object *obj)
-{
- return spin_trylock(&obj->oo_lock);
-}
-
-static inline void osc_object_unlock(struct osc_object *obj)
-{
- spin_unlock(&obj->oo_lock);
-}
-
-static inline int osc_object_is_locked(struct osc_object *obj)
-{
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
- return spin_is_locked(&obj->oo_lock);
-#else
- /*
- * It is not perfect to return true all the time.
- * But since this function is only used for assertion
- * and checking, it seems OK.
- */
- return 1;
-#endif
-}
-
-/*
- * Lock "micro-states" for osc layer.
- */
-enum osc_lock_state {
- OLS_NEW,
- OLS_ENQUEUED,
- OLS_UPCALL_RECEIVED,
- OLS_GRANTED,
- OLS_CANCELLED
-};
-
-/**
- * osc-private state of cl_lock.
- *
- * Interaction with DLM.
- *
- * Once receive upcall is invoked, osc_lock remembers a handle of DLM lock in
- * osc_lock::ols_handle and a pointer to that lock in osc_lock::ols_dlmlock.
- *
- * This pointer is protected through a reference, acquired by
- * osc_lock_upcall0(). Also, an additional reference is acquired by
- * ldlm_lock_addref() call protecting the lock from cancellation, until
- * osc_lock_unuse() releases it.
- *
- * Below is a description of how lock references are acquired and released
- * inside of DLM.
- *
- * - When new lock is created and enqueued to the server (ldlm_cli_enqueue())
- * - ldlm_lock_create()
- * - ldlm_lock_new(): initializes a lock with 2 references. One for
- * the caller (released when reply from the server is received, or on
- * error), and another for the hash table.
- * - ldlm_lock_addref_internal(): protects the lock from cancellation.
- *
- * - When reply is received from the server (osc_enqueue_interpret())
- * - ldlm_cli_enqueue_fini()
- * - LDLM_LOCK_PUT(): releases caller reference acquired by
- * ldlm_lock_new().
- * - if (rc != 0)
- * ldlm_lock_decref(): error case: matches ldlm_cli_enqueue().
- * - ldlm_lock_decref(): for async locks, matches ldlm_cli_enqueue().
- *
- * - When lock is being cancelled (ldlm_lock_cancel())
- * - ldlm_lock_destroy()
- * - LDLM_LOCK_PUT(): releases hash-table reference acquired by
- * ldlm_lock_new().
- *
- * osc_lock is detached from ldlm_lock by osc_lock_detach() that is called
- * either when lock is cancelled (osc_lock_blocking()), or when locks is
- * deleted without cancellation (e.g., from cl_locks_prune()). In the latter
- * case ldlm lock remains in memory, and can be re-attached to osc_lock in the
- * future.
- */
-struct osc_lock {
- struct cl_lock_slice ols_cl;
- /** Internal lock to protect states, etc. */
- spinlock_t ols_lock;
- /** Owner sleeps on this channel for state change */
- struct cl_sync_io *ols_owner;
- /** waiting list for this lock to be cancelled */
- struct list_head ols_waiting_list;
- /** wait entry of ols_waiting_list */
- struct list_head ols_wait_entry;
- /** list entry for osc_object::oo_ol_list */
- struct list_head ols_nextlock_oscobj;
-
- /** underlying DLM lock */
- struct ldlm_lock *ols_dlmlock;
- /** DLM flags with which osc_lock::ols_lock was enqueued */
- __u64 ols_flags;
- /** osc_lock::ols_lock handle */
- struct lustre_handle ols_handle;
- struct ldlm_enqueue_info ols_einfo;
- enum osc_lock_state ols_state;
- /** lock value block */
- struct ost_lvb ols_lvb;
-
- /**
- * true, if ldlm_lock_addref() was called against
- * osc_lock::ols_lock. This is used for sanity checking.
- *
- * \see osc_lock::ols_has_ref
- */
- unsigned ols_hold :1,
- /**
- * this is much like osc_lock::ols_hold, except that this bit is
- * cleared _after_ reference in released in osc_lock_unuse(). This
- * fine distinction is needed because:
- *
- * - if ldlm lock still has a reference, osc_ast_data_get() needs
- * to return associated cl_lock (so that a flag is needed that is
- * cleared after ldlm_lock_decref() returned), and
- *
- * - ldlm_lock_decref() can invoke blocking ast (for a
- * LDLM_FL_CBPENDING lock), and osc_lock functions like
- * osc_lock_cancel() called from there need to know whether to
- * release lock reference (so that a flag is needed that is
- * cleared before ldlm_lock_decref() is called).
- */
- ols_has_ref:1,
- /**
- * inherit the lockless attribute from top level cl_io.
- * If true, osc_lock_enqueue is able to tolerate the -EUSERS error.
- */
- ols_locklessable:1,
- /**
- * if set, the osc_lock is a glimpse lock. For glimpse locks, we treat
- * the EVAVAIL error as tolerable; this lets the upper logic happily
- * wait for the glimpse locks sent to each OST to complete.
- * A glimpse lock converts to a normal lock if the server lock is
- * granted.
- * A glimpse lock should be destroyed immediately after use.
- */
- ols_glimpse:1,
- /**
- * For async glimpse lock.
- */
- ols_agl:1;
-};
-
-/**
- * Page state private for osc layer.
- */
-struct osc_page {
- struct cl_page_slice ops_cl;
- /**
- * Page queues used by osc to detect when RPC can be formed.
- */
- struct osc_async_page ops_oap;
- /**
- * An offset within page from which next transfer starts. This is used
- * by cl_page_clip() to submit partial page transfers.
- */
- int ops_from;
- /**
- * An offset within page at which next transfer ends.
- *
- * \see osc_page::ops_from.
- */
- int ops_to;
- /**
- * Boolean, true iff page is under transfer. Used for sanity checking.
- */
- unsigned ops_transfer_pinned:1,
- /**
- * in LRU?
- */
- ops_in_lru:1,
- /**
- * Set if the page must be transferred with OBD_BRW_SRVLOCK.
- */
- ops_srvlock:1;
- /**
- * lru page list. See osc_lru_{del|use}() in osc_page.c for usage.
- */
- struct list_head ops_lru;
- /**
- * Submit time - the time when the page is starting RPC. For debugging.
- */
- unsigned long ops_submit_time;
-};
-
-extern struct kmem_cache *osc_lock_kmem;
-extern struct kmem_cache *osc_object_kmem;
-extern struct kmem_cache *osc_thread_kmem;
-extern struct kmem_cache *osc_session_kmem;
-extern struct kmem_cache *osc_extent_kmem;
-
-extern struct lu_device_type osc_device_type;
-extern struct lu_context_key osc_key;
-extern struct lu_context_key osc_session_key;
-
-#define OSC_FLAGS (ASYNC_URGENT | ASYNC_READY)
-
-int osc_lock_init(const struct lu_env *env,
- struct cl_object *obj, struct cl_lock *lock,
- const struct cl_io *io);
-int osc_io_init(const struct lu_env *env,
- struct cl_object *obj, struct cl_io *io);
-struct lu_object *osc_object_alloc(const struct lu_env *env,
- const struct lu_object_header *hdr,
- struct lu_device *dev);
-int osc_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t ind);
-
-void osc_index2policy(union ldlm_policy_data *policy,
- const struct cl_object *obj,
- pgoff_t start, pgoff_t end);
-int osc_lvb_print(const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct ost_lvb *lvb);
-
-void osc_lru_add_batch(struct client_obd *cli, struct list_head *list);
-void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
- enum cl_req_type crt, int brw_flags);
-int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops);
-int osc_set_async_flags(struct osc_object *obj, struct osc_page *opg,
- u32 async_flags);
-int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
- struct page *page, loff_t offset);
-int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
- struct osc_page *ops);
-int osc_page_cache_add(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io);
-int osc_teardown_async_page(const struct lu_env *env, struct osc_object *obj,
- struct osc_page *ops);
-int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
- struct osc_page *ops);
-int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
- struct list_head *list, int cmd, int brw_flags);
-int osc_cache_truncate_start(const struct lu_env *env, struct osc_object *obj,
- u64 size, struct osc_extent **extp);
-void osc_cache_truncate_end(const struct lu_env *env, struct osc_extent *ext);
-int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj,
- pgoff_t start, pgoff_t end, int hp, int discard);
-int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj,
- pgoff_t start, pgoff_t end);
-void osc_io_unplug(const struct lu_env *env, struct client_obd *cli,
- struct osc_object *osc);
-int lru_queue_work(const struct lu_env *env, void *data);
-
-void osc_object_set_contended(struct osc_object *obj);
-void osc_object_clear_contended(struct osc_object *obj);
-int osc_object_is_contended(struct osc_object *obj);
-
-int osc_lock_is_lockless(const struct osc_lock *olck);
-
-/*****************************************************************************
- *
- * Accessors.
- *
- */
-
-static inline struct osc_thread_info *osc_env_info(const struct lu_env *env)
-{
- struct osc_thread_info *info;
-
- info = lu_context_key_get(&env->le_ctx, &osc_key);
- LASSERT(info);
- return info;
-}
-
-static inline struct osc_session *osc_env_session(const struct lu_env *env)
-{
- struct osc_session *ses;
-
- ses = lu_context_key_get(env->le_ses, &osc_session_key);
- LASSERT(ses);
- return ses;
-}
-
-static inline struct osc_io *osc_env_io(const struct lu_env *env)
-{
- return &osc_env_session(env)->os_io;
-}
-
-static inline int osc_is_object(const struct lu_object *obj)
-{
- return obj->lo_dev->ld_type == &osc_device_type;
-}
-
-static inline struct osc_device *lu2osc_dev(const struct lu_device *d)
-{
- LINVRNT(d->ld_type == &osc_device_type);
- return container_of(d, struct osc_device, od_cl.cd_lu_dev);
-}
-
-static inline struct obd_export *osc_export(const struct osc_object *obj)
-{
- return lu2osc_dev(obj->oo_cl.co_lu.lo_dev)->od_exp;
-}
-
-static inline struct client_obd *osc_cli(const struct osc_object *obj)
-{
- return &osc_export(obj)->exp_obd->u.cli;
-}
-
-static inline struct osc_object *cl2osc(const struct cl_object *obj)
-{
- LINVRNT(osc_is_object(&obj->co_lu));
- return container_of(obj, struct osc_object, oo_cl);
-}
-
-static inline struct cl_object *osc2cl(const struct osc_object *obj)
-{
- return (struct cl_object *)&obj->oo_cl;
-}
-
-static inline enum ldlm_mode osc_cl_lock2ldlm(enum cl_lock_mode mode)
-{
- LASSERT(mode == CLM_READ || mode == CLM_WRITE || mode == CLM_GROUP);
- if (mode == CLM_READ)
- return LCK_PR;
- else if (mode == CLM_WRITE)
- return LCK_PW;
- else
- return LCK_GROUP;
-}
-
-static inline enum cl_lock_mode osc_ldlm2cl_lock(enum ldlm_mode mode)
-{
- LASSERT(mode == LCK_PR || mode == LCK_PW || mode == LCK_GROUP);
- if (mode == LCK_PR)
- return CLM_READ;
- else if (mode == LCK_PW)
- return CLM_WRITE;
- else
- return CLM_GROUP;
-}
-
-static inline struct osc_page *cl2osc_page(const struct cl_page_slice *slice)
-{
- LINVRNT(osc_is_object(&slice->cpl_obj->co_lu));
- return container_of(slice, struct osc_page, ops_cl);
-}
-
-static inline struct osc_page *oap2osc(struct osc_async_page *oap)
-{
- return container_of_safe(oap, struct osc_page, ops_oap);
-}
-
-static inline pgoff_t osc_index(struct osc_page *opg)
-{
- return opg->ops_cl.cpl_index;
-}
-
-static inline struct cl_page *oap2cl_page(struct osc_async_page *oap)
-{
- return oap2osc(oap)->ops_cl.cpl_page;
-}
-
-static inline struct osc_page *oap2osc_page(struct osc_async_page *oap)
-{
- return (struct osc_page *)container_of(oap, struct osc_page, ops_oap);
-}
-
-static inline struct osc_page *
-osc_cl_page_osc(struct cl_page *page, struct osc_object *osc)
-{
- const struct cl_page_slice *slice;
-
- LASSERT(osc);
- slice = cl_object_page_slice(&osc->oo_cl, page);
- return cl2osc_page(slice);
-}
-
-static inline struct osc_lock *cl2osc_lock(const struct cl_lock_slice *slice)
-{
- LINVRNT(osc_is_object(&slice->cls_obj->co_lu));
- return container_of(slice, struct osc_lock, ols_cl);
-}
-
-static inline struct osc_lock *osc_lock_at(const struct cl_lock *lock)
-{
- return cl2osc_lock(cl_lock_at(lock, &osc_device_type));
-}
-
-static inline int osc_io_srvlock(struct osc_io *oio)
-{
- return (oio->oi_lockless && !oio->oi_cl.cis_io->ci_no_srvlock);
-}
-
-enum osc_extent_state {
- OES_INV = 0, /** extent is just initialized or destroyed */
- OES_ACTIVE = 1, /** process is using this extent */
- OES_CACHE = 2, /** extent is ready for IO */
- OES_LOCKING = 3, /** locking page to prepare IO */
- OES_LOCK_DONE = 4, /** locking finished, ready to send */
- OES_RPC = 5, /** in RPC */
- OES_TRUNC = 6, /** being truncated */
- OES_STATE_MAX
-};
-
-/**
- * osc_extent data to manage dirty pages.
- * osc_extent has the following attributes:
- * 1. all pages in the same extent must be in one RPC in write back;
- * 2. # of pages must be less than max_pages_per_rpc - implied by 1;
- * 3. must be covered by only 1 osc_lock;
- * 4. exclusive. It's impossible to have overlapped osc_extent.
- *
- * The lifetime of an extent is from when the 1st page is dirtied to when
- * all pages inside it are written out.
- *
- * LOCKING ORDER
- * =============
- * page lock -> cl_loi_list_lock -> object lock(osc_object::oo_lock)
- */
-struct osc_extent {
- /** red-black tree node */
- struct rb_node oe_node;
- /** osc_object of this extent */
- struct osc_object *oe_obj;
- /** refcount, removed from red-black tree if reaches zero. */
- atomic_t oe_refc;
- /** busy if non-zero */
- atomic_t oe_users;
- /** link list of osc_object's oo_{hp|urgent|locking}_exts. */
- struct list_head oe_link;
- /** state of this extent */
- enum osc_extent_state oe_state;
- /** flags for this extent. */
- unsigned int oe_intree:1,
- /** 0 is write, 1 is read */
- oe_rw:1,
- /** sync extent, queued by osc_queue_sync_pages() */
- oe_sync:1,
- /** set if this extent has partial, sync pages.
- * Extents with partial page(s) can't merge with others in RPC
- */
- oe_no_merge:1,
- oe_srvlock:1,
- oe_memalloc:1,
- /** an ACTIVE extent is going to be truncated, so when this extent
- * is released, it will turn into TRUNC state instead of CACHE.
- */
- oe_trunc_pending:1,
- /** this extent should be written asap and someone may wait for the
- * write to finish. This bit is usually set along with urgent if
- * the extent was in CACHE state.
- * A fsync_wait extent can't be merged because the new extent
- * region may exceed the fsync range.
- */
- oe_fsync_wait:1,
- /** covering lock is being canceled */
- oe_hp:1,
- /** this extent should be written back asap. set if one of the pages
- * was queued by the page writeback daemon, or by a sync write or read
- * request.
- */
- oe_urgent:1;
- /** how many grants were allocated for this extent. There is no
- * grant allocated for reading extents and sync write extents.
- */
- unsigned int oe_grants;
- /** # of dirty pages in this extent */
- unsigned int oe_nr_pages;
- /** list of pending oap pages. Pages in this list are NOT sorted. */
- struct list_head oe_pages;
- /** Since an extent has to be written out atomically, this is used to
- * remember the next page that needs to be locked to write this extent out.
- * Not used right now.
- */
- struct osc_page *oe_next_page;
- /** start and end index of this extent, including start and end
- * themselves. Page offset here is the page index of osc_pages.
- * oe_start is used as the key for the red-black tree.
- */
- pgoff_t oe_start;
- pgoff_t oe_end;
- /** maximum ending index of this extent; this is limited by
- * max_pages_per_rpc, the lock extent and the chunk size.
- */
- pgoff_t oe_max_end;
- /** waitqueue - for those who want to be notified if this extent's
- * state has changed.
- */
- wait_queue_head_t oe_waitq;
- /** lock covering this extent */
- struct ldlm_lock *oe_dlmlock;
- /** owner task of this extent. Must be set while this extent is in IO. */
- struct task_struct *oe_owner;
- /** return value of writeback. Anybody waiting on this extent can
- * learn the result from this value.
- */
- int oe_rc;
- /** max pages per rpc when this extent was created */
- unsigned int oe_mppr;
-};
-
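-/*
- * Some invariants implied by the fields above (informal, not asserted
- * here): oe_start <= oe_end <= oe_max_end always holds; the extent
- * stays in the red-black tree for as long as oe_refc is non-zero; and
- * a non-zero oe_users marks the extent busy, e.g. while it is owned
- * for IO (oe_owner set).
- */
-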
-int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
- int sent, int rc);
-void osc_extent_release(const struct lu_env *env, struct osc_extent *ext);
-
-int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc,
- pgoff_t start, pgoff_t end, enum cl_lock_mode mode);
-
-typedef int (*osc_page_gang_cbt)(const struct lu_env *, struct cl_io *,
- struct osc_page *, void *);
-int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io,
- struct osc_object *osc, pgoff_t start, pgoff_t end,
- osc_page_gang_cbt cb, void *cbdata);
-/** @} osc */
-
-#endif /* OSC_CL_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/osc/osc_dev.c b/drivers/staging/lustre/lustre/osc/osc_dev.c
deleted file mode 100644
index 2b5f324743e2..000000000000
--- a/drivers/staging/lustre/lustre/osc/osc_dev.c
+++ /dev/null
@@ -1,246 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_device, for OSC layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_OSC
-
-/* class_name2obd() */
-#include <obd_class.h>
-
-#include "osc_cl_internal.h"
-
-/** \addtogroup osc
- * @{
- */
-
-struct kmem_cache *osc_lock_kmem;
-struct kmem_cache *osc_object_kmem;
-struct kmem_cache *osc_thread_kmem;
-struct kmem_cache *osc_session_kmem;
-struct kmem_cache *osc_extent_kmem;
-struct kmem_cache *osc_quota_kmem;
-
-struct lu_kmem_descr osc_caches[] = {
- {
- .ckd_cache = &osc_lock_kmem,
- .ckd_name = "osc_lock_kmem",
- .ckd_size = sizeof(struct osc_lock)
- },
- {
- .ckd_cache = &osc_object_kmem,
- .ckd_name = "osc_object_kmem",
- .ckd_size = sizeof(struct osc_object)
- },
- {
- .ckd_cache = &osc_thread_kmem,
- .ckd_name = "osc_thread_kmem",
- .ckd_size = sizeof(struct osc_thread_info)
- },
- {
- .ckd_cache = &osc_session_kmem,
- .ckd_name = "osc_session_kmem",
- .ckd_size = sizeof(struct osc_session)
- },
- {
- .ckd_cache = &osc_extent_kmem,
- .ckd_name = "osc_extent_kmem",
- .ckd_size = sizeof(struct osc_extent)
- },
- {
- .ckd_cache = &osc_quota_kmem,
- .ckd_name = "osc_quota_kmem",
- .ckd_size = sizeof(struct osc_quota_info)
- },
- {
- .ckd_cache = NULL
- }
-};
-
-/*****************************************************************************
- *
- * Type conversions.
- *
- */
-
-static struct lu_device *osc2lu_dev(struct osc_device *osc)
-{
- return &osc->od_cl.cd_lu_dev;
-}
-
-/*****************************************************************************
- *
- * Osc device and device type functions.
- *
- */
-
-static void *osc_key_init(const struct lu_context *ctx,
- struct lu_context_key *key)
-{
- struct osc_thread_info *info;
-
- info = kmem_cache_zalloc(osc_thread_kmem, GFP_NOFS);
- if (!info)
- info = ERR_PTR(-ENOMEM);
- return info;
-}
-
-static void osc_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
- struct osc_thread_info *info = data;
-
- kmem_cache_free(osc_thread_kmem, info);
-}
-
-struct lu_context_key osc_key = {
- .lct_tags = LCT_CL_THREAD,
- .lct_init = osc_key_init,
- .lct_fini = osc_key_fini
-};
-
-static void *osc_session_init(const struct lu_context *ctx,
- struct lu_context_key *key)
-{
- struct osc_session *info;
-
- info = kmem_cache_zalloc(osc_session_kmem, GFP_NOFS);
- if (!info)
- info = ERR_PTR(-ENOMEM);
- return info;
-}
-
-static void osc_session_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
- struct osc_session *info = data;
-
- kmem_cache_free(osc_session_kmem, info);
-}
-
-struct lu_context_key osc_session_key = {
- .lct_tags = LCT_SESSION,
- .lct_init = osc_session_init,
- .lct_fini = osc_session_fini
-};
-
-/* type constructor/destructor: osc_type_{init,fini,start,stop}(). */
-LU_TYPE_INIT_FINI(osc, &osc_key, &osc_session_key);
-
-static int osc_cl_process_config(const struct lu_env *env,
- struct lu_device *d, struct lustre_cfg *cfg)
-{
- return osc_process_config_base(d->ld_obd, cfg);
-}
-
-static const struct lu_device_operations osc_lu_ops = {
- .ldo_object_alloc = osc_object_alloc,
- .ldo_process_config = osc_cl_process_config,
- .ldo_recovery_complete = NULL
-};
-
-static int osc_device_init(const struct lu_env *env, struct lu_device *d,
- const char *name, struct lu_device *next)
-{
- return 0;
-}
-
-static struct lu_device *osc_device_fini(const struct lu_env *env,
- struct lu_device *d)
-{
- return NULL;
-}
-
-static struct lu_device *osc_device_free(const struct lu_env *env,
- struct lu_device *d)
-{
- struct osc_device *od = lu2osc_dev(d);
-
- cl_device_fini(lu2cl_dev(d));
- kfree(od);
- return NULL;
-}
-
-static struct lu_device *osc_device_alloc(const struct lu_env *env,
- struct lu_device_type *t,
- struct lustre_cfg *cfg)
-{
- struct lu_device *d;
- struct osc_device *od;
- struct obd_device *obd;
- int rc;
-
- od = kzalloc(sizeof(*od), GFP_NOFS);
- if (!od)
- return ERR_PTR(-ENOMEM);
-
- cl_device_init(&od->od_cl, t);
- d = osc2lu_dev(od);
- d->ld_ops = &osc_lu_ops;
-
- /* Setup OSC OBD */
- obd = class_name2obd(lustre_cfg_string(cfg, 0));
- LASSERT(obd);
- rc = osc_setup(obd, cfg);
- if (rc) {
- osc_device_free(env, d);
- return ERR_PTR(rc);
- }
- od->od_exp = obd->obd_self_export;
- return d;
-}
-
-static const struct lu_device_type_operations osc_device_type_ops = {
- .ldto_init = osc_type_init,
- .ldto_fini = osc_type_fini,
-
- .ldto_start = osc_type_start,
- .ldto_stop = osc_type_stop,
-
- .ldto_device_alloc = osc_device_alloc,
- .ldto_device_free = osc_device_free,
-
- .ldto_device_init = osc_device_init,
- .ldto_device_fini = osc_device_fini
-};
-
-struct lu_device_type osc_device_type = {
- .ldt_tags = LU_DEVICE_CL,
- .ldt_name = LUSTRE_OSC_NAME,
- .ldt_ops = &osc_device_type_ops,
- .ldt_ctx_tags = LCT_CL_THREAD
-};
-
-/** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_internal.h b/drivers/staging/lustre/lustre/osc/osc_internal.h
deleted file mode 100644
index 4ddba1354bef..000000000000
--- a/drivers/staging/lustre/lustre/osc/osc_internal.h
+++ /dev/null
@@ -1,237 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef OSC_INTERNAL_H
-#define OSC_INTERNAL_H
-
-#define OAP_MAGIC 8675309
-
-extern atomic_t osc_pool_req_count;
-extern unsigned int osc_reqpool_maxreqcount;
-extern struct ptlrpc_request_pool *osc_rq_pool;
-
-struct lu_env;
-
-enum async_flags {
- ASYNC_READY = 0x1, /* ap_make_ready will not be called before this
- * page is added to an rpc
- */
- ASYNC_URGENT = 0x2, /* page must be put into an RPC before return */
- ASYNC_COUNT_STABLE = 0x4, /* ap_refresh_count will not be called
- * to give the caller a chance to update
- * or cancel the size of the io
- */
- ASYNC_HP = 0x10,
-};
-
-struct osc_async_page {
- int oap_magic;
- unsigned short oap_cmd;
- unsigned short oap_interrupted:1;
-
- struct list_head oap_pending_item;
- struct list_head oap_rpc_item;
-
- u64 oap_obj_off;
- unsigned int oap_page_off;
- enum async_flags oap_async_flags;
-
- struct brw_page oap_brw_page;
-
- struct ptlrpc_request *oap_request;
- struct client_obd *oap_cli;
- struct osc_object *oap_obj;
-
- spinlock_t oap_lock;
-};
-
-#define oap_page oap_brw_page.pg
-#define oap_count oap_brw_page.count
-#define oap_brw_flags oap_brw_page.flag
-
-static inline struct osc_async_page *brw_page2oap(struct brw_page *pga)
-{
- return (struct osc_async_page *)container_of(pga, struct osc_async_page,
- oap_brw_page);
-}
-
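-/*
- * brw_page2oap() inverts the embedding used by the aliases above: for
- * any brw_page obtained from an osc_async_page, e.g.
- *
- *	struct osc_async_page *oap = brw_page2oap(pga);
- *
- * oap->oap_page then names the same struct page as pga->pg.
- */
-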
-struct osc_cache_waiter {
- struct list_head ocw_entry;
- wait_queue_head_t ocw_waitq;
- struct osc_async_page *ocw_oap;
- int ocw_grant;
- int ocw_rc;
-};
-
-void osc_wake_cache_waiters(struct client_obd *cli);
-int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes);
-void osc_update_next_shrink(struct client_obd *cli);
-
-/*
- * cl integration.
- */
-#include <cl_object.h>
-
-extern struct ptlrpc_request_set *PTLRPCD_SET;
-
-typedef int (*osc_enqueue_upcall_f)(void *cookie, struct lustre_handle *lockh,
- int rc);
-
-int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
- __u64 *flags, union ldlm_policy_data *policy,
- struct ost_lvb *lvb, int kms_valid,
- osc_enqueue_upcall_f upcall,
- void *cookie, struct ldlm_enqueue_info *einfo,
- struct ptlrpc_request_set *rqset, int async, int agl);
-
-int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
- enum ldlm_type type, union ldlm_policy_data *policy,
- enum ldlm_mode mode, __u64 *flags, void *data,
- struct lustre_handle *lockh, int unref);
-
-int osc_setattr_async(struct obd_export *exp, struct obdo *oa,
- obd_enqueue_update_f upcall, void *cookie,
- struct ptlrpc_request_set *rqset);
-int osc_punch_base(struct obd_export *exp, struct obdo *oa,
- obd_enqueue_update_f upcall, void *cookie,
- struct ptlrpc_request_set *rqset);
-int osc_sync_base(struct osc_object *obj, struct obdo *oa,
- obd_enqueue_update_f upcall, void *cookie,
- struct ptlrpc_request_set *rqset);
-
-int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *cfg);
-int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
- struct list_head *ext_list, int cmd);
-long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
- long target, bool force);
-unsigned long osc_lru_reserve(struct client_obd *cli, unsigned long npages);
-void osc_lru_unreserve(struct client_obd *cli, unsigned long npages);
-
-unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock);
-
-int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg);
-
-void lproc_osc_attach_seqstat(struct obd_device *dev);
-void lprocfs_osc_init_vars(struct lprocfs_static_vars *lvars);
-
-extern struct lu_device_type osc_device_type;
-
-static inline int osc_recoverable_error(int rc)
-{
- return (rc == -EIO || rc == -EROFS || rc == -ENOMEM ||
- rc == -EAGAIN || rc == -EINPROGRESS);
-}
-
-static inline unsigned long rpcs_in_flight(struct client_obd *cli)
-{
- return cli->cl_r_in_flight + cli->cl_w_in_flight;
-}
-
-static inline char *cli_name(struct client_obd *cli)
-{
- return cli->cl_import->imp_obd->obd_name;
-}
-
-struct osc_device {
- struct cl_device od_cl;
- struct obd_export *od_exp;
-
- /* Write stats are actually protected by client_obd's lock. */
- struct osc_stats {
- u64 os_lockless_writes; /* by bytes */
- u64 os_lockless_reads; /* by bytes */
- u64 os_lockless_truncates; /* by number of operations */
- } od_stats;
-
- /* configuration item(s) */
- int od_contention_time;
- int od_lockless_truncate;
-};
-
-static inline struct osc_device *obd2osc_dev(const struct obd_device *d)
-{
- return container_of_safe(d->obd_lu_dev, struct osc_device, od_cl.cd_lu_dev);
-}
-
-extern struct lu_kmem_descr osc_caches[];
-
-extern struct kmem_cache *osc_quota_kmem;
-struct osc_quota_info {
- /** linkage for quota hash table */
- struct rhash_head oqi_hash;
- u32 oqi_id;
- struct rcu_head rcu;
-};
-
-int osc_quota_setup(struct obd_device *obd);
-int osc_quota_cleanup(struct obd_device *obd);
-int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[],
- u32 valid, u32 flags);
-int osc_quota_chkdq(struct client_obd *cli, const unsigned int qid[]);
-int osc_quotactl(struct obd_device *unused, struct obd_export *exp,
- struct obd_quotactl *oqctl);
-void osc_inc_unstable_pages(struct ptlrpc_request *req);
-void osc_dec_unstable_pages(struct ptlrpc_request *req);
-bool osc_over_unstable_soft_limit(struct client_obd *cli);
-
-/**
- * Bit flags for osc_dlm_lock_at_pageoff().
- */
-enum osc_dap_flags {
- /**
- * Just check if the desired lock exists; it won't hold a reference
- * count on the lock.
- */
- OSC_DAP_FL_TEST_LOCK = BIT(0),
- /**
- * Return the lock even if it is being canceled.
- */
- OSC_DAP_FL_CANCELING = BIT(1),
-};
-
-struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
- struct osc_object *obj, pgoff_t index,
- enum osc_dap_flags flags);
-
-int osc_object_invalidate(const struct lu_env *env, struct osc_object *osc);
-
-/** osc shrink list to link all osc client obd */
-extern struct list_head osc_shrink_list;
-/** spin lock to protect osc_shrink_list */
-extern spinlock_t osc_shrink_lock;
-unsigned long osc_cache_shrink_count(struct shrinker *sk,
- struct shrink_control *sc);
-unsigned long osc_cache_shrink_scan(struct shrinker *sk,
- struct shrink_control *sc);
-
-#endif /* OSC_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/osc/osc_io.c b/drivers/staging/lustre/lustre/osc/osc_io.c
deleted file mode 100644
index 67734a8ed331..000000000000
--- a/drivers/staging/lustre/lustre/osc/osc_io.c
+++ /dev/null
@@ -1,918 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_io for OSC layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
- */
-
-#define DEBUG_SUBSYSTEM S_OSC
-
-#include <lustre_obdo.h>
-
-#include "osc_cl_internal.h"
-
-/** \addtogroup osc
- * @{
- */
-
-/*****************************************************************************
- *
- * Type conversions.
- *
- */
-
-static struct osc_io *cl2osc_io(const struct lu_env *env,
- const struct cl_io_slice *slice)
-{
- struct osc_io *oio = container_of_safe(slice, struct osc_io, oi_cl);
-
- LINVRNT(oio == osc_env_io(env));
- return oio;
-}
-
-/*****************************************************************************
- *
- * io operations.
- *
- */
-
-static void osc_io_fini(const struct lu_env *env, const struct cl_io_slice *io)
-{
-}
-
-static void osc_read_ahead_release(const struct lu_env *env, void *cbdata)
-{
- struct ldlm_lock *dlmlock = cbdata;
- struct lustre_handle lockh;
-
- ldlm_lock2handle(dlmlock, &lockh);
- ldlm_lock_decref(&lockh, LCK_PR);
- LDLM_LOCK_PUT(dlmlock);
-}
-
-static int osc_io_read_ahead(const struct lu_env *env,
- const struct cl_io_slice *ios,
- pgoff_t start, struct cl_read_ahead *ra)
-{
- struct osc_object *osc = cl2osc(ios->cis_obj);
- struct ldlm_lock *dlmlock;
- int result = -ENODATA;
-
- dlmlock = osc_dlmlock_at_pgoff(env, osc, start, 0);
- if (dlmlock) {
- LASSERT(dlmlock->l_ast_data == osc);
- if (dlmlock->l_req_mode != LCK_PR) {
- struct lustre_handle lockh;
-
- ldlm_lock2handle(dlmlock, &lockh);
- ldlm_lock_addref(&lockh, LCK_PR);
- ldlm_lock_decref(&lockh, dlmlock->l_req_mode);
- }
-
- ra->cra_rpc_size = osc_cli(osc)->cl_max_pages_per_rpc;
- ra->cra_end = cl_index(osc2cl(osc),
- dlmlock->l_policy_data.l_extent.end);
- ra->cra_release = osc_read_ahead_release;
- ra->cra_cbdata = dlmlock;
- result = 0;
- }
-
- return result;
-}
-
-/**
- * An implementation of the cl_io_operations::cio_io_submit() method for the
- * osc layer. Iterates over pages in the in-queue, prepares each for io by
- * calling cl_page_prep() and then submits them through osc_page_submit(),
- * queuing the resulting batches via osc_queue_sync_pages().
- */
-static int osc_io_submit(const struct lu_env *env,
- const struct cl_io_slice *ios,
- enum cl_req_type crt, struct cl_2queue *queue)
-{
- struct cl_page *page;
- struct cl_page *tmp;
- struct client_obd *cli = NULL;
- struct osc_object *osc = NULL; /* to keep gcc happy */
- struct osc_page *opg;
- struct cl_io *io;
- LIST_HEAD(list);
-
- struct cl_page_list *qin = &queue->c2_qin;
- struct cl_page_list *qout = &queue->c2_qout;
- unsigned int queued = 0;
- int result = 0;
- int cmd;
- int brw_flags;
- unsigned int max_pages;
-
- LASSERT(qin->pl_nr > 0);
-
- CDEBUG(D_CACHE | D_READA, "%d %d\n", qin->pl_nr, crt);
-
- osc = cl2osc(ios->cis_obj);
- cli = osc_cli(osc);
- max_pages = cli->cl_max_pages_per_rpc;
-
- cmd = crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ;
- brw_flags = osc_io_srvlock(cl2osc_io(env, ios)) ? OBD_BRW_SRVLOCK : 0;
-
- /*
- * NOTE: here @page is a top-level page. This is done to avoid
- * creation of sub-page-list.
- */
- cl_page_list_for_each_safe(page, tmp, qin) {
- struct osc_async_page *oap;
-
- /* Top level IO. */
- io = page->cp_owner;
- LASSERT(io);
-
- opg = osc_cl_page_osc(page, osc);
- oap = &opg->ops_oap;
- LASSERT(osc == oap->oap_obj);
-
- if (!list_empty(&oap->oap_pending_item) ||
- !list_empty(&oap->oap_rpc_item)) {
- CDEBUG(D_CACHE, "Busy oap %p page %p for submit.\n",
- oap, opg);
- result = -EBUSY;
- break;
- }
-
- result = cl_page_prep(env, io, page, crt);
- if (result != 0) {
- LASSERT(result < 0);
- if (result != -EALREADY)
- break;
- /*
- * Handle -EALREADY error: for read case, the page is
- * already in UPTODATE state; for write, the page
- * is not dirty.
- */
- result = 0;
- continue;
- }
-
- spin_lock(&oap->oap_lock);
- oap->oap_async_flags = ASYNC_URGENT | ASYNC_READY;
- oap->oap_async_flags |= ASYNC_COUNT_STABLE;
- spin_unlock(&oap->oap_lock);
-
- osc_page_submit(env, opg, crt, brw_flags);
- list_add_tail(&oap->oap_pending_item, &list);
-
- if (page->cp_sync_io)
- cl_page_list_move(qout, qin, page);
- else /* async IO */
- cl_page_list_del(env, qin, page);
-
- if (++queued == max_pages) {
- queued = 0;
- result = osc_queue_sync_pages(env, osc, &list, cmd,
- brw_flags);
- if (result < 0)
- break;
- }
- }
-
- if (queued > 0)
- result = osc_queue_sync_pages(env, osc, &list, cmd, brw_flags);
-
- /* Update c/mtime for sync write. LU-7310 */
- if (qout->pl_nr > 0 && !result) {
- struct cl_attr *attr = &osc_env_info(env)->oti_attr;
- struct cl_object *obj = ios->cis_obj;
-
- cl_object_attr_lock(obj);
- attr->cat_mtime = ktime_get_real_seconds();
- attr->cat_ctime = attr->cat_mtime;
- cl_object_attr_update(env, obj, attr, CAT_MTIME | CAT_CTIME);
- cl_object_attr_unlock(obj);
- }
-
- CDEBUG(D_INFO, "%d/%d %d\n", qin->pl_nr, qout->pl_nr, result);
- return qout->pl_nr > 0 ? 0 : result;
-}
-
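-/*
- * Note on the batching above: queued pages are handed to
- * osc_queue_sync_pages() every time max_pages_per_rpc pages have
- * accumulated, so one call may build several RPC-sized batches; pages
- * owned by sync IO move to qout, while async pages simply leave qin.
- */
-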
-/**
- * This is called when a page is accessed within a file in a way that
- * creates a new page, if one was missing (i.e., if there was a hole at
- * that place in the file, or the accessed page is beyond the current
- * file size).
- *
- * Expand stripe KMS if necessary.
- */
-static void osc_page_touch_at(const struct lu_env *env,
- struct cl_object *obj, pgoff_t idx, size_t to)
-{
- struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo;
- struct cl_attr *attr = &osc_env_info(env)->oti_attr;
- int valid;
- __u64 kms;
-
- /* offset within stripe */
- kms = cl_offset(obj, idx) + to;
-
- cl_object_attr_lock(obj);
- /*
- * XXX old code used
- *
- * ll_inode_size_lock(inode, 0); lov_stripe_lock(lsm);
- *
- * here
- */
- CDEBUG(D_INODE, "stripe KMS %sincreasing %llu->%llu %llu\n",
- kms > loi->loi_kms ? "" : "not ", loi->loi_kms, kms,
- loi->loi_lvb.lvb_size);
-
- attr->cat_ctime = ktime_get_real_seconds();
- attr->cat_mtime = attr->cat_ctime;
- valid = CAT_MTIME | CAT_CTIME;
- if (kms > loi->loi_kms) {
- attr->cat_kms = kms;
- valid |= CAT_KMS;
- }
- if (kms > loi->loi_lvb.lvb_size) {
- attr->cat_size = kms;
- valid |= CAT_SIZE;
- }
- cl_object_attr_update(env, obj, attr, valid);
- cl_object_attr_unlock(obj);
-}
-
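-/*
- * Worked example for osc_page_touch_at(), assuming 4KiB pages: a write
- * touching the first 100 bytes of page index 2 yields
- * kms = cl_offset(obj, 2) + 100 = 8292, so both KMS and the known file
- * size are raised to 8292 if they were smaller; neither is ever shrunk
- * here.
- */
-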
-static int osc_io_commit_async(const struct lu_env *env,
- const struct cl_io_slice *ios,
- struct cl_page_list *qin, int from, int to,
- cl_commit_cbt cb)
-{
- struct cl_io *io = ios->cis_io;
- struct osc_io *oio = cl2osc_io(env, ios);
- struct osc_object *osc = cl2osc(ios->cis_obj);
- struct cl_page *page;
- struct cl_page *last_page;
- struct osc_page *opg;
- int result = 0;
-
- LASSERT(qin->pl_nr > 0);
-
- /* Handle partial page cases */
- last_page = cl_page_list_last(qin);
- if (oio->oi_lockless) {
- page = cl_page_list_first(qin);
- if (page == last_page) {
- cl_page_clip(env, page, from, to);
- } else {
- if (from != 0)
- cl_page_clip(env, page, from, PAGE_SIZE);
- if (to != PAGE_SIZE)
- cl_page_clip(env, last_page, 0, to);
- }
- }
-
- while (qin->pl_nr > 0) {
- struct osc_async_page *oap;
-
- page = cl_page_list_first(qin);
- opg = osc_cl_page_osc(page, osc);
- oap = &opg->ops_oap;
-
- if (!list_empty(&oap->oap_rpc_item)) {
- CDEBUG(D_CACHE, "Busy oap %p page %p for submit.\n",
- oap, opg);
- result = -EBUSY;
- break;
- }
-
- /* The page may already be in the dirty cache. */
- if (list_empty(&oap->oap_pending_item)) {
- result = osc_page_cache_add(env, &opg->ops_cl, io);
- if (result != 0)
- break;
- }
-
- osc_page_touch_at(env, osc2cl(osc), osc_index(opg),
- page == last_page ? to : PAGE_SIZE);
-
- cl_page_list_del(env, qin, page);
-
- (*cb)(env, io, page);
- /* Can't access page any more. Page can be in transfer and
- * complete at any time.
- */
- }
-
- /* for sync write, the kernel will wait for this page to be flushed
- * before osc_io_end() is called, so release it earlier.
- * for mkwrite(), it's known there are no further pages.
- */
- if (cl_io_is_sync_write(io) && oio->oi_active) {
- osc_extent_release(env, oio->oi_active);
- oio->oi_active = NULL;
- }
-
- CDEBUG(D_INFO, "%d %d\n", qin->pl_nr, result);
- return result;
-}
-
-static int osc_io_iter_init(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct osc_object *osc = cl2osc(ios->cis_obj);
- struct obd_import *imp = osc_cli(osc)->cl_import;
- int rc = -EIO;
-
- spin_lock(&imp->imp_lock);
- if (likely(!imp->imp_invalid)) {
- struct osc_io *oio = osc_env_io(env);
-
- atomic_inc(&osc->oo_nr_ios);
- oio->oi_is_active = 1;
- rc = 0;
- }
- spin_unlock(&imp->imp_lock);
-
- return rc;
-}
-
-static int osc_io_write_iter_init(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct osc_io *oio = osc_env_io(env);
- struct osc_object *osc = cl2osc(ios->cis_obj);
- unsigned long npages;
-
- if (cl_io_is_append(io))
- return osc_io_iter_init(env, ios);
-
- npages = io->u.ci_rw.crw_count >> PAGE_SHIFT;
- if (io->u.ci_rw.crw_pos & ~PAGE_MASK)
- ++npages;
-
- oio->oi_lru_reserved = osc_lru_reserve(osc_cli(osc), npages);
-
- return osc_io_iter_init(env, ios);
-}
-
-static void osc_io_iter_fini(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct osc_io *oio = osc_env_io(env);
-
- if (oio->oi_is_active) {
- struct osc_object *osc = cl2osc(ios->cis_obj);
-
- oio->oi_is_active = 0;
- LASSERT(atomic_read(&osc->oo_nr_ios) > 0);
- if (atomic_dec_and_test(&osc->oo_nr_ios))
- wake_up_all(&osc->oo_io_waitq);
- }
-}
-
-static void osc_io_write_iter_fini(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct osc_io *oio = osc_env_io(env);
- struct osc_object *osc = cl2osc(ios->cis_obj);
-
- if (oio->oi_lru_reserved > 0) {
- osc_lru_unreserve(osc_cli(osc), oio->oi_lru_reserved);
- oio->oi_lru_reserved = 0;
- }
- oio->oi_write_osclock = NULL;
-
- osc_io_iter_fini(env, ios);
-}
-
-static int osc_io_fault_start(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io;
- struct cl_fault_io *fio;
-
- io = ios->cis_io;
- fio = &io->u.ci_fault;
- CDEBUG(D_INFO, "%lu %d %zu\n",
- fio->ft_index, fio->ft_writable, fio->ft_nob);
- /*
- * If mapping is writeable, adjust kms to cover this page,
- * but do not extend kms beyond actual file size.
- * See bug 10919.
- */
- if (fio->ft_writable)
- osc_page_touch_at(env, ios->cis_obj,
- fio->ft_index, fio->ft_nob);
- return 0;
-}
-
-static int osc_async_upcall(void *a, int rc)
-{
- struct osc_async_cbargs *args = a;
-
- args->opc_rc = rc;
- complete(&args->opc_sync);
- return 0;
-}
-
-/**
- * Checks that there are no pages being written in the extent being truncated.
- */
-static int trunc_check_cb(const struct lu_env *env, struct cl_io *io,
- struct osc_page *ops, void *cbdata)
-{
- struct cl_page *page = ops->ops_cl.cpl_page;
- struct osc_async_page *oap;
- __u64 start = *(__u64 *)cbdata;
-
- oap = &ops->ops_oap;
- if (oap->oap_cmd & OBD_BRW_WRITE &&
- !list_empty(&oap->oap_pending_item))
- CL_PAGE_DEBUG(D_ERROR, env, page, "exists %llu/%s.\n",
- start, current->comm);
-
- if (PageLocked(page->cp_vmpage))
- CDEBUG(D_CACHE, "page %p index %lu locked for %d.\n",
- ops, osc_index(ops), oap->oap_cmd & OBD_BRW_RWMASK);
-
- return CLP_GANG_OKAY;
-}
-
-static void osc_trunc_check(const struct lu_env *env, struct cl_io *io,
- struct osc_io *oio, __u64 size)
-{
- struct cl_object *clob;
- int partial;
- pgoff_t start;
-
- clob = oio->oi_cl.cis_obj;
- start = cl_index(clob, size);
- partial = cl_offset(clob, start) < size;
-
- /*
- * Complain if there are pages in the truncated region.
- */
- osc_page_gang_lookup(env, io, cl2osc(clob),
- start + partial, CL_PAGE_EOF,
- trunc_check_cb, (void *)&size);
-}
-
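-/*
- * The start + partial arithmetic above, with 4KiB pages as an example:
- * truncating to size 6000 gives start = 1 and partial = 1, since page 1
- * still holds the surviving bytes [4096, 5999]; the gang lookup thus
- * complains only about pages from index 2 onwards.
- */
-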
-static int osc_io_setattr_start(const struct lu_env *env,
- const struct cl_io_slice *slice)
-{
- struct cl_io *io = slice->cis_io;
- struct osc_io *oio = cl2osc_io(env, slice);
- struct cl_object *obj = slice->cis_obj;
- struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo;
- struct cl_attr *attr = &osc_env_info(env)->oti_attr;
- struct obdo *oa = &oio->oi_oa;
- struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
- __u64 size = io->u.ci_setattr.sa_attr.lvb_size;
- unsigned int ia_valid = io->u.ci_setattr.sa_valid;
- int result = 0;
-
- /* truncate cache dirty pages first */
- if (cl_io_is_trunc(io))
- result = osc_cache_truncate_start(env, cl2osc(obj), size,
- &oio->oi_trunc);
-
- if (result == 0 && oio->oi_lockless == 0) {
- cl_object_attr_lock(obj);
- result = cl_object_attr_get(env, obj, attr);
- if (result == 0) {
- struct ost_lvb *lvb = &io->u.ci_setattr.sa_attr;
- unsigned int cl_valid = 0;
-
- if (ia_valid & ATTR_SIZE) {
- attr->cat_size = size;
- attr->cat_kms = size;
- cl_valid = CAT_SIZE | CAT_KMS;
- }
- if (ia_valid & ATTR_MTIME_SET) {
- attr->cat_mtime = lvb->lvb_mtime;
- cl_valid |= CAT_MTIME;
- }
- if (ia_valid & ATTR_ATIME_SET) {
- attr->cat_atime = lvb->lvb_atime;
- cl_valid |= CAT_ATIME;
- }
- if (ia_valid & ATTR_CTIME_SET) {
- attr->cat_ctime = lvb->lvb_ctime;
- cl_valid |= CAT_CTIME;
- }
- result = cl_object_attr_update(env, obj, attr,
- cl_valid);
- }
- cl_object_attr_unlock(obj);
- }
- memset(oa, 0, sizeof(*oa));
- if (result == 0) {
- oa->o_oi = loi->loi_oi;
- obdo_set_parent_fid(oa, io->u.ci_setattr.sa_parent_fid);
- oa->o_stripe_idx = io->u.ci_setattr.sa_stripe_index;
- oa->o_valid |= OBD_MD_FLID | OBD_MD_FLGROUP;
- if (ia_valid & ATTR_CTIME) {
- oa->o_valid |= OBD_MD_FLCTIME;
- oa->o_ctime = attr->cat_ctime;
- }
- if (ia_valid & ATTR_ATIME) {
- oa->o_valid |= OBD_MD_FLATIME;
- oa->o_atime = attr->cat_atime;
- }
- if (ia_valid & ATTR_MTIME) {
- oa->o_valid |= OBD_MD_FLMTIME;
- oa->o_mtime = attr->cat_mtime;
- }
- if (ia_valid & ATTR_SIZE) {
- oa->o_size = size;
- oa->o_blocks = OBD_OBJECT_EOF;
- oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
-
- if (oio->oi_lockless) {
- oa->o_flags = OBD_FL_SRVLOCK;
- oa->o_valid |= OBD_MD_FLFLAGS;
- }
- } else {
- LASSERT(oio->oi_lockless == 0);
- }
- if (ia_valid & ATTR_ATTR_FLAG) {
- oa->o_flags = io->u.ci_setattr.sa_attr_flags;
- oa->o_valid |= OBD_MD_FLFLAGS;
- }
-
- init_completion(&cbargs->opc_sync);
-
- if (ia_valid & ATTR_SIZE)
- result = osc_punch_base(osc_export(cl2osc(obj)),
- oa, osc_async_upcall,
- cbargs, PTLRPCD_SET);
- else
- result = osc_setattr_async(osc_export(cl2osc(obj)),
- oa, osc_async_upcall,
- cbargs, PTLRPCD_SET);
- cbargs->opc_rpc_sent = result == 0;
- }
- return result;
-}
-
-static void osc_io_setattr_end(const struct lu_env *env,
- const struct cl_io_slice *slice)
-{
- struct cl_io *io = slice->cis_io;
- struct osc_io *oio = cl2osc_io(env, slice);
- struct cl_object *obj = slice->cis_obj;
- struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
- int result = 0;
-
- if (cbargs->opc_rpc_sent) {
- wait_for_completion(&cbargs->opc_sync);
- result = cbargs->opc_rc;
- io->ci_result = cbargs->opc_rc;
- }
- if (result == 0) {
- if (oio->oi_lockless) {
- /* lockless truncate */
- struct osc_device *osd = lu2osc_dev(obj->co_lu.lo_dev);
-
- LASSERT(cl_io_is_trunc(io));
- /* XXX: Need a lock. */
- osd->od_stats.os_lockless_truncates++;
- }
- }
-
- if (cl_io_is_trunc(io)) {
- __u64 size = io->u.ci_setattr.sa_attr.lvb_size;
-
- osc_trunc_check(env, io, oio, size);
- osc_cache_truncate_end(env, oio->oi_trunc);
- oio->oi_trunc = NULL;
- }
-}
-
-struct osc_data_version_args {
- struct osc_io *dva_oio;
-};
-
-static int
-osc_data_version_interpret(const struct lu_env *env, struct ptlrpc_request *req,
- void *arg, int rc)
-{
- struct osc_data_version_args *dva = arg;
- struct osc_io *oio = dva->dva_oio;
- const struct ost_body *body;
-
- if (rc < 0)
- goto out;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
- if (!body) {
- rc = -EPROTO;
- goto out;
- }
-
- lustre_get_wire_obdo(&req->rq_import->imp_connect_data, &oio->oi_oa,
- &body->oa);
-out:
- oio->oi_cbarg.opc_rc = rc;
- complete(&oio->oi_cbarg.opc_sync);
-
- return 0;
-}
-
-static int osc_io_data_version_start(const struct lu_env *env,
- const struct cl_io_slice *slice)
-{
- struct cl_data_version_io *dv = &slice->cis_io->u.ci_data_version;
- struct osc_io *oio = cl2osc_io(env, slice);
- struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
- struct osc_object *obj = cl2osc(slice->cis_obj);
- struct obd_export *exp = osc_export(obj);
- struct lov_oinfo *loi = obj->oo_oinfo;
- struct osc_data_version_args *dva;
- struct obdo *oa = &oio->oi_oa;
- struct ptlrpc_request *req;
- struct ost_body *body;
- int rc;
-
- memset(oa, 0, sizeof(*oa));
- oa->o_oi = loi->loi_oi;
- oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
-
- if (dv->dv_flags & (LL_DV_RD_FLUSH | LL_DV_WR_FLUSH)) {
- oa->o_valid |= OBD_MD_FLFLAGS;
- oa->o_flags |= OBD_FL_SRVLOCK;
- if (dv->dv_flags & LL_DV_WR_FLUSH)
- oa->o_flags |= OBD_FL_FLUSH;
- }
-
- init_completion(&cbargs->opc_sync);
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_GETATTR);
- if (!req)
- return -ENOMEM;
-
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR);
- if (rc < 0) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
- lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);
-
- ptlrpc_request_set_replen(req);
- req->rq_interpret_reply = osc_data_version_interpret;
- BUILD_BUG_ON(sizeof(*dva) > sizeof(req->rq_async_args));
- dva = ptlrpc_req_async_args(req);
- dva->dva_oio = oio;
-
- ptlrpcd_add_req(req);
-
- return 0;
-}
-
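-/*
- * The callback argument travels in req->rq_async_args (the BUILD_BUG_ON
- * above guards its size); osc_data_version_interpret() later runs in
- * ptlrpcd context and completes opc_sync, which the _end() hook below
- * waits on.
- */
-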
-static void osc_io_data_version_end(const struct lu_env *env,
- const struct cl_io_slice *slice)
-{
- struct cl_data_version_io *dv = &slice->cis_io->u.ci_data_version;
- struct osc_io *oio = cl2osc_io(env, slice);
- struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
-
- wait_for_completion(&cbargs->opc_sync);
-
- if (cbargs->opc_rc) {
- slice->cis_io->ci_result = cbargs->opc_rc;
- } else if (!(oio->oi_oa.o_valid & OBD_MD_FLDATAVERSION)) {
- slice->cis_io->ci_result = -EOPNOTSUPP;
- } else {
- dv->dv_data_version = oio->oi_oa.o_data_version;
- slice->cis_io->ci_result = 0;
- }
-}
-
-static int osc_io_read_start(const struct lu_env *env,
- const struct cl_io_slice *slice)
-{
- struct cl_object *obj = slice->cis_obj;
- struct cl_attr *attr = &osc_env_info(env)->oti_attr;
- int rc = 0;
-
- if (!slice->cis_io->ci_noatime) {
- cl_object_attr_lock(obj);
- attr->cat_atime = ktime_get_real_seconds();
- rc = cl_object_attr_update(env, obj, attr, CAT_ATIME);
- cl_object_attr_unlock(obj);
- }
- return rc;
-}
-
-static int osc_io_write_start(const struct lu_env *env,
- const struct cl_io_slice *slice)
-{
- struct cl_object *obj = slice->cis_obj;
- struct cl_attr *attr = &osc_env_info(env)->oti_attr;
- int rc = 0;
-
- OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DELAY_SETTIME, 1);
- cl_object_attr_lock(obj);
- attr->cat_ctime = ktime_get_real_seconds();
- attr->cat_mtime = attr->cat_ctime;
- rc = cl_object_attr_update(env, obj, attr, CAT_MTIME | CAT_CTIME);
- cl_object_attr_unlock(obj);
-
- return rc;
-}
-
-static int osc_fsync_ost(const struct lu_env *env, struct osc_object *obj,
- struct cl_fsync_io *fio)
-{
- struct osc_io *oio = osc_env_io(env);
- struct obdo *oa = &oio->oi_oa;
- struct lov_oinfo *loi = obj->oo_oinfo;
- struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
- int rc = 0;
-
- memset(oa, 0, sizeof(*oa));
- oa->o_oi = loi->loi_oi;
- oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
-
- /* reload size and blocks for start and end of sync range */
- oa->o_size = fio->fi_start;
- oa->o_blocks = fio->fi_end;
- oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
-
- obdo_set_parent_fid(oa, fio->fi_fid);
-
- init_completion(&cbargs->opc_sync);
-
- rc = osc_sync_base(obj, oa, osc_async_upcall, cbargs, PTLRPCD_SET);
- return rc;
-}
-
-static int osc_io_fsync_start(const struct lu_env *env,
- const struct cl_io_slice *slice)
-{
- struct cl_io *io = slice->cis_io;
- struct cl_fsync_io *fio = &io->u.ci_fsync;
- struct cl_object *obj = slice->cis_obj;
- struct osc_object *osc = cl2osc(obj);
- pgoff_t start = cl_index(obj, fio->fi_start);
- pgoff_t end = cl_index(obj, fio->fi_end);
- int result = 0;
-
- if (fio->fi_end == OBD_OBJECT_EOF)
- end = CL_PAGE_EOF;
-
- result = osc_cache_writeback_range(env, osc, start, end, 0,
- fio->fi_mode == CL_FSYNC_DISCARD);
- if (result > 0) {
- fio->fi_nr_written += result;
- result = 0;
- }
- if (fio->fi_mode == CL_FSYNC_ALL) {
- int rc;
-
- /* we have to wait for writeback to finish before we can
- * send the OST_SYNC RPC. This is bad because it causes extents
- * to be written osc by osc. However, we usually start
- * writeback before CL_FSYNC_ALL, so in practice this is rarely
- * a real problem.
- */
- rc = osc_cache_wait_range(env, osc, start, end);
- if (result == 0)
- result = rc;
- rc = osc_fsync_ost(env, osc, fio);
- if (result == 0)
- result = rc;
- }
-
- return result;
-}
-
-static void osc_io_fsync_end(const struct lu_env *env,
- const struct cl_io_slice *slice)
-{
- struct cl_fsync_io *fio = &slice->cis_io->u.ci_fsync;
- struct cl_object *obj = slice->cis_obj;
- pgoff_t start = cl_index(obj, fio->fi_start);
- pgoff_t end = cl_index(obj, fio->fi_end);
- int result = 0;
-
- if (fio->fi_mode == CL_FSYNC_LOCAL) {
- result = osc_cache_wait_range(env, cl2osc(obj), start, end);
- } else if (fio->fi_mode == CL_FSYNC_ALL) {
- struct osc_io *oio = cl2osc_io(env, slice);
- struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
-
- wait_for_completion(&cbargs->opc_sync);
- if (result == 0)
- result = cbargs->opc_rc;
- }
- slice->cis_io->ci_result = result;
-}
-
-static void osc_io_end(const struct lu_env *env,
- const struct cl_io_slice *slice)
-{
- struct osc_io *oio = cl2osc_io(env, slice);
-
- if (oio->oi_active) {
- osc_extent_release(env, oio->oi_active);
- oio->oi_active = NULL;
- }
-}
-
-static const struct cl_io_operations osc_io_ops = {
- .op = {
- [CIT_READ] = {
- .cio_iter_init = osc_io_iter_init,
- .cio_iter_fini = osc_io_iter_fini,
- .cio_start = osc_io_read_start,
- .cio_fini = osc_io_fini
- },
- [CIT_WRITE] = {
- .cio_iter_init = osc_io_write_iter_init,
- .cio_iter_fini = osc_io_write_iter_fini,
- .cio_start = osc_io_write_start,
- .cio_end = osc_io_end,
- .cio_fini = osc_io_fini
- },
- [CIT_SETATTR] = {
- .cio_iter_init = osc_io_iter_init,
- .cio_iter_fini = osc_io_iter_fini,
- .cio_start = osc_io_setattr_start,
- .cio_end = osc_io_setattr_end
- },
- [CIT_DATA_VERSION] = {
- .cio_start = osc_io_data_version_start,
- .cio_end = osc_io_data_version_end,
- },
- [CIT_FAULT] = {
- .cio_iter_init = osc_io_iter_init,
- .cio_iter_fini = osc_io_iter_fini,
- .cio_start = osc_io_fault_start,
- .cio_end = osc_io_end,
- .cio_fini = osc_io_fini
- },
- [CIT_FSYNC] = {
- .cio_start = osc_io_fsync_start,
- .cio_end = osc_io_fsync_end,
- .cio_fini = osc_io_fini
- },
- [CIT_MISC] = {
- .cio_fini = osc_io_fini
- }
- },
- .cio_read_ahead = osc_io_read_ahead,
- .cio_submit = osc_io_submit,
- .cio_commit_async = osc_io_commit_async
-};
-
-/*****************************************************************************
- *
- * Transfer operations.
- *
- */
-
-int osc_io_init(const struct lu_env *env,
- struct cl_object *obj, struct cl_io *io)
-{
- struct osc_io *oio = osc_env_io(env);
-
- CL_IO_SLICE_CLEAN(oio, oi_cl);
- cl_io_slice_add(io, &oio->oi_cl, obj, &osc_io_ops);
- return 0;
-}
-
-/** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_lock.c b/drivers/staging/lustre/lustre/osc/osc_lock.c
deleted file mode 100644
index d93d33dc8dc4..000000000000
--- a/drivers/staging/lustre/lustre/osc/osc_lock.c
+++ /dev/null
@@ -1,1230 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_lock for OSC layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@intel.com>
- */
-
-#define DEBUG_SUBSYSTEM S_OSC
-
-/* fid_build_reg_res_name() */
-#include <lustre_fid.h>
-
-#include "osc_cl_internal.h"
-
-/** \addtogroup osc
- * @{
- */
-
-/*****************************************************************************
- *
- * Type conversions.
- *
- */
-
-static const struct cl_lock_operations osc_lock_ops;
-static const struct cl_lock_operations osc_lock_lockless_ops;
-static void osc_lock_to_lockless(const struct lu_env *env,
- struct osc_lock *ols, int force);
-
-int osc_lock_is_lockless(const struct osc_lock *olck)
-{
- return (olck->ols_cl.cls_ops == &osc_lock_lockless_ops);
-}
-
-/**
- * Returns a weak pointer to the ldlm lock identified by a handle. Returned
- * pointer cannot be dereferenced, as lock is not protected from concurrent
- * reclaim. This function is a helper for osc_lock_invariant().
- */
-static struct ldlm_lock *osc_handle_ptr(struct lustre_handle *handle)
-{
- struct ldlm_lock *lock;
-
- lock = ldlm_handle2lock(handle);
- if (lock)
- LDLM_LOCK_PUT(lock);
- return lock;
-}
-
-/**
- * Invariant that has to be true all of the time.
- */
-static int osc_lock_invariant(struct osc_lock *ols)
-{
- struct ldlm_lock *lock = osc_handle_ptr(&ols->ols_handle);
- struct ldlm_lock *olock = ols->ols_dlmlock;
- int handle_used = lustre_handle_is_used(&ols->ols_handle);
-
- if (ergo(osc_lock_is_lockless(ols),
- ols->ols_locklessable && !ols->ols_dlmlock))
- return 1;
-
- /*
- * If all the following "ergo"s are true, return 1, otherwise 0
- */
- if (!ergo(olock, handle_used))
- return 0;
-
- if (!ergo(olock, olock->l_handle.h_cookie == ols->ols_handle.cookie))
- return 0;
-
- if (!ergo(handle_used,
- ergo(lock && olock, lock == olock) &&
- ergo(!lock, !olock)))
- return 0;
- /*
- * Check that ->ols_handle and ->ols_dlmlock are consistent, but
- * take into account that they are set at different times.
- */
- if (!ergo(ols->ols_state == OLS_CANCELLED,
- !olock && !handle_used))
- return 0;
- /*
- * DLM lock is destroyed only after we have seen cancellation
- * ast.
- */
- if (!ergo(olock && ols->ols_state < OLS_CANCELLED,
- !ldlm_is_destroyed(olock)))
- return 0;
-
- if (!ergo(ols->ols_state == OLS_GRANTED,
- olock && olock->l_req_mode == olock->l_granted_mode &&
- ols->ols_hold))
- return 0;
- return 1;
-}
-
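-/*
- * Reading aid for the invariant above: ergo(a, b) is logical
- * implication, i.e. (!(a) || (b)), so e.g. "ergo(olock, handle_used)"
- * reads "if there is a dlm lock, the handle must be in use".
- */
-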
-/*****************************************************************************
- *
- * Lock operations.
- *
- */
-
-static void osc_lock_fini(const struct lu_env *env,
- struct cl_lock_slice *slice)
-{
- struct osc_lock *ols = cl2osc_lock(slice);
-
- LINVRNT(osc_lock_invariant(ols));
- LASSERT(!ols->ols_dlmlock);
-
- kmem_cache_free(osc_lock_kmem, ols);
-}
-
-static void osc_lock_build_policy(const struct lu_env *env,
- const struct cl_lock *lock,
- union ldlm_policy_data *policy)
-{
- const struct cl_lock_descr *d = &lock->cll_descr;
-
- osc_index2policy(policy, d->cld_obj, d->cld_start, d->cld_end);
- policy->l_extent.gid = d->cld_gid;
-}
-
-static __u64 osc_enq2ldlm_flags(__u32 enqflags)
-{
- __u64 result = 0;
-
- LASSERT((enqflags & ~CEF_MASK) == 0);
-
- if (enqflags & CEF_NONBLOCK)
- result |= LDLM_FL_BLOCK_NOWAIT;
- if (enqflags & CEF_ASYNC)
- result |= LDLM_FL_HAS_INTENT;
- if (enqflags & CEF_DISCARD_DATA)
- result |= LDLM_FL_AST_DISCARD_DATA;
- if (enqflags & CEF_PEEK)
- result |= LDLM_FL_TEST_LOCK;
- if (enqflags & CEF_LOCK_MATCH)
- result |= LDLM_FL_MATCH_LOCK;
- return result;
-}
-
-/**
- * Updates object attributes from a lock value block (lvb) received together
- * with the DLM lock reply from the server. Copy of osc_update_enqueue()
- * logic.
- *
- * This can be optimized to not update attributes when lock is a result of a
- * local match.
- *
- * Called under lock and resource spin-locks.
- */
-static void osc_lock_lvb_update(const struct lu_env *env,
- struct osc_object *osc,
- struct ldlm_lock *dlmlock,
- struct ost_lvb *lvb)
-{
- struct cl_object *obj = osc2cl(osc);
- struct lov_oinfo *oinfo = osc->oo_oinfo;
- struct cl_attr *attr = &osc_env_info(env)->oti_attr;
- unsigned int valid;
-
- valid = CAT_BLOCKS | CAT_ATIME | CAT_CTIME | CAT_MTIME | CAT_SIZE;
- if (!lvb)
- lvb = dlmlock->l_lvb_data;
-
- cl_lvb2attr(attr, lvb);
-
- cl_object_attr_lock(obj);
- if (dlmlock) {
- __u64 size;
-
- check_res_locked(dlmlock->l_resource);
- LASSERT(lvb == dlmlock->l_lvb_data);
- size = lvb->lvb_size;
-
- /* Extend KMS up to the end of this lock and no further
- * A lock on [x,y] means a KMS of up to y + 1 bytes!
- */
- if (size > dlmlock->l_policy_data.l_extent.end)
- size = dlmlock->l_policy_data.l_extent.end + 1;
- if (size >= oinfo->loi_kms) {
- LDLM_DEBUG(dlmlock, "lock acquired, setting rss=%llu, kms=%llu",
- lvb->lvb_size, size);
- valid |= CAT_KMS;
- attr->cat_kms = size;
- } else {
- LDLM_DEBUG(dlmlock, "lock acquired, setting rss=%llu; leaving kms=%llu, end=%llu",
- lvb->lvb_size, oinfo->loi_kms,
- dlmlock->l_policy_data.l_extent.end);
- }
- ldlm_lock_allow_match_locked(dlmlock);
- }
-
- cl_object_attr_update(env, obj, attr, valid);
- cl_object_attr_unlock(obj);
-}
-
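-/*
- * Example of the KMS clamp above: a lock on extent [0, 4095] covers
- * exactly the first 4096 bytes, so even if the lvb reports a larger
- * size, KMS is extended to at most l_extent.end + 1 = 4096 here.
- */
-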
-static void osc_lock_granted(const struct lu_env *env, struct osc_lock *oscl,
- struct lustre_handle *lockh, bool lvb_update)
-{
- struct ldlm_lock *dlmlock;
-
- dlmlock = ldlm_handle2lock_long(lockh, 0);
- LASSERT(dlmlock);
-
- /* lock reference taken by ldlm_handle2lock_long() is
- * owned by osc_lock and released in osc_lock_detach()
- */
- lu_ref_add(&dlmlock->l_reference, "osc_lock", oscl);
- oscl->ols_has_ref = 1;
-
- LASSERT(!oscl->ols_dlmlock);
- oscl->ols_dlmlock = dlmlock;
-
- /* This may be a matched lock for a glimpse request; do not hold
- * a lock reference in that case.
- */
- if (!oscl->ols_glimpse) {
- /* hold a reference for a non-glimpse lock, which will
- * be released in osc_lock_cancel()
- */
- lustre_handle_copy(&oscl->ols_handle, lockh);
- ldlm_lock_addref(lockh, oscl->ols_einfo.ei_mode);
- oscl->ols_hold = 1;
- }
-
- /* Lock must have been granted. */
- lock_res_and_lock(dlmlock);
- if (dlmlock->l_granted_mode == dlmlock->l_req_mode) {
- struct ldlm_extent *ext = &dlmlock->l_policy_data.l_extent;
- struct cl_lock_descr *descr = &oscl->ols_cl.cls_lock->cll_descr;
-
- /* extend the lock extent, otherwise there will be a problem when
- * we decide whether to grant a lockless lock.
- */
- descr->cld_mode = osc_ldlm2cl_lock(dlmlock->l_granted_mode);
- descr->cld_start = cl_index(descr->cld_obj, ext->start);
- descr->cld_end = cl_index(descr->cld_obj, ext->end);
- descr->cld_gid = ext->gid;
-
- /* no lvb update for matched lock */
- if (lvb_update) {
- LASSERT(oscl->ols_flags & LDLM_FL_LVB_READY);
- osc_lock_lvb_update(env, cl2osc(oscl->ols_cl.cls_obj),
- dlmlock, NULL);
- }
- LINVRNT(osc_lock_invariant(oscl));
- }
- unlock_res_and_lock(dlmlock);
-
- LASSERT(oscl->ols_state != OLS_GRANTED);
- oscl->ols_state = OLS_GRANTED;
-}
-
-/**
- * Lock upcall function that is executed either when a reply to ENQUEUE rpc is
- * received from a server, or after osc_enqueue_base() matched a local DLM
- * lock.
- */
-static int osc_lock_upcall(void *cookie, struct lustre_handle *lockh,
- int errcode)
-{
- struct osc_lock *oscl = cookie;
- struct cl_lock_slice *slice = &oscl->ols_cl;
- struct lu_env *env;
- int rc;
- u16 refcheck;
-
- env = cl_env_get(&refcheck);
- /* should never happen, similar to osc_ldlm_blocking_ast(). */
- LASSERT(!IS_ERR(env));
-
- rc = ldlm_error2errno(errcode);
- if (oscl->ols_state == OLS_ENQUEUED) {
- oscl->ols_state = OLS_UPCALL_RECEIVED;
- } else if (oscl->ols_state == OLS_CANCELLED) {
- rc = -EIO;
- } else {
- CERROR("Impossible state: %d\n", oscl->ols_state);
- LBUG();
- }
-
- if (rc == 0)
- osc_lock_granted(env, oscl, lockh, errcode == ELDLM_OK);
-
- /* Error handling, some errors are tolerable. */
- if (oscl->ols_locklessable && rc == -EUSERS) {
- /* This is a tolerable error; turn this lock into a
- * lockless lock.
- */
- osc_object_set_contended(cl2osc(slice->cls_obj));
- LASSERT(slice->cls_ops == &osc_lock_ops);
-
- /* Change this lock to ldlmlock-less lock. */
- osc_lock_to_lockless(env, oscl, 1);
- oscl->ols_state = OLS_GRANTED;
- rc = 0;
- } else if (oscl->ols_glimpse && rc == -ENAVAIL) {
- LASSERT(oscl->ols_flags & LDLM_FL_LVB_READY);
- osc_lock_lvb_update(env, cl2osc(slice->cls_obj),
- NULL, &oscl->ols_lvb);
- /* Hide the error. */
- rc = 0;
- }
-
- if (oscl->ols_owner)
- cl_sync_io_note(env, oscl->ols_owner, rc);
- cl_env_put(env, &refcheck);
-
- return rc;
-}
-
-static int osc_lock_upcall_agl(void *cookie, struct lustre_handle *lockh,
- int errcode)
-{
- struct osc_object *osc = cookie;
- struct ldlm_lock *dlmlock;
- struct lu_env *env;
- u16 refcheck;
-
- env = cl_env_get(&refcheck);
- LASSERT(!IS_ERR(env));
-
- if (errcode == ELDLM_LOCK_MATCHED) {
- errcode = ELDLM_OK;
- goto out;
- }
-
- if (errcode != ELDLM_OK)
- goto out;
-
- dlmlock = ldlm_handle2lock(lockh);
- LASSERT(dlmlock);
-
- lock_res_and_lock(dlmlock);
- LASSERT(dlmlock->l_granted_mode == dlmlock->l_req_mode);
-
- /* there is no osc_lock associated with AGL lock */
- osc_lock_lvb_update(env, osc, dlmlock, NULL);
-
- unlock_res_and_lock(dlmlock);
- LDLM_LOCK_PUT(dlmlock);
-
-out:
- cl_object_put(env, osc2cl(osc));
- cl_env_put(env, &refcheck);
- return ldlm_error2errno(errcode);
-}
-
-static int osc_lock_flush(struct osc_object *obj, pgoff_t start, pgoff_t end,
- enum cl_lock_mode mode, int discard)
-{
- struct lu_env *env;
- u16 refcheck;
- int rc = 0;
- int rc2 = 0;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- if (mode == CLM_WRITE) {
- rc = osc_cache_writeback_range(env, obj, start, end, 1,
- discard);
- CDEBUG(D_CACHE, "object %p: [%lu -> %lu] %d pages were %s.\n",
- obj, start, end, rc,
- discard ? "discarded" : "written back");
- if (rc > 0)
- rc = 0;
- }
-
- rc2 = osc_lock_discard_pages(env, obj, start, end, mode);
- if (rc == 0 && rc2 < 0)
- rc = rc2;
-
- cl_env_put(env, &refcheck);
- return rc;
-}
-
-/**
- * Helper for osc_dlm_blocking_ast() handling discrepancies between cl_lock
- * and ldlm_lock caches.
- */
-static int osc_dlm_blocking_ast0(const struct lu_env *env,
- struct ldlm_lock *dlmlock,
- void *data, int flag)
-{
- struct cl_object *obj = NULL;
- int result = 0;
- int discard;
- enum cl_lock_mode mode = CLM_READ;
-
- LASSERT(flag == LDLM_CB_CANCELING);
-
- lock_res_and_lock(dlmlock);
- if (dlmlock->l_granted_mode != dlmlock->l_req_mode) {
- dlmlock->l_ast_data = NULL;
- unlock_res_and_lock(dlmlock);
- return 0;
- }
-
- discard = ldlm_is_discard_data(dlmlock);
- if (dlmlock->l_granted_mode & (LCK_PW | LCK_GROUP))
- mode = CLM_WRITE;
-
- if (dlmlock->l_ast_data) {
- obj = osc2cl(dlmlock->l_ast_data);
- dlmlock->l_ast_data = NULL;
-
- cl_object_get(obj);
- }
-
- unlock_res_and_lock(dlmlock);
-
- /* if l_ast_data is NULL, the dlmlock was enqueued by AGL or
- * the object has been destroyed.
- */
- if (obj) {
- struct ldlm_extent *extent = &dlmlock->l_policy_data.l_extent;
- struct cl_attr *attr = &osc_env_info(env)->oti_attr;
- __u64 old_kms;
-
- /* Destroy pages covered by the extent of the DLM lock */
- result = osc_lock_flush(cl2osc(obj),
- cl_index(obj, extent->start),
- cl_index(obj, extent->end),
- mode, discard);
-
- /* losing a lock, update kms */
- lock_res_and_lock(dlmlock);
- cl_object_attr_lock(obj);
- /* Must get the value under the lock to avoid race. */
- old_kms = cl2osc(obj)->oo_oinfo->loi_kms;
- /* Update the kms. We need to loop over all granted locks;
- * not a problem for the client.
- */
- attr->cat_kms = ldlm_extent_shift_kms(dlmlock, old_kms);
-
- cl_object_attr_update(env, obj, attr, CAT_KMS);
- cl_object_attr_unlock(obj);
- unlock_res_and_lock(dlmlock);
-
- cl_object_put(env, obj);
- }
- return result;
-}
-
-/**
- * Blocking ast invoked by ldlm when dlm lock is either blocking progress of
- * some other lock, or is canceled. This function is installed as a
- * ldlm_lock::l_blocking_ast() for client extent locks.
- *
- * Control flow is tricky, because ldlm uses the same call-back
- * (ldlm_lock::l_blocking_ast()) for both blocking and cancellation ast's.
- *
- * \param dlmlock lock for which ast occurred.
- *
- * \param new description of a conflicting lock in case of blocking ast.
- *
- * \param data value of dlmlock->l_ast_data
- *
- * \param flag LDLM_CB_BLOCKING or LDLM_CB_CANCELING. Used to distinguish
- * cancellation and blocking ast's.
- *
- * Possible use cases:
- *
- * - ldlm calls dlmlock->l_blocking_ast(..., LDLM_CB_CANCELING) to cancel
- * lock due to lock lru pressure, or explicit user request to purge
- * locks.
- *
- * - ldlm calls dlmlock->l_blocking_ast(..., LDLM_CB_BLOCKING) to notify
- * us that dlmlock conflicts with another lock that some client is
- * enqueuing. Lock is canceled.
- *
- * - cl_lock_cancel() is called. osc_lock_cancel() calls
- * ldlm_cli_cancel() that calls
- *
- * dlmlock->l_blocking_ast(..., LDLM_CB_CANCELING)
- *
- * recursively entering osc_ldlm_blocking_ast().
- *
- * - client cancels lock voluntarily (e.g., as a part of early cancellation):
- *
- * cl_lock_cancel()->
- * osc_lock_cancel()->
- * ldlm_cli_cancel()->
- * dlmlock->l_blocking_ast(..., LDLM_CB_CANCELING)
- *
- */
-static int osc_ldlm_blocking_ast(struct ldlm_lock *dlmlock,
- struct ldlm_lock_desc *new, void *data,
- int flag)
-{
- int result = 0;
-
- switch (flag) {
- case LDLM_CB_BLOCKING: {
- struct lustre_handle lockh;
-
- ldlm_lock2handle(dlmlock, &lockh);
- result = ldlm_cli_cancel(&lockh, LCF_ASYNC);
- if (result == -ENODATA)
- result = 0;
- break;
- }
- case LDLM_CB_CANCELING: {
- struct lu_env *env;
- u16 refcheck;
-
- /*
- * This can be called in the context of outer IO, e.g.,
- *
- * osc_enqueue_base()->...
- * ->ldlm_prep_elc_req()->...
- * ->ldlm_cancel_callback()->...
- * ->osc_ldlm_blocking_ast()
- *
- * new environment has to be created to not corrupt outer
- * context.
- */
- env = cl_env_get(&refcheck);
- if (IS_ERR(env)) {
- result = PTR_ERR(env);
- break;
- }
-
- result = osc_dlm_blocking_ast0(env, dlmlock, data, flag);
- cl_env_put(env, &refcheck);
- break;
- }
- default:
- LBUG();
- }
- return result;
-}
-
-static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
-{
- struct ptlrpc_request *req = data;
- struct lu_env *env;
- struct ost_lvb *lvb;
- struct req_capsule *cap;
- struct cl_object *obj = NULL;
- int result;
- u16 refcheck;
-
- LASSERT(lustre_msg_get_opc(req->rq_reqmsg) == LDLM_GL_CALLBACK);
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env)) {
- result = PTR_ERR(env);
- goto out;
- }
-
- lock_res_and_lock(dlmlock);
- if (dlmlock->l_ast_data) {
- obj = osc2cl(dlmlock->l_ast_data);
- cl_object_get(obj);
- }
- unlock_res_and_lock(dlmlock);
-
- if (obj) {
- /* Do not grab the mutex of cl_lock for glimpse.
- * See LU-1274 for details.
-	 * BTW, it's okay for the cl_lock to be cancelled during
-	 * this period because the server can handle this race.
- * See ldlm_server_glimpse_ast() for details.
- * cl_lock_mutex_get(env, lock);
- */
- cap = &req->rq_pill;
- req_capsule_extend(cap, &RQF_LDLM_GL_CALLBACK);
- req_capsule_set_size(cap, &RMF_DLM_LVB, RCL_SERVER,
- sizeof(*lvb));
- result = req_capsule_server_pack(cap);
- if (result == 0) {
- lvb = req_capsule_server_get(cap, &RMF_DLM_LVB);
- result = cl_object_glimpse(env, obj, lvb);
- }
- if (!exp_connect_lvb_type(req->rq_export)) {
- req_capsule_shrink(&req->rq_pill, &RMF_DLM_LVB,
- sizeof(struct ost_lvb_v1),
- RCL_SERVER);
- }
- cl_object_put(env, obj);
- } else {
- /*
- * These errors are normal races, so we don't want to
- * fill the console with messages by calling
- * ptlrpc_error()
- */
- lustre_pack_reply(req, 1, NULL, NULL);
- result = -ELDLM_NO_LOCK_DATA;
- }
- cl_env_put(env, &refcheck);
-
-out:
- req->rq_status = result;
- return result;
-}
-
-static int weigh_cb(const struct lu_env *env, struct cl_io *io,
- struct osc_page *ops, void *cbdata)
-{
- struct cl_page *page = ops->ops_cl.cpl_page;
-
- if (cl_page_is_vmlocked(env, page) ||
- PageDirty(page->cp_vmpage) || PageWriteback(page->cp_vmpage)
- )
- return CLP_GANG_ABORT;
-
- *(pgoff_t *)cbdata = osc_index(ops) + 1;
- return CLP_GANG_OKAY;
-}
-
-static unsigned long osc_lock_weight(const struct lu_env *env,
- struct osc_object *oscobj,
- struct ldlm_extent *extent)
-{
- struct cl_io *io = &osc_env_info(env)->oti_io;
- struct cl_object *obj = cl_object_top(&oscobj->oo_cl);
- pgoff_t page_index;
- int result;
-
- io->ci_obj = obj;
- io->ci_ignore_layout = 1;
- result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
- if (result != 0)
- return result;
-
- page_index = cl_index(obj, extent->start);
- do {
- result = osc_page_gang_lookup(env, io, oscobj,
- page_index,
- cl_index(obj, extent->end),
- weigh_cb, (void *)&page_index);
- if (result == CLP_GANG_ABORT)
- break;
- if (result == CLP_GANG_RESCHED)
- cond_resched();
- } while (result != CLP_GANG_OKAY);
- cl_io_fini(env, io);
-
- return result == CLP_GANG_ABORT ? 1 : 0;
-}
-
-/**
- * Get the weight of dlm lock for early cancellation.
- */
-unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock)
-{
- struct lu_env *env;
- struct osc_object *obj;
- struct osc_lock *oscl;
- unsigned long weight;
- bool found = false;
- u16 refcheck;
-
- might_sleep();
- /*
-	 * osc_ldlm_weigh_ast has a complex context since it might be called
-	 * because of lock cancellation or from user input. We have to make
-	 * a new environment for it. It is probably safe to use the upper
-	 * context because cl_lock_put doesn't modify environment variables.
-	 * But just in case ..
- */
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
-		/* Mostly because of lack of memory; do not eliminate this lock */
- return 1;
-
- LASSERT(dlmlock->l_resource->lr_type == LDLM_EXTENT);
- obj = dlmlock->l_ast_data;
- if (!obj) {
- weight = 1;
- goto out;
- }
-
- spin_lock(&obj->oo_ol_spin);
- list_for_each_entry(oscl, &obj->oo_ol_list, ols_nextlock_oscobj) {
- if (oscl->ols_dlmlock && oscl->ols_dlmlock != dlmlock)
- continue;
- found = true;
- }
- spin_unlock(&obj->oo_ol_spin);
- if (found) {
- /*
-		 * If the lock is being used by an IO, definitely do not cancel it.
- */
- weight = 1;
- goto out;
- }
-
- weight = osc_lock_weight(env, obj, &dlmlock->l_policy_data.l_extent);
-
-out:
- cl_env_put(env, &refcheck);
- return weight;
-}
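Taken together, weigh_cb() and osc_lock_weight() implement a keep-or-cancel policy: the lock weighs 1 (keep) as soon as any covered page is VM-locked, dirty, or under writeback, and 0 otherwise. A minimal user-space sketch of that policy, with simplified stand-in types rather than the real radix-tree walk (the model_* names are hypothetical, not Lustre API):

/* Standalone model of osc_lock_weight(): returns nonzero (keep the
 * lock) if any page in [start, end] is still busy. Types and flags
 * are simplified stand-ins for the kernel's. */
#include <stdbool.h>
#include <stdio.h>

struct model_page { bool vmlocked, dirty, writeback; };

static unsigned long model_lock_weight(const struct model_page *pages,
				       unsigned long start, unsigned long end)
{
	for (unsigned long i = start; i <= end; i++)
		if (pages[i].vmlocked || pages[i].dirty || pages[i].writeback)
			return 1;	/* busy page found: do not cancel */
	return 0;			/* nothing pinned: lock may go */
}

int main(void)
{
	struct model_page pages[4] = { { 0 }, { .dirty = true }, { 0 }, { 0 } };

	printf("weight = %lu\n", model_lock_weight(pages, 0, 3)); /* 1 */
	return 0;
}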
-
-static void osc_lock_build_einfo(const struct lu_env *env,
- const struct cl_lock *lock,
- struct osc_object *osc,
- struct ldlm_enqueue_info *einfo)
-{
- einfo->ei_type = LDLM_EXTENT;
- einfo->ei_mode = osc_cl_lock2ldlm(lock->cll_descr.cld_mode);
- einfo->ei_cb_bl = osc_ldlm_blocking_ast;
- einfo->ei_cb_cp = ldlm_completion_ast;
- einfo->ei_cb_gl = osc_ldlm_glimpse_ast;
- einfo->ei_cbdata = osc; /* value to be put into ->l_ast_data */
-}
-
-/**
- * Determine if the lock should be converted into a lockless lock.
- *
- * Steps to check:
- * - whether the lock has an explicit requirement for a non-lockless lock;
- * - the io lock request type ci_lockreq;
- * - send the enqueue RPC to the OST to make the final decision;
- * - special treatment for lockless truncate.
- *
- * Additional policy can be implemented here, e.g., never do lockless-io
- * for large extents.
- */
-static void osc_lock_to_lockless(const struct lu_env *env,
- struct osc_lock *ols, int force)
-{
- struct cl_lock_slice *slice = &ols->ols_cl;
-
- LASSERT(ols->ols_state == OLS_NEW ||
- ols->ols_state == OLS_UPCALL_RECEIVED);
-
- if (force) {
- ols->ols_locklessable = 1;
- slice->cls_ops = &osc_lock_lockless_ops;
- } else {
- struct osc_io *oio = osc_env_io(env);
- struct cl_io *io = oio->oi_cl.cis_io;
- struct cl_object *obj = slice->cls_obj;
- struct osc_object *oob = cl2osc(obj);
- const struct osc_device *osd = lu2osc_dev(obj->co_lu.lo_dev);
- struct obd_connect_data *ocd;
-
- LASSERT(io->ci_lockreq == CILR_MANDATORY ||
- io->ci_lockreq == CILR_MAYBE ||
- io->ci_lockreq == CILR_NEVER);
-
- ocd = &class_exp2cliimp(osc_export(oob))->imp_connect_data;
- ols->ols_locklessable = (io->ci_type != CIT_SETATTR) &&
- (io->ci_lockreq == CILR_MAYBE) &&
- (ocd->ocd_connect_flags & OBD_CONNECT_SRVLOCK);
- if (io->ci_lockreq == CILR_NEVER ||
- /* lockless IO */
- (ols->ols_locklessable && osc_object_is_contended(oob)) ||
- /* lockless truncate */
- (cl_io_is_trunc(io) &&
- (ocd->ocd_connect_flags & OBD_CONNECT_TRUNCLOCK) &&
- osd->od_lockless_truncate)) {
- ols->ols_locklessable = 1;
- slice->cls_ops = &osc_lock_lockless_ops;
- }
- }
- LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols)));
-}
-
-static bool osc_lock_compatible(const struct osc_lock *qing,
- const struct osc_lock *qed)
-{
- struct cl_lock_descr *qed_descr = &qed->ols_cl.cls_lock->cll_descr;
- struct cl_lock_descr *qing_descr = &qing->ols_cl.cls_lock->cll_descr;
-
- if (qed->ols_glimpse)
- return true;
-
- if (qing_descr->cld_mode == CLM_READ && qed_descr->cld_mode == CLM_READ)
- return true;
-
- if (qed->ols_state < OLS_GRANTED)
- return true;
-
- if (qed_descr->cld_mode >= qing_descr->cld_mode &&
- qed_descr->cld_start <= qing_descr->cld_start &&
- qed_descr->cld_end >= qing_descr->cld_end)
- return true;
-
- return false;
-}
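The compatibility rules above reduce to: a glimpse lock never conflicts, read never conflicts with read, a not-yet-granted lock never conflicts, and a granted lock is compatible only if it already covers the new request in both mode and extent. A standalone restatement with plain integers (assuming the READ < WRITE mode ordering the kernel enum provides; all names are stand-ins):

/* User-space restatement of osc_lock_compatible(); extents are
 * inclusive [start, end]. */
#include <stdbool.h>
#include <assert.h>

enum { MODE_READ = 1, MODE_WRITE = 2 };

struct mlock {
	bool glimpse;		/* models ols_glimpse */
	bool granted;		/* models ols_state >= OLS_GRANTED */
	int mode;
	unsigned long start, end;
};

static bool model_compatible(const struct mlock *qing, const struct mlock *qed)
{
	if (qed->glimpse)
		return true;			/* glimpse never conflicts */
	if (qing->mode == MODE_READ && qed->mode == MODE_READ)
		return true;			/* read vs read */
	if (!qed->granted)
		return true;			/* not granted yet */
	/* a granted lock is compatible only if it already covers us */
	return qed->mode >= qing->mode &&
	       qed->start <= qing->start && qed->end >= qing->end;
}

int main(void)
{
	struct mlock want = { .granted = true, .mode = MODE_WRITE,
			      .start = 4, .end = 7 };
	struct mlock have = { .granted = true, .mode = MODE_WRITE,
			      .start = 0, .end = 15 };

	assert(model_compatible(&want, &have));	/* fully covered */
	have.end = 5;
	assert(!model_compatible(&want, &have));	/* partial cover: wait */
	return 0;
}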
-
-static void osc_lock_wake_waiters(const struct lu_env *env,
- struct osc_object *osc,
- struct osc_lock *oscl)
-{
- spin_lock(&osc->oo_ol_spin);
- list_del_init(&oscl->ols_nextlock_oscobj);
- spin_unlock(&osc->oo_ol_spin);
-
- spin_lock(&oscl->ols_lock);
- while (!list_empty(&oscl->ols_waiting_list)) {
- struct osc_lock *scan;
-
- scan = list_entry(oscl->ols_waiting_list.next, struct osc_lock,
- ols_wait_entry);
- list_del_init(&scan->ols_wait_entry);
-
- cl_sync_io_note(env, scan->ols_owner, 0);
- }
- spin_unlock(&oscl->ols_lock);
-}
-
-static int osc_lock_enqueue_wait(const struct lu_env *env,
- struct osc_object *obj,
- struct osc_lock *oscl)
-{
- struct osc_lock *tmp_oscl;
- struct cl_lock_descr *need = &oscl->ols_cl.cls_lock->cll_descr;
- struct cl_sync_io *waiter = &osc_env_info(env)->oti_anchor;
- int rc = 0;
-
- spin_lock(&obj->oo_ol_spin);
- list_add_tail(&oscl->ols_nextlock_oscobj, &obj->oo_ol_list);
-
-restart:
- list_for_each_entry(tmp_oscl, &obj->oo_ol_list,
- ols_nextlock_oscobj) {
- struct cl_lock_descr *descr;
-
- if (tmp_oscl == oscl)
- break;
-
- descr = &tmp_oscl->ols_cl.cls_lock->cll_descr;
- if (descr->cld_start > need->cld_end ||
- descr->cld_end < need->cld_start)
- continue;
-
-		/* We're not supposed to give up a group lock */
- if (descr->cld_mode == CLM_GROUP)
- break;
-
- if (!osc_lock_is_lockless(oscl) &&
- osc_lock_compatible(oscl, tmp_oscl))
- continue;
-
- /* wait for conflicting lock to be canceled */
- cl_sync_io_init(waiter, 1, cl_sync_io_end);
- oscl->ols_owner = waiter;
-
- spin_lock(&tmp_oscl->ols_lock);
- /* add oscl into tmp's ols_waiting list */
- list_add_tail(&oscl->ols_wait_entry,
- &tmp_oscl->ols_waiting_list);
- spin_unlock(&tmp_oscl->ols_lock);
-
- spin_unlock(&obj->oo_ol_spin);
- rc = cl_sync_io_wait(env, waiter, 0);
- spin_lock(&obj->oo_ol_spin);
- if (rc < 0)
- break;
-
- oscl->ols_owner = NULL;
- goto restart;
- }
- spin_unlock(&obj->oo_ol_spin);
-
- return rc;
-}
-
-/**
- * Implementation of cl_lock_operations::clo_enqueue() method for osc
- * layer. This initiates ldlm enqueue:
- *
- * - cancels conflicting locks early (osc_lock_enqueue_wait());
- *
- * - calls osc_enqueue_base() to do actual enqueue.
- *
- * osc_enqueue_base() is supplied with an upcall function that is executed
- * when the lock is received, either after a locally cached ldlm lock is matched, or
- * when a reply from the server is received.
- *
- * This function does not wait for the network communication to complete.
- */
-static int osc_lock_enqueue(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- struct cl_io *unused, struct cl_sync_io *anchor)
-{
- struct osc_thread_info *info = osc_env_info(env);
- struct osc_io *oio = osc_env_io(env);
- struct osc_object *osc = cl2osc(slice->cls_obj);
- struct osc_lock *oscl = cl2osc_lock(slice);
- struct cl_lock *lock = slice->cls_lock;
- struct ldlm_res_id *resname = &info->oti_resname;
- union ldlm_policy_data *policy = &info->oti_policy;
- osc_enqueue_upcall_f upcall = osc_lock_upcall;
- void *cookie = oscl;
- bool async = false;
- int result;
-
- LASSERTF(ergo(oscl->ols_glimpse, lock->cll_descr.cld_mode <= CLM_READ),
- "lock = %p, ols = %p\n", lock, oscl);
-
- if (oscl->ols_state == OLS_GRANTED)
- return 0;
-
- if (oscl->ols_flags & LDLM_FL_TEST_LOCK)
- goto enqueue_base;
-
- if (oscl->ols_glimpse) {
- LASSERT(equi(oscl->ols_agl, !anchor));
- async = true;
- goto enqueue_base;
- }
-
- result = osc_lock_enqueue_wait(env, osc, oscl);
- if (result < 0)
- goto out;
-
-	/* we can grant a lockless lock right after all conflicting locks
- * are canceled.
- */
- if (osc_lock_is_lockless(oscl)) {
- oscl->ols_state = OLS_GRANTED;
- oio->oi_lockless = 1;
- return 0;
- }
-
-enqueue_base:
- oscl->ols_state = OLS_ENQUEUED;
- if (anchor) {
- atomic_inc(&anchor->csi_sync_nr);
- oscl->ols_owner = anchor;
- }
-
-	/*
-	 * The DLM lock's ast data must be the osc_object; for a glimpse or
-	 * AGL lock, async of osc_enqueue_base() must be true and the DLM
-	 * enqueue callback is set to osc_lock_upcall() with the osc_lock
-	 * as cookie.
-	 */
- ostid_build_res_name(&osc->oo_oinfo->loi_oi, resname);
- osc_lock_build_policy(env, lock, policy);
- if (oscl->ols_agl) {
- oscl->ols_einfo.ei_cbdata = NULL;
- /* hold a reference for callback */
- cl_object_get(osc2cl(osc));
- upcall = osc_lock_upcall_agl;
- cookie = osc;
- }
- result = osc_enqueue_base(osc_export(osc), resname, &oscl->ols_flags,
- policy, &oscl->ols_lvb,
- osc->oo_oinfo->loi_kms_valid,
- upcall, cookie,
- &oscl->ols_einfo, PTLRPCD_SET, async,
- oscl->ols_agl);
- if (!result) {
- if (osc_lock_is_lockless(oscl)) {
- oio->oi_lockless = 1;
- } else if (!async) {
- LASSERT(oscl->ols_state == OLS_GRANTED);
- LASSERT(oscl->ols_hold);
- LASSERT(oscl->ols_dlmlock);
- }
- } else if (oscl->ols_agl) {
- cl_object_put(env, osc2cl(osc));
- result = 0;
- }
-
-out:
- if (result < 0) {
- oscl->ols_state = OLS_CANCELLED;
- osc_lock_wake_waiters(env, osc, oscl);
-
- if (anchor)
- cl_sync_io_note(env, anchor, result);
- }
- return result;
-}
-
-/**
- * Breaks a link between osc_lock and dlm_lock.
- */
-static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck)
-{
- struct ldlm_lock *dlmlock;
-
- dlmlock = olck->ols_dlmlock;
- if (!dlmlock)
- return;
-
- if (olck->ols_hold) {
- olck->ols_hold = 0;
- ldlm_lock_decref(&olck->ols_handle, olck->ols_einfo.ei_mode);
- olck->ols_handle.cookie = 0ULL;
- }
-
- olck->ols_dlmlock = NULL;
-
- /* release a reference taken in osc_lock_upcall(). */
- LASSERT(olck->ols_has_ref);
- lu_ref_del(&dlmlock->l_reference, "osc_lock", olck);
- LDLM_LOCK_RELEASE(dlmlock);
- olck->ols_has_ref = 0;
-}
-
-/**
- * Implements cl_lock_operations::clo_cancel() method for osc layer. This is
- * called (as part of cl_lock_cancel()) when a lock is canceled either
- * voluntarily (LRU pressure, early cancellation, umount, etc.) or due to a
- * conflict with some other lock somewhere in the cluster. This function does
- * the following:
- *
- * - invalidates all pages protected by this lock (after sending dirty
- * ones to the server, as necessary);
- *
- * - decref's underlying ldlm lock;
- *
- * - cancels ldlm lock (ldlm_cli_cancel()).
- */
-static void osc_lock_cancel(const struct lu_env *env,
- const struct cl_lock_slice *slice)
-{
- struct osc_object *obj = cl2osc(slice->cls_obj);
- struct osc_lock *oscl = cl2osc_lock(slice);
-
- LINVRNT(osc_lock_invariant(oscl));
-
- osc_lock_detach(env, oscl);
- oscl->ols_state = OLS_CANCELLED;
- oscl->ols_flags &= ~LDLM_FL_LVB_READY;
-
- osc_lock_wake_waiters(env, obj, oscl);
-}
-
-static int osc_lock_print(const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct cl_lock_slice *slice)
-{
- struct osc_lock *lock = cl2osc_lock(slice);
-
- (*p)(env, cookie, "%p %#16llx %#llx %d %p ",
- lock->ols_dlmlock, lock->ols_flags, lock->ols_handle.cookie,
- lock->ols_state, lock->ols_owner);
- osc_lvb_print(env, cookie, p, &lock->ols_lvb);
- return 0;
-}
-
-static const struct cl_lock_operations osc_lock_ops = {
- .clo_fini = osc_lock_fini,
- .clo_enqueue = osc_lock_enqueue,
- .clo_cancel = osc_lock_cancel,
- .clo_print = osc_lock_print,
-};
-
-static void osc_lock_lockless_cancel(const struct lu_env *env,
- const struct cl_lock_slice *slice)
-{
- struct osc_lock *ols = cl2osc_lock(slice);
- struct osc_object *osc = cl2osc(slice->cls_obj);
- struct cl_lock_descr *descr = &slice->cls_lock->cll_descr;
- int result;
-
- LASSERT(!ols->ols_dlmlock);
- result = osc_lock_flush(osc, descr->cld_start, descr->cld_end,
- descr->cld_mode, 0);
- if (result)
- CERROR("Pages for lockless lock %p were not purged(%d)\n",
- ols, result);
-
- osc_lock_wake_waiters(env, osc, ols);
-}
-
-static const struct cl_lock_operations osc_lock_lockless_ops = {
- .clo_fini = osc_lock_fini,
- .clo_enqueue = osc_lock_enqueue,
- .clo_cancel = osc_lock_lockless_cancel,
- .clo_print = osc_lock_print
-};
-
-static void osc_lock_set_writer(const struct lu_env *env,
- const struct cl_io *io,
- struct cl_object *obj, struct osc_lock *oscl)
-{
- struct cl_lock_descr *descr = &oscl->ols_cl.cls_lock->cll_descr;
- pgoff_t io_start;
- pgoff_t io_end;
-
- if (!cl_object_same(io->ci_obj, obj))
- return;
-
- if (likely(io->ci_type == CIT_WRITE)) {
- io_start = cl_index(obj, io->u.ci_rw.crw_pos);
- io_end = cl_index(obj, io->u.ci_rw.crw_pos +
- io->u.ci_rw.crw_count - 1);
- if (cl_io_is_append(io)) {
- io_start = 0;
- io_end = CL_PAGE_EOF;
- }
- } else {
- LASSERT(cl_io_is_mkwrite(io));
- io_start = io->u.ci_fault.ft_index;
- io_end = io->u.ci_fault.ft_index;
- }
-
- if (descr->cld_mode >= CLM_WRITE &&
- descr->cld_start <= io_start && descr->cld_end >= io_end) {
- struct osc_io *oio = osc_env_io(env);
-
- /* There must be only one lock to match the write region */
- LASSERT(!oio->oi_write_osclock);
- oio->oi_write_osclock = oscl;
- }
-}
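The write region computed above is just the byte range [crw_pos, crw_pos + crw_count - 1] translated to page indices. A quick standalone check of that arithmetic, assuming cl_index() reduces to a byte-offset-to-page-index shift with 4 KiB pages (an assumption; the real conversion goes through the cl_object):

/* Model of the write-region computation in osc_lock_set_writer(). */
#include <stdio.h>

#define MODEL_PAGE_SHIFT 12	/* 4 KiB pages, an assumption */

static unsigned long model_cl_index(unsigned long long offset)
{
	return (unsigned long)(offset >> MODEL_PAGE_SHIFT);
}

int main(void)
{
	unsigned long long pos = 4096, count = 8192;	/* write [4096, 12287] */
	unsigned long io_start = model_cl_index(pos);
	unsigned long io_end = model_cl_index(pos + count - 1);

	/* pages 1..2 are covered; a CLM_WRITE lock must span both */
	printf("io_start=%lu io_end=%lu\n", io_start, io_end);
	return 0;
}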
-
-int osc_lock_init(const struct lu_env *env,
- struct cl_object *obj, struct cl_lock *lock,
- const struct cl_io *io)
-{
- struct osc_lock *oscl;
- __u32 enqflags = lock->cll_descr.cld_enq_flags;
-
- oscl = kmem_cache_zalloc(osc_lock_kmem, GFP_NOFS);
- if (!oscl)
- return -ENOMEM;
-
- oscl->ols_state = OLS_NEW;
- spin_lock_init(&oscl->ols_lock);
- INIT_LIST_HEAD(&oscl->ols_waiting_list);
- INIT_LIST_HEAD(&oscl->ols_wait_entry);
- INIT_LIST_HEAD(&oscl->ols_nextlock_oscobj);
-
- oscl->ols_flags = osc_enq2ldlm_flags(enqflags);
- oscl->ols_agl = !!(enqflags & CEF_AGL);
- if (oscl->ols_agl)
- oscl->ols_flags |= LDLM_FL_BLOCK_NOWAIT;
- if (oscl->ols_flags & LDLM_FL_HAS_INTENT) {
- oscl->ols_flags |= LDLM_FL_BLOCK_GRANTED;
- oscl->ols_glimpse = 1;
- }
- osc_lock_build_einfo(env, lock, cl2osc(obj), &oscl->ols_einfo);
-
- cl_lock_slice_add(lock, &oscl->ols_cl, obj, &osc_lock_ops);
-
- if (!(enqflags & CEF_MUST))
- /* try to convert this lock to a lockless lock */
- osc_lock_to_lockless(env, oscl, (enqflags & CEF_NEVER));
- if (oscl->ols_locklessable && !(enqflags & CEF_DISCARD_DATA))
- oscl->ols_flags |= LDLM_FL_DENY_ON_CONTENTION;
-
- if (io->ci_type == CIT_WRITE || cl_io_is_mkwrite(io))
- osc_lock_set_writer(env, io, obj, oscl);
-
- LDLM_DEBUG_NOLOCK("lock %p, osc lock %p, flags %llx",
- lock, oscl, oscl->ols_flags);
-
- return 0;
-}
-
-/**
- * Finds an existing lock covering the given index.
- */
-struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
- struct osc_object *obj, pgoff_t index,
- enum osc_dap_flags dap_flags)
-{
- struct osc_thread_info *info = osc_env_info(env);
- struct ldlm_res_id *resname = &info->oti_resname;
- union ldlm_policy_data *policy = &info->oti_policy;
- struct lustre_handle lockh;
- struct ldlm_lock *lock = NULL;
- enum ldlm_mode mode;
- __u64 flags;
-
- ostid_build_res_name(&obj->oo_oinfo->loi_oi, resname);
- osc_index2policy(policy, osc2cl(obj), index, index);
- policy->l_extent.gid = LDLM_GID_ANY;
-
- flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
- if (dap_flags & OSC_DAP_FL_TEST_LOCK)
- flags |= LDLM_FL_TEST_LOCK;
-
- /*
-	 * It is fine to match any group lock since there can be only one
-	 * with a unique gid, and it conflicts with all other lock modes too.
- */
-again:
- mode = osc_match_base(osc_export(obj), resname, LDLM_EXTENT, policy,
- LCK_PR | LCK_PW | LCK_GROUP, &flags, obj, &lockh,
- dap_flags & OSC_DAP_FL_CANCELING);
- if (mode != 0) {
- lock = ldlm_handle2lock(&lockh);
- /* RACE: the lock is cancelled so let's try again */
- if (unlikely(!lock))
- goto again;
- }
- return lock;
-}
-
-/** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_object.c b/drivers/staging/lustre/lustre/osc/osc_object.c
deleted file mode 100644
index 84240181c7ea..000000000000
--- a/drivers/staging/lustre/lustre/osc/osc_object.c
+++ /dev/null
@@ -1,473 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_object for OSC layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@intel.com>
- */
-
-#define DEBUG_SUBSYSTEM S_OSC
-
-#include "osc_cl_internal.h"
-
-/** \addtogroup osc
- * @{
- */
-
-/*****************************************************************************
- *
- * Type conversions.
- *
- */
-
-static struct lu_object *osc2lu(struct osc_object *osc)
-{
- return &osc->oo_cl.co_lu;
-}
-
-static struct osc_object *lu2osc(const struct lu_object *obj)
-{
- LINVRNT(osc_is_object(obj));
- return container_of(obj, struct osc_object, oo_cl.co_lu);
-}
-
-/*****************************************************************************
- *
- * Object operations.
- *
- */
-
-static int osc_object_init(const struct lu_env *env, struct lu_object *obj,
- const struct lu_object_conf *conf)
-{
- struct osc_object *osc = lu2osc(obj);
- const struct cl_object_conf *cconf = lu2cl_conf(conf);
-
- osc->oo_oinfo = cconf->u.coc_oinfo;
- INIT_LIST_HEAD(&osc->oo_ready_item);
- INIT_LIST_HEAD(&osc->oo_hp_ready_item);
- INIT_LIST_HEAD(&osc->oo_write_item);
- INIT_LIST_HEAD(&osc->oo_read_item);
-
- atomic_set(&osc->oo_nr_ios, 0);
- init_waitqueue_head(&osc->oo_io_waitq);
-
- osc->oo_root.rb_node = NULL;
- INIT_LIST_HEAD(&osc->oo_hp_exts);
- INIT_LIST_HEAD(&osc->oo_urgent_exts);
- INIT_LIST_HEAD(&osc->oo_rpc_exts);
- INIT_LIST_HEAD(&osc->oo_reading_exts);
- atomic_set(&osc->oo_nr_reads, 0);
- atomic_set(&osc->oo_nr_writes, 0);
- spin_lock_init(&osc->oo_lock);
- spin_lock_init(&osc->oo_tree_lock);
- spin_lock_init(&osc->oo_ol_spin);
- INIT_LIST_HEAD(&osc->oo_ol_list);
-
- cl_object_page_init(lu2cl(obj), sizeof(struct osc_page));
-
- return 0;
-}
-
-static void osc_object_free(const struct lu_env *env, struct lu_object *obj)
-{
- struct osc_object *osc = lu2osc(obj);
-
- LASSERT(list_empty(&osc->oo_ready_item));
- LASSERT(list_empty(&osc->oo_hp_ready_item));
- LASSERT(list_empty(&osc->oo_write_item));
- LASSERT(list_empty(&osc->oo_read_item));
-
- LASSERT(!osc->oo_root.rb_node);
- LASSERT(list_empty(&osc->oo_hp_exts));
- LASSERT(list_empty(&osc->oo_urgent_exts));
- LASSERT(list_empty(&osc->oo_rpc_exts));
- LASSERT(list_empty(&osc->oo_reading_exts));
- LASSERT(atomic_read(&osc->oo_nr_reads) == 0);
- LASSERT(atomic_read(&osc->oo_nr_writes) == 0);
- LASSERT(list_empty(&osc->oo_ol_list));
- LASSERT(!atomic_read(&osc->oo_nr_ios));
-
- lu_object_fini(obj);
- kmem_cache_free(osc_object_kmem, osc);
-}
-
-int osc_lvb_print(const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct ost_lvb *lvb)
-{
- return (*p)(env, cookie, "size: %llu mtime: %llu atime: %llu ctime: %llu blocks: %llu",
- lvb->lvb_size, lvb->lvb_mtime, lvb->lvb_atime,
- lvb->lvb_ctime, lvb->lvb_blocks);
-}
-
-static int osc_object_print(const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct lu_object *obj)
-{
- struct osc_object *osc = lu2osc(obj);
- struct lov_oinfo *oinfo = osc->oo_oinfo;
- struct osc_async_rc *ar = &oinfo->loi_ar;
-
- (*p)(env, cookie, "id: " DOSTID " idx: %d gen: %d kms_valid: %u kms %llu rc: %d force_sync: %d min_xid: %llu ",
- POSTID(&oinfo->loi_oi), oinfo->loi_ost_idx,
- oinfo->loi_ost_gen, oinfo->loi_kms_valid, oinfo->loi_kms,
- ar->ar_rc, ar->ar_force_sync, ar->ar_min_xid);
- osc_lvb_print(env, cookie, p, &oinfo->loi_lvb);
- return 0;
-}
-
-static int osc_attr_get(const struct lu_env *env, struct cl_object *obj,
- struct cl_attr *attr)
-{
- struct lov_oinfo *oinfo = cl2osc(obj)->oo_oinfo;
-
- cl_lvb2attr(attr, &oinfo->loi_lvb);
- attr->cat_kms = oinfo->loi_kms_valid ? oinfo->loi_kms : 0;
- return 0;
-}
-
-static int osc_attr_update(const struct lu_env *env, struct cl_object *obj,
- const struct cl_attr *attr, unsigned int valid)
-{
- struct lov_oinfo *oinfo = cl2osc(obj)->oo_oinfo;
- struct ost_lvb *lvb = &oinfo->loi_lvb;
-
- if (valid & CAT_SIZE)
- lvb->lvb_size = attr->cat_size;
- if (valid & CAT_MTIME)
- lvb->lvb_mtime = attr->cat_mtime;
- if (valid & CAT_ATIME)
- lvb->lvb_atime = attr->cat_atime;
- if (valid & CAT_CTIME)
- lvb->lvb_ctime = attr->cat_ctime;
- if (valid & CAT_BLOCKS)
- lvb->lvb_blocks = attr->cat_blocks;
- if (valid & CAT_KMS) {
- CDEBUG(D_CACHE, "set kms from %llu to %llu\n",
- oinfo->loi_kms, (__u64)attr->cat_kms);
- loi_kms_set(oinfo, attr->cat_kms);
- }
- return 0;
-}
-
-static int osc_object_glimpse(const struct lu_env *env,
- const struct cl_object *obj, struct ost_lvb *lvb)
-{
- struct lov_oinfo *oinfo = cl2osc(obj)->oo_oinfo;
-
- lvb->lvb_size = oinfo->loi_kms;
- lvb->lvb_blocks = oinfo->loi_lvb.lvb_blocks;
- return 0;
-}
-
-static int osc_object_ast_clear(struct ldlm_lock *lock, void *data)
-{
- if (lock->l_ast_data == data)
- lock->l_ast_data = NULL;
- return LDLM_ITER_CONTINUE;
-}
-
-static int osc_object_prune(const struct lu_env *env, struct cl_object *obj)
-{
- struct osc_object *osc = cl2osc(obj);
- struct ldlm_res_id *resname = &osc_env_info(env)->oti_resname;
-
- /* DLM locks don't hold a reference of osc_object so we have to
- * clear it before the object is being destroyed.
- */
- ostid_build_res_name(&osc->oo_oinfo->loi_oi, resname);
- ldlm_resource_iterate(osc_export(osc)->exp_obd->obd_namespace, resname,
- osc_object_ast_clear, osc);
- return 0;
-}
-
-static int osc_object_fiemap(const struct lu_env *env, struct cl_object *obj,
- struct ll_fiemap_info_key *fmkey,
- struct fiemap *fiemap, size_t *buflen)
-{
- struct obd_export *exp = osc_export(cl2osc(obj));
- union ldlm_policy_data policy;
- struct ptlrpc_request *req;
- struct lustre_handle lockh;
- struct ldlm_res_id resid;
- enum ldlm_mode mode = 0;
- struct fiemap *reply;
- char *tmp;
- int rc;
-
- fmkey->lfik_oa.o_oi = cl2osc(obj)->oo_oinfo->loi_oi;
- if (!(fmkey->lfik_fiemap.fm_flags & FIEMAP_FLAG_SYNC))
- goto skip_locking;
-
- policy.l_extent.start = fmkey->lfik_fiemap.fm_start & PAGE_MASK;
-
- if (OBD_OBJECT_EOF - fmkey->lfik_fiemap.fm_length <=
- fmkey->lfik_fiemap.fm_start + PAGE_SIZE - 1)
- policy.l_extent.end = OBD_OBJECT_EOF;
- else
- policy.l_extent.end = (fmkey->lfik_fiemap.fm_start +
- fmkey->lfik_fiemap.fm_length +
- PAGE_SIZE - 1) & PAGE_MASK;
-
- ostid_build_res_name(&fmkey->lfik_oa.o_oi, &resid);
- mode = ldlm_lock_match(exp->exp_obd->obd_namespace,
- LDLM_FL_BLOCK_GRANTED | LDLM_FL_LVB_READY,
- &resid, LDLM_EXTENT, &policy,
- LCK_PR | LCK_PW, &lockh, 0);
- if (mode) { /* lock is cached on client */
- if (mode != LCK_PR) {
- ldlm_lock_addref(&lockh, LCK_PR);
- ldlm_lock_decref(&lockh, LCK_PW);
- }
- } else { /* no cached lock, needs acquire lock on server side */
- fmkey->lfik_oa.o_valid |= OBD_MD_FLFLAGS;
- fmkey->lfik_oa.o_flags |= OBD_FL_SRVLOCK;
- }
-
-skip_locking:
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_OST_GET_INFO_FIEMAP);
- if (!req) {
- rc = -ENOMEM;
- goto drop_lock;
- }
-
- req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_KEY, RCL_CLIENT,
- sizeof(*fmkey));
- req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL, RCL_CLIENT,
- *buflen);
- req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL, RCL_SERVER,
- *buflen);
-
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GET_INFO);
- if (rc) {
- ptlrpc_request_free(req);
- goto drop_lock;
- }
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_KEY);
- memcpy(tmp, fmkey, sizeof(*fmkey));
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_VAL);
- memcpy(tmp, fiemap, *buflen);
- ptlrpc_request_set_replen(req);
-
- rc = ptlrpc_queue_wait(req);
- if (rc)
- goto fini_req;
-
- reply = req_capsule_server_get(&req->rq_pill, &RMF_FIEMAP_VAL);
- if (!reply) {
- rc = -EPROTO;
- goto fini_req;
- }
-
- memcpy(fiemap, reply, *buflen);
-fini_req:
- ptlrpc_req_finished(req);
-drop_lock:
- if (mode)
- ldlm_lock_decref(&lockh, LCK_PR);
- return rc;
-}
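The DLM extent used for the FIEMAP lock is the user-supplied range rounded out to page boundaries, with a guard against fm_start + fm_length overflowing past OBD_OBJECT_EOF. A user-space sketch of just that rounding (constants are modelled, not the kernel's):

/* Standalone model of the extent rounding in osc_object_fiemap(). */
#include <stdint.h>
#include <stdio.h>

#define M_PAGE_SIZE	4096ULL
#define M_PAGE_MASK	(~(M_PAGE_SIZE - 1))
#define M_OBJECT_EOF	UINT64_MAX

static void model_fiemap_extent(uint64_t start, uint64_t length,
				uint64_t *ext_start, uint64_t *ext_end)
{
	*ext_start = start & M_PAGE_MASK;		/* round down */
	if (M_OBJECT_EOF - length <= start + M_PAGE_SIZE - 1)
		*ext_end = M_OBJECT_EOF;		/* would overflow EOF */
	else
		*ext_end = (start + length + M_PAGE_SIZE - 1) & M_PAGE_MASK;
}

int main(void)
{
	uint64_t s, e;

	model_fiemap_extent(5000, 100, &s, &e);
	printf("extent [%llu, %llu]\n",			/* [4096, 8192] */
	       (unsigned long long)s, (unsigned long long)e);
	return 0;
}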
-
-void osc_object_set_contended(struct osc_object *obj)
-{
- obj->oo_contention_time = jiffies;
- /* mb(); */
- obj->oo_contended = 1;
-}
-
-void osc_object_clear_contended(struct osc_object *obj)
-{
- obj->oo_contended = 0;
-}
-
-int osc_object_is_contended(struct osc_object *obj)
-{
- struct osc_device *dev = lu2osc_dev(obj->oo_cl.co_lu.lo_dev);
- int osc_contention_time = dev->od_contention_time;
- unsigned long cur_time = jiffies;
- unsigned long retry_time;
-
- if (OBD_FAIL_CHECK(OBD_FAIL_OSC_OBJECT_CONTENTION))
- return 1;
-
- if (!obj->oo_contended)
- return 0;
-
- /*
-	 * I like copy-paste. The code is copied from
-	 * ll_file_is_contended().
- */
- retry_time = obj->oo_contention_time + osc_contention_time * HZ;
- if (time_after(cur_time, retry_time)) {
- osc_object_clear_contended(obj);
- return 0;
- }
- return 1;
-}
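The contention flag therefore self-clears once od_contention_time seconds have elapsed since the last recorded contention. A sketch of the same timeout logic using plain seconds instead of jiffies (model only; the kernel comparison uses time_after() precisely so it survives jiffies wraparound):

/* Model of the contention timeout in osc_object_is_contended(). */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct model_obj {
	bool contended;
	time_t contention_time;	/* when contention was last seen */
};

static bool model_is_contended(struct model_obj *obj, int timeout_sec)
{
	if (!obj->contended)
		return false;
	if (time(NULL) > obj->contention_time + timeout_sec) {
		obj->contended = false;	/* timed out: clear the flag */
		return false;
	}
	return true;
}

int main(void)
{
	struct model_obj obj = { .contended = true,
				 .contention_time = time(NULL) - 60 };

	/* contention seen 60 s ago with a 32 s window: already cleared */
	printf("contended: %d\n", model_is_contended(&obj, 32));
	return 0;
}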
-
-/**
- * Implementation of struct cl_object_operations::coo_req_attr_set() for osc
- * layer. osc is responsible for struct obdo::o_id and struct obdo::o_seq
- * fields.
- */
-static void osc_req_attr_set(const struct lu_env *env, struct cl_object *obj,
- struct cl_req_attr *attr)
-{
- u64 flags = attr->cra_flags;
- struct lov_oinfo *oinfo;
- struct ost_lvb *lvb;
- struct obdo *oa;
-
- oinfo = cl2osc(obj)->oo_oinfo;
- lvb = &oinfo->loi_lvb;
- oa = attr->cra_oa;
-
- if (flags & OBD_MD_FLMTIME) {
- oa->o_mtime = lvb->lvb_mtime;
- oa->o_valid |= OBD_MD_FLMTIME;
- }
- if (flags & OBD_MD_FLATIME) {
- oa->o_atime = lvb->lvb_atime;
- oa->o_valid |= OBD_MD_FLATIME;
- }
- if (flags & OBD_MD_FLCTIME) {
- oa->o_ctime = lvb->lvb_ctime;
- oa->o_valid |= OBD_MD_FLCTIME;
- }
- if (flags & OBD_MD_FLGROUP) {
- ostid_set_seq(&oa->o_oi, ostid_seq(&oinfo->loi_oi));
- oa->o_valid |= OBD_MD_FLGROUP;
- }
- if (flags & OBD_MD_FLID) {
- int rc;
-
- rc = ostid_set_id(&oa->o_oi, ostid_id(&oinfo->loi_oi));
- if (rc) {
- CERROR("Bad %llu to set " DOSTID " : rc %d\n",
- (unsigned long long)ostid_id(&oinfo->loi_oi),
- POSTID(&oa->o_oi), rc);
- }
- oa->o_valid |= OBD_MD_FLID;
- }
- if (flags & OBD_MD_FLHANDLE) {
- struct ldlm_lock *lock;
- struct osc_page *opg;
-
- opg = osc_cl_page_osc(attr->cra_page, cl2osc(obj));
- lock = osc_dlmlock_at_pgoff(env, cl2osc(obj), osc_index(opg),
- OSC_DAP_FL_TEST_LOCK | OSC_DAP_FL_CANCELING);
- if (!lock && !opg->ops_srvlock) {
- struct ldlm_resource *res;
- struct ldlm_res_id *resname;
-
- CL_PAGE_DEBUG(D_ERROR, env, attr->cra_page,
- "uncovered page!\n");
-
- resname = &osc_env_info(env)->oti_resname;
- ostid_build_res_name(&oinfo->loi_oi, resname);
- res = ldlm_resource_get(
- osc_export(cl2osc(obj))->exp_obd->obd_namespace,
- NULL, resname, LDLM_EXTENT, 0);
- ldlm_resource_dump(D_ERROR, res);
-
- LBUG();
- }
-
- /* check for lockless io. */
- if (lock) {
- oa->o_handle = lock->l_remote_handle;
- oa->o_valid |= OBD_MD_FLHANDLE;
- LDLM_LOCK_PUT(lock);
- }
- }
-}
-
-static const struct cl_object_operations osc_ops = {
- .coo_page_init = osc_page_init,
- .coo_lock_init = osc_lock_init,
- .coo_io_init = osc_io_init,
- .coo_attr_get = osc_attr_get,
- .coo_attr_update = osc_attr_update,
- .coo_glimpse = osc_object_glimpse,
- .coo_prune = osc_object_prune,
- .coo_fiemap = osc_object_fiemap,
- .coo_req_attr_set = osc_req_attr_set
-};
-
-static const struct lu_object_operations osc_lu_obj_ops = {
- .loo_object_init = osc_object_init,
- .loo_object_release = NULL,
- .loo_object_free = osc_object_free,
- .loo_object_print = osc_object_print,
- .loo_object_invariant = NULL
-};
-
-struct lu_object *osc_object_alloc(const struct lu_env *env,
- const struct lu_object_header *unused,
- struct lu_device *dev)
-{
- struct osc_object *osc;
- struct lu_object *obj;
-
- osc = kmem_cache_zalloc(osc_object_kmem, GFP_NOFS);
- if (osc) {
- obj = osc2lu(osc);
- lu_object_init(obj, NULL, dev);
- osc->oo_cl.co_ops = &osc_ops;
- obj->lo_ops = &osc_lu_obj_ops;
- } else {
- obj = NULL;
- }
- return obj;
-}
-
-int osc_object_invalidate(const struct lu_env *env, struct osc_object *osc)
-{
- CDEBUG(D_INODE, "Invalidate osc object: %p, # of active IOs: %d\n",
- osc, atomic_read(&osc->oo_nr_ios));
-
- wait_event_idle(osc->oo_io_waitq, !atomic_read(&osc->oo_nr_ios));
-
- /* Discard all dirty pages of this object. */
- osc_cache_truncate_start(env, osc, 0, NULL);
-
- /* Discard all caching pages */
- osc_lock_discard_pages(env, osc, 0, CL_PAGE_EOF, CLM_WRITE);
-
- /* Clear ast data of dlm lock. Do this after discarding all pages */
- osc_object_prune(env, osc2cl(osc));
-
- return 0;
-}
-
-/** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c
deleted file mode 100644
index 20c553ef3a5e..000000000000
--- a/drivers/staging/lustre/lustre/osc/osc_page.c
+++ /dev/null
@@ -1,1094 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_page for OSC layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@intel.com>
- */
-
-#define DEBUG_SUBSYSTEM S_OSC
-
-#include <linux/math64.h>
-#include "osc_cl_internal.h"
-
-static void osc_lru_del(struct client_obd *cli, struct osc_page *opg);
-static void osc_lru_use(struct client_obd *cli, struct osc_page *opg);
-static int osc_lru_alloc(const struct lu_env *env, struct client_obd *cli,
- struct osc_page *opg);
-
-/** \addtogroup osc
- * @{
- */
-
-/*****************************************************************************
- *
- * Page operations.
- *
- */
-static void osc_page_transfer_get(struct osc_page *opg, const char *label)
-{
- struct cl_page *page = opg->ops_cl.cpl_page;
-
- LASSERT(!opg->ops_transfer_pinned);
- cl_page_get(page);
- lu_ref_add_atomic(&page->cp_reference, label, page);
- opg->ops_transfer_pinned = 1;
-}
-
-static void osc_page_transfer_put(const struct lu_env *env,
- struct osc_page *opg)
-{
- struct cl_page *page = opg->ops_cl.cpl_page;
-
- if (opg->ops_transfer_pinned) {
- opg->ops_transfer_pinned = 0;
- lu_ref_del(&page->cp_reference, "transfer", page);
- cl_page_put(env, page);
- }
-}
-
-/**
- * This is called once for every page when it is submitted for a transfer,
- * either opportunistic (osc_page_cache_add()) or immediate
- * (osc_page_submit()).
- */
-static void osc_page_transfer_add(const struct lu_env *env,
- struct osc_page *opg, enum cl_req_type crt)
-{
- struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
-
- osc_lru_use(osc_cli(obj), opg);
-}
-
-int osc_page_cache_add(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io)
-{
- struct osc_page *opg = cl2osc_page(slice);
- int result;
-
- osc_page_transfer_get(opg, "transfer\0cache");
- result = osc_queue_async_io(env, io, opg);
- if (result != 0)
- osc_page_transfer_put(env, opg);
- else
- osc_page_transfer_add(env, opg, CRT_WRITE);
-
- return result;
-}
-
-void osc_index2policy(union ldlm_policy_data *policy,
- const struct cl_object *obj,
- pgoff_t start, pgoff_t end)
-{
- memset(policy, 0, sizeof(*policy));
- policy->l_extent.start = cl_offset(obj, start);
- policy->l_extent.end = cl_offset(obj, end + 1) - 1;
-}
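osc_index2policy() maps an inclusive page-index range to an inclusive byte extent; cl_offset() is the inverse of cl_index(). A standalone check of the endpoints, again under the assumption that the conversion is a plain shift with 4 KiB pages:

/* Model of the extent endpoints built by osc_index2policy(). */
#include <stdio.h>

#define MODEL_PAGE_SHIFT 12	/* assumption: 4 KiB pages */

int main(void)
{
	unsigned long start = 2, end = 3;
	unsigned long long ext_start =
		(unsigned long long)start << MODEL_PAGE_SHIFT;
	unsigned long long ext_end =
		((unsigned long long)(end + 1) << MODEL_PAGE_SHIFT) - 1;

	/* pages 2..3 -> bytes [8192, 16383] */
	printf("extent [%llu, %llu]\n", ext_start, ext_end);
	return 0;
}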
-
-static const char *osc_list(struct list_head *head)
-{
- return list_empty(head) ? "-" : "+";
-}
-
-static inline unsigned long osc_submit_duration(struct osc_page *opg)
-{
- if (opg->ops_submit_time == 0)
- return 0;
-
- return (jiffies - opg->ops_submit_time);
-}
-
-static int osc_page_print(const struct lu_env *env,
- const struct cl_page_slice *slice,
- void *cookie, lu_printer_t printer)
-{
- struct osc_page *opg = cl2osc_page(slice);
- struct osc_async_page *oap = &opg->ops_oap;
- struct osc_object *obj = cl2osc(slice->cpl_obj);
- struct client_obd *cli = &osc_export(obj)->exp_obd->u.cli;
-
- return (*printer)(env, cookie, LUSTRE_OSC_NAME "-page@%p %lu: 1< %#x %d %u %s %s > 2< %llu %u %u %#x %#x | %p %p %p > 3< %d %lu %d > 4< %d %d %d %lu %s | %s %s %s %s > 5< %s %s %s %s | %d %s | %d %s %s>\n",
- opg, osc_index(opg),
- /* 1 */
- oap->oap_magic, oap->oap_cmd,
- oap->oap_interrupted,
- osc_list(&oap->oap_pending_item),
- osc_list(&oap->oap_rpc_item),
- /* 2 */
- oap->oap_obj_off, oap->oap_page_off, oap->oap_count,
- oap->oap_async_flags, oap->oap_brw_flags,
- oap->oap_request, oap->oap_cli, obj,
- /* 3 */
- opg->ops_transfer_pinned,
- osc_submit_duration(opg), opg->ops_srvlock,
- /* 4 */
- cli->cl_r_in_flight, cli->cl_w_in_flight,
- cli->cl_max_rpcs_in_flight,
- cli->cl_avail_grant,
- osc_list(&cli->cl_cache_waiters),
- osc_list(&cli->cl_loi_ready_list),
- osc_list(&cli->cl_loi_hp_ready_list),
- osc_list(&cli->cl_loi_write_list),
- osc_list(&cli->cl_loi_read_list),
- /* 5 */
- osc_list(&obj->oo_ready_item),
- osc_list(&obj->oo_hp_ready_item),
- osc_list(&obj->oo_write_item),
- osc_list(&obj->oo_read_item),
- atomic_read(&obj->oo_nr_reads),
- osc_list(&obj->oo_reading_exts),
- atomic_read(&obj->oo_nr_writes),
- osc_list(&obj->oo_hp_exts),
- osc_list(&obj->oo_urgent_exts));
-}
-
-static void osc_page_delete(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- struct osc_page *opg = cl2osc_page(slice);
- struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
- int rc;
-
- CDEBUG(D_TRACE, "%p\n", opg);
- osc_page_transfer_put(env, opg);
- rc = osc_teardown_async_page(env, obj, opg);
- if (rc) {
- CL_PAGE_DEBUG(D_ERROR, env, slice->cpl_page,
- "Trying to teardown failed: %d\n", rc);
- LASSERT(0);
- }
-
- osc_lru_del(osc_cli(obj), opg);
-
- if (slice->cpl_page->cp_type == CPT_CACHEABLE) {
- void *value;
-
- spin_lock(&obj->oo_tree_lock);
- value = radix_tree_delete(&obj->oo_tree, osc_index(opg));
- if (value)
- --obj->oo_npages;
- spin_unlock(&obj->oo_tree_lock);
-
- LASSERT(ergo(value, value == opg));
- }
-}
-
-static void osc_page_clip(const struct lu_env *env,
- const struct cl_page_slice *slice, int from, int to)
-{
- struct osc_page *opg = cl2osc_page(slice);
- struct osc_async_page *oap = &opg->ops_oap;
-
- opg->ops_from = from;
- opg->ops_to = to;
- spin_lock(&oap->oap_lock);
- oap->oap_async_flags |= ASYNC_COUNT_STABLE;
- spin_unlock(&oap->oap_lock);
-}
-
-static int osc_page_cancel(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- struct osc_page *opg = cl2osc_page(slice);
- int rc = 0;
-
-	/* Check if the transfer against this page
-	 * is complete, or was never even queued.
- */
- if (opg->ops_transfer_pinned)
- /* FIXME: may not be interrupted.. */
- rc = osc_cancel_async_page(env, opg);
- LASSERT(ergo(rc == 0, opg->ops_transfer_pinned == 0));
- return rc;
-}
-
-static int osc_page_flush(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *io)
-{
- struct osc_page *opg = cl2osc_page(slice);
- int rc;
-
- rc = osc_flush_async_page(env, io, opg);
- return rc;
-}
-
-static const struct cl_page_operations osc_page_ops = {
- .cpo_print = osc_page_print,
- .cpo_delete = osc_page_delete,
- .cpo_clip = osc_page_clip,
- .cpo_cancel = osc_page_cancel,
- .cpo_flush = osc_page_flush
-};
-
-int osc_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index)
-{
- struct osc_object *osc = cl2osc(obj);
- struct osc_page *opg = cl_object_page_slice(obj, page);
- int result;
-
- opg->ops_from = 0;
- opg->ops_to = PAGE_SIZE;
-
- result = osc_prep_async_page(osc, opg, page->cp_vmpage,
- cl_offset(obj, index));
- if (result == 0) {
- struct osc_io *oio = osc_env_io(env);
-
- opg->ops_srvlock = osc_io_srvlock(oio);
- cl_page_slice_add(page, &opg->ops_cl, obj, index,
- &osc_page_ops);
- }
- INIT_LIST_HEAD(&opg->ops_lru);
-
- /* reserve an LRU space for this page */
- if (page->cp_type == CPT_CACHEABLE && result == 0) {
- result = osc_lru_alloc(env, osc_cli(osc), opg);
- if (result == 0) {
- spin_lock(&osc->oo_tree_lock);
- result = radix_tree_insert(&osc->oo_tree, index, opg);
- if (result == 0)
- ++osc->oo_npages;
- spin_unlock(&osc->oo_tree_lock);
- LASSERT(result == 0);
- }
- }
-
- return result;
-}
-
-/**
- * Helper function called by osc_io_submit() for every page in an immediate
- * transfer (i.e., transferred synchronously).
- */
-void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
- enum cl_req_type crt, int brw_flags)
-{
- struct osc_async_page *oap = &opg->ops_oap;
-
- LASSERTF(oap->oap_magic == OAP_MAGIC, "Bad oap magic: oap %p, magic 0x%x\n",
- oap, oap->oap_magic);
- LASSERT(oap->oap_async_flags & ASYNC_READY);
- LASSERT(oap->oap_async_flags & ASYNC_COUNT_STABLE);
-
- oap->oap_cmd = crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ;
- oap->oap_page_off = opg->ops_from;
- oap->oap_count = opg->ops_to - opg->ops_from;
- oap->oap_brw_flags = brw_flags | OBD_BRW_SYNC;
-
- if (capable(CAP_SYS_RESOURCE)) {
- oap->oap_brw_flags |= OBD_BRW_NOQUOTA;
- oap->oap_cmd |= OBD_BRW_NOQUOTA;
- }
-
- opg->ops_submit_time = jiffies;
- osc_page_transfer_get(opg, "transfer\0imm");
- osc_page_transfer_add(env, opg, crt);
-}
-
-/* --------------- LRU page management ------------------ */
-
-/* OSC is a natural place to manage LRU pages as applications tend to write
- * one OSC at a time. Ideally, if one OSC is used more frequently it should
- * occupy more LRU slots. On the other hand, we should avoid using up all LRU
- * slots (client_obd::cl_lru_left), otherwise processes have to be put to
- * sleep waiting for free LRU slots - this would be very bad - so the
- * algorithm requires each OSC to free slots voluntarily to maintain a
- * reasonable number of free slots at any time.
- */
-static DECLARE_WAIT_QUEUE_HEAD(osc_lru_waitq);
-
-/**
- * LRU pages are freed in batch mode. OSC should at least free this
- * number of pages to avoid running out of LRU slots.
- */
-static inline int lru_shrink_min(struct client_obd *cli)
-{
- return cli->cl_max_pages_per_rpc * 2;
-}
-
-/**
- * Free at most this number, otherwise it will take too long to finish.
- */
-static inline int lru_shrink_max(struct client_obd *cli)
-{
- return cli->cl_max_pages_per_rpc * cli->cl_max_rpcs_in_flight;
-}
-
-/**
- * Check if we can free LRU slots from this OSC. If there are LRU waiters,
- * we should free slots aggressively. In this way, slots are freed at a steady
- * pace to maintain fairness among OSCs.
- *
- * Return how many LRU pages should be freed.
- */
-static int osc_cache_too_much(struct client_obd *cli)
-{
- struct cl_client_cache *cache = cli->cl_cache;
- long pages = atomic_long_read(&cli->cl_lru_in_list);
- unsigned long budget;
-
- budget = cache->ccc_lru_max / (atomic_read(&cache->ccc_users) - 2);
-
-	/* if it's about to run out of LRU slots, we should free some, but not
-	 * too many, to maintain fairness among OSCs.
- */
- if (atomic_long_read(cli->cl_lru_left) < cache->ccc_lru_max >> 2) {
- if (pages >= budget)
- return lru_shrink_max(cli);
- else if (pages >= budget / 2)
- return lru_shrink_min(cli);
- } else {
- time64_t duration = ktime_get_real_seconds();
- long timediff;
-
- /* knock out pages by duration of no IO activity */
- duration -= cli->cl_lru_last_used;
- /*
- * The difference shouldn't be more than 70 years
-		 * so we can safely cast to a long. Round to
- * approximately 1 minute.
- */
- timediff = (long)(duration >> 6);
- if (timediff > 0 && pages >= budget / timediff)
- return lru_shrink_min(cli);
- }
- return 0;
-}
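Concretely, each OSC gets a budget of ccc_lru_max / (users - 2) slots; when fewer than a quarter of all slots remain free, an over-budget OSC sheds lru_shrink_max() pages and a half-budget one sheds lru_shrink_min(); otherwise idle time (duration >> 6, roughly minutes) knocks pages out. A worked example with made-up numbers, mirroring the thresholds above:

/* Worked example of the osc_cache_too_much() policy. */
#include <stdio.h>

int main(void)
{
	long lru_max = 1 << 20;		/* ccc_lru_max: 1M slots */
	int users = 6;			/* ccc_users */
	long budget = lru_max / (users - 2);	/* 262144 per OSC */
	long in_list = 300000;		/* this OSC's LRU pages */
	long lru_left = 100000;		/* free slots, < lru_max / 4 */

	if (lru_left < lru_max >> 2) {
		if (in_list >= budget)
			printf("shrink max\n");		/* taken here */
		else if (in_list >= budget / 2)
			printf("shrink min\n");
	} else {
		long idle_sec = 600;			/* no IO for 10 min */
		long timediff = idle_sec >> 6;		/* ~minutes: 9 */

		if (timediff > 0 && in_list >= budget / timediff)
			printf("shrink min (idle)\n");
	}
	return 0;
}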
-
-int lru_queue_work(const struct lu_env *env, void *data)
-{
- struct client_obd *cli = data;
- int count;
-
- CDEBUG(D_CACHE, "%s: run LRU work for client obd\n", cli_name(cli));
-
- count = osc_cache_too_much(cli);
- if (count > 0) {
- int rc = osc_lru_shrink(env, cli, count, false);
-
- CDEBUG(D_CACHE, "%s: shrank %d/%d pages from client obd\n",
- cli_name(cli), rc, count);
- if (rc >= count) {
- CDEBUG(D_CACHE, "%s: queue again\n", cli_name(cli));
- ptlrpcd_queue_work(cli->cl_lru_work);
- }
- }
-
- return 0;
-}
-
-void osc_lru_add_batch(struct client_obd *cli, struct list_head *plist)
-{
- LIST_HEAD(lru);
- struct osc_async_page *oap;
- long npages = 0;
-
- list_for_each_entry(oap, plist, oap_pending_item) {
- struct osc_page *opg = oap2osc_page(oap);
-
- if (!opg->ops_in_lru)
- continue;
-
- ++npages;
- LASSERT(list_empty(&opg->ops_lru));
- list_add(&opg->ops_lru, &lru);
- }
-
- if (npages > 0) {
- spin_lock(&cli->cl_lru_list_lock);
- list_splice_tail(&lru, &cli->cl_lru_list);
- atomic_long_sub(npages, &cli->cl_lru_busy);
- atomic_long_add(npages, &cli->cl_lru_in_list);
- cli->cl_lru_last_used = ktime_get_real_seconds();
- spin_unlock(&cli->cl_lru_list_lock);
-
- if (waitqueue_active(&osc_lru_waitq))
- (void)ptlrpcd_queue_work(cli->cl_lru_work);
- }
-}
-
-static void __osc_lru_del(struct client_obd *cli, struct osc_page *opg)
-{
- LASSERT(atomic_long_read(&cli->cl_lru_in_list) > 0);
- list_del_init(&opg->ops_lru);
- atomic_long_dec(&cli->cl_lru_in_list);
-}
-
-/**
- * Page is being destroyed. The page may not be in the LRU list if the
- * transfer never finished (an error occurred).
- */
-static void osc_lru_del(struct client_obd *cli, struct osc_page *opg)
-{
- if (opg->ops_in_lru) {
- spin_lock(&cli->cl_lru_list_lock);
- if (!list_empty(&opg->ops_lru)) {
- __osc_lru_del(cli, opg);
- } else {
- LASSERT(atomic_long_read(&cli->cl_lru_busy) > 0);
- atomic_long_dec(&cli->cl_lru_busy);
- }
- spin_unlock(&cli->cl_lru_list_lock);
-
- atomic_long_inc(cli->cl_lru_left);
- /* this is a great place to release more LRU pages if
- * this osc occupies too many LRU pages and kernel is
- * stealing one of them.
- */
- if (osc_cache_too_much(cli)) {
- CDEBUG(D_CACHE, "%s: queue LRU work\n", cli_name(cli));
- (void)ptlrpcd_queue_work(cli->cl_lru_work);
- }
- wake_up(&osc_lru_waitq);
- } else {
- LASSERT(list_empty(&opg->ops_lru));
- }
-}
-
-/**
- * Delete page from the LRU list for redirtying.
- */
-static void osc_lru_use(struct client_obd *cli, struct osc_page *opg)
-{
- /* If page is being transferred for the first time,
- * ops_lru should be empty
- */
- if (opg->ops_in_lru && !list_empty(&opg->ops_lru)) {
- spin_lock(&cli->cl_lru_list_lock);
- __osc_lru_del(cli, opg);
- spin_unlock(&cli->cl_lru_list_lock);
- atomic_long_inc(&cli->cl_lru_busy);
- }
-}
-
-static void discard_pagevec(const struct lu_env *env, struct cl_io *io,
- struct cl_page **pvec, int max_index)
-{
- int i;
-
- for (i = 0; i < max_index; i++) {
- struct cl_page *page = pvec[i];
-
- LASSERT(cl_page_is_owned(page, io));
- cl_page_delete(env, page);
- cl_page_discard(env, io, page);
- cl_page_disown(env, io, page);
- cl_page_put(env, page);
-
- pvec[i] = NULL;
- }
-}
-
-/**
- * Check if a cl_page can be released, i.e., it's not being used.
- *
- * If unstable accounting is turned on, a bulk transfer may hold one refcount
- * for recovery, so we need to check the vmpage refcount as well; otherwise,
- * even if we can destroy the cl_page, the corresponding vmpage can't be
- * reused.
- */
-static inline bool lru_page_busy(struct client_obd *cli, struct cl_page *page)
-{
- if (cl_page_in_use_noref(page))
- return true;
-
- if (cli->cl_cache->ccc_unstable_check) {
- struct page *vmpage = cl_page_vmpage(page);
-
- /* vmpage have two known users: cl_page and VM page cache */
- if (page_count(vmpage) - page_mapcount(vmpage) > 2)
- return true;
- }
- return false;
-}
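The arithmetic in the vmpage branch: cl_page and the VM page cache are the two known references, so more than two non-mapping references means someone else - typically an in-flight bulk transfer - still holds the page. The same test in isolation (model_ names are stand-ins):

/* The vmpage busy test from lru_page_busy(), in isolation. */
#include <stdbool.h>
#include <assert.h>

static bool model_vmpage_busy(int page_count, int page_mapcount)
{
	return page_count - page_mapcount > 2;
}

int main(void)
{
	assert(!model_vmpage_busy(2, 0));	/* just cl_page + page cache */
	assert(model_vmpage_busy(3, 0));	/* extra ref: still busy */
	assert(!model_vmpage_busy(3, 1));	/* extra ref is a mapping */
	return 0;
}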
-
-/**
- * Drop at most @target pages from the LRU.
- */
-long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
- long target, bool force)
-{
- struct cl_io *io;
- struct cl_object *clobj = NULL;
- struct cl_page **pvec;
- struct osc_page *opg;
- int maxscan = 0;
- long count = 0;
- int index = 0;
- int rc = 0;
-
- LASSERT(atomic_long_read(&cli->cl_lru_in_list) >= 0);
- if (atomic_long_read(&cli->cl_lru_in_list) == 0 || target <= 0)
- return 0;
-
- CDEBUG(D_CACHE, "%s: shrinkers: %d, force: %d\n",
- cli_name(cli), atomic_read(&cli->cl_lru_shrinkers), force);
- if (!force) {
- if (atomic_read(&cli->cl_lru_shrinkers) > 0)
- return -EBUSY;
-
- if (atomic_inc_return(&cli->cl_lru_shrinkers) > 1) {
- atomic_dec(&cli->cl_lru_shrinkers);
- return -EBUSY;
- }
- } else {
- atomic_inc(&cli->cl_lru_shrinkers);
- }
-
- pvec = (struct cl_page **)osc_env_info(env)->oti_pvec;
- io = &osc_env_info(env)->oti_io;
-
- spin_lock(&cli->cl_lru_list_lock);
- if (force)
- cli->cl_lru_reclaim++;
- maxscan = min(target << 1, atomic_long_read(&cli->cl_lru_in_list));
- while (!list_empty(&cli->cl_lru_list)) {
- struct cl_page *page;
- bool will_free = false;
-
- if (!force && atomic_read(&cli->cl_lru_shrinkers) > 1)
- break;
-
- if (--maxscan < 0)
- break;
-
- opg = list_entry(cli->cl_lru_list.next, struct osc_page,
- ops_lru);
- page = opg->ops_cl.cpl_page;
- if (lru_page_busy(cli, page)) {
- list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
- continue;
- }
-
- LASSERT(page->cp_obj);
- if (clobj != page->cp_obj) {
- struct cl_object *tmp = page->cp_obj;
-
- cl_object_get(tmp);
- spin_unlock(&cli->cl_lru_list_lock);
-
- if (clobj) {
- discard_pagevec(env, io, pvec, index);
- index = 0;
-
- cl_io_fini(env, io);
- cl_object_put(env, clobj);
- clobj = NULL;
- }
-
- clobj = tmp;
- io->ci_obj = clobj;
- io->ci_ignore_layout = 1;
- rc = cl_io_init(env, io, CIT_MISC, clobj);
-
- spin_lock(&cli->cl_lru_list_lock);
-
- if (rc != 0)
- break;
-
- ++maxscan;
- continue;
- }
-
- if (cl_page_own_try(env, io, page) == 0) {
- if (!lru_page_busy(cli, page)) {
- /* remove it from lru list earlier to avoid
- * lock contention
- */
- __osc_lru_del(cli, opg);
- opg->ops_in_lru = 0; /* will be discarded */
-
- cl_page_get(page);
- will_free = true;
- } else {
- cl_page_disown(env, io, page);
- }
- }
-
- if (!will_free) {
- list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
- continue;
- }
-
- /* Don't discard and free the page with cl_lru_list held */
- pvec[index++] = page;
- if (unlikely(index == OTI_PVEC_SIZE)) {
- spin_unlock(&cli->cl_lru_list_lock);
- discard_pagevec(env, io, pvec, index);
- index = 0;
-
- spin_lock(&cli->cl_lru_list_lock);
- }
-
- if (++count >= target)
- break;
- }
- spin_unlock(&cli->cl_lru_list_lock);
-
- if (clobj) {
- discard_pagevec(env, io, pvec, index);
-
- cl_io_fini(env, io);
- cl_object_put(env, clobj);
- }
-
- atomic_dec(&cli->cl_lru_shrinkers);
- if (count > 0) {
- atomic_long_add(count, cli->cl_lru_left);
- wake_up_all(&osc_lru_waitq);
- }
- return count > 0 ? count : rc;
-}
-
-/**
- * Reclaim LRU pages by an IO thread. The caller wants to reclaim at least
- * \@npages of LRU slots. For performance considerations, it's better to drop
- * LRU pages in batches. Therefore, the actual number is adjusted to at least
- * max_pages_per_rpc.
- */
-static long osc_lru_reclaim(struct client_obd *cli, unsigned long npages)
-{
- struct lu_env *env;
- struct cl_client_cache *cache = cli->cl_cache;
- int max_scans;
- u16 refcheck;
- long rc = 0;
-
- LASSERT(cache);
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return 0;
-
- npages = max_t(int, npages, cli->cl_max_pages_per_rpc);
- CDEBUG(D_CACHE, "%s: start to reclaim %ld pages from LRU\n",
- cli_name(cli), npages);
- rc = osc_lru_shrink(env, cli, npages, true);
- if (rc >= npages) {
- CDEBUG(D_CACHE, "%s: reclaimed %ld/%ld pages from LRU\n",
- cli_name(cli), rc, npages);
- if (osc_cache_too_much(cli) > 0)
- ptlrpcd_queue_work(cli->cl_lru_work);
- goto out;
- } else if (rc > 0) {
- npages -= rc;
- }
-
- CDEBUG(D_CACHE, "%s: cli %p no free slots, pages: %ld/%ld, want: %ld\n",
- cli_name(cli), cli, atomic_long_read(&cli->cl_lru_in_list),
- atomic_long_read(&cli->cl_lru_busy), npages);
-
- /* Reclaim LRU slots from other client_obd as it can't free enough
- * from its own. This should rarely happen.
- */
- spin_lock(&cache->ccc_lru_lock);
- LASSERT(!list_empty(&cache->ccc_lru));
-
- cache->ccc_lru_shrinkers++;
- list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru);
-
- max_scans = atomic_read(&cache->ccc_users) - 2;
- while (--max_scans > 0 && !list_empty(&cache->ccc_lru)) {
- cli = list_entry(cache->ccc_lru.next, struct client_obd,
- cl_lru_osc);
-
- CDEBUG(D_CACHE, "%s: cli %p LRU pages: %ld, busy: %ld.\n",
- cli_name(cli), cli,
- atomic_long_read(&cli->cl_lru_in_list),
- atomic_long_read(&cli->cl_lru_busy));
-
- list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru);
- if (osc_cache_too_much(cli) > 0) {
- spin_unlock(&cache->ccc_lru_lock);
-
- rc = osc_lru_shrink(env, cli, npages, true);
- spin_lock(&cache->ccc_lru_lock);
- if (rc >= npages)
- break;
- if (rc > 0)
- npages -= rc;
- }
- }
- spin_unlock(&cache->ccc_lru_lock);
-
-out:
- cl_env_put(env, &refcheck);
- CDEBUG(D_CACHE, "%s: cli %p freed %ld pages.\n",
- cli_name(cli), cli, rc);
- return rc;
-}
-
-/**
- * osc_lru_alloc() is called to reserve an LRU slot for a cl_page.
- *
- * Usually the LRU slots are reserved in osc_io_iter_rw_init(), which should
- * have reserved enough slots for an IO; this path only blocks when LRU slots
- * are in extreme shortage.
- */
-static int osc_lru_alloc(const struct lu_env *env, struct client_obd *cli,
- struct osc_page *opg)
-{
- struct osc_io *oio = osc_env_io(env);
- int rc = 0;
-
- if (!cli->cl_cache) /* shall not be in LRU */
- return 0;
-
- if (oio->oi_lru_reserved > 0) {
- --oio->oi_lru_reserved;
- goto out;
- }
-
- LASSERT(atomic_long_read(cli->cl_lru_left) >= 0);
- while (!atomic_long_add_unless(cli->cl_lru_left, -1, 0)) {
-		/* ran out of LRU slots, try to drop some ourselves */
- rc = osc_lru_reclaim(cli, 1);
- if (rc < 0)
- break;
- if (rc > 0)
- continue;
-
- cond_resched();
-
- rc = l_wait_event_abortable(osc_lru_waitq,
- atomic_long_read(cli->cl_lru_left) > 0);
-
- if (rc < 0)
- break;
- }
-
-out:
- if (rc >= 0) {
- atomic_long_inc(&cli->cl_lru_busy);
- opg->ops_in_lru = 1;
- rc = 0;
- }
-
- return rc;
-}
-
-/**
- * osc_lru_reserve() is called to reserve enough LRU slots for I/O.
- *
- * The benefit of doing this is to reduce contention against atomic counter
- * cl_lru_left by changing it from per-page access to per-IO access.
- */
-unsigned long osc_lru_reserve(struct client_obd *cli, unsigned long npages)
-{
- unsigned long reserved = 0;
- unsigned long max_pages;
- unsigned long c;
-
- /*
-	 * reserve at most a full RPC window to avoid a thread accidentally
-	 * consuming too many LRU slots
- */
- max_pages = cli->cl_max_pages_per_rpc * cli->cl_max_rpcs_in_flight;
- if (npages > max_pages)
- npages = max_pages;
-
- c = atomic_long_read(cli->cl_lru_left);
- if (c < npages && osc_lru_reclaim(cli, npages) > 0)
- c = atomic_long_read(cli->cl_lru_left);
- while (c >= npages) {
- if (c == atomic_long_cmpxchg(cli->cl_lru_left, c, c - npages)) {
- reserved = npages;
- break;
- }
- c = atomic_long_read(cli->cl_lru_left);
- }
- if (atomic_long_read(cli->cl_lru_left) < max_pages) {
- /*
- * If there aren't enough pages in the per-OSC LRU then
- * wake up the LRU thread to try and clear out space, so
- * we don't block if pages are being dirtied quickly.
- */
- CDEBUG(D_CACHE, "%s: queue LRU, left: %lu/%ld.\n",
- cli_name(cli), atomic_long_read(cli->cl_lru_left),
- max_pages);
- (void)ptlrpcd_queue_work(cli->cl_lru_work);
- }
-
- return reserved;
-}
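The reservation itself is a lock-free compare-and-swap loop on cl_lru_left: read the counter, then retry the decrement until it either succeeds or the counter drops below the request. A user-space model with C11 atomics standing in for atomic_long_t (names and values are made up):

/* Model of the lock-free reservation loop in osc_lru_reserve(). */
#include <stdatomic.h>
#include <stdio.h>

static atomic_long lru_left = 1000;

static unsigned long model_lru_reserve(long npages)
{
	long c = atomic_load(&lru_left);

	while (c >= npages) {
		/* try to take npages; on failure c is reloaded for us */
		if (atomic_compare_exchange_weak(&lru_left, &c, c - npages))
			return npages;
	}
	return 0;	/* not enough slots: caller falls back */
}

int main(void)
{
	printf("reserved %lu, left %ld\n",
	       model_lru_reserve(256), atomic_load(&lru_left));
	return 0;
}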
-
-/**
- * osc_lru_unreserve() is called to unreserve LRU slots.
- *
- * LRU slots reserved by osc_lru_reserve() may have entries left due to several
- * reasons such as page already existing or I/O error. Those reserved slots
- * should be freed by calling this function.
- */
-void osc_lru_unreserve(struct client_obd *cli, unsigned long npages)
-{
- atomic_long_add(npages, cli->cl_lru_left);
- wake_up_all(&osc_lru_waitq);
-}
-
-/**
- * Atomic operations are expensive. We accumulate the accounting for
- * pages in the same pgdat to get better performance.
- * In practice this works pretty well because the pages in the same RPC
- * are likely from the same page zone.
- */
-static inline void unstable_page_accounting(struct ptlrpc_bulk_desc *desc,
- int factor)
-{
- int page_count = desc->bd_iov_count;
- pg_data_t *last = NULL;
- int count = 0;
- int i;
-
- LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type));
-
- for (i = 0; i < page_count; i++) {
- pg_data_t *pgdat = page_pgdat(BD_GET_KIOV(desc, i).bv_page);
-
- if (likely(pgdat == last)) {
- ++count;
- continue;
- }
-
- if (count > 0) {
-			mod_node_page_state(last, NR_UNSTABLE_NFS,
- factor * count);
- count = 0;
- }
- last = pgdat;
- ++count;
- }
- if (count > 0)
- mod_node_page_state(last, NR_UNSTABLE_NFS, factor * count);
-}
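The batching above turns one counter update per page into one update per run of same-node pages. A user-space model with integer node ids standing in for pg_data_t pointers (model_ names are hypothetical):

/* Model of the run-batching in unstable_page_accounting(). */
#include <stdio.h>

static int updates;	/* how many counter updates were issued */

static void model_mod_state(int node, int delta)
{
	updates++;
	printf("node %d: %+d\n", node, delta);
}

static void model_account(const int *nodes, int n, int factor)
{
	int last = -1, count = 0;

	for (int i = 0; i < n; i++) {
		if (nodes[i] == last) {
			count++;
			continue;
		}
		if (count > 0)
			model_mod_state(last, factor * count);
		last = nodes[i];
		count = 1;
	}
	if (count > 0)
		model_mod_state(last, factor * count);
}

int main(void)
{
	int nodes[] = { 0, 0, 0, 1, 1, 0 };	/* 6 pages, 3 runs */

	model_account(nodes, 6, 1);		/* 3 updates, not 6 */
	printf("updates: %d\n", updates);
	return 0;
}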
-
-static inline void add_unstable_page_accounting(struct ptlrpc_bulk_desc *desc)
-{
- unstable_page_accounting(desc, 1);
-}
-
-static inline void dec_unstable_page_accounting(struct ptlrpc_bulk_desc *desc)
-{
- unstable_page_accounting(desc, -1);
-}
-
-/**
- * Performs "unstable" page accounting. This function balances the
- * increment operations performed in osc_inc_unstable_pages. It is
- * registered as the RPC request callback, and is executed when the
- * bulk RPC is committed on the server. Thus at this point, the pages
- * involved in the bulk transfer are no longer considered unstable.
- *
- * If this function is called, the request should have been committed
- * or req->rq_unstable must have been set; either implies that the unstable
- * statistics have already been added.
- */
-void osc_dec_unstable_pages(struct ptlrpc_request *req)
-{
- struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
- struct ptlrpc_bulk_desc *desc = req->rq_bulk;
- int page_count = desc->bd_iov_count;
- long unstable_count;
-
- LASSERT(page_count >= 0);
- dec_unstable_page_accounting(desc);
-
- unstable_count = atomic_long_sub_return(page_count,
- &cli->cl_unstable_count);
- LASSERT(unstable_count >= 0);
-
- unstable_count = atomic_long_sub_return(page_count,
- &cli->cl_cache->ccc_unstable_nr);
- LASSERT(unstable_count >= 0);
- if (!unstable_count)
- wake_up_all(&cli->cl_cache->ccc_unstable_waitq);
-
- if (waitqueue_active(&osc_lru_waitq))
- (void)ptlrpcd_queue_work(cli->cl_lru_work);
-}
-
-/**
- * "unstable" page accounting. See: osc_dec_unstable_pages.
- */
-void osc_inc_unstable_pages(struct ptlrpc_request *req)
-{
- struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
- struct ptlrpc_bulk_desc *desc = req->rq_bulk;
- long page_count = desc->bd_iov_count;
-
- /* No unstable page tracking */
- if (!cli->cl_cache || !cli->cl_cache->ccc_unstable_check)
- return;
-
- add_unstable_page_accounting(desc);
- atomic_long_add(page_count, &cli->cl_unstable_count);
- atomic_long_add(page_count, &cli->cl_cache->ccc_unstable_nr);
-
- /*
- * If the request has already been committed (i.e. brw_commit
- * called via rq_commit_cb), we need to undo the unstable page
- * increments we just performed because rq_commit_cb wont be
- * called again.
- */
- spin_lock(&req->rq_lock);
- if (unlikely(req->rq_committed)) {
- spin_unlock(&req->rq_lock);
-
- osc_dec_unstable_pages(req);
- } else {
- req->rq_unstable = 1;
- spin_unlock(&req->rq_lock);
- }
-}
-
-/**
- * Check whether this OSC should piggyback the SOFT_SYNC flag to the OST.
- * This function is called for every BRW RPC, so it is critical
- * that it be fast.
- */
-bool osc_over_unstable_soft_limit(struct client_obd *cli)
-{
- long unstable_nr, osc_unstable_count;
-
- /* Can't check cli->cl_unstable_count, therefore, no soft limit */
- if (!cli->cl_cache || !cli->cl_cache->ccc_unstable_check)
- return false;
-
- osc_unstable_count = atomic_long_read(&cli->cl_unstable_count);
- unstable_nr = atomic_long_read(&cli->cl_cache->ccc_unstable_nr);
-
- CDEBUG(D_CACHE,
- "%s: cli: %p unstable pages: %lu, osc unstable pages: %lu\n",
- cli_name(cli), cli, unstable_nr, osc_unstable_count);
-
- /*
- * If LRU slots are in shortage - less than 25% remaining - AND this OSC
- * holds a full RPC window of unstable pages, it is a good time to
- * piggyback a SOFT_SYNC flag.
- * Note that the OST does not respond to a SOFT_SYNC request
- * immediately, so busier OSCs naturally get more chances to carry
- * the flag; this is reasonable.
- */
- return unstable_nr > cli->cl_cache->ccc_lru_max >> 2 &&
- osc_unstable_count > cli->cl_max_pages_per_rpc *
- cli->cl_max_rpcs_in_flight;
-}
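
For concreteness (illustrative numbers, not from the source): with ccc_lru_max = 1,000,000 pages, cl_max_pages_per_rpc = 256 and cl_max_rpcs_in_flight = 8, the flag is piggybacked once the filesystem-wide unstable count exceeds 250,000 pages (a quarter of the LRU) and this OSC itself holds more than 256 * 8 = 2,048 unstable pages.
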
-
-/**
- * Return how many LRU pages are cached across all OSC devices
- *
- * Return: # of cached LRU pages scaled by the reclamation tendency
- * (sysctl_vfs_cache_pressure), or SHRINK_STOP if no scanning can be
- * done at this time
- */
-unsigned long osc_cache_shrink_count(struct shrinker *sk,
- struct shrink_control *sc)
-{
- struct client_obd *cli;
- unsigned long cached = 0;
-
- spin_lock(&osc_shrink_lock);
- list_for_each_entry(cli, &osc_shrink_list, cl_shrink_list)
- cached += atomic_long_read(&cli->cl_lru_in_list);
- spin_unlock(&osc_shrink_lock);
-
- return (cached * sysctl_vfs_cache_pressure) / 100;
-}
-
-/**
- * Scan and try to reclaim sc->nr_to_scan cached LRU pages
- *
- * Return: number of cached LRU pages reclaimed, or
- * SHRINK_STOP if no scanning can be done at this time
- *
- * The Linux kernel loops calling this shrinker scan routine with
- * sc->nr_to_scan = SHRINK_BATCH (currently 128) until it has reclaimed
- * enough memory.
- *
- * If sc->nr_to_scan is 0, the VM is only querying the cache size; there is
- * no need to scan or reclaim LRU pages, so just return 0 and let
- * osc_cache_shrink_count() report the number of LRU pages.
- */
-unsigned long osc_cache_shrink_scan(struct shrinker *sk,
- struct shrink_control *sc)
-{
- struct client_obd *stop_anchor = NULL;
- struct client_obd *cli;
- struct lu_env *env;
- long shrank = 0;
- u16 refcheck;
- int rc;
-
- if (!sc->nr_to_scan)
- return 0;
-
- if (!(sc->gfp_mask & __GFP_FS))
- return SHRINK_STOP;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return SHRINK_STOP;
-
- spin_lock(&osc_shrink_lock);
- while (!list_empty(&osc_shrink_list)) {
- cli = list_entry(osc_shrink_list.next, struct client_obd,
- cl_shrink_list);
-
- if (!stop_anchor)
- stop_anchor = cli;
- else if (cli == stop_anchor)
- break;
-
- list_move_tail(&cli->cl_shrink_list, &osc_shrink_list);
- spin_unlock(&osc_shrink_lock);
-
- /* shrink no more than max_pages_per_rpc for an OSC */
- rc = osc_lru_shrink(env, cli, (sc->nr_to_scan - shrank) >
- cli->cl_max_pages_per_rpc ?
- cli->cl_max_pages_per_rpc :
- sc->nr_to_scan - shrank, true);
- if (rc > 0)
- shrank += rc;
-
- if (shrank >= sc->nr_to_scan)
- goto out;
-
- spin_lock(&osc_shrink_lock);
- }
- spin_unlock(&osc_shrink_lock);
-
-out:
- cl_env_put(env, &refcheck);
-
- return shrank;
-}
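
These two callbacks follow the standard kernel shrinker contract. A minimal sketch of how such a pair would be wired up (the deleted driver performs the equivalent registration in another file of this patch):

	static struct shrinker osc_cache_shrinker = {
		.count_objects	= osc_cache_shrink_count,
		.scan_objects	= osc_cache_shrink_scan,
		.seeks		= DEFAULT_SEEKS,
	};

	/* module init */
	rc = register_shrinker(&osc_cache_shrinker);

	/* module exit */
	unregister_shrinker(&osc_cache_shrinker);
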
-
-/** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_quota.c b/drivers/staging/lustre/lustre/osc/osc_quota.c
deleted file mode 100644
index 723ec2fb18bf..000000000000
--- a/drivers/staging/lustre/lustre/osc/osc_quota.c
+++ /dev/null
@@ -1,236 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- *
- * Code originally extracted from quota directory
- */
-
-#include <obd_class.h>
-#include "osc_internal.h"
-
-static const struct rhashtable_params quota_hash_params = {
- .key_len = sizeof(u32),
- .key_offset = offsetof(struct osc_quota_info, oqi_id),
- .head_offset = offsetof(struct osc_quota_info, oqi_hash),
- .automatic_shrinking = true,
-};
-
-static inline struct osc_quota_info *osc_oqi_alloc(u32 id)
-{
- struct osc_quota_info *oqi;
-
- oqi = kmem_cache_zalloc(osc_quota_kmem, GFP_NOFS);
- if (oqi)
- oqi->oqi_id = id;
-
- return oqi;
-}
-
-int osc_quota_chkdq(struct client_obd *cli, const unsigned int qid[])
-{
- int type;
-
- for (type = 0; type < MAXQUOTAS; type++) {
- struct osc_quota_info *oqi;
-
- oqi = rhashtable_lookup_fast(&cli->cl_quota_hash[type], &qid[type],
- quota_hash_params);
- if (oqi) {
- /* Must not access oqi here, it could have been
- * freed by osc_quota_setdq()
- */
-
- /* the slot is busy, the user is about to run out of
- * quota space on this OST
- */
- CDEBUG(D_QUOTA, "chkdq found noquota for %s %d\n",
- type == USRQUOTA ? "user" : "grout", qid[type]);
- return NO_QUOTA;
- }
- }
-
- return QUOTA_OK;
-}
-
-static void osc_quota_free(struct rcu_head *head)
-{
- struct osc_quota_info *oqi = container_of(head, struct osc_quota_info, rcu);
-
- kmem_cache_free(osc_quota_kmem, oqi);
-}
-
-#define MD_QUOTA_FLAG(type) ((type == USRQUOTA) ? OBD_MD_FLUSRQUOTA \
- : OBD_MD_FLGRPQUOTA)
-#define FL_QUOTA_FLAG(type) ((type == USRQUOTA) ? OBD_FL_NO_USRQUOTA \
- : OBD_FL_NO_GRPQUOTA)
-
-int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[],
- u32 valid, u32 flags)
-{
- int type;
- int rc = 0;
-
- if ((valid & (OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA)) == 0)
- return 0;
-
- for (type = 0; type < MAXQUOTAS; type++) {
- struct osc_quota_info *oqi;
-
- if ((valid & MD_QUOTA_FLAG(type)) == 0)
- continue;
-
- /* lookup the ID in the per-type hash table */
- rcu_read_lock();
- oqi = rhashtable_lookup_fast(&cli->cl_quota_hash[type], &qid[type],
- quota_hash_params);
- if ((flags & FL_QUOTA_FLAG(type)) != 0) {
- /* This ID is getting close to its quota limit, let's
- * switch to sync I/O
- */
- rcu_read_unlock();
- if (oqi)
- continue;
-
- oqi = osc_oqi_alloc(qid[type]);
- if (!oqi) {
- rc = -ENOMEM;
- break;
- }
-
- rc = rhashtable_lookup_insert_fast(&cli->cl_quota_hash[type],
- &oqi->oqi_hash, quota_hash_params);
- /* race with others? */
- if (rc) {
- kmem_cache_free(osc_quota_kmem, oqi);
- if (rc != -EEXIST) {
- rc = -ENOMEM;
- break;
- }
- rc = 0;
- }
-
- CDEBUG(D_QUOTA, "%s: setdq to insert for %s %d (%d)\n",
- cli_name(cli),
- type == USRQUOTA ? "user" : "group",
- qid[type], rc);
- } else {
- /* This ID is now off the hook, let's remove it from
- * the hash table
- */
- if (!oqi) {
- rcu_read_unlock();
- continue;
- }
- if (rhashtable_remove_fast(&cli->cl_quota_hash[type],
- &oqi->oqi_hash, quota_hash_params) == 0)
- call_rcu(&oqi->rcu, osc_quota_free);
- rcu_read_unlock();
- CDEBUG(D_QUOTA, "%s: setdq to remove for %s %d (%p)\n",
- cli_name(cli),
- type == USRQUOTA ? "user" : "group",
- qid[type], oqi);
- }
- }
-
- return rc;
-}
-
-static void
-oqi_exit(void *vquota, void *data)
-{
- struct osc_quota_info *oqi = vquota;
-
- osc_quota_free(&oqi->rcu);
-}
-
-int osc_quota_setup(struct obd_device *obd)
-{
- struct client_obd *cli = &obd->u.cli;
- int i, type;
-
- for (type = 0; type < MAXQUOTAS; type++) {
- if (rhashtable_init(&cli->cl_quota_hash[type], &quota_hash_params) != 0)
- break;
- }
-
- if (type == MAXQUOTAS)
- return 0;
-
- for (i = 0; i < type; i++)
- rhashtable_destroy(&cli->cl_quota_hash[i]);
-
- return -ENOMEM;
-}
-
-int osc_quota_cleanup(struct obd_device *obd)
-{
- struct client_obd *cli = &obd->u.cli;
- int type;
-
- for (type = 0; type < MAXQUOTAS; type++)
- rhashtable_free_and_destroy(&cli->cl_quota_hash[type],
- oqi_exit, NULL);
-
- return 0;
-}
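
Taken together, the functions above exercise the whole rhashtable lifecycle. A condensed sketch of the call sequence (same kernel APIs as above; the item type and helpers are illustrative fragments, not driver code):

	struct item {
		u32			id;
		struct rhash_head	hash;
		struct rcu_head		rcu;
	};

	static const struct rhashtable_params params = {
		.key_len	= sizeof(u32),
		.key_offset	= offsetof(struct item, id),
		.head_offset	= offsetof(struct item, hash),
	};

	rhashtable_init(&ht, &params);				/* setup */
	it = rhashtable_lookup_fast(&ht, &id, params);		/* lookup (RCU) */
	rhashtable_lookup_insert_fast(&ht, &it->hash, params);	/* insert, -EEXIST on race */
	if (!rhashtable_remove_fast(&ht, &it->hash, params))	/* remove */
		call_rcu(&it->rcu, item_free_rcu);		/* free after grace period */
	rhashtable_free_and_destroy(&ht, item_exit, NULL);	/* teardown, frees leftovers */
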
-
-int osc_quotactl(struct obd_device *unused, struct obd_export *exp,
- struct obd_quotactl *oqctl)
-{
- struct ptlrpc_request *req;
- struct obd_quotactl *oqc;
- int rc;
-
- req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
- &RQF_OST_QUOTACTL, LUSTRE_OST_VERSION,
- OST_QUOTACTL);
- if (!req)
- return -ENOMEM;
-
- oqc = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL);
- *oqc = *oqctl;
-
- ptlrpc_request_set_replen(req);
- ptlrpc_at_set_req_timeout(req);
- req->rq_no_resend = 1;
-
- rc = ptlrpc_queue_wait(req);
- if (rc)
- CERROR("ptlrpc_queue_wait failed, rc: %d\n", rc);
-
- if (req->rq_repmsg) {
- oqc = req_capsule_server_get(&req->rq_pill, &RMF_OBD_QUOTACTL);
- if (oqc) {
- *oqctl = *oqc;
- } else if (!rc) {
- CERROR("Can't unpack obd_quotactl\n");
- rc = -EPROTO;
- }
- } else if (!rc) {
- CERROR("Can't unpack obd_quotactl\n");
- rc = -EPROTO;
- }
- ptlrpc_req_finished(req);
-
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c
deleted file mode 100644
index 61ef6c8d7a12..000000000000
--- a/drivers/staging/lustre/lustre/osc/osc_request.c
+++ /dev/null
@@ -1,2907 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_OSC
-
-#include <linux/libcfs/libcfs_hash.h>
-#include <linux/sched/mm.h>
-
-#include <lustre_dlm.h>
-#include <lustre_net.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <obd_cksum.h>
-
-#include <lustre_ha.h>
-#include <lprocfs_status.h>
-#include <uapi/linux/lustre/lustre_ioctl.h>
-#include <lustre_debug.h>
-#include <lustre_obdo.h>
-#include <uapi/linux/lustre/lustre_param.h>
-#include <lustre_fid.h>
-#include <obd_class.h>
-#include <obd.h>
-#include "osc_internal.h"
-#include "osc_cl_internal.h"
-
-atomic_t osc_pool_req_count;
-unsigned int osc_reqpool_maxreqcount;
-struct ptlrpc_request_pool *osc_rq_pool;
-
-/* max memory used for request pool, unit is MB */
-static unsigned int osc_reqpool_mem_max = 5;
-module_param(osc_reqpool_mem_max, uint, 0444);
-
-struct osc_brw_async_args {
- struct obdo *aa_oa;
- int aa_requested_nob;
- int aa_nio_count;
- u32 aa_page_count;
- int aa_resends;
- struct brw_page **aa_ppga;
- struct client_obd *aa_cli;
- struct list_head aa_oaps;
- struct list_head aa_exts;
-};
-
-struct osc_async_args {
- struct obd_info *aa_oi;
-};
-
-struct osc_setattr_args {
- struct obdo *sa_oa;
- obd_enqueue_update_f sa_upcall;
- void *sa_cookie;
-};
-
-struct osc_fsync_args {
- struct osc_object *fa_obj;
- struct obdo *fa_oa;
- obd_enqueue_update_f fa_upcall;
- void *fa_cookie;
-};
-
-struct osc_enqueue_args {
- struct obd_export *oa_exp;
- enum ldlm_type oa_type;
- enum ldlm_mode oa_mode;
- __u64 *oa_flags;
- osc_enqueue_upcall_f oa_upcall;
- void *oa_cookie;
- struct ost_lvb *oa_lvb;
- struct lustre_handle oa_lockh;
- unsigned int oa_agl:1;
-};
-
-static void osc_release_ppga(struct brw_page **ppga, u32 count);
-static int brw_interpret(const struct lu_env *env,
- struct ptlrpc_request *req, void *data, int rc);
-
-static inline void osc_pack_req_body(struct ptlrpc_request *req,
- struct obdo *oa)
-{
- struct ost_body *body;
-
- body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
- LASSERT(body);
-
- lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);
-}
-
-static int osc_getattr(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa)
-{
- struct ptlrpc_request *req;
- struct ost_body *body;
- int rc;
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_GETATTR);
- if (!req)
- return -ENOMEM;
-
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- osc_pack_req_body(req, oa);
-
- ptlrpc_request_set_replen(req);
-
- rc = ptlrpc_queue_wait(req);
- if (rc)
- goto out;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
- if (!body) {
- rc = -EPROTO;
- goto out;
- }
-
- CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
- lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oa,
- &body->oa);
-
- oa->o_blksize = cli_brw_size(exp->exp_obd);
- oa->o_valid |= OBD_MD_FLBLKSZ;
-
- out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static int osc_setattr(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa)
-{
- struct ptlrpc_request *req;
- struct ost_body *body;
- int rc;
-
- LASSERT(oa->o_valid & OBD_MD_FLGROUP);
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SETATTR);
- if (!req)
- return -ENOMEM;
-
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SETATTR);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- osc_pack_req_body(req, oa);
-
- ptlrpc_request_set_replen(req);
-
- rc = ptlrpc_queue_wait(req);
- if (rc)
- goto out;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
- if (!body) {
- rc = -EPROTO;
- goto out;
- }
-
- lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oa,
- &body->oa);
-
-out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static int osc_setattr_interpret(const struct lu_env *env,
- struct ptlrpc_request *req,
- struct osc_setattr_args *sa, int rc)
-{
- struct ost_body *body;
-
- if (rc != 0)
- goto out;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
- if (!body) {
- rc = -EPROTO;
- goto out;
- }
-
- lustre_get_wire_obdo(&req->rq_import->imp_connect_data, sa->sa_oa,
- &body->oa);
-out:
- rc = sa->sa_upcall(sa->sa_cookie, rc);
- return rc;
-}
-
-int osc_setattr_async(struct obd_export *exp, struct obdo *oa,
- obd_enqueue_update_f upcall, void *cookie,
- struct ptlrpc_request_set *rqset)
-{
- struct ptlrpc_request *req;
- struct osc_setattr_args *sa;
- int rc;
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SETATTR);
- if (!req)
- return -ENOMEM;
-
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SETATTR);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- osc_pack_req_body(req, oa);
-
- ptlrpc_request_set_replen(req);
-
- /* do MDS-to-OST setattr asynchronously */
- if (!rqset) {
- /* Do not wait for response. */
- ptlrpcd_add_req(req);
- } else {
- req->rq_interpret_reply =
- (ptlrpc_interpterer_t)osc_setattr_interpret;
-
- BUILD_BUG_ON(sizeof(*sa) > sizeof(req->rq_async_args));
- sa = ptlrpc_req_async_args(req);
- sa->sa_oa = oa;
- sa->sa_upcall = upcall;
- sa->sa_cookie = cookie;
-
- if (rqset == PTLRPCD_SET)
- ptlrpcd_add_req(req);
- else
- ptlrpc_set_add_req(rqset, req);
- }
-
- return 0;
-}
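
The BUILD_BUG_ON()/ptlrpc_req_async_args() pair above is the standard ptlrpc idiom for attaching per-request context to an interpret callback: the context struct lives inline in req->rq_async_args, so it must fit there. Skeleton of the idiom (my_args and my_interpret are hypothetical):

	struct my_args {			/* hypothetical context */
		void	*cookie;
	};

	BUILD_BUG_ON(sizeof(struct my_args) > sizeof(req->rq_async_args));
	args = ptlrpc_req_async_args(req);	/* points into the request itself */
	args->cookie = cookie;
	req->rq_interpret_reply = my_interpret;	/* sees the same args on reply */
	ptlrpcd_add_req(req);			/* hand off; no caller-side wait */
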
-
-static int osc_create(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa)
-{
- struct ptlrpc_request *req;
- struct ost_body *body;
- int rc;
-
- LASSERT(oa);
- LASSERT(oa->o_valid & OBD_MD_FLGROUP);
- LASSERT(fid_seq_is_echo(ostid_seq(&oa->o_oi)));
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_CREATE);
- if (!req) {
- rc = -ENOMEM;
- goto out;
- }
-
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_CREATE);
- if (rc) {
- ptlrpc_request_free(req);
- goto out;
- }
-
- body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
- LASSERT(body);
-
- lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);
-
- ptlrpc_request_set_replen(req);
-
- rc = ptlrpc_queue_wait(req);
- if (rc)
- goto out_req;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
- if (!body) {
- rc = -EPROTO;
- goto out_req;
- }
-
- CDEBUG(D_INFO, "oa flags %x\n", oa->o_flags);
- lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oa, &body->oa);
-
- oa->o_blksize = cli_brw_size(exp->exp_obd);
- oa->o_valid |= OBD_MD_FLBLKSZ;
-
- CDEBUG(D_HA, "transno: %lld\n",
- lustre_msg_get_transno(req->rq_repmsg));
-out_req:
- ptlrpc_req_finished(req);
-out:
- return rc;
-}
-
-int osc_punch_base(struct obd_export *exp, struct obdo *oa,
- obd_enqueue_update_f upcall, void *cookie,
- struct ptlrpc_request_set *rqset)
-{
- struct ptlrpc_request *req;
- struct osc_setattr_args *sa;
- struct ost_body *body;
- int rc;
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_PUNCH);
- if (!req)
- return -ENOMEM;
-
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_PUNCH);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
- req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */
- ptlrpc_at_set_req_timeout(req);
-
- body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
- LASSERT(body);
- lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
- oa);
-
- ptlrpc_request_set_replen(req);
-
- req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_setattr_interpret;
- BUILD_BUG_ON(sizeof(*sa) > sizeof(req->rq_async_args));
- sa = ptlrpc_req_async_args(req);
- sa->sa_oa = oa;
- sa->sa_upcall = upcall;
- sa->sa_cookie = cookie;
- if (rqset == PTLRPCD_SET)
- ptlrpcd_add_req(req);
- else
- ptlrpc_set_add_req(rqset, req);
-
- return 0;
-}
-
-static int osc_sync_interpret(const struct lu_env *env,
- struct ptlrpc_request *req,
- void *arg, int rc)
-{
- struct cl_attr *attr = &osc_env_info(env)->oti_attr;
- struct osc_fsync_args *fa = arg;
- unsigned long valid = 0;
- struct ost_body *body;
- struct cl_object *obj;
-
- if (rc)
- goto out;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
- if (!body) {
- CERROR("can't unpack ost_body\n");
- rc = -EPROTO;
- goto out;
- }
-
- *fa->fa_oa = body->oa;
- obj = osc2cl(fa->fa_obj);
-
- /* Update osc object's blocks attribute */
- cl_object_attr_lock(obj);
- if (body->oa.o_valid & OBD_MD_FLBLOCKS) {
- attr->cat_blocks = body->oa.o_blocks;
- valid |= CAT_BLOCKS;
- }
-
- if (valid)
- cl_object_attr_update(env, obj, attr, valid);
- cl_object_attr_unlock(obj);
-
-out:
- rc = fa->fa_upcall(fa->fa_cookie, rc);
- return rc;
-}
-
-int osc_sync_base(struct osc_object *obj, struct obdo *oa,
- obd_enqueue_update_f upcall, void *cookie,
- struct ptlrpc_request_set *rqset)
-{
- struct obd_export *exp = osc_export(obj);
- struct ptlrpc_request *req;
- struct ost_body *body;
- struct osc_fsync_args *fa;
- int rc;
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SYNC);
- if (!req)
- return -ENOMEM;
-
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SYNC);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- /* overload the size and blocks fields in the oa with start/end */
- body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
- LASSERT(body);
- lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
- oa);
-
- ptlrpc_request_set_replen(req);
- req->rq_interpret_reply = osc_sync_interpret;
-
- BUILD_BUG_ON(sizeof(*fa) > sizeof(req->rq_async_args));
- fa = ptlrpc_req_async_args(req);
- fa->fa_obj = obj;
- fa->fa_oa = oa;
- fa->fa_upcall = upcall;
- fa->fa_cookie = cookie;
-
- if (rqset == PTLRPCD_SET)
- ptlrpcd_add_req(req);
- else
- ptlrpc_set_add_req(rqset, req);
-
- return 0;
-}
-
-/* Find and locally cancel locks matching @mode in the resource identified by
- * @oa. Found locks are added to the @cancels list. Returns the number of
- * locks added to the @cancels list.
- */
-static int osc_resource_get_unused(struct obd_export *exp, struct obdo *oa,
- struct list_head *cancels,
- enum ldlm_mode mode, __u64 lock_flags)
-{
- struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
- struct ldlm_res_id res_id;
- struct ldlm_resource *res;
- int count;
-
- /* Return, i.e. cancel nothing, only if ELC is supported (flag in
- * export) but disabled through procfs (flag in NS).
- *
- * This is distinct from the case where ELC was never supported, in
- * which we still want to cancel locks in advance and just cancel them
- * locally, without sending any RPC.
- */
- if (exp_connect_cancelset(exp) && !ns_connect_cancelset(ns))
- return 0;
-
- ostid_build_res_name(&oa->o_oi, &res_id);
- res = ldlm_resource_get(ns, NULL, &res_id, 0, 0);
- if (IS_ERR(res))
- return 0;
-
- LDLM_RESOURCE_ADDREF(res);
- count = ldlm_cancel_resource_local(res, cancels, NULL, mode,
- lock_flags, 0, NULL);
- LDLM_RESOURCE_DELREF(res);
- ldlm_resource_putref(res);
- return count;
-}
-
-static int osc_destroy_interpret(const struct lu_env *env,
- struct ptlrpc_request *req, void *data,
- int rc)
-{
- struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
-
- atomic_dec(&cli->cl_destroy_in_flight);
- wake_up(&cli->cl_destroy_waitq);
- return 0;
-}
-
-static int osc_can_send_destroy(struct client_obd *cli)
-{
- if (atomic_inc_return(&cli->cl_destroy_in_flight) <=
- cli->cl_max_rpcs_in_flight) {
- /* The destroy request can be sent */
- return 1;
- }
- if (atomic_dec_return(&cli->cl_destroy_in_flight) <
- cli->cl_max_rpcs_in_flight) {
- /*
- * The counter has been modified between the two atomic
- * operations.
- */
- wake_up(&cli->cl_destroy_waitq);
- }
- return 0;
-}
-
-static int osc_destroy(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa)
-{
- struct client_obd *cli = &exp->exp_obd->u.cli;
- struct ptlrpc_request *req;
- struct ost_body *body;
- LIST_HEAD(cancels);
- int rc, count;
-
- if (!oa) {
- CDEBUG(D_INFO, "oa NULL\n");
- return -EINVAL;
- }
-
- count = osc_resource_get_unused(exp, oa, &cancels, LCK_PW,
- LDLM_FL_DISCARD_DATA);
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_DESTROY);
- if (!req) {
- ldlm_lock_list_put(&cancels, l_bl_ast, count);
- return -ENOMEM;
- }
-
- rc = ldlm_prep_elc_req(exp, req, LUSTRE_OST_VERSION, OST_DESTROY,
- 0, &cancels, count);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */
- ptlrpc_at_set_req_timeout(req);
-
- body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
- LASSERT(body);
- lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);
-
- ptlrpc_request_set_replen(req);
-
- req->rq_interpret_reply = osc_destroy_interpret;
- if (!osc_can_send_destroy(cli)) {
- /*
- * Wait until the number of on-going destroy RPCs drops
- * below max_rpcs_in_flight
- */
- l_wait_event_abortable_exclusive(cli->cl_destroy_waitq,
- osc_can_send_destroy(cli));
- }
-
- /* Do not wait for response */
- ptlrpcd_add_req(req);
- return 0;
-}
-
-static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
- long writing_bytes)
-{
- u32 bits = OBD_MD_FLBLOCKS | OBD_MD_FLGRANT;
-
- LASSERT(!(oa->o_valid & bits));
-
- oa->o_valid |= bits;
- spin_lock(&cli->cl_loi_list_lock);
- oa->o_dirty = cli->cl_dirty_pages << PAGE_SHIFT;
- if (unlikely(cli->cl_dirty_pages - cli->cl_dirty_transit >
- cli->cl_dirty_max_pages)) {
- CERROR("dirty %lu - %lu > dirty_max %lu\n",
- cli->cl_dirty_pages, cli->cl_dirty_transit,
- cli->cl_dirty_max_pages);
- oa->o_undirty = 0;
- } else if (unlikely(atomic_long_read(&obd_dirty_pages) -
- atomic_long_read(&obd_dirty_transit_pages) >
- (long)(obd_max_dirty_pages + 1))) {
- /* The atomic_read() and the atomic_inc() are
- * not covered by a lock, so they may harmlessly race and trip
- * this CERROR() unless we add in a small fudge factor (+1).
- */
- CERROR("%s: dirty %ld + %ld > system dirty_max %ld\n",
- cli_name(cli), atomic_long_read(&obd_dirty_pages),
- atomic_long_read(&obd_dirty_transit_pages),
- obd_max_dirty_pages);
- oa->o_undirty = 0;
- } else if (unlikely(cli->cl_dirty_max_pages - cli->cl_dirty_pages >
- 0x7fffffff)) {
- CERROR("dirty %lu - dirty_max %lu too big???\n",
- cli->cl_dirty_pages, cli->cl_dirty_max_pages);
- oa->o_undirty = 0;
- } else {
- unsigned long max_in_flight;
-
- max_in_flight = (cli->cl_max_pages_per_rpc << PAGE_SHIFT) *
- (cli->cl_max_rpcs_in_flight + 1);
- oa->o_undirty = max(cli->cl_dirty_max_pages << PAGE_SHIFT,
- max_in_flight);
- }
- oa->o_grant = cli->cl_avail_grant + cli->cl_reserved_grant;
- oa->o_dropped = cli->cl_lost_grant;
- cli->cl_lost_grant = 0;
- spin_unlock(&cli->cl_loi_list_lock);
- CDEBUG(D_CACHE, "dirty: %llu undirty: %u dropped %u grant: %llu\n",
- oa->o_dirty, oa->o_undirty, oa->o_dropped, oa->o_grant);
-}
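
As a worked example (illustrative values, 4 KiB pages): with cl_max_pages_per_rpc = 256 and cl_max_rpcs_in_flight = 8, max_in_flight = 1 MiB * (8 + 1) = 9 MiB, so o_undirty is at least 9 MiB even if cl_dirty_max_pages is configured lower; the "+ 1" keeps headroom for one RPC beyond the in-flight window.
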
-
-void osc_update_next_shrink(struct client_obd *cli)
-{
- cli->cl_next_shrink_grant =
- jiffies + cli->cl_grant_shrink_interval * HZ;
- CDEBUG(D_CACHE, "next time %ld to shrink grant\n",
- cli->cl_next_shrink_grant);
-}
-
-static void __osc_update_grant(struct client_obd *cli, u64 grant)
-{
- spin_lock(&cli->cl_loi_list_lock);
- cli->cl_avail_grant += grant;
- spin_unlock(&cli->cl_loi_list_lock);
-}
-
-static void osc_update_grant(struct client_obd *cli, struct ost_body *body)
-{
- if (body->oa.o_valid & OBD_MD_FLGRANT) {
- CDEBUG(D_CACHE, "got %llu extra grant\n", body->oa.o_grant);
- __osc_update_grant(cli, body->oa.o_grant);
- }
-}
-
-static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
- u32 keylen, void *key, u32 vallen,
- void *val, struct ptlrpc_request_set *set);
-
-static int osc_shrink_grant_interpret(const struct lu_env *env,
- struct ptlrpc_request *req,
- void *aa, int rc)
-{
- struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
- struct obdo *oa = ((struct osc_brw_async_args *)aa)->aa_oa;
- struct ost_body *body;
-
- if (rc != 0) {
- __osc_update_grant(cli, oa->o_grant);
- goto out;
- }
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
- LASSERT(body);
- osc_update_grant(cli, body);
-out:
- kmem_cache_free(obdo_cachep, oa);
- return rc;
-}
-
-static void osc_shrink_grant_local(struct client_obd *cli, struct obdo *oa)
-{
- spin_lock(&cli->cl_loi_list_lock);
- oa->o_grant = cli->cl_avail_grant / 4;
- cli->cl_avail_grant -= oa->o_grant;
- spin_unlock(&cli->cl_loi_list_lock);
- if (!(oa->o_valid & OBD_MD_FLFLAGS)) {
- oa->o_valid |= OBD_MD_FLFLAGS;
- oa->o_flags = 0;
- }
- oa->o_flags |= OBD_FL_SHRINK_GRANT;
- osc_update_next_shrink(cli);
-}
-
-/* Shrink the current grant, either from some large amount to enough for a
- * full set of in-flight RPCs, or if we have already shrunk to that limit
- * then to enough for a single RPC. This avoids keeping more grant than
- * needed, and avoids shrinking the grant piecemeal.
- */
-static int osc_shrink_grant(struct client_obd *cli)
-{
- __u64 target_bytes = (cli->cl_max_rpcs_in_flight + 1) *
- (cli->cl_max_pages_per_rpc << PAGE_SHIFT);
-
- spin_lock(&cli->cl_loi_list_lock);
- if (cli->cl_avail_grant <= target_bytes)
- target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT;
- spin_unlock(&cli->cl_loi_list_lock);
-
- return osc_shrink_grant_to_target(cli, target_bytes);
-}
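
Numerically (illustrative values, 4 KiB pages): with cl_max_pages_per_rpc = 256 and cl_max_rpcs_in_flight = 8, the first-stage target is (8 + 1) * 1 MiB = 9 MiB; once the available grant is already at or below that, the target drops to a single RPC's worth, 1 MiB. osc_shrink_grant_to_target() below refuses to go lower than that in any case.
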
-
-int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes)
-{
- int rc = 0;
- struct ost_body *body;
-
- spin_lock(&cli->cl_loi_list_lock);
- /* Don't shrink if the available grant is already at or below the target.
- * We don't want to shrink below a single RPC, as that will negatively
- * impact block allocation and long-term performance.
- */
- if (target_bytes < cli->cl_max_pages_per_rpc << PAGE_SHIFT)
- target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT;
-
- if (target_bytes >= cli->cl_avail_grant) {
- spin_unlock(&cli->cl_loi_list_lock);
- return 0;
- }
- spin_unlock(&cli->cl_loi_list_lock);
-
- body = kzalloc(sizeof(*body), GFP_NOFS);
- if (!body)
- return -ENOMEM;
-
- osc_announce_cached(cli, &body->oa, 0);
-
- spin_lock(&cli->cl_loi_list_lock);
- body->oa.o_grant = cli->cl_avail_grant - target_bytes;
- cli->cl_avail_grant = target_bytes;
- spin_unlock(&cli->cl_loi_list_lock);
- if (!(body->oa.o_valid & OBD_MD_FLFLAGS)) {
- body->oa.o_valid |= OBD_MD_FLFLAGS;
- body->oa.o_flags = 0;
- }
- body->oa.o_flags |= OBD_FL_SHRINK_GRANT;
- osc_update_next_shrink(cli);
-
- rc = osc_set_info_async(NULL, cli->cl_import->imp_obd->obd_self_export,
- sizeof(KEY_GRANT_SHRINK), KEY_GRANT_SHRINK,
- sizeof(*body), body, NULL);
- if (rc != 0)
- __osc_update_grant(cli, body->oa.o_grant);
- kfree(body);
- return rc;
-}
-
-static int osc_should_shrink_grant(struct client_obd *client)
-{
- unsigned long time = jiffies;
- unsigned long next_shrink = client->cl_next_shrink_grant;
-
- if ((client->cl_import->imp_connect_data.ocd_connect_flags &
- OBD_CONNECT_GRANT_SHRINK) == 0)
- return 0;
-
- if (time_after_eq(time, next_shrink - 5)) {
- /* Get the current RPC size directly, instead of going via:
- * cli_brw_size(obd->u.cli.cl_import->imp_obd->obd_self_export)
- * Keep comment here so that it can be found by searching.
- */
- int brw_size = client->cl_max_pages_per_rpc << PAGE_SHIFT;
-
- if (client->cl_import->imp_state == LUSTRE_IMP_FULL &&
- client->cl_avail_grant > brw_size)
- return 1;
-
- osc_update_next_shrink(client);
- }
- return 0;
-}
-
-static int osc_grant_shrink_grant_cb(struct timeout_item *item, void *data)
-{
- struct client_obd *client;
-
- list_for_each_entry(client, &item->ti_obd_list, cl_grant_shrink_list) {
- if (osc_should_shrink_grant(client))
- osc_shrink_grant(client);
- }
- return 0;
-}
-
-static int osc_add_shrink_grant(struct client_obd *client)
-{
- int rc;
-
- rc = ptlrpc_add_timeout_client(client->cl_grant_shrink_interval,
- TIMEOUT_GRANT,
- osc_grant_shrink_grant_cb, NULL,
- &client->cl_grant_shrink_list);
- if (rc) {
- CERROR("add grant client %s error %d\n", cli_name(client), rc);
- return rc;
- }
- CDEBUG(D_CACHE, "add grant client %s\n", cli_name(client));
- osc_update_next_shrink(client);
- return 0;
-}
-
-static int osc_del_shrink_grant(struct client_obd *client)
-{
- return ptlrpc_del_timeout_client(&client->cl_grant_shrink_list,
- TIMEOUT_GRANT);
-}
-
-static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd)
-{
- /*
- * ocd_grant is the total grant amount we expect to hold: if we've
- * been evicted, it's the new avail_grant amount, and cl_dirty_pages will
- * drop to 0 as in-flight RPCs fail out; otherwise, it's avail_grant +
- * dirty.
- *
- * The race is tolerable here: if we're evicted, but imp_state already
- * left EVICTED state, then cl_dirty_pages must be 0 already.
- */
- spin_lock(&cli->cl_loi_list_lock);
- if (cli->cl_import->imp_state == LUSTRE_IMP_EVICTED)
- cli->cl_avail_grant = ocd->ocd_grant;
- else
- cli->cl_avail_grant = ocd->ocd_grant -
- (cli->cl_dirty_pages << PAGE_SHIFT);
-
- /* determine the appropriate chunk size used by osc_extent. */
- cli->cl_chunkbits = max_t(int, PAGE_SHIFT, ocd->ocd_blocksize);
- spin_unlock(&cli->cl_loi_list_lock);
-
- CDEBUG(D_CACHE, "%s, setting cl_avail_grant: %ld cl_lost_grant: %ld chunk bits: %d\n",
- cli_name(cli), cli->cl_avail_grant, cli->cl_lost_grant,
- cli->cl_chunkbits);
-
- if (ocd->ocd_connect_flags & OBD_CONNECT_GRANT_SHRINK &&
- list_empty(&cli->cl_grant_shrink_list))
- osc_add_shrink_grant(cli);
-}
-
-/* We assume that this OSC got a short read because it read beyond the end
- * of a stripe object; i.e. Lustre is reading a sparse file via the LOV and
- * _knows_ it is reading inside the file - it's just that this stripe was
- * never written at or beyond this stripe offset yet.
- */
-static void handle_short_read(int nob_read, u32 page_count,
- struct brw_page **pga)
-{
- char *ptr;
- int i = 0;
-
- /* skip bytes read OK */
- while (nob_read > 0) {
- LASSERT(page_count > 0);
-
- if (pga[i]->count > nob_read) {
- /* EOF inside this page */
- ptr = kmap(pga[i]->pg) +
- (pga[i]->off & ~PAGE_MASK);
- memset(ptr + nob_read, 0, pga[i]->count - nob_read);
- kunmap(pga[i]->pg);
- page_count--;
- i++;
- break;
- }
-
- nob_read -= pga[i]->count;
- page_count--;
- i++;
- }
-
- /* zero remaining pages */
- while (page_count-- > 0) {
- ptr = kmap(pga[i]->pg) + (pga[i]->off & ~PAGE_MASK);
- memset(ptr, 0, pga[i]->count);
- kunmap(pga[i]->pg);
- i++;
- }
-}
-
-static int check_write_rcs(struct ptlrpc_request *req,
- int requested_nob, int niocount,
- u32 page_count, struct brw_page **pga)
-{
- int i;
- __u32 *remote_rcs;
-
- remote_rcs = req_capsule_server_sized_get(&req->rq_pill, &RMF_RCS,
- sizeof(*remote_rcs) *
- niocount);
- if (!remote_rcs) {
- CDEBUG(D_INFO, "Missing/short RC vector on BRW_WRITE reply\n");
- return -EPROTO;
- }
-
- /* return error if any niobuf was in error */
- for (i = 0; i < niocount; i++) {
- if ((int)remote_rcs[i] < 0)
- return remote_rcs[i];
-
- if (remote_rcs[i] != 0) {
- CDEBUG(D_INFO, "rc[%d] invalid (%d) req %p\n",
- i, remote_rcs[i], req);
- return -EPROTO;
- }
- }
-
- if (req->rq_bulk->bd_nob_transferred != requested_nob) {
- CERROR("Unexpected # bytes transferred: %d (requested %d)\n",
- req->rq_bulk->bd_nob_transferred, requested_nob);
- return -EPROTO;
- }
-
- return 0;
-}
-
-static inline int can_merge_pages(struct brw_page *p1, struct brw_page *p2)
-{
- if (p1->flag != p2->flag) {
- unsigned int mask = ~(OBD_BRW_FROM_GRANT | OBD_BRW_NOCACHE |
- OBD_BRW_SYNC | OBD_BRW_ASYNC |
- OBD_BRW_NOQUOTA | OBD_BRW_SOFT_SYNC);
-
- /* warn if we try to combine flags that we don't know to be
- * safe to combine
- */
- if (unlikely((p1->flag & mask) != (p2->flag & mask))) {
- CWARN("Saw flags 0x%x and 0x%x in the same brw, please report this at http://bugs.whamcloud.com/\n",
- p1->flag, p2->flag);
- }
- return 0;
- }
-
- return (p1->off + p1->count == p2->off);
-}
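
For example, a brw_page covering bytes [0, 4096) and one covering [4096, 8192) with identical flags merge into a single contiguous niobuf (see the niobuf-- branch in osc_brw_prep_request() below); pages whose flags differ in an unmasked bit, or whose extents are not contiguous, stay in separate niobufs.
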
-
-static u32 osc_checksum_bulk(int nob, u32 pg_count,
- struct brw_page **pga, int opc,
- enum cksum_type cksum_type)
-{
- __u32 cksum;
- int i = 0;
- struct ahash_request *hdesc;
- unsigned int bufsize;
- unsigned char cfs_alg = cksum_obd2cfs(cksum_type);
-
- LASSERT(pg_count > 0);
-
- hdesc = cfs_crypto_hash_init(cfs_alg, NULL, 0);
- if (IS_ERR(hdesc)) {
- CERROR("Unable to initialize checksum hash %s\n",
- cfs_crypto_hash_name(cfs_alg));
- return PTR_ERR(hdesc);
- }
-
- while (nob > 0 && pg_count > 0) {
- unsigned int count = pga[i]->count > nob ? nob : pga[i]->count;
-
- /* corrupt the data before we compute the checksum, to
- * simulate an OST->client data error
- */
- if (i == 0 && opc == OST_READ &&
- OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE)) {
- unsigned char *ptr = kmap(pga[i]->pg);
- int off = pga[i]->off & ~PAGE_MASK;
-
- memcpy(ptr + off, "bad1", min_t(typeof(nob), 4, nob));
- kunmap(pga[i]->pg);
- }
- cfs_crypto_hash_update_page(hdesc, pga[i]->pg,
- pga[i]->off & ~PAGE_MASK,
- count);
- CDEBUG(D_PAGE,
- "page %p map %p index %lu flags %lx count %u priv %0lx: off %d\n",
- pga[i]->pg, pga[i]->pg->mapping, pga[i]->pg->index,
- (long)pga[i]->pg->flags, page_count(pga[i]->pg),
- page_private(pga[i]->pg),
- (int)(pga[i]->off & ~PAGE_MASK));
-
- nob -= pga[i]->count;
- pg_count--;
- i++;
- }
-
- bufsize = sizeof(cksum);
- cfs_crypto_hash_final(hdesc, (unsigned char *)&cksum, &bufsize);
-
- /* For sending we only compute the wrong checksum instead
- * of corrupting the data so it is still correct on a redo
- */
- if (opc == OST_WRITE && OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_SEND))
- cksum++;
-
- return cksum;
-}
-
-static int osc_brw_prep_request(int cmd, struct client_obd *cli,
- struct obdo *oa, u32 page_count,
- struct brw_page **pga,
- struct ptlrpc_request **reqp,
- int reserve,
- int resend)
-{
- struct ptlrpc_request *req;
- struct ptlrpc_bulk_desc *desc;
- struct ost_body *body;
- struct obd_ioobj *ioobj;
- struct niobuf_remote *niobuf;
- int niocount, i, requested_nob, opc, rc;
- struct osc_brw_async_args *aa;
- struct req_capsule *pill;
- struct brw_page *pg_prev;
-
- if (OBD_FAIL_CHECK(OBD_FAIL_OSC_BRW_PREP_REQ))
- return -ENOMEM; /* Recoverable */
- if (OBD_FAIL_CHECK(OBD_FAIL_OSC_BRW_PREP_REQ2))
- return -EINVAL; /* Fatal */
-
- if ((cmd & OBD_BRW_WRITE) != 0) {
- opc = OST_WRITE;
- req = ptlrpc_request_alloc_pool(cli->cl_import,
- osc_rq_pool,
- &RQF_OST_BRW_WRITE);
- } else {
- opc = OST_READ;
- req = ptlrpc_request_alloc(cli->cl_import, &RQF_OST_BRW_READ);
- }
- if (!req)
- return -ENOMEM;
-
- for (niocount = i = 1; i < page_count; i++) {
- if (!can_merge_pages(pga[i - 1], pga[i]))
- niocount++;
- }
-
- pill = &req->rq_pill;
- req_capsule_set_size(pill, &RMF_OBD_IOOBJ, RCL_CLIENT,
- sizeof(*ioobj));
- req_capsule_set_size(pill, &RMF_NIOBUF_REMOTE, RCL_CLIENT,
- niocount * sizeof(*niobuf));
-
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, opc);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
- req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */
- ptlrpc_at_set_req_timeout(req);
- /* ask ptlrpc not to resend on EINPROGRESS since BRWs have their own
- * retry logic
- */
- req->rq_no_retry_einprogress = 1;
-
- desc = ptlrpc_prep_bulk_imp(req, page_count,
- cli->cl_import->imp_connect_data.ocd_brw_size >> LNET_MTU_BITS,
- (opc == OST_WRITE ? PTLRPC_BULK_GET_SOURCE :
- PTLRPC_BULK_PUT_SINK) | PTLRPC_BULK_BUF_KIOV, OST_BULK_PORTAL,
- &ptlrpc_bulk_kiov_pin_ops);
-
- if (!desc) {
- rc = -ENOMEM;
- goto out;
- }
- /* NB request now owns desc and will free it when it gets freed */
-
- body = req_capsule_client_get(pill, &RMF_OST_BODY);
- ioobj = req_capsule_client_get(pill, &RMF_OBD_IOOBJ);
- niobuf = req_capsule_client_get(pill, &RMF_NIOBUF_REMOTE);
- LASSERT(body && ioobj && niobuf);
-
- lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);
-
- obdo_to_ioobj(oa, ioobj);
- ioobj->ioo_bufcnt = niocount;
- /* The high bits of ioo_max_brw tell the server the _maximum_ number of
- * bulks that might be sent for this request. The actual number is decided
- * when the RPC is finally sent in ptlrpc_register_bulk(). It sends
- * "max - 1" for compatibility with old clients that send "0", and also so
- * that the actual maximum is a power-of-two number, not one less. LU-1431
- */
- ioobj_max_brw_set(ioobj, desc->bd_md_max_brw);
- LASSERT(page_count > 0);
- pg_prev = pga[0];
- for (requested_nob = i = 0; i < page_count; i++, niobuf++) {
- struct brw_page *pg = pga[i];
- int poff = pg->off & ~PAGE_MASK;
-
- LASSERT(pg->count > 0);
- /* make sure there is no gap in the middle of page array */
- LASSERTF(page_count == 1 ||
- (ergo(i == 0, poff + pg->count == PAGE_SIZE) &&
- ergo(i > 0 && i < page_count - 1,
- poff == 0 && pg->count == PAGE_SIZE) &&
- ergo(i == page_count - 1, poff == 0)),
- "i: %d/%d pg: %p off: %llu, count: %u\n",
- i, page_count, pg, pg->off, pg->count);
- LASSERTF(i == 0 || pg->off > pg_prev->off,
- "i %d p_c %u pg %p [pri %lu ind %lu] off %llu prev_pg %p [pri %lu ind %lu] off %llu\n",
- i, page_count,
- pg->pg, page_private(pg->pg), pg->pg->index, pg->off,
- pg_prev->pg, page_private(pg_prev->pg),
- pg_prev->pg->index, pg_prev->off);
- LASSERT((pga[0]->flag & OBD_BRW_SRVLOCK) ==
- (pg->flag & OBD_BRW_SRVLOCK));
-
- desc->bd_frag_ops->add_kiov_frag(desc, pg->pg, poff, pg->count);
- requested_nob += pg->count;
-
- if (i > 0 && can_merge_pages(pg_prev, pg)) {
- niobuf--;
- niobuf->rnb_len += pg->count;
- } else {
- niobuf->rnb_offset = pg->off;
- niobuf->rnb_len = pg->count;
- niobuf->rnb_flags = pg->flag;
- }
- pg_prev = pg;
- }
-
- LASSERTF((void *)(niobuf - niocount) ==
- req_capsule_client_get(&req->rq_pill, &RMF_NIOBUF_REMOTE),
- "want %p - real %p\n", req_capsule_client_get(&req->rq_pill,
- &RMF_NIOBUF_REMOTE), (void *)(niobuf - niocount));
-
- osc_announce_cached(cli, &body->oa, opc == OST_WRITE ? requested_nob : 0);
- if (resend) {
- if ((body->oa.o_valid & OBD_MD_FLFLAGS) == 0) {
- body->oa.o_valid |= OBD_MD_FLFLAGS;
- body->oa.o_flags = 0;
- }
- body->oa.o_flags |= OBD_FL_RECOV_RESEND;
- }
-
- if (osc_should_shrink_grant(cli))
- osc_shrink_grant_local(cli, &body->oa);
-
- /* size[REQ_REC_OFF] still sizeof (*body) */
- if (opc == OST_WRITE) {
- if (cli->cl_checksum &&
- !sptlrpc_flavor_has_bulk(&req->rq_flvr)) {
- /* store cl_cksum_type in a local variable since
- * it can be changed via lprocfs
- */
- enum cksum_type cksum_type = cli->cl_cksum_type;
-
- if ((body->oa.o_valid & OBD_MD_FLFLAGS) == 0) {
- oa->o_flags &= OBD_FL_LOCAL_MASK;
- body->oa.o_flags = 0;
- }
- body->oa.o_flags |= cksum_type_pack(cksum_type);
- body->oa.o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
- body->oa.o_cksum = osc_checksum_bulk(requested_nob,
- page_count, pga,
- OST_WRITE,
- cksum_type);
- CDEBUG(D_PAGE, "checksum at write origin: %x\n",
- body->oa.o_cksum);
- /* save this in 'oa', too, for later checking */
- oa->o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
- oa->o_flags |= cksum_type_pack(cksum_type);
- } else {
- /* clear out the checksum flag, in case this is a
- * resend but cl_checksum is no longer set. b=11238
- */
- oa->o_valid &= ~OBD_MD_FLCKSUM;
- }
- oa->o_cksum = body->oa.o_cksum;
- /* 1 RC per niobuf */
- req_capsule_set_size(pill, &RMF_RCS, RCL_SERVER,
- sizeof(__u32) * niocount);
- } else {
- if (cli->cl_checksum &&
- !sptlrpc_flavor_has_bulk(&req->rq_flvr)) {
- if ((body->oa.o_valid & OBD_MD_FLFLAGS) == 0)
- body->oa.o_flags = 0;
- body->oa.o_flags |= cksum_type_pack(cli->cl_cksum_type);
- body->oa.o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
- }
- }
- ptlrpc_request_set_replen(req);
-
- BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args));
- aa = ptlrpc_req_async_args(req);
- aa->aa_oa = oa;
- aa->aa_requested_nob = requested_nob;
- aa->aa_nio_count = niocount;
- aa->aa_page_count = page_count;
- aa->aa_resends = 0;
- aa->aa_ppga = pga;
- aa->aa_cli = cli;
- INIT_LIST_HEAD(&aa->aa_oaps);
-
- *reqp = req;
- niobuf = req_capsule_client_get(pill, &RMF_NIOBUF_REMOTE);
- CDEBUG(D_RPCTRACE, "brw rpc %p - object " DOSTID " offset %lld<>%lld\n",
- req, POSTID(&oa->o_oi), niobuf[0].rnb_offset,
- niobuf[niocount - 1].rnb_offset + niobuf[niocount - 1].rnb_len);
-
- return 0;
-
- out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static int check_write_checksum(struct obdo *oa,
- const struct lnet_process_id *peer,
- __u32 client_cksum, __u32 server_cksum, int nob,
- u32 page_count, struct brw_page **pga,
- enum cksum_type client_cksum_type)
-{
- __u32 new_cksum;
- char *msg;
- enum cksum_type cksum_type;
-
- if (server_cksum == client_cksum) {
- CDEBUG(D_PAGE, "checksum %x confirmed\n", client_cksum);
- return 0;
- }
-
- cksum_type = cksum_type_unpack(oa->o_valid & OBD_MD_FLFLAGS ?
- oa->o_flags : 0);
- new_cksum = osc_checksum_bulk(nob, page_count, pga, OST_WRITE,
- cksum_type);
-
- if (cksum_type != client_cksum_type)
- msg = "the server did not use the checksum type specified in the original request - likely a protocol problem";
- else if (new_cksum == server_cksum)
- msg = "changed on the client after we checksummed it - likely false positive due to mmap IO (bug 11742)";
- else if (new_cksum == client_cksum)
- msg = "changed in transit before arrival at OST";
- else
- msg = "changed in transit AND doesn't match the original - likely false positive due to mmap IO (bug 11742)";
-
- LCONSOLE_ERROR_MSG(0x132, "BAD WRITE CHECKSUM: %s: from %s inode " DFID " object " DOSTID " extent [%llu-%llu]\n",
- msg, libcfs_nid2str(peer->nid),
- oa->o_valid & OBD_MD_FLFID ? oa->o_parent_seq : (__u64)0,
- oa->o_valid & OBD_MD_FLFID ? oa->o_parent_oid : 0,
- oa->o_valid & OBD_MD_FLFID ? oa->o_parent_ver : 0,
- POSTID(&oa->o_oi), pga[0]->off,
- pga[page_count - 1]->off +
- pga[page_count - 1]->count - 1);
- CERROR("original client csum %x (type %x), server csum %x (type %x), client csum now %x\n",
- client_cksum, client_cksum_type,
- server_cksum, cksum_type, new_cksum);
- return 1;
-}
-
-/* Note rc enters this function as number of bytes transferred */
-static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
-{
- struct osc_brw_async_args *aa = (void *)&req->rq_async_args;
- const struct lnet_process_id *peer =
- &req->rq_import->imp_connection->c_peer;
- struct client_obd *cli = aa->aa_cli;
- struct ost_body *body;
- __u32 client_cksum = 0;
-
- if (rc < 0 && rc != -EDQUOT) {
- DEBUG_REQ(D_INFO, req, "Failed request with rc = %d\n", rc);
- return rc;
- }
-
- LASSERTF(req->rq_repmsg, "rc = %d\n", rc);
- body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
- if (!body) {
- DEBUG_REQ(D_INFO, req, "Can't unpack body\n");
- return -EPROTO;
- }
-
- /* set/clear over quota flag for a uid/gid */
- if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE &&
- body->oa.o_valid & (OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA)) {
- unsigned int qid[MAXQUOTAS] = { body->oa.o_uid, body->oa.o_gid };
-
- CDEBUG(D_QUOTA, "setdq for [%u %u] with valid %#llx, flags %x\n",
- body->oa.o_uid, body->oa.o_gid, body->oa.o_valid,
- body->oa.o_flags);
- osc_quota_setdq(cli, qid, body->oa.o_valid, body->oa.o_flags);
- }
-
- osc_update_grant(cli, body);
-
- if (rc < 0)
- return rc;
-
- if (aa->aa_oa->o_valid & OBD_MD_FLCKSUM)
- client_cksum = aa->aa_oa->o_cksum; /* save for later */
-
- if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE) {
- if (rc > 0) {
- CERROR("Unexpected +ve rc %d\n", rc);
- return -EPROTO;
- }
- LASSERT(req->rq_bulk->bd_nob == aa->aa_requested_nob);
-
- if (sptlrpc_cli_unwrap_bulk_write(req, req->rq_bulk))
- return -EAGAIN;
-
- if ((aa->aa_oa->o_valid & OBD_MD_FLCKSUM) && client_cksum &&
- check_write_checksum(&body->oa, peer, client_cksum,
- body->oa.o_cksum, aa->aa_requested_nob,
- aa->aa_page_count, aa->aa_ppga,
- cksum_type_unpack(aa->aa_oa->o_flags)))
- return -EAGAIN;
-
- rc = check_write_rcs(req, aa->aa_requested_nob,
- aa->aa_nio_count,
- aa->aa_page_count, aa->aa_ppga);
- goto out;
- }
-
- /* The rest of this function executes only for OST_READs */
-
- /* if unwrap_bulk failed, return -EAGAIN to retry */
- rc = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk, rc);
- if (rc < 0) {
- rc = -EAGAIN;
- goto out;
- }
-
- if (rc > aa->aa_requested_nob) {
- CERROR("Unexpected rc %d (%d requested)\n", rc,
- aa->aa_requested_nob);
- return -EPROTO;
- }
-
- if (rc != req->rq_bulk->bd_nob_transferred) {
- CERROR("Unexpected rc %d (%d transferred)\n",
- rc, req->rq_bulk->bd_nob_transferred);
- return -EPROTO;
- }
-
- if (rc < aa->aa_requested_nob)
- handle_short_read(rc, aa->aa_page_count, aa->aa_ppga);
-
- if (body->oa.o_valid & OBD_MD_FLCKSUM) {
- static int cksum_counter;
- __u32 server_cksum = body->oa.o_cksum;
- char *via = "";
- char *router = "";
- enum cksum_type cksum_type;
-
- cksum_type = cksum_type_unpack(body->oa.o_valid &
- OBD_MD_FLFLAGS ?
- body->oa.o_flags : 0);
- client_cksum = osc_checksum_bulk(rc, aa->aa_page_count,
- aa->aa_ppga, OST_READ,
- cksum_type);
-
- if (peer->nid != req->rq_bulk->bd_sender) {
- via = " via ";
- router = libcfs_nid2str(req->rq_bulk->bd_sender);
- }
-
- if (server_cksum != client_cksum) {
- LCONSOLE_ERROR_MSG(0x133, "%s: BAD READ CHECKSUM: from %s%s%s inode " DFID " object " DOSTID " extent [%llu-%llu]\n",
- req->rq_import->imp_obd->obd_name,
- libcfs_nid2str(peer->nid),
- via, router,
- body->oa.o_valid & OBD_MD_FLFID ?
- body->oa.o_parent_seq : (__u64)0,
- body->oa.o_valid & OBD_MD_FLFID ?
- body->oa.o_parent_oid : 0,
- body->oa.o_valid & OBD_MD_FLFID ?
- body->oa.o_parent_ver : 0,
- POSTID(&body->oa.o_oi),
- aa->aa_ppga[0]->off,
- aa->aa_ppga[aa->aa_page_count-1]->off +
- aa->aa_ppga[aa->aa_page_count-1]->count -
- 1);
- CERROR("client %x, server %x, cksum_type %x\n",
- client_cksum, server_cksum, cksum_type);
- cksum_counter = 0;
- aa->aa_oa->o_cksum = client_cksum;
- rc = -EAGAIN;
- } else {
- cksum_counter++;
- CDEBUG(D_PAGE, "checksum %x confirmed\n", client_cksum);
- rc = 0;
- }
- } else if (unlikely(client_cksum)) {
- static int cksum_missed;
-
- cksum_missed++;
- if ((cksum_missed & (-cksum_missed)) == cksum_missed)
- CERROR("Checksum %u requested from %s but not sent\n",
- cksum_missed, libcfs_nid2str(peer->nid));
- } else {
- rc = 0;
- }
-out:
- if (rc >= 0)
- lustre_get_wire_obdo(&req->rq_import->imp_connect_data,
- aa->aa_oa, &body->oa);
-
- return rc;
-}
-
-static int osc_brw_redo_request(struct ptlrpc_request *request,
- struct osc_brw_async_args *aa, int rc)
-{
- struct ptlrpc_request *new_req;
- struct osc_brw_async_args *new_aa;
- struct osc_async_page *oap;
-
- DEBUG_REQ(rc == -EINPROGRESS ? D_RPCTRACE : D_ERROR, request,
- "redo for recoverable error %d", rc);
-
- rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) ==
- OST_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
- aa->aa_cli, aa->aa_oa,
- aa->aa_page_count, aa->aa_ppga,
- &new_req, 0, 1);
- if (rc)
- return rc;
-
- list_for_each_entry(oap, &aa->aa_oaps, oap_rpc_item) {
- if (oap->oap_request) {
- LASSERTF(request == oap->oap_request,
- "request %p != oap_request %p\n",
- request, oap->oap_request);
- if (oap->oap_interrupted) {
- ptlrpc_req_finished(new_req);
- return -EINTR;
- }
- }
- }
- /* The new request takes over pga and oaps from the old request.
- * Note that a list_head cannot simply be copied; it must be moved
- * via list_splice_init().
- */
- aa->aa_resends++;
- new_req->rq_interpret_reply = request->rq_interpret_reply;
- new_req->rq_async_args = request->rq_async_args;
- new_req->rq_commit_cb = request->rq_commit_cb;
- /* cap resend delay to the current request timeout, this is similar to
- * what ptlrpc does (see after_reply())
- */
- if (aa->aa_resends > new_req->rq_timeout)
- new_req->rq_sent = ktime_get_real_seconds() + new_req->rq_timeout;
- else
- new_req->rq_sent = ktime_get_real_seconds() + aa->aa_resends;
- new_req->rq_generation_set = 1;
- new_req->rq_import_generation = request->rq_import_generation;
-
- new_aa = ptlrpc_req_async_args(new_req);
-
- INIT_LIST_HEAD(&new_aa->aa_oaps);
- list_splice_init(&aa->aa_oaps, &new_aa->aa_oaps);
- INIT_LIST_HEAD(&new_aa->aa_exts);
- list_splice_init(&aa->aa_exts, &new_aa->aa_exts);
- new_aa->aa_resends = aa->aa_resends;
-
- list_for_each_entry(oap, &new_aa->aa_oaps, oap_rpc_item) {
- if (oap->oap_request) {
- ptlrpc_req_finished(oap->oap_request);
- oap->oap_request = ptlrpc_request_addref(new_req);
- }
- }
-
- /* XXX: This code will run into problems if we're going to support
- * adding a series of BRW RPCs into a self-defined ptlrpc_request_set
- * and waiting for all of them to finish. We should inherit the
- * request set from the old request.
- */
- ptlrpcd_add_req(new_req);
-
- DEBUG_REQ(D_INFO, new_req, "new request");
- return 0;
-}
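
For instance (illustrative numbers): with rq_timeout = 30s, the 3rd resend of a request is delayed by 3 seconds and the 50th by the full 30 seconds, so repeated recoverable failures back off linearly up to one request timeout between attempts.
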
-
-/*
- * Ugh, we want disk allocation on the target to happen in offset order. We'll
- * follow Sedgewick's advice and stick to the dead simple shellsort -- it'll do
- * fine for our small page arrays and doesn't require allocation. It's an
- * insertion sort that swaps elements that are strides apart, shrinking the
- * stride down until it's 1 and the array is sorted.
- */
-static void sort_brw_pages(struct brw_page **array, int num)
-{
- int stride, i, j;
- struct brw_page *tmp;
-
- if (num == 1)
- return;
- for (stride = 1; stride < num ; stride = (stride * 3) + 1)
- ;
-
- do {
- stride /= 3;
- for (i = stride ; i < num ; i++) {
- tmp = array[i];
- j = i;
- while (j >= stride && array[j - stride]->off > tmp->off) {
- array[j] = array[j - stride];
- j -= stride;
- }
- array[j] = tmp;
- }
- } while (stride > 1);
-}
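
The stride sequence generated by stride = stride * 3 + 1 is Knuth's 1, 4, 13, 40, 121, ... Self-contained over plain integers, the same sort looks like this (sketch for illustration, not driver code):

	static void shellsort_ints(int *a, int n)
	{
		int stride, i, j, tmp;

		if (n < 2)
			return;
		/* grow the stride past n: 1, 4, 13, 40, 121, ... */
		for (stride = 1; stride < n; stride = stride * 3 + 1)
			;
		do {
			stride /= 3;
			for (i = stride; i < n; i++) {
				tmp = a[i];
				for (j = i; j >= stride && a[j - stride] > tmp;
				     j -= stride)
					a[j] = a[j - stride];
				a[j] = tmp;
			}
		} while (stride > 1);
	}
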
-
-static void osc_release_ppga(struct brw_page **ppga, u32 count)
-{
- LASSERT(ppga);
- kfree(ppga);
-}
-
-static int brw_interpret(const struct lu_env *env,
- struct ptlrpc_request *req, void *data, int rc)
-{
- struct osc_brw_async_args *aa = data;
- struct osc_extent *ext;
- struct osc_extent *tmp;
- struct client_obd *cli = aa->aa_cli;
-
- rc = osc_brw_fini_request(req, rc);
- CDEBUG(D_INODE, "request %p aa %p rc %d\n", req, aa, rc);
- /* When the server returns -EINPROGRESS, the client should always retry,
- * regardless of how many times the bulk has already been resent.
- */
- if (osc_recoverable_error(rc)) {
- if (req->rq_import_generation !=
- req->rq_import->imp_generation) {
- CDEBUG(D_HA, "%s: resend cross eviction for object: " DOSTID ", rc = %d.\n",
- req->rq_import->imp_obd->obd_name,
- POSTID(&aa->aa_oa->o_oi), rc);
- } else if (rc == -EINPROGRESS ||
- client_should_resend(aa->aa_resends, aa->aa_cli)) {
- rc = osc_brw_redo_request(req, aa, rc);
- } else {
- CERROR("%s: too many resent retries for object: %llu:%llu, rc = %d.\n",
- req->rq_import->imp_obd->obd_name,
- POSTID(&aa->aa_oa->o_oi), rc);
- }
-
- if (rc == 0)
- return 0;
- else if (rc == -EAGAIN || rc == -EINPROGRESS)
- rc = -EIO;
- }
-
- if (rc == 0) {
- struct obdo *oa = aa->aa_oa;
- struct cl_attr *attr = &osc_env_info(env)->oti_attr;
- unsigned long valid = 0;
- struct cl_object *obj;
- struct osc_async_page *last;
-
- last = brw_page2oap(aa->aa_ppga[aa->aa_page_count - 1]);
- obj = osc2cl(last->oap_obj);
-
- cl_object_attr_lock(obj);
- if (oa->o_valid & OBD_MD_FLBLOCKS) {
- attr->cat_blocks = oa->o_blocks;
- valid |= CAT_BLOCKS;
- }
- if (oa->o_valid & OBD_MD_FLMTIME) {
- attr->cat_mtime = oa->o_mtime;
- valid |= CAT_MTIME;
- }
- if (oa->o_valid & OBD_MD_FLATIME) {
- attr->cat_atime = oa->o_atime;
- valid |= CAT_ATIME;
- }
- if (oa->o_valid & OBD_MD_FLCTIME) {
- attr->cat_ctime = oa->o_ctime;
- valid |= CAT_CTIME;
- }
-
- if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE) {
- struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo;
- loff_t last_off = last->oap_count + last->oap_obj_off +
- last->oap_page_off;
-
- /* Change file size if this is an out of quota or
- * direct IO write and it extends the file size
- */
- if (loi->loi_lvb.lvb_size < last_off) {
- attr->cat_size = last_off;
- valid |= CAT_SIZE;
- }
- /* Extend KMS if it's not a lockless write */
- if (loi->loi_kms < last_off &&
- oap2osc_page(last)->ops_srvlock == 0) {
- attr->cat_kms = last_off;
- valid |= CAT_KMS;
- }
- }
-
- if (valid != 0)
- cl_object_attr_update(env, obj, attr, valid);
- cl_object_attr_unlock(obj);
- }
- kmem_cache_free(obdo_cachep, aa->aa_oa);
-
- if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE && rc == 0)
- osc_inc_unstable_pages(req);
-
- list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) {
- list_del_init(&ext->oe_link);
- osc_extent_finish(env, ext, 1, rc);
- }
- LASSERT(list_empty(&aa->aa_exts));
- LASSERT(list_empty(&aa->aa_oaps));
-
- osc_release_ppga(aa->aa_ppga, aa->aa_page_count);
- ptlrpc_lprocfs_brw(req, req->rq_bulk->bd_nob_transferred);
-
- spin_lock(&cli->cl_loi_list_lock);
- /* We need to decrement before osc_ap_completion->osc_wake_cache_waiters
- * is called so we know whether to go to sync BRWs or wait for more
- * RPCs to complete
- */
- if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE)
- cli->cl_w_in_flight--;
- else
- cli->cl_r_in_flight--;
- osc_wake_cache_waiters(cli);
- spin_unlock(&cli->cl_loi_list_lock);
-
- osc_io_unplug(env, cli, NULL);
- return rc;
-}
-
-static void brw_commit(struct ptlrpc_request *req)
-{
- /*
- * If osc_inc_unstable_pages (via osc_extent_finish) races with
- * this function, called via the rq_commit_cb, we need to ensure
- * osc_dec_unstable_pages is still called. Otherwise unstable
- * pages may be leaked.
- */
- spin_lock(&req->rq_lock);
- if (unlikely(req->rq_unstable)) {
- req->rq_unstable = 0;
- spin_unlock(&req->rq_lock);
- osc_dec_unstable_pages(req);
- } else {
- req->rq_committed = 1;
- spin_unlock(&req->rq_lock);
- }
-}
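
To make the race explicit: the accounting side (osc_inc_unstable_pages, reached from brw_interpret() above) must take the same rq_lock and test rq_committed, so that exactly one of the two paths performs the decrement. A hedged sketch of that counterpart's shape -- an illustration, not the removed implementation:

/* Illustrative sketch only. Whichever side observes the other's flag
 * already set performs the cleanup, so unstable pages are decremented
 * exactly once per request.
 */
static void example_account_unstable(struct ptlrpc_request *req)
{
	/* ... count this request's pages as unstable ... */
	spin_lock(&req->rq_lock);
	if (likely(!req->rq_committed)) {
		/* brw_commit() has not run yet; it will decrement */
		req->rq_unstable = 1;
		spin_unlock(&req->rq_lock);
	} else {
		/* brw_commit() already ran and saw rq_unstable == 0 */
		spin_unlock(&req->rq_lock);
		osc_dec_unstable_pages(req);
	}
}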
-
-/**
- * Build an RPC from the list of extents @ext_list. The caller must ensure
- * that the total number of pages in this list does not exceed the maximum
- * pages per RPC.
- * Extents in the list must be in OES_RPC state.
- */
-int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
- struct list_head *ext_list, int cmd)
-{
- struct ptlrpc_request *req = NULL;
- struct osc_extent *ext;
- struct brw_page **pga = NULL;
- struct osc_brw_async_args *aa = NULL;
- struct obdo *oa = NULL;
- struct osc_async_page *oap;
- struct osc_object *obj = NULL;
- struct cl_req_attr *crattr = NULL;
- u64 starting_offset = OBD_OBJECT_EOF;
- u64 ending_offset = 0;
- unsigned int mpflag = 0;
- int mem_tight = 0;
- int page_count = 0;
- bool soft_sync = false;
- bool interrupted = false;
- int i;
- int rc;
- struct ost_body *body;
- LIST_HEAD(rpc_list);
-
- LASSERT(!list_empty(ext_list));
-
- /* add pages into rpc_list to build BRW rpc */
- list_for_each_entry(ext, ext_list, oe_link) {
- LASSERT(ext->oe_state == OES_RPC);
- mem_tight |= ext->oe_memalloc;
- page_count += ext->oe_nr_pages;
- if (!obj)
- obj = ext->oe_obj;
- }
-
- soft_sync = osc_over_unstable_soft_limit(cli);
- if (mem_tight)
- mpflag = memalloc_noreclaim_save();
-
- pga = kcalloc(page_count, sizeof(*pga), GFP_NOFS);
- if (!pga) {
- rc = -ENOMEM;
- goto out;
- }
-
- oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
- if (!oa) {
- rc = -ENOMEM;
- goto out;
- }
-
- i = 0;
- list_for_each_entry(ext, ext_list, oe_link) {
- list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
- if (mem_tight)
- oap->oap_brw_flags |= OBD_BRW_MEMALLOC;
- if (soft_sync)
- oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC;
- pga[i] = &oap->oap_brw_page;
- pga[i]->off = oap->oap_obj_off + oap->oap_page_off;
- i++;
-
- list_add_tail(&oap->oap_rpc_item, &rpc_list);
- if (starting_offset == OBD_OBJECT_EOF ||
- starting_offset > oap->oap_obj_off)
- starting_offset = oap->oap_obj_off;
- else
- LASSERT(!oap->oap_page_off);
- if (ending_offset < oap->oap_obj_off + oap->oap_count)
- ending_offset = oap->oap_obj_off +
- oap->oap_count;
- else
- LASSERT(oap->oap_page_off + oap->oap_count ==
- PAGE_SIZE);
- if (oap->oap_interrupted)
- interrupted = true;
- }
- }
-
- /* first page in the list */
- oap = list_entry(rpc_list.next, typeof(*oap), oap_rpc_item);
-
- crattr = &osc_env_info(env)->oti_req_attr;
- memset(crattr, 0, sizeof(*crattr));
- crattr->cra_type = (cmd & OBD_BRW_WRITE) ? CRT_WRITE : CRT_READ;
- crattr->cra_flags = ~0ULL;
- crattr->cra_page = oap2cl_page(oap);
- crattr->cra_oa = oa;
- cl_req_attr_set(env, osc2cl(obj), crattr);
-
- sort_brw_pages(pga, page_count);
- rc = osc_brw_prep_request(cmd, cli, oa, page_count, pga, &req, 1, 0);
- if (rc != 0) {
- CERROR("prep_req failed: %d\n", rc);
- goto out;
- }
-
- req->rq_commit_cb = brw_commit;
- req->rq_interpret_reply = brw_interpret;
-
- req->rq_memalloc = mem_tight != 0;
- oap->oap_request = ptlrpc_request_addref(req);
- if (interrupted && !req->rq_intr)
- ptlrpc_mark_interrupted(req);
-
- /* Need to update the timestamps after the request is built in case
- * we race with setattr (locally or in the queue at the OST). If the
- * OST gets a later setattr before an earlier BRW (as determined by
- * the request xid), the OST will not use the BRW timestamps. Sadly,
- * there is no obvious way to do this in a single call. bug 10150
- */
- body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
- crattr->cra_oa = &body->oa;
- crattr->cra_flags = OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLATIME;
- cl_req_attr_set(env, osc2cl(obj), crattr);
- lustre_msg_set_jobid(req->rq_reqmsg, crattr->cra_jobid);
-
- BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args));
- aa = ptlrpc_req_async_args(req);
- INIT_LIST_HEAD(&aa->aa_oaps);
- list_splice_init(&rpc_list, &aa->aa_oaps);
- INIT_LIST_HEAD(&aa->aa_exts);
- list_splice_init(ext_list, &aa->aa_exts);
-
- spin_lock(&cli->cl_loi_list_lock);
- starting_offset >>= PAGE_SHIFT;
- if (cmd == OBD_BRW_READ) {
- cli->cl_r_in_flight++;
- lprocfs_oh_tally_log2(&cli->cl_read_page_hist, page_count);
- lprocfs_oh_tally(&cli->cl_read_rpc_hist, cli->cl_r_in_flight);
- lprocfs_oh_tally_log2(&cli->cl_read_offset_hist,
- starting_offset + 1);
- } else {
- cli->cl_w_in_flight++;
- lprocfs_oh_tally_log2(&cli->cl_write_page_hist, page_count);
- lprocfs_oh_tally(&cli->cl_write_rpc_hist, cli->cl_w_in_flight);
- lprocfs_oh_tally_log2(&cli->cl_write_offset_hist,
- starting_offset + 1);
- }
- spin_unlock(&cli->cl_loi_list_lock);
-
- DEBUG_REQ(D_INODE, req, "%d pages, aa %p. now %ur/%dw in flight",
- page_count, aa, cli->cl_r_in_flight,
- cli->cl_w_in_flight);
- OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DELAY_IO, cfs_fail_val);
-
- ptlrpcd_add_req(req);
- rc = 0;
-
-out:
- if (mem_tight != 0)
- memalloc_noreclaim_restore(mpflag);
-
- if (rc != 0) {
- LASSERT(!req);
-
- if (oa)
- kmem_cache_free(obdo_cachep, oa);
- kfree(pga);
- /* this should happen rarely and is pretty bad: it makes the
- * pending list no longer follow the dirty order
- */
- while (!list_empty(ext_list)) {
- ext = list_entry(ext_list->next, struct osc_extent,
- oe_link);
- list_del_init(&ext->oe_link);
- osc_extent_finish(env, ext, 0, rc);
- }
- }
- return rc;
-}
-
-static int osc_set_lock_data(struct ldlm_lock *lock, void *data)
-{
- int set = 0;
-
- LASSERT(lock);
-
- lock_res_and_lock(lock);
-
- if (!lock->l_ast_data)
- lock->l_ast_data = data;
- if (lock->l_ast_data == data)
- set = 1;
-
- unlock_res_and_lock(lock);
-
- return set;
-}
-
-static int osc_enqueue_fini(struct ptlrpc_request *req,
- osc_enqueue_upcall_f upcall, void *cookie,
- struct lustre_handle *lockh, enum ldlm_mode mode,
- __u64 *flags, int agl, int errcode)
-{
- bool intent = *flags & LDLM_FL_HAS_INTENT;
- int rc;
-
- /* The request was created before the ldlm_cli_enqueue() call. */
- if (intent && errcode == ELDLM_LOCK_ABORTED) {
- struct ldlm_reply *rep;
-
- rep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
-
- rep->lock_policy_res1 =
- ptlrpc_status_ntoh(rep->lock_policy_res1);
- if (rep->lock_policy_res1)
- errcode = rep->lock_policy_res1;
- if (!agl)
- *flags |= LDLM_FL_LVB_READY;
- } else if (errcode == ELDLM_OK) {
- *flags |= LDLM_FL_LVB_READY;
- }
-
- /* Call the update callback. */
- rc = (*upcall)(cookie, lockh, errcode);
- /* release the reference taken in ldlm_cli_enqueue() */
- if (errcode == ELDLM_LOCK_MATCHED)
- errcode = ELDLM_OK;
- if (errcode == ELDLM_OK && lustre_handle_is_used(lockh))
- ldlm_lock_decref(lockh, mode);
-
- return rc;
-}
-
-static int osc_enqueue_interpret(const struct lu_env *env,
- struct ptlrpc_request *req,
- struct osc_enqueue_args *aa, int rc)
-{
- struct ldlm_lock *lock;
- struct lustre_handle *lockh = &aa->oa_lockh;
- enum ldlm_mode mode = aa->oa_mode;
- struct ost_lvb *lvb = aa->oa_lvb;
- __u32 lvb_len = sizeof(*lvb);
- __u64 flags = 0;
-
- /* ldlm_cli_enqueue is holding a reference on the lock, so it must
- * be valid.
- */
- lock = ldlm_handle2lock(lockh);
- LASSERTF(lock, "lockh %llx, req %p, aa %p - client evicted?\n",
- lockh->cookie, req, aa);
-
- /* Take an additional reference so that a blocking AST that
- * ldlm_cli_enqueue_fini() might post for a failed lock is guaranteed
- * to arrive after an upcall has been executed by
- * osc_enqueue_fini().
- */
- ldlm_lock_addref(lockh, mode);
-
- /* Let cl_lock_state_wait fail with -ERESTARTSYS to unuse sublocks. */
- OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_HANG, 2);
-
- /* Let the CP AST grant the lock first. */
- OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 1);
-
- if (aa->oa_agl) {
- LASSERT(!aa->oa_lvb);
- LASSERT(!aa->oa_flags);
- aa->oa_flags = &flags;
- }
-
- /* Complete the lock-obtaining procedure. */
- rc = ldlm_cli_enqueue_fini(aa->oa_exp, req, aa->oa_type, 1,
- aa->oa_mode, aa->oa_flags, lvb, lvb_len,
- lockh, rc);
- /* Complete osc stuff. */
- rc = osc_enqueue_fini(req, aa->oa_upcall, aa->oa_cookie, lockh, mode,
- aa->oa_flags, aa->oa_agl, rc);
-
- OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_CANCEL_RACE, 10);
-
- ldlm_lock_decref(lockh, mode);
- LDLM_LOCK_PUT(lock);
- return rc;
-}
-
-struct ptlrpc_request_set *PTLRPCD_SET = (void *)1;
-
-/* When enqueuing asynchronously, locks are not ordered: we can obtain a lock
- * from the 2nd OSC before a lock from the 1st one. This does not deadlock
- * with other synchronous requests; however, holding some locks while trying
- * to obtain others may take a considerable amount of time in the case of OST
- * failure, and when other sync requests do not get the lock released by a
- * client, the client is evicted from the cluster -- such scenarios make life
- * difficult, so release locks just after they are obtained.
- */
-int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
- __u64 *flags, union ldlm_policy_data *policy,
- struct ost_lvb *lvb, int kms_valid,
- osc_enqueue_upcall_f upcall, void *cookie,
- struct ldlm_enqueue_info *einfo,
- struct ptlrpc_request_set *rqset, int async, int agl)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lustre_handle lockh = { 0 };
- struct ptlrpc_request *req = NULL;
- int intent = *flags & LDLM_FL_HAS_INTENT;
- __u64 match_flags = *flags;
- enum ldlm_mode mode;
- int rc;
-
- /* Filesystem lock extents are extended to page boundaries so that
- * dealing with the page cache is a little smoother.
- */
- policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK;
- policy->l_extent.end |= ~PAGE_MASK;
-
- /*
- * kms is not valid when either the object is completely fresh (so that no
- * locks are cached), or the object was evicted. In the latter case a
- * cached lock cannot be used, because it would prime the inode state with
- * a potentially stale LVB.
- */
- if (!kms_valid)
- goto no_match;
-
- /* Next, search for already existing extent locks that will cover us */
- /* If we're trying to read, we also search for an existing PW lock. The
- * VFS and page cache already protect us locally, so lots of readers/
- * writers can share a single PW lock.
- *
- * There are problems with conversion deadlocks, so instead of
- * converting a read lock to a write lock, we'll just enqueue a new
- * one.
- *
- * At some point we should cancel the read lock instead of making them
- * send us a blocking callback, but there are problems with canceling
- * locks out from other users right now, too.
- */
- mode = einfo->ei_mode;
- if (einfo->ei_mode == LCK_PR)
- mode |= LCK_PW;
- if (agl == 0)
- match_flags |= LDLM_FL_LVB_READY;
- if (intent != 0)
- match_flags |= LDLM_FL_BLOCK_GRANTED;
- mode = ldlm_lock_match(obd->obd_namespace, match_flags, res_id,
- einfo->ei_type, policy, mode, &lockh, 0);
- if (mode) {
- struct ldlm_lock *matched;
-
- if (*flags & LDLM_FL_TEST_LOCK)
- return ELDLM_OK;
-
- matched = ldlm_handle2lock(&lockh);
- if (agl) {
- /* AGL enqueues DLM locks speculatively. Therefore, if a
- * DLM lock already exists, just inform the caller to
- * cancel the AGL process for this stripe.
- */
- ldlm_lock_decref(&lockh, mode);
- LDLM_LOCK_PUT(matched);
- return -ECANCELED;
- } else if (osc_set_lock_data(matched, einfo->ei_cbdata)) {
- *flags |= LDLM_FL_LVB_READY;
- /* We already have a lock, and it's referenced. */
- (*upcall)(cookie, &lockh, ELDLM_LOCK_MATCHED);
-
- ldlm_lock_decref(&lockh, mode);
- LDLM_LOCK_PUT(matched);
- return ELDLM_OK;
- } else {
- ldlm_lock_decref(&lockh, mode);
- LDLM_LOCK_PUT(matched);
- }
- }
-
-no_match:
- if (*flags & (LDLM_FL_TEST_LOCK | LDLM_FL_MATCH_LOCK))
- return -ENOLCK;
- if (intent) {
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_LDLM_ENQUEUE_LVB);
- if (!req)
- return -ENOMEM;
-
- rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER,
- sizeof(*lvb));
- ptlrpc_request_set_replen(req);
- }
-
- /* users of osc_enqueue() can pass this flag for ldlm_lock_match() */
- *flags &= ~LDLM_FL_BLOCK_GRANTED;
-
- rc = ldlm_cli_enqueue(exp, &req, einfo, res_id, policy, flags, lvb,
- sizeof(*lvb), LVB_T_OST, &lockh, async);
- if (async) {
- if (!rc) {
- struct osc_enqueue_args *aa;
-
- BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args));
- aa = ptlrpc_req_async_args(req);
- aa->oa_exp = exp;
- aa->oa_mode = einfo->ei_mode;
- aa->oa_type = einfo->ei_type;
- lustre_handle_copy(&aa->oa_lockh, &lockh);
- aa->oa_upcall = upcall;
- aa->oa_cookie = cookie;
- aa->oa_agl = !!agl;
- if (!agl) {
- aa->oa_flags = flags;
- aa->oa_lvb = lvb;
- } else {
- /* AGL essentially enqueues a DLM lock in advance, so
- * we don't care about the result of the AGL enqueue.
- */
- aa->oa_lvb = NULL;
- aa->oa_flags = NULL;
- }
-
- req->rq_interpret_reply =
- (ptlrpc_interpterer_t)osc_enqueue_interpret;
- if (rqset == PTLRPCD_SET)
- ptlrpcd_add_req(req);
- else
- ptlrpc_set_add_req(rqset, req);
- } else if (intent) {
- ptlrpc_req_finished(req);
- }
- return rc;
- }
-
- rc = osc_enqueue_fini(req, upcall, cookie, &lockh, einfo->ei_mode,
- flags, agl, rc);
- if (intent)
- ptlrpc_req_finished(req);
-
- return rc;
-}
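
A hedged sketch of an asynchronous caller of osc_enqueue_base() using the PTLRPCD_SET marker above; the upcall body and the surrounding variables are illustrative assumptions:

/* Sketch only: the upcall is invoked from osc_enqueue_interpret() once
 * the enqueue completes or an existing lock is matched.
 */
static int example_upcall(void *cookie, struct lustre_handle *lockh,
			  int errcode)
{
	/* ELDLM_OK / ELDLM_LOCK_MATCHED on success, -errno on failure */
	return errcode;
}

	/* in some caller: enqueue asynchronously, let ptlrpcd interpret */
	rc = osc_enqueue_base(exp, &res_id, &flags, &policy, &lvb,
			      kms_valid, example_upcall, cookie, &einfo,
			      PTLRPCD_SET, /* async */ 1, /* agl */ 0);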
-
-int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
- enum ldlm_type type, union ldlm_policy_data *policy,
- enum ldlm_mode mode, __u64 *flags, void *data,
- struct lustre_handle *lockh, int unref)
-{
- struct obd_device *obd = exp->exp_obd;
- __u64 lflags = *flags;
- enum ldlm_mode rc;
-
- if (OBD_FAIL_CHECK(OBD_FAIL_OSC_MATCH))
- return -EIO;
-
- /* Filesystem lock extents are extended to page boundaries so that
- * dealing with the page cache is a little smoother
- */
- policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK;
- policy->l_extent.end |= ~PAGE_MASK;
-
- /* Next, search for already existing extent locks that will cover us */
- /* If we're trying to read, we also search for an existing PW lock. The
- * VFS and page cache already protect us locally, so lots of readers/
- * writers can share a single PW lock.
- */
- rc = mode;
- if (mode == LCK_PR)
- rc |= LCK_PW;
- rc = ldlm_lock_match(obd->obd_namespace, lflags,
- res_id, type, policy, rc, lockh, unref);
- if (!rc || lflags & LDLM_FL_TEST_LOCK)
- return rc;
-
- if (data) {
- struct ldlm_lock *lock = ldlm_handle2lock(lockh);
-
- LASSERT(lock);
- if (!osc_set_lock_data(lock, data)) {
- ldlm_lock_decref(lockh, rc);
- rc = 0;
- }
- LDLM_LOCK_PUT(lock);
- }
- return rc;
-}
-
-static int osc_statfs_interpret(const struct lu_env *env,
- struct ptlrpc_request *req,
- struct osc_async_args *aa, int rc)
-{
- struct obd_statfs *msfs;
-
- if (rc == -EBADR)
- /* The request has in fact never been sent
- * due to issues at a higher level (LOV).
- * Exit immediately since the caller is
- * aware of the problem and takes care
- * of the cleanup.
- */
- return rc;
-
- if ((rc == -ENOTCONN || rc == -EAGAIN) &&
- (aa->aa_oi->oi_flags & OBD_STATFS_NODELAY)) {
- rc = 0;
- goto out;
- }
-
- if (rc != 0)
- goto out;
-
- msfs = req_capsule_server_get(&req->rq_pill, &RMF_OBD_STATFS);
- if (!msfs) {
- rc = -EPROTO;
- goto out;
- }
-
- *aa->aa_oi->oi_osfs = *msfs;
-out:
- rc = aa->aa_oi->oi_cb_up(aa->aa_oi, rc);
- return rc;
-}
-
-static int osc_statfs_async(struct obd_export *exp,
- struct obd_info *oinfo, __u64 max_age,
- struct ptlrpc_request_set *rqset)
-{
- struct obd_device *obd = class_exp2obd(exp);
- struct ptlrpc_request *req;
- struct osc_async_args *aa;
- int rc;
-
- /* We could possibly pass max_age in the request (as an absolute
- * timestamp or a "seconds.usec ago") so the target can avoid doing
- * extra calls into the filesystem if that isn't necessary (e.g.
- * during mount that would help a bit). Having relative timestamps
- * is not so great if request processing is slow, while absolute
- * timestamps are not ideal because they need time synchronization.
- */
- req = ptlrpc_request_alloc(obd->u.cli.cl_import, &RQF_OST_STATFS);
- if (!req)
- return -ENOMEM;
-
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_STATFS);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
- ptlrpc_request_set_replen(req);
- req->rq_request_portal = OST_CREATE_PORTAL;
- ptlrpc_at_set_req_timeout(req);
-
- if (oinfo->oi_flags & OBD_STATFS_NODELAY) {
- /* procfs requests should not wait for stat results, to avoid deadlocks */
- req->rq_no_resend = 1;
- req->rq_no_delay = 1;
- }
-
- req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_statfs_interpret;
- BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args));
- aa = ptlrpc_req_async_args(req);
- aa->aa_oi = oinfo;
-
- ptlrpc_set_add_req(rqset, req);
- return 0;
-}
-
-static int osc_statfs(const struct lu_env *env, struct obd_export *exp,
- struct obd_statfs *osfs, __u64 max_age, __u32 flags)
-{
- struct obd_device *obd = class_exp2obd(exp);
- struct obd_statfs *msfs;
- struct ptlrpc_request *req;
- struct obd_import *imp = NULL;
- int rc;
-
- /* Since the request might also come from lprocfs, we need to
- * sync this with client_disconnect_export (Bug 15684)
- */
- down_read(&obd->u.cli.cl_sem);
- if (obd->u.cli.cl_import)
- imp = class_import_get(obd->u.cli.cl_import);
- up_read(&obd->u.cli.cl_sem);
- if (!imp)
- return -ENODEV;
-
- /* We could possibly pass max_age in the request (as an absolute
- * timestamp or a "seconds.usec ago") so the target can avoid doing
- * extra calls into the filesystem if that isn't necessary (e.g.
- * during mount that would help a bit). Having relative timestamps
- * is not so great if request processing is slow, while absolute
- * timestamps are not ideal because they need time synchronization.
- */
- req = ptlrpc_request_alloc(imp, &RQF_OST_STATFS);
-
- class_import_put(imp);
-
- if (!req)
- return -ENOMEM;
-
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_STATFS);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
- ptlrpc_request_set_replen(req);
- req->rq_request_portal = OST_CREATE_PORTAL;
- ptlrpc_at_set_req_timeout(req);
-
- if (flags & OBD_STATFS_NODELAY) {
- /* procfs requests should not wait for stat results, to avoid deadlocks */
- req->rq_no_resend = 1;
- req->rq_no_delay = 1;
- }
-
- rc = ptlrpc_queue_wait(req);
- if (rc)
- goto out;
-
- msfs = req_capsule_server_get(&req->rq_pill, &RMF_OBD_STATFS);
- if (!msfs) {
- rc = -EPROTO;
- goto out;
- }
-
- *osfs = *msfs;
-
- out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
- void *karg, void __user *uarg)
-{
- struct obd_device *obd = exp->exp_obd;
- struct obd_ioctl_data *data = karg;
- int err = 0;
-
- if (!try_module_get(THIS_MODULE)) {
- CERROR("%s: cannot get module '%s'\n", obd->obd_name,
- module_name(THIS_MODULE));
- return -EINVAL;
- }
- switch (cmd) {
- case OBD_IOC_CLIENT_RECOVER:
- err = ptlrpc_recover_import(obd->u.cli.cl_import,
- data->ioc_inlbuf1, 0);
- if (err > 0)
- err = 0;
- goto out;
- case IOC_OSC_SET_ACTIVE:
- err = ptlrpc_set_import_active(obd->u.cli.cl_import,
- data->ioc_offset);
- goto out;
- case OBD_IOC_PING_TARGET:
- err = ptlrpc_obd_ping(obd);
- goto out;
- default:
- CDEBUG(D_INODE, "unrecognised ioctl %#x by %s\n",
- cmd, current->comm);
- err = -ENOTTY;
- goto out;
- }
-out:
- module_put(THIS_MODULE);
- return err;
-}
-
-static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
- u32 keylen, void *key, u32 vallen,
- void *val, struct ptlrpc_request_set *set)
-{
- struct ptlrpc_request *req;
- struct obd_device *obd = exp->exp_obd;
- struct obd_import *imp = class_exp2cliimp(exp);
- char *tmp;
- int rc;
-
- OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_SHUTDOWN, 10);
-
- if (KEY_IS(KEY_CHECKSUM)) {
- if (vallen != sizeof(int))
- return -EINVAL;
- exp->exp_obd->u.cli.cl_checksum = (*(int *)val) ? 1 : 0;
- return 0;
- }
-
- if (KEY_IS(KEY_SPTLRPC_CONF)) {
- sptlrpc_conf_client_adapt(obd);
- return 0;
- }
-
- if (KEY_IS(KEY_FLUSH_CTX)) {
- sptlrpc_import_flush_my_ctx(imp);
- return 0;
- }
-
- if (KEY_IS(KEY_CACHE_SET)) {
- struct client_obd *cli = &obd->u.cli;
-
- LASSERT(!cli->cl_cache); /* only once */
- cli->cl_cache = val;
- cl_cache_incref(cli->cl_cache);
- cli->cl_lru_left = &cli->cl_cache->ccc_lru_left;
-
- /* add this osc into entity list */
- LASSERT(list_empty(&cli->cl_lru_osc));
- spin_lock(&cli->cl_cache->ccc_lru_lock);
- list_add(&cli->cl_lru_osc, &cli->cl_cache->ccc_lru);
- spin_unlock(&cli->cl_cache->ccc_lru_lock);
-
- return 0;
- }
-
- if (KEY_IS(KEY_CACHE_LRU_SHRINK)) {
- struct client_obd *cli = &obd->u.cli;
- long nr = atomic_long_read(&cli->cl_lru_in_list) >> 1;
- long target = *(long *)val;
-
- nr = osc_lru_shrink(env, cli, min(nr, target), true);
- *(long *)val -= nr;
- return 0;
- }
-
- if (!set && !KEY_IS(KEY_GRANT_SHRINK))
- return -EINVAL;
-
- /* We pass all other commands directly to OST. Since nobody calls osc
- * methods directly and everybody is supposed to go through LOV, we
- * assume lov checked invalid values for us.
- * The only recognised values so far are evict_by_nid and mds_conn.
- * Even if something bad goes through, we'd get a -EINVAL from OST
- * anyway.
- */
-
- req = ptlrpc_request_alloc(imp, KEY_IS(KEY_GRANT_SHRINK) ?
- &RQF_OST_SET_GRANT_INFO :
- &RQF_OBD_SET_INFO);
- if (!req)
- return -ENOMEM;
-
- req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_KEY,
- RCL_CLIENT, keylen);
- if (!KEY_IS(KEY_GRANT_SHRINK))
- req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_VAL,
- RCL_CLIENT, vallen);
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SET_INFO);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY);
- memcpy(tmp, key, keylen);
- tmp = req_capsule_client_get(&req->rq_pill, KEY_IS(KEY_GRANT_SHRINK) ?
- &RMF_OST_BODY :
- &RMF_SETINFO_VAL);
- memcpy(tmp, val, vallen);
-
- if (KEY_IS(KEY_GRANT_SHRINK)) {
- struct osc_brw_async_args *aa;
- struct obdo *oa;
-
- BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args));
- aa = ptlrpc_req_async_args(req);
- oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
- if (!oa) {
- ptlrpc_req_finished(req);
- return -ENOMEM;
- }
- *oa = ((struct ost_body *)val)->oa;
- aa->aa_oa = oa;
- req->rq_interpret_reply = osc_shrink_grant_interpret;
- }
-
- ptlrpc_request_set_replen(req);
- if (!KEY_IS(KEY_GRANT_SHRINK)) {
- LASSERT(set);
- ptlrpc_set_add_req(set, req);
- ptlrpc_check_set(NULL, set);
- } else {
- ptlrpcd_add_req(req);
- }
-
- return 0;
-}
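
For the KEY_CACHE_LRU_SHRINK branch above, a hedged caller sketch; the variables are assumptions, and obd_set_info_async() is the generic dispatcher that lands in this function:

	long target = 1024;	/* pages the caller still wants freed */
	int rc;

	/* set == NULL is fine here: this key is handled synchronously */
	rc = obd_set_info_async(env, exp, sizeof(KEY_CACHE_LRU_SHRINK),
				KEY_CACHE_LRU_SHRINK, sizeof(target),
				&target, NULL);
	/* on return, target was decremented by the pages actually shrunk */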
-
-static int osc_reconnect(const struct lu_env *env,
- struct obd_export *exp, struct obd_device *obd,
- struct obd_uuid *cluuid,
- struct obd_connect_data *data,
- void *localdata)
-{
- struct client_obd *cli = &obd->u.cli;
-
- if (data && (data->ocd_connect_flags & OBD_CONNECT_GRANT)) {
- long lost_grant;
-
- spin_lock(&cli->cl_loi_list_lock);
- data->ocd_grant = (cli->cl_avail_grant +
- (cli->cl_dirty_pages << PAGE_SHIFT)) ?:
- 2 * cli_brw_size(obd);
- lost_grant = cli->cl_lost_grant;
- cli->cl_lost_grant = 0;
- spin_unlock(&cli->cl_loi_list_lock);
-
- CDEBUG(D_RPCTRACE, "ocd_connect_flags: %#llx ocd_version: %d ocd_grant: %d, lost: %ld.\n",
- data->ocd_connect_flags,
- data->ocd_version, data->ocd_grant, lost_grant);
- }
-
- return 0;
-}
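
The ocd_grant computation above relies on the GNU "a ?: b" shorthand, which evaluates to a if a is non-zero and to b otherwise. A stand-alone demo of the arithmetic with assumed values (4 KiB pages, a fixed stand-in for cli_brw_size()):

#include <stdio.h>

int main(void)
{
	const unsigned int page_shift = 12;		/* 4 KiB pages, assumed */
	unsigned long cl_avail_grant = 1UL << 20;	/* 1 MiB available */
	unsigned long cl_dirty_pages = 256;		/* 256 dirty pages = 1 MiB */
	unsigned long brw_size = 4UL << 20;		/* cli_brw_size() stand-in */
	unsigned long grant;

	/* GNU extension: x ?: y == x ? x : y */
	grant = (cl_avail_grant + (cl_dirty_pages << page_shift)) ?:
		2 * brw_size;
	printf("ocd_grant = %lu\n", grant);	/* 2097152 (2 MiB) */
	return 0;
}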
-
-static int osc_disconnect(struct obd_export *exp)
-{
- struct obd_device *obd = class_exp2obd(exp);
- int rc;
-
- rc = client_disconnect_export(exp);
- /**
- * Initially we put del_shrink_grant before disconnect_export, but it
- * causes the following problem if setup (connect) and cleanup
- * (disconnect) are tangled together.
- * connect p1 disconnect p2
- * ptlrpc_connect_import
- * ............... class_manual_cleanup
- * osc_disconnect
- * del_shrink_grant
- * ptlrpc_connect_interrupt
- * init_grant_shrink
- * add this client to shrink list
- * cleanup_osc
- * Bang! The pinger triggers the shrink.
- * So the osc should be disconnected from the shrink list after we
- * are sure the import has been destroyed. BUG18662
- */
- if (!obd->u.cli.cl_import)
- osc_del_shrink_grant(&obd->u.cli);
- return rc;
-}
-
-static int osc_ldlm_resource_invalidate(struct cfs_hash *hs,
- struct cfs_hash_bd *bd,
- struct hlist_node *hnode, void *arg)
-{
- struct ldlm_resource *res = cfs_hash_object(hs, hnode);
- struct osc_object *osc = NULL;
- struct lu_env *env = arg;
- struct ldlm_lock *lock;
-
- lock_res(res);
- list_for_each_entry(lock, &res->lr_granted, l_res_link) {
- if (lock->l_ast_data && !osc) {
- osc = lock->l_ast_data;
- cl_object_get(osc2cl(osc));
- }
-
- /*
- * clear LDLM_FL_CLEANED flag to make sure it will be canceled
- * by the 2nd round of ldlm_namespace_clean() call in
- * osc_import_event().
- */
- ldlm_clear_cleaned(lock);
- }
- unlock_res(res);
-
- if (osc) {
- osc_object_invalidate(env, osc);
- cl_object_put(env, osc2cl(osc));
- }
-
- return 0;
-}
-
-static int osc_import_event(struct obd_device *obd,
- struct obd_import *imp,
- enum obd_import_event event)
-{
- struct client_obd *cli;
- int rc = 0;
-
- LASSERT(imp->imp_obd == obd);
-
- switch (event) {
- case IMP_EVENT_DISCON: {
- cli = &obd->u.cli;
- spin_lock(&cli->cl_loi_list_lock);
- cli->cl_avail_grant = 0;
- cli->cl_lost_grant = 0;
- spin_unlock(&cli->cl_loi_list_lock);
- break;
- }
- case IMP_EVENT_INACTIVE: {
- rc = obd_notify_observer(obd, obd, OBD_NOTIFY_INACTIVE, NULL);
- break;
- }
- case IMP_EVENT_INVALIDATE: {
- struct ldlm_namespace *ns = obd->obd_namespace;
- struct lu_env *env;
- u16 refcheck;
-
- ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
-
- env = cl_env_get(&refcheck);
- if (!IS_ERR(env)) {
- osc_io_unplug(env, &obd->u.cli, NULL);
-
- cfs_hash_for_each_nolock(ns->ns_rs_hash,
- osc_ldlm_resource_invalidate,
- env, 0);
- cl_env_put(env, &refcheck);
-
- ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
- } else {
- rc = PTR_ERR(env);
- }
- break;
- }
- case IMP_EVENT_ACTIVE: {
- rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVE, NULL);
- break;
- }
- case IMP_EVENT_OCD: {
- struct obd_connect_data *ocd = &imp->imp_connect_data;
-
- if (ocd->ocd_connect_flags & OBD_CONNECT_GRANT)
- osc_init_grant(&obd->u.cli, ocd);
-
- /* See bug 7198 */
- if (ocd->ocd_connect_flags & OBD_CONNECT_REQPORTAL)
- imp->imp_client->cli_request_portal = OST_REQUEST_PORTAL;
-
- rc = obd_notify_observer(obd, obd, OBD_NOTIFY_OCD, NULL);
- break;
- }
- case IMP_EVENT_DEACTIVATE: {
- rc = obd_notify_observer(obd, obd, OBD_NOTIFY_DEACTIVATE, NULL);
- break;
- }
- case IMP_EVENT_ACTIVATE: {
- rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVATE, NULL);
- break;
- }
- default:
- CERROR("Unknown import event %d\n", event);
- LBUG();
- }
- return rc;
-}
-
-/**
- * Determine whether the lock can be canceled before replaying the lock
- * during recovery; see bug16774 for detailed information.
- *
- * \retval zero the lock can't be canceled
- * \retval other ok to cancel
- */
-static int osc_cancel_weight(struct ldlm_lock *lock)
-{
- /*
- * Cancel all unused and granted extent locks.
- */
- if (lock->l_resource->lr_type == LDLM_EXTENT &&
- lock->l_granted_mode == lock->l_req_mode &&
- osc_ldlm_weigh_ast(lock) == 0)
- return 1;
-
- return 0;
-}
-
-static int brw_queue_work(const struct lu_env *env, void *data)
-{
- struct client_obd *cli = data;
-
- CDEBUG(D_CACHE, "Run writeback work for client obd %p.\n", cli);
-
- osc_io_unplug(env, cli, NULL);
- return 0;
-}
-
-int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
-{
- struct lprocfs_static_vars lvars = { NULL };
- struct client_obd *cli = &obd->u.cli;
- void *handler;
- int rc;
- int adding;
- int added;
- int req_count;
-
- rc = ptlrpcd_addref();
- if (rc)
- return rc;
-
- rc = client_obd_setup(obd, lcfg);
- if (rc)
- goto out_ptlrpcd;
-
- handler = ptlrpcd_alloc_work(cli->cl_import, brw_queue_work, cli);
- if (IS_ERR(handler)) {
- rc = PTR_ERR(handler);
- goto out_client_setup;
- }
- cli->cl_writeback_work = handler;
-
- handler = ptlrpcd_alloc_work(cli->cl_import, lru_queue_work, cli);
- if (IS_ERR(handler)) {
- rc = PTR_ERR(handler);
- goto out_ptlrpcd_work;
- }
-
- cli->cl_lru_work = handler;
-
- rc = osc_quota_setup(obd);
- if (rc)
- goto out_ptlrpcd_work;
-
- cli->cl_grant_shrink_interval = GRANT_SHRINK_INTERVAL;
- lprocfs_osc_init_vars(&lvars);
- if (lprocfs_obd_setup(obd, lvars.obd_vars, lvars.sysfs_vars) == 0) {
- lproc_osc_attach_seqstat(obd);
- sptlrpc_lprocfs_cliobd_attach(obd);
- ptlrpc_lprocfs_register_obd(obd);
- }
-
- /*
- * We try to control the total number of requests with an upper limit,
- * osc_reqpool_maxreqcount. There might be some race which will cause
- * over-limit allocation, but it is fine.
- */
- req_count = atomic_read(&osc_pool_req_count);
- if (req_count < osc_reqpool_maxreqcount) {
- adding = cli->cl_max_rpcs_in_flight + 2;
- if (req_count + adding > osc_reqpool_maxreqcount)
- adding = osc_reqpool_maxreqcount - req_count;
-
- added = ptlrpc_add_rqs_to_pool(osc_rq_pool, adding);
- atomic_add(added, &osc_pool_req_count);
- }
-
- INIT_LIST_HEAD(&cli->cl_grant_shrink_list);
- ns_register_cancel(obd->obd_namespace, osc_cancel_weight);
-
- spin_lock(&osc_shrink_lock);
- list_add_tail(&cli->cl_shrink_list, &osc_shrink_list);
- spin_unlock(&osc_shrink_lock);
-
- return rc;
-
-out_ptlrpcd_work:
- if (cli->cl_writeback_work) {
- ptlrpcd_destroy_work(cli->cl_writeback_work);
- cli->cl_writeback_work = NULL;
- }
- if (cli->cl_lru_work) {
- ptlrpcd_destroy_work(cli->cl_lru_work);
- cli->cl_lru_work = NULL;
- }
-out_client_setup:
- client_obd_cleanup(obd);
-out_ptlrpcd:
- ptlrpcd_decref();
- return rc;
-}
-
-static int osc_precleanup(struct obd_device *obd)
-{
- struct client_obd *cli = &obd->u.cli;
-
- /* LU-464
- * for echo client, export may be on zombie list, wait for
- * zombie thread to cull it, because cli.cl_import will be
- * cleared in client_disconnect_export():
- * class_export_destroy() -> obd_cleanup() ->
- * echo_device_free() -> echo_client_cleanup() ->
- * obd_disconnect() -> osc_disconnect() ->
- * client_disconnect_export()
- */
- obd_zombie_barrier();
- if (cli->cl_writeback_work) {
- ptlrpcd_destroy_work(cli->cl_writeback_work);
- cli->cl_writeback_work = NULL;
- }
-
- if (cli->cl_lru_work) {
- ptlrpcd_destroy_work(cli->cl_lru_work);
- cli->cl_lru_work = NULL;
- }
-
- obd_cleanup_client_import(obd);
- ptlrpc_lprocfs_unregister_obd(obd);
- lprocfs_obd_cleanup(obd);
- return 0;
-}
-
-static int osc_cleanup(struct obd_device *obd)
-{
- struct client_obd *cli = &obd->u.cli;
- int rc;
-
- spin_lock(&osc_shrink_lock);
- list_del(&cli->cl_shrink_list);
- spin_unlock(&osc_shrink_lock);
-
- /* lru cleanup */
- if (cli->cl_cache) {
- LASSERT(atomic_read(&cli->cl_cache->ccc_users) > 0);
- spin_lock(&cli->cl_cache->ccc_lru_lock);
- list_del_init(&cli->cl_lru_osc);
- spin_unlock(&cli->cl_cache->ccc_lru_lock);
- cli->cl_lru_left = NULL;
- cl_cache_decref(cli->cl_cache);
- cli->cl_cache = NULL;
- }
-
- /* free memory of osc quota cache */
- osc_quota_cleanup(obd);
-
- rc = client_obd_cleanup(obd);
-
- ptlrpcd_decref();
- return rc;
-}
-
-int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg)
-{
- struct lprocfs_static_vars lvars = { NULL };
- int rc = 0;
-
- lprocfs_osc_init_vars(&lvars);
-
- switch (lcfg->lcfg_command) {
- default:
- rc = class_process_proc_param(PARAM_OSC, lvars.obd_vars,
- lcfg, obd);
- if (rc > 0)
- rc = 0;
- break;
- }
-
- return rc;
-}
-
-static int osc_process_config(struct obd_device *obd, u32 len, void *buf)
-{
- return osc_process_config_base(obd, buf);
-}
-
-static struct obd_ops osc_obd_ops = {
- .owner = THIS_MODULE,
- .setup = osc_setup,
- .precleanup = osc_precleanup,
- .cleanup = osc_cleanup,
- .add_conn = client_import_add_conn,
- .del_conn = client_import_del_conn,
- .connect = client_connect_import,
- .reconnect = osc_reconnect,
- .disconnect = osc_disconnect,
- .statfs = osc_statfs,
- .statfs_async = osc_statfs_async,
- .create = osc_create,
- .destroy = osc_destroy,
- .getattr = osc_getattr,
- .setattr = osc_setattr,
- .iocontrol = osc_iocontrol,
- .set_info_async = osc_set_info_async,
- .import_event = osc_import_event,
- .process_config = osc_process_config,
- .quotactl = osc_quotactl,
-};
-
-struct list_head osc_shrink_list = LIST_HEAD_INIT(osc_shrink_list);
-DEFINE_SPINLOCK(osc_shrink_lock);
-
-static struct shrinker osc_cache_shrinker = {
- .count_objects = osc_cache_shrink_count,
- .scan_objects = osc_cache_shrink_scan,
- .seeks = DEFAULT_SEEKS,
-};
-
-static int __init osc_init(void)
-{
- struct lprocfs_static_vars lvars = { NULL };
- unsigned int reqpool_size;
- unsigned int reqsize;
- int rc;
-
- /* print an address of _any_ initialized kernel symbol from this
- * module, to allow debugging with gdb that doesn't support data
- * symbols from modules.
- */
- CDEBUG(D_INFO, "Lustre OSC module (%p).\n", &osc_caches);
-
- rc = libcfs_setup();
- if (rc)
- return rc;
-
- rc = lu_kmem_init(osc_caches);
- if (rc)
- return rc;
-
- lprocfs_osc_init_vars(&lvars);
-
- rc = register_shrinker(&osc_cache_shrinker);
- if (rc)
- goto err;
-
- /* This is obviously too much memory, only prevent overflow here */
- if (osc_reqpool_mem_max >= 1 << 12 || osc_reqpool_mem_max == 0) {
- rc = -EINVAL;
- goto err;
- }
-
- reqpool_size = osc_reqpool_mem_max << 20;
-
- reqsize = 1;
- while (reqsize < OST_MAXREQSIZE)
- reqsize = reqsize << 1;
-
- /*
- * We don't enlarge the request count in the OSC pool according to
- * cl_max_rpcs_in_flight. Allocation from the pool will only be
- * tried after a normal allocation fails, so a small OSC pool won't
- * cause much performance degradation in most cases.
- */
- osc_reqpool_maxreqcount = reqpool_size / reqsize;
-
- atomic_set(&osc_pool_req_count, 0);
- osc_rq_pool = ptlrpc_init_rq_pool(0, OST_MAXREQSIZE,
- ptlrpc_add_rqs_to_pool);
-
- rc = -ENOMEM;
-
- if (!osc_rq_pool)
- goto err;
-
- rc = class_register_type(&osc_obd_ops, NULL,
- LUSTRE_OSC_NAME, &osc_device_type);
- if (rc)
- goto err;
-
- return rc;
-
-err:
- if (osc_rq_pool)
- ptlrpc_free_rq_pool(osc_rq_pool);
- unregister_shrinker(&osc_cache_shrinker);
- lu_kmem_fini(osc_caches);
- return rc;
-}
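
To make the pool sizing in osc_init() concrete, a stand-alone demo of the same arithmetic; osc_reqpool_mem_max is a module parameter expressed in MiB, and the OST_MAXREQSIZE value below is an assumption for illustration only:

#include <stdio.h>

int main(void)
{
	unsigned int osc_reqpool_mem_max = 5;	/* MiB, assumed */
	unsigned int ost_maxreqsize = 5 * 4096;	/* bytes, assumed */
	unsigned int reqpool_size = osc_reqpool_mem_max << 20;
	unsigned int reqsize = 1;

	while (reqsize < ost_maxreqsize)
		reqsize <<= 1;			/* round up to a power of two */

	/* 5 MiB / 32 KiB = 160 pooled requests */
	printf("reqsize=%u maxreqcount=%u\n",
	       reqsize, reqpool_size / reqsize);
	return 0;
}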
-
-static void /*__exit*/ osc_exit(void)
-{
- unregister_shrinker(&osc_cache_shrinker);
- class_unregister_type(LUSTRE_OSC_NAME);
- lu_kmem_fini(osc_caches);
- ptlrpc_free_rq_pool(osc_rq_pool);
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Object Storage Client (OSC)");
-MODULE_LICENSE("GPL");
-MODULE_VERSION(LUSTRE_VERSION_STRING);
-
-module_init(osc_init);
-module_exit(osc_exit);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/Makefile b/drivers/staging/lustre/lustre/ptlrpc/Makefile
deleted file mode 100644
index 1deb1971b39e..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/Makefile
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LUSTRE_FS) += ptlrpc.o
-LDLM := ../../lustre/ldlm/
-
-ldlm_objs := $(LDLM)l_lock.o $(LDLM)ldlm_lock.o
-ldlm_objs += $(LDLM)ldlm_resource.o $(LDLM)ldlm_lib.o
-ldlm_objs += $(LDLM)ldlm_plain.o $(LDLM)ldlm_extent.o
-ldlm_objs += $(LDLM)ldlm_request.o $(LDLM)ldlm_lockd.o
-ldlm_objs += $(LDLM)ldlm_flock.o $(LDLM)ldlm_inodebits.o
-ldlm_objs += $(LDLM)ldlm_pool.o
-ldlm_objs += $(LDLM)interval_tree.o
-ptlrpc_objs := client.o recover.o connection.o niobuf.o pack_generic.o
-ptlrpc_objs += events.o ptlrpc_module.o service.o pinger.o
-ptlrpc_objs += llog_net.o llog_client.o import.o ptlrpcd.o
-ptlrpc_objs += pers.o lproc_ptlrpc.o wiretest.o layout.o
-ptlrpc_objs += sec.o sec_bulk.o sec_gc.o sec_config.o
-ptlrpc_objs += sec_null.o sec_plain.o nrs.o nrs_fifo.o
-
-ptlrpc-y := $(ldlm_objs) $(ptlrpc_objs) sec_lproc.o
-ptlrpc-$(CONFIG_LUSTRE_TRANSLATE_ERRNOS) += errno.o
diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c
deleted file mode 100644
index c1b82bf20f08..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/client.c
+++ /dev/null
@@ -1,3271 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-/** Implementation of client-side PortalRPC interfaces */
-
-#define DEBUG_SUBSYSTEM S_RPC
-
-#include <linux/libcfs/libcfs_cpu.h>
-#include <linux/random.h>
-#include <obd_support.h>
-#include <obd_class.h>
-#include <lustre_lib.h>
-#include <lustre_ha.h>
-#include <lustre_import.h>
-#include <lustre_req_layout.h>
-
-#include "ptlrpc_internal.h"
-
-const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_pin_ops = {
- .add_kiov_frag = ptlrpc_prep_bulk_page_pin,
- .release_frags = ptlrpc_release_bulk_page_pin,
-};
-EXPORT_SYMBOL(ptlrpc_bulk_kiov_pin_ops);
-
-const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_nopin_ops = {
- .add_kiov_frag = ptlrpc_prep_bulk_page_nopin,
- .release_frags = NULL,
-};
-EXPORT_SYMBOL(ptlrpc_bulk_kiov_nopin_ops);
-
-static int ptlrpc_send_new_req(struct ptlrpc_request *req);
-static int ptlrpcd_check_work(struct ptlrpc_request *req);
-static int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async);
-
-/**
- * Initialize the passed-in client structure \a cl.
- */
-void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
- struct ptlrpc_client *cl)
-{
- cl->cli_request_portal = req_portal;
- cl->cli_reply_portal = rep_portal;
- cl->cli_name = name;
-}
-EXPORT_SYMBOL(ptlrpc_init_client);
-
-/**
- * Return the PortalRPC connection for remote uuid \a uuid
- */
-struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid)
-{
- struct ptlrpc_connection *c;
- lnet_nid_t self;
- struct lnet_process_id peer;
- int err;
-
- /*
- * ptlrpc_uuid_to_peer() initializes its 2nd parameter
- * before accessing its values.
- * coverity[uninit_use_in_call]
- */
- err = ptlrpc_uuid_to_peer(uuid, &peer, &self);
- if (err != 0) {
- CNETERR("cannot find peer %s!\n", uuid->uuid);
- return NULL;
- }
-
- c = ptlrpc_connection_get(peer, self, uuid);
- if (c) {
- memcpy(c->c_remote_uuid.uuid,
- uuid->uuid, sizeof(c->c_remote_uuid.uuid));
- }
-
- CDEBUG(D_INFO, "%s -> %p\n", uuid->uuid, c);
-
- return c;
-}
-
-/**
- * Allocate and initialize new bulk descriptor on the sender.
- * Returns pointer to the descriptor or NULL on error.
- */
-struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned int nfrags,
- unsigned int max_brw,
- enum ptlrpc_bulk_op_type type,
- unsigned int portal,
- const struct ptlrpc_bulk_frag_ops *ops)
-{
- struct ptlrpc_bulk_desc *desc;
- int i;
-
- /* ensure that only one of KIOV or IOVEC is set but not both */
- LASSERT((ptlrpc_is_bulk_desc_kiov(type) && ops->add_kiov_frag) ||
- (ptlrpc_is_bulk_desc_kvec(type) && ops->add_iov_frag));
-
- desc = kzalloc(sizeof(*desc), GFP_NOFS);
- if (!desc)
- return NULL;
-
- if (type & PTLRPC_BULK_BUF_KIOV) {
- GET_KIOV(desc) = kcalloc(nfrags, sizeof(*GET_KIOV(desc)),
- GFP_NOFS);
- if (!GET_KIOV(desc))
- goto free_desc;
- } else {
- GET_KVEC(desc) = kcalloc(nfrags, sizeof(*GET_KVEC(desc)),
- GFP_NOFS);
- if (!GET_KVEC(desc))
- goto free_desc;
- }
-
- spin_lock_init(&desc->bd_lock);
- init_waitqueue_head(&desc->bd_waitq);
- desc->bd_max_iov = nfrags;
- desc->bd_iov_count = 0;
- desc->bd_portal = portal;
- desc->bd_type = type;
- desc->bd_md_count = 0;
- desc->bd_frag_ops = (struct ptlrpc_bulk_frag_ops *)ops;
- LASSERT(max_brw > 0);
- desc->bd_md_max_brw = min(max_brw, PTLRPC_BULK_OPS_COUNT);
- /*
- * PTLRPC_BULK_OPS_COUNT is the compile-time transfer limit for this
- * node. Negotiated ocd_brw_size will always be <= this number.
- */
- for (i = 0; i < PTLRPC_BULK_OPS_COUNT; i++)
- LNetInvalidateMDHandle(&desc->bd_mds[i]);
-
- return desc;
-free_desc:
- kfree(desc);
- return NULL;
-}
-
-/**
- * Prepare a bulk descriptor for the specified outgoing request \a req that
- * can fit \a nfrags * pages. \a type is the bulk type. \a portal is where
- * the bulk is to be sent. Used on the client side.
- * Returns a pointer to the newly allocated, initialized bulk descriptor or
- * NULL on error.
- */
-struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req,
- unsigned int nfrags,
- unsigned int max_brw,
- unsigned int type,
- unsigned int portal,
- const struct ptlrpc_bulk_frag_ops *ops)
-{
- struct obd_import *imp = req->rq_import;
- struct ptlrpc_bulk_desc *desc;
-
- LASSERT(ptlrpc_is_bulk_op_passive(type));
-
- desc = ptlrpc_new_bulk(nfrags, max_brw, type, portal, ops);
- if (!desc)
- return NULL;
-
- desc->bd_import_generation = req->rq_import_generation;
- desc->bd_import = class_import_get(imp);
- desc->bd_req = req;
-
- desc->bd_cbid.cbid_fn = client_bulk_callback;
- desc->bd_cbid.cbid_arg = desc;
-
- /* This makes req own desc and free it when req itself is freed */
- req->rq_bulk = desc;
-
- return desc;
-}
-EXPORT_SYMBOL(ptlrpc_prep_bulk_imp);
-
-void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
- struct page *page, int pageoffset, int len, int pin)
-{
- struct bio_vec *kiov;
-
- LASSERT(desc->bd_iov_count < desc->bd_max_iov);
- LASSERT(page);
- LASSERT(pageoffset >= 0);
- LASSERT(len > 0);
- LASSERT(pageoffset + len <= PAGE_SIZE);
- LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type));
-
- kiov = &BD_GET_KIOV(desc, desc->bd_iov_count);
-
- desc->bd_nob += len;
-
- if (pin)
- get_page(page);
-
- kiov->bv_page = page;
- kiov->bv_offset = pageoffset;
- kiov->bv_len = len;
-
- desc->bd_iov_count++;
-}
-EXPORT_SYMBOL(__ptlrpc_prep_bulk_page);
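
A hedged client-side usage sketch for the bulk helpers above; the flag and portal constants are existing Lustre names, but this exact call sequence and the surrounding variables are illustrative assumptions:

	struct ptlrpc_bulk_desc *desc;
	int i;

	desc = ptlrpc_prep_bulk_imp(req, npages, 1,
				    PTLRPC_BULK_GET_SOURCE |
				    PTLRPC_BULK_BUF_KIOV,
				    OST_BULK_PORTAL,
				    &ptlrpc_bulk_kiov_pin_ops);
	if (!desc)
		return -ENOMEM;
	/* add_kiov_frag wraps __ptlrpc_prep_bulk_page with pin = 1 */
	for (i = 0; i < npages; i++)
		desc->bd_frag_ops->add_kiov_frag(desc, pages[i],
						 0, PAGE_SIZE);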
-
-int ptlrpc_prep_bulk_frag(struct ptlrpc_bulk_desc *desc,
- void *frag, int len)
-{
- struct kvec *iovec;
-
- LASSERT(desc->bd_iov_count < desc->bd_max_iov);
- LASSERT(frag);
- LASSERT(len > 0);
- LASSERT(ptlrpc_is_bulk_desc_kvec(desc->bd_type));
-
- iovec = &BD_GET_KVEC(desc, desc->bd_iov_count);
-
- desc->bd_nob += len;
-
- iovec->iov_base = frag;
- iovec->iov_len = len;
-
- desc->bd_iov_count++;
-
- return desc->bd_nob;
-}
-EXPORT_SYMBOL(ptlrpc_prep_bulk_frag);
-
-void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
-{
- LASSERT(desc->bd_iov_count != LI_POISON); /* not freed already */
- LASSERT(desc->bd_md_count == 0); /* network hands off */
- LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL));
- LASSERT(desc->bd_frag_ops);
-
- if (ptlrpc_is_bulk_desc_kiov(desc->bd_type))
- sptlrpc_enc_pool_put_pages(desc);
-
- if (desc->bd_export)
- class_export_put(desc->bd_export);
- else
- class_import_put(desc->bd_import);
-
- if (desc->bd_frag_ops->release_frags)
- desc->bd_frag_ops->release_frags(desc);
-
- if (ptlrpc_is_bulk_desc_kiov(desc->bd_type))
- kfree(GET_KIOV(desc));
- else
- kfree(GET_KVEC(desc));
-
- kfree(desc);
-}
-EXPORT_SYMBOL(ptlrpc_free_bulk);
-
-/**
- * Set the server timelimit for this req, i.e. how long we are willing to
- * wait for a reply before timing out this request.
- */
-void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req)
-{
- __u32 serv_est;
- int idx;
- struct imp_at *at;
-
- LASSERT(req->rq_import);
-
- if (AT_OFF) {
- /*
- * non-AT settings
- *
- * \a imp_server_timeout means this is a reverse import and
- * we send (currently only) ASTs to the client and cannot afford
- * to wait too long for the reply, otherwise the other client
- * (because of which we are sending this request) would
- * time out waiting for us
- */
- req->rq_timeout = req->rq_import->imp_server_timeout ?
- obd_timeout / 2 : obd_timeout;
- } else {
- at = &req->rq_import->imp_at;
- idx = import_at_get_index(req->rq_import,
- req->rq_request_portal);
- serv_est = at_get(&at->iat_service_estimate[idx]);
- req->rq_timeout = at_est2timeout(serv_est);
- }
- /*
- * We could get even fancier here, using history to predict increased
- * loading...
- */
-
- /*
- * Let the server know what this RPC timeout is by putting it in the
- * reqmsg
- */
- lustre_msg_set_timeout(req->rq_reqmsg, req->rq_timeout);
-}
-EXPORT_SYMBOL(ptlrpc_at_set_req_timeout);
-
-/* Adjust max service estimate based on server value */
-static void ptlrpc_at_adj_service(struct ptlrpc_request *req,
- unsigned int serv_est)
-{
- int idx;
- unsigned int oldse;
- struct imp_at *at;
-
- LASSERT(req->rq_import);
- at = &req->rq_import->imp_at;
-
- idx = import_at_get_index(req->rq_import, req->rq_request_portal);
- /*
- * max service estimates are tracked on the server side,
- * so just keep minimal history here
- */
- oldse = at_measured(&at->iat_service_estimate[idx], serv_est);
- if (oldse != 0)
- CDEBUG(D_ADAPTTO, "The RPC service estimate for %s ptl %d has changed from %d to %d\n",
- req->rq_import->imp_obd->obd_name, req->rq_request_portal,
- oldse, at_get(&at->iat_service_estimate[idx]));
-}
-
-/* Expected network latency per remote node (secs) */
-int ptlrpc_at_get_net_latency(struct ptlrpc_request *req)
-{
- return AT_OFF ? 0 : at_get(&req->rq_import->imp_at.iat_net_latency);
-}
-
-/* Adjust expected network latency */
-void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
- unsigned int service_time)
-{
- unsigned int nl, oldnl;
- struct imp_at *at;
- time64_t now = ktime_get_real_seconds();
-
- LASSERT(req->rq_import);
-
- if (service_time > now - req->rq_sent + 3) {
- /*
- * bz16408. However, this can also happen if the early reply
- * is lost and the client RPC expires and is resent: the early
- * reply or the reply of the original RPC can still fit in the
- * reply buffer of the resent RPC. The client is now measuring
- * time from the resend time, but the server sent back the
- * service time of the original RPC.
- */
- CDEBUG((lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) ?
- D_ADAPTTO : D_WARNING,
- "Reported service time %u > total measured time %lld\n",
- service_time, now - req->rq_sent);
- return;
- }
-
- /* Network latency is total time less server processing time */
- nl = max_t(int, now - req->rq_sent -
- service_time, 0) + 1; /* st rounding */
- at = &req->rq_import->imp_at;
-
- oldnl = at_measured(&at->iat_net_latency, nl);
- if (oldnl != 0)
- CDEBUG(D_ADAPTTO, "The network latency for %s (nid %s) has changed from %d to %d\n",
- req->rq_import->imp_obd->obd_name,
- obd_uuid2str(
- &req->rq_import->imp_connection->c_remote_uuid),
- oldnl, at_get(&at->iat_net_latency));
-}
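
A small worked example of the estimate above with assumed timestamps; the "+ 1" compensates for the server's whole-second rounding of the reported service time:

#include <stdio.h>

int main(void)
{
	long long now = 107, rq_sent = 100;	/* seconds, assumed */
	unsigned int service_time = 5;		/* reported by the server */
	long long nl = now - rq_sent - service_time;

	if (nl < 0)				/* the max_t(..., 0) clamp */
		nl = 0;
	printf("network latency estimate: %llds\n", nl + 1);	/* 3s */
	return 0;
}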
-
-static int unpack_reply(struct ptlrpc_request *req)
-{
- int rc;
-
- if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) {
- rc = ptlrpc_unpack_rep_msg(req, req->rq_replen);
- if (rc) {
- DEBUG_REQ(D_ERROR, req, "unpack_rep failed: %d", rc);
- return -EPROTO;
- }
- }
-
- rc = lustre_unpack_rep_ptlrpc_body(req, MSG_PTLRPC_BODY_OFF);
- if (rc) {
- DEBUG_REQ(D_ERROR, req, "unpack ptlrpc body failed: %d", rc);
- return -EPROTO;
- }
- return 0;
-}
-
-/**
- * Handle an early reply message, called with the rq_lock held.
- * If anything goes wrong just ignore it - same as if it never happened
- */
-static int ptlrpc_at_recv_early_reply(struct ptlrpc_request *req)
- __must_hold(&req->rq_lock)
-{
- struct ptlrpc_request *early_req;
- time64_t olddl;
- int rc;
-
- req->rq_early = 0;
- spin_unlock(&req->rq_lock);
-
- rc = sptlrpc_cli_unwrap_early_reply(req, &early_req);
- if (rc) {
- spin_lock(&req->rq_lock);
- return rc;
- }
-
- rc = unpack_reply(early_req);
- if (rc) {
- sptlrpc_cli_finish_early_reply(early_req);
- spin_lock(&req->rq_lock);
- return rc;
- }
-
- /*
- * Use the new timeout value just to adjust the local value for this
- * request; don't include it in at_history. It is not yet clear why
- * the service time increased and whether it should be counted or
- * skipped (e.g. it could be a recovery case or some server error);
- * the real reply will add all the new data if it is worth adding.
- */
- req->rq_timeout = lustre_msg_get_timeout(early_req->rq_repmsg);
- lustre_msg_set_timeout(req->rq_reqmsg, req->rq_timeout);
-
- /* Network latency can be adjusted, it is pure network delays */
- ptlrpc_at_adj_net_latency(req,
- lustre_msg_get_service_time(early_req->rq_repmsg));
-
- sptlrpc_cli_finish_early_reply(early_req);
-
- spin_lock(&req->rq_lock);
- olddl = req->rq_deadline;
- /*
- * The server assumes it now has rq_timeout from when the request
- * arrived, so the client should give it at least that long.
- * Since we don't know the arrival time we'll use the original
- * sent time.
- */
- req->rq_deadline = req->rq_sent + req->rq_timeout +
- ptlrpc_at_get_net_latency(req);
-
- DEBUG_REQ(D_ADAPTTO, req,
- "Early reply #%d, new deadline in %lds (%lds)",
- req->rq_early_count,
- (long)(req->rq_deadline - ktime_get_real_seconds()),
- (long)(req->rq_deadline - olddl));
-
- return rc;
-}
-
-static struct kmem_cache *request_cache;
-
-int ptlrpc_request_cache_init(void)
-{
- request_cache = kmem_cache_create("ptlrpc_cache",
- sizeof(struct ptlrpc_request),
- 0, SLAB_HWCACHE_ALIGN, NULL);
- return !request_cache ? -ENOMEM : 0;
-}
-
-void ptlrpc_request_cache_fini(void)
-{
- kmem_cache_destroy(request_cache);
-}
-
-struct ptlrpc_request *ptlrpc_request_cache_alloc(gfp_t flags)
-{
- struct ptlrpc_request *req;
-
- req = kmem_cache_zalloc(request_cache, flags);
- return req;
-}
-
-void ptlrpc_request_cache_free(struct ptlrpc_request *req)
-{
- kmem_cache_free(request_cache, req);
-}
-
-/**
- * Wind down request pool \a pool.
- * Frees all requests from the pool too
- */
-void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool)
-{
- struct ptlrpc_request *req;
-
- while ((req = list_first_entry_or_null(&pool->prp_req_list,
- struct ptlrpc_request, rq_list))) {
- list_del(&req->rq_list);
- LASSERT(req->rq_reqbuf);
- LASSERT(req->rq_reqbuf_len == pool->prp_rq_size);
- kvfree(req->rq_reqbuf);
- ptlrpc_request_cache_free(req);
- }
- kfree(pool);
-}
-EXPORT_SYMBOL(ptlrpc_free_rq_pool);
-
-/**
- * Allocates, initializes and adds \a num_rq requests to the pool \a pool
- */
-int ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq)
-{
- int i;
- int size = 1;
-
- while (size < pool->prp_rq_size)
- size <<= 1;
-
- LASSERTF(list_empty(&pool->prp_req_list) ||
- size == pool->prp_rq_size,
- "Trying to change pool size with nonempty pool from %d to %d bytes\n",
- pool->prp_rq_size, size);
-
- spin_lock(&pool->prp_lock);
- pool->prp_rq_size = size;
- for (i = 0; i < num_rq; i++) {
- struct ptlrpc_request *req;
- struct lustre_msg *msg;
-
- spin_unlock(&pool->prp_lock);
- req = ptlrpc_request_cache_alloc(GFP_KERNEL);
- if (!req)
- return i;
- msg = kvzalloc(size, GFP_KERNEL);
- if (!msg) {
- ptlrpc_request_cache_free(req);
- return i;
- }
- req->rq_reqbuf = msg;
- req->rq_reqbuf_len = size;
- req->rq_pool = pool;
- spin_lock(&pool->prp_lock);
- list_add_tail(&req->rq_list, &pool->prp_req_list);
- }
- spin_unlock(&pool->prp_lock);
- return num_rq;
-}
-EXPORT_SYMBOL(ptlrpc_add_rqs_to_pool);
-
-/**
- * Create and initialize a new request pool with given attributes:
- * \a num_rq - initial number of requests to create for the pool
- * \a msgsize - maximum message size possible for requests in this pool
- * \a populate_pool - function to be called when more requests need to be added
- * to the pool
- * Returns a pointer to the newly created pool or NULL on error.
- */
-struct ptlrpc_request_pool *
-ptlrpc_init_rq_pool(int num_rq, int msgsize,
- int (*populate_pool)(struct ptlrpc_request_pool *, int))
-{
- struct ptlrpc_request_pool *pool;
-
- pool = kzalloc(sizeof(struct ptlrpc_request_pool), GFP_NOFS);
- if (!pool)
- return NULL;
-
- /*
- * Request the next power of two for the allocation, because the
- * kernel would do exactly this internally
- */
-
- spin_lock_init(&pool->prp_lock);
- INIT_LIST_HEAD(&pool->prp_req_list);
- pool->prp_rq_size = msgsize + SPTLRPC_MAX_PAYLOAD;
- pool->prp_populate = populate_pool;
-
- populate_pool(pool, num_rq);
-
- return pool;
-}
-EXPORT_SYMBOL(ptlrpc_init_rq_pool);
-
-/**
- * Fetches one request from pool \a pool
- */
-static struct ptlrpc_request *
-ptlrpc_prep_req_from_pool(struct ptlrpc_request_pool *pool)
-{
- struct ptlrpc_request *request;
- struct lustre_msg *reqbuf;
-
- if (!pool)
- return NULL;
-
- spin_lock(&pool->prp_lock);
-
- /*
- * See if we have anything in the pool and bail out if there is
- * nothing. In the writeout path, where this matters, this is safe
- * to do because nothing is lost in this case: when some in-flight
- * requests complete, this code will be called again.
- */
- if (unlikely(list_empty(&pool->prp_req_list))) {
- spin_unlock(&pool->prp_lock);
- return NULL;
- }
-
- request = list_entry(pool->prp_req_list.next, struct ptlrpc_request,
- rq_list);
- list_del_init(&request->rq_list);
- spin_unlock(&pool->prp_lock);
-
- LASSERT(request->rq_reqbuf);
- LASSERT(request->rq_pool);
-
- reqbuf = request->rq_reqbuf;
- memset(request, 0, sizeof(*request));
- request->rq_reqbuf = reqbuf;
- request->rq_reqbuf_len = pool->prp_rq_size;
- request->rq_pool = pool;
-
- return request;
-}
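
The memset-then-restore dance at the end of ptlrpc_prep_req_from_pool() deserves a note: the whole request is zeroed on reuse, but the preallocated buffer and the pool linkage must survive recycling. A stand-alone sketch of the pattern with illustrative stand-in types:

#include <stdio.h>
#include <string.h>

struct fake_req {
	void *reqbuf;		/* owned by the pool; survives recycling */
	int reqbuf_len;		/* survives recycling */
	int phase;		/* stale per-use state; cleared on reuse */
};

static void recycle(struct fake_req *req, int pool_size)
{
	void *keep = req->reqbuf;

	memset(req, 0, sizeof(*req));	/* wipe everything ... */
	req->reqbuf = keep;		/* ... then restore pool-owned fields */
	req->reqbuf_len = pool_size;
}

int main(void)
{
	static char buf[128];
	struct fake_req r = { buf, sizeof(buf), 42 };

	recycle(&r, sizeof(buf));
	printf("phase=%d len=%d\n", r.phase, r.reqbuf_len); /* phase=0 len=128 */
	return 0;
}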
-
-/**
- * Return the freed \a request to the pool.
- */
-static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request)
-{
- struct ptlrpc_request_pool *pool = request->rq_pool;
-
- spin_lock(&pool->prp_lock);
- LASSERT(list_empty(&request->rq_list));
- LASSERT(!request->rq_receiving_reply);
- list_add_tail(&request->rq_list, &pool->prp_req_list);
- spin_unlock(&pool->prp_lock);
-}
-
-void ptlrpc_add_unreplied(struct ptlrpc_request *req)
-{
- struct obd_import *imp = req->rq_import;
- struct ptlrpc_request *iter;
-
- assert_spin_locked(&imp->imp_lock);
- LASSERT(list_empty(&req->rq_unreplied_list));
-
- /* unreplied list is sorted by xid in ascending order */
- list_for_each_entry_reverse(iter, &imp->imp_unreplied_list, rq_unreplied_list) {
- LASSERT(req->rq_xid != iter->rq_xid);
- if (req->rq_xid < iter->rq_xid)
- continue;
- list_add(&req->rq_unreplied_list, &iter->rq_unreplied_list);
- return;
- }
- list_add(&req->rq_unreplied_list, &imp->imp_unreplied_list);
-}
-
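
ptlrpc_add_unreplied() keeps the unreplied list sorted by xid in ascending order and deliberately scans from the tail: new xids are normally the largest seen so far, so the reverse walk almost always stops at the first node it visits. A stand-alone demo of the same tail-first insert over a minimal circular doubly-linked list (a stand-in for the kernel's struct list_head):

#include <stdio.h>

struct node {
	unsigned long long xid;
	struct node *prev, *next;
};

static void insert_sorted(struct node *head, struct node *n)
{
	struct node *iter;

	/* walk from the tail, like list_for_each_entry_reverse() above */
	for (iter = head->prev; iter != head; iter = iter->prev) {
		if (n->xid < iter->xid)
			continue;	/* keep walking toward the head */
		n->prev = iter;		/* insert right after iter */
		n->next = iter->next;
		iter->next->prev = n;
		iter->next = n;
		return;
	}
	n->prev = head;			/* smallest xid: right after head */
	n->next = head->next;
	head->next->prev = n;
	head->next = n;
}

int main(void)
{
	struct node head = { 0, &head, &head };
	struct node a = { 10 }, b = { 30 }, c = { 20 }, *it;

	insert_sorted(&head, &a);
	insert_sorted(&head, &b);
	insert_sorted(&head, &c);
	for (it = head.next; it != &head; it = it->next)
		printf("%llu ", it->xid);	/* 10 20 30 */
	printf("\n");
	return 0;
}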
-void ptlrpc_assign_next_xid_nolock(struct ptlrpc_request *req)
-{
- req->rq_xid = ptlrpc_next_xid();
- ptlrpc_add_unreplied(req);
-}
-
-static inline void ptlrpc_assign_next_xid(struct ptlrpc_request *req)
-{
- spin_lock(&req->rq_import->imp_lock);
- ptlrpc_assign_next_xid_nolock(req);
- spin_unlock(&req->rq_import->imp_lock);
-}
-
-int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
- __u32 version, int opcode, char **bufs,
- struct ptlrpc_cli_ctx *ctx)
-{
- int count;
- struct obd_import *imp;
- __u32 *lengths;
- int rc;
-
- count = req_capsule_filled_sizes(&request->rq_pill, RCL_CLIENT);
- imp = request->rq_import;
- lengths = request->rq_pill.rc_area[RCL_CLIENT];
-
- if (unlikely(ctx)) {
- request->rq_cli_ctx = sptlrpc_cli_ctx_get(ctx);
- } else {
- rc = sptlrpc_req_get_ctx(request);
- if (rc)
- goto out_free;
- }
- sptlrpc_req_set_flavor(request, opcode);
-
- rc = lustre_pack_request(request, imp->imp_msg_magic, count,
- lengths, bufs);
- if (rc)
- goto out_ctx;
-
- lustre_msg_add_version(request->rq_reqmsg, version);
- request->rq_send_state = LUSTRE_IMP_FULL;
- request->rq_type = PTL_RPC_MSG_REQUEST;
-
- request->rq_req_cbid.cbid_fn = request_out_callback;
- request->rq_req_cbid.cbid_arg = request;
-
- request->rq_reply_cbid.cbid_fn = reply_in_callback;
- request->rq_reply_cbid.cbid_arg = request;
-
- request->rq_reply_deadline = 0;
- request->rq_bulk_deadline = 0;
- request->rq_req_deadline = 0;
- request->rq_phase = RQ_PHASE_NEW;
- request->rq_next_phase = RQ_PHASE_UNDEFINED;
-
- request->rq_request_portal = imp->imp_client->cli_request_portal;
- request->rq_reply_portal = imp->imp_client->cli_reply_portal;
-
- ptlrpc_at_set_req_timeout(request);
-
- lustre_msg_set_opc(request->rq_reqmsg, opcode);
- ptlrpc_assign_next_xid(request);
-
- /* Let's set up the req/reply/bulk unlink deadlines for this opcode. */
- if (cfs_fail_val == opcode) {
- time64_t *fail_t = NULL, *fail2_t = NULL;
-
- if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK)) {
- fail_t = &request->rq_bulk_deadline;
- } else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) {
- fail_t = &request->rq_reply_deadline;
- } else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK)) {
- fail_t = &request->rq_req_deadline;
- } else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK)) {
- fail_t = &request->rq_reply_deadline;
- fail2_t = &request->rq_bulk_deadline;
- }
-
- if (fail_t) {
- *fail_t = ktime_get_real_seconds() + LONG_UNLINK;
-
- if (fail2_t)
- *fail2_t = ktime_get_real_seconds() +
- LONG_UNLINK;
-
- /* The RPC is infected, let the test change the
- * fail_loc
- */
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(2 * HZ);
- set_current_state(TASK_RUNNING);
- }
- }
-
- return 0;
-
-out_ctx:
- LASSERT(!request->rq_pool);
- sptlrpc_cli_ctx_put(request->rq_cli_ctx, 1);
-out_free:
- class_import_put(imp);
- return rc;
-}
-EXPORT_SYMBOL(ptlrpc_request_bufs_pack);
-
-/**
- * Pack request buffers for network transfer, performing any necessary
- * encryption steps.
- */
-int ptlrpc_request_pack(struct ptlrpc_request *request,
- __u32 version, int opcode)
-{
- int rc;
-
- rc = ptlrpc_request_bufs_pack(request, version, opcode, NULL, NULL);
- if (rc)
- return rc;
-
- /*
- * Some old 1.8 clients (< 1.8.7) will LASSERT that the size of the
- * ptlrpc_body sent from the server equals the local ptlrpc_body size,
- * so we have to send the old ptlrpc_body to keep interoperability
- * with these clients.
- *
- * Only three kinds of server->client RPCs so far:
- * - LDLM_BL_CALLBACK
- * - LDLM_CP_CALLBACK
- * - LDLM_GL_CALLBACK
- *
- * XXX This should be removed whenever we drop interoperability with
- * these old clients.
- */
- if (opcode == LDLM_BL_CALLBACK || opcode == LDLM_CP_CALLBACK ||
- opcode == LDLM_GL_CALLBACK)
- req_capsule_shrink(&request->rq_pill, &RMF_PTLRPC_BODY,
- sizeof(struct ptlrpc_body_v2), RCL_CLIENT);
-
- return rc;
-}
-EXPORT_SYMBOL(ptlrpc_request_pack);
-
-/**
- * Helper function to allocate a new request on import \a imp,
- * possibly reusing an existing request from pool \a pool if provided.
- * Returns allocated request structure with import field filled or
- * NULL on error.
- */
-static inline
-struct ptlrpc_request *__ptlrpc_request_alloc(struct obd_import *imp,
- struct ptlrpc_request_pool *pool)
-{
- struct ptlrpc_request *request;
-
- request = ptlrpc_request_cache_alloc(GFP_NOFS);
-
- if (!request && pool)
- request = ptlrpc_prep_req_from_pool(pool);
-
- if (request) {
- ptlrpc_cli_req_init(request);
-
- LASSERTF((unsigned long)imp > 0x1000, "%p\n", imp);
- LASSERT(imp != LP_POISON);
- LASSERTF((unsigned long)imp->imp_client > 0x1000, "%p\n",
- imp->imp_client);
- LASSERT(imp->imp_client != LP_POISON);
-
- request->rq_import = class_import_get(imp);
- } else {
- CERROR("request allocation out of memory\n");
- }
-
- return request;
-}
-
-/**
- * Helper function for creating a request.
- * Calls __ptlrpc_request_alloc to allocate a new request structure and
- * initializes its buffer structures according to capsule template \a format.
- * Returns allocated request structure pointer or NULL on error.
- */
-static struct ptlrpc_request *
-ptlrpc_request_alloc_internal(struct obd_import *imp,
- struct ptlrpc_request_pool *pool,
- const struct req_format *format)
-{
- struct ptlrpc_request *request;
-
- request = __ptlrpc_request_alloc(imp, pool);
- if (!request)
- return NULL;
-
- req_capsule_init(&request->rq_pill, request, RCL_CLIENT);
- req_capsule_set(&request->rq_pill, format);
- return request;
-}
-
-/**
- * Allocate new request structure for import \a imp and initialize its
- * buffer structure according to capsule template \a format.
- */
-struct ptlrpc_request *ptlrpc_request_alloc(struct obd_import *imp,
- const struct req_format *format)
-{
- return ptlrpc_request_alloc_internal(imp, NULL, format);
-}
-EXPORT_SYMBOL(ptlrpc_request_alloc);
-
-/**
- * Allocate new request structure for import \a imp from pool \a pool and
- * initialize its buffer structure according to capsule template \a format.
- */
-struct ptlrpc_request *ptlrpc_request_alloc_pool(struct obd_import *imp,
- struct ptlrpc_request_pool *pool,
- const struct req_format *format)
-{
- return ptlrpc_request_alloc_internal(imp, pool, format);
-}
-EXPORT_SYMBOL(ptlrpc_request_alloc_pool);
-
-/**
- * For requests not from a pool, free the memory of the request structure.
- * For requests obtained from a pool earlier, return the request to its pool.
- */
-void ptlrpc_request_free(struct ptlrpc_request *request)
-{
- if (request->rq_pool)
- __ptlrpc_free_req_to_pool(request);
- else
- ptlrpc_request_cache_free(request);
-}
-EXPORT_SYMBOL(ptlrpc_request_free);
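-
-/*
- * Editor's note: a minimal usage sketch, not part of the original file,
- * showing how pool-backed allocation pairs with ptlrpc_request_free().
- * The import, pool and request format are assumed to exist; "fmt" is a
- * placeholder for a real req_format.
- */
-#if 0 /* illustrative only */
-static void example_pool_alloc_free(struct obd_import *imp,
- struct ptlrpc_request_pool *pool,
- const struct req_format *fmt)
-{
- struct ptlrpc_request *req;
-
- /* Falls back to the pool if the slab allocation fails. */
- req = ptlrpc_request_alloc_pool(imp, pool, fmt);
- if (!req)
- return;
-
- /* ... packing failed or the request is no longer needed ... */
-
- /* Returns a pooled request to its pool, frees a cache one. */
- ptlrpc_request_free(req);
-}
-#endif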
-
-/**
- * Allocate new request for operation \a opcode and immediately pack it for
- * network transfer.
- * Only used for simple requests like OBD_PING where the only important
- * part of the request is the operation itself.
- * Returns allocated request or NULL on error.
- */
-struct ptlrpc_request *ptlrpc_request_alloc_pack(struct obd_import *imp,
- const struct req_format *format,
- __u32 version, int opcode)
-{
- struct ptlrpc_request *req = ptlrpc_request_alloc(imp, format);
- int rc;
-
- if (req) {
- rc = ptlrpc_request_pack(req, version, opcode);
- if (rc) {
- ptlrpc_request_free(req);
- req = NULL;
- }
- }
- return req;
-}
-EXPORT_SYMBOL(ptlrpc_request_alloc_pack);
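-
-/*
- * Editor's note: an illustrative sketch, not part of the original file,
- * of the typical synchronous client flow built from the helpers above.
- * RQF_OBD_PING and LUSTRE_OBD_VERSION are assumed to be defined elsewhere
- * in the tree.
- */
-#if 0 /* illustrative only */
-static int example_ping(struct obd_import *imp)
-{
- struct ptlrpc_request *req;
- int rc;
-
- /* Allocate and pack in one step; NULL on failure. */
- req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING,
- LUSTRE_OBD_VERSION, OBD_PING);
- if (!req)
- return -ENOMEM;
-
- /* Send and wait for the reply (or timeout/interrupt). */
- rc = ptlrpc_queue_wait(req);
-
- /* Drop our reference; the request is freed at refcount zero. */
- ptlrpc_req_finished(req);
- return rc;
-}
-#endif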
-
-/**
- * Allocate and initialize new request set structure on the current CPT.
- * Returns a pointer to the newly allocated set structure or NULL on error.
- */
-struct ptlrpc_request_set *ptlrpc_prep_set(void)
-{
- struct ptlrpc_request_set *set;
- int cpt;
-
- cpt = cfs_cpt_current(cfs_cpt_tab, 0);
- set = kzalloc_node(sizeof(*set), GFP_NOFS,
- cfs_cpt_spread_node(cfs_cpt_tab, cpt));
- if (!set)
- return NULL;
- atomic_set(&set->set_refcount, 1);
- INIT_LIST_HEAD(&set->set_requests);
- init_waitqueue_head(&set->set_waitq);
- atomic_set(&set->set_new_count, 0);
- atomic_set(&set->set_remaining, 0);
- spin_lock_init(&set->set_new_req_lock);
- INIT_LIST_HEAD(&set->set_new_requests);
- INIT_LIST_HEAD(&set->set_cblist);
- set->set_max_inflight = UINT_MAX;
- set->set_producer = NULL;
- set->set_producer_arg = NULL;
- set->set_rc = 0;
-
- return set;
-}
-EXPORT_SYMBOL(ptlrpc_prep_set);
-
-/**
- * Allocate and initialize new request set structure with flow control
- * extension. This extension allows the caller to control the number of
- * requests in flight for the whole set. A callback function to generate
- * requests must be provided, and the request set will keep the number of
- * requests sent over the wire at or below \a max.
- * Returns a pointer to the newly allocated set structure or NULL on error.
- */
-struct ptlrpc_request_set *ptlrpc_prep_fcset(int max, set_producer_func func,
- void *arg)
-{
- struct ptlrpc_request_set *set;
-
- set = ptlrpc_prep_set();
- if (!set)
- return NULL;
-
- set->set_max_inflight = max;
- set->set_producer = func;
- set->set_producer_arg = arg;
-
- return set;
-}
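-
-/*
- * Editor's note: a hedged sketch, not part of the original file, of the
- * shape a set_producer_func is expected to have, inferred from
- * ptlrpc_set_producer() below: queue one request via ptlrpc_set_add_req()
- * and return 0, or return -ENOENT when nothing is left to produce.
- * example_next_req() is hypothetical.
- */
-#if 0 /* illustrative only */
-static int example_producer(struct ptlrpc_request_set *set, void *arg)
-{
- struct ptlrpc_request *req = example_next_req(arg);
-
- if (!req)
- return -ENOENT; /* no more RPCs to produce */
-
- /* With a producer attached, this also sends the request. */
- ptlrpc_set_add_req(set, req);
- return 0;
-}
-#endif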
-
-/**
- * Wind down and free request set structure previously allocated with
- * ptlrpc_prep_set.
- * Ensures that all requests on the set have completed and removes
- * all requests from the set's request list.
- * If any unsent requests happen to be on the list, pretends that they got
- * an error in flight and calls their completion handlers.
- */
-void ptlrpc_set_destroy(struct ptlrpc_request_set *set)
-{
- struct ptlrpc_request *req;
- int expected_phase;
- int n = 0;
-
- /* Requests on the set should either all be completed, or all be new */
- expected_phase = (atomic_read(&set->set_remaining) == 0) ?
- RQ_PHASE_COMPLETE : RQ_PHASE_NEW;
- list_for_each_entry(req, &set->set_requests, rq_set_chain) {
- LASSERT(req->rq_phase == expected_phase);
- n++;
- }
-
- LASSERTF(atomic_read(&set->set_remaining) == 0 ||
- atomic_read(&set->set_remaining) == n, "%d / %d\n",
- atomic_read(&set->set_remaining), n);
-
- while ((req = list_first_entry_or_null(&set->set_requests,
- struct ptlrpc_request,
- rq_set_chain))) {
- list_del_init(&req->rq_set_chain);
-
- LASSERT(req->rq_phase == expected_phase);
-
- if (req->rq_phase == RQ_PHASE_NEW) {
- ptlrpc_req_interpret(NULL, req, -EBADR);
- atomic_dec(&set->set_remaining);
- }
-
- spin_lock(&req->rq_lock);
- req->rq_set = NULL;
- req->rq_invalid_rqset = 0;
- spin_unlock(&req->rq_lock);
-
- ptlrpc_req_finished(req);
- }
-
- LASSERT(atomic_read(&set->set_remaining) == 0);
-
- ptlrpc_reqset_put(set);
-}
-EXPORT_SYMBOL(ptlrpc_set_destroy);
-
-/**
- * Add a new request to the general purpose request set.
- * Assumes request reference from the caller.
- */
-void ptlrpc_set_add_req(struct ptlrpc_request_set *set,
- struct ptlrpc_request *req)
-{
- LASSERT(list_empty(&req->rq_set_chain));
-
- /* The set takes over the caller's request reference */
- list_add_tail(&req->rq_set_chain, &set->set_requests);
- req->rq_set = set;
- atomic_inc(&set->set_remaining);
- req->rq_queued_time = jiffies;
-
- if (req->rq_reqmsg)
- lustre_msg_set_jobid(req->rq_reqmsg, NULL);
-
- if (set->set_producer)
- /*
- * If the request set has a producer callback, the RPC must be
- * sent straight away
- */
- ptlrpc_send_new_req(req);
-}
-EXPORT_SYMBOL(ptlrpc_set_add_req);
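-
-/*
- * Editor's note: an illustrative sketch, not part of the original file,
- * of driving several RPCs through one set. example_build_req() is
- * hypothetical.
- */
-#if 0 /* illustrative only */
-static int example_parallel_rpcs(struct obd_import *imp, int nr)
-{
- struct ptlrpc_request_set *set;
- int i, rc;
-
- set = ptlrpc_prep_set();
- if (!set)
- return -ENOMEM;
-
- for (i = 0; i < nr; i++) {
- struct ptlrpc_request *req = example_build_req(imp);
-
- if (req) /* the set takes over our reference */
- ptlrpc_set_add_req(set, req);
- }
-
- rc = ptlrpc_set_wait(set); /* sends all and waits for completion */
- ptlrpc_set_destroy(set); /* releases the set and its requests */
- return rc;
-}
-#endif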
-
-/**
- * Add a request to a request set with a dedicated server thread
- * and wake the thread to do any necessary processing.
- * Currently only used for ptlrpcd.
- */
-void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc,
- struct ptlrpc_request *req)
-{
- struct ptlrpc_request_set *set = pc->pc_set;
- int count, i;
-
- LASSERT(!req->rq_set);
- LASSERT(test_bit(LIOD_STOP, &pc->pc_flags) == 0);
-
- spin_lock(&set->set_new_req_lock);
- /* The set takes over the caller's request reference. */
- req->rq_set = set;
- req->rq_queued_time = jiffies;
- list_add_tail(&req->rq_set_chain, &set->set_new_requests);
- count = atomic_inc_return(&set->set_new_count);
- spin_unlock(&set->set_new_req_lock);
-
- /* Only need to call wakeup once for the first entry. */
- if (count == 1) {
- wake_up(&set->set_waitq);
-
- /*
- * XXX: It may be unnecessary to wake up all the partners. But to
- * guarantee that the async RPC can be processed ASAP, we have
- * no better choice. This may be fixed in the future.
- */
- for (i = 0; i < pc->pc_npartners; i++)
- wake_up(&pc->pc_partners[i]->pc_set->set_waitq);
- }
-}
-
-/**
- * Based on the current state of the import, determine if the request
- * can be sent, is an error, or should be delayed.
- *
- * Returns true if this request should be delayed. If false and
- * *status is set, then the request cannot be sent and *status is the
- * error code. If false and *status is 0, then the request can be sent.
- *
- * The imp->imp_lock must be held.
- */
-static int ptlrpc_import_delay_req(struct obd_import *imp,
- struct ptlrpc_request *req, int *status)
-{
- int delay = 0;
-
- *status = 0;
-
- if (req->rq_ctx_init || req->rq_ctx_fini) {
- /* always allow ctx init/fini RPCs to go through */
- } else if (imp->imp_state == LUSTRE_IMP_NEW) {
- DEBUG_REQ(D_ERROR, req, "Uninitialized import.");
- *status = -EIO;
- } else if (imp->imp_state == LUSTRE_IMP_CLOSED) {
- /* pings may safely race with umount */
- DEBUG_REQ(lustre_msg_get_opc(req->rq_reqmsg) == OBD_PING ?
- D_HA : D_ERROR, req, "IMP_CLOSED ");
- *status = -EIO;
- } else if (ptlrpc_send_limit_expired(req)) {
- /* probably doesn't need to be a D_ERROR after initial testing */
- DEBUG_REQ(D_HA, req, "send limit expired ");
- *status = -ETIMEDOUT;
- } else if (req->rq_send_state == LUSTRE_IMP_CONNECTING &&
- imp->imp_state == LUSTRE_IMP_CONNECTING) {
- /* allow CONNECT even if import is invalid */
- if (atomic_read(&imp->imp_inval_count) != 0) {
- DEBUG_REQ(D_ERROR, req, "invalidate in flight");
- *status = -EIO;
- }
- } else if (imp->imp_invalid || imp->imp_obd->obd_no_recov) {
- if (!imp->imp_deactive)
- DEBUG_REQ(D_NET, req, "IMP_INVALID");
- *status = -ESHUTDOWN; /* bz 12940 */
- } else if (req->rq_import_generation != imp->imp_generation) {
- DEBUG_REQ(D_ERROR, req, "req wrong generation:");
- *status = -EIO;
- } else if (req->rq_send_state != imp->imp_state) {
- /* invalidate in progress - any requests should be dropped */
- if (atomic_read(&imp->imp_inval_count) != 0) {
- DEBUG_REQ(D_ERROR, req, "invalidate in flight");
- *status = -EIO;
- } else if (req->rq_no_delay) {
- *status = -EWOULDBLOCK;
- } else if (req->rq_allow_replay &&
- (imp->imp_state == LUSTRE_IMP_REPLAY ||
- imp->imp_state == LUSTRE_IMP_REPLAY_LOCKS ||
- imp->imp_state == LUSTRE_IMP_REPLAY_WAIT ||
- imp->imp_state == LUSTRE_IMP_RECOVER)) {
- DEBUG_REQ(D_HA, req, "allow during recovery.\n");
- } else {
- delay = 1;
- }
- }
-
- return delay;
-}
-
-/**
- * Decide if the error message should be printed to the console or not.
- * Makes its decision based on request type, status, and failure frequency.
- *
- * \param[in] req request that failed and may need a console message
- *
- * \retval false if no message should be printed
- * \retval true if console message should be printed
- */
-static bool ptlrpc_console_allow(struct ptlrpc_request *req)
-{
- __u32 opc;
-
- LASSERT(req->rq_reqmsg);
- opc = lustre_msg_get_opc(req->rq_reqmsg);
-
- /* Suppress particular reconnect errors which are to be expected. */
- if (opc == OST_CONNECT || opc == MDS_CONNECT || opc == MGS_CONNECT) {
- int err;
-
- /* Suppress timed out reconnect requests */
- if (lustre_handle_is_used(&req->rq_import->imp_remote_handle) ||
- req->rq_timedout)
- return false;
-
- /*
- * Suppress most unavailable/again reconnect requests, but
- * print occasionally so it is clear client is trying to
- * connect to a server where no target is running.
- */
- err = lustre_msg_get_status(req->rq_repmsg);
- if ((err == -ENODEV || err == -EAGAIN) &&
- req->rq_import->imp_conn_cnt % 30 != 20)
- return false;
- }
-
- return true;
-}
-
-/**
- * Check request processing status.
- * Returns the status.
- */
-static int ptlrpc_check_status(struct ptlrpc_request *req)
-{
- int err;
-
- err = lustre_msg_get_status(req->rq_repmsg);
- if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR) {
- struct obd_import *imp = req->rq_import;
- lnet_nid_t nid = imp->imp_connection->c_peer.nid;
- __u32 opc = lustre_msg_get_opc(req->rq_reqmsg);
-
- /* -EAGAIN is normal when using POSIX flocks */
- if (ptlrpc_console_allow(req) &&
- !(opc == LDLM_ENQUEUE && err == -EAGAIN))
- LCONSOLE_ERROR_MSG(0x011, "%s: operation %s to node %s failed: rc = %d\n",
- imp->imp_obd->obd_name,
- ll_opcode2str(opc),
- libcfs_nid2str(nid), err);
- return err < 0 ? err : -EINVAL;
- }
-
- if (err < 0)
- DEBUG_REQ(D_INFO, req, "status is %d", err);
- else if (err > 0)
- /* XXX: translate this error from net to host */
- DEBUG_REQ(D_INFO, req, "status is %d", err);
-
- return err;
-}
-
-/**
- * Save pre-versions of objects into the request for replay.
- * Versions are obtained from the server reply.
- * Used for VBR (version-based recovery).
- */
-static void ptlrpc_save_versions(struct ptlrpc_request *req)
-{
- struct lustre_msg *repmsg = req->rq_repmsg;
- struct lustre_msg *reqmsg = req->rq_reqmsg;
- __u64 *versions = lustre_msg_get_versions(repmsg);
-
- if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)
- return;
-
- LASSERT(versions);
- lustre_msg_set_versions(reqmsg, versions);
- CDEBUG(D_INFO, "Client save versions [%#llx/%#llx]\n",
- versions[0], versions[1]);
-}
-
-__u64 ptlrpc_known_replied_xid(struct obd_import *imp)
-{
- struct ptlrpc_request *req;
-
- assert_spin_locked(&imp->imp_lock);
- if (list_empty(&imp->imp_unreplied_list))
- return 0;
-
- req = list_entry(imp->imp_unreplied_list.next, struct ptlrpc_request,
- rq_unreplied_list);
- LASSERTF(req->rq_xid >= 1, "XID:%llu\n", req->rq_xid);
-
- if (imp->imp_known_replied_xid < req->rq_xid - 1)
- imp->imp_known_replied_xid = req->rq_xid - 1;
-
- return req->rq_xid - 1;
-}
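-
-/*
- * Editor's note, not part of the original file: as a worked example, if the
- * oldest entry on imp_unreplied_list has rq_xid == 42, then every xid up to
- * and including 41 is known to have been replied to, so 41 is returned (and
- * cached in imp_known_replied_xid).
- */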
-
-/**
- * Callback function called when client receives RPC reply for \a req.
- * Returns 0 on success or error code.
- * The return value will be assigned to req->rq_status by the caller
- * as the request processing status.
- * This function also decides if the request needs to be saved for later replay.
- */
-static int after_reply(struct ptlrpc_request *req)
-{
- struct obd_import *imp = req->rq_import;
- struct obd_device *obd = req->rq_import->imp_obd;
- int rc;
- struct timespec64 work_start;
- long timediff;
- u64 committed;
-
- LASSERT(obd);
- /* repbuf must be unlinked */
- LASSERT(!req->rq_receiving_reply && req->rq_reply_unlinked);
-
- if (req->rq_reply_truncated) {
- if (ptlrpc_no_resend(req)) {
- DEBUG_REQ(D_ERROR, req, "reply buffer overflow, expected: %d, actual size: %d",
- req->rq_nob_received, req->rq_repbuf_len);
- return -EOVERFLOW;
- }
-
- sptlrpc_cli_free_repbuf(req);
- /*
- * Pass the required reply buffer size (including space for early
- * reply). NB: no need to round up because alloc_repbuf will
- * round it up.
- */
- req->rq_replen = req->rq_nob_received;
- req->rq_nob_received = 0;
- spin_lock(&req->rq_lock);
- req->rq_resend = 1;
- spin_unlock(&req->rq_lock);
- return 0;
- }
-
- ktime_get_real_ts64(&work_start);
- timediff = (work_start.tv_sec - req->rq_sent_tv.tv_sec) * USEC_PER_SEC +
- (work_start.tv_nsec - req->rq_sent_tv.tv_nsec) /
- NSEC_PER_USEC;
- /*
- * NB Until this point, the whole of the incoming message,
- * including buflens, status etc is in the sender's byte order.
- */
- rc = sptlrpc_cli_unwrap_reply(req);
- if (rc) {
- DEBUG_REQ(D_ERROR, req, "unwrap reply failed (%d):", rc);
- return rc;
- }
-
- /* The security layer unwrap might ask to resend this request. */
- if (req->rq_resend)
- return 0;
-
- rc = unpack_reply(req);
- if (rc)
- return rc;
-
- /* retry indefinitely on EINPROGRESS */
- if (lustre_msg_get_status(req->rq_repmsg) == -EINPROGRESS &&
- ptlrpc_no_resend(req) == 0 && !req->rq_no_retry_einprogress) {
- time64_t now = ktime_get_real_seconds();
-
- DEBUG_REQ(D_RPCTRACE, req, "Resending request on EINPROGRESS");
- spin_lock(&req->rq_lock);
- req->rq_resend = 1;
- spin_unlock(&req->rq_lock);
- req->rq_nr_resend++;
-
- /* Readjust the timeout for current conditions */
- ptlrpc_at_set_req_timeout(req);
- /*
- * delay the resend to give the server a chance to get ready.
- * The delay is increased by 1s on every resend and is capped at
- * the current request timeout (i.e. obd_timeout if AT is off,
- * or AT service time x 125% + 5s, see at_est2timeout())
- */
- if (req->rq_nr_resend > req->rq_timeout)
- req->rq_sent = now + req->rq_timeout;
- else
- req->rq_sent = now + req->rq_nr_resend;
-
- /* Resend for EINPROGRESS will use a new XID */
- spin_lock(&imp->imp_lock);
- list_del_init(&req->rq_unreplied_list);
- spin_unlock(&imp->imp_lock);
-
- return 0;
- }
-
- if (obd->obd_svc_stats) {
- lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR,
- timediff);
- ptlrpc_lprocfs_rpc_sent(req, timediff);
- }
-
- if (lustre_msg_get_type(req->rq_repmsg) != PTL_RPC_MSG_REPLY &&
- lustre_msg_get_type(req->rq_repmsg) != PTL_RPC_MSG_ERR) {
- DEBUG_REQ(D_ERROR, req, "invalid packet received (type=%u)",
- lustre_msg_get_type(req->rq_repmsg));
- return -EPROTO;
- }
-
- if (lustre_msg_get_opc(req->rq_reqmsg) != OBD_PING)
- CFS_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_PAUSE_REP, cfs_fail_val);
- ptlrpc_at_adj_service(req, lustre_msg_get_timeout(req->rq_repmsg));
- ptlrpc_at_adj_net_latency(req,
- lustre_msg_get_service_time(req->rq_repmsg));
-
- rc = ptlrpc_check_status(req);
- imp->imp_connect_error = rc;
-
- if (rc) {
- /*
- * Either we've been evicted, or the server has failed for
- * some reason. Try to reconnect, and if that fails, punt to
- * the upcall.
- */
- if (ptlrpc_recoverable_error(rc)) {
- if (req->rq_send_state != LUSTRE_IMP_FULL ||
- imp->imp_obd->obd_no_recov || imp->imp_dlm_fake) {
- return rc;
- }
- ptlrpc_request_handle_notconn(req);
- return rc;
- }
- } else {
- /*
- * Check whether the server sent an SLV (server lock volume).
- * Do it only for RPCs with rc == 0.
- */
- ldlm_cli_update_pool(req);
- }
-
- /* Store transno in reqmsg for replay. */
- if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)) {
- req->rq_transno = lustre_msg_get_transno(req->rq_repmsg);
- lustre_msg_set_transno(req->rq_reqmsg, req->rq_transno);
- }
-
- if (imp->imp_replayable) {
- spin_lock(&imp->imp_lock);
- /*
- * No point in adding already-committed requests to the replay
- * list, we will just remove them immediately. b=9829
- */
- if (req->rq_transno != 0 &&
- (req->rq_transno >
- lustre_msg_get_last_committed(req->rq_repmsg) ||
- req->rq_replay)) {
- /* version recovery */
- ptlrpc_save_versions(req);
- ptlrpc_retain_replayable_request(req, imp);
- } else if (req->rq_commit_cb &&
- list_empty(&req->rq_replay_list)) {
- /*
- * NB: don't call rq_commit_cb if it's already on
- * rq_replay_list, ptlrpc_free_committed() will call
- * it later, see LU-3618 for details
- */
- spin_unlock(&imp->imp_lock);
- req->rq_commit_cb(req);
- spin_lock(&imp->imp_lock);
- }
-
- /* Replay-enabled imports return commit-status information. */
- committed = lustre_msg_get_last_committed(req->rq_repmsg);
- if (likely(committed > imp->imp_peer_committed_transno))
- imp->imp_peer_committed_transno = committed;
-
- ptlrpc_free_committed(imp);
-
- if (!list_empty(&imp->imp_replay_list)) {
- struct ptlrpc_request *last;
-
- last = list_entry(imp->imp_replay_list.prev,
- struct ptlrpc_request,
- rq_replay_list);
- /*
- * Requests with rq_replay stay on the list even if no
- * commit is expected.
- */
- if (last->rq_transno > imp->imp_peer_committed_transno)
- ptlrpc_pinger_commit_expected(imp);
- }
-
- spin_unlock(&imp->imp_lock);
- }
-
- return rc;
-}
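-
-/*
- * Editor's note, not part of the original file: with the EINPROGRESS
- * handling above, resend N is delayed by min(N, rq_timeout) seconds.
- * E.g. with rq_timeout == 10 the delays run 1s, 2s, ... up to 10s and
- * then stay at 10s per attempt for as long as the server keeps
- * returning -EINPROGRESS.
- */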
-
-/**
- * Helper function to send request \a req over the network for the first time.
- * Also adjusts the request phase.
- * Returns 0 on success or error code.
- */
-static int ptlrpc_send_new_req(struct ptlrpc_request *req)
-{
- struct obd_import *imp = req->rq_import;
- u64 min_xid = 0;
- int rc;
-
- LASSERT(req->rq_phase == RQ_PHASE_NEW);
-
- /* do not try to go further if there is not enough memory in enc_pool */
- if (req->rq_sent && req->rq_bulk)
- if (req->rq_bulk->bd_iov_count > get_free_pages_in_pool() &&
- pool_is_at_full_capacity())
- return -ENOMEM;
-
- if (req->rq_sent && (req->rq_sent > ktime_get_real_seconds()) &&
- (!req->rq_generation_set ||
- req->rq_import_generation == imp->imp_generation))
- return 0;
-
- ptlrpc_rqphase_move(req, RQ_PHASE_RPC);
-
- spin_lock(&imp->imp_lock);
-
- LASSERT(req->rq_xid);
- LASSERT(!list_empty(&req->rq_unreplied_list));
-
- if (!req->rq_generation_set)
- req->rq_import_generation = imp->imp_generation;
-
- if (ptlrpc_import_delay_req(imp, req, &rc)) {
- spin_lock(&req->rq_lock);
- req->rq_waiting = 1;
- spin_unlock(&req->rq_lock);
-
- DEBUG_REQ(D_HA, req, "req from PID %d waiting for recovery: (%s != %s)",
- lustre_msg_get_status(req->rq_reqmsg),
- ptlrpc_import_state_name(req->rq_send_state),
- ptlrpc_import_state_name(imp->imp_state));
- LASSERT(list_empty(&req->rq_list));
- list_add_tail(&req->rq_list, &imp->imp_delayed_list);
- atomic_inc(&req->rq_import->imp_inflight);
- spin_unlock(&imp->imp_lock);
- return 0;
- }
-
- if (rc != 0) {
- spin_unlock(&imp->imp_lock);
- req->rq_status = rc;
- ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
- return rc;
- }
-
- LASSERT(list_empty(&req->rq_list));
- list_add_tail(&req->rq_list, &imp->imp_sending_list);
- atomic_inc(&req->rq_import->imp_inflight);
-
- /* Find the known replied XID from the unreplied list. CONNECT
- * and DISCONNECT requests are skipped to keep the sanity check
- * on the server side happy, see process_req_last_xid().
- *
- * For CONNECT: because replay requests have lower XIDs, it would
- * break the sanity check if CONNECT bumped the exp_last_xid on
- * the server.
- *
- * For DISCONNECT: since the client aborts inflight RPCs before
- * sending DISCONNECT, DISCONNECT may carry an XID higher than
- * those of the inflight RPCs.
- */
- if (!ptlrpc_req_is_connect(req) && !ptlrpc_req_is_disconnect(req))
- min_xid = ptlrpc_known_replied_xid(imp);
- spin_unlock(&imp->imp_lock);
-
- lustre_msg_set_last_xid(req->rq_reqmsg, min_xid);
-
- lustre_msg_set_status(req->rq_reqmsg, current->pid);
-
- rc = sptlrpc_req_refresh_ctx(req, -1);
- if (rc) {
- if (req->rq_err) {
- req->rq_status = rc;
- return 1;
- }
- spin_lock(&req->rq_lock);
- req->rq_wait_ctx = 1;
- spin_unlock(&req->rq_lock);
- return 0;
- }
-
- CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:nid:opc %s:%s:%d:%llu:%s:%d\n",
- current->comm,
- imp->imp_obd->obd_uuid.uuid,
- lustre_msg_get_status(req->rq_reqmsg), req->rq_xid,
- libcfs_nid2str(imp->imp_connection->c_peer.nid),
- lustre_msg_get_opc(req->rq_reqmsg));
-
- rc = ptl_send_rpc(req, 0);
- if (rc == -ENOMEM) {
- spin_lock(&imp->imp_lock);
- if (!list_empty(&req->rq_list)) {
- list_del_init(&req->rq_list);
- if (atomic_dec_and_test(&req->rq_import->imp_inflight))
- wake_up_all(&req->rq_import->imp_recovery_waitq);
- }
- spin_unlock(&imp->imp_lock);
- ptlrpc_rqphase_move(req, RQ_PHASE_NEW);
- return rc;
- }
- if (rc) {
- DEBUG_REQ(D_HA, req, "send failed (%d); expect timeout", rc);
- spin_lock(&req->rq_lock);
- req->rq_net_err = 1;
- spin_unlock(&req->rq_lock);
- return rc;
- }
- return 0;
-}
-
-static inline int ptlrpc_set_producer(struct ptlrpc_request_set *set)
-{
- int remaining, rc;
-
- LASSERT(set->set_producer);
-
- remaining = atomic_read(&set->set_remaining);
-
- /*
- * populate the ->set_requests list with requests until we
- * reach the maximum number of RPCs in flight for this set
- */
- while (atomic_read(&set->set_remaining) < set->set_max_inflight) {
- rc = set->set_producer(set, set->set_producer_arg);
- if (rc == -ENOENT) {
- /* no more RPC to produce */
- set->set_producer = NULL;
- set->set_producer_arg = NULL;
- return 0;
- }
- }
-
- return (atomic_read(&set->set_remaining) - remaining);
-}
-
-/**
- * This sends any unsent RPCs in \a set and returns 1 if all are sent
- * and no more replies are expected.
- * (It is possible to get fewer replies than requests sent, e.g. due to
- * timed out requests or requests that we had trouble sending out.)
- *
- * NOTE: This function contains a potential schedule point (cond_resched()).
- */
-int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
-{
- struct ptlrpc_request *req, *next;
- struct list_head comp_reqs;
- int force_timer_recalc = 0;
-
- if (atomic_read(&set->set_remaining) == 0)
- return 1;
-
- INIT_LIST_HEAD(&comp_reqs);
- list_for_each_entry_safe(req, next, &set->set_requests, rq_set_chain) {
- struct obd_import *imp = req->rq_import;
- int unregistered = 0;
- int rc = 0;
-
- /*
- * This schedule point is mainly for the ptlrpcd caller of this
- * function. Most ptlrpc sets are not long-lived and unbounded
- * in length, but at the least the set used by the ptlrpcd is.
- * Since the processing time is unbounded, we need to insert an
- * explicit schedule point to make the thread well-behaved.
- */
- cond_resched();
-
- if (req->rq_phase == RQ_PHASE_NEW &&
- ptlrpc_send_new_req(req)) {
- force_timer_recalc = 1;
- }
-
- /* delayed send - skip */
- if (req->rq_phase == RQ_PHASE_NEW && req->rq_sent)
- continue;
-
- /* delayed resend - skip */
- if (req->rq_phase == RQ_PHASE_RPC && req->rq_resend &&
- req->rq_sent > ktime_get_real_seconds())
- continue;
-
- if (!(req->rq_phase == RQ_PHASE_RPC ||
- req->rq_phase == RQ_PHASE_BULK ||
- req->rq_phase == RQ_PHASE_INTERPRET ||
- req->rq_phase == RQ_PHASE_UNREG_RPC ||
- req->rq_phase == RQ_PHASE_UNREG_BULK ||
- req->rq_phase == RQ_PHASE_COMPLETE)) {
- DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase);
- LBUG();
- }
-
- if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
- req->rq_phase == RQ_PHASE_UNREG_BULK) {
- LASSERT(req->rq_next_phase != req->rq_phase);
- LASSERT(req->rq_next_phase != RQ_PHASE_UNDEFINED);
-
- if (req->rq_req_deadline &&
- !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK))
- req->rq_req_deadline = 0;
- if (req->rq_reply_deadline &&
- !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK))
- req->rq_reply_deadline = 0;
- if (req->rq_bulk_deadline &&
- !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK))
- req->rq_bulk_deadline = 0;
-
- /*
- * Skip processing until reply is unlinked. We
- * can't return to pool before that and we can't
- * call interpret before that. We need to make
- * sure that all rdma transfers have finished and
- * will not corrupt any data.
- */
- if (req->rq_phase == RQ_PHASE_UNREG_RPC &&
- ptlrpc_client_recv_or_unlink(req))
- continue;
- if (req->rq_phase == RQ_PHASE_UNREG_BULK &&
- ptlrpc_client_bulk_active(req))
- continue;
-
- /*
- * Turn fail_loc off to prevent it from looping
- * forever.
- */
- if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) {
- OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK,
- OBD_FAIL_ONCE);
- }
- if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK)) {
- OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK,
- OBD_FAIL_ONCE);
- }
-
- /* Move to next phase if reply was successfully
- * unlinked.
- */
- ptlrpc_rqphase_move(req, req->rq_next_phase);
- }
-
- if (req->rq_phase == RQ_PHASE_COMPLETE) {
- list_move_tail(&req->rq_set_chain, &comp_reqs);
- continue;
- }
-
- if (req->rq_phase == RQ_PHASE_INTERPRET)
- goto interpret;
-
- /* Note that this also will start async reply unlink. */
- if (req->rq_net_err && !req->rq_timedout) {
- ptlrpc_expire_one_request(req, 1);
-
- /* Check if we still need to wait for unlink. */
- if (ptlrpc_client_recv_or_unlink(req) ||
- ptlrpc_client_bulk_active(req))
- continue;
- /* If there is no need to resend, fail it now. */
- if (req->rq_no_resend) {
- if (req->rq_status == 0)
- req->rq_status = -EIO;
- ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
- goto interpret;
- } else {
- continue;
- }
- }
-
- if (req->rq_err) {
- spin_lock(&req->rq_lock);
- req->rq_replied = 0;
- spin_unlock(&req->rq_lock);
- if (req->rq_status == 0)
- req->rq_status = -EIO;
- ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
- goto interpret;
- }
-
- /*
- * ptlrpc_set_wait() allows a signal to abort the timeout,
- * so it sets rq_intr regardless of individual RPC
- * timeouts. The synchronous IO waiting path sets
- * rq_intr irrespective of whether ptlrpcd
- * has seen a timeout. Our policy is to only interpret
- * interrupted RPCs after they have timed out, so we
- * need to enforce that here.
- */
-
- if (req->rq_intr && (req->rq_timedout || req->rq_waiting ||
- req->rq_wait_ctx)) {
- req->rq_status = -EINTR;
- ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
- goto interpret;
- }
-
- if (req->rq_phase == RQ_PHASE_RPC) {
- if (req->rq_timedout || req->rq_resend ||
- req->rq_waiting || req->rq_wait_ctx) {
- int status;
-
- if (!ptlrpc_unregister_reply(req, 1)) {
- ptlrpc_unregister_bulk(req, 1);
- continue;
- }
-
- spin_lock(&imp->imp_lock);
- if (ptlrpc_import_delay_req(imp, req,
- &status)) {
- /*
- * put on the delayed list - only while we
- * wait for recovery to finish - before sending
- */
- list_del_init(&req->rq_list);
- list_add_tail(&req->rq_list,
- &imp->imp_delayed_list);
- spin_unlock(&imp->imp_lock);
- continue;
- }
-
- if (status != 0) {
- req->rq_status = status;
- ptlrpc_rqphase_move(req,
- RQ_PHASE_INTERPRET);
- spin_unlock(&imp->imp_lock);
- goto interpret;
- }
- if (ptlrpc_no_resend(req) &&
- !req->rq_wait_ctx) {
- req->rq_status = -ENOTCONN;
- ptlrpc_rqphase_move(req,
- RQ_PHASE_INTERPRET);
- spin_unlock(&imp->imp_lock);
- goto interpret;
- }
-
- list_del_init(&req->rq_list);
- list_add_tail(&req->rq_list,
- &imp->imp_sending_list);
-
- spin_unlock(&imp->imp_lock);
-
- spin_lock(&req->rq_lock);
- req->rq_waiting = 0;
- spin_unlock(&req->rq_lock);
-
- if (req->rq_timedout || req->rq_resend) {
- /* This is re-sending anyway, let's mark req as resend. */
- spin_lock(&req->rq_lock);
- req->rq_resend = 1;
- spin_unlock(&req->rq_lock);
- if (req->rq_bulk &&
- !ptlrpc_unregister_bulk(req, 1))
- continue;
- }
- /*
- * rq_wait_ctx is only touched by ptlrpcd,
- * so no lock is needed here.
- */
- status = sptlrpc_req_refresh_ctx(req, -1);
- if (status) {
- if (req->rq_err) {
- req->rq_status = status;
- spin_lock(&req->rq_lock);
- req->rq_wait_ctx = 0;
- spin_unlock(&req->rq_lock);
- force_timer_recalc = 1;
- } else {
- spin_lock(&req->rq_lock);
- req->rq_wait_ctx = 1;
- spin_unlock(&req->rq_lock);
- }
-
- continue;
- } else {
- spin_lock(&req->rq_lock);
- req->rq_wait_ctx = 0;
- spin_unlock(&req->rq_lock);
- }
-
- rc = ptl_send_rpc(req, 0);
- if (rc == -ENOMEM) {
- spin_lock(&imp->imp_lock);
- if (!list_empty(&req->rq_list))
- list_del_init(&req->rq_list);
- spin_unlock(&imp->imp_lock);
- ptlrpc_rqphase_move(req, RQ_PHASE_NEW);
- continue;
- }
- if (rc) {
- DEBUG_REQ(D_HA, req,
- "send failed: rc = %d", rc);
- force_timer_recalc = 1;
- spin_lock(&req->rq_lock);
- req->rq_net_err = 1;
- spin_unlock(&req->rq_lock);
- continue;
- }
- /* need to reset the timeout */
- force_timer_recalc = 1;
- }
-
- spin_lock(&req->rq_lock);
-
- if (ptlrpc_client_early(req)) {
- ptlrpc_at_recv_early_reply(req);
- spin_unlock(&req->rq_lock);
- continue;
- }
-
- /* Still waiting for a reply? */
- if (ptlrpc_client_recv(req)) {
- spin_unlock(&req->rq_lock);
- continue;
- }
-
- /* Did we actually receive a reply? */
- if (!ptlrpc_client_replied(req)) {
- spin_unlock(&req->rq_lock);
- continue;
- }
-
- spin_unlock(&req->rq_lock);
-
- /*
- * unlink from the net because we are going to
- * swab the reply buffer in place
- */
- unregistered = ptlrpc_unregister_reply(req, 1);
- if (!unregistered)
- continue;
-
- req->rq_status = after_reply(req);
- if (req->rq_resend)
- continue;
-
- /*
- * If there is no bulk associated with this request,
- * then we're done and should let the interpreter
- * process the reply. Similarly if the RPC returned
- * an error, and therefore the bulk will never arrive.
- */
- if (!req->rq_bulk || req->rq_status < 0) {
- ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
- goto interpret;
- }
-
- ptlrpc_rqphase_move(req, RQ_PHASE_BULK);
- }
-
- LASSERT(req->rq_phase == RQ_PHASE_BULK);
- if (ptlrpc_client_bulk_active(req))
- continue;
-
- if (req->rq_bulk->bd_failure) {
- /*
- * The RPC reply arrived OK, but the bulk screwed
- * up! Dead weird since the server told us the RPC
- * was good after getting the REPLY for her GET or
- * the ACK for her PUT.
- */
- DEBUG_REQ(D_ERROR, req, "bulk transfer failed");
- req->rq_status = -EIO;
- }
-
- ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
-
-interpret:
- LASSERT(req->rq_phase == RQ_PHASE_INTERPRET);
-
- /*
- * This moves to the "unregistering" phase; we need to wait
- * for the reply to be unlinked.
- */
- if (!unregistered && !ptlrpc_unregister_reply(req, 1)) {
- /* start async bulk unlink too */
- ptlrpc_unregister_bulk(req, 1);
- continue;
- }
-
- if (!ptlrpc_unregister_bulk(req, 1))
- continue;
-
- /* When calling interpret, the receive should already be finished. */
- LASSERT(!req->rq_receiving_reply);
-
- ptlrpc_req_interpret(env, req, req->rq_status);
-
- if (ptlrpcd_check_work(req)) {
- atomic_dec(&set->set_remaining);
- continue;
- }
- ptlrpc_rqphase_move(req, RQ_PHASE_COMPLETE);
-
- CDEBUG(req->rq_reqmsg ? D_RPCTRACE : 0,
- "Completed RPC pname:cluuid:pid:xid:nid:opc %s:%s:%d:%llu:%s:%d\n",
- current->comm, imp->imp_obd->obd_uuid.uuid,
- lustre_msg_get_status(req->rq_reqmsg), req->rq_xid,
- libcfs_nid2str(imp->imp_connection->c_peer.nid),
- lustre_msg_get_opc(req->rq_reqmsg));
-
- spin_lock(&imp->imp_lock);
- /*
- * The request may no longer be on the sending or delayed list.
- * This can happen when it is marked erroneous because
- * ptlrpc_import_delay_req(req, status) found it impossible to
- * allow sending this RPC and returned *status != 0.
- */
- if (!list_empty(&req->rq_list)) {
- list_del_init(&req->rq_list);
- atomic_dec(&imp->imp_inflight);
- }
- list_del_init(&req->rq_unreplied_list);
- spin_unlock(&imp->imp_lock);
-
- atomic_dec(&set->set_remaining);
- wake_up_all(&imp->imp_recovery_waitq);
-
- if (set->set_producer) {
- /* produce a new request if possible */
- if (ptlrpc_set_producer(set) > 0)
- force_timer_recalc = 1;
-
- /*
- * free the request that has just been completed
- * in order not to pollute set->set_requests
- */
- list_del_init(&req->rq_set_chain);
- spin_lock(&req->rq_lock);
- req->rq_set = NULL;
- req->rq_invalid_rqset = 0;
- spin_unlock(&req->rq_lock);
-
- /* record rq_status to compute the final status later */
- if (req->rq_status != 0)
- set->set_rc = req->rq_status;
- ptlrpc_req_finished(req);
- } else {
- list_move_tail(&req->rq_set_chain, &comp_reqs);
- }
- }
-
- /*
- * move completed requests to the head of the list so it's easier
- * for the caller to find them
- */
- list_splice(&comp_reqs, &set->set_requests);
-
- /* If we hit an error, we want to recover promptly. */
- return atomic_read(&set->set_remaining) == 0 || force_timer_recalc;
-}
-EXPORT_SYMBOL(ptlrpc_check_set);
-
-/**
- * Time out request \a req. If \a async_unlink is set, do not wait
- * until LNet actually confirms network buffer unlinking.
- * Return 1 if we should give up further retry attempts, 0 otherwise.
- */
-int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink)
-{
- struct obd_import *imp = req->rq_import;
- int rc = 0;
-
- spin_lock(&req->rq_lock);
- req->rq_timedout = 1;
- spin_unlock(&req->rq_lock);
-
- DEBUG_REQ(D_WARNING, req, "Request sent has %s: [sent %lld/real %lld]",
- req->rq_net_err ? "failed due to network error" :
- ((req->rq_real_sent == 0 ||
- req->rq_real_sent < req->rq_sent ||
- req->rq_real_sent >= req->rq_deadline) ?
- "timed out for sent delay" : "timed out for slow reply"),
- (s64)req->rq_sent, (s64)req->rq_real_sent);
-
- if (imp && obd_debug_peer_on_timeout)
- LNetDebugPeer(imp->imp_connection->c_peer);
-
- ptlrpc_unregister_reply(req, async_unlink);
- ptlrpc_unregister_bulk(req, async_unlink);
-
- if (obd_dump_on_timeout)
- libcfs_debug_dumplog();
-
- if (!imp) {
- DEBUG_REQ(D_HA, req, "NULL import: already cleaned up?");
- return 1;
- }
-
- atomic_inc(&imp->imp_timeouts);
-
- /* The DLM server doesn't want recovery run on its imports. */
- if (imp->imp_dlm_fake)
- return 1;
-
- /*
- * If this request is for recovery or other primordial tasks,
- * then error it out here.
- */
- if (req->rq_ctx_init || req->rq_ctx_fini ||
- req->rq_send_state != LUSTRE_IMP_FULL ||
- imp->imp_obd->obd_no_recov) {
- DEBUG_REQ(D_RPCTRACE, req, "err -110, sent_state=%s (now=%s)",
- ptlrpc_import_state_name(req->rq_send_state),
- ptlrpc_import_state_name(imp->imp_state));
- spin_lock(&req->rq_lock);
- req->rq_status = -ETIMEDOUT;
- req->rq_err = 1;
- spin_unlock(&req->rq_lock);
- return 1;
- }
-
- /*
- * if a request can't be resent we can't wait for an answer after
- * the timeout
- */
- if (ptlrpc_no_resend(req)) {
- DEBUG_REQ(D_RPCTRACE, req, "TIMEOUT-NORESEND:");
- rc = 1;
- }
-
- ptlrpc_fail_import(imp, lustre_msg_get_conn_cnt(req->rq_reqmsg));
-
- return rc;
-}
-
-/**
- * Time out all uncompleted requests in the request set \a set.
- * Called when wait_event_idle_timeout() times out.
- */
-void ptlrpc_expired_set(struct ptlrpc_request_set *set)
-{
- struct ptlrpc_request *req;
- time64_t now = ktime_get_real_seconds();
-
- /* A timeout expired. See which reqs it applies to... */
- list_for_each_entry(req, &set->set_requests, rq_set_chain) {
- /* don't expire request waiting for context */
- if (req->rq_wait_ctx)
- continue;
-
- /* Request in-flight? */
- if (!((req->rq_phase == RQ_PHASE_RPC &&
- !req->rq_waiting && !req->rq_resend) ||
- (req->rq_phase == RQ_PHASE_BULK)))
- continue;
-
- if (req->rq_timedout || /* already dealt with */
- req->rq_deadline > now) /* not expired */
- continue;
-
- /*
- * Deal with this guy. Do it asynchronously so as not to block
- * the ptlrpcd thread.
- */
- ptlrpc_expire_one_request(req, 1);
- }
-}
-
-/**
- * Sets rq_intr flag in \a req under spinlock.
- */
-void ptlrpc_mark_interrupted(struct ptlrpc_request *req)
-{
- spin_lock(&req->rq_lock);
- req->rq_intr = 1;
- spin_unlock(&req->rq_lock);
-}
-EXPORT_SYMBOL(ptlrpc_mark_interrupted);
-
-/**
- * Interrupts (sets the interrupted flag on) all uncompleted requests in
- * the set \a set. Called when l_wait_event_abortable_timeout() receives a signal.
- */
-static void ptlrpc_interrupted_set(struct ptlrpc_request_set *set)
-{
- struct ptlrpc_request *req;
-
- CDEBUG(D_RPCTRACE, "INTERRUPTED SET %p\n", set);
-
- list_for_each_entry(req, &set->set_requests, rq_set_chain) {
- if (req->rq_phase != RQ_PHASE_RPC &&
- req->rq_phase != RQ_PHASE_UNREG_RPC)
- continue;
-
- ptlrpc_mark_interrupted(req);
- }
-}
-
-/**
- * Get the smallest timeout in the set; this does NOT set a timeout.
- */
-int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set)
-{
- time64_t now = ktime_get_real_seconds();
- int timeout = 0;
- struct ptlrpc_request *req;
- time64_t deadline;
-
- list_for_each_entry(req, &set->set_requests, rq_set_chain) {
- /* Request in-flight? */
- if (!(((req->rq_phase == RQ_PHASE_RPC) && !req->rq_waiting) ||
- (req->rq_phase == RQ_PHASE_BULK) ||
- (req->rq_phase == RQ_PHASE_NEW)))
- continue;
-
- /* Already timed out. */
- if (req->rq_timedout)
- continue;
-
- /* Waiting for ctx. */
- if (req->rq_wait_ctx)
- continue;
-
- if (req->rq_phase == RQ_PHASE_NEW)
- deadline = req->rq_sent;
- else if (req->rq_phase == RQ_PHASE_RPC && req->rq_resend)
- deadline = req->rq_sent;
- else
- deadline = req->rq_sent + req->rq_timeout;
-
- if (deadline <= now) /* actually expired already */
- timeout = 1; /* ASAP */
- else if (timeout == 0 || timeout > deadline - now)
- timeout = deadline - now;
- }
- return timeout;
-}
-
-/**
- * Send all unsent requests from the set and then wait until all
- * requests in the set complete (either get a reply, time out, get an
- * error, or otherwise be interrupted).
- * Returns 0 on success or error code otherwise.
- */
-int ptlrpc_set_wait(struct ptlrpc_request_set *set)
-{
- struct ptlrpc_request *req;
- int rc, timeout;
-
- if (set->set_producer)
- (void)ptlrpc_set_producer(set);
- else
- list_for_each_entry(req, &set->set_requests, rq_set_chain) {
- if (req->rq_phase == RQ_PHASE_NEW)
- (void)ptlrpc_send_new_req(req);
- }
-
- if (list_empty(&set->set_requests))
- return 0;
-
- do {
- timeout = ptlrpc_set_next_timeout(set);
-
- /*
- * wait until all complete, interrupted, or an in-flight
- * req times out
- */
- CDEBUG(D_RPCTRACE, "set %p going to sleep for %d seconds\n",
- set, timeout);
-
- if (timeout == 0 && !signal_pending(current)) {
- /*
- * No requests are in-flight (either timed out
- * or delayed), so we can allow interrupts.
- * We still want to block for a limited time,
- * so we allow interrupts during the timeout.
- */
- rc = l_wait_event_abortable_timeout(set->set_waitq,
- ptlrpc_check_set(NULL, set),
- HZ);
- if (rc == 0) {
- rc = -ETIMEDOUT;
- ptlrpc_expired_set(set);
- } else if (rc < 0) {
- rc = -EINTR;
- ptlrpc_interrupted_set(set);
- } else
- rc = 0;
- } else {
- /*
- * At least one request is in flight, so no
- * interrupts are allowed. Wait until all
- * complete, or an in-flight req times out.
- */
- rc = wait_event_idle_timeout(set->set_waitq,
- ptlrpc_check_set(NULL, set),
- (timeout ? timeout : 1) * HZ);
- if (rc == 0) {
- ptlrpc_expired_set(set);
- rc = -ETIMEDOUT;
- /*
- * LU-769 - if we ignored the signal
- * because it was already pending when
- * we started, we need to handle it
- * now or we risk it being ignored
- * forever
- */
- if (l_fatal_signal_pending(current))
- ptlrpc_interrupted_set(set);
- } else
- rc = 0;
- }
-
- LASSERT(rc == 0 || rc == -EINTR || rc == -ETIMEDOUT);
-
- /*
- * -EINTR => all requests have been flagged rq_intr so next
- * check completes.
- * -ETIMEDOUT => someone timed out. When all reqs have
- * timed out, signals are enabled allowing completion with
- * EINTR.
- * I don't really care if we go once more round the loop in
- * the error cases -eeb.
- */
- if (rc == 0 && atomic_read(&set->set_remaining) == 0) {
- list_for_each_entry(req, &set->set_requests, rq_set_chain) {
- spin_lock(&req->rq_lock);
- req->rq_invalid_rqset = 1;
- spin_unlock(&req->rq_lock);
- }
- }
- } while (rc != 0 || atomic_read(&set->set_remaining) != 0);
-
- LASSERT(atomic_read(&set->set_remaining) == 0);
-
- rc = set->set_rc; /* rq_status of already freed requests if any */
- list_for_each_entry(req, &set->set_requests, rq_set_chain) {
- LASSERT(req->rq_phase == RQ_PHASE_COMPLETE);
- if (req->rq_status != 0)
- rc = req->rq_status;
- }
-
- if (set->set_interpret) {
- int (*interpreter)(struct ptlrpc_request_set *set, void *, int) =
- set->set_interpret;
- rc = interpreter(set, set->set_arg, rc);
- } else {
- struct ptlrpc_set_cbdata *cbdata, *n;
- int err;
-
- list_for_each_entry_safe(cbdata, n,
- &set->set_cblist, psc_item) {
- list_del_init(&cbdata->psc_item);
- err = cbdata->psc_interpret(set, cbdata->psc_data, rc);
- if (err && !rc)
- rc = err;
- kfree(cbdata);
- }
- }
-
- return rc;
-}
-EXPORT_SYMBOL(ptlrpc_set_wait);
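-
-/*
- * Editor's note: an illustrative sketch, not part of the original file, of
- * a set interpreter matching the set_interpret signature used above; it
- * runs once after all requests complete, with rc folded from their
- * statuses. The registration lines assume the caller owns the set.
- */
-#if 0 /* illustrative only */
-static int example_set_interpret(struct ptlrpc_request_set *set,
- void *arg, int rc)
-{
- /* arg is whatever the caller stored in set->set_arg */
- CDEBUG(D_RPCTRACE, "set %p finished: rc = %d\n", set, rc);
- return rc;
-}
-
-/*
- * set->set_interpret = example_set_interpret;
- * set->set_arg = my_cookie;
- */
-#endif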
-
-/**
- * Helper function for request freeing.
- * Called when the request refcount reaches zero and the request needs to
- * be freed. Removes the request from all sending/replay lists it might be
- * on and frees network buffers if any are present.
- * If \a locked is set, the caller already holds the import imp_lock, so we
- * do not need to reobtain it (for certain list manipulations).
- */
-static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
-{
- if (!request)
- return;
- LASSERT(!request->rq_srv_req);
- LASSERT(!request->rq_export);
- LASSERTF(!request->rq_receiving_reply, "req %p\n", request);
- LASSERTF(list_empty(&request->rq_list), "req %p\n", request);
- LASSERTF(list_empty(&request->rq_set_chain), "req %p\n", request);
- LASSERTF(!request->rq_replay, "req %p\n", request);
-
- req_capsule_fini(&request->rq_pill);
-
- /*
- * We must take it off the imp_replay_list first. Otherwise, we'll set
- * request->rq_reqmsg to NULL while osc_close is dereferencing it.
- */
- if (request->rq_import) {
- if (!locked)
- spin_lock(&request->rq_import->imp_lock);
- list_del_init(&request->rq_replay_list);
- list_del_init(&request->rq_unreplied_list);
- if (!locked)
- spin_unlock(&request->rq_import->imp_lock);
- }
- LASSERTF(list_empty(&request->rq_replay_list), "req %p\n", request);
-
- if (atomic_read(&request->rq_refcount) != 0) {
- DEBUG_REQ(D_ERROR, request,
- "freeing request with nonzero refcount");
- LBUG();
- }
-
- if (request->rq_repbuf)
- sptlrpc_cli_free_repbuf(request);
-
- if (request->rq_import) {
- class_import_put(request->rq_import);
- request->rq_import = NULL;
- }
- if (request->rq_bulk)
- ptlrpc_free_bulk(request->rq_bulk);
-
- if (request->rq_reqbuf || request->rq_clrbuf)
- sptlrpc_cli_free_reqbuf(request);
-
- if (request->rq_cli_ctx)
- sptlrpc_req_put_ctx(request, !locked);
-
- if (request->rq_pool)
- __ptlrpc_free_req_to_pool(request);
- else
- ptlrpc_request_cache_free(request);
-}
-
-/**
- * Helper function
- * Drops one reference count for request \a request.
- * \a locked set indicates that caller holds import imp_lock.
- * Frees the request when reference count reaches zero.
- */
-static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked)
-{
- if (!request)
- return 1;
-
- if (request == LP_POISON ||
- request->rq_reqmsg == LP_POISON) {
- CERROR("dereferencing freed request (bug 575)\n");
- LBUG();
- return 1;
- }
-
- DEBUG_REQ(D_INFO, request, "refcount now %u",
- atomic_read(&request->rq_refcount) - 1);
-
- if (atomic_dec_and_test(&request->rq_refcount)) {
- __ptlrpc_free_req(request, locked);
- return 1;
- }
-
- return 0;
-}
-
-/**
- * Drops one reference count for a request.
- */
-void ptlrpc_req_finished(struct ptlrpc_request *request)
-{
- __ptlrpc_req_finished(request, 0);
-}
-EXPORT_SYMBOL(ptlrpc_req_finished);
-
-/**
- * Returns xid of a \a request
- */
-__u64 ptlrpc_req_xid(struct ptlrpc_request *request)
-{
- return request->rq_xid;
-}
-EXPORT_SYMBOL(ptlrpc_req_xid);
-
-/**
- * Disengage the client's reply buffer from the network.
- * NB does _NOT_ unregister any client-side bulk.
- * IDEMPOTENT, but _not_ safe against concurrent callers.
- * The request owner (i.e. the thread doing the I/O) must call...
- * Returns 1 once the reply buffer is unlinked, or 0 if an async unlink
- * is still pending.
- */
-static int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
-{
- int rc;
- wait_queue_head_t *wq;
-
- /* Might sleep. */
- LASSERT(!in_interrupt());
-
- /* Let's set up the deadline for the reply unlink. */
- if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
- async && request->rq_reply_deadline == 0 && cfs_fail_val == 0)
- request->rq_reply_deadline =
- ktime_get_real_seconds() + LONG_UNLINK;
-
- /* Nothing left to do. */
- if (!ptlrpc_client_recv_or_unlink(request))
- return 1;
-
- LNetMDUnlink(request->rq_reply_md_h);
-
- /* Let's check it once again. */
- if (!ptlrpc_client_recv_or_unlink(request))
- return 1;
-
- /* Move to "Unregistering" phase as reply was not unlinked yet. */
- ptlrpc_rqphase_move(request, RQ_PHASE_UNREG_RPC);
-
- /* Do not wait for unlink to finish. */
- if (async)
- return 0;
-
- /*
- * We have to call wait_event_idle_timeout() whatever the result, to give
- * liblustre a chance to run reply_in_callback(), and to make sure we've
- * unlinked before returning a req to the pool.
- */
- if (request->rq_set)
- wq = &request->rq_set->set_waitq;
- else
- wq = &request->rq_reply_waitq;
-
- for (;;) {
- /*
- * Network access will complete in finite time but the HUGE
- * timeout lets us CWARN for visibility of sluggish NALs
- */
- int cnt = 0;
- while (cnt < LONG_UNLINK &&
- (rc = wait_event_idle_timeout(*wq,
- !ptlrpc_client_recv_or_unlink(request),
- HZ)) == 0)
- cnt += 1;
- if (rc > 0) {
- ptlrpc_rqphase_move(request, request->rq_next_phase);
- return 1;
- }
-
- DEBUG_REQ(D_WARNING, request,
- "Unexpectedly long timeout receiving_reply=%d req_unlinked=%d reply_unlinked=%d",
- request->rq_receiving_reply,
- request->rq_req_unlinked,
- request->rq_reply_unlinked);
- }
- return 0;
-}
-
-static void ptlrpc_free_request(struct ptlrpc_request *req)
-{
- spin_lock(&req->rq_lock);
- req->rq_replay = 0;
- spin_unlock(&req->rq_lock);
-
- if (req->rq_commit_cb)
- req->rq_commit_cb(req);
- list_del_init(&req->rq_replay_list);
-
- __ptlrpc_req_finished(req, 1);
-}
-
-/**
- * the request is committed and dropped from the replay list of its import
- */
-void ptlrpc_request_committed(struct ptlrpc_request *req, int force)
-{
- struct obd_import *imp = req->rq_import;
-
- spin_lock(&imp->imp_lock);
- if (list_empty(&req->rq_replay_list)) {
- spin_unlock(&imp->imp_lock);
- return;
- }
-
- if (force || req->rq_transno <= imp->imp_peer_committed_transno)
- ptlrpc_free_request(req);
-
- spin_unlock(&imp->imp_lock);
-}
-EXPORT_SYMBOL(ptlrpc_request_committed);
-
-/**
- * Iterates through the replay_list on the import and prunes
- * all requests that have a transno smaller than the import's
- * last_committed and don't have rq_replay set.
- * Since requests are sorted in transno order, stops at the first
- * transno bigger than last_committed.
- * Caller must hold imp->imp_lock.
- */
-void ptlrpc_free_committed(struct obd_import *imp)
-{
- struct ptlrpc_request *req, *saved;
- struct ptlrpc_request *last_req = NULL; /* temporary fire escape */
- bool skip_committed_list = true;
-
- assert_spin_locked(&imp->imp_lock);
-
- if (imp->imp_peer_committed_transno == imp->imp_last_transno_checked &&
- imp->imp_generation == imp->imp_last_generation_checked) {
- CDEBUG(D_INFO, "%s: skip recheck: last_committed %llu\n",
- imp->imp_obd->obd_name, imp->imp_peer_committed_transno);
- return;
- }
- CDEBUG(D_RPCTRACE, "%s: committing for last_committed %llu gen %d\n",
- imp->imp_obd->obd_name, imp->imp_peer_committed_transno,
- imp->imp_generation);
-
- if (imp->imp_generation != imp->imp_last_generation_checked ||
- !imp->imp_last_transno_checked)
- skip_committed_list = false;
-
- imp->imp_last_transno_checked = imp->imp_peer_committed_transno;
- imp->imp_last_generation_checked = imp->imp_generation;
-
- list_for_each_entry_safe(req, saved, &imp->imp_replay_list,
- rq_replay_list) {
- /* XXX ok to remove when 1357 resolved - rread 05/29/03 */
- LASSERT(req != last_req);
- last_req = req;
-
- if (req->rq_transno == 0) {
- DEBUG_REQ(D_EMERG, req, "zero transno during replay");
- LBUG();
- }
- if (req->rq_import_generation < imp->imp_generation) {
- DEBUG_REQ(D_RPCTRACE, req, "free request with old gen");
- goto free_req;
- }
-
- /* not yet committed */
- if (req->rq_transno > imp->imp_peer_committed_transno) {
- DEBUG_REQ(D_RPCTRACE, req, "stopping search");
- break;
- }
-
- if (req->rq_replay) {
- DEBUG_REQ(D_RPCTRACE, req, "keeping (FL_REPLAY)");
- list_move_tail(&req->rq_replay_list,
- &imp->imp_committed_list);
- continue;
- }
-
- DEBUG_REQ(D_INFO, req, "commit (last_committed %llu)",
- imp->imp_peer_committed_transno);
-free_req:
- ptlrpc_free_request(req);
- }
- if (skip_committed_list)
- return;
-
- list_for_each_entry_safe(req, saved, &imp->imp_committed_list,
- rq_replay_list) {
- LASSERT(req->rq_transno != 0);
- if (req->rq_import_generation < imp->imp_generation ||
- !req->rq_replay) {
- DEBUG_REQ(D_RPCTRACE, req, "free %s open request",
- req->rq_import_generation <
- imp->imp_generation ? "stale" : "closed");
-
- if (imp->imp_replay_cursor == &req->rq_replay_list)
- imp->imp_replay_cursor =
- req->rq_replay_list.next;
-
- ptlrpc_free_request(req);
- }
- }
-}
-
-/**
- * Schedule a previously sent request for resend.
- * For bulk requests we assign a new xid (to avoid problems with
- * lost replies and therefore several transfers landing in the same buffer
- * from different sending attempts).
- */
-void ptlrpc_resend_req(struct ptlrpc_request *req)
-{
- DEBUG_REQ(D_HA, req, "going to resend");
- spin_lock(&req->rq_lock);
-
- /*
- * The request got a reply but is still linked to the import list.
- * Let ptlrpc_check_set() process it.
- */
- if (ptlrpc_client_replied(req)) {
- spin_unlock(&req->rq_lock);
- DEBUG_REQ(D_HA, req, "it has reply, so skip it");
- return;
- }
-
- lustre_msg_set_handle(req->rq_reqmsg, &(struct lustre_handle){ 0 });
- req->rq_status = -EAGAIN;
-
- req->rq_resend = 1;
- req->rq_net_err = 0;
- req->rq_timedout = 0;
- ptlrpc_client_wake_req(req);
- spin_unlock(&req->rq_lock);
-}
-
-/**
- * Grab additional reference on a request \a req
- */
-struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req)
-{
- atomic_inc(&req->rq_refcount);
- return req;
-}
-EXPORT_SYMBOL(ptlrpc_request_addref);
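-
-/*
- * Editor's note: an illustrative sketch, not part of the original file, of
- * the reference discipline: each ptlrpc_request_addref() must be balanced
- * by ptlrpc_req_finished(); the request is freed (or returned to its pool)
- * when the count reaches zero. Mirrors the pattern in ptlrpc_queue_wait()
- * below.
- */
-#if 0 /* illustrative only */
-static void example_addref(struct ptlrpc_request_set *set,
- struct ptlrpc_request *req)
-{
- /* keep our own reference while the set holds another */
- ptlrpc_request_addref(req);
- ptlrpc_set_add_req(set, req); /* the set consumes one reference */
-
- /* ... later, once we are done with req ... */
- ptlrpc_req_finished(req); /* drop our reference */
-}
-#endif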
-
-/**
- * Add a request to the import's replay_list.
- * Must be called under imp_lock.
- */
-void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
- struct obd_import *imp)
-{
- struct ptlrpc_request *iter;
-
- assert_spin_locked(&imp->imp_lock);
-
- if (req->rq_transno == 0) {
- DEBUG_REQ(D_EMERG, req, "saving request with zero transno");
- LBUG();
- }
-
- /*
- * clear this for new requests that were resent as well
- * as resent replayed requests.
- */
- lustre_msg_clear_flags(req->rq_reqmsg, MSG_RESENT);
-
- /* don't re-add requests that have been replayed */
- if (!list_empty(&req->rq_replay_list))
- return;
-
- lustre_msg_add_flags(req->rq_reqmsg, MSG_REPLAY);
-
- spin_lock(&req->rq_lock);
- req->rq_resend = 0;
- spin_unlock(&req->rq_lock);
-
- LASSERT(imp->imp_replayable);
- /* Balanced in ptlrpc_free_committed, usually. */
- ptlrpc_request_addref(req);
- list_for_each_entry_reverse(iter, &imp->imp_replay_list, rq_replay_list) {
- /*
- * We may have duplicate transnos if we create and then
- * open a file, or for closes retained to match creating
- * opens, so use req->rq_xid as a secondary key.
- * (See bugs 684, 685, and 428.)
- * XXX no longer needed, but all opens need transnos!
- */
- if (iter->rq_transno > req->rq_transno)
- continue;
-
- if (iter->rq_transno == req->rq_transno) {
- LASSERT(iter->rq_xid != req->rq_xid);
- if (iter->rq_xid > req->rq_xid)
- continue;
- }
-
- list_add(&req->rq_replay_list, &iter->rq_replay_list);
- return;
- }
-
- list_add(&req->rq_replay_list, &imp->imp_replay_list);
-}
-
-/**
- * Send request and wait until it completes.
- * Returns request processing status.
- */
-int ptlrpc_queue_wait(struct ptlrpc_request *req)
-{
- struct ptlrpc_request_set *set;
- int rc;
-
- LASSERT(!req->rq_set);
- LASSERT(!req->rq_receiving_reply);
-
- set = ptlrpc_prep_set();
- if (!set) {
- CERROR("cannot allocate ptlrpc set: rc = %d\n", -ENOMEM);
- return -ENOMEM;
- }
-
- /* for distributed debugging */
- lustre_msg_set_status(req->rq_reqmsg, current->pid);
-
- /* add a ref for the set (see comment in ptlrpc_set_add_req) */
- ptlrpc_request_addref(req);
- ptlrpc_set_add_req(set, req);
- rc = ptlrpc_set_wait(set);
- ptlrpc_set_destroy(set);
-
- return rc;
-}
-EXPORT_SYMBOL(ptlrpc_queue_wait);
-
-/**
- * Callback used for processing replies to replayed requests.
- * On a successful reply, calls the registered request replay callback.
- * On error, restarts the replay process.
- */
-static int ptlrpc_replay_interpret(const struct lu_env *env,
- struct ptlrpc_request *req,
- void *data, int rc)
-{
- struct ptlrpc_replay_async_args *aa = data;
- struct obd_import *imp = req->rq_import;
-
- atomic_dec(&imp->imp_replay_inflight);
-
- /*
- * Note: if it is a bulk replay (MDS-MDS replay), then even if
- * the server got the request but the bulk transfer timed out,
- * replay the bulk request again
- */
- if (!ptlrpc_client_replied(req) ||
- (req->rq_bulk &&
- lustre_msg_get_status(req->rq_repmsg) == -ETIMEDOUT)) {
- DEBUG_REQ(D_ERROR, req, "request replay timed out.\n");
- rc = -ETIMEDOUT;
- goto out;
- }
-
- if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR &&
- (lustre_msg_get_status(req->rq_repmsg) == -ENOTCONN ||
- lustre_msg_get_status(req->rq_repmsg) == -ENODEV)) {
- rc = lustre_msg_get_status(req->rq_repmsg);
- goto out;
- }
-
- /** VBR: check version failure */
- if (lustre_msg_get_status(req->rq_repmsg) == -EOVERFLOW) {
- /** replay failed due to a version mismatch */
- DEBUG_REQ(D_WARNING, req, "Version mismatch during replay");
- spin_lock(&imp->imp_lock);
- imp->imp_vbr_failed = 1;
- imp->imp_no_lock_replay = 1;
- spin_unlock(&imp->imp_lock);
- lustre_msg_set_status(req->rq_repmsg, aa->praa_old_status);
- } else {
- /** The transno had better not change over replay. */
- LASSERTF(lustre_msg_get_transno(req->rq_reqmsg) ==
- lustre_msg_get_transno(req->rq_repmsg) ||
- lustre_msg_get_transno(req->rq_repmsg) == 0,
- "%#llx/%#llx\n",
- lustre_msg_get_transno(req->rq_reqmsg),
- lustre_msg_get_transno(req->rq_repmsg));
- }
-
- spin_lock(&imp->imp_lock);
- /** if replaying by version then a gap occurred on the server, don't trust locks */
- if (lustre_msg_get_flags(req->rq_repmsg) & MSG_VERSION_REPLAY)
- imp->imp_no_lock_replay = 1;
- imp->imp_last_replay_transno = lustre_msg_get_transno(req->rq_reqmsg);
- spin_unlock(&imp->imp_lock);
- LASSERT(imp->imp_last_replay_transno);
-
- /* transaction number shouldn't be bigger than the latest replayed */
- if (req->rq_transno > lustre_msg_get_transno(req->rq_reqmsg)) {
- DEBUG_REQ(D_ERROR, req,
- "Reported transno %llu is bigger than the replayed one: %llu",
- req->rq_transno,
- lustre_msg_get_transno(req->rq_reqmsg));
- rc = -EINVAL;
- goto out;
- }
-
- DEBUG_REQ(D_HA, req, "got rep");
-
- /* let the callback do fixups, possibly including in the request */
- if (req->rq_replay_cb)
- req->rq_replay_cb(req);
-
- if (ptlrpc_client_replied(req) &&
- lustre_msg_get_status(req->rq_repmsg) != aa->praa_old_status) {
- DEBUG_REQ(D_ERROR, req, "status %d, old was %d",
- lustre_msg_get_status(req->rq_repmsg),
- aa->praa_old_status);
- } else {
- /* Put it back for re-replay. */
- lustre_msg_set_status(req->rq_repmsg, aa->praa_old_status);
- }
-
- /*
- * Errors during replay can set transno to 0, but
- * imp_last_replay_transno shouldn't be set to 0 anyway
- */
- if (req->rq_transno == 0)
- CERROR("Transno is 0 during replay!\n");
-
- /* continue with recovery */
- rc = ptlrpc_import_recovery_state_machine(imp);
- out:
- req->rq_send_state = aa->praa_old_state;
-
- if (rc != 0)
- /* this replay failed, so restart recovery */
- ptlrpc_connect_import(imp);
-
- return rc;
-}
-
-/**
- * Prepares and queues request for replay.
- * Adds it to ptlrpcd queue for actual sending.
- * Returns 0 on success.
- */
-int ptlrpc_replay_req(struct ptlrpc_request *req)
-{
- struct ptlrpc_replay_async_args *aa;
-
- LASSERT(req->rq_import->imp_state == LUSTRE_IMP_REPLAY);
-
- LASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
- aa = ptlrpc_req_async_args(req);
- memset(aa, 0, sizeof(*aa));
-
- /* Prepare request to be resent with ptlrpcd */
- aa->praa_old_state = req->rq_send_state;
- req->rq_send_state = LUSTRE_IMP_REPLAY;
- req->rq_phase = RQ_PHASE_NEW;
- req->rq_next_phase = RQ_PHASE_UNDEFINED;
- if (req->rq_repmsg)
- aa->praa_old_status = lustre_msg_get_status(req->rq_repmsg);
- req->rq_status = 0;
- req->rq_interpret_reply = ptlrpc_replay_interpret;
- /* Readjust the timeout for current conditions */
- ptlrpc_at_set_req_timeout(req);
-
- /*
- * Tell the server the net_latency, so the server can calculate how long
- * it should wait for the next replay
- */
- lustre_msg_set_service_time(req->rq_reqmsg,
- ptlrpc_at_get_net_latency(req));
- DEBUG_REQ(D_HA, req, "REPLAY");
-
- atomic_inc(&req->rq_import->imp_replay_inflight);
- ptlrpc_request_addref(req); /* ptlrpcd needs a ref */
-
- ptlrpcd_add_req(req);
- return 0;
-}
-
-/**
- * Aborts all in-flight requests on import \a imp's sending and delayed lists
- */
-void ptlrpc_abort_inflight(struct obd_import *imp)
-{
- struct ptlrpc_request *req, *n;
-
- /*
- * Make sure that no new requests get processed for this import.
- * ptlrpc_{queue,set}_wait must (and does) hold imp_lock while testing
- * this flag and then putting requests on sending_list or delayed_list.
- */
- spin_lock(&imp->imp_lock);
-
- /*
- * XXX locking? Maybe we should remove each request with the list
- * locked? Also, how do we know if the requests on the list are
- * being freed at this time?
- */
- list_for_each_entry_safe(req, n, &imp->imp_sending_list, rq_list) {
- DEBUG_REQ(D_RPCTRACE, req, "inflight");
-
- spin_lock(&req->rq_lock);
- if (req->rq_import_generation < imp->imp_generation) {
- req->rq_err = 1;
- req->rq_status = -EIO;
- ptlrpc_client_wake_req(req);
- }
- spin_unlock(&req->rq_lock);
- }
-
- list_for_each_entry_safe(req, n, &imp->imp_delayed_list, rq_list) {
- DEBUG_REQ(D_RPCTRACE, req, "aborting waiting req");
-
- spin_lock(&req->rq_lock);
- if (req->rq_import_generation < imp->imp_generation) {
- req->rq_err = 1;
- req->rq_status = -EIO;
- ptlrpc_client_wake_req(req);
- }
- spin_unlock(&req->rq_lock);
- }
-
- /*
- * Last chance to free reqs left on the replay list, but we
- * will still leak reqs that haven't committed.
- */
- if (imp->imp_replayable)
- ptlrpc_free_committed(imp);
-
- spin_unlock(&imp->imp_lock);
-}
-
-/**
- * Abort all uncompleted requests in request set \a set
- */
-void ptlrpc_abort_set(struct ptlrpc_request_set *set)
-{
- struct ptlrpc_request *req, *tmp;
-
- list_for_each_entry_safe(req, tmp, &set->set_requests, rq_set_chain) {
- spin_lock(&req->rq_lock);
- if (req->rq_phase != RQ_PHASE_RPC) {
- spin_unlock(&req->rq_lock);
- continue;
- }
-
- req->rq_err = 1;
- req->rq_status = -EINTR;
- ptlrpc_client_wake_req(req);
- spin_unlock(&req->rq_lock);
- }
-}
-
-static __u64 ptlrpc_last_xid;
-static spinlock_t ptlrpc_last_xid_lock;
-
-/**
- * Initialize the XID for the node. This is common among all requests on
- * this node, and only requires the property that it is monotonically
- * increasing. It does not need to be sequential. Since this is also used
- * as the RDMA match bits, it is important that a single client NOT have
- * the same match bits for two different in-flight requests, hence we do
- * NOT want to have an XID per target or similar.
- *
- * To avoid an unlikely collision between match bits after a client reboot
- * (which would deliver old data into the wrong RDMA buffer) initialize
- * the XID based on the current time, assuming a maximum RPC rate of 1M RPC/s.
- * If the time is clearly incorrect, we instead use a 62-bit random number.
- * In the worst case, at 1M RPCs per second the random number will take
- * 9133 years to overflow, or permutations thereof.
- */
-#define YEAR_2004 (1ULL << 30)
-void ptlrpc_init_xid(void)
-{
- time64_t now = ktime_get_real_seconds();
-
- spin_lock_init(&ptlrpc_last_xid_lock);
- if (now < YEAR_2004) {
- get_random_bytes(&ptlrpc_last_xid, sizeof(ptlrpc_last_xid));
- ptlrpc_last_xid >>= 2;
- ptlrpc_last_xid |= (1ULL << 61);
- } else {
- ptlrpc_last_xid = (__u64)now << 20;
- }
-
- /* Always need to be aligned to a power-of-two for multi-bulk BRW */
- BUILD_BUG_ON(((PTLRPC_BULK_OPS_COUNT - 1) & PTLRPC_BULK_OPS_COUNT) != 0);
- ptlrpc_last_xid &= PTLRPC_BULK_OPS_MASK;
-}
-
-/**
- * Increases the xid and returns the resulting new value to the caller.
- *
- * Multi-bulk BRW RPCs consume multiple XIDs for each bulk transfer, starting
- * at the returned xid, up to xid + PTLRPC_BULK_OPS_COUNT - 1. The BRW RPC
- * itself uses the last bulk xid needed, so the server can determine the
- * the number of bulk transfers from the RPC XID and a bitmask. The starting
- * xid must align to a power-of-two value.
- *
- * This is assumed to be true due to the initial ptlrpc_last_xid
- * value also being initialized to a power-of-two aligned value. LU-1431
- */
-__u64 ptlrpc_next_xid(void)
-{
- __u64 next;
-
- spin_lock(&ptlrpc_last_xid_lock);
- next = ptlrpc_last_xid + PTLRPC_BULK_OPS_COUNT;
- ptlrpc_last_xid = next;
- spin_unlock(&ptlrpc_last_xid_lock);
-
- return next;
-}
-
-/**
- * If the request has a newly allocated XID (new request or EINPROGRESS
- * resend), use this XID as the bulk matchbits; otherwise allocate new
- * matchbits for the request to ensure the previous bulk fails, avoiding
- * problems with lost replies and several transfers landing in the same
- * buffer from different sending attempts.
- */
-void ptlrpc_set_bulk_mbits(struct ptlrpc_request *req)
-{
- struct ptlrpc_bulk_desc *bd = req->rq_bulk;
-
- LASSERT(bd);
-
- /*
- * Generate new matchbits for all resend requests, including
- * resend replay.
- */
- if (req->rq_resend) {
- u64 old_mbits = req->rq_mbits;
-
- /*
- * First time resend on -EINPROGRESS will generate new xid,
- * so we can actually use the rq_xid as rq_mbits in such case,
- * however, it's a bit hard to distinguish such a resend from a
- * 'resend for the -EINPROGRESS resend'. To make it simple,
- * we opt to generate mbits for all resend cases.
- */
- if ((bd->bd_import->imp_connect_data.ocd_connect_flags &
- OBD_CONNECT_BULK_MBITS)) {
- req->rq_mbits = ptlrpc_next_xid();
- } else {
- /* old version transfers rq_xid to peer as matchbits */
- spin_lock(&req->rq_import->imp_lock);
- list_del_init(&req->rq_unreplied_list);
- ptlrpc_assign_next_xid_nolock(req);
- spin_unlock(&req->rq_import->imp_lock);
- req->rq_mbits = req->rq_xid;
- }
-
- CDEBUG(D_HA, "resend bulk old x%llu new x%llu\n",
- old_mbits, req->rq_mbits);
- } else if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)) {
- /* Request being sent first time, use xid as matchbits. */
- req->rq_mbits = req->rq_xid;
- } else {
- /*
- * Replay request, xid and matchbits have already been
- * correctly assigned.
- */
- return;
- }
-
- /*
- * For multi-bulk RPCs, rq_mbits is the last mbits needed for bulks so
- * that the server can infer the number of bulks that were prepared;
- * see LU-1431
- */
- req->rq_mbits += DIV_ROUND_UP(bd->bd_iov_count, LNET_MAX_IOV) - 1;
-}
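
The LU-1431 scheme above can be checked numerically. A standalone sketch, assuming PTLRPC_BULK_OPS_COUNT is 16 purely for illustration (the real value lives in the ptlrpc headers):

	#include <stdio.h>

	#define PTLRPC_BULK_OPS_COUNT	16ULL		/* assumed value */
	#define PTLRPC_BULK_OPS_MASK	(~(PTLRPC_BULK_OPS_COUNT - 1))

	int main(void)
	{
		/* client side: an aligned starting xid plus (nbulks - 1) */
		unsigned long long start = 0x5a0ULL & PTLRPC_BULK_OPS_MASK;
		unsigned long long nbulks = 5;
		unsigned long long mbits = start + nbulks - 1;

		/* server side: alignment lets the low bits encode the count */
		unsigned long long decoded =
			(mbits & (PTLRPC_BULK_OPS_COUNT - 1)) + 1;

		printf("mbits=%#llx decoded bulk count=%llu\n", mbits, decoded);
		return 0;
	}
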
-
-/**
- * Get a glimpse at what next xid value might have been.
- * Returns possible next xid.
- */
-__u64 ptlrpc_sample_next_xid(void)
-{
-#if BITS_PER_LONG == 32
- /* need to avoid possible word tearing on 32-bit systems */
- __u64 next;
-
- spin_lock(&ptlrpc_last_xid_lock);
- next = ptlrpc_last_xid + PTLRPC_BULK_OPS_COUNT;
- spin_unlock(&ptlrpc_last_xid_lock);
-
- return next;
-#else
- /* No need to lock, since returned value is racy anyways */
- return ptlrpc_last_xid + PTLRPC_BULK_OPS_COUNT;
-#endif
-}
-EXPORT_SYMBOL(ptlrpc_sample_next_xid);
-
-/**
- * Functions for operating ptlrpc workers.
- *
- * A ptlrpc work is a function which will be running inside ptlrpc context.
- * The callback shouldn't sleep otherwise it will block that ptlrpcd thread.
- *
- * 1. After a work item is created, it can be used many times, that is:
- * handler = ptlrpcd_alloc_work();
- * ptlrpcd_queue_work();
- *
- * queue it again when necessary:
- * ptlrpcd_queue_work();
- * ptlrpcd_destroy_work();
- * 2. ptlrpcd_queue_work() can be called concurrently by multiple processes,
- * but the work will only be queued once at any given time. Also, as its
- * name implies, there may be a delay before it actually runs in a ptlrpcd
- * thread.
- */
-struct ptlrpc_work_async_args {
- int (*cb)(const struct lu_env *, void *);
- void *cbdata;
-};
-
-static void ptlrpcd_add_work_req(struct ptlrpc_request *req)
-{
- /* re-initialize the req */
- req->rq_timeout = obd_timeout;
- req->rq_sent = ktime_get_real_seconds();
- req->rq_deadline = req->rq_sent + req->rq_timeout;
- req->rq_phase = RQ_PHASE_INTERPRET;
- req->rq_next_phase = RQ_PHASE_COMPLETE;
- req->rq_xid = ptlrpc_next_xid();
- req->rq_import_generation = req->rq_import->imp_generation;
-
- ptlrpcd_add_req(req);
-}
-
-static int work_interpreter(const struct lu_env *env,
- struct ptlrpc_request *req, void *data, int rc)
-{
- struct ptlrpc_work_async_args *arg = data;
-
- LASSERT(ptlrpcd_check_work(req));
-
- rc = arg->cb(env, arg->cbdata);
-
- list_del_init(&req->rq_set_chain);
- req->rq_set = NULL;
-
- if (atomic_dec_return(&req->rq_refcount) > 1) {
- atomic_set(&req->rq_refcount, 2);
- ptlrpcd_add_work_req(req);
- }
- return rc;
-}
-
-static int worker_format;
-
-static int ptlrpcd_check_work(struct ptlrpc_request *req)
-{
- return req->rq_pill.rc_fmt == (void *)&worker_format;
-}
-
-/**
- * Create a work for ptlrpc.
- */
-void *ptlrpcd_alloc_work(struct obd_import *imp,
- int (*cb)(const struct lu_env *, void *), void *cbdata)
-{
- struct ptlrpc_request *req = NULL;
- struct ptlrpc_work_async_args *args;
-
- might_sleep();
-
- if (!cb)
- return ERR_PTR(-EINVAL);
-
- /* copy some code from deprecated fakereq. */
- req = ptlrpc_request_cache_alloc(GFP_NOFS);
- if (!req) {
- CERROR("ptlrpc: run out of memory!\n");
- return ERR_PTR(-ENOMEM);
- }
-
- ptlrpc_cli_req_init(req);
-
- req->rq_send_state = LUSTRE_IMP_FULL;
- req->rq_type = PTL_RPC_MSG_REQUEST;
- req->rq_import = class_import_get(imp);
- req->rq_interpret_reply = work_interpreter;
- /* don't want reply */
- req->rq_no_delay = 1;
- req->rq_no_resend = 1;
- req->rq_pill.rc_fmt = (void *)&worker_format;
-
- BUILD_BUG_ON(sizeof(*args) > sizeof(req->rq_async_args));
- args = ptlrpc_req_async_args(req);
- args->cb = cb;
- args->cbdata = cbdata;
-
- return req;
-}
-EXPORT_SYMBOL(ptlrpcd_alloc_work);
-
-void ptlrpcd_destroy_work(void *handler)
-{
- struct ptlrpc_request *req = handler;
-
- if (req)
- ptlrpc_req_finished(req);
-}
-EXPORT_SYMBOL(ptlrpcd_destroy_work);
-
-int ptlrpcd_queue_work(void *handler)
-{
- struct ptlrpc_request *req = handler;
-
- /*
- * Check if the req is already being queued.
- *
- * Here comes a trick: ptlrpc lacks a reliable way of checking whether
- * a req is being processed. I have to use the req's refcount for this
- * purpose. This is okay because the caller should treat this req as
- * opaque data. - Jinshan
- */
- LASSERT(atomic_read(&req->rq_refcount) > 0);
- if (atomic_inc_return(&req->rq_refcount) == 2)
- ptlrpcd_add_work_req(req);
- return 0;
-}
-EXPORT_SYMBOL(ptlrpcd_queue_work);
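
A hedged sketch of the worker lifecycle documented above; the callback and counter are hypothetical, and per the comment the callback must not sleep:

	/* Sketch only: a non-sleeping callback run in ptlrpcd context. */
	static int example_work_cb(const struct lu_env *env, void *data)
	{
		int *counter = data;

		(*counter)++;			/* must not sleep here */
		return 0;
	}

	static int example_use_work(struct obd_import *imp)
	{
		static int counter;
		void *handler;

		handler = ptlrpcd_alloc_work(imp, example_work_cb, &counter);
		if (IS_ERR(handler))
			return PTR_ERR(handler);

		ptlrpcd_queue_work(handler);	/* queue it... */
		ptlrpcd_queue_work(handler);	/* ...duplicates collapse to one run */

		ptlrpcd_destroy_work(handler);
		return 0;
	}
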
diff --git a/drivers/staging/lustre/lustre/ptlrpc/connection.c b/drivers/staging/lustre/lustre/ptlrpc/connection.c
deleted file mode 100644
index fb35a89ca6c6..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/connection.c
+++ /dev/null
@@ -1,192 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_RPC
-#include <obd_support.h>
-#include <obd_class.h>
-#include <lustre_net.h>
-
-#include "ptlrpc_internal.h"
-
-static struct rhashtable conn_hash;
-
-/*
- * struct lnet_process_id may contain unassigned bytes which might not
- * be zero, so we cannot just hash and compare bytes.
- */
-
-static u32 lnet_process_id_hash(const void *data, u32 len, u32 seed)
-{
- const struct lnet_process_id *lpi = data;
-
- seed = hash_32(seed ^ lpi->pid, 32);
- seed ^= hash_64(lpi->nid, 32);
- return seed;
-}
-
-static int lnet_process_id_cmp(struct rhashtable_compare_arg *arg,
- const void *obj)
-{
- const struct lnet_process_id *lpi = arg->key;
- const struct ptlrpc_connection *con = obj;
-
- if (lpi->nid == con->c_peer.nid &&
- lpi->pid == con->c_peer.pid)
- return 0;
- return -ESRCH;
-}
-
-static const struct rhashtable_params conn_hash_params = {
- .key_len = 1, /* actually variable-length */
- .key_offset = offsetof(struct ptlrpc_connection, c_peer),
- .head_offset = offsetof(struct ptlrpc_connection, c_hash),
- .hashfn = lnet_process_id_hash,
- .obj_cmpfn = lnet_process_id_cmp,
-};
-
-struct ptlrpc_connection *
-ptlrpc_connection_get(struct lnet_process_id peer, lnet_nid_t self,
- struct obd_uuid *uuid)
-{
- struct ptlrpc_connection *conn, *conn2;
-
- conn = rhashtable_lookup_fast(&conn_hash, &peer, conn_hash_params);
- if (conn) {
- ptlrpc_connection_addref(conn);
- goto out;
- }
-
- conn = kzalloc(sizeof(*conn), GFP_NOFS);
- if (!conn)
- return NULL;
-
- conn->c_peer = peer;
- conn->c_self = self;
- atomic_set(&conn->c_refcount, 1);
- if (uuid)
- obd_str2uuid(&conn->c_remote_uuid, uuid->uuid);
-
- /*
- * Add the newly created conn to the hash; on key collision we
- * lost a racing addition and must destroy our newly allocated
- * connection. The object which exists in the hash will be
- * returned, otherwise NULL is returned on success.
- */
- conn2 = rhashtable_lookup_get_insert_fast(&conn_hash, &conn->c_hash,
- conn_hash_params);
- if (conn2 != NULL) {
- /* insertion failed */
- kfree(conn);
- if (IS_ERR(conn2))
- return NULL;
- conn = conn2;
- ptlrpc_connection_addref(conn);
- }
-out:
- CDEBUG(D_INFO, "conn=%p refcount %d to %s\n",
- conn, atomic_read(&conn->c_refcount),
- libcfs_nid2str(conn->c_peer.nid));
- return conn;
-}
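
The allocate-then-insert race handling above is the standard rhashtable idiom: rhashtable_lookup_get_insert_fast() returns NULL when our object went in, the existing object when we lost the race, or an ERR_PTR on failure. A generic hedged sketch with hypothetical object and table names:

	struct my_obj {
		struct rhash_head	linkage;	/* hypothetical object */
		u64			key;
	};

	/* Sketch: keep 'fresh' only if no racing thread inserted the key first. */
	static struct my_obj *get_or_insert(struct rhashtable *ht,
					    struct my_obj *fresh,
					    const struct rhashtable_params params)
	{
		struct my_obj *old;

		old = rhashtable_lookup_get_insert_fast(ht, &fresh->linkage,
							params);
		if (!old)
			return fresh;		/* our copy was inserted */

		kfree(fresh);			/* lost the race (or hard error) */
		return IS_ERR(old) ? NULL : old;
	}
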
-
-int ptlrpc_connection_put(struct ptlrpc_connection *conn)
-{
- int rc = 0;
-
- if (!conn)
- return rc;
-
- LASSERT(atomic_read(&conn->c_refcount) > 0);
-
- /*
- * We do not remove connection from hashtable and
- * do not free it even if last caller released ref,
- * as we want to have it cached for the case it is
- * needed again.
- *
- * Deallocating it and later creating a new connection
- * again would be wasteful. This way we also avoid
- * expensive locking to protect things from a get/put
- * race when a cached connection found here is freed by
- * ptlrpc_connection_put().
- *
- * It will be freed later in module unload time,
- * when ptlrpc_connection_fini()->lh_exit->conn_exit()
- * path is called.
- */
- if (atomic_dec_return(&conn->c_refcount) == 0)
- rc = 1;
-
- CDEBUG(D_INFO, "PUT conn=%p refcount %d to %s\n",
- conn, atomic_read(&conn->c_refcount),
- libcfs_nid2str(conn->c_peer.nid));
-
- return rc;
-}
-
-struct ptlrpc_connection *
-ptlrpc_connection_addref(struct ptlrpc_connection *conn)
-{
- atomic_inc(&conn->c_refcount);
- CDEBUG(D_INFO, "conn=%p refcount %d to %s\n",
- conn, atomic_read(&conn->c_refcount),
- libcfs_nid2str(conn->c_peer.nid));
-
- return conn;
-}
-
-static void
-conn_exit(void *vconn, void *data)
-{
- struct ptlrpc_connection *conn = vconn;
-
- /*
- * Nothing should be left. The connection user put it, and the
- * connection was also deleted from the table by this time,
- * so we should have 0 refs.
- */
- LASSERTF(atomic_read(&conn->c_refcount) == 0,
- "Busy connection with %d refs\n",
- atomic_read(&conn->c_refcount));
- kfree(conn);
-}
-
-int ptlrpc_connection_init(void)
-{
- return rhashtable_init(&conn_hash, &conn_hash_params);
-}
-
-void ptlrpc_connection_fini(void)
-{
- rhashtable_free_and_destroy(&conn_hash, conn_exit, NULL);
-}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/errno.c b/drivers/staging/lustre/lustre/ptlrpc/errno.c
deleted file mode 100644
index b904524fc1c6..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/errno.c
+++ /dev/null
@@ -1,383 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.txt
- *
- * GPL HEADER END
- */
-/*
- * Copyright (C) 2011 FUJITSU LIMITED. All rights reserved.
- *
- * Copyright (c) 2013, Intel Corporation.
- */
-
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <lustre_errno.h>
-
-/*
- * The two translation tables below must define a one-to-one mapping between
- * host and network errnos.
- *
- * EWOULDBLOCK is equal to EAGAIN on all architectures except for parisc, which
- * appears irrelevant. Thus, existing references to EWOULDBLOCK are fine.
- *
- * EDEADLOCK is equal to EDEADLK on x86 but not on sparc, at least. A sparc
- * host has no context-free way to determine if a LUSTRE_EDEADLK represents an
- * EDEADLK or an EDEADLOCK. Therefore, all existing references to EDEADLOCK
- * that need to be transferred on wire have been replaced with EDEADLK.
- */
-static int lustre_errno_hton_mapping[] = {
- [EPERM] = LUSTRE_EPERM,
- [ENOENT] = LUSTRE_ENOENT,
- [ESRCH] = LUSTRE_ESRCH,
- [EINTR] = LUSTRE_EINTR,
- [EIO] = LUSTRE_EIO,
- [ENXIO] = LUSTRE_ENXIO,
- [E2BIG] = LUSTRE_E2BIG,
- [ENOEXEC] = LUSTRE_ENOEXEC,
- [EBADF] = LUSTRE_EBADF,
- [ECHILD] = LUSTRE_ECHILD,
- [EAGAIN] = LUSTRE_EAGAIN,
- [ENOMEM] = LUSTRE_ENOMEM,
- [EACCES] = LUSTRE_EACCES,
- [EFAULT] = LUSTRE_EFAULT,
- [ENOTBLK] = LUSTRE_ENOTBLK,
- [EBUSY] = LUSTRE_EBUSY,
- [EEXIST] = LUSTRE_EEXIST,
- [EXDEV] = LUSTRE_EXDEV,
- [ENODEV] = LUSTRE_ENODEV,
- [ENOTDIR] = LUSTRE_ENOTDIR,
- [EISDIR] = LUSTRE_EISDIR,
- [EINVAL] = LUSTRE_EINVAL,
- [ENFILE] = LUSTRE_ENFILE,
- [EMFILE] = LUSTRE_EMFILE,
- [ENOTTY] = LUSTRE_ENOTTY,
- [ETXTBSY] = LUSTRE_ETXTBSY,
- [EFBIG] = LUSTRE_EFBIG,
- [ENOSPC] = LUSTRE_ENOSPC,
- [ESPIPE] = LUSTRE_ESPIPE,
- [EROFS] = LUSTRE_EROFS,
- [EMLINK] = LUSTRE_EMLINK,
- [EPIPE] = LUSTRE_EPIPE,
- [EDOM] = LUSTRE_EDOM,
- [ERANGE] = LUSTRE_ERANGE,
- [EDEADLK] = LUSTRE_EDEADLK,
- [ENAMETOOLONG] = LUSTRE_ENAMETOOLONG,
- [ENOLCK] = LUSTRE_ENOLCK,
- [ENOSYS] = LUSTRE_ENOSYS,
- [ENOTEMPTY] = LUSTRE_ENOTEMPTY,
- [ELOOP] = LUSTRE_ELOOP,
- [ENOMSG] = LUSTRE_ENOMSG,
- [EIDRM] = LUSTRE_EIDRM,
- [ECHRNG] = LUSTRE_ECHRNG,
- [EL2NSYNC] = LUSTRE_EL2NSYNC,
- [EL3HLT] = LUSTRE_EL3HLT,
- [EL3RST] = LUSTRE_EL3RST,
- [ELNRNG] = LUSTRE_ELNRNG,
- [EUNATCH] = LUSTRE_EUNATCH,
- [ENOCSI] = LUSTRE_ENOCSI,
- [EL2HLT] = LUSTRE_EL2HLT,
- [EBADE] = LUSTRE_EBADE,
- [EBADR] = LUSTRE_EBADR,
- [EXFULL] = LUSTRE_EXFULL,
- [ENOANO] = LUSTRE_ENOANO,
- [EBADRQC] = LUSTRE_EBADRQC,
- [EBADSLT] = LUSTRE_EBADSLT,
- [EBFONT] = LUSTRE_EBFONT,
- [ENOSTR] = LUSTRE_ENOSTR,
- [ENODATA] = LUSTRE_ENODATA,
- [ETIME] = LUSTRE_ETIME,
- [ENOSR] = LUSTRE_ENOSR,
- [ENONET] = LUSTRE_ENONET,
- [ENOPKG] = LUSTRE_ENOPKG,
- [EREMOTE] = LUSTRE_EREMOTE,
- [ENOLINK] = LUSTRE_ENOLINK,
- [EADV] = LUSTRE_EADV,
- [ESRMNT] = LUSTRE_ESRMNT,
- [ECOMM] = LUSTRE_ECOMM,
- [EPROTO] = LUSTRE_EPROTO,
- [EMULTIHOP] = LUSTRE_EMULTIHOP,
- [EDOTDOT] = LUSTRE_EDOTDOT,
- [EBADMSG] = LUSTRE_EBADMSG,
- [EOVERFLOW] = LUSTRE_EOVERFLOW,
- [ENOTUNIQ] = LUSTRE_ENOTUNIQ,
- [EBADFD] = LUSTRE_EBADFD,
- [EREMCHG] = LUSTRE_EREMCHG,
- [ELIBACC] = LUSTRE_ELIBACC,
- [ELIBBAD] = LUSTRE_ELIBBAD,
- [ELIBSCN] = LUSTRE_ELIBSCN,
- [ELIBMAX] = LUSTRE_ELIBMAX,
- [ELIBEXEC] = LUSTRE_ELIBEXEC,
- [EILSEQ] = LUSTRE_EILSEQ,
- [ERESTART] = LUSTRE_ERESTART,
- [ESTRPIPE] = LUSTRE_ESTRPIPE,
- [EUSERS] = LUSTRE_EUSERS,
- [ENOTSOCK] = LUSTRE_ENOTSOCK,
- [EDESTADDRREQ] = LUSTRE_EDESTADDRREQ,
- [EMSGSIZE] = LUSTRE_EMSGSIZE,
- [EPROTOTYPE] = LUSTRE_EPROTOTYPE,
- [ENOPROTOOPT] = LUSTRE_ENOPROTOOPT,
- [EPROTONOSUPPORT] = LUSTRE_EPROTONOSUPPORT,
- [ESOCKTNOSUPPORT] = LUSTRE_ESOCKTNOSUPPORT,
- [EOPNOTSUPP] = LUSTRE_EOPNOTSUPP,
- [EPFNOSUPPORT] = LUSTRE_EPFNOSUPPORT,
- [EAFNOSUPPORT] = LUSTRE_EAFNOSUPPORT,
- [EADDRINUSE] = LUSTRE_EADDRINUSE,
- [EADDRNOTAVAIL] = LUSTRE_EADDRNOTAVAIL,
- [ENETDOWN] = LUSTRE_ENETDOWN,
- [ENETUNREACH] = LUSTRE_ENETUNREACH,
- [ENETRESET] = LUSTRE_ENETRESET,
- [ECONNABORTED] = LUSTRE_ECONNABORTED,
- [ECONNRESET] = LUSTRE_ECONNRESET,
- [ENOBUFS] = LUSTRE_ENOBUFS,
- [EISCONN] = LUSTRE_EISCONN,
- [ENOTCONN] = LUSTRE_ENOTCONN,
- [ESHUTDOWN] = LUSTRE_ESHUTDOWN,
- [ETOOMANYREFS] = LUSTRE_ETOOMANYREFS,
- [ETIMEDOUT] = LUSTRE_ETIMEDOUT,
- [ECONNREFUSED] = LUSTRE_ECONNREFUSED,
- [EHOSTDOWN] = LUSTRE_EHOSTDOWN,
- [EHOSTUNREACH] = LUSTRE_EHOSTUNREACH,
- [EALREADY] = LUSTRE_EALREADY,
- [EINPROGRESS] = LUSTRE_EINPROGRESS,
- [ESTALE] = LUSTRE_ESTALE,
- [EUCLEAN] = LUSTRE_EUCLEAN,
- [ENOTNAM] = LUSTRE_ENOTNAM,
- [ENAVAIL] = LUSTRE_ENAVAIL,
- [EISNAM] = LUSTRE_EISNAM,
- [EREMOTEIO] = LUSTRE_EREMOTEIO,
- [EDQUOT] = LUSTRE_EDQUOT,
- [ENOMEDIUM] = LUSTRE_ENOMEDIUM,
- [EMEDIUMTYPE] = LUSTRE_EMEDIUMTYPE,
- [ECANCELED] = LUSTRE_ECANCELED,
- [ENOKEY] = LUSTRE_ENOKEY,
- [EKEYEXPIRED] = LUSTRE_EKEYEXPIRED,
- [EKEYREVOKED] = LUSTRE_EKEYREVOKED,
- [EKEYREJECTED] = LUSTRE_EKEYREJECTED,
- [EOWNERDEAD] = LUSTRE_EOWNERDEAD,
- [ENOTRECOVERABLE] = LUSTRE_ENOTRECOVERABLE,
- [ERESTARTSYS] = LUSTRE_ERESTARTSYS,
- [ERESTARTNOINTR] = LUSTRE_ERESTARTNOINTR,
- [ERESTARTNOHAND] = LUSTRE_ERESTARTNOHAND,
- [ENOIOCTLCMD] = LUSTRE_ENOIOCTLCMD,
- [ERESTART_RESTARTBLOCK] = LUSTRE_ERESTART_RESTARTBLOCK,
- [EBADHANDLE] = LUSTRE_EBADHANDLE,
- [ENOTSYNC] = LUSTRE_ENOTSYNC,
- [EBADCOOKIE] = LUSTRE_EBADCOOKIE,
- [ENOTSUPP] = LUSTRE_ENOTSUPP,
- [ETOOSMALL] = LUSTRE_ETOOSMALL,
- [ESERVERFAULT] = LUSTRE_ESERVERFAULT,
- [EBADTYPE] = LUSTRE_EBADTYPE,
- [EJUKEBOX] = LUSTRE_EJUKEBOX,
- [EIOCBQUEUED] = LUSTRE_EIOCBQUEUED,
-};
-
-static int lustre_errno_ntoh_mapping[] = {
- [LUSTRE_EPERM] = EPERM,
- [LUSTRE_ENOENT] = ENOENT,
- [LUSTRE_ESRCH] = ESRCH,
- [LUSTRE_EINTR] = EINTR,
- [LUSTRE_EIO] = EIO,
- [LUSTRE_ENXIO] = ENXIO,
- [LUSTRE_E2BIG] = E2BIG,
- [LUSTRE_ENOEXEC] = ENOEXEC,
- [LUSTRE_EBADF] = EBADF,
- [LUSTRE_ECHILD] = ECHILD,
- [LUSTRE_EAGAIN] = EAGAIN,
- [LUSTRE_ENOMEM] = ENOMEM,
- [LUSTRE_EACCES] = EACCES,
- [LUSTRE_EFAULT] = EFAULT,
- [LUSTRE_ENOTBLK] = ENOTBLK,
- [LUSTRE_EBUSY] = EBUSY,
- [LUSTRE_EEXIST] = EEXIST,
- [LUSTRE_EXDEV] = EXDEV,
- [LUSTRE_ENODEV] = ENODEV,
- [LUSTRE_ENOTDIR] = ENOTDIR,
- [LUSTRE_EISDIR] = EISDIR,
- [LUSTRE_EINVAL] = EINVAL,
- [LUSTRE_ENFILE] = ENFILE,
- [LUSTRE_EMFILE] = EMFILE,
- [LUSTRE_ENOTTY] = ENOTTY,
- [LUSTRE_ETXTBSY] = ETXTBSY,
- [LUSTRE_EFBIG] = EFBIG,
- [LUSTRE_ENOSPC] = ENOSPC,
- [LUSTRE_ESPIPE] = ESPIPE,
- [LUSTRE_EROFS] = EROFS,
- [LUSTRE_EMLINK] = EMLINK,
- [LUSTRE_EPIPE] = EPIPE,
- [LUSTRE_EDOM] = EDOM,
- [LUSTRE_ERANGE] = ERANGE,
- [LUSTRE_EDEADLK] = EDEADLK,
- [LUSTRE_ENAMETOOLONG] = ENAMETOOLONG,
- [LUSTRE_ENOLCK] = ENOLCK,
- [LUSTRE_ENOSYS] = ENOSYS,
- [LUSTRE_ENOTEMPTY] = ENOTEMPTY,
- [LUSTRE_ELOOP] = ELOOP,
- [LUSTRE_ENOMSG] = ENOMSG,
- [LUSTRE_EIDRM] = EIDRM,
- [LUSTRE_ECHRNG] = ECHRNG,
- [LUSTRE_EL2NSYNC] = EL2NSYNC,
- [LUSTRE_EL3HLT] = EL3HLT,
- [LUSTRE_EL3RST] = EL3RST,
- [LUSTRE_ELNRNG] = ELNRNG,
- [LUSTRE_EUNATCH] = EUNATCH,
- [LUSTRE_ENOCSI] = ENOCSI,
- [LUSTRE_EL2HLT] = EL2HLT,
- [LUSTRE_EBADE] = EBADE,
- [LUSTRE_EBADR] = EBADR,
- [LUSTRE_EXFULL] = EXFULL,
- [LUSTRE_ENOANO] = ENOANO,
- [LUSTRE_EBADRQC] = EBADRQC,
- [LUSTRE_EBADSLT] = EBADSLT,
- [LUSTRE_EBFONT] = EBFONT,
- [LUSTRE_ENOSTR] = ENOSTR,
- [LUSTRE_ENODATA] = ENODATA,
- [LUSTRE_ETIME] = ETIME,
- [LUSTRE_ENOSR] = ENOSR,
- [LUSTRE_ENONET] = ENONET,
- [LUSTRE_ENOPKG] = ENOPKG,
- [LUSTRE_EREMOTE] = EREMOTE,
- [LUSTRE_ENOLINK] = ENOLINK,
- [LUSTRE_EADV] = EADV,
- [LUSTRE_ESRMNT] = ESRMNT,
- [LUSTRE_ECOMM] = ECOMM,
- [LUSTRE_EPROTO] = EPROTO,
- [LUSTRE_EMULTIHOP] = EMULTIHOP,
- [LUSTRE_EDOTDOT] = EDOTDOT,
- [LUSTRE_EBADMSG] = EBADMSG,
- [LUSTRE_EOVERFLOW] = EOVERFLOW,
- [LUSTRE_ENOTUNIQ] = ENOTUNIQ,
- [LUSTRE_EBADFD] = EBADFD,
- [LUSTRE_EREMCHG] = EREMCHG,
- [LUSTRE_ELIBACC] = ELIBACC,
- [LUSTRE_ELIBBAD] = ELIBBAD,
- [LUSTRE_ELIBSCN] = ELIBSCN,
- [LUSTRE_ELIBMAX] = ELIBMAX,
- [LUSTRE_ELIBEXEC] = ELIBEXEC,
- [LUSTRE_EILSEQ] = EILSEQ,
- [LUSTRE_ERESTART] = ERESTART,
- [LUSTRE_ESTRPIPE] = ESTRPIPE,
- [LUSTRE_EUSERS] = EUSERS,
- [LUSTRE_ENOTSOCK] = ENOTSOCK,
- [LUSTRE_EDESTADDRREQ] = EDESTADDRREQ,
- [LUSTRE_EMSGSIZE] = EMSGSIZE,
- [LUSTRE_EPROTOTYPE] = EPROTOTYPE,
- [LUSTRE_ENOPROTOOPT] = ENOPROTOOPT,
- [LUSTRE_EPROTONOSUPPORT] = EPROTONOSUPPORT,
- [LUSTRE_ESOCKTNOSUPPORT] = ESOCKTNOSUPPORT,
- [LUSTRE_EOPNOTSUPP] = EOPNOTSUPP,
- [LUSTRE_EPFNOSUPPORT] = EPFNOSUPPORT,
- [LUSTRE_EAFNOSUPPORT] = EAFNOSUPPORT,
- [LUSTRE_EADDRINUSE] = EADDRINUSE,
- [LUSTRE_EADDRNOTAVAIL] = EADDRNOTAVAIL,
- [LUSTRE_ENETDOWN] = ENETDOWN,
- [LUSTRE_ENETUNREACH] = ENETUNREACH,
- [LUSTRE_ENETRESET] = ENETRESET,
- [LUSTRE_ECONNABORTED] = ECONNABORTED,
- [LUSTRE_ECONNRESET] = ECONNRESET,
- [LUSTRE_ENOBUFS] = ENOBUFS,
- [LUSTRE_EISCONN] = EISCONN,
- [LUSTRE_ENOTCONN] = ENOTCONN,
- [LUSTRE_ESHUTDOWN] = ESHUTDOWN,
- [LUSTRE_ETOOMANYREFS] = ETOOMANYREFS,
- [LUSTRE_ETIMEDOUT] = ETIMEDOUT,
- [LUSTRE_ECONNREFUSED] = ECONNREFUSED,
- [LUSTRE_EHOSTDOWN] = EHOSTDOWN,
- [LUSTRE_EHOSTUNREACH] = EHOSTUNREACH,
- [LUSTRE_EALREADY] = EALREADY,
- [LUSTRE_EINPROGRESS] = EINPROGRESS,
- [LUSTRE_ESTALE] = ESTALE,
- [LUSTRE_EUCLEAN] = EUCLEAN,
- [LUSTRE_ENOTNAM] = ENOTNAM,
- [LUSTRE_ENAVAIL] = ENAVAIL,
- [LUSTRE_EISNAM] = EISNAM,
- [LUSTRE_EREMOTEIO] = EREMOTEIO,
- [LUSTRE_EDQUOT] = EDQUOT,
- [LUSTRE_ENOMEDIUM] = ENOMEDIUM,
- [LUSTRE_EMEDIUMTYPE] = EMEDIUMTYPE,
- [LUSTRE_ECANCELED] = ECANCELED,
- [LUSTRE_ENOKEY] = ENOKEY,
- [LUSTRE_EKEYEXPIRED] = EKEYEXPIRED,
- [LUSTRE_EKEYREVOKED] = EKEYREVOKED,
- [LUSTRE_EKEYREJECTED] = EKEYREJECTED,
- [LUSTRE_EOWNERDEAD] = EOWNERDEAD,
- [LUSTRE_ENOTRECOVERABLE] = ENOTRECOVERABLE,
- [LUSTRE_ERESTARTSYS] = ERESTARTSYS,
- [LUSTRE_ERESTARTNOINTR] = ERESTARTNOINTR,
- [LUSTRE_ERESTARTNOHAND] = ERESTARTNOHAND,
- [LUSTRE_ENOIOCTLCMD] = ENOIOCTLCMD,
- [LUSTRE_ERESTART_RESTARTBLOCK] = ERESTART_RESTARTBLOCK,
- [LUSTRE_EBADHANDLE] = EBADHANDLE,
- [LUSTRE_ENOTSYNC] = ENOTSYNC,
- [LUSTRE_EBADCOOKIE] = EBADCOOKIE,
- [LUSTRE_ENOTSUPP] = ENOTSUPP,
- [LUSTRE_ETOOSMALL] = ETOOSMALL,
- [LUSTRE_ESERVERFAULT] = ESERVERFAULT,
- [LUSTRE_EBADTYPE] = EBADTYPE,
- [LUSTRE_EJUKEBOX] = EJUKEBOX,
- [LUSTRE_EIOCBQUEUED] = EIOCBQUEUED,
-};
-
-unsigned int lustre_errno_hton(unsigned int h)
-{
- unsigned int n;
-
- if (h == 0) {
- n = 0;
- } else if (h < ARRAY_SIZE(lustre_errno_hton_mapping)) {
- n = lustre_errno_hton_mapping[h];
- if (n == 0)
- goto generic;
- } else {
-generic:
- /*
- * A generic errno is better than the unknown one that could
- * mean anything to a different host.
- */
- n = LUSTRE_EIO;
- }
-
- return n;
-}
-EXPORT_SYMBOL(lustre_errno_hton);
-
-unsigned int lustre_errno_ntoh(unsigned int n)
-{
- unsigned int h;
-
- if (n == 0) {
- h = 0;
- } else if (n < ARRAY_SIZE(lustre_errno_ntoh_mapping)) {
- h = lustre_errno_ntoh_mapping[n];
- if (h == 0)
- goto generic;
- } else {
-generic:
- /*
- * Similar to the situation in lustre_errno_hton(), an unknown
- * network errno could coincide with anything. Hence, it is
- * better to return a generic errno.
- */
- h = EIO;
- }
-
- return h;
-}
-EXPORT_SYMBOL(lustre_errno_ntoh);
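
Since the two tables are required to define a one-to-one mapping, translations round-trip for every mapped value, and anything unmapped degrades to EIO/LUSTRE_EIO. A hedged usage sketch (the wrapper function is illustrative):

	/* Sketch only: translate before sending, translate back on receipt. */
	static void example_wire_errno(int host_err)
	{
		unsigned int wire = lustre_errno_hton(host_err);
		unsigned int back = lustre_errno_ntoh(wire);

		/* for mapped errnos back == host_err; unknown values become EIO */
		CDEBUG(D_INFO, "host %d -> wire %u -> host %u\n",
		       host_err, wire, back);
	}
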
diff --git a/drivers/staging/lustre/lustre/ptlrpc/events.c b/drivers/staging/lustre/lustre/ptlrpc/events.c
deleted file mode 100644
index 130bacc2c891..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/events.c
+++ /dev/null
@@ -1,585 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015 Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_RPC
-
-#include <linux/libcfs/libcfs.h>
-# ifdef __mips64__
-# include <linux/kernel.h>
-# endif
-
-#include <obd_class.h>
-#include <lustre_net.h>
-#include <lustre_sec.h>
-#include "ptlrpc_internal.h"
-
-struct lnet_handle_eq ptlrpc_eq_h;
-
-/*
- * Client's outgoing request callback
- */
-void request_out_callback(struct lnet_event *ev)
-{
- struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
- struct ptlrpc_request *req = cbid->cbid_arg;
- bool wakeup = false;
-
- LASSERT(ev->type == LNET_EVENT_SEND || ev->type == LNET_EVENT_UNLINK);
- LASSERT(ev->unlinked);
-
- DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status);
-
- sptlrpc_request_out_callback(req);
-
- spin_lock(&req->rq_lock);
- req->rq_real_sent = ktime_get_real_seconds();
- req->rq_req_unlinked = 1;
- /* reply_in_callback happened before request_out_callback? */
- if (req->rq_reply_unlinked)
- wakeup = true;
-
- if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) {
- /* Failed send: make it seem like the reply timed out, just
- * like failing sends in client.c does currently...
- */
- req->rq_net_err = 1;
- wakeup = true;
- }
-
- if (wakeup)
- ptlrpc_client_wake_req(req);
-
- spin_unlock(&req->rq_lock);
-
- ptlrpc_req_finished(req);
-}
-
-/*
- * Client's incoming reply callback
- */
-void reply_in_callback(struct lnet_event *ev)
-{
- struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
- struct ptlrpc_request *req = cbid->cbid_arg;
-
- DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status);
-
- LASSERT(ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_UNLINK);
- LASSERT(ev->md.start == req->rq_repbuf);
- LASSERT(ev->offset + ev->mlength <= req->rq_repbuf_len);
- /* We've set LNET_MD_MANAGE_REMOTE for all outgoing requests
- * for adaptive timeouts' early reply.
- */
- LASSERT((ev->md.options & LNET_MD_MANAGE_REMOTE) != 0);
-
- spin_lock(&req->rq_lock);
-
- req->rq_receiving_reply = 0;
- req->rq_early = 0;
- if (ev->unlinked)
- req->rq_reply_unlinked = 1;
-
- if (ev->status)
- goto out_wake;
-
- if (ev->type == LNET_EVENT_UNLINK) {
- LASSERT(ev->unlinked);
- DEBUG_REQ(D_NET, req, "unlink");
- goto out_wake;
- }
-
- if (ev->mlength < ev->rlength) {
- CDEBUG(D_RPCTRACE, "truncate req %p rpc %d - %d+%d\n", req,
- req->rq_replen, ev->rlength, ev->offset);
- req->rq_reply_truncated = 1;
- req->rq_replied = 1;
- req->rq_status = -EOVERFLOW;
- req->rq_nob_received = ev->rlength + ev->offset;
- goto out_wake;
- }
-
- if ((ev->offset == 0) &&
- ((lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT))) {
- /* Early reply */
- DEBUG_REQ(D_ADAPTTO, req,
- "Early reply received: mlen=%u offset=%d replen=%d replied=%d unlinked=%d",
- ev->mlength, ev->offset,
- req->rq_replen, req->rq_replied, ev->unlinked);
-
- req->rq_early_count++; /* number received, client side */
-
- /* already got the real reply or buffers are already unlinked */
- if (req->rq_replied || req->rq_reply_unlinked == 1)
- goto out_wake;
-
- req->rq_early = 1;
- req->rq_reply_off = ev->offset;
- req->rq_nob_received = ev->mlength;
- /* And we're still receiving */
- req->rq_receiving_reply = 1;
- } else {
- /* Real reply */
- req->rq_rep_swab_mask = 0;
- req->rq_replied = 1;
- /* Got reply, no resend required */
- req->rq_resend = 0;
- req->rq_reply_off = ev->offset;
- req->rq_nob_received = ev->mlength;
- /* LNetMDUnlink can't be called under the LNET_LOCK,
- * so we must unlink in ptlrpc_unregister_reply
- */
- DEBUG_REQ(D_INFO, req,
- "reply in flags=%x mlen=%u offset=%d replen=%d",
- lustre_msg_get_flags(req->rq_reqmsg),
- ev->mlength, ev->offset, req->rq_replen);
- }
-
- req->rq_import->imp_last_reply_time = ktime_get_real_seconds();
-
-out_wake:
- /* NB don't unlock till after wakeup; req can disappear under us
- * since we don't have our own ref
- */
- ptlrpc_client_wake_req(req);
- spin_unlock(&req->rq_lock);
-}
-
-/*
- * Client's bulk has been written/read
- */
-void client_bulk_callback(struct lnet_event *ev)
-{
- struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
- struct ptlrpc_bulk_desc *desc = cbid->cbid_arg;
- struct ptlrpc_request *req;
-
- LASSERT((ptlrpc_is_bulk_put_sink(desc->bd_type) &&
- ev->type == LNET_EVENT_PUT) ||
- (ptlrpc_is_bulk_get_source(desc->bd_type) &&
- ev->type == LNET_EVENT_GET) ||
- ev->type == LNET_EVENT_UNLINK);
- LASSERT(ev->unlinked);
-
- if (CFS_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB, CFS_FAIL_ONCE))
- ev->status = -EIO;
-
- if (CFS_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB2,
- CFS_FAIL_ONCE))
- ev->status = -EIO;
-
- CDEBUG((ev->status == 0) ? D_NET : D_ERROR,
- "event type %d, status %d, desc %p\n",
- ev->type, ev->status, desc);
-
- spin_lock(&desc->bd_lock);
- req = desc->bd_req;
- LASSERT(desc->bd_md_count > 0);
- desc->bd_md_count--;
-
- if (ev->type != LNET_EVENT_UNLINK && ev->status == 0) {
- desc->bd_nob_transferred += ev->mlength;
- desc->bd_sender = ev->sender;
- } else {
- /* start reconnect and resend if network error hit */
- spin_lock(&req->rq_lock);
- req->rq_net_err = 1;
- spin_unlock(&req->rq_lock);
- }
-
- if (ev->status != 0)
- desc->bd_failure = 1;
-
- /* NB don't unlock till after wakeup; desc can disappear under us
- * otherwise
- */
- if (desc->bd_md_count == 0)
- ptlrpc_client_wake_req(desc->bd_req);
-
- spin_unlock(&desc->bd_lock);
-}
-
-/*
- * We will have per-CPT request history lists for the ptlrpc service in
- * upcoming patches because we don't want to be serialized by the current
- * per-service history operations. So we require that the history ID can
- * (somehow) show arrival order without grabbing a global lock, and users
- * can sort them in userspace.
- *
- * This is how we generate history ID for ptlrpc_request:
- * ----------------------------------------------------
- * | 32 bits | 16 bits | (16 - X)bits | X bits |
- * ----------------------------------------------------
- * | seconds | usec / 16 | sequence | CPT id |
- * ----------------------------------------------------
- *
- * it might not be precise but should be good enough.
- */
-
-#define REQS_CPT_BITS(svcpt) ((svcpt)->scp_service->srv_cpt_bits)
-
-#define REQS_SEC_SHIFT 32
-#define REQS_USEC_SHIFT 16
-#define REQS_SEQ_SHIFT(svcpt) REQS_CPT_BITS(svcpt)
-
-static void ptlrpc_req_add_history(struct ptlrpc_service_part *svcpt,
- struct ptlrpc_request *req)
-{
- __u64 sec = req->rq_arrival_time.tv_sec;
- __u32 usec = req->rq_arrival_time.tv_nsec / NSEC_PER_USEC / 16; /* usec / 16 */
- __u64 new_seq;
-
- /* set sequence ID for request and add it to history list,
- * it must be called while holding svcpt::scp_lock
- */
-
- new_seq = (sec << REQS_SEC_SHIFT) |
- (usec << REQS_USEC_SHIFT) |
- (svcpt->scp_cpt < 0 ? 0 : svcpt->scp_cpt);
-
- if (new_seq > svcpt->scp_hist_seq) {
- /* This handles the initial case of scp_hist_seq == 0 or
- * we just jumped into a new time window
- */
- svcpt->scp_hist_seq = new_seq;
- } else {
- LASSERT(REQS_SEQ_SHIFT(svcpt) < REQS_USEC_SHIFT);
- /* NB: increase sequence number in current usec bucket,
- * however, it's possible that we used up all bits for
- * sequence and jumped into the next usec bucket (future time),
- * then we hope there will be fewer RPCs per bucket at some
- * point, and sequence will catch up again
- */
- svcpt->scp_hist_seq += (1ULL << REQS_SEQ_SHIFT(svcpt));
- new_seq = svcpt->scp_hist_seq;
- }
-
- req->rq_history_seq = new_seq;
-
- list_add_tail(&req->rq_history_list, &svcpt->scp_hist_reqs);
-}
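
A standalone sketch of the history-ID packing documented above, using the same REQS_* shifts; the arrival time and CPT id are made up:

	#include <stdio.h>
	#include <stdint.h>

	#define REQS_SEC_SHIFT	32
	#define REQS_USEC_SHIFT	16

	int main(void)
	{
		uint64_t sec = 1500000000ULL;	/* arrival seconds, illustrative */
		uint32_t usec16 = 123456 / 16;	/* usec / 16 always fits 16 bits */
		uint32_t cpt = 3;		/* CPT id in the low bits */

		uint64_t seq = (sec << REQS_SEC_SHIFT) |
			       ((uint64_t)usec16 << REQS_USEC_SHIFT) | cpt;

		printf("history id %#llx sorts by (sec, usec, seq, cpt)\n",
		       (unsigned long long)seq);
		return 0;
	}
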
-
-/*
- * Server's incoming request callback
- */
-void request_in_callback(struct lnet_event *ev)
-{
- struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
- struct ptlrpc_request_buffer_desc *rqbd = cbid->cbid_arg;
- struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt;
- struct ptlrpc_service *service = svcpt->scp_service;
- struct ptlrpc_request *req;
-
- LASSERT(ev->type == LNET_EVENT_PUT ||
- ev->type == LNET_EVENT_UNLINK);
- LASSERT((char *)ev->md.start >= rqbd->rqbd_buffer);
- LASSERT((char *)ev->md.start + ev->offset + ev->mlength <=
- rqbd->rqbd_buffer + service->srv_buf_size);
-
- CDEBUG((ev->status == 0) ? D_NET : D_ERROR,
- "event type %d, status %d, service %s\n",
- ev->type, ev->status, service->srv_name);
-
- if (ev->unlinked) {
- /* If this is the last request message to fit in the
- * request buffer we can use the request object embedded in
- * rqbd. Note that if we failed to allocate a request,
- * we'd have to re-post the rqbd, which we can't do in this
- * context.
- */
- req = &rqbd->rqbd_req;
- memset(req, 0, sizeof(*req));
- } else {
- LASSERT(ev->type == LNET_EVENT_PUT);
- if (ev->status != 0) {
- /* We moaned above already... */
- return;
- }
- req = ptlrpc_request_cache_alloc(GFP_ATOMIC);
- if (!req) {
- CERROR("Can't allocate incoming request descriptor: Dropping %s RPC from %s\n",
- service->srv_name,
- libcfs_id2str(ev->initiator));
- return;
- }
- }
-
- ptlrpc_srv_req_init(req);
- /* NB we ABSOLUTELY RELY on req being zeroed, so pointers are NULL,
- * flags are reset and scalars are zero. We only set the message
- * size to non-zero if this was a successful receive.
- */
- req->rq_xid = ev->match_bits;
- req->rq_reqbuf = ev->md.start + ev->offset;
- if (ev->type == LNET_EVENT_PUT && ev->status == 0)
- req->rq_reqdata_len = ev->mlength;
- ktime_get_real_ts64(&req->rq_arrival_time);
- req->rq_peer = ev->initiator;
- req->rq_self = ev->target.nid;
- req->rq_rqbd = rqbd;
- req->rq_phase = RQ_PHASE_NEW;
- if (ev->type == LNET_EVENT_PUT)
- CDEBUG(D_INFO, "incoming req@%p x%llu msgsize %u\n",
- req, req->rq_xid, ev->mlength);
-
- CDEBUG(D_RPCTRACE, "peer: %s\n", libcfs_id2str(req->rq_peer));
-
- spin_lock(&svcpt->scp_lock);
-
- ptlrpc_req_add_history(svcpt, req);
-
- if (ev->unlinked) {
- svcpt->scp_nrqbds_posted--;
- CDEBUG(D_INFO, "Buffer complete: %d buffers still posted\n",
- svcpt->scp_nrqbds_posted);
-
- /* Normally, don't complain about 0 buffers posted; LNET won't
- * drop incoming reqs since we set the portal lazy
- */
- if (test_req_buffer_pressure &&
- ev->type != LNET_EVENT_UNLINK &&
- svcpt->scp_nrqbds_posted == 0)
- CWARN("All %s request buffers busy\n",
- service->srv_name);
-
- /* req takes over the network's ref on rqbd */
- } else {
- /* req takes a ref on rqbd */
- rqbd->rqbd_refcount++;
- }
-
- list_add_tail(&req->rq_list, &svcpt->scp_req_incoming);
- svcpt->scp_nreqs_incoming++;
-
- /* NB everything can disappear under us once the request
- * has been queued and we unlock, so do the wake now...
- */
- wake_up(&svcpt->scp_waitq);
-
- spin_unlock(&svcpt->scp_lock);
-}
-
-/*
- * Server's outgoing reply callback
- */
-void reply_out_callback(struct lnet_event *ev)
-{
- struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
- struct ptlrpc_reply_state *rs = cbid->cbid_arg;
- struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
-
- LASSERT(ev->type == LNET_EVENT_SEND ||
- ev->type == LNET_EVENT_ACK ||
- ev->type == LNET_EVENT_UNLINK);
-
- if (!rs->rs_difficult) {
- /* 'Easy' replies have no further processing so I drop the
- * net's ref on 'rs'
- */
- LASSERT(ev->unlinked);
- ptlrpc_rs_decref(rs);
- return;
- }
-
- LASSERT(rs->rs_on_net);
-
- if (ev->unlinked) {
- /* Last network callback. The net's ref on 'rs' stays put
- * until ptlrpc_handle_rs() is done with it
- */
- spin_lock(&svcpt->scp_rep_lock);
- spin_lock(&rs->rs_lock);
-
- rs->rs_on_net = 0;
- if (!rs->rs_no_ack ||
- rs->rs_transno <=
- rs->rs_export->exp_obd->obd_last_committed ||
- list_empty(&rs->rs_obd_list))
- ptlrpc_schedule_difficult_reply(rs);
-
- spin_unlock(&rs->rs_lock);
- spin_unlock(&svcpt->scp_rep_lock);
- }
-}
-
-static void ptlrpc_master_callback(struct lnet_event *ev)
-{
- struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
- void (*callback)(struct lnet_event *ev) = cbid->cbid_fn;
-
- /* Honestly, it's best to find out early. */
- LASSERT(cbid->cbid_arg != LP_POISON);
- LASSERT(callback == request_out_callback ||
- callback == reply_in_callback ||
- callback == client_bulk_callback ||
- callback == request_in_callback ||
- callback == reply_out_callback);
-
- callback(ev);
-}
-
-int ptlrpc_uuid_to_peer(struct obd_uuid *uuid,
- struct lnet_process_id *peer, lnet_nid_t *self)
-{
- int best_dist = 0;
- __u32 best_order = 0;
- int count = 0;
- int rc = -ENOENT;
- int dist;
- __u32 order;
- lnet_nid_t dst_nid;
- lnet_nid_t src_nid;
-
- peer->pid = LNET_PID_LUSTRE;
-
- /* Choose the matching UUID that's closest */
- while (lustre_uuid_to_peer(uuid->uuid, &dst_nid, count++) == 0) {
- dist = LNetDist(dst_nid, &src_nid, &order);
- if (dist < 0)
- continue;
-
- if (dist == 0) { /* local! use loopback LND */
- peer->nid = *self = LNET_MKNID(LNET_MKNET(LOLND, 0), 0);
- rc = 0;
- break;
- }
-
- if (rc < 0 ||
- dist < best_dist ||
- (dist == best_dist && order < best_order)) {
- best_dist = dist;
- best_order = order;
-
- peer->nid = dst_nid;
- *self = src_nid;
- rc = 0;
- }
- }
-
- CDEBUG(D_NET, "%s->%s\n", uuid->uuid, libcfs_id2str(*peer));
- return rc;
-}
-
-static void ptlrpc_ni_fini(void)
-{
- int rc;
- int retries;
-
- /* Wait for the event queue to become idle since there may still be
- * messages in flight with pending events (i.e. the fire-and-forget
- * messages == client requests and "non-difficult" server
- * replies).
- */
-
- for (retries = 0;; retries++) {
- rc = LNetEQFree(ptlrpc_eq_h);
- switch (rc) {
- default:
- LBUG();
-
- case 0:
- LNetNIFini();
- return;
-
- case -EBUSY:
- if (retries != 0)
- CWARN("Event queue still busy\n");
-
- schedule_timeout_uninterruptible(2 * HZ);
- break;
- }
- }
- /* notreached */
-}
-
-static lnet_pid_t ptl_get_pid(void)
-{
- lnet_pid_t pid;
-
- pid = LNET_PID_LUSTRE;
- return pid;
-}
-
-static int ptlrpc_ni_init(void)
-{
- int rc;
- lnet_pid_t pid;
-
- pid = ptl_get_pid();
- CDEBUG(D_NET, "My pid is: %x\n", pid);
-
- /* We're not passing any limits yet... */
- rc = LNetNIInit(pid);
- if (rc < 0) {
- CDEBUG(D_NET, "Can't init network interface: %d\n", rc);
- return rc;
- }
-
- /* CAVEAT EMPTOR: how we process portals events is _radically_
- * different depending on...
- */
- /* Kernel LNet calls our master callback when there are new events;
- * because we are guaranteed to get every event via the callback,
- * we just set the EQ size to 0 to avoid the overhead of serializing
- * enqueue/dequeue operations in LNet.
- */
- rc = LNetEQAlloc(0, ptlrpc_master_callback, &ptlrpc_eq_h);
- if (rc == 0)
- return 0;
-
- CERROR("Failed to allocate event queue: %d\n", rc);
- LNetNIFini();
-
- return rc;
-}
-
-int ptlrpc_init_portals(void)
-{
- int rc = ptlrpc_ni_init();
-
- if (rc != 0) {
- CERROR("network initialisation failed\n");
- return rc;
- }
- rc = ptlrpcd_addref();
- if (rc == 0)
- return 0;
-
- CERROR("rpcd initialisation failed\n");
- ptlrpc_ni_fini();
- return rc;
-}
-
-void ptlrpc_exit_portals(void)
-{
- ptlrpcd_decref();
- ptlrpc_ni_fini();
-}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/import.c b/drivers/staging/lustre/lustre/ptlrpc/import.c
deleted file mode 100644
index 1a0f35dfab97..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/import.c
+++ /dev/null
@@ -1,1677 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ptlrpc/import.c
- *
- * Author: Mike Shaver <shaver@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_RPC
-
-#include <linux/kthread.h>
-#include <obd_support.h>
-#include <lustre_ha.h>
-#include <lustre_net.h>
-#include <lustre_import.h>
-#include <lustre_export.h>
-#include <obd.h>
-#include <obd_cksum.h>
-#include <obd_class.h>
-
-#include "ptlrpc_internal.h"
-
-struct ptlrpc_connect_async_args {
- __u64 pcaa_peer_committed;
- int pcaa_initial_connect;
-};
-
-/**
- * Updates import \a imp current state to provided \a state value
- * Helper function. Must be called under imp_lock.
- */
-static void __import_set_state(struct obd_import *imp,
- enum lustre_imp_state state)
-{
- switch (state) {
- case LUSTRE_IMP_CLOSED:
- case LUSTRE_IMP_NEW:
- case LUSTRE_IMP_DISCON:
- case LUSTRE_IMP_CONNECTING:
- break;
- case LUSTRE_IMP_REPLAY_WAIT:
- imp->imp_replay_state = LUSTRE_IMP_REPLAY_LOCKS;
- break;
- default:
- imp->imp_replay_state = LUSTRE_IMP_REPLAY;
- }
-
- imp->imp_state = state;
- imp->imp_state_hist[imp->imp_state_hist_idx].ish_state = state;
- imp->imp_state_hist[imp->imp_state_hist_idx].ish_time =
- ktime_get_real_seconds();
- imp->imp_state_hist_idx = (imp->imp_state_hist_idx + 1) %
- IMP_STATE_HIST_LEN;
-}
-
-/* A CLOSED import should remain so. */
-#define IMPORT_SET_STATE_NOLOCK(imp, state) \
-do { \
- if (imp->imp_state != LUSTRE_IMP_CLOSED) { \
- CDEBUG(D_HA, "%p %s: changing import state from %s to %s\n", \
- imp, obd2cli_tgt(imp->imp_obd), \
- ptlrpc_import_state_name(imp->imp_state), \
- ptlrpc_import_state_name(state)); \
- __import_set_state(imp, state); \
- } \
-} while (0)
-
-#define IMPORT_SET_STATE(imp, state) \
-do { \
- spin_lock(&imp->imp_lock); \
- IMPORT_SET_STATE_NOLOCK(imp, state); \
- spin_unlock(&imp->imp_lock); \
-} while (0)
-
-static int ptlrpc_connect_interpret(const struct lu_env *env,
- struct ptlrpc_request *request,
- void *data, int rc);
-int ptlrpc_import_recovery_state_machine(struct obd_import *imp);
-
-/* Only this function is allowed to change the import state when it is
- * CLOSED. I would rather refcount the import and free it after
- * disconnection like we do with exports. To do that, the client_obd
- * will need to save the peer info somewhere other than in the import,
- * though.
- */
-int ptlrpc_init_import(struct obd_import *imp)
-{
- spin_lock(&imp->imp_lock);
-
- imp->imp_generation++;
- imp->imp_state = LUSTRE_IMP_NEW;
-
- spin_unlock(&imp->imp_lock);
-
- return 0;
-}
-EXPORT_SYMBOL(ptlrpc_init_import);
-
-#define UUID_STR "_UUID"
-static void deuuidify(char *uuid, const char *prefix, char **uuid_start,
- int *uuid_len)
-{
- *uuid_start = !prefix || strncmp(uuid, prefix, strlen(prefix))
- ? uuid : uuid + strlen(prefix);
-
- *uuid_len = strlen(*uuid_start);
-
- if (*uuid_len < strlen(UUID_STR))
- return;
-
- if (!strncmp(*uuid_start + *uuid_len - strlen(UUID_STR),
- UUID_STR, strlen(UUID_STR)))
- *uuid_len -= strlen(UUID_STR);
-}
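
For illustration, deuuidify() strips an optional prefix and a trailing "_UUID" so messages can print the bare target name. A hedged sketch of a call (the wrapper is hypothetical):

	/* Sketch: "lustre-OST0001_UUID" prints as "lustre-OST0001". */
	static void example_deuuidify(void)
	{
		char uuid[] = "lustre-OST0001_UUID";
		char *start;
		int len;

		deuuidify(uuid, NULL, &start, &len);
		CDEBUG(D_INFO, "target %.*s\n", len, start);
	}
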
-
-/**
- * Returns true if import was FULL, false if import was already not
- * connected.
- * @imp - import to be disconnected
- * @conn_cnt - connection count (epoch) of the request that timed out
- * and caused the disconnection. In some cases, multiple
- * inflight requests can fail to a single target (e.g. OST
- * bulk requests) and if one has already caused a reconnection
- * (increasing the import->conn_cnt) the older failure should
- * not also cause a reconnection. If zero it forces a reconnect.
- */
-int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt)
-{
- int rc = 0;
-
- spin_lock(&imp->imp_lock);
-
- if (imp->imp_state == LUSTRE_IMP_FULL &&
- (conn_cnt == 0 || conn_cnt == imp->imp_conn_cnt)) {
- char *target_start;
- int target_len;
-
- deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
- &target_start, &target_len);
-
- if (imp->imp_replayable) {
- LCONSOLE_WARN("%s: Connection to %.*s (at %s) was lost; in progress operations using this service will wait for recovery to complete\n",
- imp->imp_obd->obd_name, target_len, target_start,
- libcfs_nid2str(imp->imp_connection->c_peer.nid));
- } else {
- LCONSOLE_ERROR_MSG(0x166, "%s: Connection to %.*s (at %s) was lost; in progress operations using this service will fail\n",
- imp->imp_obd->obd_name,
- target_len, target_start,
- libcfs_nid2str(imp->imp_connection->c_peer.nid));
- }
- IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
- spin_unlock(&imp->imp_lock);
-
- if (obd_dump_on_timeout)
- libcfs_debug_dumplog();
-
- obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON);
- rc = 1;
- } else {
- spin_unlock(&imp->imp_lock);
- CDEBUG(D_HA, "%s: import %p already %s (conn %u, was %u): %s\n",
- imp->imp_client->cli_name, imp,
- (imp->imp_state == LUSTRE_IMP_FULL &&
- imp->imp_conn_cnt > conn_cnt) ?
- "reconnected" : "not connected", imp->imp_conn_cnt,
- conn_cnt, ptlrpc_import_state_name(imp->imp_state));
- }
-
- return rc;
-}
-
-/*
- * This acts as a barrier; all existing requests are rejected, and
- * no new requests will be accepted until the import is valid again.
- */
-void ptlrpc_deactivate_import(struct obd_import *imp)
-{
- CDEBUG(D_HA, "setting import %s INVALID\n", obd2cli_tgt(imp->imp_obd));
-
- spin_lock(&imp->imp_lock);
- imp->imp_invalid = 1;
- imp->imp_generation++;
- spin_unlock(&imp->imp_lock);
-
- ptlrpc_abort_inflight(imp);
- obd_import_event(imp->imp_obd, imp, IMP_EVENT_INACTIVE);
-}
-EXPORT_SYMBOL(ptlrpc_deactivate_import);
-
-static unsigned int
-ptlrpc_inflight_deadline(struct ptlrpc_request *req, time64_t now)
-{
- long dl;
-
- if (!(((req->rq_phase == RQ_PHASE_RPC) && !req->rq_waiting) ||
- (req->rq_phase == RQ_PHASE_BULK) ||
- (req->rq_phase == RQ_PHASE_NEW)))
- return 0;
-
- if (req->rq_timedout)
- return 0;
-
- if (req->rq_phase == RQ_PHASE_NEW)
- dl = req->rq_sent;
- else
- dl = req->rq_deadline;
-
- if (dl <= now)
- return 0;
-
- return dl - now;
-}
-
-static unsigned int ptlrpc_inflight_timeout(struct obd_import *imp)
-{
- time64_t now = ktime_get_real_seconds();
- struct ptlrpc_request *req, *n;
- unsigned int timeout = 0;
-
- spin_lock(&imp->imp_lock);
- list_for_each_entry_safe(req, n, &imp->imp_sending_list, rq_list)
- timeout = max(ptlrpc_inflight_deadline(req, now), timeout);
-
- spin_unlock(&imp->imp_lock);
- return timeout;
-}
-
-/**
- * This function will invalidate the import, if necessary, then block
- * for all the RPC completions, and finally notify the obd to
- * invalidate its state (ie cancel locks, clear pending requests,
- * etc).
- */
-void ptlrpc_invalidate_import(struct obd_import *imp)
-{
- struct ptlrpc_request *req, *n;
- unsigned int timeout;
- int rc;
-
- atomic_inc(&imp->imp_inval_count);
-
- if (!imp->imp_invalid || imp->imp_obd->obd_no_recov)
- ptlrpc_deactivate_import(imp);
-
- CFS_FAIL_TIMEOUT(OBD_FAIL_MGS_CONNECT_NET, 3 * cfs_fail_val / 2);
- LASSERT(imp->imp_invalid);
-
- /* Wait forever until inflight == 0. We really can't do it any other
- * way because in some cases we need to wait for a very long reply
- * unlink. We can't do anything before that because there is really
- * no guarantee that some RDMA transfer is not in progress right now.
- */
- do {
- /* Calculate max timeout for waiting on rpcs to error
- * out. Use obd_timeout if calculated value is smaller
- * than it.
- */
- if (!OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) {
- timeout = ptlrpc_inflight_timeout(imp);
- timeout += timeout / 3;
-
- if (timeout == 0)
- timeout = obd_timeout;
- } else {
- /* decrease the interval to widen the race window */
- timeout = 1;
- }
-
- CDEBUG(D_RPCTRACE,
- "Sleeping %d sec for inflight to error out\n",
- timeout);
-
- /* Wait for all requests to error out and call completion
- * callbacks. Cap it at obd_timeout -- these should all
- * have been locally cancelled by ptlrpc_abort_inflight.
- */
- rc = wait_event_idle_timeout(imp->imp_recovery_waitq,
- atomic_read(&imp->imp_inflight) == 0,
- obd_timeout * HZ);
-
- if (rc == 0) {
- const char *cli_tgt = obd2cli_tgt(imp->imp_obd);
-
- CERROR("%s: timeout waiting for callback (%d != 0)\n",
- cli_tgt,
- atomic_read(&imp->imp_inflight));
-
- spin_lock(&imp->imp_lock);
- if (atomic_read(&imp->imp_inflight) == 0) {
- int count = atomic_read(&imp->imp_unregistering);
-
- /* We know that "unregistering" rpcs can only
- * survive in the sending or delaying lists (they
- * may be waiting for a long reply unlink on
- * sluggish nets). Let's check this. If there
- * are no inflight rpcs and unregistering != 0,
- * this is a bug.
- */
- LASSERTF(count == 0, "Some RPCs are still unregistering: %d\n",
- count);
-
- /* Let's save one loop as soon as inflight has
- * dropped to zero. No new inflight rpcs are
- * possible at this point.
- */
- rc = 0;
- } else {
- list_for_each_entry_safe(req, n,
- &imp->imp_sending_list, rq_list) {
- DEBUG_REQ(D_ERROR, req,
- "still on sending list");
- }
- list_for_each_entry_safe(req, n,
- &imp->imp_delayed_list, rq_list) {
- DEBUG_REQ(D_ERROR, req,
- "still on delayed list");
- }
-
- CERROR("%s: Unregistering RPCs found (%d). Network is sluggish? Waiting them to error out.\n",
- cli_tgt,
- atomic_read(&imp->
- imp_unregistering));
- }
- spin_unlock(&imp->imp_lock);
- }
- } while (rc == 0);
-
- /*
- * Let's additionally check that no new rpcs were added to the
- * import while in the "invalidate" state.
- */
- LASSERT(atomic_read(&imp->imp_inflight) == 0);
- obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE);
- sptlrpc_import_flush_all_ctx(imp);
-
- atomic_dec(&imp->imp_inval_count);
- wake_up_all(&imp->imp_recovery_waitq);
-}
-EXPORT_SYMBOL(ptlrpc_invalidate_import);
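-
-/*
- * A minimal sketch of the invalidation sequence, mirroring the eviction
- * path in ptlrpc_invalidate_import_thread() below (locking and error
- * handling omitted):
- *
- * ptlrpc_deactivate_import(imp); (reject new requests)
- * ptlrpc_invalidate_import(imp); (block until inflight drains)
- * IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
- * ptlrpc_import_recovery_state_machine(imp);
- */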
-
-/* unset imp_invalid */
-void ptlrpc_activate_import(struct obd_import *imp)
-{
- struct obd_device *obd = imp->imp_obd;
-
- spin_lock(&imp->imp_lock);
- if (imp->imp_deactive != 0) {
- spin_unlock(&imp->imp_lock);
- return;
- }
-
- imp->imp_invalid = 0;
- spin_unlock(&imp->imp_lock);
- obd_import_event(obd, imp, IMP_EVENT_ACTIVE);
-}
-EXPORT_SYMBOL(ptlrpc_activate_import);
-
-void ptlrpc_pinger_force(struct obd_import *imp)
-{
- CDEBUG(D_HA, "%s: waking up pinger s:%s\n", obd2cli_tgt(imp->imp_obd),
- ptlrpc_import_state_name(imp->imp_state));
-
- spin_lock(&imp->imp_lock);
- imp->imp_force_verify = 1;
- spin_unlock(&imp->imp_lock);
-
- if (imp->imp_state != LUSTRE_IMP_CONNECTING)
- ptlrpc_pinger_wake_up();
-}
-EXPORT_SYMBOL(ptlrpc_pinger_force);
-
-void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt)
-{
- LASSERT(!imp->imp_dlm_fake);
-
- if (ptlrpc_set_import_discon(imp, conn_cnt)) {
- if (!imp->imp_replayable) {
- CDEBUG(D_HA, "import %s@%s for %s not replayable, auto-deactivating\n",
- obd2cli_tgt(imp->imp_obd),
- imp->imp_connection->c_remote_uuid.uuid,
- imp->imp_obd->obd_name);
- ptlrpc_deactivate_import(imp);
- }
-
- ptlrpc_pinger_force(imp);
- }
-}
-
-int ptlrpc_reconnect_import(struct obd_import *imp)
-{
- int rc;
-
- ptlrpc_pinger_force(imp);
-
- CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
- obd2cli_tgt(imp->imp_obd), obd_timeout);
-
- rc = wait_event_idle_timeout(imp->imp_recovery_waitq,
- !ptlrpc_import_in_recovery(imp),
- obd_timeout * HZ);
- CDEBUG(D_HA, "%s: recovery finished s:%s\n", obd2cli_tgt(imp->imp_obd),
- ptlrpc_import_state_name(imp->imp_state));
- return rc == 0 ? -ETIMEDOUT : 0;
-}
-EXPORT_SYMBOL(ptlrpc_reconnect_import);
-
-/**
- * Connection on import \a imp is changed to another one (if more than one is
- * present). We typically choose the connection that we have not tried to
- * connect to for the longest time.
- */
-static int import_select_connection(struct obd_import *imp)
-{
- struct obd_import_conn *imp_conn = NULL, *conn;
- struct obd_export *dlmexp;
- char *target_start;
- int target_len, tried_all = 1;
-
- spin_lock(&imp->imp_lock);
-
- if (list_empty(&imp->imp_conn_list)) {
- CERROR("%s: no connections available\n",
- imp->imp_obd->obd_name);
- spin_unlock(&imp->imp_lock);
- return -EINVAL;
- }
-
- list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
- CDEBUG(D_HA, "%s: connect to NID %s last attempt %llu\n",
- imp->imp_obd->obd_name,
- libcfs_nid2str(conn->oic_conn->c_peer.nid),
- conn->oic_last_attempt);
-
- /* If we have not tried this connection since
- * the last successful attempt, go with this one
- */
- if ((conn->oic_last_attempt == 0) ||
- time_before_eq64(conn->oic_last_attempt,
- imp->imp_last_success_conn)) {
- imp_conn = conn;
- tried_all = 0;
- break;
- }
-
- /* If all of the connections have already been tried
- * since the last successful connection, just choose the
- * least recently used
- */
- if (!imp_conn)
- imp_conn = conn;
- else if (time_before64(conn->oic_last_attempt,
- imp_conn->oic_last_attempt))
- imp_conn = conn;
- }
-
- /* if not found, simply choose the current one */
- if (!imp_conn || imp->imp_force_reconnect) {
- LASSERT(imp->imp_conn_current);
- imp_conn = imp->imp_conn_current;
- tried_all = 0;
- }
- LASSERT(imp_conn->oic_conn);
-
- /* If we've tried everything, and we're back to the beginning of the
- * list, increase our timeout and try again. It will be reset when
- * we do finally connect. (FIXME: really we should wait for all network
- * state associated with the last connection attempt to drain before
- * trying to reconnect on it.)
- */
- if (tried_all && (imp->imp_conn_list.next == &imp_conn->oic_item)) {
- struct adaptive_timeout *at = &imp->imp_at.iat_net_latency;
-
- if (at_get(at) < CONNECTION_SWITCH_MAX) {
- at_measured(at, at_get(at) + CONNECTION_SWITCH_INC);
- if (at_get(at) > CONNECTION_SWITCH_MAX)
- at_reset(at, CONNECTION_SWITCH_MAX);
- }
- LASSERT(imp_conn->oic_last_attempt);
- CDEBUG(D_HA, "%s: tried all connections, increasing latency to %ds\n",
- imp->imp_obd->obd_name, at_get(at));
- }
-
- imp_conn->oic_last_attempt = get_jiffies_64();
-
- /* switch connection, don't mind if it's the same as the current one */
- ptlrpc_connection_put(imp->imp_connection);
- imp->imp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
-
- dlmexp = class_conn2export(&imp->imp_dlm_handle);
- ptlrpc_connection_put(dlmexp->exp_connection);
- dlmexp->exp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
- class_export_put(dlmexp);
-
- if (imp->imp_conn_current != imp_conn) {
- if (imp->imp_conn_current) {
- deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
- &target_start, &target_len);
-
- CDEBUG(D_HA, "%s: Connection changing to %.*s (at %s)\n",
- imp->imp_obd->obd_name,
- target_len, target_start,
- libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
- }
-
- imp->imp_conn_current = imp_conn;
- }
-
- CDEBUG(D_HA, "%s: import %p using connection %s/%s\n",
- imp->imp_obd->obd_name, imp, imp_conn->oic_uuid.uuid,
- libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
-
- spin_unlock(&imp->imp_lock);
-
- return 0;
-}
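-
-/*
- * Worked example of the backoff above, assuming illustrative values
- * CONNECTION_SWITCH_INC = 5 and CONNECTION_SWITCH_MAX = 20 (the real
- * constants live in headers not shown here): each full sweep through
- * the connection list without a success raises the net latency
- * estimate by 5s via at_measured(), saturating at 20s; the estimate
- * is re-initialized on a successful connect (see the at_init() call
- * in ptlrpc_connect_interpret()).
- */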
-
-/*
- * must be called under imp_lock
- */
-static int ptlrpc_first_transno(struct obd_import *imp, __u64 *transno)
-{
- struct ptlrpc_request *req;
-
- /* The requests in committed_list always have smaller transnos than
- * the requests in replay_list
- */
- if (!list_empty(&imp->imp_committed_list)) {
- req = list_first_entry(&imp->imp_committed_list,
- struct ptlrpc_request, rq_replay_list);
- *transno = req->rq_transno;
- if (req->rq_transno == 0) {
- DEBUG_REQ(D_ERROR, req,
- "zero transno in committed_list");
- LBUG();
- }
- return 1;
- }
- if (!list_empty(&imp->imp_replay_list)) {
- req = list_first_entry(&imp->imp_replay_list,
- struct ptlrpc_request, rq_replay_list);
- *transno = req->rq_transno;
- if (req->rq_transno == 0) {
- DEBUG_REQ(D_ERROR, req, "zero transno in replay_list");
- LBUG();
- }
- return 1;
- }
- return 0;
-}
-
-/**
- * Attempt to (re)connect import \a imp. This includes all preparations,
- * initializing CONNECT RPC request and passing it to ptlrpcd for
- * actual sending.
- * Returns 0 on success or error code.
- */
-int ptlrpc_connect_import(struct obd_import *imp)
-{
- struct obd_device *obd = imp->imp_obd;
- int initial_connect = 0;
- int set_transno = 0;
- __u64 committed_before_reconnect = 0;
- struct ptlrpc_request *request;
- char *bufs[] = { NULL,
- obd2cli_tgt(imp->imp_obd),
- obd->obd_uuid.uuid,
- (char *)&imp->imp_dlm_handle,
- (char *)&imp->imp_connect_data };
- struct ptlrpc_connect_async_args *aa;
- int rc;
-
- spin_lock(&imp->imp_lock);
- if (imp->imp_state == LUSTRE_IMP_CLOSED) {
- spin_unlock(&imp->imp_lock);
- CERROR("can't connect to a closed import\n");
- return -EINVAL;
- } else if (imp->imp_state == LUSTRE_IMP_FULL) {
- spin_unlock(&imp->imp_lock);
- CERROR("already connected\n");
- return 0;
- } else if (imp->imp_state == LUSTRE_IMP_CONNECTING ||
- imp->imp_connected) {
- spin_unlock(&imp->imp_lock);
- CERROR("already connecting\n");
- return -EALREADY;
- }
-
- IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CONNECTING);
-
- imp->imp_conn_cnt++;
- imp->imp_resend_replay = 0;
-
- if (!lustre_handle_is_used(&imp->imp_remote_handle))
- initial_connect = 1;
- else
- committed_before_reconnect = imp->imp_peer_committed_transno;
-
- set_transno = ptlrpc_first_transno(imp,
- &imp->imp_connect_data.ocd_transno);
- spin_unlock(&imp->imp_lock);
-
- rc = import_select_connection(imp);
- if (rc)
- goto out;
-
- rc = sptlrpc_import_sec_adapt(imp, NULL, NULL);
- if (rc)
- goto out;
-
- /* Reset connect flags to the originally requested flags, so that
- * if the server is updated on the fly we will get the new features.
- */
- imp->imp_connect_data.ocd_connect_flags = imp->imp_connect_flags_orig;
- /* Reset ocd_version each time so the server knows the exact versions */
- imp->imp_connect_data.ocd_version = LUSTRE_VERSION_CODE;
- imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
- imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
-
- rc = obd_reconnect(NULL, imp->imp_obd->obd_self_export, obd,
- &obd->obd_uuid, &imp->imp_connect_data, NULL);
- if (rc)
- goto out;
-
- request = ptlrpc_request_alloc(imp, &RQF_MDS_CONNECT);
- if (!request) {
- rc = -ENOMEM;
- goto out;
- }
-
- rc = ptlrpc_request_bufs_pack(request, LUSTRE_OBD_VERSION,
- imp->imp_connect_op, bufs, NULL);
- if (rc) {
- ptlrpc_request_free(request);
- goto out;
- }
-
- /* Report the rpc service time to the server so that it knows how long
- * to wait for clients to join recovery
- */
- lustre_msg_set_service_time(request->rq_reqmsg,
- at_timeout2est(request->rq_timeout));
-
- /* The amount of time we give the server to process the connect req.
- * import_select_connection will increase the net latency on
- * repeated reconnect attempts to cover slow networks.
- * We override/ignore the server rpc completion estimate here,
- * which may be large if this is a reconnect attempt
- */
- request->rq_timeout = INITIAL_CONNECT_TIMEOUT;
- lustre_msg_set_timeout(request->rq_reqmsg, request->rq_timeout);
-
- request->rq_no_resend = 1;
- request->rq_no_delay = 1;
- request->rq_send_state = LUSTRE_IMP_CONNECTING;
- /* Allow a slightly larger reply for future growth compatibility */
- req_capsule_set_size(&request->rq_pill, &RMF_CONNECT_DATA, RCL_SERVER,
- sizeof(struct obd_connect_data) +
- 16 * sizeof(__u64));
- ptlrpc_request_set_replen(request);
- request->rq_interpret_reply = ptlrpc_connect_interpret;
-
- BUILD_BUG_ON(sizeof(*aa) > sizeof(request->rq_async_args));
- aa = ptlrpc_req_async_args(request);
- memset(aa, 0, sizeof(*aa));
-
- aa->pcaa_peer_committed = committed_before_reconnect;
- aa->pcaa_initial_connect = initial_connect;
-
- if (aa->pcaa_initial_connect) {
- spin_lock(&imp->imp_lock);
- imp->imp_replayable = 1;
- spin_unlock(&imp->imp_lock);
- lustre_msg_add_op_flags(request->rq_reqmsg,
- MSG_CONNECT_INITIAL);
- }
-
- if (set_transno)
- lustre_msg_add_op_flags(request->rq_reqmsg,
- MSG_CONNECT_TRANSNO);
-
- DEBUG_REQ(D_RPCTRACE, request, "(re)connect request (timeout %d)",
- request->rq_timeout);
- ptlrpcd_add_req(request);
- rc = 0;
-out:
- if (rc != 0)
- IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON);
-
- return rc;
-}
-EXPORT_SYMBOL(ptlrpc_connect_import);
-
-static void ptlrpc_maybe_ping_import_soon(struct obd_import *imp)
-{
- int force_verify;
-
- spin_lock(&imp->imp_lock);
- force_verify = imp->imp_force_verify != 0;
- spin_unlock(&imp->imp_lock);
-
- if (force_verify)
- ptlrpc_pinger_wake_up();
-}
-
-static int ptlrpc_busy_reconnect(int rc)
-{
- return (rc == -EBUSY) || (rc == -EAGAIN);
-}
-
-static int ptlrpc_connect_set_flags(struct obd_import *imp,
- struct obd_connect_data *ocd,
- u64 old_connect_flags,
- struct obd_export *exp, int init_connect)
-{
- struct client_obd *cli = &imp->imp_obd->u.cli;
- static bool warned;
-
- if ((imp->imp_connect_flags_orig & OBD_CONNECT_IBITS) &&
- !(ocd->ocd_connect_flags & OBD_CONNECT_IBITS)) {
- LCONSOLE_WARN("%s: MDS %s does not support ibits lock, either very old or invalid: requested %#llx, replied %#llx\n",
- imp->imp_obd->obd_name,
- imp->imp_connection->c_remote_uuid.uuid,
- imp->imp_connect_flags_orig,
- ocd->ocd_connect_flags);
- return -EPROTO;
- }
-
- spin_lock(&imp->imp_lock);
- list_del(&imp->imp_conn_current->oic_item);
- list_add(&imp->imp_conn_current->oic_item, &imp->imp_conn_list);
- imp->imp_last_success_conn = imp->imp_conn_current->oic_last_attempt;
-
- spin_unlock(&imp->imp_lock);
-
- if (!warned && (ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
- (ocd->ocd_version > LUSTRE_VERSION_CODE +
- LUSTRE_VERSION_OFFSET_WARN ||
- ocd->ocd_version < LUSTRE_VERSION_CODE -
- LUSTRE_VERSION_OFFSET_WARN)) {
- /*
- * Sigh, some compilers do not like #ifdef in the middle
- * of macro arguments
- */
- const char *older = "older than client. Consider upgrading server";
- const char *newer = "newer than client. Consider recompiling application";
-
- LCONSOLE_WARN("Server %s version (%d.%d.%d.%d) is much %s (%s)\n",
- obd2cli_tgt(imp->imp_obd),
- OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
- OBD_OCD_VERSION_MINOR(ocd->ocd_version),
- OBD_OCD_VERSION_PATCH(ocd->ocd_version),
- OBD_OCD_VERSION_FIX(ocd->ocd_version),
- ocd->ocd_version > LUSTRE_VERSION_CODE ?
- newer : older, LUSTRE_VERSION_STRING);
- warned = true;
- }
-
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
- /*
- * Check whether the server has the LU-1252 fix applied, so it
- * does not always swab the IR MNE entries. Do this only once per
- * connection. This fixup is version-limited, because we don't
- * want to carry the OBD_CONNECT_MNE_SWAB flag around forever,
- * just as long as we need interop with unpatched 2.2 servers.
- * For newer servers, the client will do MNE swabbing only as
- * needed. LU-1644
- */
- if (unlikely((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
- !(ocd->ocd_connect_flags & OBD_CONNECT_MNE_SWAB) &&
- OBD_OCD_VERSION_MAJOR(ocd->ocd_version) == 2 &&
- OBD_OCD_VERSION_MINOR(ocd->ocd_version) == 2 &&
- OBD_OCD_VERSION_PATCH(ocd->ocd_version) < 55 &&
- !strcmp(imp->imp_obd->obd_type->typ_name,
- LUSTRE_MGC_NAME)))
- imp->imp_need_mne_swab = 1;
- else /* clear if server was upgraded since last connect */
- imp->imp_need_mne_swab = 0;
-#endif
-
- if (ocd->ocd_connect_flags & OBD_CONNECT_CKSUM) {
- /*
- * We sent to the server ocd_cksum_types with bits set
- * for algorithms we understand. The server masked off
- * the checksum types it doesn't support
- */
- if (!(ocd->ocd_cksum_types & cksum_types_supported_client())) {
- LCONSOLE_WARN("The negotiation of the checksum algorithm to use with server %s failed (%x/%x), disabling checksums\n",
- obd2cli_tgt(imp->imp_obd),
- ocd->ocd_cksum_types,
- cksum_types_supported_client());
- cli->cl_checksum = 0;
- cli->cl_supp_cksum_types = OBD_CKSUM_ADLER;
- } else {
- cli->cl_supp_cksum_types = ocd->ocd_cksum_types;
- }
- } else {
- /*
- * The server does not support OBD_CONNECT_CKSUM.
- * Enforce ADLER for backward compatibility
- */
- cli->cl_supp_cksum_types = OBD_CKSUM_ADLER;
- }
- cli->cl_cksum_type = cksum_type_select(cli->cl_supp_cksum_types);
-
- if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
- cli->cl_max_pages_per_rpc =
- min(ocd->ocd_brw_size >> PAGE_SHIFT,
- cli->cl_max_pages_per_rpc);
- else if (imp->imp_connect_op == MDS_CONNECT ||
- imp->imp_connect_op == MGS_CONNECT)
- cli->cl_max_pages_per_rpc = 1;
-
- LASSERT((cli->cl_max_pages_per_rpc <= PTLRPC_MAX_BRW_PAGES) &&
- (cli->cl_max_pages_per_rpc > 0));
-
- client_adjust_max_dirty(cli);
-
- /*
- * Update client max modify RPCs in flight with value returned
- * by the server
- */
- if (ocd->ocd_connect_flags & OBD_CONNECT_MULTIMODRPCS)
- cli->cl_max_mod_rpcs_in_flight = min(
- cli->cl_max_mod_rpcs_in_flight,
- ocd->ocd_maxmodrpcs);
- else
- cli->cl_max_mod_rpcs_in_flight = 1;
-
- /*
- * Reset ns_connect_flags only for the initial connect. It might
- * be changed while using the FS, and if we reset it on reconnect
- * this leads to losing user settings made before, such as
- * disabling lru_resize, etc.
- */
- if (old_connect_flags != exp_connect_flags(exp) || init_connect) {
- CDEBUG(D_HA, "%s: Resetting ns_connect_flags to server flags: %#llx\n",
- imp->imp_obd->obd_name, ocd->ocd_connect_flags);
- imp->imp_obd->obd_namespace->ns_connect_flags =
- ocd->ocd_connect_flags;
- imp->imp_obd->obd_namespace->ns_orig_connect_flags =
- ocd->ocd_connect_flags;
- }
-
- if (ocd->ocd_connect_flags & OBD_CONNECT_AT)
- /*
- * We need a per-message support flag, because
- * a. we don't know if the incoming connect reply
- * supports AT or not (in reply_in_callback)
- * until we unpack it.
- * b. a failed-over server means the export and flags are
- * gone (in ptlrpc_send_reply).
- * Can only be set when we know AT is supported at
- * both ends
- */
- imp->imp_msghdr_flags |= MSGHDR_AT_SUPPORT;
- else
- imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
-
- imp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18;
-
- return 0;
-}
-
-/**
- * Add all replay requests back to the unreplied list before starting replay,
- * so that we can make sure the known replied XID only ever increases,
- * even when replaying requests.
- */
-static void ptlrpc_prepare_replay(struct obd_import *imp)
-{
- struct ptlrpc_request *req;
-
- if (imp->imp_state != LUSTRE_IMP_REPLAY ||
- imp->imp_resend_replay)
- return;
-
- /*
- * If the server was restarted during replay, the requests may
- * have been added to the unreplied list in a former replay.
- */
- spin_lock(&imp->imp_lock);
-
- list_for_each_entry(req, &imp->imp_committed_list, rq_replay_list) {
- if (list_empty(&req->rq_unreplied_list))
- ptlrpc_add_unreplied(req);
- }
-
- list_for_each_entry(req, &imp->imp_replay_list, rq_replay_list) {
- if (list_empty(&req->rq_unreplied_list))
- ptlrpc_add_unreplied(req);
- }
-
- imp->imp_known_replied_xid = ptlrpc_known_replied_xid(imp);
- spin_unlock(&imp->imp_lock);
-}
-
-/**
- * interpret_reply callback for connect RPCs.
- * Looks into the returned status of the connect operation and decides
- * what to do with the import - i.e. enter recovery, promote it to
- * full state for normal operations, or disconnect it due to an error.
- */
-static int ptlrpc_connect_interpret(const struct lu_env *env,
- struct ptlrpc_request *request,
- void *data, int rc)
-{
- struct ptlrpc_connect_async_args *aa = data;
- struct obd_import *imp = request->rq_import;
- struct lustre_handle old_hdl;
- __u64 old_connect_flags;
- int msg_flags;
- struct obd_connect_data *ocd;
- struct obd_export *exp;
- int ret;
-
- spin_lock(&imp->imp_lock);
- if (imp->imp_state == LUSTRE_IMP_CLOSED) {
- imp->imp_connect_tried = 1;
- spin_unlock(&imp->imp_lock);
- return 0;
- }
-
- if (rc) {
- /* if this is a reconnect to a busy export, there is no need to
- * select a new target for connecting
- */
- imp->imp_force_reconnect = ptlrpc_busy_reconnect(rc);
- spin_unlock(&imp->imp_lock);
- ptlrpc_maybe_ping_import_soon(imp);
- goto out;
- }
-
- /*
- * LU-7558: indicate that we are interpreting the connect reply;
- * ptlrpc_connect_import() will not try to reconnect until the
- * interpretation finishes.
- */
- imp->imp_connected = 1;
- spin_unlock(&imp->imp_lock);
-
- LASSERT(imp->imp_conn_current);
-
- msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
-
- ret = req_capsule_get_size(&request->rq_pill, &RMF_CONNECT_DATA,
- RCL_SERVER);
- /* the server-replied obd_connect_data is always bigger */
- ocd = req_capsule_server_sized_get(&request->rq_pill,
- &RMF_CONNECT_DATA, ret);
-
- if (!ocd) {
- CERROR("%s: no connect data from server\n",
- imp->imp_obd->obd_name);
- rc = -EPROTO;
- goto out;
- }
-
- spin_lock(&imp->imp_lock);
-
- /* All imports are pingable */
- imp->imp_pingable = 1;
- imp->imp_force_reconnect = 0;
- imp->imp_force_verify = 0;
-
- imp->imp_connect_data = *ocd;
-
- CDEBUG(D_HA, "%s: connect to target with instance %u\n",
- imp->imp_obd->obd_name, ocd->ocd_instance);
- exp = class_conn2export(&imp->imp_dlm_handle);
-
- spin_unlock(&imp->imp_lock);
-
- if (!exp) {
- /* This could happen if the export is cleaned up during
- * the connect attempt
- */
- CERROR("%s: missing export after connect\n",
- imp->imp_obd->obd_name);
- rc = -ENODEV;
- goto out;
- }
-
- /* check that the server granted a subset of the flags we asked for. */
- if ((ocd->ocd_connect_flags & imp->imp_connect_flags_orig) !=
- ocd->ocd_connect_flags) {
- CERROR("%s: Server didn't grant the asked for subset of flags: asked=%#llx granted=%#llx\n",
- imp->imp_obd->obd_name, imp->imp_connect_flags_orig,
- ocd->ocd_connect_flags);
- rc = -EPROTO;
- goto out;
- }
-
- old_connect_flags = exp_connect_flags(exp);
- exp->exp_connect_data = *ocd;
- imp->imp_obd->obd_self_export->exp_connect_data = *ocd;
-
- /*
- * The net statistics after a (re-)connect are not valid anymore,
- * because they may reflect different routing, etc.
- */
- at_init(&imp->imp_at.iat_net_latency, 0, 0);
- ptlrpc_at_adj_net_latency(request,
- lustre_msg_get_service_time(request->rq_repmsg));
-
- /* Import flags should be updated before waking import at FULL state */
- rc = ptlrpc_connect_set_flags(imp, ocd, old_connect_flags, exp,
- aa->pcaa_initial_connect);
- class_export_put(exp);
- if (rc)
- goto out;
-
- obd_import_event(imp->imp_obd, imp, IMP_EVENT_OCD);
-
- if (aa->pcaa_initial_connect) {
- spin_lock(&imp->imp_lock);
- if (msg_flags & MSG_CONNECT_REPLAYABLE) {
- imp->imp_replayable = 1;
- spin_unlock(&imp->imp_lock);
- CDEBUG(D_HA, "connected to replayable target: %s\n",
- obd2cli_tgt(imp->imp_obd));
- } else {
- imp->imp_replayable = 0;
- spin_unlock(&imp->imp_lock);
- }
-
- /* if applicable, adjust the imp->imp_msg_magic here
- * according to the reply flags
- */
-
- imp->imp_remote_handle =
- *lustre_msg_get_handle(request->rq_repmsg);
-
- /* Initial connects are allowed for clients with non-random
- * uuids when servers are in recovery. Simply signal the
- * server that replay is complete and wait in REPLAY_WAIT.
- */
- if (msg_flags & MSG_CONNECT_RECOVERING) {
- CDEBUG(D_HA, "connect to %s during recovery\n",
- obd2cli_tgt(imp->imp_obd));
- IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_LOCKS);
- } else {
- IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
- ptlrpc_activate_import(imp);
- }
-
- rc = 0;
- goto finish;
- }
-
- /* Determine what recovery state to move the import to. */
- if (msg_flags & MSG_CONNECT_RECONNECT) {
- memset(&old_hdl, 0, sizeof(old_hdl));
- if (!memcmp(&old_hdl, lustre_msg_get_handle(request->rq_repmsg),
- sizeof(old_hdl))) {
- LCONSOLE_WARN("Reconnect to %s (at @%s) failed due bad handle %#llx\n",
- obd2cli_tgt(imp->imp_obd),
- imp->imp_connection->c_remote_uuid.uuid,
- imp->imp_dlm_handle.cookie);
- rc = -ENOTCONN;
- goto out;
- }
-
- if (memcmp(&imp->imp_remote_handle,
- lustre_msg_get_handle(request->rq_repmsg),
- sizeof(imp->imp_remote_handle))) {
- int level = msg_flags & MSG_CONNECT_RECOVERING ?
- D_HA : D_WARNING;
-
- /* Bug 16611/14775: if the server handle has changed,
- * that means some sort of disconnection happened.
- * If the server is not in recovery, that also means it
- * already erased all of our state because of a previous
- * eviction. If it is in recovery, we are safe to
- * participate since we can reestablish all of our state
- * with the server again
- */
- if ((msg_flags & MSG_CONNECT_RECOVERING)) {
- CDEBUG(level, "%s@%s changed server handle from %#llx to %#llx but is still in recovery\n",
- obd2cli_tgt(imp->imp_obd),
- imp->imp_connection->c_remote_uuid.uuid,
- imp->imp_remote_handle.cookie,
- lustre_msg_get_handle(
- request->rq_repmsg)->cookie);
- } else {
- LCONSOLE_WARN("Evicted from %s (at %s) after server handle changed from %#llx to %#llx\n",
- obd2cli_tgt(imp->imp_obd),
- imp->imp_connection->c_remote_uuid.uuid,
- imp->imp_remote_handle.cookie,
- lustre_msg_get_handle(
- request->rq_repmsg)->cookie);
- }
-
- imp->imp_remote_handle =
- *lustre_msg_get_handle(request->rq_repmsg);
-
- if (!(msg_flags & MSG_CONNECT_RECOVERING)) {
- IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
- rc = 0;
- goto finish;
- }
-
- } else {
- CDEBUG(D_HA, "reconnected to %s@%s after partition\n",
- obd2cli_tgt(imp->imp_obd),
- imp->imp_connection->c_remote_uuid.uuid);
- }
-
- if (imp->imp_invalid) {
- CDEBUG(D_HA, "%s: reconnected but import is invalid; marking evicted\n",
- imp->imp_obd->obd_name);
- IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
- } else if (msg_flags & MSG_CONNECT_RECOVERING) {
- CDEBUG(D_HA, "%s: reconnected to %s during replay\n",
- imp->imp_obd->obd_name,
- obd2cli_tgt(imp->imp_obd));
-
- spin_lock(&imp->imp_lock);
- imp->imp_resend_replay = 1;
- spin_unlock(&imp->imp_lock);
-
- IMPORT_SET_STATE(imp, imp->imp_replay_state);
- } else {
- IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
- }
- } else if ((msg_flags & MSG_CONNECT_RECOVERING) && !imp->imp_invalid) {
- LASSERT(imp->imp_replayable);
- imp->imp_remote_handle =
- *lustre_msg_get_handle(request->rq_repmsg);
- imp->imp_last_replay_transno = 0;
- imp->imp_replay_cursor = &imp->imp_committed_list;
- IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
- } else {
- DEBUG_REQ(D_HA, request, "%s: evicting (reconnect/recover flags not set: %x)",
- imp->imp_obd->obd_name, msg_flags);
- imp->imp_remote_handle =
- *lustre_msg_get_handle(request->rq_repmsg);
- IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
- }
-
- /* Sanity checks for a reconnected import. */
- if (!(imp->imp_replayable) != !(msg_flags & MSG_CONNECT_REPLAYABLE))
- CERROR("imp_replayable flag does not match server after reconnect. We should LBUG right here.\n");
-
- if (lustre_msg_get_last_committed(request->rq_repmsg) > 0 &&
- lustre_msg_get_last_committed(request->rq_repmsg) <
- aa->pcaa_peer_committed)
- CERROR("%s went back in time (transno %lld was previously committed, server now claims %lld)! See https://bugzilla.lustre.org/show_bug.cgi?id=9646\n",
- obd2cli_tgt(imp->imp_obd), aa->pcaa_peer_committed,
- lustre_msg_get_last_committed(request->rq_repmsg));
-
-finish:
- ptlrpc_prepare_replay(imp);
- rc = ptlrpc_import_recovery_state_machine(imp);
- if (rc == -ENOTCONN) {
- CDEBUG(D_HA, "evicted/aborted by %s@%s during recovery; invalidating and reconnecting\n",
- obd2cli_tgt(imp->imp_obd),
- imp->imp_connection->c_remote_uuid.uuid);
- ptlrpc_connect_import(imp);
- spin_lock(&imp->imp_lock);
- imp->imp_connected = 0;
- imp->imp_connect_tried = 1;
- spin_unlock(&imp->imp_lock);
- return 0;
- }
-
-out:
- spin_lock(&imp->imp_lock);
- imp->imp_connected = 0;
- imp->imp_connect_tried = 1;
- spin_unlock(&imp->imp_lock);
-
- if (rc != 0) {
- IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON);
- if (rc == -EACCES) {
- /*
- * Give up trying to reconnect;
- * EACCES means the client has no permission for the connection
- */
- imp->imp_obd->obd_no_recov = 1;
- ptlrpc_deactivate_import(imp);
- }
-
- if (rc == -EPROTO) {
- struct obd_connect_data *ocd;
-
- /* reply message might not be ready */
- if (!request->rq_repmsg)
- return -EPROTO;
-
- ocd = req_capsule_server_get(&request->rq_pill,
- &RMF_CONNECT_DATA);
- if (ocd &&
- (ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
- (ocd->ocd_version != LUSTRE_VERSION_CODE)) {
- /*
- * Actually servers are only supposed to refuse
- * connection from liblustre clients, so we
- * should never see this from a VFS context
- */
- LCONSOLE_ERROR_MSG(0x16a, "Server %s version (%d.%d.%d.%d) refused connection from this client with an incompatible version (%s). Client must be recompiled\n",
- obd2cli_tgt(imp->imp_obd),
- OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
- OBD_OCD_VERSION_MINOR(ocd->ocd_version),
- OBD_OCD_VERSION_PATCH(ocd->ocd_version),
- OBD_OCD_VERSION_FIX(ocd->ocd_version),
- LUSTRE_VERSION_STRING);
- ptlrpc_deactivate_import(imp);
- IMPORT_SET_STATE(imp, LUSTRE_IMP_CLOSED);
- }
- return -EPROTO;
- }
-
- ptlrpc_maybe_ping_import_soon(imp);
-
- CDEBUG(D_HA, "recovery of %s on %s failed (%d)\n",
- obd2cli_tgt(imp->imp_obd),
- (char *)imp->imp_connection->c_remote_uuid.uuid, rc);
- }
-
- wake_up_all(&imp->imp_recovery_waitq);
- return rc;
-}
-
-/**
- * interpret callback for "completed replay" RPCs.
- * \see signal_completed_replay
- */
-static int completed_replay_interpret(const struct lu_env *env,
- struct ptlrpc_request *req,
- void *data, int rc)
-{
- atomic_dec(&req->rq_import->imp_replay_inflight);
- if (req->rq_status == 0 &&
- !req->rq_import->imp_vbr_failed) {
- ptlrpc_import_recovery_state_machine(req->rq_import);
- } else {
- if (req->rq_import->imp_vbr_failed) {
- CDEBUG(D_WARNING,
- "%s: version recovery fails, reconnecting\n",
- req->rq_import->imp_obd->obd_name);
- } else {
- CDEBUG(D_HA, "%s: LAST_REPLAY message error: %d, reconnecting\n",
- req->rq_import->imp_obd->obd_name,
- req->rq_status);
- }
- ptlrpc_connect_import(req->rq_import);
- }
-
- return 0;
-}
-
-/**
- * Let the server know that we have no requests to replay anymore,
- * achieved by just sending a PING request
- */
-static int signal_completed_replay(struct obd_import *imp)
-{
- struct ptlrpc_request *req;
-
- if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_FINISH_REPLAY)))
- return 0;
-
- LASSERT(atomic_read(&imp->imp_replay_inflight) == 0);
- atomic_inc(&imp->imp_replay_inflight);
-
- req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING, LUSTRE_OBD_VERSION,
- OBD_PING);
- if (!req) {
- atomic_dec(&imp->imp_replay_inflight);
- return -ENOMEM;
- }
-
- ptlrpc_request_set_replen(req);
- req->rq_send_state = LUSTRE_IMP_REPLAY_WAIT;
- lustre_msg_add_flags(req->rq_reqmsg,
- MSG_LOCK_REPLAY_DONE | MSG_REQ_REPLAY_DONE);
- if (AT_OFF)
- req->rq_timeout *= 3;
- req->rq_interpret_reply = completed_replay_interpret;
-
- ptlrpcd_add_req(req);
- return 0;
-}
-
-/**
- * In kernel code all import invalidation happens in its own
- * separate thread, so that any application that happened to encounter
- * a problem can still be killed or otherwise continue
- */
-static int ptlrpc_invalidate_import_thread(void *data)
-{
- struct obd_import *imp = data;
-
- unshare_fs_struct();
-
- CDEBUG(D_HA, "thread invalidate import %s to %s@%s\n",
- imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
- imp->imp_connection->c_remote_uuid.uuid);
-
- ptlrpc_invalidate_import(imp);
-
- if (obd_dump_on_eviction) {
- CERROR("dump the log upon eviction\n");
- libcfs_debug_dumplog();
- }
-
- IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
- ptlrpc_import_recovery_state_machine(imp);
-
- class_import_put(imp);
- return 0;
-}
-
-/**
- * This is the state machine for client-side recovery on import.
- *
- * Typically we have two possible paths. If we come to a server that is not
- * in recovery, we just enter the IMP_EVICTED state, invalidate our import
- * state and reconnect from scratch.
- * If we come to a server that is in recovery, we enter the IMP_REPLAY import
- * state. We go through our list of requests to replay and send them to the
- * server one by one.
- * After sending all requests from the list we change the import state to
- * IMP_REPLAY_LOCKS and re-request all the locks we believe we have from the
- * server, as well as all the locks we don't yet have, and wait for the
- * server to grant them.
- * After that we send a special "replay completed" request and change the
- * import state to IMP_REPLAY_WAIT.
- * Upon receiving the reply to that "replay completed" RPC we enter the
- * IMP_RECOVER state and resend all requests from the sending list.
- * After that we promote the import to the FULL state and send all delayed
- * requests; the import is fully operational after that.
- */
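-/*
- * A condensed sketch of the transitions handled below (state names
- * abbreviated from LUSTRE_IMP_*):
- *
- * CONNECTING -+-> EVICTED (invalidate) -----------------+-> RECOVER -> FULL
- *             +-> REPLAY -> REPLAY_LOCKS -> REPLAY_WAIT -+
- */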
-int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
-{
- int rc = 0;
- int inflight;
- char *target_start;
- int target_len;
-
- if (imp->imp_state == LUSTRE_IMP_EVICTED) {
- deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
- &target_start, &target_len);
- /* Don't care about MGC eviction */
- if (strcmp(imp->imp_obd->obd_type->typ_name,
- LUSTRE_MGC_NAME) != 0) {
- LCONSOLE_ERROR_MSG(0x167, "%s: This client was evicted by %.*s; in progress operations using this service will fail.\n",
- imp->imp_obd->obd_name, target_len,
- target_start);
- }
- CDEBUG(D_HA, "evicted from %s@%s; invalidating\n",
- obd2cli_tgt(imp->imp_obd),
- imp->imp_connection->c_remote_uuid.uuid);
- /* reset vbr_failed flag upon eviction */
- spin_lock(&imp->imp_lock);
- imp->imp_vbr_failed = 0;
- spin_unlock(&imp->imp_lock);
-
- {
- struct task_struct *task;
- /* bug 17802: XXX client_disconnect_export vs connect request
- * race. If the client is evicted at this time and we start the
- * invalidate thread without a reference to the import, the
- * import can be freed at the same time.
- */
- class_import_get(imp);
- task = kthread_run(ptlrpc_invalidate_import_thread, imp,
- "ll_imp_inval");
- if (IS_ERR(task)) {
- class_import_put(imp);
- CERROR("error starting invalidate thread: %d\n", rc);
- rc = PTR_ERR(task);
- } else {
- rc = 0;
- }
- return rc;
- }
- }
-
- if (imp->imp_state == LUSTRE_IMP_REPLAY) {
- CDEBUG(D_HA, "replay requested by %s\n",
- obd2cli_tgt(imp->imp_obd));
- rc = ptlrpc_replay_next(imp, &inflight);
- if (inflight == 0 &&
- atomic_read(&imp->imp_replay_inflight) == 0) {
- IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_LOCKS);
- rc = ldlm_replay_locks(imp);
- if (rc)
- goto out;
- }
- rc = 0;
- }
-
- if (imp->imp_state == LUSTRE_IMP_REPLAY_LOCKS)
- if (atomic_read(&imp->imp_replay_inflight) == 0) {
- IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_WAIT);
- rc = signal_completed_replay(imp);
- if (rc)
- goto out;
- }
-
- if (imp->imp_state == LUSTRE_IMP_REPLAY_WAIT)
- if (atomic_read(&imp->imp_replay_inflight) == 0)
- IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
-
- if (imp->imp_state == LUSTRE_IMP_RECOVER) {
- CDEBUG(D_HA, "reconnected to %s@%s\n",
- obd2cli_tgt(imp->imp_obd),
- imp->imp_connection->c_remote_uuid.uuid);
-
- rc = ptlrpc_resend(imp);
- if (rc)
- goto out;
- IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
- ptlrpc_activate_import(imp);
-
- deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
- &target_start, &target_len);
- LCONSOLE_INFO("%s: Connection restored to %.*s (at %s)\n",
- imp->imp_obd->obd_name,
- target_len, target_start,
- libcfs_nid2str(imp->imp_connection->c_peer.nid));
- }
-
- if (imp->imp_state == LUSTRE_IMP_FULL) {
- wake_up_all(&imp->imp_recovery_waitq);
- ptlrpc_wake_delayed(imp);
- }
-
-out:
- return rc;
-}
-
-int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
-{
- struct ptlrpc_request *req;
- int rq_opc, rc = 0;
-
- if (imp->imp_obd->obd_force)
- goto set_state;
-
- switch (imp->imp_connect_op) {
- case OST_CONNECT:
- rq_opc = OST_DISCONNECT;
- break;
- case MDS_CONNECT:
- rq_opc = MDS_DISCONNECT;
- break;
- case MGS_CONNECT:
- rq_opc = MGS_DISCONNECT;
- break;
- default:
- rc = -EINVAL;
- CERROR("%s: don't know how to disconnect from %s (connect_op %d): rc = %d\n",
- imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
- imp->imp_connect_op, rc);
- return rc;
- }
-
- if (ptlrpc_import_in_recovery(imp)) {
- unsigned long timeout;
-
- if (AT_OFF) {
- if (imp->imp_server_timeout)
- timeout = obd_timeout * HZ / 2;
- else
- timeout = obd_timeout * HZ;
- } else {
- int idx = import_at_get_index(imp,
- imp->imp_client->cli_request_portal);
- timeout = at_get(&imp->imp_at.iat_service_estimate[idx]) * HZ;
- }
-
- if (wait_event_idle_timeout(imp->imp_recovery_waitq,
- !ptlrpc_import_in_recovery(imp),
- max(timeout, 1UL)) == 0)
- l_wait_event_abortable(
- imp->imp_recovery_waitq,
- !ptlrpc_import_in_recovery(imp));
- }
-
- spin_lock(&imp->imp_lock);
- if (imp->imp_state != LUSTRE_IMP_FULL)
- goto out;
- spin_unlock(&imp->imp_lock);
-
- req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_DISCONNECT,
- LUSTRE_OBD_VERSION, rq_opc);
- if (req) {
- /* We are disconnecting, so do not retry the DISCONNECT rpc if
- * it fails. We can get through the above with a down server
- * if the client doesn't know the server is gone yet.
- */
- req->rq_no_resend = 1;
-
- /* We want client umounts to happen quickly, no matter the
- * server state...
- */
- req->rq_timeout = min_t(int, req->rq_timeout,
- INITIAL_CONNECT_TIMEOUT);
-
- IMPORT_SET_STATE(imp, LUSTRE_IMP_CONNECTING);
- req->rq_send_state = LUSTRE_IMP_CONNECTING;
- ptlrpc_request_set_replen(req);
- rc = ptlrpc_queue_wait(req);
- ptlrpc_req_finished(req);
- }
-
-set_state:
- spin_lock(&imp->imp_lock);
-out:
- if (noclose)
- IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
- else
- IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CLOSED);
- memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle));
- spin_unlock(&imp->imp_lock);
-
- if (rc == -ETIMEDOUT || rc == -ENOTCONN || rc == -ESHUTDOWN)
- rc = 0;
-
- return rc;
-}
-EXPORT_SYMBOL(ptlrpc_disconnect_import);
-
-/* Adaptive Timeout utils */
-extern unsigned int at_min, at_max, at_history;
-
-/*
- * Update at_current with the specified value (bounded by at_min and at_max),
- * as well as the AT history "bins".
- * - Bin into timeslices using AT_BINS bins.
- * - This gives us a max over the last at_history seconds without storing
- * every sample, while still smoothing out a return to normalcy after a
- * slow response.
- * - (E.g. remember the maximum latency in each minute of the last 4 minutes.)
- */
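-/*
- * Worked example, assuming at_history = 240 and AT_BINS = 4, so that
- * binlimit = 60s: a value reported 130s after at_binstart shifts the
- * bins by 130 / 60 = 2 slots, discards the two oldest per-minute
- * maxima, records the new value in bin 0, and recomputes at_current
- * as the maximum of the surviving bins.
- */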
-int at_measured(struct adaptive_timeout *at, unsigned int val)
-{
- unsigned int old = at->at_current;
- time64_t now = ktime_get_real_seconds();
- long binlimit = max_t(long, at_history / AT_BINS, 1);
-
- LASSERT(at);
- CDEBUG(D_OTHER, "add %u to %p time=%lu v=%u (%u %u %u %u)\n",
- val, at, (long)(now - at->at_binstart), at->at_current,
- at->at_hist[0], at->at_hist[1], at->at_hist[2], at->at_hist[3]);
-
- if (val == 0)
- /* 0's don't count, because we never want our timeout to
- * drop to 0, and because 0 could mean an error
- */
- return 0;
-
- spin_lock(&at->at_lock);
-
- if (unlikely(at->at_binstart == 0)) {
- /* Special case to remove default from history */
- at->at_current = val;
- at->at_worst_ever = val;
- at->at_worst_time = now;
- at->at_hist[0] = val;
- at->at_binstart = now;
- } else if (now - at->at_binstart < binlimit) {
- /* in bin 0 */
- at->at_hist[0] = max(val, at->at_hist[0]);
- at->at_current = max(val, at->at_current);
- } else {
- int i, shift;
- unsigned int maxv = val;
- /* move bins over */
- shift = (u32)(now - at->at_binstart) / binlimit;
- LASSERT(shift > 0);
- for (i = AT_BINS - 1; i >= 0; i--) {
- if (i >= shift) {
- at->at_hist[i] = at->at_hist[i - shift];
- maxv = max(maxv, at->at_hist[i]);
- } else {
- at->at_hist[i] = 0;
- }
- }
- at->at_hist[0] = val;
- at->at_current = maxv;
- at->at_binstart += shift * binlimit;
- }
-
- if (at->at_current > at->at_worst_ever) {
- at->at_worst_ever = at->at_current;
- at->at_worst_time = now;
- }
-
- if (at->at_flags & AT_FLG_NOHIST)
- /* Only keep the last reported val; the rest of the history
- * is kept for debugfs only
- */
- at->at_current = val;
-
- if (at_max > 0)
- at->at_current = min(at->at_current, at_max);
- at->at_current = max(at->at_current, at_min);
-
- if (at->at_current != old)
- CDEBUG(D_OTHER, "AT %p change: old=%u new=%u delta=%d (val=%u) hist %u %u %u %u\n",
- at,
- old, at->at_current, at->at_current - old, val,
- at->at_hist[0], at->at_hist[1], at->at_hist[2],
- at->at_hist[3]);
-
- /* if we changed, report the old value */
- old = (at->at_current != old) ? old : 0;
-
- spin_unlock(&at->at_lock);
- return old;
-}
-
-/* Find the imp_at index for a given portal; assign if space available */
-int import_at_get_index(struct obd_import *imp, int portal)
-{
- struct imp_at *at = &imp->imp_at;
- int i;
-
- for (i = 0; i < IMP_AT_MAX_PORTALS; i++) {
- if (at->iat_portal[i] == portal)
- return i;
- if (at->iat_portal[i] == 0)
- /* unused */
- break;
- }
-
- /* Not found in list, add it under a lock */
- spin_lock(&imp->imp_lock);
-
- /* Check unused under lock */
- for (; i < IMP_AT_MAX_PORTALS; i++) {
- if (at->iat_portal[i] == portal)
- goto out;
- if (at->iat_portal[i] == 0)
- /* unused */
- break;
- }
-
- /* Not enough portals? */
- LASSERT(i < IMP_AT_MAX_PORTALS);
-
- at->iat_portal[i] = portal;
-out:
- spin_unlock(&imp->imp_lock);
- return i;
-}
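-
-/*
- * A minimal usage sketch tying the two helpers above together
- * (service_time is a placeholder for a measured RPC service time):
- *
- * idx = import_at_get_index(imp, imp->imp_client->cli_request_portal);
- * at_measured(&imp->imp_at.iat_service_estimate[idx], service_time);
- */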
diff --git a/drivers/staging/lustre/lustre/ptlrpc/layout.c b/drivers/staging/lustre/lustre/ptlrpc/layout.c
deleted file mode 100644
index 417d4a151433..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/layout.c
+++ /dev/null
@@ -1,2232 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ptlrpc/layout.c
- *
- * PTLRPC request format layouts (the "capsule/pill" abstraction)
- *
- * Author: Nikita Danilov <nikita@clusterfs.com>
- */
-/*
- * This file contains the "capsule/pill" abstraction layered above PTLRPC.
- *
- * Every struct ptlrpc_request contains a "pill", which points to a description
- * of the format that the request conforms to.
- */
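-
-/*
- * A minimal sketch of the client-side flow, mirroring the connect path
- * in import.c above (error handling omitted; opc and bufs stand in for
- * the caller's opcode and request buffers):
- *
- * req = ptlrpc_request_alloc(imp, &RQF_MDS_CONNECT);
- * rc = ptlrpc_request_bufs_pack(req, LUSTRE_OBD_VERSION, opc, bufs, NULL);
- * req_capsule_set_size(&req->rq_pill, &RMF_CONNECT_DATA, RCL_SERVER,
- * sizeof(struct obd_connect_data));
- * ptlrpc_request_set_replen(req);
- */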
-
-#define DEBUG_SUBSYSTEM S_RPC
-
-#include <linux/module.h>
-
-#include <uapi/linux/lustre/lustre_idl.h>
-
-#include <llog_swab.h>
-#include <lustre_debug.h>
-#include <lustre_swab.h>
-#include <uapi/linux/lustre/lustre_ver.h>
-#include <obd.h>
-#include <obd_support.h>
-
-/* struct ptlrpc_request, lustre_msg* */
-#include <lustre_req_layout.h>
-#include <lustre_acl.h>
-
-/*
- * RQFs (see below) refer to two struct req_msg_field arrays describing the
- * client request and server reply, respectively.
- */
-/* empty set of fields... for suitable definition of emptiness. */
-static const struct req_msg_field *empty[] = {
- &RMF_PTLRPC_BODY
-};
-
-static const struct req_msg_field *mgs_target_info_only[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MGS_TARGET_INFO
-};
-
-static const struct req_msg_field *mgs_set_info[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MGS_SEND_PARAM
-};
-
-static const struct req_msg_field *mgs_config_read_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MGS_CONFIG_BODY
-};
-
-static const struct req_msg_field *mgs_config_read_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MGS_CONFIG_RES
-};
-
-static const struct req_msg_field *log_cancel_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_LOGCOOKIES
-};
-
-static const struct req_msg_field *mdt_body_only[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MDT_BODY
-};
-
-static const struct req_msg_field *mdt_body_capa[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MDT_BODY,
- &RMF_CAPA1
-};
-
-static const struct req_msg_field *quotactl_only[] = {
- &RMF_PTLRPC_BODY,
- &RMF_OBD_QUOTACTL
-};
-
-static const struct req_msg_field *mdt_close_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MDT_EPOCH,
- &RMF_REC_REINT,
- &RMF_CAPA1
-};
-
-static const struct req_msg_field *mdt_intent_close_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MDT_EPOCH,
- &RMF_REC_REINT,
- &RMF_CAPA1,
- &RMF_CLOSE_DATA
-};
-
-static const struct req_msg_field *obd_statfs_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_OBD_STATFS
-};
-
-static const struct req_msg_field *seq_query_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_SEQ_OPC,
- &RMF_SEQ_RANGE
-};
-
-static const struct req_msg_field *seq_query_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_SEQ_RANGE
-};
-
-static const struct req_msg_field *fld_query_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_FLD_OPC,
- &RMF_FLD_MDFLD
-};
-
-static const struct req_msg_field *fld_query_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_FLD_MDFLD
-};
-
-static const struct req_msg_field *fld_read_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_FLD_MDFLD
-};
-
-static const struct req_msg_field *fld_read_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_GENERIC_DATA
-};
-
-static const struct req_msg_field *mds_getattr_name_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MDT_BODY,
- &RMF_CAPA1,
- &RMF_NAME
-};
-
-static const struct req_msg_field *mds_reint_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_REC_REINT
-};
-
-static const struct req_msg_field *mds_reint_create_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_REC_REINT,
- &RMF_CAPA1,
- &RMF_NAME
-};
-
-static const struct req_msg_field *mds_reint_create_slave_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_REC_REINT,
- &RMF_CAPA1,
- &RMF_NAME,
- &RMF_EADATA,
- &RMF_DLM_REQ
-};
-
-static const struct req_msg_field *mds_reint_create_acl_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_REC_REINT,
- &RMF_CAPA1,
- &RMF_NAME,
- &RMF_EADATA,
- &RMF_DLM_REQ
-};
-
-static const struct req_msg_field *mds_reint_create_sym_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_REC_REINT,
- &RMF_CAPA1,
- &RMF_NAME,
- &RMF_SYMTGT,
- &RMF_DLM_REQ
-};
-
-static const struct req_msg_field *mds_reint_open_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_REC_REINT,
- &RMF_CAPA1,
- &RMF_CAPA2,
- &RMF_NAME,
- &RMF_EADATA
-};
-
-static const struct req_msg_field *mds_reint_open_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MDT_BODY,
- &RMF_MDT_MD,
- &RMF_ACL,
- &RMF_CAPA1,
- &RMF_CAPA2
-};
-
-static const struct req_msg_field *mds_reint_unlink_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_REC_REINT,
- &RMF_CAPA1,
- &RMF_NAME,
- &RMF_DLM_REQ
-};
-
-static const struct req_msg_field *mds_reint_link_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_REC_REINT,
- &RMF_CAPA1,
- &RMF_CAPA2,
- &RMF_NAME,
- &RMF_DLM_REQ
-};
-
-static const struct req_msg_field *mds_reint_rename_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_REC_REINT,
- &RMF_CAPA1,
- &RMF_CAPA2,
- &RMF_NAME,
- &RMF_SYMTGT,
- &RMF_DLM_REQ
-};
-
-static const struct req_msg_field *mds_reint_migrate_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_REC_REINT,
- &RMF_CAPA1,
- &RMF_CAPA2,
- &RMF_NAME,
- &RMF_SYMTGT,
- &RMF_DLM_REQ,
- &RMF_MDT_EPOCH,
- &RMF_CLOSE_DATA
-};
-
-static const struct req_msg_field *mds_last_unlink_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MDT_BODY,
- &RMF_MDT_MD,
- &RMF_LOGCOOKIES,
- &RMF_CAPA1,
- &RMF_CAPA2
-};
-
-static const struct req_msg_field *mds_reint_setattr_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_REC_REINT,
- &RMF_CAPA1,
- &RMF_MDT_EPOCH,
- &RMF_EADATA,
- &RMF_LOGCOOKIES,
- &RMF_DLM_REQ
-};
-
-static const struct req_msg_field *mds_reint_setxattr_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_REC_REINT,
- &RMF_CAPA1,
- &RMF_NAME,
- &RMF_EADATA,
- &RMF_DLM_REQ
-};
-
-static const struct req_msg_field *mdt_swap_layouts[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MDT_BODY,
- &RMF_SWAP_LAYOUTS,
- &RMF_CAPA1,
- &RMF_CAPA2,
- &RMF_DLM_REQ
-};
-
-static const struct req_msg_field *obd_connect_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_TGTUUID,
- &RMF_CLUUID,
- &RMF_CONN,
- &RMF_CONNECT_DATA
-};
-
-static const struct req_msg_field *obd_connect_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_CONNECT_DATA
-};
-
-static const struct req_msg_field *obd_set_info_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_SETINFO_KEY,
- &RMF_SETINFO_VAL
-};
-
-static const struct req_msg_field *ost_grant_shrink_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_SETINFO_KEY,
- &RMF_OST_BODY
-};
-
-static const struct req_msg_field *mds_getinfo_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_GETINFO_KEY,
- &RMF_GETINFO_VALLEN
-};
-
-static const struct req_msg_field *mds_getinfo_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_GETINFO_VAL,
-};
-
-static const struct req_msg_field *ldlm_enqueue_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_DLM_REQ
-};
-
-static const struct req_msg_field *ldlm_enqueue_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_DLM_REP
-};
-
-static const struct req_msg_field *ldlm_enqueue_lvb_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_DLM_REP,
- &RMF_DLM_LVB
-};
-
-static const struct req_msg_field *ldlm_cp_callback_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_DLM_REQ,
- &RMF_DLM_LVB
-};
-
-static const struct req_msg_field *ldlm_gl_callback_desc_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_DLM_REQ,
- &RMF_DLM_GL_DESC
-};
-
-static const struct req_msg_field *ldlm_gl_callback_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_DLM_LVB
-};
-
-static const struct req_msg_field *ldlm_intent_basic_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_DLM_REQ,
- &RMF_LDLM_INTENT,
-};
-
-static const struct req_msg_field *ldlm_intent_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_DLM_REQ,
- &RMF_LDLM_INTENT,
- &RMF_REC_REINT
-};
-
-static const struct req_msg_field *ldlm_intent_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_DLM_REP,
- &RMF_MDT_BODY,
- &RMF_MDT_MD,
- &RMF_ACL
-};
-
-static const struct req_msg_field *ldlm_intent_layout_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_DLM_REQ,
- &RMF_LDLM_INTENT,
- &RMF_LAYOUT_INTENT,
- &RMF_EADATA /* for new layout to be set up */
-};
-
-static const struct req_msg_field *ldlm_intent_open_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_DLM_REP,
- &RMF_MDT_BODY,
- &RMF_MDT_MD,
- &RMF_ACL,
- &RMF_CAPA1,
- &RMF_CAPA2
-};
-
-static const struct req_msg_field *ldlm_intent_getattr_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_DLM_REQ,
- &RMF_LDLM_INTENT,
- &RMF_MDT_BODY, /* coincides with mds_getattr_name_client[] */
- &RMF_CAPA1,
- &RMF_NAME
-};
-
-static const struct req_msg_field *ldlm_intent_getattr_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_DLM_REP,
- &RMF_MDT_BODY,
- &RMF_MDT_MD,
- &RMF_ACL,
- &RMF_CAPA1
-};
-
-static const struct req_msg_field *ldlm_intent_create_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_DLM_REQ,
- &RMF_LDLM_INTENT,
- &RMF_REC_REINT, /* coincides with mds_reint_create_client[] */
- &RMF_CAPA1,
- &RMF_NAME,
- &RMF_EADATA
-};
-
-static const struct req_msg_field *ldlm_intent_open_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_DLM_REQ,
- &RMF_LDLM_INTENT,
- &RMF_REC_REINT, /* coincides with mds_reint_open_client[] */
- &RMF_CAPA1,
- &RMF_CAPA2,
- &RMF_NAME,
- &RMF_EADATA
-};
-
-static const struct req_msg_field *ldlm_intent_unlink_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_DLM_REQ,
- &RMF_LDLM_INTENT,
- &RMF_REC_REINT, /* coincides with mds_reint_unlink_client[] */
- &RMF_CAPA1,
- &RMF_NAME
-};
-
-static const struct req_msg_field *ldlm_intent_getxattr_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_DLM_REQ,
- &RMF_LDLM_INTENT,
- &RMF_MDT_BODY,
- &RMF_CAPA1,
-};
-
-static const struct req_msg_field *ldlm_intent_getxattr_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_DLM_REP,
- &RMF_MDT_BODY,
- &RMF_MDT_MD,
- &RMF_ACL, /* for req_capsule_extend/mdt_intent_policy */
- &RMF_EADATA,
- &RMF_EAVALS,
- &RMF_EAVALS_LENS
-};
-
-static const struct req_msg_field *mds_getxattr_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MDT_BODY,
- &RMF_CAPA1,
- &RMF_NAME,
- &RMF_EADATA
-};
-
-static const struct req_msg_field *mds_getxattr_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MDT_BODY,
- &RMF_EADATA
-};
-
-static const struct req_msg_field *mds_getattr_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MDT_BODY,
- &RMF_MDT_MD,
- &RMF_ACL,
- &RMF_CAPA1,
- &RMF_CAPA2
-};
-
-static const struct req_msg_field *mds_setattr_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MDT_BODY,
- &RMF_MDT_MD,
- &RMF_ACL,
- &RMF_CAPA1,
- &RMF_CAPA2
-};
-
-static const struct req_msg_field *llog_origin_handle_create_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_LLOGD_BODY,
- &RMF_NAME
-};
-
-static const struct req_msg_field *llogd_body_only[] = {
- &RMF_PTLRPC_BODY,
- &RMF_LLOGD_BODY
-};
-
-static const struct req_msg_field *llog_log_hdr_only[] = {
- &RMF_PTLRPC_BODY,
- &RMF_LLOG_LOG_HDR
-};
-
-static const struct req_msg_field *llogd_conn_body_only[] = {
- &RMF_PTLRPC_BODY,
- &RMF_LLOGD_CONN_BODY
-};
-
-static const struct req_msg_field *llog_origin_handle_next_block_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_LLOGD_BODY,
- &RMF_EADATA
-};
-
-static const struct req_msg_field *ost_body_only[] = {
- &RMF_PTLRPC_BODY,
- &RMF_OST_BODY
-};
-
-static const struct req_msg_field *ost_body_capa[] = {
- &RMF_PTLRPC_BODY,
- &RMF_OST_BODY,
- &RMF_CAPA1
-};
-
-static const struct req_msg_field *ost_destroy_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_OST_BODY,
- &RMF_DLM_REQ,
- &RMF_CAPA1
-};
-
-static const struct req_msg_field *ost_brw_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_OST_BODY,
- &RMF_OBD_IOOBJ,
- &RMF_NIOBUF_REMOTE,
- &RMF_CAPA1
-};
-
-static const struct req_msg_field *ost_brw_read_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_OST_BODY
-};
-
-static const struct req_msg_field *ost_brw_write_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_OST_BODY,
- &RMF_RCS
-};
-
-static const struct req_msg_field *ost_get_info_generic_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_GENERIC_DATA,
-};
-
-static const struct req_msg_field *ost_get_info_generic_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_GETINFO_KEY
-};
-
-static const struct req_msg_field *ost_get_last_id_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_OBD_ID
-};
-
-static const struct req_msg_field *ost_get_last_fid_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_GETINFO_KEY,
- &RMF_FID,
-};
-
-static const struct req_msg_field *ost_get_last_fid_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_FID,
-};
-
-static const struct req_msg_field *ost_get_fiemap_client[] = {
- &RMF_PTLRPC_BODY,
- &RMF_FIEMAP_KEY,
- &RMF_FIEMAP_VAL
-};
-
-static const struct req_msg_field *ost_get_fiemap_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_FIEMAP_VAL
-};
-
-static const struct req_msg_field *mdt_hsm_progress[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MDT_BODY,
- &RMF_MDS_HSM_PROGRESS,
-};
-
-static const struct req_msg_field *mdt_hsm_ct_register[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MDT_BODY,
- &RMF_MDS_HSM_ARCHIVE,
-};
-
-static const struct req_msg_field *mdt_hsm_ct_unregister[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MDT_BODY,
-};
-
-static const struct req_msg_field *mdt_hsm_action_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MDT_BODY,
- &RMF_MDS_HSM_CURRENT_ACTION,
-};
-
-static const struct req_msg_field *mdt_hsm_state_get_server[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MDT_BODY,
- &RMF_HSM_USER_STATE,
-};
-
-static const struct req_msg_field *mdt_hsm_state_set[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MDT_BODY,
- &RMF_CAPA1,
- &RMF_HSM_STATE_SET,
-};
-
-static const struct req_msg_field *mdt_hsm_request[] = {
- &RMF_PTLRPC_BODY,
- &RMF_MDT_BODY,
- &RMF_MDS_HSM_REQUEST,
- &RMF_MDS_HSM_USER_ITEM,
- &RMF_GENERIC_DATA,
-};
-
-static struct req_format *req_formats[] = {
- &RQF_OBD_PING,
- &RQF_OBD_SET_INFO,
- &RQF_SEC_CTX,
- &RQF_MGS_TARGET_REG,
- &RQF_MGS_SET_INFO,
- &RQF_MGS_CONFIG_READ,
- &RQF_SEQ_QUERY,
- &RQF_FLD_QUERY,
- &RQF_FLD_READ,
- &RQF_MDS_CONNECT,
- &RQF_MDS_DISCONNECT,
- &RQF_MDS_GET_INFO,
- &RQF_MDS_GETSTATUS,
- &RQF_MDS_STATFS,
- &RQF_MDS_GETATTR,
- &RQF_MDS_GETATTR_NAME,
- &RQF_MDS_GETXATTR,
- &RQF_MDS_SYNC,
- &RQF_MDS_CLOSE,
- &RQF_MDS_INTENT_CLOSE,
- &RQF_MDS_READPAGE,
- &RQF_MDS_WRITEPAGE,
- &RQF_MDS_REINT,
- &RQF_MDS_REINT_CREATE,
- &RQF_MDS_REINT_CREATE_ACL,
- &RQF_MDS_REINT_CREATE_SLAVE,
- &RQF_MDS_REINT_CREATE_SYM,
- &RQF_MDS_REINT_OPEN,
- &RQF_MDS_REINT_UNLINK,
- &RQF_MDS_REINT_LINK,
- &RQF_MDS_REINT_RENAME,
- &RQF_MDS_REINT_MIGRATE,
- &RQF_MDS_REINT_SETATTR,
- &RQF_MDS_REINT_SETXATTR,
- &RQF_MDS_QUOTACTL,
- &RQF_MDS_HSM_PROGRESS,
- &RQF_MDS_HSM_CT_REGISTER,
- &RQF_MDS_HSM_CT_UNREGISTER,
- &RQF_MDS_HSM_STATE_GET,
- &RQF_MDS_HSM_STATE_SET,
- &RQF_MDS_HSM_ACTION,
- &RQF_MDS_HSM_REQUEST,
- &RQF_MDS_SWAP_LAYOUTS,
- &RQF_OST_CONNECT,
- &RQF_OST_DISCONNECT,
- &RQF_OST_QUOTACTL,
- &RQF_OST_GETATTR,
- &RQF_OST_SETATTR,
- &RQF_OST_CREATE,
- &RQF_OST_PUNCH,
- &RQF_OST_SYNC,
- &RQF_OST_DESTROY,
- &RQF_OST_BRW_READ,
- &RQF_OST_BRW_WRITE,
- &RQF_OST_STATFS,
- &RQF_OST_SET_GRANT_INFO,
- &RQF_OST_GET_INFO,
- &RQF_OST_GET_INFO_LAST_ID,
- &RQF_OST_GET_INFO_LAST_FID,
- &RQF_OST_SET_INFO_LAST_FID,
- &RQF_OST_GET_INFO_FIEMAP,
- &RQF_LDLM_ENQUEUE,
- &RQF_LDLM_ENQUEUE_LVB,
- &RQF_LDLM_CONVERT,
- &RQF_LDLM_CANCEL,
- &RQF_LDLM_CALLBACK,
- &RQF_LDLM_CP_CALLBACK,
- &RQF_LDLM_BL_CALLBACK,
- &RQF_LDLM_GL_CALLBACK,
- &RQF_LDLM_GL_DESC_CALLBACK,
- &RQF_LDLM_INTENT,
- &RQF_LDLM_INTENT_BASIC,
- &RQF_LDLM_INTENT_LAYOUT,
- &RQF_LDLM_INTENT_GETATTR,
- &RQF_LDLM_INTENT_OPEN,
- &RQF_LDLM_INTENT_CREATE,
- &RQF_LDLM_INTENT_UNLINK,
- &RQF_LDLM_INTENT_GETXATTR,
- &RQF_LOG_CANCEL,
- &RQF_LLOG_ORIGIN_HANDLE_CREATE,
- &RQF_LLOG_ORIGIN_HANDLE_DESTROY,
- &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK,
- &RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK,
- &RQF_LLOG_ORIGIN_HANDLE_READ_HEADER,
- &RQF_LLOG_ORIGIN_CONNECT,
- &RQF_CONNECT,
-};
-
-struct req_msg_field {
- const __u32 rmf_flags;
- const char *rmf_name;
- /**
- * Field length. (-1) means "variable length". If the
- * \a RMF_F_STRUCT_ARRAY flag is set the field is also variable-length,
- * but the actual size must be a whole multiple of \a rmf_size.
- */
- const int rmf_size;
- void (*rmf_swabber)(void *);
- void (*rmf_dumper)(void *);
- int rmf_offset[ARRAY_SIZE(req_formats)][RCL_NR];
-};
-
-enum rmf_flags {
- /**
- * The field is a string, must be NUL-terminated.
- */
- RMF_F_STRING = BIT(0),
- /**
- * The field's buffer size need not match the declared \a rmf_size.
- */
- RMF_F_NO_SIZE_CHECK = BIT(1),
- /**
- * The field's buffer size must be a whole multiple of the declared \a
- * rmf_size and the \a rmf_swabber function must work on the declared \a
- * rmf_size worth of bytes.
- */
- RMF_F_STRUCT_ARRAY = BIT(2)
-};
-
-struct req_capsule;
-
-/*
- * Request fields.
- */
-#define DEFINE_MSGF(name, flags, size, swabber, dumper) { \
- .rmf_name = (name), \
- .rmf_flags = (flags), \
- .rmf_size = (size), \
- .rmf_swabber = (void (*)(void *))(swabber), \
- .rmf_dumper = (void (*)(void *))(dumper) \
-}
-
-struct req_msg_field RMF_GENERIC_DATA =
- DEFINE_MSGF("generic_data", 0,
- -1, NULL, NULL);
-EXPORT_SYMBOL(RMF_GENERIC_DATA);
-
-struct req_msg_field RMF_MGS_TARGET_INFO =
- DEFINE_MSGF("mgs_target_info", 0,
- sizeof(struct mgs_target_info),
- lustre_swab_mgs_target_info, NULL);
-EXPORT_SYMBOL(RMF_MGS_TARGET_INFO);
-
-struct req_msg_field RMF_MGS_SEND_PARAM =
- DEFINE_MSGF("mgs_send_param", 0,
- sizeof(struct mgs_send_param),
- NULL, NULL);
-EXPORT_SYMBOL(RMF_MGS_SEND_PARAM);
-
-struct req_msg_field RMF_MGS_CONFIG_BODY =
- DEFINE_MSGF("mgs_config_read request", 0,
- sizeof(struct mgs_config_body),
- lustre_swab_mgs_config_body, NULL);
-EXPORT_SYMBOL(RMF_MGS_CONFIG_BODY);
-
-struct req_msg_field RMF_MGS_CONFIG_RES =
- DEFINE_MSGF("mgs_config_read reply ", 0,
- sizeof(struct mgs_config_res),
- lustre_swab_mgs_config_res, NULL);
-EXPORT_SYMBOL(RMF_MGS_CONFIG_RES);
-
-struct req_msg_field RMF_U32 =
- DEFINE_MSGF("generic u32", 0,
- sizeof(__u32), lustre_swab_generic_32s, NULL);
-EXPORT_SYMBOL(RMF_U32);
-
-struct req_msg_field RMF_SETINFO_VAL =
- DEFINE_MSGF("setinfo_val", 0, -1, NULL, NULL);
-EXPORT_SYMBOL(RMF_SETINFO_VAL);
-
-struct req_msg_field RMF_GETINFO_KEY =
- DEFINE_MSGF("getinfo_key", 0, -1, NULL, NULL);
-EXPORT_SYMBOL(RMF_GETINFO_KEY);
-
-struct req_msg_field RMF_GETINFO_VALLEN =
- DEFINE_MSGF("getinfo_vallen", 0,
- sizeof(__u32), lustre_swab_generic_32s, NULL);
-EXPORT_SYMBOL(RMF_GETINFO_VALLEN);
-
-struct req_msg_field RMF_GETINFO_VAL =
- DEFINE_MSGF("getinfo_val", 0, -1, NULL, NULL);
-EXPORT_SYMBOL(RMF_GETINFO_VAL);
-
-struct req_msg_field RMF_SEQ_OPC =
- DEFINE_MSGF("seq_query_opc", 0,
- sizeof(__u32), lustre_swab_generic_32s, NULL);
-EXPORT_SYMBOL(RMF_SEQ_OPC);
-
-struct req_msg_field RMF_SEQ_RANGE =
- DEFINE_MSGF("seq_query_range", 0,
- sizeof(struct lu_seq_range),
- lustre_swab_lu_seq_range, NULL);
-EXPORT_SYMBOL(RMF_SEQ_RANGE);
-
-struct req_msg_field RMF_FLD_OPC =
- DEFINE_MSGF("fld_query_opc", 0,
- sizeof(__u32), lustre_swab_generic_32s, NULL);
-EXPORT_SYMBOL(RMF_FLD_OPC);
-
-struct req_msg_field RMF_FLD_MDFLD =
- DEFINE_MSGF("fld_query_mdfld", 0,
- sizeof(struct lu_seq_range),
- lustre_swab_lu_seq_range, NULL);
-EXPORT_SYMBOL(RMF_FLD_MDFLD);
-
-struct req_msg_field RMF_MDT_BODY =
- DEFINE_MSGF("mdt_body", 0,
- sizeof(struct mdt_body), lustre_swab_mdt_body, NULL);
-EXPORT_SYMBOL(RMF_MDT_BODY);
-
-struct req_msg_field RMF_OBD_QUOTACTL =
- DEFINE_MSGF("obd_quotactl", 0,
- sizeof(struct obd_quotactl),
- lustre_swab_obd_quotactl, NULL);
-EXPORT_SYMBOL(RMF_OBD_QUOTACTL);
-
-struct req_msg_field RMF_MDT_EPOCH =
- DEFINE_MSGF("mdt_ioepoch", 0,
- sizeof(struct mdt_ioepoch), lustre_swab_mdt_ioepoch, NULL);
-EXPORT_SYMBOL(RMF_MDT_EPOCH);
-
-struct req_msg_field RMF_PTLRPC_BODY =
- DEFINE_MSGF("ptlrpc_body", 0,
- sizeof(struct ptlrpc_body), lustre_swab_ptlrpc_body, NULL);
-EXPORT_SYMBOL(RMF_PTLRPC_BODY);
-
-struct req_msg_field RMF_CLOSE_DATA =
- DEFINE_MSGF("data_version", 0,
- sizeof(struct close_data), lustre_swab_close_data, NULL);
-EXPORT_SYMBOL(RMF_CLOSE_DATA);
-
-struct req_msg_field RMF_OBD_STATFS =
- DEFINE_MSGF("obd_statfs", 0,
- sizeof(struct obd_statfs), lustre_swab_obd_statfs, NULL);
-EXPORT_SYMBOL(RMF_OBD_STATFS);
-
-struct req_msg_field RMF_SETINFO_KEY =
- DEFINE_MSGF("setinfo_key", 0, -1, NULL, NULL);
-EXPORT_SYMBOL(RMF_SETINFO_KEY);
-
-struct req_msg_field RMF_NAME =
- DEFINE_MSGF("name", RMF_F_STRING, -1, NULL, NULL);
-EXPORT_SYMBOL(RMF_NAME);
-
-struct req_msg_field RMF_SYMTGT =
- DEFINE_MSGF("symtgt", RMF_F_STRING, -1, NULL, NULL);
-EXPORT_SYMBOL(RMF_SYMTGT);
-
-struct req_msg_field RMF_TGTUUID =
- DEFINE_MSGF("tgtuuid", RMF_F_STRING, sizeof(struct obd_uuid) - 1, NULL,
- NULL);
-EXPORT_SYMBOL(RMF_TGTUUID);
-
-struct req_msg_field RMF_CLUUID =
- DEFINE_MSGF("cluuid", RMF_F_STRING, sizeof(struct obd_uuid) - 1, NULL,
- NULL);
-EXPORT_SYMBOL(RMF_CLUUID);
-
-struct req_msg_field RMF_STRING =
- DEFINE_MSGF("string", RMF_F_STRING, -1, NULL, NULL);
-EXPORT_SYMBOL(RMF_STRING);
-
-struct req_msg_field RMF_LLOGD_BODY =
- DEFINE_MSGF("llogd_body", 0,
- sizeof(struct llogd_body), lustre_swab_llogd_body, NULL);
-EXPORT_SYMBOL(RMF_LLOGD_BODY);
-
-struct req_msg_field RMF_LLOG_LOG_HDR =
- DEFINE_MSGF("llog_log_hdr", 0,
- sizeof(struct llog_log_hdr), lustre_swab_llog_hdr, NULL);
-EXPORT_SYMBOL(RMF_LLOG_LOG_HDR);
-
-struct req_msg_field RMF_LLOGD_CONN_BODY =
- DEFINE_MSGF("llogd_conn_body", 0,
- sizeof(struct llogd_conn_body),
- lustre_swab_llogd_conn_body, NULL);
-EXPORT_SYMBOL(RMF_LLOGD_CONN_BODY);
-
-/*
- * connection handle received in MDS_CONNECT request.
- *
- * No swabbing needed because struct lustre_handle contains only a 64-bit cookie
- * that the client does not interpret at all.
- */
-struct req_msg_field RMF_CONN =
- DEFINE_MSGF("conn", 0, sizeof(struct lustre_handle), NULL, NULL);
-EXPORT_SYMBOL(RMF_CONN);
-
-struct req_msg_field RMF_CONNECT_DATA =
- DEFINE_MSGF("cdata",
- RMF_F_NO_SIZE_CHECK /* we allow extra space for interop */,
- sizeof(struct obd_connect_data),
- lustre_swab_connect, NULL);
-EXPORT_SYMBOL(RMF_CONNECT_DATA);
-
-struct req_msg_field RMF_DLM_REQ =
- DEFINE_MSGF("dlm_req", RMF_F_NO_SIZE_CHECK /* ldlm_request_bufsize */,
- sizeof(struct ldlm_request),
- lustre_swab_ldlm_request, NULL);
-EXPORT_SYMBOL(RMF_DLM_REQ);
-
-struct req_msg_field RMF_DLM_REP =
- DEFINE_MSGF("dlm_rep", 0,
- sizeof(struct ldlm_reply), lustre_swab_ldlm_reply, NULL);
-EXPORT_SYMBOL(RMF_DLM_REP);
-
-struct req_msg_field RMF_LDLM_INTENT =
- DEFINE_MSGF("ldlm_intent", 0,
- sizeof(struct ldlm_intent), lustre_swab_ldlm_intent, NULL);
-EXPORT_SYMBOL(RMF_LDLM_INTENT);
-
-struct req_msg_field RMF_DLM_LVB =
- DEFINE_MSGF("dlm_lvb", 0, -1, NULL, NULL);
-EXPORT_SYMBOL(RMF_DLM_LVB);
-
-struct req_msg_field RMF_DLM_GL_DESC =
- DEFINE_MSGF("dlm_gl_desc", 0, sizeof(union ldlm_gl_desc),
- lustre_swab_gl_desc, NULL);
-EXPORT_SYMBOL(RMF_DLM_GL_DESC);
-
-struct req_msg_field RMF_MDT_MD =
- DEFINE_MSGF("mdt_md", RMF_F_NO_SIZE_CHECK, MIN_MD_SIZE, NULL, NULL);
-EXPORT_SYMBOL(RMF_MDT_MD);
-
-struct req_msg_field RMF_REC_REINT =
- DEFINE_MSGF("rec_reint", 0, sizeof(struct mdt_rec_reint),
- lustre_swab_mdt_rec_reint, NULL);
-EXPORT_SYMBOL(RMF_REC_REINT);
-
-/* FIXME: this length should be defined as a macro */
-struct req_msg_field RMF_EADATA = DEFINE_MSGF("eadata", 0, -1,
- NULL, NULL);
-EXPORT_SYMBOL(RMF_EADATA);
-
-struct req_msg_field RMF_EAVALS = DEFINE_MSGF("eavals", 0, -1, NULL, NULL);
-EXPORT_SYMBOL(RMF_EAVALS);
-
-struct req_msg_field RMF_ACL = DEFINE_MSGF("acl", 0, -1, NULL, NULL);
-EXPORT_SYMBOL(RMF_ACL);
-
-/* FIXME: this should be made to use RMF_F_STRUCT_ARRAY */
-struct req_msg_field RMF_LOGCOOKIES =
- DEFINE_MSGF("logcookies", RMF_F_NO_SIZE_CHECK /* multiple cookies */,
- sizeof(struct llog_cookie), NULL, NULL);
-EXPORT_SYMBOL(RMF_LOGCOOKIES);
-
-struct req_msg_field RMF_CAPA1 =
- DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa),
- lustre_swab_lustre_capa, NULL);
-EXPORT_SYMBOL(RMF_CAPA1);
-
-struct req_msg_field RMF_CAPA2 =
- DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa),
- lustre_swab_lustre_capa, NULL);
-EXPORT_SYMBOL(RMF_CAPA2);
-
-struct req_msg_field RMF_LAYOUT_INTENT =
- DEFINE_MSGF("layout_intent", 0,
- sizeof(struct layout_intent), lustre_swab_layout_intent,
- NULL);
-EXPORT_SYMBOL(RMF_LAYOUT_INTENT);
-
-/*
- * OST request field.
- */
-struct req_msg_field RMF_OST_BODY =
- DEFINE_MSGF("ost_body", 0,
- sizeof(struct ost_body), lustre_swab_ost_body, dump_ost_body);
-EXPORT_SYMBOL(RMF_OST_BODY);
-
-struct req_msg_field RMF_OBD_IOOBJ =
- DEFINE_MSGF("obd_ioobj", RMF_F_STRUCT_ARRAY,
- sizeof(struct obd_ioobj), lustre_swab_obd_ioobj, dump_ioo);
-EXPORT_SYMBOL(RMF_OBD_IOOBJ);
-
-struct req_msg_field RMF_NIOBUF_REMOTE =
- DEFINE_MSGF("niobuf_remote", RMF_F_STRUCT_ARRAY,
- sizeof(struct niobuf_remote), lustre_swab_niobuf_remote,
- dump_rniobuf);
-EXPORT_SYMBOL(RMF_NIOBUF_REMOTE);
-
-struct req_msg_field RMF_RCS =
- DEFINE_MSGF("niobuf_remote", RMF_F_STRUCT_ARRAY, sizeof(__u32),
- lustre_swab_generic_32s, dump_rcs);
-EXPORT_SYMBOL(RMF_RCS);
-
-struct req_msg_field RMF_EAVALS_LENS =
- DEFINE_MSGF("eavals_lens", RMF_F_STRUCT_ARRAY, sizeof(__u32),
- lustre_swab_generic_32s, NULL);
-EXPORT_SYMBOL(RMF_EAVALS_LENS);
-
-struct req_msg_field RMF_OBD_ID =
- DEFINE_MSGF("u64", 0,
- sizeof(u64), lustre_swab_ost_last_id, NULL);
-EXPORT_SYMBOL(RMF_OBD_ID);
-
-struct req_msg_field RMF_FID =
- DEFINE_MSGF("fid", 0,
- sizeof(struct lu_fid), lustre_swab_lu_fid, NULL);
-EXPORT_SYMBOL(RMF_FID);
-
-struct req_msg_field RMF_OST_ID =
- DEFINE_MSGF("ost_id", 0,
- sizeof(struct ost_id), lustre_swab_ost_id, NULL);
-EXPORT_SYMBOL(RMF_OST_ID);
-
-struct req_msg_field RMF_FIEMAP_KEY =
- DEFINE_MSGF("fiemap", 0, sizeof(struct ll_fiemap_info_key),
- lustre_swab_fiemap, NULL);
-EXPORT_SYMBOL(RMF_FIEMAP_KEY);
-
-struct req_msg_field RMF_FIEMAP_VAL =
- DEFINE_MSGF("fiemap", 0, -1, lustre_swab_fiemap, NULL);
-EXPORT_SYMBOL(RMF_FIEMAP_VAL);
-
-struct req_msg_field RMF_HSM_USER_STATE =
- DEFINE_MSGF("hsm_user_state", 0, sizeof(struct hsm_user_state),
- lustre_swab_hsm_user_state, NULL);
-EXPORT_SYMBOL(RMF_HSM_USER_STATE);
-
-struct req_msg_field RMF_HSM_STATE_SET =
- DEFINE_MSGF("hsm_state_set", 0, sizeof(struct hsm_state_set),
- lustre_swab_hsm_state_set, NULL);
-EXPORT_SYMBOL(RMF_HSM_STATE_SET);
-
-struct req_msg_field RMF_MDS_HSM_PROGRESS =
- DEFINE_MSGF("hsm_progress", 0, sizeof(struct hsm_progress_kernel),
- lustre_swab_hsm_progress_kernel, NULL);
-EXPORT_SYMBOL(RMF_MDS_HSM_PROGRESS);
-
-struct req_msg_field RMF_MDS_HSM_CURRENT_ACTION =
- DEFINE_MSGF("hsm_current_action", 0, sizeof(struct hsm_current_action),
- lustre_swab_hsm_current_action, NULL);
-EXPORT_SYMBOL(RMF_MDS_HSM_CURRENT_ACTION);
-
-struct req_msg_field RMF_MDS_HSM_USER_ITEM =
- DEFINE_MSGF("hsm_user_item", RMF_F_STRUCT_ARRAY,
- sizeof(struct hsm_user_item), lustre_swab_hsm_user_item,
- NULL);
-EXPORT_SYMBOL(RMF_MDS_HSM_USER_ITEM);
-
-struct req_msg_field RMF_MDS_HSM_ARCHIVE =
- DEFINE_MSGF("hsm_archive", 0,
- sizeof(__u32), lustre_swab_generic_32s, NULL);
-EXPORT_SYMBOL(RMF_MDS_HSM_ARCHIVE);
-
-struct req_msg_field RMF_MDS_HSM_REQUEST =
- DEFINE_MSGF("hsm_request", 0, sizeof(struct hsm_request),
- lustre_swab_hsm_request, NULL);
-EXPORT_SYMBOL(RMF_MDS_HSM_REQUEST);
-
-struct req_msg_field RMF_SWAP_LAYOUTS =
- DEFINE_MSGF("swap_layouts", 0, sizeof(struct mdc_swap_layouts),
- lustre_swab_swap_layouts, NULL);
-EXPORT_SYMBOL(RMF_SWAP_LAYOUTS);
-
-/*
- * Request formats.
- */
-
-struct req_format {
- const char *rf_name;
- size_t rf_idx;
- struct {
- size_t nr;
- const struct req_msg_field **d;
- } rf_fields[RCL_NR];
-};
-
-#define DEFINE_REQ_FMT(name, client, client_nr, server, server_nr) { \
- .rf_name = name, \
- .rf_fields = { \
- [RCL_CLIENT] = { \
- .nr = client_nr, \
- .d = client \
- }, \
- [RCL_SERVER] = { \
- .nr = server_nr, \
- .d = server \
- } \
- } \
-}
-
-#define DEFINE_REQ_FMT0(name, client, server) \
-DEFINE_REQ_FMT(name, client, ARRAY_SIZE(client), server, ARRAY_SIZE(server))
-
-struct req_format RQF_OBD_PING =
- DEFINE_REQ_FMT0("OBD_PING", empty, empty);
-EXPORT_SYMBOL(RQF_OBD_PING);
-
-struct req_format RQF_OBD_SET_INFO =
- DEFINE_REQ_FMT0("OBD_SET_INFO", obd_set_info_client, empty);
-EXPORT_SYMBOL(RQF_OBD_SET_INFO);
-
-struct req_format RQF_SEC_CTX =
- DEFINE_REQ_FMT0("SEC_CTX", empty, empty);
-EXPORT_SYMBOL(RQF_SEC_CTX);
-
-struct req_format RQF_MGS_TARGET_REG =
- DEFINE_REQ_FMT0("MGS_TARGET_REG", mgs_target_info_only,
- mgs_target_info_only);
-EXPORT_SYMBOL(RQF_MGS_TARGET_REG);
-
-struct req_format RQF_MGS_SET_INFO =
- DEFINE_REQ_FMT0("MGS_SET_INFO", mgs_set_info,
- mgs_set_info);
-EXPORT_SYMBOL(RQF_MGS_SET_INFO);
-
-struct req_format RQF_MGS_CONFIG_READ =
- DEFINE_REQ_FMT0("MGS_CONFIG_READ", mgs_config_read_client,
- mgs_config_read_server);
-EXPORT_SYMBOL(RQF_MGS_CONFIG_READ);
-
-struct req_format RQF_SEQ_QUERY =
- DEFINE_REQ_FMT0("SEQ_QUERY", seq_query_client, seq_query_server);
-EXPORT_SYMBOL(RQF_SEQ_QUERY);
-
-struct req_format RQF_FLD_QUERY =
- DEFINE_REQ_FMT0("FLD_QUERY", fld_query_client, fld_query_server);
-EXPORT_SYMBOL(RQF_FLD_QUERY);
-
-/*
- * The 'fld_read_server' uses 'RMF_GENERIC_DATA' to hold the 'FLD_READ'
- * RPC reply that is composed of 'struct lu_seq_range_array'. But there
- * is no registered swabber function for 'RMF_GENERIC_DATA', so the RPC
- * peers need to handle the RPC reply in a fixed little-endian format.
- *
- * In theory, we could define a new structure with a registered swabber to
- * handle the 'FLD_READ' RPC reply automatically. But from the
- * implementation view, that is not easy to do within the current "struct
- * req_msg_field" framework, because the sequence range array in the RPC
- * reply is not of fixed length; instead, its length depends on the
- * 'lu_seq_range' count, which is unknown when the RPC buffer is prepared.
- * Generally, for such flexible-length RPC usage there would be a field in
- * the RPC layout to indicate the data length. But for the 'FLD_READ' RPC
- * we have no way to do that: adding a new length field would break the
- * on-wire RPC protocol and cause interoperability trouble with old peers.
- */
-struct req_format RQF_FLD_READ =
- DEFINE_REQ_FMT0("FLD_READ", fld_read_client, fld_read_server);
-EXPORT_SYMBOL(RQF_FLD_READ);
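-
-/*
- * Illustrative sketch, not from the original source: because no swabber is
- * registered for RMF_GENERIC_DATA, a peer consuming the FLD_READ reply has
- * to convert each 'struct lu_seq_range' from little-endian itself. A
- * minimal helper could look like the following; the 'lsra_count' and
- * 'lsra_lsr' member names of 'struct lu_seq_range_array' are assumed here.
- */
-static inline void fld_read_reply_le_to_cpu(struct lu_seq_range_array *lsra)
-{
- __u32 i;
-
- lsra->lsra_count = le32_to_cpu(lsra->lsra_count);
- for (i = 0; i < lsra->lsra_count; i++) {
- struct lu_seq_range *range = &lsra->lsra_lsr[i];
-
- /* every member is transmitted as little-endian */
- range->lsr_start = le64_to_cpu(range->lsr_start);
- range->lsr_end = le64_to_cpu(range->lsr_end);
- range->lsr_index = le32_to_cpu(range->lsr_index);
- range->lsr_flags = le32_to_cpu(range->lsr_flags);
- }
-}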
-
-struct req_format RQF_LOG_CANCEL =
- DEFINE_REQ_FMT0("OBD_LOG_CANCEL", log_cancel_client, empty);
-EXPORT_SYMBOL(RQF_LOG_CANCEL);
-
-struct req_format RQF_MDS_QUOTACTL =
- DEFINE_REQ_FMT0("MDS_QUOTACTL", quotactl_only, quotactl_only);
-EXPORT_SYMBOL(RQF_MDS_QUOTACTL);
-
-struct req_format RQF_OST_QUOTACTL =
- DEFINE_REQ_FMT0("OST_QUOTACTL", quotactl_only, quotactl_only);
-EXPORT_SYMBOL(RQF_OST_QUOTACTL);
-
-struct req_format RQF_MDS_GETSTATUS =
- DEFINE_REQ_FMT0("MDS_GETSTATUS", mdt_body_only, mdt_body_capa);
-EXPORT_SYMBOL(RQF_MDS_GETSTATUS);
-
-struct req_format RQF_MDS_STATFS =
- DEFINE_REQ_FMT0("MDS_STATFS", empty, obd_statfs_server);
-EXPORT_SYMBOL(RQF_MDS_STATFS);
-
-struct req_format RQF_MDS_SYNC =
- DEFINE_REQ_FMT0("MDS_SYNC", mdt_body_capa, mdt_body_only);
-EXPORT_SYMBOL(RQF_MDS_SYNC);
-
-struct req_format RQF_MDS_GETATTR =
- DEFINE_REQ_FMT0("MDS_GETATTR", mdt_body_capa, mds_getattr_server);
-EXPORT_SYMBOL(RQF_MDS_GETATTR);
-
-struct req_format RQF_MDS_GETXATTR =
- DEFINE_REQ_FMT0("MDS_GETXATTR",
- mds_getxattr_client, mds_getxattr_server);
-EXPORT_SYMBOL(RQF_MDS_GETXATTR);
-
-struct req_format RQF_MDS_GETATTR_NAME =
- DEFINE_REQ_FMT0("MDS_GETATTR_NAME",
- mds_getattr_name_client, mds_getattr_server);
-EXPORT_SYMBOL(RQF_MDS_GETATTR_NAME);
-
-struct req_format RQF_MDS_REINT =
- DEFINE_REQ_FMT0("MDS_REINT", mds_reint_client, mdt_body_only);
-EXPORT_SYMBOL(RQF_MDS_REINT);
-
-struct req_format RQF_MDS_REINT_CREATE =
- DEFINE_REQ_FMT0("MDS_REINT_CREATE",
- mds_reint_create_client, mdt_body_capa);
-EXPORT_SYMBOL(RQF_MDS_REINT_CREATE);
-
-struct req_format RQF_MDS_REINT_CREATE_ACL =
- DEFINE_REQ_FMT0("MDS_REINT_CREATE_ACL",
- mds_reint_create_acl_client, mdt_body_capa);
-EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_ACL);
-
-struct req_format RQF_MDS_REINT_CREATE_SLAVE =
- DEFINE_REQ_FMT0("MDS_REINT_CREATE_EA",
- mds_reint_create_slave_client, mdt_body_capa);
-EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_SLAVE);
-
-struct req_format RQF_MDS_REINT_CREATE_SYM =
- DEFINE_REQ_FMT0("MDS_REINT_CREATE_SYM",
- mds_reint_create_sym_client, mdt_body_capa);
-EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_SYM);
-
-struct req_format RQF_MDS_REINT_OPEN =
- DEFINE_REQ_FMT0("MDS_REINT_OPEN",
- mds_reint_open_client, mds_reint_open_server);
-EXPORT_SYMBOL(RQF_MDS_REINT_OPEN);
-
-struct req_format RQF_MDS_REINT_UNLINK =
- DEFINE_REQ_FMT0("MDS_REINT_UNLINK", mds_reint_unlink_client,
- mds_last_unlink_server);
-EXPORT_SYMBOL(RQF_MDS_REINT_UNLINK);
-
-struct req_format RQF_MDS_REINT_LINK =
- DEFINE_REQ_FMT0("MDS_REINT_LINK",
- mds_reint_link_client, mdt_body_only);
-EXPORT_SYMBOL(RQF_MDS_REINT_LINK);
-
-struct req_format RQF_MDS_REINT_RENAME =
- DEFINE_REQ_FMT0("MDS_REINT_RENAME", mds_reint_rename_client,
- mds_last_unlink_server);
-EXPORT_SYMBOL(RQF_MDS_REINT_RENAME);
-
-struct req_format RQF_MDS_REINT_MIGRATE =
- DEFINE_REQ_FMT0("MDS_REINT_MIGRATE", mds_reint_migrate_client,
- mds_last_unlink_server);
-EXPORT_SYMBOL(RQF_MDS_REINT_MIGRATE);
-
-struct req_format RQF_MDS_REINT_SETATTR =
- DEFINE_REQ_FMT0("MDS_REINT_SETATTR",
- mds_reint_setattr_client, mds_setattr_server);
-EXPORT_SYMBOL(RQF_MDS_REINT_SETATTR);
-
-struct req_format RQF_MDS_REINT_SETXATTR =
- DEFINE_REQ_FMT0("MDS_REINT_SETXATTR",
- mds_reint_setxattr_client, mdt_body_only);
-EXPORT_SYMBOL(RQF_MDS_REINT_SETXATTR);
-
-struct req_format RQF_MDS_CONNECT =
- DEFINE_REQ_FMT0("MDS_CONNECT",
- obd_connect_client, obd_connect_server);
-EXPORT_SYMBOL(RQF_MDS_CONNECT);
-
-struct req_format RQF_MDS_DISCONNECT =
- DEFINE_REQ_FMT0("MDS_DISCONNECT", empty, empty);
-EXPORT_SYMBOL(RQF_MDS_DISCONNECT);
-
-struct req_format RQF_MDS_GET_INFO =
- DEFINE_REQ_FMT0("MDS_GET_INFO", mds_getinfo_client,
- mds_getinfo_server);
-EXPORT_SYMBOL(RQF_MDS_GET_INFO);
-
-struct req_format RQF_LDLM_ENQUEUE =
- DEFINE_REQ_FMT0("LDLM_ENQUEUE",
- ldlm_enqueue_client, ldlm_enqueue_lvb_server);
-EXPORT_SYMBOL(RQF_LDLM_ENQUEUE);
-
-struct req_format RQF_LDLM_ENQUEUE_LVB =
- DEFINE_REQ_FMT0("LDLM_ENQUEUE_LVB",
- ldlm_enqueue_client, ldlm_enqueue_lvb_server);
-EXPORT_SYMBOL(RQF_LDLM_ENQUEUE_LVB);
-
-struct req_format RQF_LDLM_CONVERT =
- DEFINE_REQ_FMT0("LDLM_CONVERT",
- ldlm_enqueue_client, ldlm_enqueue_server);
-EXPORT_SYMBOL(RQF_LDLM_CONVERT);
-
-struct req_format RQF_LDLM_CANCEL =
- DEFINE_REQ_FMT0("LDLM_CANCEL", ldlm_enqueue_client, empty);
-EXPORT_SYMBOL(RQF_LDLM_CANCEL);
-
-struct req_format RQF_LDLM_CALLBACK =
- DEFINE_REQ_FMT0("LDLM_CALLBACK", ldlm_enqueue_client, empty);
-EXPORT_SYMBOL(RQF_LDLM_CALLBACK);
-
-struct req_format RQF_LDLM_CP_CALLBACK =
- DEFINE_REQ_FMT0("LDLM_CP_CALLBACK", ldlm_cp_callback_client, empty);
-EXPORT_SYMBOL(RQF_LDLM_CP_CALLBACK);
-
-struct req_format RQF_LDLM_BL_CALLBACK =
- DEFINE_REQ_FMT0("LDLM_BL_CALLBACK", ldlm_enqueue_client, empty);
-EXPORT_SYMBOL(RQF_LDLM_BL_CALLBACK);
-
-struct req_format RQF_LDLM_GL_CALLBACK =
- DEFINE_REQ_FMT0("LDLM_GL_CALLBACK", ldlm_enqueue_client,
- ldlm_gl_callback_server);
-EXPORT_SYMBOL(RQF_LDLM_GL_CALLBACK);
-
-struct req_format RQF_LDLM_GL_DESC_CALLBACK =
- DEFINE_REQ_FMT0("LDLM_GL_CALLBACK", ldlm_gl_callback_desc_client,
- ldlm_gl_callback_server);
-EXPORT_SYMBOL(RQF_LDLM_GL_DESC_CALLBACK);
-
-struct req_format RQF_LDLM_INTENT_BASIC =
- DEFINE_REQ_FMT0("LDLM_INTENT_BASIC",
- ldlm_intent_basic_client, ldlm_enqueue_lvb_server);
-EXPORT_SYMBOL(RQF_LDLM_INTENT_BASIC);
-
-struct req_format RQF_LDLM_INTENT =
- DEFINE_REQ_FMT0("LDLM_INTENT",
- ldlm_intent_client, ldlm_intent_server);
-EXPORT_SYMBOL(RQF_LDLM_INTENT);
-
-struct req_format RQF_LDLM_INTENT_LAYOUT =
- DEFINE_REQ_FMT0("LDLM_INTENT_LAYOUT ",
- ldlm_intent_layout_client, ldlm_enqueue_lvb_server);
-EXPORT_SYMBOL(RQF_LDLM_INTENT_LAYOUT);
-
-struct req_format RQF_LDLM_INTENT_GETATTR =
- DEFINE_REQ_FMT0("LDLM_INTENT_GETATTR",
- ldlm_intent_getattr_client, ldlm_intent_getattr_server);
-EXPORT_SYMBOL(RQF_LDLM_INTENT_GETATTR);
-
-struct req_format RQF_LDLM_INTENT_OPEN =
- DEFINE_REQ_FMT0("LDLM_INTENT_OPEN",
- ldlm_intent_open_client, ldlm_intent_open_server);
-EXPORT_SYMBOL(RQF_LDLM_INTENT_OPEN);
-
-struct req_format RQF_LDLM_INTENT_CREATE =
- DEFINE_REQ_FMT0("LDLM_INTENT_CREATE",
- ldlm_intent_create_client, ldlm_intent_getattr_server);
-EXPORT_SYMBOL(RQF_LDLM_INTENT_CREATE);
-
-struct req_format RQF_LDLM_INTENT_UNLINK =
- DEFINE_REQ_FMT0("LDLM_INTENT_UNLINK",
- ldlm_intent_unlink_client, ldlm_intent_server);
-EXPORT_SYMBOL(RQF_LDLM_INTENT_UNLINK);
-
-struct req_format RQF_LDLM_INTENT_GETXATTR =
- DEFINE_REQ_FMT0("LDLM_INTENT_GETXATTR",
- ldlm_intent_getxattr_client,
- ldlm_intent_getxattr_server);
-EXPORT_SYMBOL(RQF_LDLM_INTENT_GETXATTR);
-
-struct req_format RQF_MDS_CLOSE =
- DEFINE_REQ_FMT0("MDS_CLOSE",
- mdt_close_client, mds_last_unlink_server);
-EXPORT_SYMBOL(RQF_MDS_CLOSE);
-
-struct req_format RQF_MDS_INTENT_CLOSE =
- DEFINE_REQ_FMT0("MDS_CLOSE",
- mdt_intent_close_client, mds_last_unlink_server);
-EXPORT_SYMBOL(RQF_MDS_INTENT_CLOSE);
-
-struct req_format RQF_MDS_READPAGE =
- DEFINE_REQ_FMT0("MDS_READPAGE",
- mdt_body_capa, mdt_body_only);
-EXPORT_SYMBOL(RQF_MDS_READPAGE);
-
-struct req_format RQF_MDS_HSM_ACTION =
- DEFINE_REQ_FMT0("MDS_HSM_ACTION", mdt_body_capa, mdt_hsm_action_server);
-EXPORT_SYMBOL(RQF_MDS_HSM_ACTION);
-
-struct req_format RQF_MDS_HSM_PROGRESS =
- DEFINE_REQ_FMT0("MDS_HSM_PROGRESS", mdt_hsm_progress, empty);
-EXPORT_SYMBOL(RQF_MDS_HSM_PROGRESS);
-
-struct req_format RQF_MDS_HSM_CT_REGISTER =
- DEFINE_REQ_FMT0("MDS_HSM_CT_REGISTER", mdt_hsm_ct_register, empty);
-EXPORT_SYMBOL(RQF_MDS_HSM_CT_REGISTER);
-
-struct req_format RQF_MDS_HSM_CT_UNREGISTER =
- DEFINE_REQ_FMT0("MDS_HSM_CT_UNREGISTER", mdt_hsm_ct_unregister, empty);
-EXPORT_SYMBOL(RQF_MDS_HSM_CT_UNREGISTER);
-
-struct req_format RQF_MDS_HSM_STATE_GET =
- DEFINE_REQ_FMT0("MDS_HSM_STATE_GET",
- mdt_body_capa, mdt_hsm_state_get_server);
-EXPORT_SYMBOL(RQF_MDS_HSM_STATE_GET);
-
-struct req_format RQF_MDS_HSM_STATE_SET =
- DEFINE_REQ_FMT0("MDS_HSM_STATE_SET", mdt_hsm_state_set, empty);
-EXPORT_SYMBOL(RQF_MDS_HSM_STATE_SET);
-
-struct req_format RQF_MDS_HSM_REQUEST =
- DEFINE_REQ_FMT0("MDS_HSM_REQUEST", mdt_hsm_request, empty);
-EXPORT_SYMBOL(RQF_MDS_HSM_REQUEST);
-
-struct req_format RQF_MDS_SWAP_LAYOUTS =
- DEFINE_REQ_FMT0("MDS_SWAP_LAYOUTS",
- mdt_swap_layouts, empty);
-EXPORT_SYMBOL(RQF_MDS_SWAP_LAYOUTS);
-
-/* This is for split */
-struct req_format RQF_MDS_WRITEPAGE =
- DEFINE_REQ_FMT0("MDS_WRITEPAGE",
- mdt_body_capa, mdt_body_only);
-EXPORT_SYMBOL(RQF_MDS_WRITEPAGE);
-
-struct req_format RQF_LLOG_ORIGIN_HANDLE_CREATE =
- DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_CREATE",
- llog_origin_handle_create_client, llogd_body_only);
-EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_CREATE);
-
-struct req_format RQF_LLOG_ORIGIN_HANDLE_DESTROY =
- DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_DESTROY",
- llogd_body_only, llogd_body_only);
-EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_DESTROY);
-
-struct req_format RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK =
- DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_NEXT_BLOCK",
- llogd_body_only, llog_origin_handle_next_block_server);
-EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK);
-
-struct req_format RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK =
- DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_PREV_BLOCK",
- llogd_body_only, llog_origin_handle_next_block_server);
-EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK);
-
-struct req_format RQF_LLOG_ORIGIN_HANDLE_READ_HEADER =
- DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_READ_HEADER",
- llogd_body_only, llog_log_hdr_only);
-EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_READ_HEADER);
-
-struct req_format RQF_LLOG_ORIGIN_CONNECT =
- DEFINE_REQ_FMT0("LLOG_ORIGIN_CONNECT", llogd_conn_body_only, empty);
-EXPORT_SYMBOL(RQF_LLOG_ORIGIN_CONNECT);
-
-struct req_format RQF_CONNECT =
- DEFINE_REQ_FMT0("CONNECT", obd_connect_client, obd_connect_server);
-EXPORT_SYMBOL(RQF_CONNECT);
-
-struct req_format RQF_OST_CONNECT =
- DEFINE_REQ_FMT0("OST_CONNECT",
- obd_connect_client, obd_connect_server);
-EXPORT_SYMBOL(RQF_OST_CONNECT);
-
-struct req_format RQF_OST_DISCONNECT =
- DEFINE_REQ_FMT0("OST_DISCONNECT", empty, empty);
-EXPORT_SYMBOL(RQF_OST_DISCONNECT);
-
-struct req_format RQF_OST_GETATTR =
- DEFINE_REQ_FMT0("OST_GETATTR", ost_body_capa, ost_body_only);
-EXPORT_SYMBOL(RQF_OST_GETATTR);
-
-struct req_format RQF_OST_SETATTR =
- DEFINE_REQ_FMT0("OST_SETATTR", ost_body_capa, ost_body_only);
-EXPORT_SYMBOL(RQF_OST_SETATTR);
-
-struct req_format RQF_OST_CREATE =
- DEFINE_REQ_FMT0("OST_CREATE", ost_body_only, ost_body_only);
-EXPORT_SYMBOL(RQF_OST_CREATE);
-
-struct req_format RQF_OST_PUNCH =
- DEFINE_REQ_FMT0("OST_PUNCH", ost_body_capa, ost_body_only);
-EXPORT_SYMBOL(RQF_OST_PUNCH);
-
-struct req_format RQF_OST_SYNC =
- DEFINE_REQ_FMT0("OST_SYNC", ost_body_capa, ost_body_only);
-EXPORT_SYMBOL(RQF_OST_SYNC);
-
-struct req_format RQF_OST_DESTROY =
- DEFINE_REQ_FMT0("OST_DESTROY", ost_destroy_client, ost_body_only);
-EXPORT_SYMBOL(RQF_OST_DESTROY);
-
-struct req_format RQF_OST_BRW_READ =
- DEFINE_REQ_FMT0("OST_BRW_READ", ost_brw_client, ost_brw_read_server);
-EXPORT_SYMBOL(RQF_OST_BRW_READ);
-
-struct req_format RQF_OST_BRW_WRITE =
- DEFINE_REQ_FMT0("OST_BRW_WRITE", ost_brw_client, ost_brw_write_server);
-EXPORT_SYMBOL(RQF_OST_BRW_WRITE);
-
-struct req_format RQF_OST_STATFS =
- DEFINE_REQ_FMT0("OST_STATFS", empty, obd_statfs_server);
-EXPORT_SYMBOL(RQF_OST_STATFS);
-
-struct req_format RQF_OST_SET_GRANT_INFO =
- DEFINE_REQ_FMT0("OST_SET_GRANT_INFO", ost_grant_shrink_client,
- ost_body_only);
-EXPORT_SYMBOL(RQF_OST_SET_GRANT_INFO);
-
-struct req_format RQF_OST_GET_INFO =
- DEFINE_REQ_FMT0("OST_GET_INFO", ost_get_info_generic_client,
- ost_get_info_generic_server);
-EXPORT_SYMBOL(RQF_OST_GET_INFO);
-
-struct req_format RQF_OST_GET_INFO_LAST_ID =
- DEFINE_REQ_FMT0("OST_GET_INFO_LAST_ID", ost_get_info_generic_client,
- ost_get_last_id_server);
-EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_ID);
-
-struct req_format RQF_OST_GET_INFO_LAST_FID =
- DEFINE_REQ_FMT0("OST_GET_INFO_LAST_FID", ost_get_last_fid_client,
- ost_get_last_fid_server);
-EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_FID);
-
-struct req_format RQF_OST_SET_INFO_LAST_FID =
- DEFINE_REQ_FMT0("OST_SET_INFO_LAST_FID", obd_set_info_client,
- empty);
-EXPORT_SYMBOL(RQF_OST_SET_INFO_LAST_FID);
-
-struct req_format RQF_OST_GET_INFO_FIEMAP =
- DEFINE_REQ_FMT0("OST_GET_INFO_FIEMAP", ost_get_fiemap_client,
- ost_get_fiemap_server);
-EXPORT_SYMBOL(RQF_OST_GET_INFO_FIEMAP);
-
-/* Convenience macro */
-#define FMT_FIELD(fmt, i, j) ((fmt)->rf_fields[(i)].d[(j)])
-
-/**
- * Initializes the capsule abstraction by computing and setting the \a rf_idx
- * field of RQFs and the \a rmf_offset field of RMFs.
- */
-int req_layout_init(void)
-{
- size_t i;
- size_t j;
- size_t k;
- struct req_format *rf = NULL;
-
- for (i = 0; i < ARRAY_SIZE(req_formats); ++i) {
- rf = req_formats[i];
- rf->rf_idx = i;
- for (j = 0; j < RCL_NR; ++j) {
- LASSERT(rf->rf_fields[j].nr <= REQ_MAX_FIELD_NR);
- for (k = 0; k < rf->rf_fields[j].nr; ++k) {
- struct req_msg_field *field;
-
- field = (typeof(field))rf->rf_fields[j].d[k];
- LASSERT(!(field->rmf_flags & RMF_F_STRUCT_ARRAY)
- || field->rmf_size > 0);
- LASSERT(field->rmf_offset[i][j] == 0);
- /*
- * k + 1 to detect unused format/field
- * combinations.
- */
- field->rmf_offset[i][j] = k + 1;
- }
- }
- }
- return 0;
-}
-EXPORT_SYMBOL(req_layout_init);
-
-void req_layout_fini(void)
-{
-}
-EXPORT_SYMBOL(req_layout_fini);
-
-/**
- * Initializes the expected sizes of each RMF in a \a pill (\a rc_area) to -1.
- *
- * Actual/expected field sizes are set elsewhere in functions in this file:
- * req_capsule_init(), req_capsule_server_pack(), req_capsule_set_size() and
- * req_capsule_msg_size(). The \a rc_area information is used by
- * ptlrpc_request_set_replen().
- */
-static void req_capsule_init_area(struct req_capsule *pill)
-{
- size_t i;
-
- for (i = 0; i < ARRAY_SIZE(pill->rc_area[RCL_CLIENT]); i++) {
- pill->rc_area[RCL_CLIENT][i] = -1;
- pill->rc_area[RCL_SERVER][i] = -1;
- }
-}
-
-/**
- * Initialize a pill.
- *
- * The \a location indicates whether the caller is executing on the client side
- * (RCL_CLIENT) or server side (RCL_SERVER).
- */
-void req_capsule_init(struct req_capsule *pill,
- struct ptlrpc_request *req,
- enum req_location location)
-{
- LASSERT(location == RCL_SERVER || location == RCL_CLIENT);
-
- /*
- * Today all capsules are embedded in ptlrpc_request structs,
- * but just in case that ever isn't the case, we don't reach
- * into req unless req != NULL and pill is the one embedded in
- * the req.
- *
- * The req->rq_pill_init flag makes it safe to initialize a pill
- * twice, which might happen in the OST paths as a result of the
- * high-priority RPC queue getting peeked at before ost_handle()
- * handles an OST RPC.
- */
- if (req && pill == &req->rq_pill && req->rq_pill_init)
- return;
-
- memset(pill, 0, sizeof(*pill));
- pill->rc_req = req;
- pill->rc_loc = location;
- req_capsule_init_area(pill);
-
- if (req && pill == &req->rq_pill)
- req->rq_pill_init = 1;
-}
-EXPORT_SYMBOL(req_capsule_init);
-
-void req_capsule_fini(struct req_capsule *pill)
-{
-}
-EXPORT_SYMBOL(req_capsule_fini);
-
-static int __req_format_is_sane(const struct req_format *fmt)
-{
- return fmt->rf_idx < ARRAY_SIZE(req_formats) &&
- req_formats[fmt->rf_idx] == fmt;
-}
-
-static struct lustre_msg *__req_msg(const struct req_capsule *pill,
- enum req_location loc)
-{
- struct ptlrpc_request *req;
-
- req = pill->rc_req;
- return loc == RCL_CLIENT ? req->rq_reqmsg : req->rq_repmsg;
-}
-
-/**
- * Set the format (\a fmt) of a \a pill; format changes are not allowed here
- * (see req_capsule_extend()).
- */
-void req_capsule_set(struct req_capsule *pill, const struct req_format *fmt)
-{
- LASSERT(!pill->rc_fmt || pill->rc_fmt == fmt);
- LASSERT(__req_format_is_sane(fmt));
-
- pill->rc_fmt = fmt;
-}
-EXPORT_SYMBOL(req_capsule_set);
-
-/**
- * Fills in any parts of the \a rc_area of a \a pill that haven't been filled in
- * yet.
- *
- * \a rc_area is an array of REQ_MAX_FIELD_NR elements, used to store sizes of
- * variable-sized fields. The field sizes come from the declared \a rmf_size
- * field of a \a pill's \a rc_fmt's RMFs.
- */
-size_t req_capsule_filled_sizes(struct req_capsule *pill,
- enum req_location loc)
-{
- const struct req_format *fmt = pill->rc_fmt;
- size_t i;
-
- for (i = 0; i < fmt->rf_fields[loc].nr; ++i) {
- if (pill->rc_area[loc][i] == -1) {
- pill->rc_area[loc][i] =
- fmt->rf_fields[loc].d[i]->rmf_size;
- if (pill->rc_area[loc][i] == -1) {
- /*
- * Skip the following fields.
- *
- * If this LASSERT() trips then you're missing a
- * call to req_capsule_set_size().
- */
- LASSERT(loc != RCL_SERVER);
- break;
- }
- }
- }
- return i;
-}
-EXPORT_SYMBOL(req_capsule_filled_sizes);
-
-/**
- * Capsule equivalent of lustre_pack_request() and lustre_pack_reply().
- *
- * This function uses the \a pill's \a rc_area as filled in by
- * req_capsule_set_size() or req_capsule_filled_sizes() (the latter is called by
- * this function).
- */
-int req_capsule_server_pack(struct req_capsule *pill)
-{
- const struct req_format *fmt;
- int count;
- int rc;
-
- LASSERT(pill->rc_loc == RCL_SERVER);
- fmt = pill->rc_fmt;
- LASSERT(fmt);
-
- count = req_capsule_filled_sizes(pill, RCL_SERVER);
- rc = lustre_pack_reply(pill->rc_req, count,
- pill->rc_area[RCL_SERVER], NULL);
- if (rc != 0) {
- DEBUG_REQ(D_ERROR, pill->rc_req,
- "Cannot pack %d fields in format `%s': ",
- count, fmt->rf_name);
- }
- return rc;
-}
-EXPORT_SYMBOL(req_capsule_server_pack);
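-
-/*
- * A minimal usage sketch, not from the original source: a typical
- * server-side caller sizes any variable-length reply fields first and then
- * packs the reply in one step. 'ea_len' is a stand-in for whatever size
- * the handler computed for the xattr payload.
- */
-static inline int example_getxattr_reply_pack(struct ptlrpc_request *req,
- u32 ea_len)
-{
- int rc;
-
- /* RMF_EADATA is variable-sized (rmf_size == -1), so it must be
- * sized explicitly before the reply can be packed.
- */
- req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, ea_len);
- rc = req_capsule_server_pack(&req->rq_pill);
- if (rc != 0)
- return rc;
- /* the reply buffers now exist and may be filled in */
- return 0;
-}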
-
-/**
- * Returns the PTLRPC request or reply (\a loc) buffer offset of a \a pill
- * corresponding to the given RMF (\a field).
- */
-static u32 __req_capsule_offset(const struct req_capsule *pill,
- const struct req_msg_field *field,
- enum req_location loc)
-{
- u32 offset;
-
- offset = field->rmf_offset[pill->rc_fmt->rf_idx][loc];
- LASSERTF(offset > 0, "%s:%s, off=%d, loc=%d\n", pill->rc_fmt->rf_name,
- field->rmf_name, offset, loc);
- offset--;
-
- LASSERT(offset < REQ_MAX_FIELD_NR);
- return offset;
-}
-
-/**
- * Helper for __req_capsule_get(); swabs value / array of values and/or dumps
- * them if desired.
- */
-static void swabber_dumper_helper(struct req_capsule *pill,
- const struct req_msg_field *field,
- enum req_location loc,
- int offset,
- void *value, int len, int dump, void (*swabber)(void *))
-{
- void *p;
- int i;
- int n;
- int do_swab;
- int inout = loc == RCL_CLIENT;
-
- swabber = swabber ?: field->rmf_swabber;
-
- if (ptlrpc_buf_need_swab(pill->rc_req, inout, offset) &&
- swabber && value)
- do_swab = 1;
- else
- do_swab = 0;
-
- if (!field->rmf_dumper)
- dump = 0;
-
- if (!(field->rmf_flags & RMF_F_STRUCT_ARRAY)) {
- if (dump) {
- CDEBUG(D_RPCTRACE, "Dump of %sfield %s follows\n",
- do_swab ? "unswabbed " : "", field->rmf_name);
- field->rmf_dumper(value);
- }
- if (!do_swab)
- return;
- swabber(value);
- ptlrpc_buf_set_swabbed(pill->rc_req, inout, offset);
- if (dump && field->rmf_dumper) {
- CDEBUG(D_RPCTRACE, "Dump of swabbed field %s follows\n",
- field->rmf_name);
- field->rmf_dumper(value);
- }
-
- return;
- }
-
- /*
- * We're swabbing an array; swabber() swabs a single array element, so
- * swab every element.
- */
- LASSERT((len % field->rmf_size) == 0);
- for (p = value, i = 0, n = len / field->rmf_size;
- i < n;
- i++, p += field->rmf_size) {
- if (dump) {
- CDEBUG(D_RPCTRACE, "Dump of %sarray field %s, element %d follows\n",
- do_swab ? "unswabbed " : "", field->rmf_name, i);
- field->rmf_dumper(p);
- }
- if (!do_swab)
- continue;
- swabber(p);
- if (dump) {
- CDEBUG(D_RPCTRACE, "Dump of swabbed array field %s, element %d follows\n",
- field->rmf_name, i);
- field->rmf_dumper(p);
- }
- }
- if (do_swab)
- ptlrpc_buf_set_swabbed(pill->rc_req, inout, offset);
-}
-
-/**
- * Returns the pointer to a PTLRPC request or reply (\a loc) buffer of a \a pill
- * corresponding to the given RMF (\a field).
- *
- * The buffer will be swabbed using the given \a swabber. If \a swabber == NULL
- * then the \a rmf_swabber from the RMF will be used. Soon there will be no
- * calls to __req_capsule_get() with a non-NULL \a swabber; \a swabber will then
- * be removed. Fields with the \a RMF_F_STRUCT_ARRAY flag set will have each
- * element of the array swabbed.
- */
-static void *__req_capsule_get(struct req_capsule *pill,
- const struct req_msg_field *field,
- enum req_location loc,
- void (*swabber)(void *),
- int dump)
-{
- const struct req_format *fmt;
- struct lustre_msg *msg;
- void *value;
- u32 len;
- u32 offset;
-
- void *(*getter)(struct lustre_msg *m, u32 n, u32 minlen);
-
- static const char *rcl_names[RCL_NR] = {
- [RCL_CLIENT] = "client",
- [RCL_SERVER] = "server"
- };
-
- fmt = pill->rc_fmt;
- LASSERT(fmt);
- LASSERT(fmt != LP_POISON);
- LASSERT(__req_format_is_sane(fmt));
-
- offset = __req_capsule_offset(pill, field, loc);
-
- msg = __req_msg(pill, loc);
- LASSERT(msg);
-
- getter = (field->rmf_flags & RMF_F_STRING) ?
- (typeof(getter))lustre_msg_string : lustre_msg_buf;
-
- if (field->rmf_flags & (RMF_F_STRUCT_ARRAY | RMF_F_NO_SIZE_CHECK)) {
- /*
- * We've already asserted that field->rmf_size > 0 in
- * req_layout_init().
- */
- len = lustre_msg_buflen(msg, offset);
- if (!(field->rmf_flags & RMF_F_NO_SIZE_CHECK) &&
- (len % field->rmf_size)) {
- CERROR("%s: array field size mismatch %d modulo %u != 0 (%d)\n",
- field->rmf_name, len, field->rmf_size, loc);
- return NULL;
- }
- } else if (pill->rc_area[loc][offset] != -1) {
- len = pill->rc_area[loc][offset];
- } else {
- len = max_t(typeof(field->rmf_size), field->rmf_size, 0);
- }
- value = getter(msg, offset, len);
-
- if (!value) {
- DEBUG_REQ(D_ERROR, pill->rc_req,
- "Wrong buffer for field `%s' (%u of %u) in format `%s': %u vs. %u (%s)\n",
- field->rmf_name, offset, lustre_msg_bufcount(msg),
- fmt->rf_name, lustre_msg_buflen(msg, offset), len,
- rcl_names[loc]);
- } else {
- swabber_dumper_helper(pill, field, loc, offset, value, len,
- dump, swabber);
- }
-
- return value;
-}
-
-/**
- * Trivial wrapper around __req_capsule_get(), that returns the PTLRPC request
- * buffer corresponding to the given RMF (\a field) of a \a pill.
- */
-void *req_capsule_client_get(struct req_capsule *pill,
- const struct req_msg_field *field)
-{
- return __req_capsule_get(pill, field, RCL_CLIENT, NULL, 0);
-}
-EXPORT_SYMBOL(req_capsule_client_get);
-
-/**
- * Same as req_capsule_client_get(), but with a \a swabber argument.
- *
- * Currently unused; will be removed when req_capsule_server_swab_get() is
- * unused too.
- */
-void *req_capsule_client_swab_get(struct req_capsule *pill,
- const struct req_msg_field *field,
- void *swabber)
-{
- return __req_capsule_get(pill, field, RCL_CLIENT, swabber, 0);
-}
-EXPORT_SYMBOL(req_capsule_client_swab_get);
-
-/**
- * Utility that combines req_capsule_set_size() and req_capsule_client_get().
- *
- * First the \a pill's request \a field's size is set (\a rc_area) using
- * req_capsule_set_size() with the given \a len. Then the actual buffer is
- * returned.
- */
-void *req_capsule_client_sized_get(struct req_capsule *pill,
- const struct req_msg_field *field,
- u32 len)
-{
- req_capsule_set_size(pill, field, RCL_CLIENT, len);
- return __req_capsule_get(pill, field, RCL_CLIENT, NULL, 0);
-}
-EXPORT_SYMBOL(req_capsule_client_sized_get);
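-
-/*
- * A minimal usage sketch, not from the original source, mirroring what
- * llog_client_open() below does: size a variable-length request field
- * before packing, then fetch the buffer with the sized getter. 'req' is
- * assumed to have been allocated against RQF_LLOG_ORIGIN_HANDLE_CREATE.
- */
-static inline int example_pack_name(struct ptlrpc_request *req,
- const char *name)
-{
- char *tmp;
- int rc;
-
- req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
- strlen(name) + 1);
- rc = ptlrpc_request_pack(req, LUSTRE_LOG_VERSION,
- LLOG_ORIGIN_HANDLE_CREATE);
- if (rc)
- return rc;
- tmp = req_capsule_client_sized_get(&req->rq_pill, &RMF_NAME,
- strlen(name) + 1);
- if (!tmp)
- return -EFAULT;
- strcpy(tmp, name);
- return 0;
-}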
-
-/**
- * Trivial wrapper around __req_capsule_get(), that returns the PTLRPC reply
- * buffer corresponding to the given RMF (\a field) of a \a pill.
- */
-void *req_capsule_server_get(struct req_capsule *pill,
- const struct req_msg_field *field)
-{
- return __req_capsule_get(pill, field, RCL_SERVER, NULL, 0);
-}
-EXPORT_SYMBOL(req_capsule_server_get);
-
-/**
- * Same as req_capsule_server_get(), but with a \a swabber argument.
- *
- * Ideally all swabbing should be done pursuant to RMF definitions, with no
- * swabbing done outside this capsule abstraction.
- */
-void *req_capsule_server_swab_get(struct req_capsule *pill,
- const struct req_msg_field *field,
- void *swabber)
-{
- return __req_capsule_get(pill, field, RCL_SERVER, swabber, 0);
-}
-EXPORT_SYMBOL(req_capsule_server_swab_get);
-
-/**
- * Utility that combines req_capsule_set_size() and req_capsule_server_get().
- *
- * First the \a pill's request \a field's size is set (\a rc_area) using
- * req_capsule_set_size() with the given \a len. Then the actual buffer is
- * returned.
- */
-void *req_capsule_server_sized_get(struct req_capsule *pill,
- const struct req_msg_field *field,
- u32 len)
-{
- req_capsule_set_size(pill, field, RCL_SERVER, len);
- return __req_capsule_get(pill, field, RCL_SERVER, NULL, 0);
-}
-EXPORT_SYMBOL(req_capsule_server_sized_get);
-
-void *req_capsule_server_sized_swab_get(struct req_capsule *pill,
- const struct req_msg_field *field,
- u32 len, void *swabber)
-{
- req_capsule_set_size(pill, field, RCL_SERVER, len);
- return __req_capsule_get(pill, field, RCL_SERVER, swabber, 0);
-}
-EXPORT_SYMBOL(req_capsule_server_sized_swab_get);
-
-/**
- * Set the size of the PTLRPC request/reply (\a loc) buffer for the given \a
- * field of the given \a pill.
- *
- * This function must be used when constructing variable sized fields of a
- * request or reply.
- */
-void req_capsule_set_size(struct req_capsule *pill,
- const struct req_msg_field *field,
- enum req_location loc, u32 size)
-{
- LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
-
- if ((size != (u32)field->rmf_size) &&
- (field->rmf_size != -1) &&
- !(field->rmf_flags & RMF_F_NO_SIZE_CHECK) &&
- (size > 0)) {
- u32 rmf_size = (u32)field->rmf_size;
-
- if ((field->rmf_flags & RMF_F_STRUCT_ARRAY) &&
- (size % rmf_size != 0)) {
- CERROR("%s: array field size mismatch %u %% %u != 0 (%d)\n",
- field->rmf_name, size, rmf_size, loc);
- LBUG();
- } else if (!(field->rmf_flags & RMF_F_STRUCT_ARRAY) &&
- size < rmf_size) {
- CERROR("%s: field size mismatch %u != %u (%d)\n",
- field->rmf_name, size, rmf_size, loc);
- LBUG();
- }
- }
-
- pill->rc_area[loc][__req_capsule_offset(pill, field, loc)] = size;
-}
-EXPORT_SYMBOL(req_capsule_set_size);
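-
-/*
- * Illustrative note, not from the original source: for a field with
- * RMF_F_STRUCT_ARRAY set, the size passed here must be a whole multiple
- * of the element size or the array-size check above LBUG()s, e.g. for
- * the HSM user item array:
- *
- *   req_capsule_set_size(&req->rq_pill, &RMF_MDS_HSM_USER_ITEM,
- *                        RCL_CLIENT,
- *                        nr_items * sizeof(struct hsm_user_item));
- *
- * 'nr_items' stands in for the number of items the caller wants to send.
- */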
-
-/**
- * Return the actual PTLRPC buffer length of a request or reply (\a loc)
- * for the given \a pill's given \a field.
- *
- * NB: this function is not the inverse of req_capsule_set_size(): that
- * function sets the size in pill.rc_area[loc][offset], whereas this one
- * returns the message buflen[offset]; perhaps it deserves another name.
- */
-u32 req_capsule_get_size(const struct req_capsule *pill,
- const struct req_msg_field *field,
- enum req_location loc)
-{
- LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
-
- return lustre_msg_buflen(__req_msg(pill, loc),
- __req_capsule_offset(pill, field, loc));
-}
-EXPORT_SYMBOL(req_capsule_get_size);
-
-/**
- * Wrapper around lustre_msg_size() that returns the PTLRPC size needed for the
- * given \a pill's request or reply (\a loc) given the field size recorded in
- * the \a pill's rc_area.
- *
- * See also req_capsule_set_size().
- */
-u32 req_capsule_msg_size(struct req_capsule *pill, enum req_location loc)
-{
- return lustre_msg_size(pill->rc_req->rq_import->imp_msg_magic,
- pill->rc_fmt->rf_fields[loc].nr,
- pill->rc_area[loc]);
-}
-
-/**
- * While req_capsule_msg_size() computes the size of a PTLRPC request or reply
- * (\a loc) given a \a pill's \a rc_area, this function computes the size of a
- * PTLRPC request or reply given only an RQF (\a fmt).
- *
- * This function should not be used for formats which contain variable size
- * fields.
- */
-u32 req_capsule_fmt_size(__u32 magic, const struct req_format *fmt,
- enum req_location loc)
-{
- size_t i = 0;
- u32 size;
-
- /*
- * This function should probably LASSERT() that fmt has no fields with
- * RMF_F_STRUCT_ARRAY in rmf_flags, since we cannot know here how many
- * elements the array will ultimately contain. Instead, we simply assume
- * that there will be at least one element, and that is just what we do.
- */
- size = lustre_msg_hdr_size(magic, fmt->rf_fields[loc].nr);
- if (!size)
- return size;
-
- for (; i < fmt->rf_fields[loc].nr; ++i)
- if (fmt->rf_fields[loc].d[i]->rmf_size != -1)
- size += cfs_size_round(fmt->rf_fields[loc].d[i]->
- rmf_size);
- return size;
-}
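-
-/*
- * Illustrative note, not from the original source: this is handy for
- * formats with only fixed-size fields, e.g. to learn how large an
- * MDS_STATFS reply will be for a given wire magic:
- *
- *   u32 replen = req_capsule_fmt_size(LUSTRE_MSG_MAGIC_V2,
- *                                     &RQF_MDS_STATFS, RCL_SERVER);
- *
- * obd_statfs_server contains only fixed-size fields, so the
- * rmf_size != -1 test above skips nothing.
- */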
-
-/**
- * Changes the format of an RPC.
- *
- * The pill must already have been initialized, which means that it already has
- * a request format. The new format \a fmt must be an extension of the pill's
- * old format. Specifically: the new format must have as many request and reply
- * fields as the old one, and all fields shared by the old and new format must
- * be at least as large in the new format.
- *
- * The new format's fields may be of different "type" than the old format, but
- * only for fields that are "opaque" blobs: fields which a) have no
- * \a rmf_swabber, b) \a rmf_flags == 0 or RMF_F_NO_SIZE_CHECK, and c) \a
- * rmf_size == -1 or \a rmf_flags == RMF_F_NO_SIZE_CHECK. For example,
- * OBD_SET_INFO has a key field and an opaque value field that gets interpreted
- * according to the key field. When the value, according to the key, contains a
- * structure (or array thereof) to be swabbed, the format should be changed to
- * one where the value field has \a rmf_size/rmf_flags/rmf_swabber set
- * accordingly.
- */
-void req_capsule_extend(struct req_capsule *pill, const struct req_format *fmt)
-{
- int i;
- size_t j;
-
- const struct req_format *old;
-
- LASSERT(pill->rc_fmt);
- LASSERT(__req_format_is_sane(fmt));
-
- old = pill->rc_fmt;
- /*
- * Sanity checking...
- */
- for (i = 0; i < RCL_NR; ++i) {
- LASSERT(fmt->rf_fields[i].nr >= old->rf_fields[i].nr);
- for (j = 0; j < old->rf_fields[i].nr - 1; ++j) {
- const struct req_msg_field *ofield = FMT_FIELD(old, i, j);
-
- /* "opaque" fields can be transmogrified */
- if (!ofield->rmf_swabber &&
- (ofield->rmf_flags & ~RMF_F_NO_SIZE_CHECK) == 0 &&
- (ofield->rmf_size == -1 ||
- ofield->rmf_flags == RMF_F_NO_SIZE_CHECK))
- continue;
- LASSERT(FMT_FIELD(fmt, i, j) == FMT_FIELD(old, i, j));
- }
- /*
- * Last field in old format can be shorter than in new.
- */
- LASSERT(FMT_FIELD(fmt, i, j)->rmf_size >=
- FMT_FIELD(old, i, j)->rmf_size);
- }
-
- pill->rc_fmt = fmt;
-}
-EXPORT_SYMBOL(req_capsule_extend);
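-
-/*
- * A sketch, not from the original source; the format name and predicate
- * below are hypothetical. A server handler that has inspected an opaque
- * key/value pair can switch to a more specific format so that later
- * getters swab the value correctly:
- *
- *   if (key_names_a_swabbed_struct(key))
- *       req_capsule_extend(&req->rq_pill, &RQF_OBD_SET_INFO_TYPED);
- *
- * The hypothetical RQF_OBD_SET_INFO_TYPED would differ from
- * RQF_OBD_SET_INFO only in giving the value field a real rmf_size and
- * rmf_swabber, which is exactly the "opaque blob" transmogrification the
- * checks above permit.
- */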
-
-/**
- * This function returns a non-zero value if the given \a field is present in
- * the format (\a rc_fmt) of \a pill's PTLRPC request or reply (\a loc), else it
- * returns 0.
- */
-int req_capsule_has_field(const struct req_capsule *pill,
- const struct req_msg_field *field,
- enum req_location loc)
-{
- LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
-
- return field->rmf_offset[pill->rc_fmt->rf_idx][loc];
-}
-EXPORT_SYMBOL(req_capsule_has_field);
-
-/**
- * Returns a non-zero value if the given \a field is present in the given \a
- * pill's PTLRPC request or reply (\a loc), else it returns 0.
- */
-static int req_capsule_field_present(const struct req_capsule *pill,
- const struct req_msg_field *field,
- enum req_location loc)
-{
- u32 offset;
-
- LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
- LASSERT(req_capsule_has_field(pill, field, loc));
-
- offset = __req_capsule_offset(pill, field, loc);
- return lustre_msg_bufcount(__req_msg(pill, loc)) > offset;
-}
-
-/**
- * This function shrinks the size of the _buffer_ of the \a pill's PTLRPC
- * request or reply (\a loc).
- *
- * This is not the opposite of req_capsule_extend().
- */
-void req_capsule_shrink(struct req_capsule *pill,
- const struct req_msg_field *field,
- u32 newlen, enum req_location loc)
-{
- const struct req_format *fmt;
- struct lustre_msg *msg;
- u32 len;
- int offset;
-
- fmt = pill->rc_fmt;
- LASSERT(fmt);
- LASSERT(__req_format_is_sane(fmt));
- LASSERT(req_capsule_has_field(pill, field, loc));
- LASSERT(req_capsule_field_present(pill, field, loc));
-
- offset = __req_capsule_offset(pill, field, loc);
-
- msg = __req_msg(pill, loc);
- len = lustre_msg_buflen(msg, offset);
- LASSERTF(newlen <= len, "%s:%s, oldlen=%u, newlen=%u\n",
- fmt->rf_name, field->rmf_name, len, newlen);
-
- if (loc == RCL_CLIENT)
- pill->rc_req->rq_reqlen = lustre_shrink_msg(msg, offset, newlen,
- 1);
- else
- pill->rc_req->rq_replen = lustre_shrink_msg(msg, offset, newlen,
- 1);
-}
-EXPORT_SYMBOL(req_capsule_shrink);
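-
-/*
- * Illustrative note, not from the original source: the common pattern is
- * to pack a reply at the maximum possible size and shrink it once the
- * actual payload length is known, so fewer bytes cross the wire:
- *
- *   req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, max_len);
- *   rc = req_capsule_server_pack(&req->rq_pill);
- *   (fill the buffer, computing 'used' <= max_len)
- *   req_capsule_shrink(&req->rq_pill, &RMF_EADATA, used, RCL_SERVER);
- *
- * 'max_len' and 'used' are stand-ins for values the handler computes.
- */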
diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_client.c b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c
deleted file mode 100644
index 946d538121de..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/llog_client.c
+++ /dev/null
@@ -1,338 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ptlrpc/llog_client.c
- *
- * remote api for llog - client side
- *
- * Author: Andreas Dilger <adilger@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LOG
-
-#include <obd_class.h>
-#include <lustre_log.h>
-#include <lustre_net.h>
-#include <linux/list.h>
-
-#define LLOG_CLIENT_ENTRY(ctxt, imp) do { \
- mutex_lock(&ctxt->loc_mutex); \
- if (ctxt->loc_imp) { \
- imp = class_import_get(ctxt->loc_imp); \
- } else { \
- CERROR("ctxt->loc_imp == NULL for context idx %d." \
- "Unable to complete MDS/OSS recovery," \
- "but I'll try again next time. Not fatal.\n", \
- ctxt->loc_idx); \
- imp = NULL; \
- mutex_unlock(&ctxt->loc_mutex); \
- return (-EINVAL); \
- } \
- mutex_unlock(&ctxt->loc_mutex); \
-} while (0)
-
-#define LLOG_CLIENT_EXIT(ctxt, imp) do { \
- mutex_lock(&ctxt->loc_mutex); \
- if (ctxt->loc_imp != imp) \
- CWARN("loc_imp has changed from %p to %p\n", \
- ctxt->loc_imp, imp); \
- class_import_put(imp); \
- mutex_unlock(&ctxt->loc_mutex); \
-} while (0)
-
-/* This is a callback from the llog_* functions.
- * Assumes caller has already pushed us into the kernel context.
- */
-static int llog_client_open(const struct lu_env *env,
- struct llog_handle *lgh, struct llog_logid *logid,
- char *name, enum llog_open_param open_param)
-{
- struct obd_import *imp;
- struct llogd_body *body;
- struct llog_ctxt *ctxt = lgh->lgh_ctxt;
- struct ptlrpc_request *req = NULL;
- int rc;
-
- LLOG_CLIENT_ENTRY(ctxt, imp);
-
- /* client cannot create llog */
- LASSERTF(open_param != LLOG_OPEN_NEW, "%#x\n", open_param);
- LASSERT(lgh);
-
- req = ptlrpc_request_alloc(imp, &RQF_LLOG_ORIGIN_HANDLE_CREATE);
- if (!req) {
- rc = -ENOMEM;
- goto out;
- }
-
- if (name)
- req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
- strlen(name) + 1);
-
- rc = ptlrpc_request_pack(req, LUSTRE_LOG_VERSION,
- LLOG_ORIGIN_HANDLE_CREATE);
- if (rc) {
- ptlrpc_request_free(req);
- req = NULL;
- goto out;
- }
- ptlrpc_request_set_replen(req);
-
- body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
- if (logid)
- body->lgd_logid = *logid;
- body->lgd_ctxt_idx = ctxt->loc_idx - 1;
-
- if (name) {
- char *tmp;
-
- tmp = req_capsule_client_sized_get(&req->rq_pill, &RMF_NAME,
- strlen(name) + 1);
- LASSERT(tmp);
- strcpy(tmp, name);
- }
-
- rc = ptlrpc_queue_wait(req);
- if (rc)
- goto out;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY);
- if (!body) {
- rc = -EFAULT;
- goto out;
- }
-
- lgh->lgh_id = body->lgd_logid;
- lgh->lgh_ctxt = ctxt;
-out:
- LLOG_CLIENT_EXIT(ctxt, imp);
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static int llog_client_next_block(const struct lu_env *env,
- struct llog_handle *loghandle,
- int *cur_idx, int next_idx,
- __u64 *cur_offset, void *buf, int len)
-{
- struct obd_import *imp;
- struct ptlrpc_request *req = NULL;
- struct llogd_body *body;
- void *ptr;
- int rc;
-
- LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp);
- req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK,
- LUSTRE_LOG_VERSION,
- LLOG_ORIGIN_HANDLE_NEXT_BLOCK);
- if (!req) {
- rc = -ENOMEM;
- goto err_exit;
- }
-
- body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
- body->lgd_logid = loghandle->lgh_id;
- body->lgd_ctxt_idx = loghandle->lgh_ctxt->loc_idx - 1;
- body->lgd_llh_flags = loghandle->lgh_hdr->llh_flags;
- body->lgd_index = next_idx;
- body->lgd_saved_index = *cur_idx;
- body->lgd_len = len;
- body->lgd_cur_offset = *cur_offset;
-
- req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, len);
- ptlrpc_request_set_replen(req);
- rc = ptlrpc_queue_wait(req);
- if (rc)
- goto out;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY);
- if (!body) {
- rc = -EFAULT;
- goto out;
- }
-
- /* The log records are swabbed as they are processed */
- ptr = req_capsule_server_get(&req->rq_pill, &RMF_EADATA);
- if (!ptr) {
- rc = -EFAULT;
- goto out;
- }
-
- *cur_idx = body->lgd_saved_index;
- *cur_offset = body->lgd_cur_offset;
-
- memcpy(buf, ptr, len);
-out:
- ptlrpc_req_finished(req);
-err_exit:
- LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp);
- return rc;
-}
-
-static int llog_client_prev_block(const struct lu_env *env,
- struct llog_handle *loghandle,
- int prev_idx, void *buf, int len)
-{
- struct obd_import *imp;
- struct ptlrpc_request *req = NULL;
- struct llogd_body *body;
- void *ptr;
- int rc;
-
- LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp);
- req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK,
- LUSTRE_LOG_VERSION,
- LLOG_ORIGIN_HANDLE_PREV_BLOCK);
- if (!req) {
- rc = -ENOMEM;
- goto err_exit;
- }
-
- body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
- body->lgd_logid = loghandle->lgh_id;
- body->lgd_ctxt_idx = loghandle->lgh_ctxt->loc_idx - 1;
- body->lgd_llh_flags = loghandle->lgh_hdr->llh_flags;
- body->lgd_index = prev_idx;
- body->lgd_len = len;
-
- req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, len);
- ptlrpc_request_set_replen(req);
-
- rc = ptlrpc_queue_wait(req);
- if (rc)
- goto out;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY);
- if (!body) {
- rc = -EFAULT;
- goto out;
- }
-
- ptr = req_capsule_server_get(&req->rq_pill, &RMF_EADATA);
- if (!ptr) {
- rc = -EFAULT;
- goto out;
- }
-
- memcpy(buf, ptr, len);
-out:
- ptlrpc_req_finished(req);
-err_exit:
- LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp);
- return rc;
-}
-
-static int llog_client_read_header(const struct lu_env *env,
- struct llog_handle *handle)
-{
- struct obd_import *imp;
- struct ptlrpc_request *req = NULL;
- struct llogd_body *body;
- struct llog_log_hdr *hdr;
- struct llog_rec_hdr *llh_hdr;
- int rc;
-
- LLOG_CLIENT_ENTRY(handle->lgh_ctxt, imp);
- req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_READ_HEADER,
- LUSTRE_LOG_VERSION,
- LLOG_ORIGIN_HANDLE_READ_HEADER);
- if (!req) {
- rc = -ENOMEM;
- goto err_exit;
- }
-
- body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
- body->lgd_logid = handle->lgh_id;
- body->lgd_ctxt_idx = handle->lgh_ctxt->loc_idx - 1;
- body->lgd_llh_flags = handle->lgh_hdr->llh_flags;
-
- ptlrpc_request_set_replen(req);
- rc = ptlrpc_queue_wait(req);
- if (rc)
- goto out;
-
- hdr = req_capsule_server_get(&req->rq_pill, &RMF_LLOG_LOG_HDR);
- if (!hdr) {
- rc = -EFAULT;
- goto out;
- }
-
- if (handle->lgh_hdr_size < hdr->llh_hdr.lrh_len) {
- rc = -EFAULT;
- goto out;
- }
-
- memcpy(handle->lgh_hdr, hdr, hdr->llh_hdr.lrh_len);
- handle->lgh_last_idx = LLOG_HDR_TAIL(handle->lgh_hdr)->lrt_index;
-
- /* sanity checks */
- llh_hdr = &handle->lgh_hdr->llh_hdr;
- if (llh_hdr->lrh_type != LLOG_HDR_MAGIC) {
- CERROR("bad log header magic: %#x (expecting %#x)\n",
- llh_hdr->lrh_type, LLOG_HDR_MAGIC);
- rc = -EIO;
- } else if (llh_hdr->lrh_len !=
- LLOG_HDR_TAIL(handle->lgh_hdr)->lrt_len ||
- (llh_hdr->lrh_len & (llh_hdr->lrh_len - 1)) ||
- llh_hdr->lrh_len < LLOG_MIN_CHUNK_SIZE ||
- llh_hdr->lrh_len > handle->lgh_hdr_size) {
- CERROR("incorrectly sized log header: %#x (expecting %#x) (power of two > 8192)\n",
- llh_hdr->lrh_len,
- LLOG_HDR_TAIL(handle->lgh_hdr)->lrt_len);
- CERROR("you may need to re-run lconf --write_conf.\n");
- rc = -EIO;
- }
-out:
- ptlrpc_req_finished(req);
-err_exit:
- LLOG_CLIENT_EXIT(handle->lgh_ctxt, imp);
- return rc;
-}
-
-static int llog_client_close(const struct lu_env *env,
- struct llog_handle *handle)
-{
- /* this doesn't call LLOG_ORIGIN_HANDLE_CLOSE because
- * the servers all close the file at the end of every
- * other LLOG_ RPC.
- */
- return 0;
-}
-
-struct llog_operations llog_client_ops = {
- .lop_next_block = llog_client_next_block,
- .lop_prev_block = llog_client_prev_block,
- .lop_read_header = llog_client_read_header,
- .lop_open = llog_client_open,
- .lop_close = llog_client_close,
-};
-EXPORT_SYMBOL(llog_client_ops);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_net.c b/drivers/staging/lustre/lustre/ptlrpc/llog_net.c
deleted file mode 100644
index b871d9e40a9e..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/llog_net.c
+++ /dev/null
@@ -1,67 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ptlrpc/llog_net.c
- *
- * OST<->MDS recovery logging infrastructure.
- *
- * Invariants in implementation:
- * - we do not share logs among different OST<->MDS connections, so that
- * if an OST or MDS fails it need only look at log(s) relevant to itself
- *
- * Author: Andreas Dilger <adilger@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LOG
-
-#include <obd_class.h>
-#include <lustre_log.h>
-#include <linux/list.h>
-
-int llog_initiator_connect(struct llog_ctxt *ctxt)
-{
- struct obd_import *new_imp;
-
- LASSERT(ctxt);
- new_imp = ctxt->loc_obd->u.cli.cl_import;
- LASSERTF(!ctxt->loc_imp || ctxt->loc_imp == new_imp,
- "%p - %p\n", ctxt->loc_imp, new_imp);
- mutex_lock(&ctxt->loc_mutex);
- if (ctxt->loc_imp != new_imp) {
- if (ctxt->loc_imp)
- class_import_put(ctxt->loc_imp);
- ctxt->loc_imp = class_import_get(new_imp);
- }
- mutex_unlock(&ctxt->loc_mutex);
- return 0;
-}
-EXPORT_SYMBOL(llog_initiator_connect);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
deleted file mode 100644
index 0b638837f88b..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
+++ /dev/null
@@ -1,1316 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <obd_support.h>
-#include <obd.h>
-#include <lprocfs_status.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <lustre_net.h>
-#include <obd_class.h>
-#include "ptlrpc_internal.h"
-
-static struct ll_rpc_opcode {
- __u32 opcode;
- const char *opname;
-} ll_rpc_opcode_table[LUSTRE_MAX_OPCODES] = {
- { OST_REPLY, "ost_reply" },
- { OST_GETATTR, "ost_getattr" },
- { OST_SETATTR, "ost_setattr" },
- { OST_READ, "ost_read" },
- { OST_WRITE, "ost_write" },
- { OST_CREATE, "ost_create" },
- { OST_DESTROY, "ost_destroy" },
- { OST_GET_INFO, "ost_get_info" },
- { OST_CONNECT, "ost_connect" },
- { OST_DISCONNECT, "ost_disconnect" },
- { OST_PUNCH, "ost_punch" },
- { OST_OPEN, "ost_open" },
- { OST_CLOSE, "ost_close" },
- { OST_STATFS, "ost_statfs" },
- { 14, NULL }, /* formerly OST_SAN_READ */
- { 15, NULL }, /* formerly OST_SAN_WRITE */
- { OST_SYNC, "ost_sync" },
- { OST_SET_INFO, "ost_set_info" },
- { OST_QUOTACHECK, "ost_quotacheck" },
- { OST_QUOTACTL, "ost_quotactl" },
- { OST_QUOTA_ADJUST_QUNIT, "ost_quota_adjust_qunit" },
- { MDS_GETATTR, "mds_getattr" },
- { MDS_GETATTR_NAME, "mds_getattr_lock" },
- { MDS_CLOSE, "mds_close" },
- { MDS_REINT, "mds_reint" },
- { MDS_READPAGE, "mds_readpage" },
- { MDS_CONNECT, "mds_connect" },
- { MDS_DISCONNECT, "mds_disconnect" },
- { MDS_GETSTATUS, "mds_getstatus" },
- { MDS_STATFS, "mds_statfs" },
- { MDS_PIN, "mds_pin" },
- { MDS_UNPIN, "mds_unpin" },
- { MDS_SYNC, "mds_sync" },
- { MDS_DONE_WRITING, "mds_done_writing" },
- { MDS_SET_INFO, "mds_set_info" },
- { MDS_QUOTACHECK, "mds_quotacheck" },
- { MDS_QUOTACTL, "mds_quotactl" },
- { MDS_GETXATTR, "mds_getxattr" },
- { MDS_SETXATTR, "mds_setxattr" },
- { MDS_WRITEPAGE, "mds_writepage" },
- { MDS_IS_SUBDIR, "mds_is_subdir" },
- { MDS_GET_INFO, "mds_get_info" },
- { MDS_HSM_STATE_GET, "mds_hsm_state_get" },
- { MDS_HSM_STATE_SET, "mds_hsm_state_set" },
- { MDS_HSM_ACTION, "mds_hsm_action" },
- { MDS_HSM_PROGRESS, "mds_hsm_progress" },
- { MDS_HSM_REQUEST, "mds_hsm_request" },
- { MDS_HSM_CT_REGISTER, "mds_hsm_ct_register" },
- { MDS_HSM_CT_UNREGISTER, "mds_hsm_ct_unregister" },
- { MDS_SWAP_LAYOUTS, "mds_swap_layouts" },
- { LDLM_ENQUEUE, "ldlm_enqueue" },
- { LDLM_CONVERT, "ldlm_convert" },
- { LDLM_CANCEL, "ldlm_cancel" },
- { LDLM_BL_CALLBACK, "ldlm_bl_callback" },
- { LDLM_CP_CALLBACK, "ldlm_cp_callback" },
- { LDLM_GL_CALLBACK, "ldlm_gl_callback" },
- { LDLM_SET_INFO, "ldlm_set_info" },
- { MGS_CONNECT, "mgs_connect" },
- { MGS_DISCONNECT, "mgs_disconnect" },
- { MGS_EXCEPTION, "mgs_exception" },
- { MGS_TARGET_REG, "mgs_target_reg" },
- { MGS_TARGET_DEL, "mgs_target_del" },
- { MGS_SET_INFO, "mgs_set_info" },
- { MGS_CONFIG_READ, "mgs_config_read" },
- { OBD_PING, "obd_ping" },
- { OBD_LOG_CANCEL, "llog_cancel" },
- { OBD_QC_CALLBACK, "obd_quota_callback" },
- { OBD_IDX_READ, "dt_index_read" },
- { LLOG_ORIGIN_HANDLE_CREATE, "llog_origin_handle_open" },
- { LLOG_ORIGIN_HANDLE_NEXT_BLOCK, "llog_origin_handle_next_block" },
- { LLOG_ORIGIN_HANDLE_READ_HEADER, "llog_origin_handle_read_header" },
- { LLOG_ORIGIN_HANDLE_WRITE_REC, "llog_origin_handle_write_rec" },
- { LLOG_ORIGIN_HANDLE_CLOSE, "llog_origin_handle_close" },
- { LLOG_ORIGIN_CONNECT, "llog_origin_connect" },
- { LLOG_CATINFO, "llog_catinfo" },
- { LLOG_ORIGIN_HANDLE_PREV_BLOCK, "llog_origin_handle_prev_block" },
- { LLOG_ORIGIN_HANDLE_DESTROY, "llog_origin_handle_destroy" },
- { QUOTA_DQACQ, "quota_acquire" },
- { QUOTA_DQREL, "quota_release" },
- { SEQ_QUERY, "seq_query" },
- { SEC_CTX_INIT, "sec_ctx_init" },
- { SEC_CTX_INIT_CONT, "sec_ctx_init_cont" },
- { SEC_CTX_FINI, "sec_ctx_fini" },
- { FLD_QUERY, "fld_query" },
- { FLD_READ, "fld_read" },
-};
-
-static struct ll_eopcode {
- __u32 opcode;
- const char *opname;
-} ll_eopcode_table[EXTRA_LAST_OPC] = {
- { LDLM_GLIMPSE_ENQUEUE, "ldlm_glimpse_enqueue" },
- { LDLM_PLAIN_ENQUEUE, "ldlm_plain_enqueue" },
- { LDLM_EXTENT_ENQUEUE, "ldlm_extent_enqueue" },
- { LDLM_FLOCK_ENQUEUE, "ldlm_flock_enqueue" },
- { LDLM_IBITS_ENQUEUE, "ldlm_ibits_enqueue" },
- { MDS_REINT_SETATTR, "mds_reint_setattr" },
- { MDS_REINT_CREATE, "mds_reint_create" },
- { MDS_REINT_LINK, "mds_reint_link" },
- { MDS_REINT_UNLINK, "mds_reint_unlink" },
- { MDS_REINT_RENAME, "mds_reint_rename" },
- { MDS_REINT_OPEN, "mds_reint_open" },
- { MDS_REINT_SETXATTR, "mds_reint_setxattr" },
- { BRW_READ_BYTES, "read_bytes" },
- { BRW_WRITE_BYTES, "write_bytes" },
-};
-
-const char *ll_opcode2str(__u32 opcode)
-{
- /* When one of the assertions below fails, chances are that:
- * 1) A new opcode was added in include/lustre/lustre_idl.h,
- * but is missing from the table above.
- * or 2) The opcode space was renumbered or rearranged,
- * and the opcode_offset() function in
- * ptlrpc_internal.h needs to be modified.
- */
- __u32 offset = opcode_offset(opcode);
-
- LASSERTF(offset < LUSTRE_MAX_OPCODES,
- "offset %u >= LUSTRE_MAX_OPCODES %u\n",
- offset, LUSTRE_MAX_OPCODES);
- LASSERTF(ll_rpc_opcode_table[offset].opcode == opcode,
- "ll_rpc_opcode_table[%u].opcode %u != opcode %u\n",
- offset, ll_rpc_opcode_table[offset].opcode, opcode);
- return ll_rpc_opcode_table[offset].opname;
-}
-
-static const char *ll_eopcode2str(__u32 opcode)
-{
- LASSERT(ll_eopcode_table[opcode].opcode == opcode);
- return ll_eopcode_table[opcode].opname;
-}
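The assertions in ll_opcode2str() encode the invariant that every table entry
sits at the offset its opcode maps to. Below is a toy, stand-alone version of
the same consistency check; the table, names and offset function are
illustrative stand-ins, not the real Lustre opcode space.

    #include <stdio.h>

    struct rpc_opcode {
            unsigned int opcode;
            const char *opname;
    };

    /* stand-ins for ll_rpc_opcode_table[] and opcode_offset() */
    static const struct rpc_opcode table[] = {
            { 0, "ost_reply" },
            { 1, "ost_getattr" },
            { 2, "ost_setattr" },
    };

    static unsigned int toy_opcode_offset(unsigned int opcode)
    {
            return opcode;  /* the real function maps several opcode ranges */
    }

    int main(void)
    {
            unsigned int i;

            /* every entry must sit at the offset its opcode maps to */
            for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
                    if (toy_opcode_offset(table[i].opcode) != i)
                            printf("entry %u out of sync\n", i);
            return 0;
    }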
-
-static void
-ptlrpc_ldebugfs_register(struct dentry *root, char *dir,
- char *name,
- struct dentry **debugfs_root_ret,
- struct lprocfs_stats **stats_ret)
-{
- struct dentry *svc_debugfs_entry;
- struct lprocfs_stats *svc_stats;
- int i;
- unsigned int svc_counter_config = LPROCFS_CNTR_AVGMINMAX |
- LPROCFS_CNTR_STDDEV;
-
- LASSERT(!*debugfs_root_ret);
- LASSERT(!*stats_ret);
-
- svc_stats = lprocfs_alloc_stats(EXTRA_MAX_OPCODES + LUSTRE_MAX_OPCODES,
- 0);
- if (!svc_stats)
- return;
-
- if (dir)
- svc_debugfs_entry = debugfs_create_dir(dir, root);
- else
- svc_debugfs_entry = root;
-
- lprocfs_counter_init(svc_stats, PTLRPC_REQWAIT_CNTR,
- svc_counter_config, "req_waittime", "usec");
- lprocfs_counter_init(svc_stats, PTLRPC_REQQDEPTH_CNTR,
- svc_counter_config, "req_qdepth", "reqs");
- lprocfs_counter_init(svc_stats, PTLRPC_REQACTIVE_CNTR,
- svc_counter_config, "req_active", "reqs");
- lprocfs_counter_init(svc_stats, PTLRPC_TIMEOUT,
- svc_counter_config, "req_timeout", "sec");
- lprocfs_counter_init(svc_stats, PTLRPC_REQBUF_AVAIL_CNTR,
- svc_counter_config, "reqbuf_avail", "bufs");
- for (i = 0; i < EXTRA_LAST_OPC; i++) {
- char *units;
-
- switch (i) {
- case BRW_WRITE_BYTES:
- case BRW_READ_BYTES:
- units = "bytes";
- break;
- default:
- units = "reqs";
- break;
- }
- lprocfs_counter_init(svc_stats, PTLRPC_LAST_CNTR + i,
- svc_counter_config,
- ll_eopcode2str(i), units);
- }
- for (i = 0; i < LUSTRE_MAX_OPCODES; i++) {
- __u32 opcode = ll_rpc_opcode_table[i].opcode;
-
- lprocfs_counter_init(svc_stats,
- EXTRA_MAX_OPCODES + i, svc_counter_config,
- ll_opcode2str(opcode), "usec");
- }
-
- debugfs_create_file("stats", 0644, svc_debugfs_entry, svc_stats,
- &lprocfs_stats_seq_fops);
- if (dir)
- *debugfs_root_ret = svc_debugfs_entry;
- *stats_ret = svc_stats;
-}
-
-static int
-ptlrpc_lprocfs_req_history_len_seq_show(struct seq_file *m, void *v)
-{
- struct ptlrpc_service *svc = m->private;
- struct ptlrpc_service_part *svcpt;
- int total = 0;
- int i;
-
- ptlrpc_service_for_each_part(svcpt, i, svc)
- total += svcpt->scp_hist_nrqbds;
-
- seq_printf(m, "%d\n", total);
- return 0;
-}
-
-LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_req_history_len);
-
-static int
-ptlrpc_lprocfs_req_history_max_seq_show(struct seq_file *m, void *n)
-{
- struct ptlrpc_service *svc = m->private;
- struct ptlrpc_service_part *svcpt;
- int total = 0;
- int i;
-
- ptlrpc_service_for_each_part(svcpt, i, svc)
- total += svc->srv_hist_nrqbds_cpt_max;
-
- seq_printf(m, "%d\n", total);
- return 0;
-}
-
-static ssize_t
-ptlrpc_lprocfs_req_history_max_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private;
- int bufpages;
- int val;
- int rc;
-
- rc = lprocfs_write_helper(buffer, count, &val);
- if (rc < 0)
- return rc;
-
- if (val < 0)
- return -ERANGE;
-
- /* This sanity check is more of an insanity check; we can still
- * hose a kernel by allowing the request history to grow too
- * far.
- */
- bufpages = (svc->srv_buf_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
- if (val > totalram_pages / (2 * bufpages))
- return -ERANGE;
-
- spin_lock(&svc->srv_lock);
-
- if (val == 0)
- svc->srv_hist_nrqbds_cpt_max = 0;
- else
- svc->srv_hist_nrqbds_cpt_max = max(1, (val / svc->srv_ncpts));
-
- spin_unlock(&svc->srv_lock);
-
- return count;
-}
-
-LPROC_SEQ_FOPS(ptlrpc_lprocfs_req_history_max);
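The "insanity check" in the write handler above caps the accepted value so
that, in the worst case, request-history buffers can pin only about half of
RAM. A worked sketch of that bound with hypothetical numbers (not part of the
deleted file):

    #include <stdio.h>

    int main(void)
    {
            /* hypothetical: 4 GiB of 4 KiB pages, 16 KiB request buffers */
            unsigned long totalram_pages = 1048576;
            unsigned long srv_buf_size = 16384, page_size = 4096;
            unsigned long bufpages = (srv_buf_size + page_size - 1) / page_size;

            /* largest value the handler accepts without -ERANGE */
            printf("max req_buffer_history_max: %lu\n",
                   totalram_pages / (2 * bufpages));
            return 0;
    }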
-
-static ssize_t threads_min_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ptlrpc_service *svc = container_of(kobj, struct ptlrpc_service,
- srv_kobj);
-
- return sprintf(buf, "%d\n", svc->srv_nthrs_cpt_init * svc->srv_ncpts);
-}
-
-static ssize_t threads_min_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer, size_t count)
-{
- struct ptlrpc_service *svc = container_of(kobj, struct ptlrpc_service,
- srv_kobj);
- unsigned long val;
- int rc = kstrtoul(buffer, 10, &val);
-
- if (rc < 0)
- return rc;
-
- if (val / svc->srv_ncpts < PTLRPC_NTHRS_INIT)
- return -ERANGE;
-
- spin_lock(&svc->srv_lock);
- if (val > svc->srv_nthrs_cpt_limit * svc->srv_ncpts) {
- spin_unlock(&svc->srv_lock);
- return -ERANGE;
- }
-
- svc->srv_nthrs_cpt_init = val / svc->srv_ncpts;
-
- spin_unlock(&svc->srv_lock);
-
- return count;
-}
-LUSTRE_RW_ATTR(threads_min);
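threads_min_store() above spreads the written total across the service's
partitions and rejects values outside the per-CPT bounds. A stand-alone
sketch of the same arithmetic; PTLRPC_NTHRS_INIT and the other values are
assumed here, not taken from a real service.

    #include <stdio.h>

    #define PTLRPC_NTHRS_INIT 2     /* assumed per-CPT floor */

    int main(void)
    {
            unsigned long srv_ncpts = 4, srv_nthrs_cpt_limit = 16;
            unsigned long val = 64; /* value written to threads_min */

            if (val / srv_ncpts < PTLRPC_NTHRS_INIT ||
                val > srv_nthrs_cpt_limit * srv_ncpts) {
                    printf("rejected: -ERANGE\n");
                    return 1;
            }
            /* 64 threads over 4 partitions -> 16 per CPT */
            printf("srv_nthrs_cpt_init = %lu\n", val / srv_ncpts);
            return 0;
    }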
-
-static ssize_t threads_started_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ptlrpc_service *svc = container_of(kobj, struct ptlrpc_service,
- srv_kobj);
- struct ptlrpc_service_part *svcpt;
- int total = 0;
- int i;
-
- ptlrpc_service_for_each_part(svcpt, i, svc)
- total += svcpt->scp_nthrs_running;
-
- return sprintf(buf, "%d\n", total);
-}
-LUSTRE_RO_ATTR(threads_started);
-
-static ssize_t threads_max_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ptlrpc_service *svc = container_of(kobj, struct ptlrpc_service,
- srv_kobj);
-
- return sprintf(buf, "%d\n", svc->srv_nthrs_cpt_limit * svc->srv_ncpts);
-}
-
-static ssize_t threads_max_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer, size_t count)
-{
- struct ptlrpc_service *svc = container_of(kobj, struct ptlrpc_service,
- srv_kobj);
- unsigned long val;
- int rc = kstrtoul(buffer, 10, &val);
-
- if (rc < 0)
- return rc;
-
- if (val / svc->srv_ncpts < PTLRPC_NTHRS_INIT)
- return -ERANGE;
-
- spin_lock(&svc->srv_lock);
- if (val < svc->srv_nthrs_cpt_init * svc->srv_ncpts) {
- spin_unlock(&svc->srv_lock);
- return -ERANGE;
- }
-
- svc->srv_nthrs_cpt_limit = val / svc->srv_ncpts;
-
- spin_unlock(&svc->srv_lock);
-
- return count;
-}
-LUSTRE_RW_ATTR(threads_max);
-
-/**
- * \addtogroup nrs
- * @{
- */
-
-/**
- * Translates \e ptlrpc_nrs_pol_state values to human-readable strings.
- *
- * \param[in] state The policy state
- */
-static const char *nrs_state2str(enum ptlrpc_nrs_pol_state state)
-{
- switch (state) {
- default:
- LBUG();
- case NRS_POL_STATE_INVALID:
- return "invalid";
- case NRS_POL_STATE_STOPPED:
- return "stopped";
- case NRS_POL_STATE_STOPPING:
- return "stopping";
- case NRS_POL_STATE_STARTING:
- return "starting";
- case NRS_POL_STATE_STARTED:
- return "started";
- }
-}
-
-/**
- * Obtains status information for \a policy.
- *
- * Information is copied in \a info.
- *
- * \param[in] policy The policy
- * \param[out] info Holds returned status information
- */
-static void nrs_policy_get_info_locked(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_pol_info *info)
-{
- assert_spin_locked(&policy->pol_nrs->nrs_lock);
-
- memcpy(info->pi_name, policy->pol_desc->pd_name, NRS_POL_NAME_MAX);
-
- info->pi_fallback = !!(policy->pol_flags & PTLRPC_NRS_FL_FALLBACK);
- info->pi_state = policy->pol_state;
- /**
- * XXX: These are accessed without holding
- * ptlrpc_service_part::scp_req_lock.
- */
- info->pi_req_queued = policy->pol_req_queued;
- info->pi_req_started = policy->pol_req_started;
-}
-
-/**
- * Reads and prints policy status information for all policies of a PTLRPC
- * service.
- */
-static int ptlrpc_lprocfs_nrs_seq_show(struct seq_file *m, void *n)
-{
- struct ptlrpc_service *svc = m->private;
- struct ptlrpc_service_part *svcpt;
- struct ptlrpc_nrs *nrs;
- struct ptlrpc_nrs_policy *policy;
- struct ptlrpc_nrs_pol_info *infos;
- struct ptlrpc_nrs_pol_info tmp;
- unsigned int num_pols;
- unsigned int pol_idx = 0;
- bool hp = false;
- int i;
- int rc = 0;
-
- /**
- * Serialize NRS core lprocfs operations with policy registration/
- * unregistration.
- */
- mutex_lock(&nrs_core.nrs_mutex);
-
- /**
- * Use the first service partition's regular NRS head in order to obtain
- * the number of policies registered with NRS heads of this service. All
- * service partitions will have the same number of policies.
- */
- nrs = nrs_svcpt2nrs(svc->srv_parts[0], false);
-
- spin_lock(&nrs->nrs_lock);
- num_pols = svc->srv_parts[0]->scp_nrs_reg.nrs_num_pols;
- spin_unlock(&nrs->nrs_lock);
-
- infos = kcalloc(num_pols, sizeof(*infos), GFP_NOFS);
- if (!infos) {
- rc = -ENOMEM;
- goto unlock;
- }
-again:
-
- ptlrpc_service_for_each_part(svcpt, i, svc) {
- nrs = nrs_svcpt2nrs(svcpt, hp);
- spin_lock(&nrs->nrs_lock);
-
- pol_idx = 0;
-
- list_for_each_entry(policy, &nrs->nrs_policy_list, pol_list) {
- LASSERT(pol_idx < num_pols);
-
- nrs_policy_get_info_locked(policy, &tmp);
- /**
- * Copy values when handling the first service
- * partition.
- */
- if (i == 0) {
- memcpy(infos[pol_idx].pi_name, tmp.pi_name,
- NRS_POL_NAME_MAX);
- memcpy(&infos[pol_idx].pi_state, &tmp.pi_state,
- sizeof(tmp.pi_state));
- infos[pol_idx].pi_fallback = tmp.pi_fallback;
- /**
- * For the rest of the service partitions
- * sanity-check the values we get.
- */
- } else {
- LASSERT(strncmp(infos[pol_idx].pi_name,
- tmp.pi_name,
- NRS_POL_NAME_MAX) == 0);
- /**
- * Not asserting ptlrpc_nrs_pol_info::pi_state,
- * because it may be different between
- * instances of the same policy in different
- * service partitions.
- */
- LASSERT(infos[pol_idx].pi_fallback ==
- tmp.pi_fallback);
- }
-
- infos[pol_idx].pi_req_queued += tmp.pi_req_queued;
- infos[pol_idx].pi_req_started += tmp.pi_req_started;
-
- pol_idx++;
- }
- spin_unlock(&nrs->nrs_lock);
- }
-
- /**
- * Policy status information output is in YAML format.
- * For example:
- *
- * regular_requests:
- * - name: fifo
- * state: started
- * fallback: yes
- * queued: 0
- * active: 0
- *
- * - name: crrn
- * state: started
- * fallback: no
- * queued: 2015
- * active: 384
- *
- * high_priority_requests:
- * - name: fifo
- * state: started
- * fallback: yes
- * queued: 0
- * active: 2
- *
- * - name: crrn
- * state: stopped
- * fallback: no
- * queued: 0
- * active: 0
- */
- seq_printf(m, "%s\n",
- !hp ? "\nregular_requests:" : "high_priority_requests:");
-
- for (pol_idx = 0; pol_idx < num_pols; pol_idx++) {
- seq_printf(m, " - name: %s\n"
- " state: %s\n"
- " fallback: %s\n"
- " queued: %-20d\n"
- " active: %-20d\n\n",
- infos[pol_idx].pi_name,
- nrs_state2str(infos[pol_idx].pi_state),
- infos[pol_idx].pi_fallback ? "yes" : "no",
- (int)infos[pol_idx].pi_req_queued,
- (int)infos[pol_idx].pi_req_started);
- }
-
- if (!hp && nrs_svc_has_hp(svc)) {
- memset(infos, 0, num_pols * sizeof(*infos));
-
- /**
- * Redo the processing for the service's HP NRS heads' policies.
- */
- hp = true;
- goto again;
- }
-
- kfree(infos);
-unlock:
- mutex_unlock(&nrs_core.nrs_mutex);
-
- return rc;
-}
-
-/**
- * The longest valid command string is the maximum policy name size, plus the
- * length of the " reg" substring.
- */
-#define LPROCFS_NRS_WR_MAX_CMD (NRS_POL_NAME_MAX + sizeof(" reg") - 1)
-
-/**
- * Starts a given policy on a PTLRPC service.
- *
- * Commands consist of the policy name, followed by an optional [reg|hp] token;
- * if the token is omitted, the operation is performed on both the regular
- * and the high-priority NRS head (if the service has one).
- */
-static ssize_t ptlrpc_lprocfs_nrs_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private;
- enum ptlrpc_nrs_queue_type queue = PTLRPC_NRS_QUEUE_BOTH;
- char *cmd;
- char *cmd_copy = NULL;
- char *token;
- int rc = 0;
-
- if (count >= LPROCFS_NRS_WR_MAX_CMD)
- return -EINVAL;
-
- cmd = kzalloc(LPROCFS_NRS_WR_MAX_CMD, GFP_NOFS);
- if (!cmd)
- return -ENOMEM;
- /**
- * strsep() modifies its argument, so keep a copy
- */
- cmd_copy = cmd;
-
- if (copy_from_user(cmd, buffer, count)) {
- rc = -EFAULT;
- goto out;
- }
-
- cmd[count] = '\0';
-
- token = strsep(&cmd, " ");
-
- if (strlen(token) > NRS_POL_NAME_MAX - 1) {
- rc = -EINVAL;
- goto out;
- }
-
- /**
- * No [reg|hp] token has been specified
- */
- if (!cmd)
- goto default_queue;
-
- /**
- * The second token is either NULL, or an optional [reg|hp] string
- */
- if (strcmp(cmd, "reg") == 0) {
- queue = PTLRPC_NRS_QUEUE_REG;
- } else if (strcmp(cmd, "hp") == 0) {
- queue = PTLRPC_NRS_QUEUE_HP;
- } else {
- rc = -EINVAL;
- goto out;
- }
-
-default_queue:
-
- if (queue == PTLRPC_NRS_QUEUE_HP && !nrs_svc_has_hp(svc)) {
- rc = -ENODEV;
- goto out;
- } else if (queue == PTLRPC_NRS_QUEUE_BOTH && !nrs_svc_has_hp(svc)) {
- queue = PTLRPC_NRS_QUEUE_REG;
- }
-
- /**
- * Serialize NRS core lprocfs operations with policy registration/
- * unregistration.
- */
- mutex_lock(&nrs_core.nrs_mutex);
-
- rc = ptlrpc_nrs_policy_control(svc, queue, token, PTLRPC_NRS_CTL_START,
- false, NULL);
-
- mutex_unlock(&nrs_core.nrs_mutex);
-out:
- kfree(cmd_copy);
-
- return rc < 0 ? rc : count;
-}
-
-LPROC_SEQ_FOPS(ptlrpc_lprocfs_nrs);
-
-/** @} nrs */
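Per the handler above, a write to nrs_policies is a policy name optionally
followed by "reg" or "hp". A hedged usage sketch; the debugfs path and policy
name are illustrative:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            /* start the fifo policy on the high-priority NRS head only */
            const char cmd[] = "fifo hp";
            int fd = open("/sys/kernel/debug/lustre/.../nrs_policies",
                          O_WRONLY);

            if (fd < 0)
                    return 1;
            if (write(fd, cmd, strlen(cmd)) != (ssize_t)strlen(cmd))
                    perror("write");
            close(fd);
            return 0;
    }

Note the command carries no trailing newline: the handler compares the second
token verbatim, so "hp" followed by a newline would not match.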
-
-struct ptlrpc_srh_iterator {
- int srhi_idx;
- __u64 srhi_seq;
- struct ptlrpc_request *srhi_req;
-};
-
-static int
-ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service_part *svcpt,
- struct ptlrpc_srh_iterator *srhi,
- __u64 seq)
-{
- struct list_head *e;
- struct ptlrpc_request *req;
-
- if (srhi->srhi_req && srhi->srhi_seq > svcpt->scp_hist_seq_culled &&
- srhi->srhi_seq <= seq) {
- /* If srhi_req was set previously, hasn't been culled and
- * we're searching for a seq on or after it (i.e. more
- * recent), search from it onwards.
- * Since the service history is LRU (i.e. culled reqs will
- * be near the head), we shouldn't have to do long
- * re-scans
- */
- LASSERTF(srhi->srhi_seq == srhi->srhi_req->rq_history_seq,
- "%s:%d: seek seq %llu, request seq %llu\n",
- svcpt->scp_service->srv_name, svcpt->scp_cpt,
- srhi->srhi_seq, srhi->srhi_req->rq_history_seq);
- LASSERTF(!list_empty(&svcpt->scp_hist_reqs),
- "%s:%d: seek offset %llu, request seq %llu, last culled %llu\n",
- svcpt->scp_service->srv_name, svcpt->scp_cpt,
- seq, srhi->srhi_seq, svcpt->scp_hist_seq_culled);
- e = &srhi->srhi_req->rq_history_list;
- } else {
- /* search from start */
- e = svcpt->scp_hist_reqs.next;
- }
-
- while (e != &svcpt->scp_hist_reqs) {
- req = list_entry(e, struct ptlrpc_request, rq_history_list);
-
- if (req->rq_history_seq >= seq) {
- srhi->srhi_seq = req->rq_history_seq;
- srhi->srhi_req = req;
- return 0;
- }
- e = e->next;
- }
-
- return -ENOENT;
-}
-
-/*
- * The ptlrpc history sequence is used as the "position" of the
- * seq_file. In some cases seq_read() will increase the position to
- * indicate reading the next element; however, the low bits of the
- * history sequence are reserved for the CPT id (see the comments
- * before ptlrpc_req_add_history() for details), which means seq_read()
- * might change the CPT id of the history sequence and never finish
- * reading requests on a CPT. To make it work, we shift the CPT id to
- * the high bits and the per-CPT sequence to the low bits, so seq_read()
- * only increases the sequence, which correctly indicates the next
- * position.
- */
-
-/* convert seq_file pos to cpt */
-#define PTLRPC_REQ_POS2CPT(svc, pos) \
- ((svc)->srv_cpt_bits == 0 ? 0 : \
- (__u64)(pos) >> (64 - (svc)->srv_cpt_bits))
-
-/* make up seq_file pos from cpt */
-#define PTLRPC_REQ_CPT2POS(svc, cpt) \
- ((svc)->srv_cpt_bits == 0 ? 0 : \
- (cpt) << (64 - (svc)->srv_cpt_bits))
-
-/* convert sequence to position */
-#define PTLRPC_REQ_SEQ2POS(svc, seq) \
- ((svc)->srv_cpt_bits == 0 ? (seq) : \
- ((seq) >> (svc)->srv_cpt_bits) | \
- ((seq) << (64 - (svc)->srv_cpt_bits)))
-
-/* convert position to sequence */
-#define PTLRPC_REQ_POS2SEQ(svc, pos) \
- ((svc)->srv_cpt_bits == 0 ? (pos) : \
- ((__u64)(pos) << (svc)->srv_cpt_bits) | \
- ((__u64)(pos) >> (64 - (svc)->srv_cpt_bits)))
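The four macros above implement a plain 64-bit rotation that moves the CPT id
between the low and high bits of the seq_file position. A minimal stand-alone
sketch with a hypothetical 2-bit CPT field, showing that the conversion
round-trips and that bumping the position advances only the per-CPT sequence
(the real macros additionally special-case srv_cpt_bits == 0, where a 64-bit
shift would be undefined):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define CPT_BITS 2      /* stand-in for svc->srv_cpt_bits */

    static uint64_t seq2pos(uint64_t seq)
    {
            /* rotate right: the CPT id (low bits) moves to the top */
            return (seq >> CPT_BITS) | (seq << (64 - CPT_BITS));
    }

    static uint64_t pos2seq(uint64_t pos)
    {
            /* rotate left: exact inverse of seq2pos() */
            return (pos << CPT_BITS) | (pos >> (64 - CPT_BITS));
    }

    int main(void)
    {
            uint64_t seq = (123ULL << CPT_BITS) | 1; /* seq 123 on CPT 1 */

            assert(pos2seq(seq2pos(seq)) == seq);
            /* seq_read() bumping the position now advances the history
             * sequence while the CPT id stays put in the high bits
             */
            printf("pos=%#llx next seq=%#llx\n",
                   (unsigned long long)seq2pos(seq),
                   (unsigned long long)pos2seq(seq2pos(seq) + 1));
            return 0;
    }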
-
-static void *
-ptlrpc_lprocfs_svc_req_history_start(struct seq_file *s, loff_t *pos)
-{
- struct ptlrpc_service *svc = s->private;
- struct ptlrpc_service_part *svcpt;
- struct ptlrpc_srh_iterator *srhi;
- unsigned int cpt;
- int rc;
- int i;
-
- if (sizeof(loff_t) != sizeof(__u64)) { /* can't support */
- CWARN("Failed to read request history because size of loff_t %d can't match size of u64\n",
- (int)sizeof(loff_t));
- return NULL;
- }
-
- srhi = kzalloc(sizeof(*srhi), GFP_NOFS);
- if (!srhi)
- return NULL;
-
- srhi->srhi_seq = 0;
- srhi->srhi_req = NULL;
-
- cpt = PTLRPC_REQ_POS2CPT(svc, *pos);
-
- ptlrpc_service_for_each_part(svcpt, i, svc) {
- if (i < cpt) /* skip */
- continue;
- if (i > cpt) /* make up the lowest position for this CPT */
- *pos = PTLRPC_REQ_CPT2POS(svc, i);
-
- spin_lock(&svcpt->scp_lock);
- rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi,
- PTLRPC_REQ_POS2SEQ(svc, *pos));
- spin_unlock(&svcpt->scp_lock);
- if (rc == 0) {
- *pos = PTLRPC_REQ_SEQ2POS(svc, srhi->srhi_seq);
- srhi->srhi_idx = i;
- return srhi;
- }
- }
-
- kfree(srhi);
- return NULL;
-}
-
-static void
-ptlrpc_lprocfs_svc_req_history_stop(struct seq_file *s, void *iter)
-{
- struct ptlrpc_srh_iterator *srhi = iter;
-
- kfree(srhi);
-}
-
-static void *
-ptlrpc_lprocfs_svc_req_history_next(struct seq_file *s,
- void *iter, loff_t *pos)
-{
- struct ptlrpc_service *svc = s->private;
- struct ptlrpc_srh_iterator *srhi = iter;
- struct ptlrpc_service_part *svcpt;
- __u64 seq;
- int rc;
- int i;
-
- for (i = srhi->srhi_idx; i < svc->srv_ncpts; i++) {
- svcpt = svc->srv_parts[i];
-
- if (i > srhi->srhi_idx) { /* reset iterator for a new CPT */
- srhi->srhi_req = NULL;
- seq = 0;
- srhi->srhi_seq = 0;
- } else { /* the next sequence */
- seq = srhi->srhi_seq + (1 << svc->srv_cpt_bits);
- }
-
- spin_lock(&svcpt->scp_lock);
- rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, seq);
- spin_unlock(&svcpt->scp_lock);
- if (rc == 0) {
- *pos = PTLRPC_REQ_SEQ2POS(svc, srhi->srhi_seq);
- srhi->srhi_idx = i;
- return srhi;
- }
- }
-
- kfree(srhi);
- return NULL;
-}
-
-static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file *s, void *iter)
-{
- struct ptlrpc_service *svc = s->private;
- struct ptlrpc_srh_iterator *srhi = iter;
- struct ptlrpc_service_part *svcpt;
- struct ptlrpc_request *req;
- int rc;
-
- LASSERT(srhi->srhi_idx < svc->srv_ncpts);
-
- svcpt = svc->srv_parts[srhi->srhi_idx];
-
- spin_lock(&svcpt->scp_lock);
-
- rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, srhi->srhi_seq);
-
- if (rc == 0) {
- struct timespec64 arrival, sent, arrivaldiff;
- char nidstr[LNET_NIDSTR_SIZE];
-
- req = srhi->srhi_req;
-
- libcfs_nid2str_r(req->rq_self, nidstr, sizeof(nidstr));
- arrival.tv_sec = req->rq_arrival_time.tv_sec;
- arrival.tv_nsec = req->rq_arrival_time.tv_nsec;
- sent.tv_sec = req->rq_sent;
- sent.tv_nsec = 0;
- arrivaldiff = timespec64_sub(sent, arrival);
-
- /* Print common req fields.
- * CAVEAT EMPTOR: we're racing with the service handler here, so
- * the request could contain anything; be just as careful as the
- * service's request parser. Only fields known to be safe, i.e.
- * those set up in request_in_callback(), are printed here.
- */
- seq_printf(s, "%lld:%s:%s:x%llu:%d:%s:%lld.%06lld:%lld.%06llds(%+lld.0s) ",
- req->rq_history_seq, nidstr,
- libcfs_id2str(req->rq_peer), req->rq_xid,
- req->rq_reqlen, ptlrpc_rqphase2str(req),
- (s64)req->rq_arrival_time.tv_sec,
- (s64)req->rq_arrival_time.tv_nsec / NSEC_PER_USEC,
- (s64)arrivaldiff.tv_sec,
- (s64)(arrivaldiff.tv_nsec / NSEC_PER_USEC),
- (s64)(req->rq_sent - req->rq_deadline));
- if (!svc->srv_ops.so_req_printer)
- seq_putc(s, '\n');
- else
- svc->srv_ops.so_req_printer(s, srhi->srhi_req);
- }
-
- spin_unlock(&svcpt->scp_lock);
- return rc;
-}
-
-static int
-ptlrpc_lprocfs_svc_req_history_open(struct inode *inode, struct file *file)
-{
- static const struct seq_operations sops = {
- .start = ptlrpc_lprocfs_svc_req_history_start,
- .stop = ptlrpc_lprocfs_svc_req_history_stop,
- .next = ptlrpc_lprocfs_svc_req_history_next,
- .show = ptlrpc_lprocfs_svc_req_history_show,
- };
- struct seq_file *seqf;
- int rc;
-
- rc = seq_open(file, &sops);
- if (rc)
- return rc;
-
- seqf = file->private_data;
- seqf->private = inode->i_private;
- return 0;
-}
-
-/* See also lprocfs_rd_timeouts */
-static int ptlrpc_lprocfs_timeouts_seq_show(struct seq_file *m, void *n)
-{
- struct ptlrpc_service *svc = m->private;
- struct ptlrpc_service_part *svcpt;
- struct dhms ts;
- time64_t worstt;
- unsigned int cur;
- unsigned int worst;
- int i;
-
- if (AT_OFF) {
- seq_printf(m, "adaptive timeouts off, using obd_timeout %u\n",
- obd_timeout);
- return 0;
- }
-
- ptlrpc_service_for_each_part(svcpt, i, svc) {
- cur = at_get(&svcpt->scp_at_estimate);
- worst = svcpt->scp_at_estimate.at_worst_ever;
- worstt = svcpt->scp_at_estimate.at_worst_time;
- s2dhms(&ts, ktime_get_real_seconds() - worstt);
-
- seq_printf(m, "%10s : cur %3u worst %3u (at %lld, "
- DHMS_FMT " ago) ", "service",
- cur, worst, (s64)worstt, DHMS_VARS(&ts));
-
- lprocfs_at_hist_helper(m, &svcpt->scp_at_estimate);
- }
-
- return 0;
-}
-
-LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_timeouts);
-
-static ssize_t high_priority_ratio_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ptlrpc_service *svc = container_of(kobj, struct ptlrpc_service,
- srv_kobj);
- return sprintf(buf, "%d\n", svc->srv_hpreq_ratio);
-}
-
-static ssize_t high_priority_ratio_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ptlrpc_service *svc = container_of(kobj, struct ptlrpc_service,
- srv_kobj);
- int rc;
- int val;
-
- rc = kstrtoint(buffer, 10, &val);
- if (rc < 0)
- return rc;
-
- if (val < 0)
- return -ERANGE;
-
- spin_lock(&svc->srv_lock);
- svc->srv_hpreq_ratio = val;
- spin_unlock(&svc->srv_lock);
-
- return count;
-}
-LUSTRE_RW_ATTR(high_priority_ratio);
-
-static struct attribute *ptlrpc_svc_attrs[] = {
- &lustre_attr_threads_min.attr,
- &lustre_attr_threads_started.attr,
- &lustre_attr_threads_max.attr,
- &lustre_attr_high_priority_ratio.attr,
- NULL,
-};
-
-static void ptlrpc_sysfs_svc_release(struct kobject *kobj)
-{
- struct ptlrpc_service *svc = container_of(kobj, struct ptlrpc_service,
- srv_kobj);
-
- complete(&svc->srv_kobj_unregister);
-}
-
-static struct kobj_type ptlrpc_svc_ktype = {
- .default_attrs = ptlrpc_svc_attrs,
- .sysfs_ops = &lustre_sysfs_ops,
- .release = ptlrpc_sysfs_svc_release,
-};
-
-void ptlrpc_sysfs_unregister_service(struct ptlrpc_service *svc)
-{
- /* Let's see if we had a chance at initialization first */
- if (svc->srv_kobj.kset) {
- kobject_put(&svc->srv_kobj);
- wait_for_completion(&svc->srv_kobj_unregister);
- }
-}
-
-int ptlrpc_sysfs_register_service(struct kset *parent,
- struct ptlrpc_service *svc)
-{
- int rc;
-
- svc->srv_kobj.kset = parent;
- init_completion(&svc->srv_kobj_unregister);
- rc = kobject_init_and_add(&svc->srv_kobj, &ptlrpc_svc_ktype, NULL,
- "%s", svc->srv_name);
-
- return rc;
-}
-
-void ptlrpc_ldebugfs_register_service(struct dentry *entry,
- struct ptlrpc_service *svc)
-{
- struct lprocfs_vars lproc_vars[] = {
- {.name = "req_buffer_history_len",
- .fops = &ptlrpc_lprocfs_req_history_len_fops,
- .data = svc},
- {.name = "req_buffer_history_max",
- .fops = &ptlrpc_lprocfs_req_history_max_fops,
- .data = svc},
- {.name = "timeouts",
- .fops = &ptlrpc_lprocfs_timeouts_fops,
- .data = svc},
- {.name = "nrs_policies",
- .fops = &ptlrpc_lprocfs_nrs_fops,
- .data = svc},
- {NULL}
- };
- static const struct file_operations req_history_fops = {
- .owner = THIS_MODULE,
- .open = ptlrpc_lprocfs_svc_req_history_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = lprocfs_seq_release,
- };
-
- ptlrpc_ldebugfs_register(entry, svc->srv_name,
- "stats", &svc->srv_debugfs_entry,
- &svc->srv_stats);
-
- if (IS_ERR_OR_NULL(svc->srv_debugfs_entry))
- return;
-
- ldebugfs_add_vars(svc->srv_debugfs_entry, lproc_vars, NULL);
-
- debugfs_create_file("req_history", 0400, svc->srv_debugfs_entry, svc,
- &req_history_fops);
-}
-
-void ptlrpc_lprocfs_register_obd(struct obd_device *obddev)
-{
- ptlrpc_ldebugfs_register(obddev->obd_debugfs_entry, NULL, "stats",
- &obddev->obd_svc_debugfs_entry,
- &obddev->obd_svc_stats);
-}
-EXPORT_SYMBOL(ptlrpc_lprocfs_register_obd);
-
-void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req, long amount)
-{
- struct lprocfs_stats *svc_stats;
- __u32 op = lustre_msg_get_opc(req->rq_reqmsg);
- int opc = opcode_offset(op);
-
- svc_stats = req->rq_import->imp_obd->obd_svc_stats;
- if (!svc_stats || opc <= 0)
- return;
- LASSERT(opc < LUSTRE_MAX_OPCODES);
- if (!(op == LDLM_ENQUEUE || op == MDS_REINT))
- lprocfs_counter_add(svc_stats, opc + EXTRA_MAX_OPCODES, amount);
-}
-
-void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes)
-{
- struct lprocfs_stats *svc_stats;
- int idx;
-
- if (!req->rq_import)
- return;
- svc_stats = req->rq_import->imp_obd->obd_svc_stats;
- if (!svc_stats)
- return;
- idx = lustre_msg_get_opc(req->rq_reqmsg);
- switch (idx) {
- case OST_READ:
- idx = BRW_READ_BYTES + PTLRPC_LAST_CNTR;
- break;
- case OST_WRITE:
- idx = BRW_WRITE_BYTES + PTLRPC_LAST_CNTR;
- break;
- default:
- LASSERTF(0, "unsupported opcode %u\n", idx);
- break;
- }
-
- lprocfs_counter_add(svc_stats, idx, bytes);
-}
-EXPORT_SYMBOL(ptlrpc_lprocfs_brw);
-
-void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc)
-{
- debugfs_remove_recursive(svc->srv_debugfs_entry);
-
- if (svc->srv_stats)
- lprocfs_free_stats(&svc->srv_stats);
-}
-
-void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd)
-{
- debugfs_remove_recursive(obd->obd_svc_debugfs_entry);
-
- if (obd->obd_svc_stats)
- lprocfs_free_stats(&obd->obd_svc_stats);
-}
-EXPORT_SYMBOL(ptlrpc_lprocfs_unregister_obd);
-
-#undef BUFLEN
-
-int lprocfs_wr_ping(struct file *file, const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
- struct ptlrpc_request *req;
- int rc;
-
- rc = lprocfs_climp_check(obd);
- if (rc)
- return rc;
-
- req = ptlrpc_prep_ping(obd->u.cli.cl_import);
- up_read(&obd->u.cli.cl_sem);
- if (!req)
- return -ENOMEM;
-
- req->rq_send_state = LUSTRE_IMP_FULL;
-
- rc = ptlrpc_queue_wait(req);
-
- ptlrpc_req_finished(req);
- if (rc >= 0)
- return count;
- return rc;
-}
-EXPORT_SYMBOL(lprocfs_wr_ping);
-
-/* Write the connection UUID to this file to attempt to connect to that node.
- * The connection UUID is a node's primary NID. For example,
- * "echo connection=192.168.0.1@tcp0::instance > .../import".
- */
-int lprocfs_wr_import(struct file *file, const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
- struct obd_import *imp = obd->u.cli.cl_import;
- char *kbuf = NULL;
- char *uuid;
- char *ptr;
- int do_reconn = 1;
- const char prefix[] = "connection=";
- const int prefix_len = sizeof(prefix) - 1;
-
- if (count > PAGE_SIZE - 1 || count <= prefix_len)
- return -EINVAL;
-
- kbuf = kzalloc(count + 1, GFP_NOFS);
- if (!kbuf)
- return -ENOMEM;
-
- if (copy_from_user(kbuf, buffer, count)) {
- count = -EFAULT;
- goto out;
- }
-
- kbuf[count] = 0;
-
- /* only support connection=uuid::instance now */
- if (strncmp(prefix, kbuf, prefix_len) != 0) {
- count = -EINVAL;
- goto out;
- }
-
- uuid = kbuf + prefix_len;
- ptr = strstr(uuid, "::");
- if (ptr) {
- __u32 inst;
- char *endptr;
-
- *ptr = 0;
- do_reconn = 0;
- ptr += strlen("::");
- inst = simple_strtoul(ptr, &endptr, 10);
- if (*endptr) {
- CERROR("config: wrong instance # %s\n", ptr);
- } else if (inst != imp->imp_connect_data.ocd_instance) {
- CDEBUG(D_INFO, "IR: %s is connecting to an obsoleted target(%u/%u), reconnecting...\n",
- imp->imp_obd->obd_name,
- imp->imp_connect_data.ocd_instance, inst);
- do_reconn = 1;
- } else {
- CDEBUG(D_INFO, "IR: %s has already been connecting to new target(%u)\n",
- imp->imp_obd->obd_name, inst);
- }
- }
-
- if (do_reconn)
- ptlrpc_recover_import(imp, uuid, 1);
-
-out:
- kfree(kbuf);
- return count;
-}
-EXPORT_SYMBOL(lprocfs_wr_import);
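A stand-alone sketch of the "uuid::instance" splitting performed above
(the input is hypothetical and userspace strtoul stands in for
simple_strtoul):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
            char buf[] = "192.168.0.1@tcp0::7";    /* hypothetical input */
            char *ptr = strstr(buf, "::");
            unsigned long inst = 0;

            if (ptr) {
                    *ptr = '\0';            /* terminate the UUID part */
                    inst = strtoul(ptr + 2, NULL, 10);
            }
            printf("uuid=%s instance=%lu\n", buf, inst);
            return 0;
    }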
-
-int lprocfs_rd_pinger_recov(struct seq_file *m, void *n)
-{
- struct obd_device *obd = m->private;
- struct obd_import *imp = obd->u.cli.cl_import;
- int rc;
-
- rc = lprocfs_climp_check(obd);
- if (rc)
- return rc;
-
- seq_printf(m, "%d\n", !imp->imp_no_pinger_recover);
- up_read(&obd->u.cli.cl_sem);
-
- return 0;
-}
-EXPORT_SYMBOL(lprocfs_rd_pinger_recov);
-
-int lprocfs_wr_pinger_recov(struct file *file, const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
- struct client_obd *cli = &obd->u.cli;
- struct obd_import *imp = cli->cl_import;
- int rc, val;
-
- rc = lprocfs_write_helper(buffer, count, &val);
- if (rc < 0)
- return rc;
-
- if (val != 0 && val != 1)
- return -ERANGE;
-
- rc = lprocfs_climp_check(obd);
- if (rc)
- return rc;
-
- spin_lock(&imp->imp_lock);
- imp->imp_no_pinger_recover = !val;
- spin_unlock(&imp->imp_lock);
- up_read(&obd->u.cli.cl_sem);
-
- return count;
-}
-EXPORT_SYMBOL(lprocfs_wr_pinger_recov);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
deleted file mode 100644
index 2897afb8806c..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
+++ /dev/null
@@ -1,771 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_RPC
-#include <linux/sched/mm.h>
-#include <obd_support.h>
-#include <lustre_net.h>
-#include <lustre_lib.h>
-#include <obd.h>
-#include <obd_class.h>
-#include "ptlrpc_internal.h"
-
-/**
- * Helper function. Sends \a len bytes from \a base at offset \a offset
- * over connection \a conn to portal \a portal.
- * Returns 0 on success or an error code.
- */
-static int ptl_send_buf(struct lnet_handle_md *mdh, void *base, int len,
- enum lnet_ack_req ack, struct ptlrpc_cb_id *cbid,
- struct ptlrpc_connection *conn, int portal, __u64 xid,
- unsigned int offset)
-{
- int rc;
- struct lnet_md md;
-
- LASSERT(portal != 0);
- CDEBUG(D_INFO, "conn=%p id %s\n", conn, libcfs_id2str(conn->c_peer));
- md.start = base;
- md.length = len;
- md.threshold = (ack == LNET_ACK_REQ) ? 2 : 1;
- md.options = PTLRPC_MD_OPTIONS;
- md.user_ptr = cbid;
- md.eq_handle = ptlrpc_eq_h;
-
- if (unlikely(ack == LNET_ACK_REQ &&
- OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_ACK,
- OBD_FAIL_ONCE))) {
- /* don't ask for the ack, to simulate a failing client */
- ack = LNET_NOACK_REQ;
- }
-
- rc = LNetMDBind(md, LNET_UNLINK, mdh);
- if (unlikely(rc != 0)) {
- CERROR("LNetMDBind failed: %d\n", rc);
- LASSERT(rc == -ENOMEM);
- return -ENOMEM;
- }
-
- CDEBUG(D_NET, "Sending %d bytes to portal %d, xid %lld, offset %u\n",
- len, portal, xid, offset);
-
- rc = LNetPut(conn->c_self, *mdh, ack,
- conn->c_peer, portal, xid, offset, 0);
- if (unlikely(rc != 0)) {
- int rc2;
- /* We're going to get an UNLINK event when we unlink below;
- * it will complete just like any other failed send, so we
- * fall through and return success here.
- */
- CERROR("LNetPut(%s, %d, %lld) failed: %d\n",
- libcfs_id2str(conn->c_peer), portal, xid, rc);
- rc2 = LNetMDUnlink(*mdh);
- LASSERTF(rc2 == 0, "rc2 = %d\n", rc2);
- }
-
- return 0;
-}
-
-static void mdunlink_iterate_helper(struct lnet_handle_md *bd_mds, int count)
-{
- int i;
-
- for (i = 0; i < count; i++)
- LNetMDUnlink(bd_mds[i]);
-}
-
-/**
- * Register bulk at the sender for later transfer.
- * Returns 0 on success or error code.
- */
-static int ptlrpc_register_bulk(struct ptlrpc_request *req)
-{
- struct ptlrpc_bulk_desc *desc = req->rq_bulk;
- struct lnet_process_id peer;
- int rc = 0;
- int rc2;
- int posted_md;
- int total_md;
- u64 mbits;
- struct lnet_handle_me me_h;
- struct lnet_md md;
-
- if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_GET_NET))
- return 0;
-
- /* NB no locking required until desc is on the network */
- LASSERT(desc->bd_nob > 0);
- LASSERT(desc->bd_md_count == 0);
- LASSERT(desc->bd_md_max_brw <= PTLRPC_BULK_OPS_COUNT);
- LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
- LASSERT(desc->bd_req);
- LASSERT(ptlrpc_is_bulk_op_passive(desc->bd_type));
-
- /* clean up the state of the bulk descriptor since it will be reused */
- if (req->rq_resend || req->rq_send_state == LUSTRE_IMP_REPLAY)
- desc->bd_nob_transferred = 0;
- else
- LASSERT(desc->bd_nob_transferred == 0);
-
- desc->bd_failure = 0;
-
- peer = desc->bd_import->imp_connection->c_peer;
-
- LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback);
- LASSERT(desc->bd_cbid.cbid_arg == desc);
-
- total_md = DIV_ROUND_UP(desc->bd_iov_count, LNET_MAX_IOV);
- /* rq_mbits is matchbits of the final bulk */
- mbits = req->rq_mbits - total_md + 1;
-
- LASSERTF(mbits == (req->rq_mbits & PTLRPC_BULK_OPS_MASK),
- "first mbits = x%llu, last mbits = x%llu\n",
- mbits, req->rq_mbits);
- LASSERTF(!(desc->bd_registered &&
- req->rq_send_state != LUSTRE_IMP_REPLAY) ||
- mbits != desc->bd_last_mbits,
- "registered: %d rq_mbits: %llu bd_last_mbits: %llu\n",
- desc->bd_registered, mbits, desc->bd_last_mbits);
-
- desc->bd_registered = 1;
- desc->bd_last_mbits = mbits;
- desc->bd_md_count = total_md;
- md.user_ptr = &desc->bd_cbid;
- md.eq_handle = ptlrpc_eq_h;
- md.threshold = 1; /* PUT or GET */
-
- for (posted_md = 0; posted_md < total_md; posted_md++, mbits++) {
- md.options = PTLRPC_MD_OPTIONS |
- (ptlrpc_is_bulk_op_get(desc->bd_type) ?
- LNET_MD_OP_GET : LNET_MD_OP_PUT);
- ptlrpc_fill_bulk_md(&md, desc, posted_md);
-
- rc = LNetMEAttach(desc->bd_portal, peer, mbits, 0,
- LNET_UNLINK, LNET_INS_AFTER, &me_h);
- if (rc != 0) {
- CERROR("%s: LNetMEAttach failed x%llu/%d: rc = %d\n",
- desc->bd_import->imp_obd->obd_name, mbits,
- posted_md, rc);
- break;
- }
-
- /* About to let the network at it... */
- rc = LNetMDAttach(me_h, md, LNET_UNLINK,
- &desc->bd_mds[posted_md]);
- if (rc != 0) {
- CERROR("%s: LNetMDAttach failed x%llu/%d: rc = %d\n",
- desc->bd_import->imp_obd->obd_name, mbits,
- posted_md, rc);
- rc2 = LNetMEUnlink(me_h);
- LASSERT(rc2 == 0);
- break;
- }
- }
-
- if (rc != 0) {
- LASSERT(rc == -ENOMEM);
- spin_lock(&desc->bd_lock);
- desc->bd_md_count -= total_md - posted_md;
- spin_unlock(&desc->bd_lock);
- LASSERT(desc->bd_md_count >= 0);
- mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
- req->rq_status = -ENOMEM;
- return -ENOMEM;
- }
-
- spin_lock(&desc->bd_lock);
- /* Holler if the peer manages to touch buffers before it knows the mbits */
- if (desc->bd_md_count != total_md)
- CWARN("%s: Peer %s touched %d buffers while I registered\n",
- desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer),
- total_md - desc->bd_md_count);
- spin_unlock(&desc->bd_lock);
-
- CDEBUG(D_NET, "Setup %u bulk %s buffers: %u pages %u bytes, mbits x%#llx-%#llx, portal %u\n",
- desc->bd_md_count,
- ptlrpc_is_bulk_op_get(desc->bd_type) ? "get-source" : "put-sink",
- desc->bd_iov_count, desc->bd_nob,
- desc->bd_last_mbits, req->rq_mbits, desc->bd_portal);
-
- return 0;
-}
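The registration loop above posts one MD per LNET_MAX_IOV pages and numbers
them upwards so that the final MD carries the request's rq_mbits. A worked
stand-alone sketch; all values, including LNET_MAX_IOV, are hypothetical:

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
            unsigned long long rq_mbits = 0x1000; /* assumed final matchbits */
            int iov_count = 600, max_iov = 256;   /* assumed sizes */
            int total_md = DIV_ROUND_UP(iov_count, max_iov);
            /* the first MD starts total_md - 1 below the final matchbits */
            unsigned long long mbits = rq_mbits - total_md + 1;
            int i;

            for (i = 0; i < total_md; i++, mbits++)
                    printf("MD %d uses matchbits %#llx\n", i, mbits);
            return 0;
    }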
-
-/**
- * Disconnect a bulk desc from the network. Idempotent. Not
- * thread-safe (i.e. only interlocks with completion callback).
- * Returns 1 on success or 0 if network unregistration failed for whatever
- * reason.
- */
-int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
-{
- struct ptlrpc_bulk_desc *desc = req->rq_bulk;
- wait_queue_head_t *wq;
- int rc;
-
- LASSERT(!in_interrupt()); /* might sleep */
-
- /* Let's set up the deadline for reply unlink. */
- if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
- async && req->rq_bulk_deadline == 0 && cfs_fail_val == 0)
- req->rq_bulk_deadline = ktime_get_real_seconds() + LONG_UNLINK;
-
- if (ptlrpc_client_bulk_active(req) == 0) /* completed or */
- return 1; /* never registered */
-
- LASSERT(desc->bd_req == req); /* bd_req NULL until registered */
-
- /* the unlink ensures the callback happens ASAP and is the last
- * one. If it fails, it must be because completion just happened,
- * but we must still wait_event() in this case to give liblustre
- * a chance to run client_bulk_callback()
- */
- mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
-
- if (ptlrpc_client_bulk_active(req) == 0) /* completed or */
- return 1; /* never registered */
-
- /* Move to "Unregistering" phase as bulk was not unlinked yet. */
- ptlrpc_rqphase_move(req, RQ_PHASE_UNREG_BULK);
-
- /* Do not wait for unlink to finish. */
- if (async)
- return 0;
-
- if (req->rq_set)
- wq = &req->rq_set->set_waitq;
- else
- wq = &req->rq_reply_waitq;
-
- for (;;) {
- /* Network access will complete in finite time but the HUGE
- * timeout lets us CWARN for visibility of sluggish LNDs
- */
- int cnt = 0;
- while (cnt < LONG_UNLINK &&
- (rc = wait_event_idle_timeout(*wq,
- !ptlrpc_client_bulk_active(req),
- HZ)) == 0)
- cnt += 1;
- if (rc > 0) {
- ptlrpc_rqphase_move(req, req->rq_next_phase);
- return 1;
- }
-
- DEBUG_REQ(D_WARNING, req, "Unexpectedly long timeout: desc %p",
- desc);
- }
- return 0;
-}
-
-static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
-{
- struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
- struct ptlrpc_service *svc = svcpt->scp_service;
- int service_time = max_t(int, ktime_get_real_seconds() -
- req->rq_arrival_time.tv_sec, 1);
-
- if (!(flags & PTLRPC_REPLY_EARLY) &&
- (req->rq_type != PTL_RPC_MSG_ERR) && req->rq_reqmsg &&
- !(lustre_msg_get_flags(req->rq_reqmsg) &
- (MSG_RESENT | MSG_REPLAY |
- MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE))) {
- /* early replies, errors and recovery requests don't count
- * toward our service time estimate
- */
- int oldse = at_measured(&svcpt->scp_at_estimate, service_time);
-
- if (oldse != 0) {
- DEBUG_REQ(D_ADAPTTO, req,
- "svc %s changed estimate from %d to %d",
- svc->srv_name, oldse,
- at_get(&svcpt->scp_at_estimate));
- }
- }
- /* Report actual service time for client latency calc */
- lustre_msg_set_service_time(req->rq_repmsg, service_time);
- /* Report the service time estimate for future client reqs, but
- * report 0 (to be ignored by the client) if it's an error reply
- * during recovery (bz15815).
- */
- if (req->rq_type == PTL_RPC_MSG_ERR && !req->rq_export)
- lustre_msg_set_timeout(req->rq_repmsg, 0);
- else
- lustre_msg_set_timeout(req->rq_repmsg,
- at_get(&svcpt->scp_at_estimate));
-
- if (req->rq_reqmsg &&
- !(lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
- CDEBUG(D_ADAPTTO, "No early reply support: flags=%#x req_flags=%#x magic=%x/%x len=%d\n",
- flags, lustre_msg_get_flags(req->rq_reqmsg),
- lustre_msg_get_magic(req->rq_reqmsg),
- lustre_msg_get_magic(req->rq_repmsg), req->rq_replen);
- }
-}
-
-/**
- * Send request reply from request \a req reply buffer.
- * \a flags defines reply types
- * Returns 0 on success or error code
- */
-int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
-{
- struct ptlrpc_reply_state *rs = req->rq_reply_state;
- struct ptlrpc_connection *conn;
- int rc;
-
- /* We must already have a reply buffer (only ptlrpc_error() may be
- * called without one). A reply generated by the sptlrpc layer (e.g.
- * an error notify) might have a NULL rq_reqmsg; otherwise we must
- * have a request buffer which is either the actual (swabbed) incoming
- * request, or a saved copy if this is a req saved in
- * target_queue_final_reply().
- */
- LASSERT(req->rq_no_reply == 0);
- LASSERT(req->rq_reqbuf);
- LASSERT(rs);
- LASSERT((flags & PTLRPC_REPLY_MAYBE_DIFFICULT) || !rs->rs_difficult);
- LASSERT(req->rq_repmsg);
- LASSERT(req->rq_repmsg == rs->rs_msg);
- LASSERT(rs->rs_cb_id.cbid_fn == reply_out_callback);
- LASSERT(rs->rs_cb_id.cbid_arg == rs);
-
- /* There may be no rq_export during failover */
-
- if (unlikely(req->rq_export && req->rq_export->exp_obd &&
- req->rq_export->exp_obd->obd_fail)) {
- /* Failed obd's only send ENODEV */
- req->rq_type = PTL_RPC_MSG_ERR;
- req->rq_status = -ENODEV;
- CDEBUG(D_HA, "sending ENODEV from failed obd %d\n",
- req->rq_export->exp_obd->obd_minor);
- }
-
- /* To keep interoperability with clients (< 2.3) that don't have
- * pb_jobid in ptlrpc_body, we have to shrink the ptlrpc_body in the
- * reply buffer to ptlrpc_body_v2; otherwise the reply buffer on the
- * client will overflow.
- *
- * XXX Remove this whenever we drop interoperability with such
- * clients.
- */
- req->rq_replen = lustre_shrink_msg(req->rq_repmsg, 0,
- sizeof(struct ptlrpc_body_v2), 1);
-
- if (req->rq_type != PTL_RPC_MSG_ERR)
- req->rq_type = PTL_RPC_MSG_REPLY;
-
- lustre_msg_set_type(req->rq_repmsg, req->rq_type);
- lustre_msg_set_status(req->rq_repmsg,
- ptlrpc_status_hton(req->rq_status));
- lustre_msg_set_opc(req->rq_repmsg,
- req->rq_reqmsg ?
- lustre_msg_get_opc(req->rq_reqmsg) : 0);
-
- target_pack_pool_reply(req);
-
- ptlrpc_at_set_reply(req, flags);
-
- if (!req->rq_export || !req->rq_export->exp_connection)
- conn = ptlrpc_connection_get(req->rq_peer, req->rq_self, NULL);
- else
- conn = ptlrpc_connection_addref(req->rq_export->exp_connection);
-
- if (unlikely(!conn)) {
- CERROR("not replying on NULL connection\n"); /* bug 9635 */
- return -ENOTCONN;
- }
- ptlrpc_rs_addref(rs); /* +1 ref for the network */
-
- rc = sptlrpc_svc_wrap_reply(req);
- if (unlikely(rc))
- goto out;
-
- req->rq_sent = ktime_get_real_seconds();
-
- rc = ptl_send_buf(&rs->rs_md_h, rs->rs_repbuf, rs->rs_repdata_len,
- (rs->rs_difficult && !rs->rs_no_ack) ?
- LNET_ACK_REQ : LNET_NOACK_REQ,
- &rs->rs_cb_id, conn,
- ptlrpc_req2svc(req)->srv_rep_portal,
- req->rq_xid, req->rq_reply_off);
-out:
- if (unlikely(rc != 0))
- ptlrpc_req_drop_rs(req);
- ptlrpc_connection_put(conn);
- return rc;
-}
-
-int ptlrpc_reply(struct ptlrpc_request *req)
-{
- if (req->rq_no_reply)
- return 0;
- return ptlrpc_send_reply(req, 0);
-}
-
-/**
- * For request \a req send an error reply back. Create empty
- * reply buffers if necessary.
- */
-int ptlrpc_send_error(struct ptlrpc_request *req, int may_be_difficult)
-{
- int rc;
-
- if (req->rq_no_reply)
- return 0;
-
- if (!req->rq_repmsg) {
- rc = lustre_pack_reply(req, 1, NULL, NULL);
- if (rc)
- return rc;
- }
-
- if (req->rq_status != -ENOSPC && req->rq_status != -EACCES &&
- req->rq_status != -EPERM && req->rq_status != -ENOENT &&
- req->rq_status != -EINPROGRESS && req->rq_status != -EDQUOT)
- req->rq_type = PTL_RPC_MSG_ERR;
-
- rc = ptlrpc_send_reply(req, may_be_difficult);
- return rc;
-}
-
-int ptlrpc_error(struct ptlrpc_request *req)
-{
- return ptlrpc_send_error(req, 0);
-}
-
-/**
- * Send request \a request.
- * if \a noreply is set, don't expect any reply back and don't set up
- * reply buffers.
- * Returns 0 on success or error code.
- */
-int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
-{
- int rc;
- int rc2;
- unsigned int mpflag = 0;
- struct ptlrpc_connection *connection;
- struct lnet_handle_me reply_me_h;
- struct lnet_md reply_md;
- struct obd_import *imp = request->rq_import;
- struct obd_device *obd = imp->imp_obd;
-
- if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
- return 0;
-
- LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST);
- LASSERT(request->rq_wait_ctx == 0);
-
- /* If this is a re-transmit, we're required to have disengaged
- * cleanly from the previous attempt
- */
- LASSERT(!request->rq_receiving_reply);
- LASSERT(!((lustre_msg_get_flags(request->rq_reqmsg) & MSG_REPLAY) &&
- (imp->imp_state == LUSTRE_IMP_FULL)));
-
- if (unlikely(obd && obd->obd_fail)) {
- CDEBUG(D_HA, "muting rpc for failed imp obd %s\n",
- obd->obd_name);
- /* this prevents us from waiting in ptlrpc_queue_wait */
- spin_lock(&request->rq_lock);
- request->rq_err = 1;
- spin_unlock(&request->rq_lock);
- request->rq_status = -ENODEV;
- return -ENODEV;
- }
-
- connection = imp->imp_connection;
-
- lustre_msg_set_handle(request->rq_reqmsg,
- &imp->imp_remote_handle);
- lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST);
- lustre_msg_set_conn_cnt(request->rq_reqmsg, imp->imp_conn_cnt);
- lustre_msghdr_set_flags(request->rq_reqmsg, imp->imp_msghdr_flags);
-
- /*
- * If this is the first resend of the request for EINPROGRESS,
- * we need to allocate a new XID (see after_reply()); this differs
- * from a resend due to reply timeout.
- */
- if (request->rq_nr_resend && list_empty(&request->rq_unreplied_list)) {
- __u64 min_xid = 0;
- /*
- * resend for EINPROGRESS, allocate new xid to avoid reply
- * reconstruction
- */
- spin_lock(&imp->imp_lock);
- ptlrpc_assign_next_xid_nolock(request);
- min_xid = ptlrpc_known_replied_xid(imp);
- spin_unlock(&imp->imp_lock);
-
- lustre_msg_set_last_xid(request->rq_reqmsg, min_xid);
- DEBUG_REQ(D_RPCTRACE, request, "Allocating new xid for resend on EINPROGRESS");
- }
-
- if (request->rq_bulk) {
- ptlrpc_set_bulk_mbits(request);
- lustre_msg_set_mbits(request->rq_reqmsg, request->rq_mbits);
- }
-
- if (list_empty(&request->rq_unreplied_list) ||
- request->rq_xid <= imp->imp_known_replied_xid) {
- DEBUG_REQ(D_ERROR, request,
- "xid: %llu, replied: %llu, list_empty:%d\n",
- request->rq_xid, imp->imp_known_replied_xid,
- list_empty(&request->rq_unreplied_list));
- LBUG();
- }
-
- /**
- * With AT enabled, all requests should have AT_SUPPORT in the
- * FULL import state when OBD_CONNECT_AT is set.
- */
- LASSERT(AT_OFF || imp->imp_state != LUSTRE_IMP_FULL ||
- (imp->imp_msghdr_flags & MSGHDR_AT_SUPPORT) ||
- !(imp->imp_connect_data.ocd_connect_flags &
- OBD_CONNECT_AT));
-
- if (request->rq_resend)
- lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
-
- if (request->rq_memalloc)
- mpflag = memalloc_noreclaim_save();
-
- rc = sptlrpc_cli_wrap_request(request);
- if (rc) {
- /*
- * set rq_sent so that this request is treated
- * as a delayed send in the upper layers
- */
- if (rc == -ENOMEM)
- request->rq_sent = ktime_get_seconds();
- goto out;
- }
-
- /* bulk register should be done after wrap_request() */
- if (request->rq_bulk) {
- rc = ptlrpc_register_bulk(request);
- if (rc != 0)
- goto out;
- }
-
- if (!noreply) {
- LASSERT(request->rq_replen != 0);
- if (!request->rq_repbuf) {
- LASSERT(!request->rq_repdata);
- LASSERT(!request->rq_repmsg);
- rc = sptlrpc_cli_alloc_repbuf(request,
- request->rq_replen);
- if (rc) {
- /* this prevents us from looping in
- * ptlrpc_queue_wait
- */
- spin_lock(&request->rq_lock);
- request->rq_err = 1;
- spin_unlock(&request->rq_lock);
- request->rq_status = rc;
- goto cleanup_bulk;
- }
- } else {
- request->rq_repdata = NULL;
- request->rq_repmsg = NULL;
- }
-
- rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/
- connection->c_peer, request->rq_xid, 0,
- LNET_UNLINK, LNET_INS_AFTER, &reply_me_h);
- if (rc != 0) {
- CERROR("LNetMEAttach failed: %d\n", rc);
- LASSERT(rc == -ENOMEM);
- rc = -ENOMEM;
- goto cleanup_bulk;
- }
- }
-
- spin_lock(&request->rq_lock);
- /* We are responsible for unlinking the reply buffer */
- request->rq_reply_unlinked = noreply;
- request->rq_receiving_reply = !noreply;
- /* Clear any flags that may be present from previous sends. */
- request->rq_req_unlinked = 0;
- request->rq_replied = 0;
- request->rq_err = 0;
- request->rq_timedout = 0;
- request->rq_net_err = 0;
- request->rq_resend = 0;
- request->rq_restart = 0;
- request->rq_reply_truncated = 0;
- spin_unlock(&request->rq_lock);
-
- if (!noreply) {
- reply_md.start = request->rq_repbuf;
- reply_md.length = request->rq_repbuf_len;
- /* Allow multiple early replies */
- reply_md.threshold = LNET_MD_THRESH_INF;
- /* Manage remote for early replies */
- reply_md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT |
- LNET_MD_MANAGE_REMOTE |
- LNET_MD_TRUNCATE; /* allow to make EOVERFLOW error */
- reply_md.user_ptr = &request->rq_reply_cbid;
- reply_md.eq_handle = ptlrpc_eq_h;
-
- /* We must see the unlink callback to set rq_reply_unlinked,
- * so we can't auto-unlink
- */
- rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN,
- &request->rq_reply_md_h);
- if (rc != 0) {
- CERROR("LNetMDAttach failed: %d\n", rc);
- LASSERT(rc == -ENOMEM);
- spin_lock(&request->rq_lock);
- /* ...but the MD attach didn't succeed... */
- request->rq_receiving_reply = 0;
- spin_unlock(&request->rq_lock);
- rc = -ENOMEM;
- goto cleanup_me;
- }
-
- CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid %llu, portal %u\n",
- request->rq_repbuf_len, request->rq_xid,
- request->rq_reply_portal);
- }
-
- /* add references on request for request_out_callback */
- ptlrpc_request_addref(request);
- if (obd && obd->obd_svc_stats)
- lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR,
- atomic_read(&imp->imp_inflight));
-
- OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);
-
- ktime_get_real_ts64(&request->rq_sent_tv);
- request->rq_sent = ktime_get_real_seconds();
- /* We give the server rq_timeout secs to process the req, and
- * add the network latency for our local timeout.
- */
- request->rq_deadline = request->rq_sent + request->rq_timeout +
- ptlrpc_at_get_net_latency(request);
-
- ptlrpc_pinger_sending_on_import(imp);
-
- DEBUG_REQ(D_INFO, request, "send flg=%x",
- lustre_msg_get_flags(request->rq_reqmsg));
- rc = ptl_send_buf(&request->rq_req_md_h,
- request->rq_reqbuf, request->rq_reqdata_len,
- LNET_NOACK_REQ, &request->rq_req_cbid,
- connection,
- request->rq_request_portal,
- request->rq_xid, 0);
- if (likely(rc == 0))
- goto out;
-
- request->rq_req_unlinked = 1;
- ptlrpc_req_finished(request);
- if (noreply)
- goto out;
-
- cleanup_me:
- /* MEUnlink is safe; the PUT didn't even get off the ground, and
- * nobody apart from the PUT's target has the right nid+XID to
- * access the reply buffer.
- */
- rc2 = LNetMEUnlink(reply_me_h);
- LASSERT(rc2 == 0);
- /* UNLINKED callback called synchronously */
- LASSERT(!request->rq_receiving_reply);
-
- cleanup_bulk:
-	/* We do a sync unlink here as there was no real transfer, so the
-	 * chance of a long unlink on a sluggish net is smaller.
- */
- ptlrpc_unregister_bulk(request, 0);
- out:
- if (request->rq_memalloc)
- memalloc_noreclaim_restore(mpflag);
- return rc;
-}
-EXPORT_SYMBOL(ptl_send_rpc);
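The deadline arithmetic above is worth stating on its own. Below is a minimal, self-contained sketch; struct rpc_timing and rpc_deadline() are illustrative stand-ins for the request's rq_sent/rq_timeout/net-latency fields, not part of the real ptlrpc:

#include <stdio.h>
#include <time.h>

/* Illustrative stand-in for the request's timing fields. */
struct rpc_timing {
	time_t sent;        /* when the request was sent (seconds)    */
	time_t timeout;     /* seconds the server may take to process */
	time_t net_latency; /* estimated network latency              */
};

/* Deadline = send time + server budget + network latency, as above. */
static time_t rpc_deadline(const struct rpc_timing *t)
{
	return t->sent + t->timeout + t->net_latency;
}

int main(void)
{
	struct rpc_timing t = { time(NULL), 30, 2 };

	printf("deadline is %ld seconds from now\n",
	       (long)(rpc_deadline(&t) - t.sent));
	return 0;
}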
-
-/**
- * Register request buffer descriptor for request receiving.
- */
-int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
-{
- struct ptlrpc_service *service = rqbd->rqbd_svcpt->scp_service;
- static struct lnet_process_id match_id = {LNET_NID_ANY, LNET_PID_ANY};
- int rc;
- struct lnet_md md;
- struct lnet_handle_me me_h;
-
- CDEBUG(D_NET, "LNetMEAttach: portal %d\n",
- service->srv_req_portal);
-
- if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_RQBD))
- return -ENOMEM;
-
-	/* NB: a CPT-affinity service should use the new LNet flag LNET_INS_LOCAL,
-	 * which means the buffer can only be attached on the local CPT, and LND
- * threads can find it by grabbing a local lock
- */
- rc = LNetMEAttach(service->srv_req_portal,
- match_id, 0, ~0, LNET_UNLINK,
- rqbd->rqbd_svcpt->scp_cpt >= 0 ?
- LNET_INS_LOCAL : LNET_INS_AFTER, &me_h);
- if (rc != 0) {
- CERROR("LNetMEAttach failed: %d\n", rc);
- return -ENOMEM;
- }
-
- LASSERT(rqbd->rqbd_refcount == 0);
- rqbd->rqbd_refcount = 1;
-
- md.start = rqbd->rqbd_buffer;
- md.length = service->srv_buf_size;
- md.max_size = service->srv_max_req_size;
- md.threshold = LNET_MD_THRESH_INF;
- md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT | LNET_MD_MAX_SIZE;
- md.user_ptr = &rqbd->rqbd_cbid;
- md.eq_handle = ptlrpc_eq_h;
-
- rc = LNetMDAttach(me_h, md, LNET_UNLINK, &rqbd->rqbd_md_h);
- if (rc == 0)
- return 0;
-
-	CERROR("LNetMDAttach failed: %d\n", rc);
- LASSERT(rc == -ENOMEM);
- rc = LNetMEUnlink(me_h);
- LASSERT(rc == 0);
- rqbd->rqbd_refcount = 0;
-
- return -ENOMEM;
-}
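Both ptl_send_rpc() and ptlrpc_register_rqbd() share the same two-step attach-with-rollback shape: attach a match entry, attach a memory descriptor, and unlink the match entry again if the second step fails. A hedged sketch of that shape follows; me_attach()/md_attach()/me_unlink() are hypothetical stubs standing in for the LNet calls, and simply simulate one failure:

#include <errno.h>
#include <stdio.h>

/* Hypothetical stand-ins for LNetMEAttach()/LNetMDAttach()/LNetMEUnlink(). */
static int me_attach(int *me)         { *me = 1; return 0; }
static int md_attach(int me, int *md) { (void)me; (void)md; return -ENOMEM; }
static void me_unlink(int me)         { printf("unlinked ME %d\n", me); }

/* Attach an ME, then an MD; roll back the ME when the MD attach fails. */
static int register_buffer(int *md_out)
{
	int me, rc;

	rc = me_attach(&me);
	if (rc)
		return rc;      /* nothing to roll back yet */

	rc = md_attach(me, md_out);
	if (rc)
		me_unlink(me);  /* undo the first step */
	return rc;
}

int main(void)
{
	int md;

	printf("register_buffer: %d\n", register_buffer(&md));
	return 0;
}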
diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs.c b/drivers/staging/lustre/lustre/ptlrpc/nrs.c
deleted file mode 100644
index e09b86529c5d..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/nrs.c
+++ /dev/null
@@ -1,1613 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
-
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License version 2 for more details. A copy is
- * included in the COPYING file that accompanied this code.
-
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2011 Intel Corporation
- *
- * Copyright 2012 Xyratex Technology Limited
- */
-/*
- * lustre/ptlrpc/nrs.c
- *
- * Network Request Scheduler (NRS)
- *
- * Allows reordering of the handling of RPCs at servers.
- *
- * Author: Liang Zhen <liang@whamcloud.com>
- * Author: Nikitas Angelinas <nikitas_angelinas@xyratex.com>
- */
-/**
- * \addtogroup nrs
- * @{
- */
-
-#define DEBUG_SUBSYSTEM S_RPC
-#include <linux/libcfs/libcfs.h>
-#include <linux/libcfs/libcfs_cpu.h>
-#include <obd_support.h>
-#include <obd_class.h>
-#include <lustre_net.h>
-#include <lprocfs_status.h>
-#include "ptlrpc_internal.h"
-
-/**
- * NRS core object.
- */
-struct nrs_core nrs_core;
-
-static int nrs_policy_init(struct ptlrpc_nrs_policy *policy)
-{
- return policy->pol_desc->pd_ops->op_policy_init ?
- policy->pol_desc->pd_ops->op_policy_init(policy) : 0;
-}
-
-static void nrs_policy_fini(struct ptlrpc_nrs_policy *policy)
-{
- LASSERT(policy->pol_ref == 0);
- LASSERT(policy->pol_req_queued == 0);
-
- if (policy->pol_desc->pd_ops->op_policy_fini)
- policy->pol_desc->pd_ops->op_policy_fini(policy);
-}
-
-static int nrs_policy_ctl_locked(struct ptlrpc_nrs_policy *policy,
- enum ptlrpc_nrs_ctl opc, void *arg)
-{
- /**
- * The policy may be stopped, but the lprocfs files and
- * ptlrpc_nrs_policy instances remain present until unregistration time.
- * Do not perform the ctl operation if the policy is stopped, as
- * policy->pol_private will be NULL in such a case.
- */
- if (policy->pol_state == NRS_POL_STATE_STOPPED)
- return -ENODEV;
-
- return policy->pol_desc->pd_ops->op_policy_ctl ?
- policy->pol_desc->pd_ops->op_policy_ctl(policy, opc, arg) :
- -ENOSYS;
-}
-
-static void nrs_policy_stop0(struct ptlrpc_nrs_policy *policy)
-{
- if (policy->pol_desc->pd_ops->op_policy_stop)
- policy->pol_desc->pd_ops->op_policy_stop(policy);
-
- LASSERT(list_empty(&policy->pol_list_queued));
- LASSERT(policy->pol_req_queued == 0 &&
- policy->pol_req_started == 0);
-
- policy->pol_private = NULL;
-
- policy->pol_state = NRS_POL_STATE_STOPPED;
-
- if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
- module_put(policy->pol_desc->pd_owner);
-}
-
-static int nrs_policy_stop_locked(struct ptlrpc_nrs_policy *policy)
-{
- struct ptlrpc_nrs *nrs = policy->pol_nrs;
-
- if (nrs->nrs_policy_fallback == policy && !nrs->nrs_stopping)
- return -EPERM;
-
- if (policy->pol_state == NRS_POL_STATE_STARTING)
- return -EAGAIN;
-
- /* In progress or already stopped */
- if (policy->pol_state != NRS_POL_STATE_STARTED)
- return 0;
-
- policy->pol_state = NRS_POL_STATE_STOPPING;
-
- /* Immediately make it invisible */
- if (nrs->nrs_policy_primary == policy) {
- nrs->nrs_policy_primary = NULL;
-
- } else {
- LASSERT(nrs->nrs_policy_fallback == policy);
- nrs->nrs_policy_fallback = NULL;
- }
-
- /* I have the only refcount */
- if (policy->pol_ref == 1)
- nrs_policy_stop0(policy);
-
- return 0;
-}
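The checks above amount to a small state machine. The sketch below restates the decision ladder with plain enums; the names mirror the NRS_POL_STATE_* values, but this is only an illustration of the logic, not the locked ptlrpc implementation:

#include <errno.h>
#include <stdbool.h>

enum pol_state { POL_STOPPED, POL_STOPPING, POL_STARTING, POL_STARTED };

/* Mirror of nrs_policy_stop_locked(): the fallback policy may only stop
 * while the whole head is stopping, a starting policy must settle first,
 * and a policy that is not started needs no work. */
static int try_stop(enum pol_state *s, bool is_fallback, bool head_stopping)
{
	if (is_fallback && !head_stopping)
		return -EPERM;
	if (*s == POL_STARTING)
		return -EAGAIN;
	if (*s != POL_STARTED)
		return 0;       /* stopping already, or stopped */
	*s = POL_STOPPING;
	return 0;
}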
-
-/**
- * Transitions the \a nrs NRS head's primary policy to
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING and if the policy has no
- * pending usage references, to ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED.
- *
- * \param[in] nrs the NRS head to carry out this operation on
- */
-static void nrs_policy_stop_primary(struct ptlrpc_nrs *nrs)
-{
- struct ptlrpc_nrs_policy *tmp = nrs->nrs_policy_primary;
-
- if (!tmp)
- return;
-
- nrs->nrs_policy_primary = NULL;
-
- LASSERT(tmp->pol_state == NRS_POL_STATE_STARTED);
- tmp->pol_state = NRS_POL_STATE_STOPPING;
-
- if (tmp->pol_ref == 0)
- nrs_policy_stop0(tmp);
-}
-
-/**
- * Transitions a policy across the ptlrpc_nrs_pol_state range of values, in
- * response to an lprocfs command to start a policy.
- *
- * If a primary policy different to the current one is specified, this function
- * will transition the new policy to the
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTING and then to
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED, and will then transition
- * the old primary policy (if there is one) to
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
- * references on the policy to ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED.
- *
- * If the fallback policy is specified, this is taken to indicate an instruction
- * to stop the current primary policy, without substituting it with another
- * primary policy, so the primary policy (if any) is transitioned to
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
- * references on the policy to ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED. In
- * this case, only the fallback policy is left active in the NRS head.
- */
-static int nrs_policy_start_locked(struct ptlrpc_nrs_policy *policy)
-{
- struct ptlrpc_nrs *nrs = policy->pol_nrs;
- int rc = 0;
-
- /**
-	 * Don't allow concurrent policy starts; that would be too complex, for
-	 * no real benefit.
- */
- if (nrs->nrs_policy_starting)
- return -EAGAIN;
-
- LASSERT(policy->pol_state != NRS_POL_STATE_STARTING);
-
- if (policy->pol_state == NRS_POL_STATE_STOPPING)
- return -EAGAIN;
-
- if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
- /**
- * This is for cases in which the user sets the policy to the
- * fallback policy (currently fifo for all services); i.e. the
- * user is resetting the policy to the default; so we stop the
- * primary policy, if any.
- */
- if (policy == nrs->nrs_policy_fallback) {
- nrs_policy_stop_primary(nrs);
- return 0;
- }
-
- /**
- * If we reach here, we must be setting up the fallback policy
- * at service startup time, and only a single policy with the
- * nrs_policy_flags::PTLRPC_NRS_FL_FALLBACK flag set can
- * register with NRS core.
- */
- LASSERT(!nrs->nrs_policy_fallback);
- } else {
- /**
-		 * A primary policy must not be started without a fallback policy.
- */
- if (!nrs->nrs_policy_fallback)
- return -EPERM;
-
- if (policy->pol_state == NRS_POL_STATE_STARTED)
- return 0;
- }
-
- /**
- * Increase the module usage count for policies registering from other
- * modules.
- */
- if (atomic_inc_return(&policy->pol_desc->pd_refs) == 1 &&
- !try_module_get(policy->pol_desc->pd_owner)) {
- atomic_dec(&policy->pol_desc->pd_refs);
- CERROR("NRS: cannot get module for policy %s; is it alive?\n",
- policy->pol_desc->pd_name);
- return -ENODEV;
- }
-
- /**
- * Serialize policy starting across the NRS head
- */
- nrs->nrs_policy_starting = 1;
-
- policy->pol_state = NRS_POL_STATE_STARTING;
-
- if (policy->pol_desc->pd_ops->op_policy_start) {
- spin_unlock(&nrs->nrs_lock);
-
- rc = policy->pol_desc->pd_ops->op_policy_start(policy);
-
- spin_lock(&nrs->nrs_lock);
- if (rc != 0) {
- if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
- module_put(policy->pol_desc->pd_owner);
-
- policy->pol_state = NRS_POL_STATE_STOPPED;
- goto out;
- }
- }
-
- policy->pol_state = NRS_POL_STATE_STARTED;
-
- if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
- /**
- * This path is only used at PTLRPC service setup time.
- */
- nrs->nrs_policy_fallback = policy;
- } else {
- /*
- * Try to stop the current primary policy if there is one.
- */
- nrs_policy_stop_primary(nrs);
-
- /**
- * And set the newly-started policy as the primary one.
- */
- nrs->nrs_policy_primary = policy;
- }
-
-out:
- nrs->nrs_policy_starting = 0;
-
- return rc;
-}
-
-/**
- * Increases the policy's usage reference count.
- */
-static inline void nrs_policy_get_locked(struct ptlrpc_nrs_policy *policy)
-{
- policy->pol_ref++;
-}
-
-/**
- * Decreases the policy's usage reference count, and stops the policy if it
- * was already stopping and has no more outstanding usage references (which
- * indicates it has no more queued or started requests, and can be safely
- * stopped).
- */
-static void nrs_policy_put_locked(struct ptlrpc_nrs_policy *policy)
-{
- LASSERT(policy->pol_ref > 0);
-
- policy->pol_ref--;
- if (unlikely(policy->pol_ref == 0 &&
- policy->pol_state == NRS_POL_STATE_STOPPING))
- nrs_policy_stop0(policy);
-}
-
-static void nrs_policy_put(struct ptlrpc_nrs_policy *policy)
-{
- spin_lock(&policy->pol_nrs->nrs_lock);
- nrs_policy_put_locked(policy);
- spin_unlock(&policy->pol_nrs->nrs_lock);
-}
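The get/put pairing above is the usual reference-counting discipline: the final put on a policy that is already stopping performs the last stop step. A compact sketch, with a plain int standing in for the nrs_lock-protected pol_ref counter:

#include <stdbool.h>
#include <stdio.h>

struct pol {
	int ref;       /* stand-in for pol_ref, protected by nrs_lock */
	bool stopping; /* stand-in for NRS_POL_STATE_STOPPING         */
};

static void pol_get(struct pol *p)
{
	p->ref++;
}

static void pol_put(struct pol *p)
{
	if (--p->ref == 0 && p->stopping)
		printf("final stop, cf. nrs_policy_stop0()\n");
}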
-
-/**
- * Find and return a policy by name.
- */
-static struct ptlrpc_nrs_policy *nrs_policy_find_locked(struct ptlrpc_nrs *nrs,
- char *name)
-{
- struct ptlrpc_nrs_policy *tmp;
-
- list_for_each_entry(tmp, &nrs->nrs_policy_list, pol_list) {
- if (strncmp(tmp->pol_desc->pd_name, name,
- NRS_POL_NAME_MAX) == 0) {
- nrs_policy_get_locked(tmp);
- return tmp;
- }
- }
- return NULL;
-}
-
-/**
- * Release references for the resource hierarchy moving upwards towards the
- * policy instance resource.
- */
-static void nrs_resource_put(struct ptlrpc_nrs_resource *res)
-{
- struct ptlrpc_nrs_policy *policy = res->res_policy;
-
- if (policy->pol_desc->pd_ops->op_res_put) {
- struct ptlrpc_nrs_resource *parent;
-
- for (; res; res = parent) {
- parent = res->res_parent;
- policy->pol_desc->pd_ops->op_res_put(policy, res);
- }
- }
-}
-
-/**
- * Obtains references for each resource in the resource hierarchy for request
- * \a nrq if it is to be handled by \a policy.
- *
- * \param[in] policy the policy
- * \param[in] nrq the request
- * \param[in] moving_req denotes whether this is a call to the function by
- * ldlm_lock_reorder_req(), in order to move \a nrq to
- * the high-priority NRS head; we should not sleep when
- * set.
- *
- * \retval NULL resource hierarchy references not obtained
- * \retval valid-pointer the bottom level of the resource hierarchy
- *
- * \see ptlrpc_nrs_pol_ops::op_res_get()
- */
-static
-struct ptlrpc_nrs_resource *nrs_resource_get(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq,
- bool moving_req)
-{
- /**
- * Set to NULL to traverse the resource hierarchy from the top.
- */
- struct ptlrpc_nrs_resource *res = NULL;
- struct ptlrpc_nrs_resource *tmp = NULL;
- int rc;
-
- while (1) {
- rc = policy->pol_desc->pd_ops->op_res_get(policy, nrq, res,
- &tmp, moving_req);
- if (rc < 0) {
- if (res)
- nrs_resource_put(res);
- return NULL;
- }
-
- tmp->res_parent = res;
- tmp->res_policy = policy;
- res = tmp;
- tmp = NULL;
- /**
- * Return once we have obtained a reference to the bottom level
- * of the resource hierarchy.
- */
- if (rc > 0)
- return res;
- }
-}
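The hierarchy built here is a parent-linked chain: op_res_get() is called repeatedly from the top, each new level pointing back at its parent, and release walks the chain upwards. An illustrative stand-alone version, with calloc()/free() in place of the policy's op_res_get()/op_res_put():

#include <stdlib.h>

struct res {
	struct res *parent;     /* links toward the top level */
};

/* Build a chain of the given depth; the bottom level is returned.
 * On allocation failure, release whatever was obtained so far, as
 * nrs_resource_get() does via nrs_resource_put(). */
static struct res *res_get_chain(int depth)
{
	struct res *r = NULL;
	int i;

	for (i = 0; i < depth; i++) {
		struct res *tmp = calloc(1, sizeof(*tmp));

		if (!tmp) {
			while (r) {
				struct res *parent = r->parent;

				free(r);
				r = parent;
			}
			return NULL;
		}
		tmp->parent = r;
		r = tmp;
	}
	return r;
}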
-
-/**
- * Obtains resources for the resource hierarchies and policy references for
- * the fallback and current primary policy (if any), that will later be used
- * to handle request \a nrq.
- *
- * \param[in] nrs the NRS head instance that will be handling request \a nrq.
- * \param[in] nrq the request that is being handled.
- * \param[out] resp the array where references to the resource hierarchy are
- * stored.
- * \param[in] moving_req is set when obtaining resources while moving a
- * request from a policy on the regular NRS head to a
- * policy on the HP NRS head (via
- * ldlm_lock_reorder_req()). It signifies that
- * allocations to get resources should be atomic; for
- * a full explanation, see comment in
- * ptlrpc_nrs_pol_ops::op_res_get().
- */
-static void nrs_resource_get_safe(struct ptlrpc_nrs *nrs,
- struct ptlrpc_nrs_request *nrq,
- struct ptlrpc_nrs_resource **resp,
- bool moving_req)
-{
- struct ptlrpc_nrs_policy *primary = NULL;
- struct ptlrpc_nrs_policy *fallback = NULL;
-
- memset(resp, 0, sizeof(resp[0]) * NRS_RES_MAX);
-
- /**
- * Obtain policy references.
- */
- spin_lock(&nrs->nrs_lock);
-
- fallback = nrs->nrs_policy_fallback;
- nrs_policy_get_locked(fallback);
-
- primary = nrs->nrs_policy_primary;
- if (primary)
- nrs_policy_get_locked(primary);
-
- spin_unlock(&nrs->nrs_lock);
-
- /**
- * Obtain resource hierarchy references.
- */
- resp[NRS_RES_FALLBACK] = nrs_resource_get(fallback, nrq, moving_req);
- LASSERT(resp[NRS_RES_FALLBACK]);
-
- if (primary) {
- resp[NRS_RES_PRIMARY] = nrs_resource_get(primary, nrq,
- moving_req);
- /**
- * A primary policy may exist which may not wish to serve a
- * particular request for different reasons; release the
- * reference on the policy as it will not be used for this
- * request.
- */
- if (!resp[NRS_RES_PRIMARY])
- nrs_policy_put(primary);
- }
-}
-
-/**
- * Releases references to resource hierarchies and policies, because they are no
- * longer required; used when request handling has been completed, or the
- * request is moving to the high priority NRS head.
- *
- * \param resp the resource hierarchy that is being released
- *
- * \see ptlrpc_nrs_req_finalize()
- */
-static void nrs_resource_put_safe(struct ptlrpc_nrs_resource **resp)
-{
- struct ptlrpc_nrs_policy *pols[NRS_RES_MAX];
- int i;
-
- for (i = 0; i < NRS_RES_MAX; i++) {
- if (resp[i]) {
- pols[i] = resp[i]->res_policy;
- nrs_resource_put(resp[i]);
- resp[i] = NULL;
- } else {
- pols[i] = NULL;
- }
- }
-
- for (i = 0; i < NRS_RES_MAX; i++) {
- if (pols[i])
- nrs_policy_put(pols[i]);
- }
-}
-
-/**
- * Obtains an NRS request from \a policy for handling or examination; the
- * request should be removed in the 'handling' case.
- *
- * Calling into this function implies we already know the policy has a request
- * waiting to be handled.
- *
- * \param[in] policy the policy from which a request is obtained
- * \param[in] peek when set, signifies that we just want to examine the
- * request, and not handle it, so the request is not removed
- * from the policy.
- * \param[in] force when set, it will force a policy to return a request if it
- * has one pending
- *
- * \retval the NRS request to be handled
- */
-static inline
-struct ptlrpc_nrs_request *nrs_request_get(struct ptlrpc_nrs_policy *policy,
- bool peek, bool force)
-{
- struct ptlrpc_nrs_request *nrq;
-
- LASSERT(policy->pol_req_queued > 0);
-
- nrq = policy->pol_desc->pd_ops->op_req_get(policy, peek, force);
-
- LASSERT(ergo(nrq, nrs_request_policy(nrq) == policy));
-
- return nrq;
-}
-
-/**
- * Enqueues request \a nrq for later handling, via one of the policies for
- * which resources were earlier obtained via nrs_resource_get_safe(). The
- * function attempts to enqueue the request first on the primary policy
- * (if any), since this is the preferred choice.
- *
- * \param nrq the request being enqueued
- *
- * \see nrs_resource_get_safe()
- */
-static inline void nrs_request_enqueue(struct ptlrpc_nrs_request *nrq)
-{
- struct ptlrpc_nrs_policy *policy;
- int rc;
- int i;
-
- /**
- * Try in descending order, because the primary policy (if any) is
- * the preferred choice.
- */
- for (i = NRS_RES_MAX - 1; i >= 0; i--) {
- if (!nrq->nr_res_ptrs[i])
- continue;
-
- nrq->nr_res_idx = i;
- policy = nrq->nr_res_ptrs[i]->res_policy;
-
- rc = policy->pol_desc->pd_ops->op_req_enqueue(policy, nrq);
- if (rc == 0) {
- policy->pol_nrs->nrs_req_queued++;
- policy->pol_req_queued++;
- return;
- }
- }
- /**
- * Should never get here, as at least the primary policy's
- * ptlrpc_nrs_pol_ops::op_req_enqueue() implementation should always
- * succeed.
- */
- LBUG();
-}
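The loop runs from NRS_RES_MAX - 1 downwards because the primary slot has the higher index, and the fallback, tried last, must always accept. A small stand-alone restatement; the two-slot layout and the enqueue callbacks are illustrative:

#include <assert.h>

enum { RES_FALLBACK, RES_PRIMARY, RES_MAX };

typedef int (*enqueue_fn)(void *req);

/* Try the primary policy first, then fall back; the fallback's enqueue
 * is assumed never to fail, matching the LBUG() above. */
static void enqueue(void *req, enqueue_fn fns[RES_MAX])
{
	int i;

	for (i = RES_MAX - 1; i >= 0; i--) {
		if (fns[i] && fns[i](req) == 0)
			return;
	}
	assert(0);
}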
-
-/**
- * Called when a request has been handled
- *
- * \param[in] nrq the request that has been handled; can be used for
- * job/resource control.
- *
- * \see ptlrpc_nrs_req_stop_nolock()
- */
-static inline void nrs_request_stop(struct ptlrpc_nrs_request *nrq)
-{
- struct ptlrpc_nrs_policy *policy = nrs_request_policy(nrq);
-
- if (policy->pol_desc->pd_ops->op_req_stop)
- policy->pol_desc->pd_ops->op_req_stop(policy, nrq);
-
- LASSERT(policy->pol_nrs->nrs_req_started > 0);
- LASSERT(policy->pol_req_started > 0);
-
- policy->pol_nrs->nrs_req_started--;
- policy->pol_req_started--;
-}
-
-/**
- * Handler for operations that can be carried out on policies.
- *
- * Handles opcodes that are common to all policy types within NRS core, and
- * passes any unknown opcodes to the policy-specific control function.
- *
- * \param[in] nrs the NRS head this policy belongs to.
- * \param[in] name the human-readable policy name; should be the same as
- * ptlrpc_nrs_pol_desc::pd_name.
- * \param[in] opc the opcode of the operation being carried out.
- * \param[in,out] arg can be used to pass information in and out when
- * carrying out an operation; usually data that is private to
- * the policy at some level, or generic policy status
- * information.
- *
- * \retval -ve error condition
- * \retval 0 operation was carried out successfully
- */
-static int nrs_policy_ctl(struct ptlrpc_nrs *nrs, char *name,
- enum ptlrpc_nrs_ctl opc, void *arg)
-{
- struct ptlrpc_nrs_policy *policy;
- int rc = 0;
-
- spin_lock(&nrs->nrs_lock);
-
- policy = nrs_policy_find_locked(nrs, name);
- if (!policy) {
- rc = -ENOENT;
- goto out;
- }
-
- if (policy->pol_state != NRS_POL_STATE_STARTED &&
- policy->pol_state != NRS_POL_STATE_STOPPED) {
- rc = -EAGAIN;
- goto out;
- }
-
- switch (opc) {
- /**
- * Unknown opcode, pass it down to the policy-specific control
- * function for handling.
- */
- default:
- rc = nrs_policy_ctl_locked(policy, opc, arg);
- break;
-
- /**
- * Start \e policy
- */
- case PTLRPC_NRS_CTL_START:
- rc = nrs_policy_start_locked(policy);
- break;
- }
-out:
- if (policy)
- nrs_policy_put_locked(policy);
-
- spin_unlock(&nrs->nrs_lock);
-
- return rc;
-}
-
-/**
- * Unregisters a policy by name.
- *
- * \param[in] nrs the NRS head this policy belongs to.
- * \param[in] name the human-readable policy name; should be the same as
- * ptlrpc_nrs_pol_desc::pd_name
- *
- * \retval -ve error
- * \retval 0 success
- */
-static int nrs_policy_unregister(struct ptlrpc_nrs *nrs, char *name)
-{
- struct ptlrpc_nrs_policy *policy = NULL;
-
- spin_lock(&nrs->nrs_lock);
-
- policy = nrs_policy_find_locked(nrs, name);
- if (!policy) {
- spin_unlock(&nrs->nrs_lock);
-
- CERROR("Can't find NRS policy %s\n", name);
- return -ENOENT;
- }
-
- if (policy->pol_ref > 1) {
- CERROR("Policy %s is busy with %d references\n", name,
- (int)policy->pol_ref);
- nrs_policy_put_locked(policy);
-
- spin_unlock(&nrs->nrs_lock);
- return -EBUSY;
- }
-
- LASSERT(policy->pol_req_queued == 0);
- LASSERT(policy->pol_req_started == 0);
-
- if (policy->pol_state != NRS_POL_STATE_STOPPED) {
- nrs_policy_stop_locked(policy);
- LASSERT(policy->pol_state == NRS_POL_STATE_STOPPED);
- }
-
- list_del(&policy->pol_list);
- nrs->nrs_num_pols--;
-
- nrs_policy_put_locked(policy);
-
- spin_unlock(&nrs->nrs_lock);
-
- nrs_policy_fini(policy);
-
- LASSERT(!policy->pol_private);
- kfree(policy);
-
- return 0;
-}
-
-/**
- * Registers a policy from policy descriptor \a desc with NRS head \a nrs.
- *
- * \param[in] nrs the NRS head on which the policy will be registered.
- * \param[in] desc the policy descriptor from which the information will be
- * obtained to register the policy.
- *
- * \retval -ve error
- * \retval 0 success
- */
-static int nrs_policy_register(struct ptlrpc_nrs *nrs,
- struct ptlrpc_nrs_pol_desc *desc)
-{
- struct ptlrpc_nrs_policy *policy;
- struct ptlrpc_nrs_policy *tmp;
- struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt;
- int rc;
-
- LASSERT(desc->pd_ops->op_res_get);
- LASSERT(desc->pd_ops->op_req_get);
- LASSERT(desc->pd_ops->op_req_enqueue);
- LASSERT(desc->pd_ops->op_req_dequeue);
- LASSERT(desc->pd_compat);
-
- policy = kzalloc_node(sizeof(*policy), GFP_NOFS,
- cfs_cpt_spread_node(svcpt->scp_service->srv_cptable,
- svcpt->scp_cpt));
- if (!policy)
- return -ENOMEM;
-
- policy->pol_nrs = nrs;
- policy->pol_desc = desc;
- policy->pol_state = NRS_POL_STATE_STOPPED;
- policy->pol_flags = desc->pd_flags;
-
- INIT_LIST_HEAD(&policy->pol_list);
- INIT_LIST_HEAD(&policy->pol_list_queued);
-
- rc = nrs_policy_init(policy);
- if (rc != 0) {
- kfree(policy);
- return rc;
- }
-
- spin_lock(&nrs->nrs_lock);
-
- tmp = nrs_policy_find_locked(nrs, policy->pol_desc->pd_name);
- if (tmp) {
- CERROR("NRS policy %s has been registered, can't register it for %s\n",
- policy->pol_desc->pd_name,
- svcpt->scp_service->srv_name);
- nrs_policy_put_locked(tmp);
-
- spin_unlock(&nrs->nrs_lock);
- nrs_policy_fini(policy);
- kfree(policy);
-
- return -EEXIST;
- }
-
- list_add_tail(&policy->pol_list, &nrs->nrs_policy_list);
- nrs->nrs_num_pols++;
-
- if (policy->pol_flags & PTLRPC_NRS_FL_REG_START)
- rc = nrs_policy_start_locked(policy);
-
- spin_unlock(&nrs->nrs_lock);
-
- if (rc != 0)
- (void)nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
-
- return rc;
-}
-
-/**
- * Enqueue request \a req using one of the policies its resources are referring
- * to.
- *
- * \param[in] req the request to enqueue.
- */
-static void ptlrpc_nrs_req_add_nolock(struct ptlrpc_request *req)
-{
- struct ptlrpc_nrs_policy *policy;
-
- LASSERT(req->rq_nrq.nr_initialized);
- LASSERT(!req->rq_nrq.nr_enqueued);
-
- nrs_request_enqueue(&req->rq_nrq);
- req->rq_nrq.nr_enqueued = 1;
-
- policy = nrs_request_policy(&req->rq_nrq);
- /**
- * Add the policy to the NRS head's list of policies with enqueued
- * requests, if it has not been added there.
- */
- if (unlikely(list_empty(&policy->pol_list_queued)))
- list_add_tail(&policy->pol_list_queued,
- &policy->pol_nrs->nrs_policy_queued);
-}
-
-/**
- * Enqueue a request on the high priority NRS head.
- *
- * \param req the request to enqueue.
- */
-static void ptlrpc_nrs_hpreq_add_nolock(struct ptlrpc_request *req)
-{
- int opc = lustre_msg_get_opc(req->rq_reqmsg);
-
- spin_lock(&req->rq_lock);
- req->rq_hp = 1;
- ptlrpc_nrs_req_add_nolock(req);
- if (opc != OBD_PING)
- DEBUG_REQ(D_NET, req, "high priority req");
- spin_unlock(&req->rq_lock);
-}
-
-/**
- * Returns a boolean predicate indicating whether the policy described by
- * \a desc is adequate for use with service \a svc.
- *
- * \param[in] svc the service
- * \param[in] desc the policy descriptor
- *
- * \retval false the policy is not compatible with the service
- * \retval true the policy is compatible with the service
- */
-static inline bool nrs_policy_compatible(const struct ptlrpc_service *svc,
- const struct ptlrpc_nrs_pol_desc *desc)
-{
- return desc->pd_compat(svc, desc);
-}
-
-/**
- * Registers all compatible policies in nrs_core.nrs_policies, for NRS head
- * \a nrs.
- *
- * \param[in] nrs the NRS head
- *
- * \retval -ve error
- * \retval 0 success
- *
- * \pre mutex_is_locked(&nrs_core.nrs_mutex)
- *
- * \see ptlrpc_service_nrs_setup()
- */
-static int nrs_register_policies_locked(struct ptlrpc_nrs *nrs)
-{
- struct ptlrpc_nrs_pol_desc *desc;
- /* for convenience */
- struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt;
- struct ptlrpc_service *svc = svcpt->scp_service;
- int rc = -EINVAL;
-
- LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
-
- list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
- if (nrs_policy_compatible(svc, desc)) {
- rc = nrs_policy_register(nrs, desc);
- if (rc != 0) {
- CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n",
- desc->pd_name, svcpt->scp_cpt,
- svc->srv_name, rc);
- /**
- * Fail registration if any of the policies'
- * registration fails.
- */
- break;
- }
- }
- }
-
- return rc;
-}
-
-/**
- * Initializes NRS head \a nrs of service partition \a svcpt, and registers all
- * compatible policies in NRS core, with the NRS head.
- *
- * \param[in] nrs the NRS head
- * \param[in] svcpt the PTLRPC service partition to setup
- *
- * \retval -ve error
- * \retval 0 success
- *
- * \pre mutex_is_locked(&nrs_core.nrs_mutex)
- */
-static int nrs_svcpt_setup_locked0(struct ptlrpc_nrs *nrs,
- struct ptlrpc_service_part *svcpt)
-{
- enum ptlrpc_nrs_queue_type queue;
-
- LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
-
- if (nrs == &svcpt->scp_nrs_reg)
- queue = PTLRPC_NRS_QUEUE_REG;
- else if (nrs == svcpt->scp_nrs_hp)
- queue = PTLRPC_NRS_QUEUE_HP;
- else
- LBUG();
-
- nrs->nrs_svcpt = svcpt;
- nrs->nrs_queue_type = queue;
- spin_lock_init(&nrs->nrs_lock);
- INIT_LIST_HEAD(&nrs->nrs_policy_list);
- INIT_LIST_HEAD(&nrs->nrs_policy_queued);
-
- return nrs_register_policies_locked(nrs);
-}
-
-/**
- * Allocates a regular and optionally a high-priority NRS head (if the service
- * handles high-priority RPCs), and then registers all available compatible
- * policies on those NRS heads.
- *
- * \param[in,out] svcpt the PTLRPC service partition to setup
- *
- * \pre mutex_is_locked(&nrs_core.nrs_mutex)
- */
-static int nrs_svcpt_setup_locked(struct ptlrpc_service_part *svcpt)
-{
- struct ptlrpc_nrs *nrs;
- int rc;
-
- LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
-
- /**
- * Initialize the regular NRS head.
- */
- nrs = nrs_svcpt2nrs(svcpt, false);
- rc = nrs_svcpt_setup_locked0(nrs, svcpt);
- if (rc < 0)
- goto out;
-
- /**
- * Optionally allocate a high-priority NRS head.
- */
- if (!svcpt->scp_service->srv_ops.so_hpreq_handler)
- goto out;
-
- svcpt->scp_nrs_hp =
- kzalloc_node(sizeof(*svcpt->scp_nrs_hp), GFP_NOFS,
- cfs_cpt_spread_node(svcpt->scp_service->srv_cptable,
- svcpt->scp_cpt));
- if (!svcpt->scp_nrs_hp) {
- rc = -ENOMEM;
- goto out;
- }
-
- nrs = nrs_svcpt2nrs(svcpt, true);
- rc = nrs_svcpt_setup_locked0(nrs, svcpt);
-
-out:
- return rc;
-}
-
-/**
- * Unregisters all policies on all available NRS heads in a service partition;
- * called at PTLRPC service unregistration time.
- *
- * \param[in] svcpt the PTLRPC service partition
- *
- * \pre mutex_is_locked(&nrs_core.nrs_mutex)
- */
-static void nrs_svcpt_cleanup_locked(struct ptlrpc_service_part *svcpt)
-{
- struct ptlrpc_nrs *nrs;
- struct ptlrpc_nrs_policy *policy;
- struct ptlrpc_nrs_policy *tmp;
- int rc;
- bool hp = false;
-
- LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
-
-again:
-	/* scp_nrs_hp could be NULL due to a memory allocation failure. */
- nrs = hp ? svcpt->scp_nrs_hp : &svcpt->scp_nrs_reg;
- /* check the nrs_svcpt to see if nrs is initialized. */
- if (!nrs || !nrs->nrs_svcpt)
- return;
- nrs->nrs_stopping = 1;
-
- list_for_each_entry_safe(policy, tmp, &nrs->nrs_policy_list, pol_list) {
- rc = nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
- LASSERT(rc == 0);
- }
-
- /**
- * If the service partition has an HP NRS head, clean that up as well.
- */
- if (!hp && nrs_svcpt_has_hp(svcpt)) {
- hp = true;
- goto again;
- }
-
- if (hp)
- kfree(nrs);
-}
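The bool hp plus goto again idiom here (and in the registration paths below) is simply a two-pass walk over the regular head and the optional high-priority head. Reshaped as a loop for clarity, with a callback standing in for the per-head work:

#include <stdbool.h>

static void for_each_nrs_head(void (*fn)(bool hp), bool has_hp)
{
	fn(false);              /* regular NRS head */
	if (has_hp)
		fn(true);       /* high-priority NRS head, if allocated */
}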
-
-/**
- * Returns the descriptor for a policy identified by \a name.
- *
- * \param[in] name the policy name
- *
- * \retval the policy descriptor
- * \retval NULL
- */
-static struct ptlrpc_nrs_pol_desc *nrs_policy_find_desc_locked(const char *name)
-{
- struct ptlrpc_nrs_pol_desc *tmp;
-
- list_for_each_entry(tmp, &nrs_core.nrs_policies, pd_list) {
- if (strncmp(tmp->pd_name, name, NRS_POL_NAME_MAX) == 0)
- return tmp;
- }
- return NULL;
-}
-
-/**
- * Removes the policy from all supported NRS heads of all partitions of all
- * PTLRPC services.
- *
- * \param[in] desc the policy descriptor to unregister
- *
- * \retval -ve error
- * \retval 0 successfully unregistered policy on all supported NRS heads
- *
- * \pre mutex_is_locked(&nrs_core.nrs_mutex)
- * \pre mutex_is_locked(&ptlrpc_all_services_mutex)
- */
-static int nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc *desc)
-{
- struct ptlrpc_nrs *nrs;
- struct ptlrpc_service *svc;
- struct ptlrpc_service_part *svcpt;
- int i;
- int rc = 0;
-
- LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
- LASSERT(mutex_is_locked(&ptlrpc_all_services_mutex));
-
- list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
- if (!nrs_policy_compatible(svc, desc) ||
- unlikely(svc->srv_is_stopping))
- continue;
-
- ptlrpc_service_for_each_part(svcpt, i, svc) {
- bool hp = false;
-
-again:
- nrs = nrs_svcpt2nrs(svcpt, hp);
- rc = nrs_policy_unregister(nrs, desc->pd_name);
- /**
- * Ignore -ENOENT as the policy may not have registered
- * successfully on all service partitions.
- */
- if (rc == -ENOENT) {
- rc = 0;
- } else if (rc != 0) {
- CERROR("Failed to unregister NRS policy %s for partition %d of service %s: %d\n",
- desc->pd_name, svcpt->scp_cpt,
- svcpt->scp_service->srv_name, rc);
- return rc;
- }
-
- if (!hp && nrs_svc_has_hp(svc)) {
- hp = true;
- goto again;
- }
- }
-
- if (desc->pd_ops->op_lprocfs_fini)
- desc->pd_ops->op_lprocfs_fini(svc);
- }
-
- return rc;
-}
-
-/**
- * Registers a new policy with NRS core.
- *
- * The function will only succeed if policy registration with all compatible
- * service partitions (if any) is successful.
- *
- * N.B. This function should be called either at ptlrpc module initialization
- * time when registering a policy that ships with NRS core, or in a
- * module's init() function for policies registering from other modules.
- *
- * \param[in] conf configuration information for the new policy to register
- *
- * \retval -ve error
- * \retval 0 success
- */
-static int ptlrpc_nrs_policy_register(struct ptlrpc_nrs_pol_conf *conf)
-{
- struct ptlrpc_service *svc;
- struct ptlrpc_nrs_pol_desc *desc;
- size_t len;
- int rc = 0;
-
- LASSERT(conf->nc_ops);
- LASSERT(conf->nc_compat);
- LASSERT(ergo(conf->nc_compat == nrs_policy_compat_one,
- conf->nc_compat_svc_name));
- LASSERT(ergo((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0,
- conf->nc_owner));
-
- conf->nc_name[NRS_POL_NAME_MAX - 1] = '\0';
-
- /**
- * External policies are not allowed to start immediately upon
- * registration, as there is a relatively higher chance that their
- * registration might fail. In such a case, some policy instances may
- * already have requests queued when unregistration needs to happen as
- * part of cleanup; since there is currently no way to drain requests
- * from a policy unless the service is unregistering, we just disallow
- * this.
- */
- if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) &&
- (conf->nc_flags & (PTLRPC_NRS_FL_FALLBACK |
- PTLRPC_NRS_FL_REG_START))) {
- CERROR("NRS: failing to register policy %s. Please check policy flags; external policies cannot act as fallback policies, or be started immediately upon registration without interaction with lprocfs\n",
- conf->nc_name);
- return -EINVAL;
- }
-
- mutex_lock(&nrs_core.nrs_mutex);
-
- if (nrs_policy_find_desc_locked(conf->nc_name)) {
- CERROR("NRS: failing to register policy %s which has already been registered with NRS core!\n",
- conf->nc_name);
- rc = -EEXIST;
- goto fail;
- }
-
- desc = kzalloc(sizeof(*desc), GFP_NOFS);
- if (!desc) {
- rc = -ENOMEM;
- goto fail;
- }
-
- len = strlcpy(desc->pd_name, conf->nc_name, sizeof(desc->pd_name));
- if (len >= sizeof(desc->pd_name)) {
- kfree(desc);
- rc = -E2BIG;
- goto fail;
- }
- desc->pd_ops = conf->nc_ops;
- desc->pd_compat = conf->nc_compat;
- desc->pd_compat_svc_name = conf->nc_compat_svc_name;
- if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0)
- desc->pd_owner = conf->nc_owner;
- desc->pd_flags = conf->nc_flags;
- atomic_set(&desc->pd_refs, 0);
-
- /**
- * For policies that are held in the same module as NRS (currently
- * ptlrpc), do not register the policy with all compatible services,
- * as the services will not have started at this point, since we are
- * calling from ptlrpc module initialization code. In such cases each
- * service will register all compatible policies later, via
- * ptlrpc_service_nrs_setup().
- */
- if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) == 0)
- goto internal;
-
- /**
- * Register the new policy on all compatible services
- */
- mutex_lock(&ptlrpc_all_services_mutex);
-
- list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
- struct ptlrpc_service_part *svcpt;
- int i;
- int rc2;
-
- if (!nrs_policy_compatible(svc, desc) ||
- unlikely(svc->srv_is_stopping))
- continue;
-
- ptlrpc_service_for_each_part(svcpt, i, svc) {
- struct ptlrpc_nrs *nrs;
- bool hp = false;
-again:
- nrs = nrs_svcpt2nrs(svcpt, hp);
- rc = nrs_policy_register(nrs, desc);
- if (rc != 0) {
- CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n",
- desc->pd_name, svcpt->scp_cpt,
- svcpt->scp_service->srv_name, rc);
-
- rc2 = nrs_policy_unregister_locked(desc);
- /**
- * Should not fail at this point
- */
- LASSERT(rc2 == 0);
- mutex_unlock(&ptlrpc_all_services_mutex);
- kfree(desc);
- goto fail;
- }
-
- if (!hp && nrs_svc_has_hp(svc)) {
- hp = true;
- goto again;
- }
- }
-
- /**
- * No need to take a reference to other modules here, as we
- * will be calling from the module's init() function.
- */
- if (desc->pd_ops->op_lprocfs_init) {
- rc = desc->pd_ops->op_lprocfs_init(svc);
- if (rc != 0) {
- rc2 = nrs_policy_unregister_locked(desc);
- /**
- * Should not fail at this point
- */
- LASSERT(rc2 == 0);
- mutex_unlock(&ptlrpc_all_services_mutex);
- kfree(desc);
- goto fail;
- }
- }
- }
-
- mutex_unlock(&ptlrpc_all_services_mutex);
-internal:
- list_add_tail(&desc->pd_list, &nrs_core.nrs_policies);
-fail:
- mutex_unlock(&nrs_core.nrs_mutex);
-
- return rc;
-}
-
-/**
- * Setup NRS heads on all service partitions of service \a svc, and register
- * all compatible policies on those NRS heads.
- *
- * To be called from within ptlrpc_register_service().
- * \param[in] svc the service to setup
- *
- * \retval -ve error, the calling logic should eventually call
- * ptlrpc_service_nrs_cleanup() to undo any work performed
- * by this function.
- *
- * \see ptlrpc_register_service()
- * \see ptlrpc_service_nrs_cleanup()
- */
-int ptlrpc_service_nrs_setup(struct ptlrpc_service *svc)
-{
- struct ptlrpc_service_part *svcpt;
- const struct ptlrpc_nrs_pol_desc *desc;
- int i;
- int rc = 0;
-
- mutex_lock(&nrs_core.nrs_mutex);
-
- /**
- * Initialize NRS heads on all service CPTs.
- */
- ptlrpc_service_for_each_part(svcpt, i, svc) {
- rc = nrs_svcpt_setup_locked(svcpt);
- if (rc != 0)
- goto failed;
- }
-
- /**
- * Set up lprocfs interfaces for all supported policies for the
- * service.
- */
- list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
- if (!nrs_policy_compatible(svc, desc))
- continue;
-
- if (desc->pd_ops->op_lprocfs_init) {
- rc = desc->pd_ops->op_lprocfs_init(svc);
- if (rc != 0)
- goto failed;
- }
- }
-
-failed:
-
- mutex_unlock(&nrs_core.nrs_mutex);
-
- return rc;
-}
-
-/**
- * Unregisters all policies on all service partitions of service \a svc.
- *
- * \param[in] svc the PTLRPC service to unregister
- */
-void ptlrpc_service_nrs_cleanup(struct ptlrpc_service *svc)
-{
- struct ptlrpc_service_part *svcpt;
- const struct ptlrpc_nrs_pol_desc *desc;
- int i;
-
- mutex_lock(&nrs_core.nrs_mutex);
-
- /**
- * Clean up NRS heads on all service partitions
- */
- ptlrpc_service_for_each_part(svcpt, i, svc)
- nrs_svcpt_cleanup_locked(svcpt);
-
- /**
- * Clean up lprocfs interfaces for all supported policies for the
- * service.
- */
- list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
- if (!nrs_policy_compatible(svc, desc))
- continue;
-
- if (desc->pd_ops->op_lprocfs_fini)
- desc->pd_ops->op_lprocfs_fini(svc);
- }
-
- mutex_unlock(&nrs_core.nrs_mutex);
-}
-
-/**
- * Obtains NRS head resources for request \a req.
- *
- * These could be either on the regular or HP NRS head of \a svcpt; resources
- * taken on the regular head can later be swapped for HP head resources by
- * ldlm_lock_reorder_req().
- *
- * \param[in] svcpt the service partition
- * \param[in] req the request
- * \param[in] hp which NRS head of \a svcpt to use
- */
-void ptlrpc_nrs_req_initialize(struct ptlrpc_service_part *svcpt,
- struct ptlrpc_request *req, bool hp)
-{
- struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
-
- memset(&req->rq_nrq, 0, sizeof(req->rq_nrq));
- nrs_resource_get_safe(nrs, &req->rq_nrq, req->rq_nrq.nr_res_ptrs,
- false);
-
- /**
- * It is fine to access \e nr_initialized without locking as there is
- * no contention at this early stage.
- */
- req->rq_nrq.nr_initialized = 1;
-}
-
-/**
- * Releases resources for a request; is called after the request has been
- * handled.
- *
- * \param[in] req the request
- *
- * \see ptlrpc_server_finish_request()
- */
-void ptlrpc_nrs_req_finalize(struct ptlrpc_request *req)
-{
- if (req->rq_nrq.nr_initialized) {
- nrs_resource_put_safe(req->rq_nrq.nr_res_ptrs);
- /* no protection on bit nr_initialized because no
- * contention at this late stage
- */
- req->rq_nrq.nr_finalized = 1;
- }
-}
-
-void ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request *req)
-{
- if (req->rq_nrq.nr_started)
- nrs_request_stop(&req->rq_nrq);
-}
-
-/**
- * Enqueues request \a req on either the regular or high-priority NRS head
- * of service partition \a svcpt.
- *
- * \param[in] svcpt the service partition
- * \param[in] req the request to be enqueued
- * \param[in] hp whether to enqueue the request on the regular or
- * high-priority NRS head.
- */
-void ptlrpc_nrs_req_add(struct ptlrpc_service_part *svcpt,
- struct ptlrpc_request *req, bool hp)
-{
- spin_lock(&svcpt->scp_req_lock);
-
- if (hp)
- ptlrpc_nrs_hpreq_add_nolock(req);
- else
- ptlrpc_nrs_req_add_nolock(req);
-
- spin_unlock(&svcpt->scp_req_lock);
-}
-
-static void nrs_request_removed(struct ptlrpc_nrs_policy *policy)
-{
- LASSERT(policy->pol_nrs->nrs_req_queued > 0);
- LASSERT(policy->pol_req_queued > 0);
-
- policy->pol_nrs->nrs_req_queued--;
- policy->pol_req_queued--;
-
- /**
- * If the policy has no more requests queued, remove it from
- * ptlrpc_nrs::nrs_policy_queued.
- */
- if (unlikely(policy->pol_req_queued == 0)) {
- list_del_init(&policy->pol_list_queued);
-
- /**
- * If there are other policies with queued requests, move the
- * current policy to the end so that we can round robin over
- * all policies and drain the requests.
- */
- } else if (policy->pol_req_queued != policy->pol_nrs->nrs_req_queued) {
- LASSERT(policy->pol_req_queued <
- policy->pol_nrs->nrs_req_queued);
-
- list_move_tail(&policy->pol_list_queued,
- &policy->pol_nrs->nrs_policy_queued);
- }
-}
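The list_move_tail() above is what gives NRS its round-robin draining: a policy that still has queued requests, while other policies do too, is rotated to the back of nrs_policy_queued. A toy restatement with an array in place of the intrusive list:

#include <stdio.h>

/* Rotate the head entry to the back, as list_move_tail() does above. */
static void rotate(int q[], int n)
{
	int head = q[0];
	int i;

	for (i = 0; i < n - 1; i++)
		q[i] = q[i + 1];
	q[n - 1] = head;
}

int main(void)
{
	int policies[3] = { 1, 2, 3 };  /* policy ids, in queue order */

	rotate(policies, 3);
	printf("%d %d %d\n", policies[0], policies[1], policies[2]); /* 2 3 1 */
	return 0;
}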
-
-/**
- * Obtains a request for handling from an NRS head of service partition
- * \a svcpt.
- *
- * \param[in] svcpt the service partition
- * \param[in] hp whether to obtain a request from the regular or
- * high-priority NRS head.
- * \param[in] peek when set, signifies that we just want to examine the
- * request, and not handle it, so the request is not removed
- * from the policy.
- * \param[in] force when set, it will force a policy to return a request if it
- * has one pending
- *
- * \retval the request to be handled
- * \retval NULL the head has no requests to serve
- */
-struct ptlrpc_request *
-ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part *svcpt, bool hp,
- bool peek, bool force)
-{
- struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
- struct ptlrpc_nrs_policy *policy;
- struct ptlrpc_nrs_request *nrq;
-
- /**
-	 * Always try to drain requests from all NRS policies even if they are
- * inactive, because the user can change policy status at runtime.
- */
- list_for_each_entry(policy, &nrs->nrs_policy_queued, pol_list_queued) {
- nrq = nrs_request_get(policy, peek, force);
- if (nrq) {
- if (likely(!peek)) {
- nrq->nr_started = 1;
-
- policy->pol_req_started++;
- policy->pol_nrs->nrs_req_started++;
-
- nrs_request_removed(policy);
- }
-
- return container_of(nrq, struct ptlrpc_request, rq_nrq);
- }
- }
-
- return NULL;
-}
-
-/**
- * Returns whether any requests are currently enqueued on any of the policies
- * of service partition \a svcpt's NRS head selected by \a hp. Should
- * be called while holding ptlrpc_service_part::scp_req_lock to get a reliable
- * result.
- *
- * \param[in] svcpt the service partition to enquire.
- * \param[in] hp whether the regular or high-priority NRS head is to be
- * enquired.
- *
- * \retval false the indicated NRS head has no enqueued requests.
- * \retval true the indicated NRS head has some enqueued requests.
- */
-bool ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part *svcpt, bool hp)
-{
- struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
-
- return nrs->nrs_req_queued > 0;
-};
-
-/**
- * Carries out a control operation \a opc on the policy identified by the
- * human-readable \a name, on either all partitions, or only on the first
- * partition of service \a svc.
- *
- * \param[in] svc the service the policy belongs to.
- * \param[in] queue whether to carry out the command on the policy which
- * belongs to the regular, high-priority, or both NRS
- * heads of service partitions of \a svc.
- * \param[in] name the policy to act upon, by human-readable name
- * \param[in] opc the opcode of the operation to carry out
- * \param[in] single when set, the operation will only be carried out on the
- * NRS heads of the first service partition of \a svc.
- * This is useful for some policies which e.g. share
- * identical values on the same parameters of different
- * service partitions; when reading these parameters via
- * lprocfs, these policies may just want to obtain and
- * print out the values from the first service partition.
- * Storing these values centrally elsewhere could then be
- * another solution for this.
- * \param[in,out] arg can be used as a generic in/out buffer between control
- * operations and the user environment.
- *
- *\retval -ve error condition
- *\retval 0 operation was carried out successfully
- */
-int ptlrpc_nrs_policy_control(const struct ptlrpc_service *svc,
- enum ptlrpc_nrs_queue_type queue, char *name,
- enum ptlrpc_nrs_ctl opc, bool single, void *arg)
-{
- struct ptlrpc_service_part *svcpt;
- int i;
- int rc = 0;
-
- LASSERT(opc != PTLRPC_NRS_CTL_INVALID);
-
- if ((queue & PTLRPC_NRS_QUEUE_BOTH) == 0)
- return -EINVAL;
-
- ptlrpc_service_for_each_part(svcpt, i, svc) {
- if ((queue & PTLRPC_NRS_QUEUE_REG) != 0) {
- rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, false), name,
- opc, arg);
- if (rc != 0 || (queue == PTLRPC_NRS_QUEUE_REG &&
- single))
- goto out;
- }
-
- if ((queue & PTLRPC_NRS_QUEUE_HP) != 0) {
- /**
- * XXX: We could optionally check for
- * nrs_svc_has_hp(svc) here, and return an error if it
- * is false. Right now we rely on the policies' lprocfs
- * handlers that call the present function to make this
- * check; if they fail to do so, they might hit the
- * assertion inside nrs_svcpt2nrs() below.
- */
- rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, true), name,
- opc, arg);
- if (rc != 0 || single)
- goto out;
- }
- }
-out:
- return rc;
-}
-
-/**
- * Adds all policies that ship with the ptlrpc module, to NRS core's list of
- * policies \e nrs_core.nrs_policies.
- *
- * \retval 0 all policies have been registered successfully
- * \retval -ve error
- */
-int ptlrpc_nrs_init(void)
-{
- int rc;
-
- mutex_init(&nrs_core.nrs_mutex);
- INIT_LIST_HEAD(&nrs_core.nrs_policies);
-
- rc = ptlrpc_nrs_policy_register(&nrs_conf_fifo);
- if (rc != 0)
- goto fail;
-
- return rc;
-fail:
- /**
- * Since no PTLRPC services have been started at this point, all we need
- * to do for cleanup is to free the descriptors.
- */
- ptlrpc_nrs_fini();
-
- return rc;
-}
-
-/**
- * Removes all policy descriptors from nrs_core::nrs_policies, and frees the
- * policy descriptors.
- *
- * Since all PTLRPC services are stopped at this point, there are no more
- * instances of any policies, because each service will have stopped its policy
- * instances in ptlrpc_service_nrs_cleanup(), so we just need to free the
- * descriptors here.
- */
-void ptlrpc_nrs_fini(void)
-{
- struct ptlrpc_nrs_pol_desc *desc;
- struct ptlrpc_nrs_pol_desc *tmp;
-
- list_for_each_entry_safe(desc, tmp, &nrs_core.nrs_policies, pd_list) {
- list_del_init(&desc->pd_list);
- kfree(desc);
- }
-}
-
-/** @} nrs */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c b/drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c
deleted file mode 100644
index ff630d94dd26..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c
+++ /dev/null
@@ -1,270 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
-
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License version 2 for more details. A copy is
- * included in the COPYING file that accompanied this code.
-
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2011 Intel Corporation
- *
- * Copyright 2012 Xyratex Technology Limited
- */
-/*
- * lustre/ptlrpc/nrs_fifo.c
- *
- * Network Request Scheduler (NRS) FIFO policy
- *
- * Handles RPCs in a FIFO manner, as received from the network. This policy is
- * a logical wrapper around previous, non-NRS functionality. It is used as the
- * default and fallback policy for all types of RPCs on all PTLRPC service
- * partitions, for both regular and high-priority NRS heads. Default here means
- * the policy is the one enabled at PTLRPC service partition startup time, and
- * fallback means the policy is used to handle RPCs that are not handled
- * successfully or are not handled at all by any primary policy that may be
- * enabled on a given NRS head.
- *
- * Author: Liang Zhen <liang@whamcloud.com>
- * Author: Nikitas Angelinas <nikitas_angelinas@xyratex.com>
- */
-/**
- * \addtogroup nrs
- * @{
- */
-
-#define DEBUG_SUBSYSTEM S_RPC
-#include <linux/libcfs/libcfs.h>
-#include <linux/libcfs/libcfs_cpu.h>
-#include <obd_support.h>
-#include <obd_class.h>
-#include "ptlrpc_internal.h"
-
-/**
- * \name fifo
- *
- * The FIFO policy is a logical wrapper around previous, non-NRS functionality.
- * It schedules RPCs in the same order as they are queued from LNet.
- *
- * @{
- */
-
-#define NRS_POL_NAME_FIFO "fifo"
-
-/**
- * Is called before the policy transitions into
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED; allocates and initializes a
- * policy-specific private data structure.
- *
- * \param[in] policy The policy to start
- *
- * \retval -ENOMEM OOM error
- * \retval 0 success
- *
- * \see nrs_policy_register()
- * \see nrs_policy_ctl()
- */
-static int nrs_fifo_start(struct ptlrpc_nrs_policy *policy)
-{
- struct nrs_fifo_head *head;
-
- head = kzalloc_node(sizeof(*head), GFP_NOFS,
- cfs_cpt_spread_node(nrs_pol2cptab(policy),
- nrs_pol2cptid(policy)));
- if (!head)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&head->fh_list);
- policy->pol_private = head;
- return 0;
-}
-
-/**
- * Is called before the policy transitions into
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED; deallocates the policy-specific
- * private data structure.
- *
- * \param[in] policy The policy to stop
- *
- * \see nrs_policy_stop0()
- */
-static void nrs_fifo_stop(struct ptlrpc_nrs_policy *policy)
-{
- struct nrs_fifo_head *head = policy->pol_private;
-
- LASSERT(head);
- LASSERT(list_empty(&head->fh_list));
-
- kfree(head);
-}
-
-/**
- * Is called for obtaining a FIFO policy resource.
- *
- * \param[in] policy The policy on which the request is being asked for
- * \param[in] nrq The request for which resources are being taken
- * \param[in] parent Parent resource, unused in this policy
- * \param[out] resp Resources references are placed in this array
- * \param[in] moving_req Signifies limited caller context; unused in this
- * policy
- *
- * \retval 1 The FIFO policy only has a one-level resource hierarchy; since
- * it implements a simple scheduling algorithm in which request
- * priority is determined by request arrival order, it does not
- * need to maintain a set of resources that would otherwise be used
- * to calculate a request's priority.
- *
- * \see nrs_resource_get_safe()
- */
-static int nrs_fifo_res_get(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq,
- const struct ptlrpc_nrs_resource *parent,
- struct ptlrpc_nrs_resource **resp, bool moving_req)
-{
- /**
- * Just return the resource embedded inside nrs_fifo_head, and end this
- * resource hierarchy reference request.
- */
- *resp = &((struct nrs_fifo_head *)policy->pol_private)->fh_res;
- return 1;
-}
-
-/**
- * Called when getting a request from the FIFO policy for handling, or just
- * peeking; removes the request from the policy when it is to be handled.
- *
- * \param[in] policy The policy
- * \param[in] peek When set, signifies that we just want to examine the
- * request, and not handle it, so the request is not removed
- * from the policy.
- * \param[in] force Force the policy to return a request; unused in this
- * policy
- *
- * \retval The request to be handled; this is the next request in the FIFO
- * queue
- *
- * \see ptlrpc_nrs_req_get_nolock()
- * \see nrs_request_get()
- */
-static
-struct ptlrpc_nrs_request *nrs_fifo_req_get(struct ptlrpc_nrs_policy *policy,
- bool peek, bool force)
-{
- struct nrs_fifo_head *head = policy->pol_private;
- struct ptlrpc_nrs_request *nrq;
-
- nrq = unlikely(list_empty(&head->fh_list)) ? NULL :
- list_entry(head->fh_list.next, struct ptlrpc_nrs_request,
- nr_u.fifo.fr_list);
-
- if (likely(!peek && nrq)) {
- struct ptlrpc_request *req = container_of(nrq,
- struct ptlrpc_request,
- rq_nrq);
-
- list_del_init(&nrq->nr_u.fifo.fr_list);
-
- CDEBUG(D_RPCTRACE, "NRS start %s request from %s, seq: %llu\n",
- policy->pol_desc->pd_name, libcfs_id2str(req->rq_peer),
- nrq->nr_u.fifo.fr_sequence);
- }
-
- return nrq;
-}
-
-/**
- * Adds request \a nrq to \a policy's list of queued requests
- *
- * \param[in] policy The policy
- * \param[in] nrq The request to add
- *
- * \retval 0 success; nrs_request_enqueue() assumes this function will always
- * succeed
- */
-static int nrs_fifo_req_add(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq)
-{
- struct nrs_fifo_head *head;
-
- head = container_of(nrs_request_resource(nrq), struct nrs_fifo_head,
- fh_res);
- /**
- * Only used for debugging
- */
- nrq->nr_u.fifo.fr_sequence = head->fh_sequence++;
- list_add_tail(&nrq->nr_u.fifo.fr_list, &head->fh_list);
-
- return 0;
-}
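Stripped of the NRS plumbing, the FIFO policy is a tail-append queue plus a debugging sequence number. A self-contained sketch of both halves, including the peek behaviour described for nrs_fifo_req_get(); all names here are illustrative:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct fifo_req {
	struct fifo_req *next;
	uint64_t seq;           /* only used for debugging, as above */
};

struct fifo_head {
	struct fifo_req *first, *last;
	uint64_t seq;
};

static void fifo_add(struct fifo_head *h, struct fifo_req *r)
{
	r->seq = h->seq++;
	r->next = NULL;
	if (h->last)
		h->last->next = r;
	else
		h->first = r;
	h->last = r;
}

/* With peek set, the request is examined but left on the queue. */
static struct fifo_req *fifo_get(struct fifo_head *h, bool peek)
{
	struct fifo_req *r = h->first;

	if (r && !peek) {
		h->first = r->next;
		if (!h->first)
			h->last = NULL;
	}
	return r;
}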
-
-/**
- * Removes request \a nrq from \a policy's list of queued requests.
- *
- * \param[in] policy The policy
- * \param[in] nrq The request to remove
- */
-static void nrs_fifo_req_del(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq)
-{
- LASSERT(!list_empty(&nrq->nr_u.fifo.fr_list));
- list_del_init(&nrq->nr_u.fifo.fr_list);
-}
-
-/**
- * Prints a debug statement right before the request \a nrq stops being
- * handled.
- *
- * \param[in] policy The policy handling the request
- * \param[in] nrq The request being handled
- *
- * \see ptlrpc_server_finish_request()
- * \see ptlrpc_nrs_req_stop_nolock()
- */
-static void nrs_fifo_req_stop(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq)
-{
- struct ptlrpc_request *req = container_of(nrq, struct ptlrpc_request,
- rq_nrq);
-
- CDEBUG(D_RPCTRACE, "NRS stop %s request from %s, seq: %llu\n",
- policy->pol_desc->pd_name, libcfs_id2str(req->rq_peer),
- nrq->nr_u.fifo.fr_sequence);
-}
-
-/**
- * FIFO policy operations
- */
-static const struct ptlrpc_nrs_pol_ops nrs_fifo_ops = {
- .op_policy_start = nrs_fifo_start,
- .op_policy_stop = nrs_fifo_stop,
- .op_res_get = nrs_fifo_res_get,
- .op_req_get = nrs_fifo_req_get,
- .op_req_enqueue = nrs_fifo_req_add,
- .op_req_dequeue = nrs_fifo_req_del,
- .op_req_stop = nrs_fifo_req_stop,
-};
-
-/**
- * FIFO policy configuration
- */
-struct ptlrpc_nrs_pol_conf nrs_conf_fifo = {
- .nc_name = NRS_POL_NAME_FIFO,
- .nc_ops = &nrs_fifo_ops,
- .nc_compat = nrs_policy_compat_all,
- .nc_flags = PTLRPC_NRS_FL_FALLBACK |
- PTLRPC_NRS_FL_REG_START
-};
-
-/** @} fifo */
-
-/** @} nrs */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
deleted file mode 100644
index 6ac9bb570663..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
+++ /dev/null
@@ -1,2311 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ptlrpc/pack_generic.c
- *
- * (Un)packing of OST requests
- *
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eeb@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_RPC
-
-#include <uapi/linux/lustre/lustre_fiemap.h>
-
-#include <llog_swab.h>
-#include <lustre_net.h>
-#include <lustre_swab.h>
-#include <obd_cksum.h>
-#include <obd_support.h>
-#include <obd_class.h>
-
-#include "ptlrpc_internal.h"
-
-static inline u32 lustre_msg_hdr_size_v2(u32 count)
-{
- return cfs_size_round(offsetof(struct lustre_msg_v2,
- lm_buflens[count]));
-}
-
-u32 lustre_msg_hdr_size(__u32 magic, u32 count)
-{
- switch (magic) {
- case LUSTRE_MSG_MAGIC_V2:
- return lustre_msg_hdr_size_v2(count);
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", magic);
- return 0;
- }
-}
-
-void ptlrpc_buf_set_swabbed(struct ptlrpc_request *req, const int inout,
- u32 index)
-{
- if (inout)
- lustre_set_req_swabbed(req, index);
- else
- lustre_set_rep_swabbed(req, index);
-}
-
-int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout,
- u32 index)
-{
- if (inout)
- return (ptlrpc_req_need_swab(req) &&
- !lustre_req_swabbed(req, index));
- else
- return (ptlrpc_rep_need_swab(req) &&
- !lustre_rep_swabbed(req, index));
-}
-
-/* early reply size */
-u32 lustre_msg_early_size(void)
-{
- static u32 size;
-
- if (!size) {
- /* Always reply old ptlrpc_body_v2 to keep interoperability
- * with the old client (< 2.3) which doesn't have pb_jobid
- * in the ptlrpc_body.
- *
-	 * XXX Remove this whenever we drop interoperability with such
-	 * clients.
- */
- __u32 pblen = sizeof(struct ptlrpc_body_v2);
-
- size = lustre_msg_size(LUSTRE_MSG_MAGIC_V2, 1, &pblen);
- }
- return size;
-}
-EXPORT_SYMBOL(lustre_msg_early_size);
-
-u32 lustre_msg_size_v2(int count, __u32 *lengths)
-{
- u32 size;
- int i;
-
- size = lustre_msg_hdr_size_v2(count);
- for (i = 0; i < count; i++)
- size += cfs_size_round(lengths[i]);
-
- return size;
-}
-EXPORT_SYMBOL(lustre_msg_size_v2);
-
-/* This returns the size of the buffer that is required to hold a lustre_msg
- * with the given sub-buffer lengths.
- * NOTE: this should only be used for NEW requests, and should always be
- * in the form of a v2 request. If this is a connection to a v1
- * target then the first buffer will be stripped because the ptlrpc
- * data is part of the lustre_msg_v1 header. b=14043
- */
-u32 lustre_msg_size(__u32 magic, int count, __u32 *lens)
-{
- __u32 size[] = { sizeof(struct ptlrpc_body) };
-
- if (!lens) {
- LASSERT(count == 1);
- lens = size;
- }
-
- LASSERT(count > 0);
- LASSERT(lens[MSG_PTLRPC_BODY_OFF] >= sizeof(struct ptlrpc_body_v2));
-
- switch (magic) {
- case LUSTRE_MSG_MAGIC_V2:
- return lustre_msg_size_v2(count, lens);
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", magic);
- return 0;
- }
-}
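-
-/*
- * Illustrative sketch (not from the original file): sizing a two-buffer
- * message with the helpers above. The 37-byte payload length is
- * hypothetical; the 8-byte rounding comes from cfs_size_round() in
- * lustre_msg_size_v2().
- */
-static inline u32 example_two_buf_msg_size(void)
-{
-	__u32 lens[2] = { sizeof(struct ptlrpc_body), 37 };
-
-	/* header for two buffers + round8(sizeof(ptlrpc_body)) +
-	 * round8(37) == 40
-	 */
-	return lustre_msg_size(LUSTRE_MSG_MAGIC_V2, 2, lens);
-}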
-
-/* This is used to determine the size of a buffer that was already packed
- * and will correctly handle the different message formats.
- */
-u32 lustre_packed_msg_size(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2:
- return lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
- default:
- CERROR("incorrect message magic: %08x\n", msg->lm_magic);
- return 0;
- }
-}
-
-void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, __u32 *lens,
- char **bufs)
-{
- char *ptr;
- int i;
-
- msg->lm_bufcount = count;
- /* XXX: lm_secflvr uninitialized here */
- msg->lm_magic = LUSTRE_MSG_MAGIC_V2;
-
- for (i = 0; i < count; i++)
- msg->lm_buflens[i] = lens[i];
-
- if (!bufs)
- return;
-
- ptr = (char *)msg + lustre_msg_hdr_size_v2(count);
- for (i = 0; i < count; i++) {
- char *tmp = bufs[i];
-
- if (tmp)
- memcpy(ptr, tmp, lens[i]);
- ptr += cfs_size_round(lens[i]);
- }
-}
-EXPORT_SYMBOL(lustre_init_msg_v2);
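-
-/*
- * Illustrative sketch (not from the original file): packing two flat
- * buffers into a freshly allocated lustre_msg_v2, assuming <linux/slab.h>
- * is pulled in by the includes above. Buffer contents and lengths are
- * hypothetical; lm_secflvr is left unset, as noted in the XXX above.
- */
-static struct lustre_msg_v2 *example_pack_two_bufs(void)
-{
-	__u32 lens[2] = { sizeof(struct ptlrpc_body), 16 };
-	char body[sizeof(struct ptlrpc_body)] = { 0 };
-	char payload[16] = "hello";
-	char *bufs[2] = { body, payload };
-	struct lustre_msg_v2 *msg;
-
-	msg = kzalloc(lustre_msg_size_v2(2, lens), GFP_NOFS);
-	if (msg)
-		lustre_init_msg_v2(msg, 2, lens, bufs);
-	return msg;
-}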
-
-static int lustre_pack_request_v2(struct ptlrpc_request *req,
- int count, __u32 *lens, char **bufs)
-{
- int reqlen, rc;
-
- reqlen = lustre_msg_size_v2(count, lens);
-
- rc = sptlrpc_cli_alloc_reqbuf(req, reqlen);
- if (rc)
- return rc;
-
- req->rq_reqlen = reqlen;
-
- lustre_init_msg_v2(req->rq_reqmsg, count, lens, bufs);
- lustre_msg_add_version(req->rq_reqmsg, PTLRPC_MSG_VERSION);
- return 0;
-}
-
-int lustre_pack_request(struct ptlrpc_request *req, __u32 magic, int count,
- __u32 *lens, char **bufs)
-{
- __u32 size[] = { sizeof(struct ptlrpc_body) };
-
- if (!lens) {
- LASSERT(count == 1);
- lens = size;
- }
-
- LASSERT(count > 0);
- LASSERT(lens[MSG_PTLRPC_BODY_OFF] == sizeof(struct ptlrpc_body));
-
- /* only use new format, we don't need to be compatible with 1.4 */
- return lustre_pack_request_v2(req, count, lens, bufs);
-}
-
-#if RS_DEBUG
-LIST_HEAD(ptlrpc_rs_debug_lru);
-spinlock_t ptlrpc_rs_debug_lock;
-
-#define PTLRPC_RS_DEBUG_LRU_ADD(rs) \
-do { \
- spin_lock(&ptlrpc_rs_debug_lock); \
- list_add_tail(&(rs)->rs_debug_list, &ptlrpc_rs_debug_lru); \
- spin_unlock(&ptlrpc_rs_debug_lock); \
-} while (0)
-
-#define PTLRPC_RS_DEBUG_LRU_DEL(rs) \
-do { \
- spin_lock(&ptlrpc_rs_debug_lock); \
- list_del(&(rs)->rs_debug_list); \
- spin_unlock(&ptlrpc_rs_debug_lock); \
-} while (0)
-#else
-# define PTLRPC_RS_DEBUG_LRU_ADD(rs) do {} while (0)
-# define PTLRPC_RS_DEBUG_LRU_DEL(rs) do {} while (0)
-#endif
-
-struct ptlrpc_reply_state *
-lustre_get_emerg_rs(struct ptlrpc_service_part *svcpt)
-{
- struct ptlrpc_reply_state *rs = NULL;
-
- spin_lock(&svcpt->scp_rep_lock);
-
- /* See if we have anything in a pool, and wait if nothing */
- while (list_empty(&svcpt->scp_rep_idle)) {
- int rc;
-
- spin_unlock(&svcpt->scp_rep_lock);
-		/* If we cannot get anything for a long time, we had better
-		 * bail out instead of waiting indefinitely
- */
- rc = wait_event_idle_timeout(svcpt->scp_rep_waitq,
- !list_empty(&svcpt->scp_rep_idle),
- 10 * HZ);
- if (rc == 0)
- goto out;
- spin_lock(&svcpt->scp_rep_lock);
- }
-
- rs = list_entry(svcpt->scp_rep_idle.next,
- struct ptlrpc_reply_state, rs_list);
- list_del(&rs->rs_list);
-
- spin_unlock(&svcpt->scp_rep_lock);
-
- memset(rs, 0, svcpt->scp_service->srv_max_reply_size);
- rs->rs_size = svcpt->scp_service->srv_max_reply_size;
- rs->rs_svcpt = svcpt;
- rs->rs_prealloc = 1;
-out:
- return rs;
-}
-
-void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs)
-{
- struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
-
- spin_lock(&svcpt->scp_rep_lock);
- list_add(&rs->rs_list, &svcpt->scp_rep_idle);
- spin_unlock(&svcpt->scp_rep_lock);
- wake_up(&svcpt->scp_rep_waitq);
-}
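-
-/*
- * Illustrative sketch (not from the original file): the intended pairing
- * of the emergency reply-state helpers above. A NULL return mirrors the
- * 10-second timeout path in lustre_get_emerg_rs(); the function name is
- * hypothetical.
- */
-static void example_emerg_rs_usage(struct ptlrpc_service_part *svcpt)
-{
-	struct ptlrpc_reply_state *rs = lustre_get_emerg_rs(svcpt);
-
-	if (!rs)
-		return;	/* pool stayed empty too long; caller must cope */
-	/* ... use the preallocated reply state ... */
-	lustre_put_emerg_rs(rs);	/* return it and wake one waiter */
-}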
-
-int lustre_pack_reply_v2(struct ptlrpc_request *req, int count,
- __u32 *lens, char **bufs, int flags)
-{
- struct ptlrpc_reply_state *rs;
- int msg_len, rc;
-
- LASSERT(!req->rq_reply_state);
-
- if ((flags & LPRFL_EARLY_REPLY) == 0) {
- spin_lock(&req->rq_lock);
- req->rq_packed_final = 1;
- spin_unlock(&req->rq_lock);
- }
-
- msg_len = lustre_msg_size_v2(count, lens);
- rc = sptlrpc_svc_alloc_rs(req, msg_len);
- if (rc)
- return rc;
-
- rs = req->rq_reply_state;
- atomic_set(&rs->rs_refcount, 1); /* 1 ref for rq_reply_state */
- rs->rs_cb_id.cbid_fn = reply_out_callback;
- rs->rs_cb_id.cbid_arg = rs;
- rs->rs_svcpt = req->rq_rqbd->rqbd_svcpt;
- INIT_LIST_HEAD(&rs->rs_exp_list);
- INIT_LIST_HEAD(&rs->rs_obd_list);
- INIT_LIST_HEAD(&rs->rs_list);
- spin_lock_init(&rs->rs_lock);
-
- req->rq_replen = msg_len;
- req->rq_reply_state = rs;
- req->rq_repmsg = rs->rs_msg;
-
- lustre_init_msg_v2(rs->rs_msg, count, lens, bufs);
- lustre_msg_add_version(rs->rs_msg, PTLRPC_MSG_VERSION);
-
- PTLRPC_RS_DEBUG_LRU_ADD(rs);
-
- return 0;
-}
-EXPORT_SYMBOL(lustre_pack_reply_v2);
-
-int lustre_pack_reply_flags(struct ptlrpc_request *req, int count, __u32 *lens,
- char **bufs, int flags)
-{
- int rc = 0;
- __u32 size[] = { sizeof(struct ptlrpc_body) };
-
- if (!lens) {
- LASSERT(count == 1);
- lens = size;
- }
-
- LASSERT(count > 0);
- LASSERT(lens[MSG_PTLRPC_BODY_OFF] == sizeof(struct ptlrpc_body));
-
- switch (req->rq_reqmsg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2:
- rc = lustre_pack_reply_v2(req, count, lens, bufs, flags);
- break;
- default:
- LASSERTF(0, "incorrect message magic: %08x\n",
- req->rq_reqmsg->lm_magic);
- rc = -EINVAL;
- }
- if (rc != 0)
- CERROR("lustre_pack_reply failed: rc=%d size=%d\n", rc,
- lustre_msg_size(req->rq_reqmsg->lm_magic, count, lens));
- return rc;
-}
-
-int lustre_pack_reply(struct ptlrpc_request *req, int count, __u32 *lens,
- char **bufs)
-{
- return lustre_pack_reply_flags(req, count, lens, bufs, 0);
-}
-EXPORT_SYMBOL(lustre_pack_reply);
-
-void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, u32 n, u32 min_size)
-{
- u32 i, offset, buflen, bufcount;
-
- bufcount = m->lm_bufcount;
- if (unlikely(n >= bufcount)) {
- CDEBUG(D_INFO, "msg %p buffer[%d] not present (count %d)\n",
- m, n, bufcount);
- return NULL;
- }
-
- buflen = m->lm_buflens[n];
- if (unlikely(buflen < min_size)) {
- CERROR("msg %p buffer[%d] size %d too small (required %d, opc=%d)\n",
- m, n, buflen, min_size,
- n == MSG_PTLRPC_BODY_OFF ? -1 : lustre_msg_get_opc(m));
- return NULL;
- }
-
- offset = lustre_msg_hdr_size_v2(bufcount);
- for (i = 0; i < n; i++)
- offset += cfs_size_round(m->lm_buflens[i]);
-
- return (char *)m + offset;
-}
-
-void *lustre_msg_buf(struct lustre_msg *m, u32 n, u32 min_size)
-{
- switch (m->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2:
- return lustre_msg_buf_v2(m, n, min_size);
- default:
- LASSERTF(0, "incorrect message magic: %08x (msg:%p)\n",
- m->lm_magic, m);
- return NULL;
- }
-}
-EXPORT_SYMBOL(lustre_msg_buf);
-
-static int lustre_shrink_msg_v2(struct lustre_msg_v2 *msg, u32 segment,
- unsigned int newlen, int move_data)
-{
- char *tail = NULL, *newpos;
- int tail_len = 0, n;
-
- LASSERT(msg);
- LASSERT(msg->lm_bufcount > segment);
- LASSERT(msg->lm_buflens[segment] >= newlen);
-
- if (msg->lm_buflens[segment] == newlen)
- goto out;
-
- if (move_data && msg->lm_bufcount > segment + 1) {
- tail = lustre_msg_buf_v2(msg, segment + 1, 0);
- for (n = segment + 1; n < msg->lm_bufcount; n++)
- tail_len += cfs_size_round(msg->lm_buflens[n]);
- }
-
- msg->lm_buflens[segment] = newlen;
-
- if (tail && tail_len) {
- newpos = lustre_msg_buf_v2(msg, segment + 1, 0);
- LASSERT(newpos <= tail);
- if (newpos != tail)
- memmove(newpos, tail, tail_len);
- }
-out:
- return lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
-}
-
-/*
- * For @msg, shrink @segment to size @newlen. If @move_data is non-zero,
- * we also move data forward from @segment + 1.
- *
- * If @newlen == 0, we remove the segment completely, but we still keep the
- * total bufcount the same to save possible data moving. This will leave an
- * unused segment with size 0 at the tail, but that's OK.
- *
- * Returns the new msg size after shrinking.
- *
- * CAUTION:
- * + if any buffer higher than @segment has been filled in, shrink must be
- *   called with non-zero @move_data.
- * + the caller should NOT keep pointers to msg buffers higher than @segment
- *   after calling shrink.
- *
- * A usage sketch follows the function below.
- */
-int lustre_shrink_msg(struct lustre_msg *msg, int segment,
- unsigned int newlen, int move_data)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2:
- return lustre_shrink_msg_v2(msg, segment, newlen, move_data);
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
- return 0;
-}
-EXPORT_SYMBOL(lustre_shrink_msg);
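-
-/*
- * Illustrative sketch (not from the original file): trimming a reply
- * segment that was packed at its maximum size. The segment index and the
- * 'used' length are hypothetical; move_data is non-zero per the CAUTION
- * above because later segments may already be filled in.
- */
-static void example_shrink_reply(struct lustre_msg *repmsg, unsigned int used)
-{
-	/* returns the new total message size, ignored here */
-	lustre_shrink_msg(repmsg, 1, used, 1);
-}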
-
-void lustre_free_reply_state(struct ptlrpc_reply_state *rs)
-{
- PTLRPC_RS_DEBUG_LRU_DEL(rs);
-
- LASSERT(atomic_read(&rs->rs_refcount) == 0);
- LASSERT(!rs->rs_difficult || rs->rs_handled);
- LASSERT(!rs->rs_on_net);
- LASSERT(!rs->rs_scheduled);
- LASSERT(!rs->rs_export);
- LASSERT(rs->rs_nlocks == 0);
- LASSERT(list_empty(&rs->rs_exp_list));
- LASSERT(list_empty(&rs->rs_obd_list));
-
- sptlrpc_svc_free_rs(rs);
-}
-
-static int lustre_unpack_msg_v2(struct lustre_msg_v2 *m, int len)
-{
- int swabbed, required_len, i;
-
- /* Now we know the sender speaks my language. */
- required_len = lustre_msg_hdr_size_v2(0);
- if (len < required_len) {
- /* can't even look inside the message */
- CERROR("message length %d too small for lustre_msg\n", len);
- return -EINVAL;
- }
-
- swabbed = (m->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED);
-
- if (swabbed) {
- __swab32s(&m->lm_magic);
- __swab32s(&m->lm_bufcount);
- __swab32s(&m->lm_secflvr);
- __swab32s(&m->lm_repsize);
- __swab32s(&m->lm_cksum);
- __swab32s(&m->lm_flags);
- BUILD_BUG_ON(offsetof(typeof(*m), lm_padding_2) == 0);
- BUILD_BUG_ON(offsetof(typeof(*m), lm_padding_3) == 0);
- }
-
- required_len = lustre_msg_hdr_size_v2(m->lm_bufcount);
- if (len < required_len) {
- /* didn't receive all the buffer lengths */
- CERROR("message length %d too small for %d buflens\n",
- len, m->lm_bufcount);
- return -EINVAL;
- }
-
- for (i = 0; i < m->lm_bufcount; i++) {
- if (swabbed)
- __swab32s(&m->lm_buflens[i]);
- required_len += cfs_size_round(m->lm_buflens[i]);
- }
-
- if (len < required_len) {
- CERROR("len: %d, required_len %d\n", len, required_len);
- CERROR("bufcount: %d\n", m->lm_bufcount);
- for (i = 0; i < m->lm_bufcount; i++)
- CERROR("buffer %d length %d\n", i, m->lm_buflens[i]);
- return -EINVAL;
- }
-
- return swabbed;
-}
-
-int __lustre_unpack_msg(struct lustre_msg *m, int len)
-{
- int required_len, rc;
-
-	/* We can provide a slightly better error log if we check the
-	 * message magic and version first. In the future, struct
-	 * lustre_msg may grow, and we'd like to log a version mismatch
-	 * rather than a short message.
-	 */
- required_len = offsetof(struct lustre_msg, lm_magic) +
- sizeof(m->lm_magic);
- if (len < required_len) {
- /* can't even look inside the message */
- CERROR("message length %d too small for magic/version check\n",
- len);
- return -EINVAL;
- }
-
- rc = lustre_unpack_msg_v2(m, len);
-
- return rc;
-}
-EXPORT_SYMBOL(__lustre_unpack_msg);
-
-int ptlrpc_unpack_req_msg(struct ptlrpc_request *req, int len)
-{
- int rc;
-
- rc = __lustre_unpack_msg(req->rq_reqmsg, len);
- if (rc == 1) {
- lustre_set_req_swabbed(req, MSG_PTLRPC_HEADER_OFF);
- rc = 0;
- }
- return rc;
-}
-
-int ptlrpc_unpack_rep_msg(struct ptlrpc_request *req, int len)
-{
- int rc;
-
- rc = __lustre_unpack_msg(req->rq_repmsg, len);
- if (rc == 1) {
- lustre_set_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF);
- rc = 0;
- }
- return rc;
-}
-
-static inline int lustre_unpack_ptlrpc_body_v2(struct ptlrpc_request *req,
- const int inout, int offset)
-{
- struct ptlrpc_body *pb;
- struct lustre_msg_v2 *m = inout ? req->rq_reqmsg : req->rq_repmsg;
-
- pb = lustre_msg_buf_v2(m, offset, sizeof(struct ptlrpc_body_v2));
- if (!pb) {
- CERROR("error unpacking ptlrpc body\n");
- return -EFAULT;
- }
- if (ptlrpc_buf_need_swab(req, inout, offset)) {
- lustre_swab_ptlrpc_body(pb);
- ptlrpc_buf_set_swabbed(req, inout, offset);
- }
-
- if ((pb->pb_version & ~LUSTRE_VERSION_MASK) != PTLRPC_MSG_VERSION) {
- CERROR("wrong lustre_msg version %08x\n", pb->pb_version);
- return -EINVAL;
- }
-
- if (!inout)
- pb->pb_status = ptlrpc_status_ntoh(pb->pb_status);
-
- return 0;
-}
-
-int lustre_unpack_req_ptlrpc_body(struct ptlrpc_request *req, int offset)
-{
- switch (req->rq_reqmsg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2:
- return lustre_unpack_ptlrpc_body_v2(req, 1, offset);
- default:
- CERROR("bad lustre msg magic: %08x\n",
- req->rq_reqmsg->lm_magic);
- return -EINVAL;
- }
-}
-
-int lustre_unpack_rep_ptlrpc_body(struct ptlrpc_request *req, int offset)
-{
- switch (req->rq_repmsg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2:
- return lustre_unpack_ptlrpc_body_v2(req, 0, offset);
- default:
- CERROR("bad lustre msg magic: %08x\n",
- req->rq_repmsg->lm_magic);
- return -EINVAL;
- }
-}
-
-static inline u32 lustre_msg_buflen_v2(struct lustre_msg_v2 *m, u32 n)
-{
- if (n >= m->lm_bufcount)
- return 0;
-
- return m->lm_buflens[n];
-}
-
-/**
- * lustre_msg_buflen - return the length of buffer \a n in message \a m
- * \param m lustre_msg (request or reply) to look at
- * \param n message index (base 0)
- *
- * returns zero for non-existent message indices
- */
-u32 lustre_msg_buflen(struct lustre_msg *m, u32 n)
-{
- switch (m->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2:
- return lustre_msg_buflen_v2(m, n);
- default:
- CERROR("incorrect message magic: %08x\n", m->lm_magic);
- return 0;
- }
-}
-EXPORT_SYMBOL(lustre_msg_buflen);
-
-/* NB: returns the bufcount for the lustre_msg_v2 format, so if the message
- * is packed in V1 format the result is one bigger (the extra struct
- * ptlrpc_body is counted).
- */
-u32 lustre_msg_bufcount(struct lustre_msg *m)
-{
- switch (m->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2:
- return m->lm_bufcount;
- default:
- CERROR("incorrect message magic: %08x\n", m->lm_magic);
- return 0;
- }
-}
-
-char *lustre_msg_string(struct lustre_msg *m, u32 index, u32 max_len)
-{
- /* max_len == 0 means the string should fill the buffer */
- char *str;
- u32 slen, blen;
-
- switch (m->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2:
- str = lustre_msg_buf_v2(m, index, 0);
- blen = lustre_msg_buflen_v2(m, index);
- break;
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", m->lm_magic);
- }
-
- if (!str) {
- CERROR("can't unpack string in msg %p buffer[%d]\n", m, index);
- return NULL;
- }
-
- slen = strnlen(str, blen);
-
- if (slen == blen) { /* not NULL terminated */
- CERROR("can't unpack non-NULL terminated string in msg %p buffer[%d] len %d\n",
- m, index, blen);
- return NULL;
- }
-
- if (max_len == 0) {
- if (slen != blen - 1) {
- CERROR("can't unpack short string in msg %p buffer[%d] len %d: strlen %d\n",
- m, index, blen, slen);
- return NULL;
- }
- } else if (slen > max_len) {
- CERROR("can't unpack oversized string in msg %p buffer[%d] len %d strlen %d: max %d expected\n",
- m, index, blen, slen, max_len);
- return NULL;
- }
-
- return str;
-}
-
-/* Wrap up the normal fixed length cases */
-static inline void *__lustre_swab_buf(struct lustre_msg *msg, u32 index,
- u32 min_size, void *swabber)
-{
- void *ptr = NULL;
-
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2:
- ptr = lustre_msg_buf_v2(msg, index, min_size);
- break;
- default:
- CERROR("incorrect message magic: %08x\n", msg->lm_magic);
- }
-
- if (ptr && swabber)
- ((void (*)(void *))swabber)(ptr);
-
- return ptr;
-}
-
-static inline struct ptlrpc_body *lustre_msg_ptlrpc_body(struct lustre_msg *msg)
-{
- return lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF,
- sizeof(struct ptlrpc_body_v2));
-}
-
-__u32 lustre_msghdr_get_flags(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2:
- /* already in host endian */
- return msg->lm_flags;
- default:
- CERROR("incorrect message magic: %08x\n", msg->lm_magic);
- return 0;
- }
-}
-EXPORT_SYMBOL(lustre_msghdr_get_flags);
-
-void lustre_msghdr_set_flags(struct lustre_msg *msg, __u32 flags)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2:
- msg->lm_flags = flags;
- return;
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-
-__u32 lustre_msg_get_flags(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- if (pb)
- return pb->pb_flags;
-
- CERROR("invalid msg %p: no ptlrpc body!\n", msg);
- }
- /* fall through */
- default:
- /* flags might be printed in debug code while message
- * uninitialized
- */
- return 0;
- }
-}
-EXPORT_SYMBOL(lustre_msg_get_flags);
-
-void lustre_msg_add_flags(struct lustre_msg *msg, u32 flags)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
- pb->pb_flags |= flags;
- return;
- }
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-EXPORT_SYMBOL(lustre_msg_add_flags);
-
-void lustre_msg_set_flags(struct lustre_msg *msg, u32 flags)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
- pb->pb_flags = flags;
- return;
- }
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-
-void lustre_msg_clear_flags(struct lustre_msg *msg, u32 flags)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
- pb->pb_flags &= ~(flags & MSG_GEN_FLAG_MASK);
- return;
- }
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-EXPORT_SYMBOL(lustre_msg_clear_flags);
-
-__u32 lustre_msg_get_op_flags(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- if (pb)
- return pb->pb_op_flags;
-
- CERROR("invalid msg %p: no ptlrpc body!\n", msg);
- }
- /* fall through */
- default:
- return 0;
- }
-}
-
-void lustre_msg_add_op_flags(struct lustre_msg *msg, u32 flags)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
- pb->pb_op_flags |= flags;
- return;
- }
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-EXPORT_SYMBOL(lustre_msg_add_op_flags);
-
-struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- if (!pb) {
- CERROR("invalid msg %p: no ptlrpc body!\n", msg);
- return NULL;
- }
- return &pb->pb_handle;
- }
- default:
- CERROR("incorrect message magic: %08x\n", msg->lm_magic);
- return NULL;
- }
-}
-
-__u32 lustre_msg_get_type(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- if (!pb) {
- CERROR("invalid msg %p: no ptlrpc body!\n", msg);
- return PTL_RPC_MSG_ERR;
- }
- return pb->pb_type;
- }
- default:
- CERROR("incorrect message magic: %08x\n", msg->lm_magic);
- return PTL_RPC_MSG_ERR;
- }
-}
-EXPORT_SYMBOL(lustre_msg_get_type);
-
-void lustre_msg_add_version(struct lustre_msg *msg, u32 version)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
- pb->pb_version |= version;
- return;
- }
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-
-__u32 lustre_msg_get_opc(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- if (!pb) {
- CERROR("invalid msg %p: no ptlrpc body!\n", msg);
- return 0;
- }
- return pb->pb_opc;
- }
- default:
- CERROR("incorrect message magic: %08x (msg:%p)\n",
- msg->lm_magic, msg);
- return 0;
- }
-}
-EXPORT_SYMBOL(lustre_msg_get_opc);
-
-__u16 lustre_msg_get_tag(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- if (!pb) {
- CERROR("invalid msg %p: no ptlrpc body!\n", msg);
- return 0;
- }
- return pb->pb_tag;
- }
- default:
- CERROR("incorrect message magic: %08x\n", msg->lm_magic);
- return 0;
- }
-}
-EXPORT_SYMBOL(lustre_msg_get_tag);
-
-__u64 lustre_msg_get_last_committed(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- if (!pb) {
- CERROR("invalid msg %p: no ptlrpc body!\n", msg);
- return 0;
- }
- return pb->pb_last_committed;
- }
- default:
- CERROR("incorrect message magic: %08x\n", msg->lm_magic);
- return 0;
- }
-}
-EXPORT_SYMBOL(lustre_msg_get_last_committed);
-
-__u64 *lustre_msg_get_versions(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- if (!pb) {
- CERROR("invalid msg %p: no ptlrpc body!\n", msg);
- return NULL;
- }
- return pb->pb_pre_versions;
- }
- default:
- CERROR("incorrect message magic: %08x\n", msg->lm_magic);
- return NULL;
- }
-}
-EXPORT_SYMBOL(lustre_msg_get_versions);
-
-__u64 lustre_msg_get_transno(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- if (!pb) {
- CERROR("invalid msg %p: no ptlrpc body!\n", msg);
- return 0;
- }
- return pb->pb_transno;
- }
- default:
- CERROR("incorrect message magic: %08x\n", msg->lm_magic);
- return 0;
- }
-}
-EXPORT_SYMBOL(lustre_msg_get_transno);
-
-int lustre_msg_get_status(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- if (pb)
- return pb->pb_status;
-
- CERROR("invalid msg %p: no ptlrpc body!\n", msg);
- }
- /* fall through */
- default:
- /* status might be printed in debug code while message
- * uninitialized
- */
- return -EINVAL;
- }
-}
-EXPORT_SYMBOL(lustre_msg_get_status);
-
-__u64 lustre_msg_get_slv(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- if (!pb) {
- CERROR("invalid msg %p: no ptlrpc body!\n", msg);
- return -EINVAL;
- }
- return pb->pb_slv;
- }
- default:
- CERROR("invalid msg magic %08x\n", msg->lm_magic);
- return -EINVAL;
- }
-}
-
-void lustre_msg_set_slv(struct lustre_msg *msg, __u64 slv)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- if (!pb) {
- CERROR("invalid msg %p: no ptlrpc body!\n", msg);
- return;
- }
- pb->pb_slv = slv;
- return;
- }
- default:
- CERROR("invalid msg magic %x\n", msg->lm_magic);
- return;
- }
-}
-
-__u32 lustre_msg_get_limit(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- if (!pb) {
- CERROR("invalid msg %p: no ptlrpc body!\n", msg);
- return -EINVAL;
- }
- return pb->pb_limit;
- }
- default:
- CERROR("invalid msg magic %x\n", msg->lm_magic);
- return -EINVAL;
- }
-}
-
-void lustre_msg_set_limit(struct lustre_msg *msg, __u64 limit)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- if (!pb) {
- CERROR("invalid msg %p: no ptlrpc body!\n", msg);
- return;
- }
- pb->pb_limit = limit;
- return;
- }
- default:
- CERROR("invalid msg magic %08x\n", msg->lm_magic);
- return;
- }
-}
-
-__u32 lustre_msg_get_conn_cnt(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- if (!pb) {
- CERROR("invalid msg %p: no ptlrpc body!\n", msg);
- return 0;
- }
- return pb->pb_conn_cnt;
- }
- default:
- CERROR("incorrect message magic: %08x\n", msg->lm_magic);
- return 0;
- }
-}
-EXPORT_SYMBOL(lustre_msg_get_conn_cnt);
-
-__u32 lustre_msg_get_magic(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2:
- return msg->lm_magic;
- default:
- CERROR("incorrect message magic: %08x\n", msg->lm_magic);
- return 0;
- }
-}
-
-__u32 lustre_msg_get_timeout(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- if (!pb) {
- CERROR("invalid msg %p: no ptlrpc body!\n", msg);
- return 0;
- }
- return pb->pb_timeout;
- }
- default:
- CERROR("incorrect message magic: %08x\n", msg->lm_magic);
- return -EPROTO;
- }
-}
-
-__u32 lustre_msg_get_service_time(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- if (!pb) {
- CERROR("invalid msg %p: no ptlrpc body!\n", msg);
- return 0;
- }
- return pb->pb_service_time;
- }
- default:
- CERROR("incorrect message magic: %08x\n", msg->lm_magic);
- return 0;
- }
-}
-
-__u32 lustre_msg_get_cksum(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2:
- return msg->lm_cksum;
- default:
- CERROR("incorrect message magic: %08x\n", msg->lm_magic);
- return 0;
- }
-}
-
-__u32 lustre_msg_calc_cksum(struct lustre_msg *msg)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
- __u32 crc;
- unsigned int hsize = 4;
-
- cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32, (unsigned char *)pb,
- lustre_msg_buflen(msg,
- MSG_PTLRPC_BODY_OFF),
- NULL, 0, (unsigned char *)&crc, &hsize);
- return crc;
- }
- default:
- CERROR("incorrect message magic: %08x\n", msg->lm_magic);
- return 0;
- }
-}
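-
-/*
- * Illustrative sketch (not from the original file): validating a received
- * message by comparing the CRC32 carried in lm_cksum against one
- * recomputed over the ptlrpc body, using the two helpers above. The
- * function name and error code are hypothetical.
- */
-static int example_verify_cksum(struct lustre_msg *msg)
-{
-	if (lustre_msg_get_cksum(msg) != lustre_msg_calc_cksum(msg))
-		return -EINVAL;	/* body corrupted or mis-packed */
-	return 0;
-}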
-
-void lustre_msg_set_handle(struct lustre_msg *msg, struct lustre_handle *handle)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
- pb->pb_handle = *handle;
- return;
- }
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-
-void lustre_msg_set_type(struct lustre_msg *msg, __u32 type)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
- pb->pb_type = type;
- return;
- }
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-
-void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
- pb->pb_opc = opc;
- return;
- }
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-
-void lustre_msg_set_last_xid(struct lustre_msg *msg, u64 last_xid)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
- pb->pb_last_xid = last_xid;
- return;
- }
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-
-void lustre_msg_set_tag(struct lustre_msg *msg, __u16 tag)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
- pb->pb_tag = tag;
- return;
- }
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-EXPORT_SYMBOL(lustre_msg_set_tag);
-
-void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
- pb->pb_pre_versions[0] = versions[0];
- pb->pb_pre_versions[1] = versions[1];
- pb->pb_pre_versions[2] = versions[2];
- pb->pb_pre_versions[3] = versions[3];
- return;
- }
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-EXPORT_SYMBOL(lustre_msg_set_versions);
-
-void lustre_msg_set_transno(struct lustre_msg *msg, __u64 transno)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
- pb->pb_transno = transno;
- return;
- }
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-EXPORT_SYMBOL(lustre_msg_set_transno);
-
-void lustre_msg_set_status(struct lustre_msg *msg, __u32 status)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
- pb->pb_status = status;
- return;
- }
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-EXPORT_SYMBOL(lustre_msg_set_status);
-
-void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
- pb->pb_conn_cnt = conn_cnt;
- return;
- }
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-
-void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
- pb->pb_timeout = timeout;
- return;
- }
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-
-void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
- pb->pb_service_time = service_time;
- return;
- }
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-
-void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- __u32 opc = lustre_msg_get_opc(msg);
- struct ptlrpc_body *pb;
-
- /* Don't set jobid for ldlm ast RPCs, they've been shrunk.
- * See the comment in ptlrpc_request_pack().
- */
- if (!opc || opc == LDLM_BL_CALLBACK ||
- opc == LDLM_CP_CALLBACK || opc == LDLM_GL_CALLBACK)
- return;
-
- pb = lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF,
- sizeof(struct ptlrpc_body));
- LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
-
- if (jobid)
- memcpy(pb->pb_jobid, jobid, LUSTRE_JOBID_SIZE);
- else if (pb->pb_jobid[0] == '\0')
- lustre_get_jobid(pb->pb_jobid);
- return;
- }
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-EXPORT_SYMBOL(lustre_msg_set_jobid);
-
-void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2:
- msg->lm_cksum = cksum;
- return;
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-
-void lustre_msg_set_mbits(struct lustre_msg *msg, __u64 mbits)
-{
- switch (msg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2: {
- struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
-
- LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
- pb->pb_mbits = mbits;
- return;
- }
- default:
- LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
- }
-}
-
-void ptlrpc_request_set_replen(struct ptlrpc_request *req)
-{
- int count = req_capsule_filled_sizes(&req->rq_pill, RCL_SERVER);
-
- req->rq_replen = lustre_msg_size(req->rq_reqmsg->lm_magic, count,
- req->rq_pill.rc_area[RCL_SERVER]);
- if (req->rq_reqmsg->lm_magic == LUSTRE_MSG_MAGIC_V2)
- req->rq_reqmsg->lm_repsize = req->rq_replen;
-}
-EXPORT_SYMBOL(ptlrpc_request_set_replen);
-
-/**
- * Send a remote set_info_async.
- *
- * This may go from client to server or server to client.
- */
-int do_set_info_async(struct obd_import *imp,
- int opcode, int version,
- u32 keylen, void *key,
- u32 vallen, void *val,
- struct ptlrpc_request_set *set)
-{
- struct ptlrpc_request *req;
- char *tmp;
- int rc;
-
- req = ptlrpc_request_alloc(imp, &RQF_OBD_SET_INFO);
- if (!req)
- return -ENOMEM;
-
- req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_KEY,
- RCL_CLIENT, keylen);
- req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_VAL,
- RCL_CLIENT, vallen);
- rc = ptlrpc_request_pack(req, version, opcode);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY);
- memcpy(tmp, key, keylen);
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_VAL);
- memcpy(tmp, val, vallen);
-
- ptlrpc_request_set_replen(req);
-
- if (set) {
- ptlrpc_set_add_req(set, req);
- ptlrpc_check_set(NULL, set);
- } else {
- rc = ptlrpc_queue_wait(req);
- ptlrpc_req_finished(req);
- }
-
- return rc;
-}
-EXPORT_SYMBOL(do_set_info_async);
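-
-/*
- * Illustrative sketch (not from the original file): a synchronous caller
- * of do_set_info_async(). The key string, opcode and version are
- * placeholders for whatever the target expects; passing a NULL request
- * set selects the ptlrpc_queue_wait() path.
- */
-static int example_set_info_sync(struct obd_import *imp, int opcode,
-				 int version)
-{
-	char key[] = "checksum";	/* hypothetical key */
-	__u32 val = 1;
-
-	return do_set_info_async(imp, opcode, version,
-				 sizeof(key), key, sizeof(val), &val, NULL);
-}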
-
-/* Byte-flipping routines for all the wire types declared in
- * lustre_idl.h are implemented here.
- */
-void lustre_swab_ptlrpc_body(struct ptlrpc_body *b)
-{
- __swab32s(&b->pb_type);
- __swab32s(&b->pb_version);
- __swab32s(&b->pb_opc);
- __swab32s(&b->pb_status);
- __swab64s(&b->pb_last_xid);
- __swab16s(&b->pb_tag);
- __swab64s(&b->pb_last_committed);
- __swab64s(&b->pb_transno);
- __swab32s(&b->pb_flags);
- __swab32s(&b->pb_op_flags);
- __swab32s(&b->pb_conn_cnt);
- __swab32s(&b->pb_timeout);
- __swab32s(&b->pb_service_time);
- __swab32s(&b->pb_limit);
- __swab64s(&b->pb_slv);
- __swab64s(&b->pb_pre_versions[0]);
- __swab64s(&b->pb_pre_versions[1]);
- __swab64s(&b->pb_pre_versions[2]);
- __swab64s(&b->pb_pre_versions[3]);
- __swab64s(&b->pb_mbits);
- BUILD_BUG_ON(offsetof(typeof(*b), pb_padding0) == 0);
- BUILD_BUG_ON(offsetof(typeof(*b), pb_padding1) == 0);
- BUILD_BUG_ON(offsetof(typeof(*b), pb_padding64_0) == 0);
- BUILD_BUG_ON(offsetof(typeof(*b), pb_padding64_1) == 0);
- BUILD_BUG_ON(offsetof(typeof(*b), pb_padding64_2) == 0);
- /* While we need to maintain compatibility between
-	 * clients and servers without ptlrpc_body_v2 (< 2.3),
-	 * do not swab any fields beyond pb_jobid, as we are
- * using this swab function for both ptlrpc_body
- * and ptlrpc_body_v2.
- */
- BUILD_BUG_ON(offsetof(typeof(*b), pb_jobid) == 0);
-}
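-
-/*
- * Illustrative sketch (not from the original file): the __swab*s helpers
- * used throughout these routines flip byte order in place. A minimal
- * model of __swab32s, assuming only <linux/swab.h>:
- */
-static inline void example_swab32s_model(__u32 *x)
-{
-	*x = __swab32(*x);	/* e.g. 0x12345678 becomes 0x78563412 */
-}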
-
-void lustre_swab_connect(struct obd_connect_data *ocd)
-{
- __swab64s(&ocd->ocd_connect_flags);
- __swab32s(&ocd->ocd_version);
- __swab32s(&ocd->ocd_grant);
- __swab64s(&ocd->ocd_ibits_known);
- __swab32s(&ocd->ocd_index);
- __swab32s(&ocd->ocd_brw_size);
- /* ocd_blocksize and ocd_inodespace don't need to be swabbed because
-	 * they are single-byte (__u8) values
- */
- __swab16s(&ocd->ocd_grant_extent);
- __swab32s(&ocd->ocd_unused);
- __swab64s(&ocd->ocd_transno);
- __swab32s(&ocd->ocd_group);
- __swab32s(&ocd->ocd_cksum_types);
- __swab32s(&ocd->ocd_instance);
- /* Fields after ocd_cksum_types are only accessible by the receiver
- * if the corresponding flag in ocd_connect_flags is set. Accessing
- * any field after ocd_maxbytes on the receiver without a valid flag
- * may result in out-of-bound memory access and kernel oops.
- */
- if (ocd->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)
- __swab32s(&ocd->ocd_max_easize);
- if (ocd->ocd_connect_flags & OBD_CONNECT_MAXBYTES)
- __swab64s(&ocd->ocd_maxbytes);
- if (ocd->ocd_connect_flags & OBD_CONNECT_MULTIMODRPCS)
- __swab16s(&ocd->ocd_maxmodrpcs);
- BUILD_BUG_ON(!offsetof(typeof(*ocd), padding0));
- BUILD_BUG_ON(offsetof(typeof(*ocd), padding1) == 0);
- if (ocd->ocd_connect_flags & OBD_CONNECT_FLAGS2)
- __swab64s(&ocd->ocd_connect_flags2);
- BUILD_BUG_ON(offsetof(typeof(*ocd), padding3) == 0);
- BUILD_BUG_ON(offsetof(typeof(*ocd), padding4) == 0);
- BUILD_BUG_ON(offsetof(typeof(*ocd), padding5) == 0);
- BUILD_BUG_ON(offsetof(typeof(*ocd), padding6) == 0);
- BUILD_BUG_ON(offsetof(typeof(*ocd), padding7) == 0);
- BUILD_BUG_ON(offsetof(typeof(*ocd), padding8) == 0);
- BUILD_BUG_ON(offsetof(typeof(*ocd), padding9) == 0);
- BUILD_BUG_ON(offsetof(typeof(*ocd), paddingA) == 0);
- BUILD_BUG_ON(offsetof(typeof(*ocd), paddingB) == 0);
- BUILD_BUG_ON(offsetof(typeof(*ocd), paddingC) == 0);
- BUILD_BUG_ON(offsetof(typeof(*ocd), paddingD) == 0);
- BUILD_BUG_ON(offsetof(typeof(*ocd), paddingE) == 0);
- BUILD_BUG_ON(offsetof(typeof(*ocd), paddingF) == 0);
-}
-
-static void lustre_swab_obdo(struct obdo *o)
-{
- __swab64s(&o->o_valid);
- lustre_swab_ost_id(&o->o_oi);
- __swab64s(&o->o_parent_seq);
- __swab64s(&o->o_size);
- __swab64s(&o->o_mtime);
- __swab64s(&o->o_atime);
- __swab64s(&o->o_ctime);
- __swab64s(&o->o_blocks);
- __swab64s(&o->o_grant);
- __swab32s(&o->o_blksize);
- __swab32s(&o->o_mode);
- __swab32s(&o->o_uid);
- __swab32s(&o->o_gid);
- __swab32s(&o->o_flags);
- __swab32s(&o->o_nlink);
- __swab32s(&o->o_parent_oid);
- __swab32s(&o->o_misc);
- __swab64s(&o->o_ioepoch);
- __swab32s(&o->o_stripe_idx);
- __swab32s(&o->o_parent_ver);
- /* o_handle is opaque */
- /* o_lcookie is swabbed elsewhere */
- __swab32s(&o->o_uid_h);
- __swab32s(&o->o_gid_h);
- __swab64s(&o->o_data_version);
- BUILD_BUG_ON(offsetof(typeof(*o), o_padding_4) == 0);
- BUILD_BUG_ON(offsetof(typeof(*o), o_padding_5) == 0);
- BUILD_BUG_ON(offsetof(typeof(*o), o_padding_6) == 0);
-}
-
-void lustre_swab_obd_statfs(struct obd_statfs *os)
-{
- __swab64s(&os->os_type);
- __swab64s(&os->os_blocks);
- __swab64s(&os->os_bfree);
- __swab64s(&os->os_bavail);
- __swab64s(&os->os_files);
- __swab64s(&os->os_ffree);
- /* no need to swab os_fsid */
- __swab32s(&os->os_bsize);
- __swab32s(&os->os_namelen);
- __swab64s(&os->os_maxbytes);
- __swab32s(&os->os_state);
- BUILD_BUG_ON(offsetof(typeof(*os), os_fprecreated) == 0);
- BUILD_BUG_ON(offsetof(typeof(*os), os_spare2) == 0);
- BUILD_BUG_ON(offsetof(typeof(*os), os_spare3) == 0);
- BUILD_BUG_ON(offsetof(typeof(*os), os_spare4) == 0);
- BUILD_BUG_ON(offsetof(typeof(*os), os_spare5) == 0);
- BUILD_BUG_ON(offsetof(typeof(*os), os_spare6) == 0);
- BUILD_BUG_ON(offsetof(typeof(*os), os_spare7) == 0);
- BUILD_BUG_ON(offsetof(typeof(*os), os_spare8) == 0);
- BUILD_BUG_ON(offsetof(typeof(*os), os_spare9) == 0);
-}
-
-void lustre_swab_obd_ioobj(struct obd_ioobj *ioo)
-{
- lustre_swab_ost_id(&ioo->ioo_oid);
- __swab32s(&ioo->ioo_max_brw);
- __swab32s(&ioo->ioo_bufcnt);
-}
-
-void lustre_swab_niobuf_remote(struct niobuf_remote *nbr)
-{
- __swab64s(&nbr->rnb_offset);
- __swab32s(&nbr->rnb_len);
- __swab32s(&nbr->rnb_flags);
-}
-
-void lustre_swab_ost_body(struct ost_body *b)
-{
- lustre_swab_obdo(&b->oa);
-}
-
-void lustre_swab_ost_last_id(u64 *id)
-{
- __swab64s(id);
-}
-
-void lustre_swab_generic_32s(__u32 *val)
-{
- __swab32s(val);
-}
-
-void lustre_swab_gl_desc(union ldlm_gl_desc *desc)
-{
- lustre_swab_lu_fid(&desc->lquota_desc.gl_id.qid_fid);
- __swab64s(&desc->lquota_desc.gl_flags);
- __swab64s(&desc->lquota_desc.gl_ver);
- __swab64s(&desc->lquota_desc.gl_hardlimit);
- __swab64s(&desc->lquota_desc.gl_softlimit);
- __swab64s(&desc->lquota_desc.gl_time);
- BUILD_BUG_ON(offsetof(typeof(desc->lquota_desc), gl_pad2) == 0);
-}
-
-void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb)
-{
- __swab64s(&lvb->lvb_size);
- __swab64s(&lvb->lvb_mtime);
- __swab64s(&lvb->lvb_atime);
- __swab64s(&lvb->lvb_ctime);
- __swab64s(&lvb->lvb_blocks);
-}
-EXPORT_SYMBOL(lustre_swab_ost_lvb_v1);
-
-void lustre_swab_ost_lvb(struct ost_lvb *lvb)
-{
- __swab64s(&lvb->lvb_size);
- __swab64s(&lvb->lvb_mtime);
- __swab64s(&lvb->lvb_atime);
- __swab64s(&lvb->lvb_ctime);
- __swab64s(&lvb->lvb_blocks);
- __swab32s(&lvb->lvb_mtime_ns);
- __swab32s(&lvb->lvb_atime_ns);
- __swab32s(&lvb->lvb_ctime_ns);
- __swab32s(&lvb->lvb_padding);
-}
-EXPORT_SYMBOL(lustre_swab_ost_lvb);
-
-void lustre_swab_lquota_lvb(struct lquota_lvb *lvb)
-{
- __swab64s(&lvb->lvb_flags);
- __swab64s(&lvb->lvb_id_may_rel);
- __swab64s(&lvb->lvb_id_rel);
- __swab64s(&lvb->lvb_id_qunit);
- __swab64s(&lvb->lvb_pad1);
-}
-EXPORT_SYMBOL(lustre_swab_lquota_lvb);
-
-void lustre_swab_mdt_body(struct mdt_body *b)
-{
- lustre_swab_lu_fid(&b->mbo_fid1);
- lustre_swab_lu_fid(&b->mbo_fid2);
- /* handle is opaque */
- __swab64s(&b->mbo_valid);
- __swab64s(&b->mbo_size);
- __swab64s(&b->mbo_mtime);
- __swab64s(&b->mbo_atime);
- __swab64s(&b->mbo_ctime);
- __swab64s(&b->mbo_blocks);
- __swab64s(&b->mbo_ioepoch);
- __swab64s(&b->mbo_t_state);
- __swab32s(&b->mbo_fsuid);
- __swab32s(&b->mbo_fsgid);
- __swab32s(&b->mbo_capability);
- __swab32s(&b->mbo_mode);
- __swab32s(&b->mbo_uid);
- __swab32s(&b->mbo_gid);
- __swab32s(&b->mbo_flags);
- __swab32s(&b->mbo_rdev);
- __swab32s(&b->mbo_nlink);
- BUILD_BUG_ON(offsetof(typeof(*b), mbo_unused2) == 0);
- __swab32s(&b->mbo_suppgid);
- __swab32s(&b->mbo_eadatasize);
- __swab32s(&b->mbo_aclsize);
- __swab32s(&b->mbo_max_mdsize);
- BUILD_BUG_ON(!offsetof(typeof(*b), mbo_unused3));
- __swab32s(&b->mbo_uid_h);
- __swab32s(&b->mbo_gid_h);
- BUILD_BUG_ON(offsetof(typeof(*b), mbo_padding_5) == 0);
-}
-
-void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b)
-{
-	/* mio_handle is opaque */
- BUILD_BUG_ON(!offsetof(typeof(*b), mio_unused1));
- BUILD_BUG_ON(!offsetof(typeof(*b), mio_unused2));
- BUILD_BUG_ON(!offsetof(typeof(*b), mio_padding));
-}
-
-void lustre_swab_mgs_target_info(struct mgs_target_info *mti)
-{
- int i;
-
- __swab32s(&mti->mti_lustre_ver);
- __swab32s(&mti->mti_stripe_index);
- __swab32s(&mti->mti_config_ver);
- __swab32s(&mti->mti_flags);
- __swab32s(&mti->mti_instance);
- __swab32s(&mti->mti_nid_count);
- BUILD_BUG_ON(sizeof(lnet_nid_t) != sizeof(__u64));
- for (i = 0; i < MTI_NIDS_MAX; i++)
- __swab64s(&mti->mti_nids[i]);
-}
-
-void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *entry)
-{
- __u8 i;
-
- __swab64s(&entry->mne_version);
- __swab32s(&entry->mne_instance);
- __swab32s(&entry->mne_index);
- __swab32s(&entry->mne_length);
-
-	/* mne_nid_(count|type) must be one byte in size because we
-	 * access them without swapping.
-	 */
- BUILD_BUG_ON(sizeof(entry->mne_nid_count) != sizeof(__u8));
- BUILD_BUG_ON(sizeof(entry->mne_nid_type) != sizeof(__u8));
-
- /* remove this assertion if ipv6 is supported. */
- LASSERT(entry->mne_nid_type == 0);
- for (i = 0; i < entry->mne_nid_count; i++) {
- BUILD_BUG_ON(sizeof(lnet_nid_t) != sizeof(__u64));
- __swab64s(&entry->u.nids[i]);
- }
-}
-EXPORT_SYMBOL(lustre_swab_mgs_nidtbl_entry);
-
-void lustre_swab_mgs_config_body(struct mgs_config_body *body)
-{
- __swab64s(&body->mcb_offset);
- __swab32s(&body->mcb_units);
- __swab16s(&body->mcb_type);
-}
-
-void lustre_swab_mgs_config_res(struct mgs_config_res *body)
-{
- __swab64s(&body->mcr_offset);
- __swab64s(&body->mcr_size);
-}
-
-static void lustre_swab_obd_dqinfo(struct obd_dqinfo *i)
-{
- __swab64s(&i->dqi_bgrace);
- __swab64s(&i->dqi_igrace);
- __swab32s(&i->dqi_flags);
- __swab32s(&i->dqi_valid);
-}
-
-static void lustre_swab_obd_dqblk(struct obd_dqblk *b)
-{
- __swab64s(&b->dqb_ihardlimit);
- __swab64s(&b->dqb_isoftlimit);
- __swab64s(&b->dqb_curinodes);
- __swab64s(&b->dqb_bhardlimit);
- __swab64s(&b->dqb_bsoftlimit);
- __swab64s(&b->dqb_curspace);
- __swab64s(&b->dqb_btime);
- __swab64s(&b->dqb_itime);
- __swab32s(&b->dqb_valid);
- BUILD_BUG_ON(offsetof(typeof(*b), dqb_padding) == 0);
-}
-
-void lustre_swab_obd_quotactl(struct obd_quotactl *q)
-{
- __swab32s(&q->qc_cmd);
- __swab32s(&q->qc_type);
- __swab32s(&q->qc_id);
- __swab32s(&q->qc_stat);
- lustre_swab_obd_dqinfo(&q->qc_dqinfo);
- lustre_swab_obd_dqblk(&q->qc_dqblk);
-}
-
-void lustre_swab_fid2path(struct getinfo_fid2path *gf)
-{
- lustre_swab_lu_fid(&gf->gf_fid);
- __swab64s(&gf->gf_recno);
- __swab32s(&gf->gf_linkno);
- __swab32s(&gf->gf_pathlen);
-}
-EXPORT_SYMBOL(lustre_swab_fid2path);
-
-static void lustre_swab_fiemap_extent(struct fiemap_extent *fm_extent)
-{
- __swab64s(&fm_extent->fe_logical);
- __swab64s(&fm_extent->fe_physical);
- __swab64s(&fm_extent->fe_length);
- __swab32s(&fm_extent->fe_flags);
- __swab32s(&fm_extent->fe_device);
-}
-
-void lustre_swab_fiemap(struct fiemap *fiemap)
-{
- __u32 i;
-
- __swab64s(&fiemap->fm_start);
- __swab64s(&fiemap->fm_length);
- __swab32s(&fiemap->fm_flags);
- __swab32s(&fiemap->fm_mapped_extents);
- __swab32s(&fiemap->fm_extent_count);
- __swab32s(&fiemap->fm_reserved);
-
- for (i = 0; i < fiemap->fm_mapped_extents; i++)
- lustre_swab_fiemap_extent(&fiemap->fm_extents[i]);
-}
-
-void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr)
-{
- __swab32s(&rr->rr_opcode);
- __swab32s(&rr->rr_cap);
- __swab32s(&rr->rr_fsuid);
- /* rr_fsuid_h is unused */
- __swab32s(&rr->rr_fsgid);
- /* rr_fsgid_h is unused */
- __swab32s(&rr->rr_suppgid1);
- /* rr_suppgid1_h is unused */
- __swab32s(&rr->rr_suppgid2);
- /* rr_suppgid2_h is unused */
- lustre_swab_lu_fid(&rr->rr_fid1);
- lustre_swab_lu_fid(&rr->rr_fid2);
- __swab64s(&rr->rr_mtime);
- __swab64s(&rr->rr_atime);
- __swab64s(&rr->rr_ctime);
- __swab64s(&rr->rr_size);
- __swab64s(&rr->rr_blocks);
- __swab32s(&rr->rr_bias);
- __swab32s(&rr->rr_mode);
- __swab32s(&rr->rr_flags);
- __swab32s(&rr->rr_flags_h);
- __swab32s(&rr->rr_umask);
-
- BUILD_BUG_ON(offsetof(typeof(*rr), rr_padding_4) == 0);
-}
-
-void lustre_swab_lov_desc(struct lov_desc *ld)
-{
- __swab32s(&ld->ld_tgt_count);
- __swab32s(&ld->ld_active_tgt_count);
- __swab32s(&ld->ld_default_stripe_count);
- __swab32s(&ld->ld_pattern);
- __swab64s(&ld->ld_default_stripe_size);
- __swab64s(&ld->ld_default_stripe_offset);
- __swab32s(&ld->ld_qos_maxage);
- /* uuid endian insensitive */
-}
-EXPORT_SYMBOL(lustre_swab_lov_desc);
-
-/* This structure is always in little-endian */
-static void lustre_swab_lmv_mds_md_v1(struct lmv_mds_md_v1 *lmm1)
-{
- int i;
-
- __swab32s(&lmm1->lmv_magic);
- __swab32s(&lmm1->lmv_stripe_count);
- __swab32s(&lmm1->lmv_master_mdt_index);
- __swab32s(&lmm1->lmv_hash_type);
- __swab32s(&lmm1->lmv_layout_version);
- for (i = 0; i < lmm1->lmv_stripe_count; i++)
- lustre_swab_lu_fid(&lmm1->lmv_stripe_fids[i]);
-}
-
-void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm)
-{
- switch (lmm->lmv_magic) {
- case LMV_MAGIC_V1:
- lustre_swab_lmv_mds_md_v1(&lmm->lmv_md_v1);
- break;
- default:
- break;
- }
-}
-EXPORT_SYMBOL(lustre_swab_lmv_mds_md);
-
-void lustre_swab_lmv_user_md(struct lmv_user_md *lum)
-{
- __swab32s(&lum->lum_magic);
- __swab32s(&lum->lum_stripe_count);
- __swab32s(&lum->lum_stripe_offset);
- __swab32s(&lum->lum_hash_type);
- __swab32s(&lum->lum_type);
- BUILD_BUG_ON(!offsetof(typeof(*lum), lum_padding1));
-}
-EXPORT_SYMBOL(lustre_swab_lmv_user_md);
-
-static void lustre_swab_lmm_oi(struct ost_id *oi)
-{
- __swab64s(&oi->oi.oi_id);
- __swab64s(&oi->oi.oi_seq);
-}
-
-static void lustre_swab_lov_user_md_common(struct lov_user_md_v1 *lum)
-{
- __swab32s(&lum->lmm_magic);
- __swab32s(&lum->lmm_pattern);
- lustre_swab_lmm_oi(&lum->lmm_oi);
- __swab32s(&lum->lmm_stripe_size);
- __swab16s(&lum->lmm_stripe_count);
- __swab16s(&lum->lmm_stripe_offset);
-}
-
-void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum)
-{
- CDEBUG(D_IOCTL, "swabbing lov_user_md v1\n");
- lustre_swab_lov_user_md_common(lum);
-}
-EXPORT_SYMBOL(lustre_swab_lov_user_md_v1);
-
-void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum)
-{
- CDEBUG(D_IOCTL, "swabbing lov_user_md v3\n");
- lustre_swab_lov_user_md_common((struct lov_user_md_v1 *)lum);
-	/* lmm_pool_name is a char array; no byte swapping needed */
-}
-EXPORT_SYMBOL(lustre_swab_lov_user_md_v3);
-
-void lustre_swab_lov_mds_md(struct lov_mds_md *lmm)
-{
- CDEBUG(D_IOCTL, "swabbing lov_mds_md\n");
- __swab32s(&lmm->lmm_magic);
- __swab32s(&lmm->lmm_pattern);
- lustre_swab_lmm_oi(&lmm->lmm_oi);
- __swab32s(&lmm->lmm_stripe_size);
- __swab16s(&lmm->lmm_stripe_count);
- __swab16s(&lmm->lmm_layout_gen);
-}
-EXPORT_SYMBOL(lustre_swab_lov_mds_md);
-
-void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
- int stripe_count)
-{
- int i;
-
- for (i = 0; i < stripe_count; i++) {
- lustre_swab_ost_id(&lod[i].l_ost_oi);
- __swab32s(&lod[i].l_ost_gen);
- __swab32s(&lod[i].l_ost_idx);
- }
-}
-EXPORT_SYMBOL(lustre_swab_lov_user_md_objects);
-
-static void lustre_swab_ldlm_res_id(struct ldlm_res_id *id)
-{
- int i;
-
- for (i = 0; i < RES_NAME_SIZE; i++)
- __swab64s(&id->name[i]);
-}
-
-static void lustre_swab_ldlm_policy_data(union ldlm_wire_policy_data *d)
-{
- /* the lock data is a union and the first two fields are always an
-	 * extent, so it's OK to process LDLM_EXTENT and LDLM_FLOCK lock
- * data the same way.
- */
- __swab64s(&d->l_extent.start);
- __swab64s(&d->l_extent.end);
- __swab64s(&d->l_extent.gid);
- __swab64s(&d->l_flock.lfw_owner);
- __swab32s(&d->l_flock.lfw_pid);
-}
-
-void lustre_swab_ldlm_intent(struct ldlm_intent *i)
-{
- __swab64s(&i->opc);
-}
-
-static void lustre_swab_ldlm_resource_desc(struct ldlm_resource_desc *r)
-{
- __swab32s(&r->lr_type);
- BUILD_BUG_ON(offsetof(typeof(*r), lr_padding) == 0);
- lustre_swab_ldlm_res_id(&r->lr_name);
-}
-
-static void lustre_swab_ldlm_lock_desc(struct ldlm_lock_desc *l)
-{
- lustre_swab_ldlm_resource_desc(&l->l_resource);
- __swab32s(&l->l_req_mode);
- __swab32s(&l->l_granted_mode);
- lustre_swab_ldlm_policy_data(&l->l_policy_data);
-}
-
-void lustre_swab_ldlm_request(struct ldlm_request *rq)
-{
- __swab32s(&rq->lock_flags);
- lustre_swab_ldlm_lock_desc(&rq->lock_desc);
- __swab32s(&rq->lock_count);
- /* lock_handle[] opaque */
-}
-
-void lustre_swab_ldlm_reply(struct ldlm_reply *r)
-{
- __swab32s(&r->lock_flags);
- BUILD_BUG_ON(offsetof(typeof(*r), lock_padding) == 0);
- lustre_swab_ldlm_lock_desc(&r->lock_desc);
- /* lock_handle opaque */
- __swab64s(&r->lock_policy_res1);
- __swab64s(&r->lock_policy_res2);
-}
-
-/* Dump functions */
-void dump_ioo(struct obd_ioobj *ioo)
-{
- CDEBUG(D_RPCTRACE,
- "obd_ioobj: ioo_oid=" DOSTID ", ioo_max_brw=%#x, ioo_bufct=%d\n",
- POSTID(&ioo->ioo_oid), ioo->ioo_max_brw,
- ioo->ioo_bufcnt);
-}
-
-void dump_rniobuf(struct niobuf_remote *nb)
-{
- CDEBUG(D_RPCTRACE, "niobuf_remote: offset=%llu, len=%d, flags=%x\n",
- nb->rnb_offset, nb->rnb_len, nb->rnb_flags);
-}
-
-static void dump_obdo(struct obdo *oa)
-{
- __u32 valid = oa->o_valid;
-
- CDEBUG(D_RPCTRACE, "obdo: o_valid = %08x\n", valid);
- if (valid & OBD_MD_FLID)
- CDEBUG(D_RPCTRACE, "obdo: id = " DOSTID "\n", POSTID(&oa->o_oi));
- if (valid & OBD_MD_FLFID)
- CDEBUG(D_RPCTRACE, "obdo: o_parent_seq = %#llx\n",
- oa->o_parent_seq);
- if (valid & OBD_MD_FLSIZE)
- CDEBUG(D_RPCTRACE, "obdo: o_size = %lld\n", oa->o_size);
- if (valid & OBD_MD_FLMTIME)
- CDEBUG(D_RPCTRACE, "obdo: o_mtime = %lld\n", oa->o_mtime);
- if (valid & OBD_MD_FLATIME)
- CDEBUG(D_RPCTRACE, "obdo: o_atime = %lld\n", oa->o_atime);
- if (valid & OBD_MD_FLCTIME)
- CDEBUG(D_RPCTRACE, "obdo: o_ctime = %lld\n", oa->o_ctime);
- if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
- CDEBUG(D_RPCTRACE, "obdo: o_blocks = %lld\n", oa->o_blocks);
- if (valid & OBD_MD_FLGRANT)
- CDEBUG(D_RPCTRACE, "obdo: o_grant = %lld\n", oa->o_grant);
- if (valid & OBD_MD_FLBLKSZ)
- CDEBUG(D_RPCTRACE, "obdo: o_blksize = %d\n", oa->o_blksize);
- if (valid & (OBD_MD_FLTYPE | OBD_MD_FLMODE))
- CDEBUG(D_RPCTRACE, "obdo: o_mode = %o\n",
- oa->o_mode & ((valid & OBD_MD_FLTYPE ? S_IFMT : 0) |
- (valid & OBD_MD_FLMODE ? ~S_IFMT : 0)));
- if (valid & OBD_MD_FLUID)
- CDEBUG(D_RPCTRACE, "obdo: o_uid = %u\n", oa->o_uid);
- if (valid & OBD_MD_FLUID)
- CDEBUG(D_RPCTRACE, "obdo: o_uid_h = %u\n", oa->o_uid_h);
- if (valid & OBD_MD_FLGID)
- CDEBUG(D_RPCTRACE, "obdo: o_gid = %u\n", oa->o_gid);
- if (valid & OBD_MD_FLGID)
- CDEBUG(D_RPCTRACE, "obdo: o_gid_h = %u\n", oa->o_gid_h);
- if (valid & OBD_MD_FLFLAGS)
- CDEBUG(D_RPCTRACE, "obdo: o_flags = %x\n", oa->o_flags);
- if (valid & OBD_MD_FLNLINK)
- CDEBUG(D_RPCTRACE, "obdo: o_nlink = %u\n", oa->o_nlink);
- else if (valid & OBD_MD_FLCKSUM)
- CDEBUG(D_RPCTRACE, "obdo: o_checksum (o_nlink) = %u\n",
- oa->o_nlink);
- if (valid & OBD_MD_FLGENER)
- CDEBUG(D_RPCTRACE, "obdo: o_parent_oid = %x\n",
- oa->o_parent_oid);
- if (valid & OBD_MD_FLEPOCH)
- CDEBUG(D_RPCTRACE, "obdo: o_ioepoch = %lld\n",
- oa->o_ioepoch);
- if (valid & OBD_MD_FLFID) {
- CDEBUG(D_RPCTRACE, "obdo: o_stripe_idx = %u\n",
- oa->o_stripe_idx);
- CDEBUG(D_RPCTRACE, "obdo: o_parent_ver = %x\n",
- oa->o_parent_ver);
- }
- if (valid & OBD_MD_FLHANDLE)
- CDEBUG(D_RPCTRACE, "obdo: o_handle = %lld\n",
- oa->o_handle.cookie);
-}
-
-void dump_ost_body(struct ost_body *ob)
-{
- dump_obdo(&ob->oa);
-}
-
-void dump_rcs(__u32 *rc)
-{
- CDEBUG(D_RPCTRACE, "rmf_rcs: %d\n", *rc);
-}
-
-static inline int req_ptlrpc_body_swabbed(struct ptlrpc_request *req)
-{
- LASSERT(req->rq_reqmsg);
-
- switch (req->rq_reqmsg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2:
- return lustre_req_swabbed(req, MSG_PTLRPC_BODY_OFF);
- default:
- CERROR("bad lustre msg magic: %#08X\n",
- req->rq_reqmsg->lm_magic);
- }
- return 0;
-}
-
-static inline int rep_ptlrpc_body_swabbed(struct ptlrpc_request *req)
-{
- LASSERT(req->rq_repmsg);
-
- switch (req->rq_repmsg->lm_magic) {
- case LUSTRE_MSG_MAGIC_V2:
- return lustre_rep_swabbed(req, MSG_PTLRPC_BODY_OFF);
- default:
- /* uninitialized yet */
- return 0;
- }
-}
-
-void _debug_req(struct ptlrpc_request *req,
- struct libcfs_debug_msg_data *msgdata,
- const char *fmt, ...)
-{
- int req_ok = req->rq_reqmsg != NULL;
- int rep_ok = req->rq_repmsg != NULL;
- lnet_nid_t nid = LNET_NID_ANY;
- va_list args;
-
- if (ptlrpc_req_need_swab(req)) {
- req_ok = req_ok && req_ptlrpc_body_swabbed(req);
- rep_ok = rep_ok && rep_ptlrpc_body_swabbed(req);
- }
-
- if (req->rq_import && req->rq_import->imp_connection)
- nid = req->rq_import->imp_connection->c_peer.nid;
- else if (req->rq_export && req->rq_export->exp_connection)
- nid = req->rq_export->exp_connection->c_peer.nid;
-
- va_start(args, fmt);
- libcfs_debug_vmsg2(msgdata, fmt, args,
- " req@%p x%llu/t%lld(%lld) o%d->%s@%s:%d/%d lens %d/%d e %d to %lld dl %lld ref %d fl " REQ_FLAGS_FMT "/%x/%x rc %d/%d\n",
- req, req->rq_xid, req->rq_transno,
- req_ok ? lustre_msg_get_transno(req->rq_reqmsg) : 0,
- req_ok ? lustre_msg_get_opc(req->rq_reqmsg) : -1,
- req->rq_import ?
- req->rq_import->imp_obd->obd_name :
- req->rq_export ?
- req->rq_export->exp_client_uuid.uuid :
- "<?>",
- libcfs_nid2str(nid),
- req->rq_request_portal, req->rq_reply_portal,
- req->rq_reqlen, req->rq_replen,
- req->rq_early_count, (s64)req->rq_timedout,
- (s64)req->rq_deadline,
- atomic_read(&req->rq_refcount),
- DEBUG_REQ_FLAGS(req),
- req_ok ? lustre_msg_get_flags(req->rq_reqmsg) : -1,
- rep_ok ? lustre_msg_get_flags(req->rq_repmsg) : -1,
- req->rq_status,
- rep_ok ? lustre_msg_get_status(req->rq_repmsg) : -1);
- va_end(args);
-}
-EXPORT_SYMBOL(_debug_req);
-
-void lustre_swab_lustre_capa(struct lustre_capa *c)
-{
- lustre_swab_lu_fid(&c->lc_fid);
- __swab64s(&c->lc_opc);
- __swab64s(&c->lc_uid);
- __swab64s(&c->lc_gid);
- __swab32s(&c->lc_flags);
- __swab32s(&c->lc_keyid);
- __swab32s(&c->lc_timeout);
- __swab32s(&c->lc_expiry);
-}
-
-void lustre_swab_hsm_user_state(struct hsm_user_state *state)
-{
- __swab32s(&state->hus_states);
- __swab32s(&state->hus_archive_id);
-}
-
-void lustre_swab_hsm_state_set(struct hsm_state_set *hss)
-{
- __swab32s(&hss->hss_valid);
- __swab64s(&hss->hss_setmask);
- __swab64s(&hss->hss_clearmask);
- __swab32s(&hss->hss_archive_id);
-}
-EXPORT_SYMBOL(lustre_swab_hsm_state_set);
-
-static void lustre_swab_hsm_extent(struct hsm_extent *extent)
-{
- __swab64s(&extent->offset);
- __swab64s(&extent->length);
-}
-
-void lustre_swab_hsm_current_action(struct hsm_current_action *action)
-{
- __swab32s(&action->hca_state);
- __swab32s(&action->hca_action);
- lustre_swab_hsm_extent(&action->hca_location);
-}
-
-void lustre_swab_hsm_user_item(struct hsm_user_item *hui)
-{
- lustre_swab_lu_fid(&hui->hui_fid);
- lustre_swab_hsm_extent(&hui->hui_extent);
-}
-
-void lustre_swab_layout_intent(struct layout_intent *li)
-{
- __swab32s(&li->li_opc);
- __swab32s(&li->li_flags);
- __swab64s(&li->li_start);
- __swab64s(&li->li_end);
-}
-
-void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk)
-{
- lustre_swab_lu_fid(&hpk->hpk_fid);
- __swab64s(&hpk->hpk_cookie);
- __swab64s(&hpk->hpk_extent.offset);
- __swab64s(&hpk->hpk_extent.length);
- __swab16s(&hpk->hpk_flags);
- __swab16s(&hpk->hpk_errval);
-}
-
-void lustre_swab_hsm_request(struct hsm_request *hr)
-{
- __swab32s(&hr->hr_action);
- __swab32s(&hr->hr_archive_id);
- __swab64s(&hr->hr_flags);
- __swab32s(&hr->hr_itemcount);
- __swab32s(&hr->hr_data_len);
-}
-
-void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl)
-{
- __swab64s(&msl->msl_flags);
-}
-EXPORT_SYMBOL(lustre_swab_swap_layouts);
-
-void lustre_swab_close_data(struct close_data *cd)
-{
- lustre_swab_lu_fid(&cd->cd_fid);
- __swab64s(&cd->cd_data_version);
-}
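
The swab helpers above convert each fixed-width field of a wire structure in place. A minimal userspace sketch of the same pattern, assuming the GCC/Clang __builtin_bswap64 builtin; struct extent_model and its field values are illustrative stand-ins, not Lustre definitions:

    #include <stdint.h>
    #include <stdio.h>

    /* Userspace stand-in for the kernel's in-place __swab64s() helper. */
    static void swab64s(uint64_t *x)
    {
            *x = __builtin_bswap64(*x);
    }

    /* Hypothetical wire struct, patterned on lustre_swab_hsm_extent(). */
    struct extent_model {
            uint64_t offset;
            uint64_t length;
    };

    static void swab_extent_model(struct extent_model *e)
    {
            swab64s(&e->offset);
            swab64s(&e->length);
    }

    int main(void)
    {
            struct extent_model e = { .offset = 0x1122334455667788ULL,
                                      .length = 4096 };

            swab_extent_model(&e);  /* flip both fields to the other endianness */
            printf("offset=%llx length=%llx\n",
                   (unsigned long long)e.offset,
                   (unsigned long long)e.length);
            return 0;
    }
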
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pers.c b/drivers/staging/lustre/lustre/ptlrpc/pers.c
deleted file mode 100644
index 2466868afb9c..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/pers.c
+++ /dev/null
@@ -1,72 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2014, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_RPC
-
-#include <obd_support.h>
-#include <obd_class.h>
-#include <lustre_lib.h>
-#include <lustre_ha.h>
-#include <lustre_import.h>
-
-#include "ptlrpc_internal.h"
-
-void ptlrpc_fill_bulk_md(struct lnet_md *md, struct ptlrpc_bulk_desc *desc,
- int mdidx)
-{
- int offset = mdidx * LNET_MAX_IOV;
-
- BUILD_BUG_ON(PTLRPC_MAX_BRW_PAGES >= LI_POISON);
-
- LASSERT(mdidx < desc->bd_md_max_brw);
- LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
- LASSERT(!(md->options & (LNET_MD_IOVEC | LNET_MD_KIOV |
- LNET_MD_PHYS)));
-
- md->length = max(0, desc->bd_iov_count - mdidx * LNET_MAX_IOV);
- md->length = min_t(unsigned int, LNET_MAX_IOV, md->length);
-
- if (ptlrpc_is_bulk_desc_kiov(desc->bd_type)) {
- md->options |= LNET_MD_KIOV;
- if (GET_ENC_KIOV(desc))
- md->start = &BD_GET_ENC_KIOV(desc, offset);
- else
- md->start = &BD_GET_KIOV(desc, offset);
- } else {
- md->options |= LNET_MD_IOVEC;
- if (GET_ENC_KVEC(desc))
- md->start = &BD_GET_ENC_KVEC(desc, offset);
- else
- md->start = &BD_GET_KVEC(desc, offset);
- }
-}
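
The length computation above clamps each bulk MD to at most LNET_MAX_IOV fragments, with the final MD taking whatever remains. A sketch of that arithmetic under an illustrative LNET_MAX_IOV_MODEL of 256 (the real constant is not reproduced here):

    #include <stdio.h>

    #define LNET_MAX_IOV_MODEL 256  /* illustrative, not the real constant */

    /* Model of the fragment count per MD in ptlrpc_fill_bulk_md():
     * max(0, count - idx * MAX), clamped to MAX. */
    static unsigned int md_frags(int iov_count, int mdidx)
    {
            int len = iov_count - mdidx * LNET_MAX_IOV_MODEL;

            if (len < 0)
                    len = 0;
            if (len > LNET_MAX_IOV_MODEL)
                    len = LNET_MAX_IOV_MODEL;
            return len;
    }

    int main(void)
    {
            int i;

            /* 600 fragments split across MDs of at most 256: 256, 256, 88, 0. */
            for (i = 0; i < 4; i++)
                    printf("md %d -> %u frags\n", i, md_frags(600, i));
            return 0;
    }
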
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pinger.c b/drivers/staging/lustre/lustre/ptlrpc/pinger.c
deleted file mode 100644
index b3297b5ce395..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/pinger.c
+++ /dev/null
@@ -1,474 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ptlrpc/pinger.c
- *
- * Portal-RPC reconnection and replay operations, for use in recovery.
- */
-
-#define DEBUG_SUBSYSTEM S_RPC
-
-#include <obd_support.h>
-#include <obd_class.h>
-#include "ptlrpc_internal.h"
-
-struct mutex pinger_mutex;
-static LIST_HEAD(pinger_imports);
-static struct list_head timeout_list = LIST_HEAD_INIT(timeout_list);
-
-struct ptlrpc_request *
-ptlrpc_prep_ping(struct obd_import *imp)
-{
- struct ptlrpc_request *req;
-
- req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING,
- LUSTRE_OBD_VERSION, OBD_PING);
- if (req) {
- ptlrpc_request_set_replen(req);
- req->rq_no_resend = 1;
- req->rq_no_delay = 1;
- }
- return req;
-}
-
-int ptlrpc_obd_ping(struct obd_device *obd)
-{
- int rc;
- struct ptlrpc_request *req;
-
- req = ptlrpc_prep_ping(obd->u.cli.cl_import);
- if (!req)
- return -ENOMEM;
-
- req->rq_send_state = LUSTRE_IMP_FULL;
-
- rc = ptlrpc_queue_wait(req);
-
- ptlrpc_req_finished(req);
-
- return rc;
-}
-EXPORT_SYMBOL(ptlrpc_obd_ping);
-
-static int ptlrpc_ping(struct obd_import *imp)
-{
- struct ptlrpc_request *req;
-
- req = ptlrpc_prep_ping(imp);
- if (!req) {
- CERROR("OOM trying to ping %s->%s\n",
- imp->imp_obd->obd_uuid.uuid,
- obd2cli_tgt(imp->imp_obd));
- return -ENOMEM;
- }
-
- DEBUG_REQ(D_INFO, req, "pinging %s->%s",
- imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
- ptlrpcd_add_req(req);
-
- return 0;
-}
-
-static void ptlrpc_update_next_ping(struct obd_import *imp, int soon)
-{
- int time = soon ? PING_INTERVAL_SHORT : PING_INTERVAL;
-
- if (imp->imp_state == LUSTRE_IMP_DISCON) {
- int dtime = max_t(int, CONNECTION_SWITCH_MIN,
- AT_OFF ? 0 :
- at_get(&imp->imp_at.iat_net_latency));
- time = min(time, dtime);
- }
- imp->imp_next_ping = jiffies + time * HZ;
-}
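
For a disconnected import, ptlrpc_update_next_ping() pings sooner: it takes the larger of CONNECTION_SWITCH_MIN and the measured network latency, then caps the result by the normal interval. A sketch of the computation with illustrative constants (the real PING_INTERVAL is derived from obd_timeout and is not reproduced here):

    #include <stdio.h>

    /* Illustrative values only; not the kernel's tunables. */
    #define PING_INTERVAL_MODEL        25
    #define PING_INTERVAL_SHORT_MODEL   4
    #define CONN_SWITCH_MIN_MODEL       5

    /* Model of ptlrpc_update_next_ping(): the delay in seconds before
     * the next ping, given the import state. */
    static int next_ping_delay(int soon, int disconnected, int net_latency)
    {
            int t = soon ? PING_INTERVAL_SHORT_MODEL : PING_INTERVAL_MODEL;

            if (disconnected) {
                    int dtime = net_latency > CONN_SWITCH_MIN_MODEL ?
                                net_latency : CONN_SWITCH_MIN_MODEL;

                    if (dtime < t)
                            t = dtime;
            }
            return t;
    }

    int main(void)
    {
            printf("connected: %d s\n", next_ping_delay(0, 0, 0));
            printf("disconnected, 7 s latency: %d s\n",
                   next_ping_delay(0, 1, 7));
            return 0;
    }
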
-
-static inline int imp_is_deactive(struct obd_import *imp)
-{
- return (imp->imp_deactive ||
- OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_IMP_DEACTIVE));
-}
-
-static inline int ptlrpc_next_reconnect(struct obd_import *imp)
-{
- if (imp->imp_server_timeout)
- return jiffies + obd_timeout / 2 * HZ;
- else
- return jiffies + obd_timeout * HZ;
-}
-
-static long pinger_check_timeout(unsigned long time)
-{
- struct timeout_item *item;
- unsigned long timeout = PING_INTERVAL;
-
- /* The timeout list is sorted in increasing order */
- mutex_lock(&pinger_mutex);
- list_for_each_entry(item, &timeout_list, ti_chain) {
- int ti_timeout = item->ti_timeout;
-
- if (timeout > ti_timeout)
- timeout = ti_timeout;
- break;
- }
- mutex_unlock(&pinger_mutex);
-
- return time + timeout * HZ - jiffies;
-}
-
-static bool ir_up;
-
-void ptlrpc_pinger_ir_up(void)
-{
- CDEBUG(D_HA, "IR up\n");
- ir_up = true;
-}
-EXPORT_SYMBOL(ptlrpc_pinger_ir_up);
-
-void ptlrpc_pinger_ir_down(void)
-{
- CDEBUG(D_HA, "IR down\n");
- ir_up = false;
-}
-EXPORT_SYMBOL(ptlrpc_pinger_ir_down);
-
-static void ptlrpc_pinger_process_import(struct obd_import *imp,
- unsigned long this_ping)
-{
- int level;
- int force;
- int force_next;
- int suppress;
-
- spin_lock(&imp->imp_lock);
-
- level = imp->imp_state;
- force = imp->imp_force_verify;
- force_next = imp->imp_force_next_verify;
- /*
- * This will be used below only if the import is "FULL".
- */
- suppress = ir_up && OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS);
-
- imp->imp_force_verify = 0;
-
- if (time_after_eq(imp->imp_next_ping - 5, this_ping) &&
- !force) {
- spin_unlock(&imp->imp_lock);
- return;
- }
-
- imp->imp_force_next_verify = 0;
-
- spin_unlock(&imp->imp_lock);
-
- CDEBUG(level == LUSTRE_IMP_FULL ? D_INFO : D_HA, "%s->%s: level %s/%u force %u force_next %u deactive %u pingable %u suppress %u\n",
- imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd),
- ptlrpc_import_state_name(level), level, force, force_next,
- imp->imp_deactive, imp->imp_pingable, suppress);
-
- if (level == LUSTRE_IMP_DISCON && !imp_is_deactive(imp)) {
- /* wait for a while before trying recovery again */
- imp->imp_next_ping = ptlrpc_next_reconnect(imp);
- if (!imp->imp_no_pinger_recover)
- ptlrpc_initiate_recovery(imp);
- } else if (level != LUSTRE_IMP_FULL ||
- imp->imp_obd->obd_no_recov ||
- imp_is_deactive(imp)) {
- CDEBUG(D_HA, "%s->%s: not pinging (in recovery or recovery disabled: %s)\n",
- imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd),
- ptlrpc_import_state_name(level));
- if (force) {
- spin_lock(&imp->imp_lock);
- imp->imp_force_verify = 1;
- spin_unlock(&imp->imp_lock);
- }
- } else if ((imp->imp_pingable && !suppress) || force_next || force) {
- ptlrpc_ping(imp);
- }
-}
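
The branches above reduce to a small decision table: a disconnected (but not deactivated) import triggers recovery, an import that is not yet FULL is skipped, and a FULL import is pinged unless pings are suppressed and not forced. A compact, deliberately simplified sketch of that decision (DISCON and FULL stand in for the LUSTRE_IMP_* values; the no_recov and no_pinger_recover cases are folded into "deactive"):

    #include <stdio.h>

    enum imp_state { DISCON, CONNECTING, FULL };  /* simplified states */

    /* Simplified model of the decision in ptlrpc_pinger_process_import(). */
    static const char *pinger_action(enum imp_state state, int deactive,
                                     int suppress, int force)
    {
            if (state == DISCON && !deactive)
                    return "initiate recovery";
            if (state != FULL || deactive)
                    return "skip";
            if (!suppress || force)
                    return "ping";
            return "idle";
    }

    int main(void)
    {
            printf("%s\n", pinger_action(DISCON, 0, 0, 0));   /* recovery */
            printf("%s\n", pinger_action(FULL, 0, 1, 0));     /* idle */
            printf("%s\n", pinger_action(FULL, 0, 1, 1));     /* ping */
            return 0;
    }
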
-
-static struct workqueue_struct *pinger_wq;
-static void ptlrpc_pinger_main(struct work_struct *ws);
-static DECLARE_DELAYED_WORK(ping_work, ptlrpc_pinger_main);
-
-static void ptlrpc_pinger_main(struct work_struct *ws)
-{
- unsigned long this_ping = jiffies;
- long time_to_next_wake;
- struct timeout_item *item;
- struct obd_import *imp;
-
- do {
- mutex_lock(&pinger_mutex);
- list_for_each_entry(item, &timeout_list, ti_chain) {
- item->ti_cb(item, item->ti_cb_data);
- }
- list_for_each_entry(imp, &pinger_imports, imp_pinger_chain) {
- ptlrpc_pinger_process_import(imp, this_ping);
- /* obd_timeout might have changed */
- if (imp->imp_pingable && imp->imp_next_ping &&
- time_after(imp->imp_next_ping,
- this_ping + PING_INTERVAL * HZ))
- ptlrpc_update_next_ping(imp, 0);
- }
- mutex_unlock(&pinger_mutex);
-
- /* Wait until the next ping time, or until we're stopped. */
- time_to_next_wake = pinger_check_timeout(this_ping);
- /* The ping sent by ptlrpc_send_rpc may get sent out
- * say .01 second after this.
- * ptlrpc_pinger_sending_on_import will then set the
- * next ping time to next_ping + .01 sec, which means
- * we will SKIP the next ping at next_ping, and the
- * ping will get sent 2 timeouts from now! Beware.
- */
- CDEBUG(D_INFO, "next wakeup in %ld (%ld)\n",
- time_to_next_wake,
- this_ping + PING_INTERVAL * HZ);
- } while (time_to_next_wake <= 0);
-
- queue_delayed_work(pinger_wq, &ping_work,
- round_jiffies_up_relative(time_to_next_wake));
-}
-
-int ptlrpc_start_pinger(void)
-{
- if (pinger_wq)
- return -EALREADY;
-
- pinger_wq = alloc_workqueue("ptlrpc_pinger", WQ_MEM_RECLAIM, 1);
- if (!pinger_wq) {
- CERROR("cannot start pinger workqueue\n");
- return -ENOMEM;
- }
-
- queue_delayed_work(pinger_wq, &ping_work, 0);
- return 0;
-}
-
-static int ptlrpc_pinger_remove_timeouts(void);
-
-int ptlrpc_stop_pinger(void)
-{
- int rc = 0;
-
- if (!pinger_wq)
- return -EALREADY;
-
- ptlrpc_pinger_remove_timeouts();
- cancel_delayed_work_sync(&ping_work);
- destroy_workqueue(pinger_wq);
- pinger_wq = NULL;
-
- return rc;
-}
-
-void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
-{
- ptlrpc_update_next_ping(imp, 0);
-}
-
-void ptlrpc_pinger_commit_expected(struct obd_import *imp)
-{
- ptlrpc_update_next_ping(imp, 1);
- assert_spin_locked(&imp->imp_lock);
- /*
- * Avoid reading stale imp_connect_data. When not sure if pings are
- * expected or not on next connection, we assume they are not and force
- * one anyway to guarantee the chance of updating
- * imp_peer_committed_transno.
- */
- if (imp->imp_state != LUSTRE_IMP_FULL ||
- OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS))
- imp->imp_force_next_verify = 1;
-}
-
-int ptlrpc_pinger_add_import(struct obd_import *imp)
-{
- if (!list_empty(&imp->imp_pinger_chain))
- return -EALREADY;
-
- mutex_lock(&pinger_mutex);
- CDEBUG(D_HA, "adding pingable import %s->%s\n",
- imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
- /* if we add to pinger we want recovery on this import */
- imp->imp_obd->obd_no_recov = 0;
- ptlrpc_update_next_ping(imp, 0);
- /* XXX sort, blah blah */
- list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
- class_import_get(imp);
-
- ptlrpc_pinger_wake_up();
- mutex_unlock(&pinger_mutex);
-
- return 0;
-}
-EXPORT_SYMBOL(ptlrpc_pinger_add_import);
-
-int ptlrpc_pinger_del_import(struct obd_import *imp)
-{
- if (list_empty(&imp->imp_pinger_chain))
- return -ENOENT;
-
- mutex_lock(&pinger_mutex);
- list_del_init(&imp->imp_pinger_chain);
- CDEBUG(D_HA, "removing pingable import %s->%s\n",
- imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
- /* if we remove from pinger we don't want recovery on this import */
- imp->imp_obd->obd_no_recov = 1;
- class_import_put(imp);
- mutex_unlock(&pinger_mutex);
- return 0;
-}
-EXPORT_SYMBOL(ptlrpc_pinger_del_import);
-
-/**
- * Register a timeout callback on the pinger's timeout list; the callback
- * will be invoked when the timeout expires.
- */
-static struct timeout_item *ptlrpc_new_timeout(int time,
- enum timeout_event event,
- timeout_cb_t cb, void *data)
-{
- struct timeout_item *ti;
-
- ti = kzalloc(sizeof(*ti), GFP_NOFS);
- if (!ti)
- return NULL;
-
- INIT_LIST_HEAD(&ti->ti_obd_list);
- INIT_LIST_HEAD(&ti->ti_chain);
- ti->ti_timeout = time;
- ti->ti_event = event;
- ti->ti_cb = cb;
- ti->ti_cb_data = data;
-
- return ti;
-}
-
-/**
- * Register a timeout event on the pinger thread.
- * Note: the timeout list is kept sorted in order of increasing timeout value.
- */
-static struct timeout_item *
-ptlrpc_pinger_register_timeout(int time, enum timeout_event event,
- timeout_cb_t cb, void *data)
-{
- struct timeout_item *item, *tmp;
-
- LASSERT(mutex_is_locked(&pinger_mutex));
-
- list_for_each_entry(item, &timeout_list, ti_chain)
- if (item->ti_event == event)
- goto out;
-
- item = ptlrpc_new_timeout(time, event, cb, data);
- if (item) {
- list_for_each_entry_reverse(tmp, &timeout_list, ti_chain) {
- if (tmp->ti_timeout < time) {
- list_add(&item->ti_chain, &tmp->ti_chain);
- goto out;
- }
- }
- list_add(&item->ti_chain, &timeout_list);
- }
-out:
- return item;
-}
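
The function keeps the list ordered by scanning it in reverse and inserting the new item after the first entry with a smaller timeout. A standalone sketch of equivalent sorted insertion, using a forward scan over a singly linked list (struct item here is illustrative, not the kernel's timeout_item):

    #include <stdio.h>
    #include <stdlib.h>

    struct item {
            int timeout;
            struct item *next;
    };

    /* Insert while keeping the list sorted by ascending timeout. */
    static void insert_sorted(struct item **head, struct item *ni)
    {
            struct item **p = head;

            while (*p && (*p)->timeout < ni->timeout)
                    p = &(*p)->next;
            ni->next = *p;
            *p = ni;
    }

    int main(void)
    {
            struct item *head = NULL, *it;
            int vals[] = { 30, 10, 20 };
            int i;

            for (i = 0; i < 3; i++) {
                    struct item *ni = malloc(sizeof(*ni));

                    if (!ni)
                            return 1;
                    ni->timeout = vals[i];
                    insert_sorted(&head, ni);
            }
            for (it = head; it; it = it->next)
                    printf("%d ", it->timeout);  /* prints: 10 20 30 */
            printf("\n");
            return 0;
    }
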
-
-/* Add a client_obd to the timeout event list; when the timeout (@time)
- * expires, the callback (@cb) will be called.
- */
-int ptlrpc_add_timeout_client(int time, enum timeout_event event,
- timeout_cb_t cb, void *data,
- struct list_head *obd_list)
-{
- struct timeout_item *ti;
-
- mutex_lock(&pinger_mutex);
- ti = ptlrpc_pinger_register_timeout(time, event, cb, data);
- if (!ti) {
- mutex_unlock(&pinger_mutex);
- return -EINVAL;
- }
- list_add(obd_list, &ti->ti_obd_list);
- mutex_unlock(&pinger_mutex);
- return 0;
-}
-EXPORT_SYMBOL(ptlrpc_add_timeout_client);
-
-int ptlrpc_del_timeout_client(struct list_head *obd_list,
- enum timeout_event event)
-{
- struct timeout_item *ti = NULL, *item;
-
- if (list_empty(obd_list))
- return 0;
- mutex_lock(&pinger_mutex);
- list_del_init(obd_list);
- /*
- * If no obds remain attached to this timeout event's
- * list, remove the timeout event from the pinger.
- */
- list_for_each_entry(item, &timeout_list, ti_chain) {
- if (item->ti_event == event) {
- ti = item;
- break;
- }
- }
- if (ti && list_empty(&ti->ti_obd_list)) {
- list_del(&ti->ti_chain);
- kfree(ti);
- }
- mutex_unlock(&pinger_mutex);
- return 0;
-}
-EXPORT_SYMBOL(ptlrpc_del_timeout_client);
-
-static int ptlrpc_pinger_remove_timeouts(void)
-{
- struct timeout_item *item, *tmp;
-
- mutex_lock(&pinger_mutex);
- list_for_each_entry_safe(item, tmp, &timeout_list, ti_chain) {
- LASSERT(list_empty(&item->ti_obd_list));
- list_del(&item->ti_chain);
- kfree(item);
- }
- mutex_unlock(&pinger_mutex);
- return 0;
-}
-
-void ptlrpc_pinger_wake_up(void)
-{
- mod_delayed_work(pinger_wq, &ping_work, 0);
-}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
deleted file mode 100644
index 134b74234519..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
+++ /dev/null
@@ -1,371 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-/* Intramodule declarations for ptlrpc. */
-
-#ifndef PTLRPC_INTERNAL_H
-#define PTLRPC_INTERNAL_H
-
-#include "../ldlm/ldlm_internal.h"
-
-struct ldlm_namespace;
-struct obd_import;
-struct ldlm_res_id;
-struct ptlrpc_request_set;
-extern int test_req_buffer_pressure;
-extern struct mutex ptlrpc_all_services_mutex;
-extern struct list_head ptlrpc_all_services;
-
-extern struct mutex ptlrpcd_mutex;
-extern struct mutex pinger_mutex;
-
-int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait);
-/* ptlrpcd.c */
-int ptlrpcd_start(struct ptlrpcd_ctl *pc);
-
-/* client.c */
-void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
- unsigned int service_time);
-struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned int nfrags,
- unsigned int max_brw,
- enum ptlrpc_bulk_op_type type,
- unsigned int portal,
- const struct ptlrpc_bulk_frag_ops *ops);
-int ptlrpc_request_cache_init(void);
-void ptlrpc_request_cache_fini(void);
-struct ptlrpc_request *ptlrpc_request_cache_alloc(gfp_t flags);
-void ptlrpc_request_cache_free(struct ptlrpc_request *req);
-void ptlrpc_init_xid(void);
-void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc,
- struct ptlrpc_request *req);
-void ptlrpc_expired_set(struct ptlrpc_request_set *set);
-int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set);
-void ptlrpc_resend_req(struct ptlrpc_request *request);
-void ptlrpc_set_bulk_mbits(struct ptlrpc_request *req);
-void ptlrpc_assign_next_xid_nolock(struct ptlrpc_request *req);
-__u64 ptlrpc_known_replied_xid(struct obd_import *imp);
-void ptlrpc_add_unreplied(struct ptlrpc_request *req);
-
-/* events.c */
-int ptlrpc_init_portals(void);
-void ptlrpc_exit_portals(void);
-
-void ptlrpc_request_handle_notconn(struct ptlrpc_request *req);
-void lustre_assert_wire_constants(void);
-int ptlrpc_import_in_recovery(struct obd_import *imp);
-int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt);
-int ptlrpc_replay_next(struct obd_import *imp, int *inflight);
-void ptlrpc_initiate_recovery(struct obd_import *imp);
-
-int lustre_unpack_req_ptlrpc_body(struct ptlrpc_request *req, int offset);
-int lustre_unpack_rep_ptlrpc_body(struct ptlrpc_request *req, int offset);
-
-int ptlrpc_sysfs_register_service(struct kset *parent,
- struct ptlrpc_service *svc);
-void ptlrpc_sysfs_unregister_service(struct ptlrpc_service *svc);
-
-void ptlrpc_ldebugfs_register_service(struct dentry *debugfs_entry,
- struct ptlrpc_service *svc);
-void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc);
-void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req, long amount);
-
-/* NRS */
-
-/**
- * NRS core object.
- *
- * Holds NRS core fields.
- */
-struct nrs_core {
- /**
- * Protects nrs_core::nrs_policies, serializes external policy
- * registration/unregistration, and NRS core lprocfs operations.
- */
- struct mutex nrs_mutex;
- /**
- * List of all policy descriptors registered with NRS core; protected
- * by nrs_core::nrs_mutex.
- */
- struct list_head nrs_policies;
-
-};
-
-extern struct nrs_core nrs_core;
-
-int ptlrpc_service_nrs_setup(struct ptlrpc_service *svc);
-void ptlrpc_service_nrs_cleanup(struct ptlrpc_service *svc);
-
-void ptlrpc_nrs_req_initialize(struct ptlrpc_service_part *svcpt,
- struct ptlrpc_request *req, bool hp);
-void ptlrpc_nrs_req_finalize(struct ptlrpc_request *req);
-void ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request *req);
-void ptlrpc_nrs_req_add(struct ptlrpc_service_part *svcpt,
- struct ptlrpc_request *req, bool hp);
-
-struct ptlrpc_request *
-ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part *svcpt, bool hp,
- bool peek, bool force);
-
-static inline struct ptlrpc_request *
-ptlrpc_nrs_req_get_nolock(struct ptlrpc_service_part *svcpt, bool hp,
- bool force)
-{
- return ptlrpc_nrs_req_get_nolock0(svcpt, hp, false, force);
-}
-
-bool ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part *svcpt, bool hp);
-
-int ptlrpc_nrs_policy_control(const struct ptlrpc_service *svc,
- enum ptlrpc_nrs_queue_type queue, char *name,
- enum ptlrpc_nrs_ctl opc, bool single, void *arg);
-
-int ptlrpc_nrs_init(void);
-void ptlrpc_nrs_fini(void);
-
-static inline bool nrs_svcpt_has_hp(const struct ptlrpc_service_part *svcpt)
-{
- return svcpt->scp_nrs_hp != NULL;
-}
-
-static inline bool nrs_svc_has_hp(const struct ptlrpc_service *svc)
-{
- /**
- * If the first service partition has an HP NRS head, all service
- * partitions will.
- */
- return nrs_svcpt_has_hp(svc->srv_parts[0]);
-}
-
-static inline
-struct ptlrpc_nrs *nrs_svcpt2nrs(struct ptlrpc_service_part *svcpt, bool hp)
-{
- LASSERT(ergo(hp, nrs_svcpt_has_hp(svcpt)));
- return hp ? svcpt->scp_nrs_hp : &svcpt->scp_nrs_reg;
-}
-
-static inline int nrs_pol2cptid(const struct ptlrpc_nrs_policy *policy)
-{
- return policy->pol_nrs->nrs_svcpt->scp_cpt;
-}
-
-static inline
-struct ptlrpc_service *nrs_pol2svc(struct ptlrpc_nrs_policy *policy)
-{
- return policy->pol_nrs->nrs_svcpt->scp_service;
-}
-
-static inline
-struct ptlrpc_service_part *nrs_pol2svcpt(struct ptlrpc_nrs_policy *policy)
-{
- return policy->pol_nrs->nrs_svcpt;
-}
-
-static inline
-struct cfs_cpt_table *nrs_pol2cptab(struct ptlrpc_nrs_policy *policy)
-{
- return nrs_pol2svc(policy)->srv_cptable;
-}
-
-static inline struct ptlrpc_nrs_resource *
-nrs_request_resource(struct ptlrpc_nrs_request *nrq)
-{
- LASSERT(nrq->nr_initialized);
- LASSERT(!nrq->nr_finalized);
-
- return nrq->nr_res_ptrs[nrq->nr_res_idx];
-}
-
-static inline
-struct ptlrpc_nrs_policy *nrs_request_policy(struct ptlrpc_nrs_request *nrq)
-{
- return nrs_request_resource(nrq)->res_policy;
-}
-
-#define NRS_LPROCFS_QUANTUM_NAME_REG "reg_quantum:"
-#define NRS_LPROCFS_QUANTUM_NAME_HP "hp_quantum:"
-
-/**
- * the maximum size of nrs_crrn_client::cc_quantum and nrs_orr_data::od_quantum.
- */
-#define LPROCFS_NRS_QUANTUM_MAX 65535
-
-/**
- * Max valid command string is the size of the labels, plus "65535" twice, plus
- * a separating space character.
- */
-#define LPROCFS_NRS_WR_QUANTUM_MAX_CMD \
- sizeof(NRS_LPROCFS_QUANTUM_NAME_REG __stringify(LPROCFS_NRS_QUANTUM_MAX) " " \
- NRS_LPROCFS_QUANTUM_NAME_HP __stringify(LPROCFS_NRS_QUANTUM_MAX))
-
-/* ptlrpc/nrs_fifo.c */
-extern struct ptlrpc_nrs_pol_conf nrs_conf_fifo;
-
-/* recovd_thread.c */
-
-int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink);
-
-/* pers.c */
-void ptlrpc_fill_bulk_md(struct lnet_md *md, struct ptlrpc_bulk_desc *desc,
- int mdcnt);
-
-/* pack_generic.c */
-struct ptlrpc_reply_state *
-lustre_get_emerg_rs(struct ptlrpc_service_part *svcpt);
-void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs);
-
-/* pinger.c */
-int ptlrpc_start_pinger(void);
-int ptlrpc_stop_pinger(void);
-void ptlrpc_pinger_sending_on_import(struct obd_import *imp);
-void ptlrpc_pinger_commit_expected(struct obd_import *imp);
-void ptlrpc_pinger_wake_up(void);
-
-/* sec_null.c */
-int sptlrpc_null_init(void);
-void sptlrpc_null_fini(void);
-
-/* sec_plain.c */
-int sptlrpc_plain_init(void);
-void sptlrpc_plain_fini(void);
-
-/* sec_bulk.c */
-int sptlrpc_enc_pool_init(void);
-void sptlrpc_enc_pool_fini(void);
-int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v);
-
-/* sec_lproc.c */
-void sptlrpc_lproc_init(void);
-void sptlrpc_lproc_fini(void);
-
-/* sec_gc.c */
-int sptlrpc_gc_init(void);
-void sptlrpc_gc_fini(void);
-
-/* sec_config.c */
-void sptlrpc_conf_choose_flavor(enum lustre_sec_part from,
- enum lustre_sec_part to,
- struct obd_uuid *target,
- lnet_nid_t nid,
- struct sptlrpc_flavor *sf);
-int sptlrpc_conf_init(void);
-void sptlrpc_conf_fini(void);
-
-/* sec.c */
-int sptlrpc_init(void);
-void sptlrpc_fini(void);
-
-static inline bool ptlrpc_recoverable_error(int rc)
-{
- return (rc == -ENOTCONN || rc == -ENODEV);
-}
-
-static inline int tgt_mod_init(void)
-{
- return 0;
-}
-
-static inline void tgt_mod_exit(void)
-{
- return;
-}
-
-static inline void ptlrpc_reqset_put(struct ptlrpc_request_set *set)
-{
- if (atomic_dec_and_test(&set->set_refcount))
- kfree(set);
-}
-
-/** initialise ptlrpc common fields */
-static inline void ptlrpc_req_comm_init(struct ptlrpc_request *req)
-{
- spin_lock_init(&req->rq_lock);
- atomic_set(&req->rq_refcount, 1);
- INIT_LIST_HEAD(&req->rq_list);
- INIT_LIST_HEAD(&req->rq_replay_list);
-}
-
-/** initialise client side ptlrpc request */
-static inline void ptlrpc_cli_req_init(struct ptlrpc_request *req)
-{
- struct ptlrpc_cli_req *cr = &req->rq_cli;
-
- ptlrpc_req_comm_init(req);
-
- req->rq_receiving_reply = 0;
- req->rq_req_unlinked = 1;
- req->rq_reply_unlinked = 1;
-
- INIT_LIST_HEAD(&cr->cr_set_chain);
- INIT_LIST_HEAD(&cr->cr_ctx_chain);
- INIT_LIST_HEAD(&cr->cr_unreplied_list);
- init_waitqueue_head(&cr->cr_reply_waitq);
- init_waitqueue_head(&cr->cr_set_waitq);
-}
-
-/** initialise server side ptlrpc request */
-static inline void ptlrpc_srv_req_init(struct ptlrpc_request *req)
-{
- struct ptlrpc_srv_req *sr = &req->rq_srv;
-
- ptlrpc_req_comm_init(req);
- req->rq_srv_req = 1;
- INIT_LIST_HEAD(&sr->sr_exp_list);
- INIT_LIST_HEAD(&sr->sr_timed_list);
- INIT_LIST_HEAD(&sr->sr_hist_list);
-}
-
-static inline bool ptlrpc_req_is_connect(struct ptlrpc_request *req)
-{
- if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CONNECT ||
- lustre_msg_get_opc(req->rq_reqmsg) == OST_CONNECT ||
- lustre_msg_get_opc(req->rq_reqmsg) == MGS_CONNECT)
- return true;
- else
- return false;
-}
-
-static inline bool ptlrpc_req_is_disconnect(struct ptlrpc_request *req)
-{
- if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_DISCONNECT ||
- lustre_msg_get_opc(req->rq_reqmsg) == OST_DISCONNECT ||
- lustre_msg_get_opc(req->rq_reqmsg) == MGS_DISCONNECT)
- return true;
- else
- return false;
-}
-
-#endif /* PTLRPC_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c
deleted file mode 100644
index 5c32b657b3b5..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c
+++ /dev/null
@@ -1,186 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_RPC
-
-#include <obd_support.h>
-#include <obd_class.h>
-#include <lustre_net.h>
-#include <lustre_req_layout.h>
-
-#include "ptlrpc_internal.h"
-
-extern spinlock_t ptlrpc_last_xid_lock;
-#if RS_DEBUG
-extern spinlock_t ptlrpc_rs_debug_lock;
-#endif
-
-DEFINE_MUTEX(ptlrpc_startup);
-static int ptlrpc_active;
-
-int ptlrpc_inc_ref(void)
-{
- int rc = 0;
-
- mutex_lock(&ptlrpc_startup);
- if (ptlrpc_active++ == 0) {
- ptlrpc_put_connection_superhack = ptlrpc_connection_put;
-
- rc = ptlrpc_init_portals();
- if (!rc) {
- rc = ptlrpc_start_pinger();
- if (rc)
- ptlrpc_exit_portals();
- }
- if (rc)
- ptlrpc_active--;
- }
- mutex_unlock(&ptlrpc_startup);
- return rc;
-}
-EXPORT_SYMBOL(ptlrpc_inc_ref);
-
-void ptlrpc_dec_ref(void)
-{
- mutex_lock(&ptlrpc_startup);
- if (--ptlrpc_active == 0) {
- ptlrpc_stop_pinger();
- ptlrpc_exit_portals();
- }
- mutex_unlock(&ptlrpc_startup);
-}
-EXPORT_SYMBOL(ptlrpc_dec_ref);
-
-static int __init ptlrpc_init(void)
-{
- int rc, cleanup_phase = 0;
-
- lustre_assert_wire_constants();
-#if RS_DEBUG
- spin_lock_init(&ptlrpc_rs_debug_lock);
-#endif
- mutex_init(&ptlrpc_all_services_mutex);
- mutex_init(&pinger_mutex);
- mutex_init(&ptlrpcd_mutex);
- ptlrpc_init_xid();
-
- rc = libcfs_setup();
- if (rc)
- return rc;
-
- rc = req_layout_init();
- if (rc)
- return rc;
-
- rc = ptlrpc_hr_init();
- if (rc)
- return rc;
-
- cleanup_phase = 1;
- rc = ptlrpc_request_cache_init();
- if (rc)
- goto cleanup;
-
- cleanup_phase = 3;
-
- rc = ptlrpc_connection_init();
- if (rc)
- goto cleanup;
-
- cleanup_phase = 5;
- rc = ldlm_init();
- if (rc)
- goto cleanup;
-
- cleanup_phase = 6;
- rc = sptlrpc_init();
- if (rc)
- goto cleanup;
-
- cleanup_phase = 7;
- rc = ptlrpc_nrs_init();
- if (rc)
- goto cleanup;
-
- cleanup_phase = 8;
- rc = tgt_mod_init();
- if (rc)
- goto cleanup;
- return 0;
-
-cleanup:
- switch (cleanup_phase) {
- case 8:
- ptlrpc_nrs_fini();
- /* Fall through */
- case 7:
- sptlrpc_fini();
- /* Fall through */
- case 6:
- ldlm_exit();
- /* Fall through */
- case 5:
- ptlrpc_connection_fini();
- /* Fall through */
- case 3:
- ptlrpc_request_cache_fini();
- /* Fall through */
- case 1:
- ptlrpc_hr_fini();
- req_layout_fini();
- /* Fall through */
- default:
- ;
- }
-
- return rc;
-}
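
The cleanup_phase counter and the fall-through switch let the error path unwind exactly the steps that completed, in reverse order. A self-contained sketch of the pattern (the step names and the fail_at knob are illustrative, not the real init steps):

    #include <stdio.h>

    static int step_init(const char *name)
    {
            printf("init %s\n", name);
            return 0;
    }

    static void step_fini(const char *name)
    {
            printf("fini %s\n", name);
    }

    /* Model of the staged init in ptlrpc_init(): bump the phase after
     * each success; on error, unwind completed phases in reverse. */
    static int module_init_model(int fail_at)
    {
            int phase = 0;
            int rc;

            rc = (fail_at == 1) ? -1 : step_init("cache");
            if (rc)
                    goto cleanup;
            phase = 1;

            rc = (fail_at == 2) ? -1 : step_init("connections");
            if (rc)
                    goto cleanup;
            phase = 2;

            rc = (fail_at == 3) ? -1 : step_init("nrs");
            if (rc)
                    goto cleanup;
            return 0;

    cleanup:
            switch (phase) {
            case 2:
                    step_fini("connections");
                    /* fall through */
            case 1:
                    step_fini("cache");
                    /* fall through */
            default:
                    break;
            }
            return rc;
    }

    int main(void)
    {
            /* Failing at step 3 unwinds "connections" then "cache". */
            printf("rc = %d\n", module_init_model(3));
            return 0;
    }
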
-
-static void __exit ptlrpc_exit(void)
-{
- tgt_mod_exit();
- ptlrpc_nrs_fini();
- sptlrpc_fini();
- ldlm_exit();
- ptlrpc_request_cache_fini();
- ptlrpc_hr_fini();
- ptlrpc_connection_fini();
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Request Processor and Lock Management");
-MODULE_VERSION(LUSTRE_VERSION_STRING);
-MODULE_LICENSE("GPL");
-
-module_init(ptlrpc_init);
-module_exit(ptlrpc_exit);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
deleted file mode 100644
index 531005411edf..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
+++ /dev/null
@@ -1,914 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ptlrpc/ptlrpcd.c
- */
-
-/** \defgroup ptlrpcd PortalRPC daemon
- *
- * ptlrpcd is a special thread with its own set where other users may add
- * requests when they don't want to wait for their completion.
- * PtlRPCD will take care of sending such requests and then processing their
- * replies and calling completion callbacks as necessary.
- * The callbacks are called directly from ptlrpcd context.
- * It is important never to block significantly (especially on RPCs!) within
- * such a completion handler, or a deadlock might occur: ptlrpcd enters a
- * callback that sends another RPC and waits for it to return, during which
- * time ptlrpcd is completely blocked, so e.g. if an import fails, recovery
- * cannot progress because connection requests are also sent by ptlrpcd.
- *
- * @{
- */
-
-#define DEBUG_SUBSYSTEM S_RPC
-
-#include <linux/kthread.h>
-#include <linux/libcfs/libcfs.h>
-#include <linux/libcfs/libcfs_cpu.h>
-#include <linux/libcfs/libcfs_string.h>
-#include <lustre_net.h>
-#include <lustre_lib.h>
-#include <lustre_ha.h>
-#include <obd_class.h> /* for obd_zombie */
-#include <obd_support.h> /* for OBD_FAIL_CHECK */
-#include <cl_object.h> /* cl_env_{get,put}() */
-#include <lprocfs_status.h>
-
-#include "ptlrpc_internal.h"
-
-/* One of these per CPT. */
-struct ptlrpcd {
- int pd_size;
- int pd_index;
- int pd_cpt;
- int pd_cursor;
- int pd_nthreads;
- int pd_groupsize;
- struct ptlrpcd_ctl pd_threads[0];
-};
-
-/*
- * max_ptlrpcds is obsolete, but retained to ensure that the kernel
- * module will load on a system where it has been tuned.
- * A value other than 0 implies it was tuned, in which case the value
- * is used to derive a setting for ptlrpcd_per_cpt_max.
- */
-static int max_ptlrpcds;
-module_param(max_ptlrpcds, int, 0644);
-MODULE_PARM_DESC(max_ptlrpcds,
- "Max ptlrpcd thread count to be started (obsolete).");
-
-/*
- * ptlrpcd_bind_policy is obsolete, but retained to ensure that
- * the kernel module will load on a system where it has been tuned.
- * A value other than 0 implies it was tuned, in which case the value
- * is used to derive a setting for ptlrpcd_partner_group_size.
- */
-static int ptlrpcd_bind_policy;
-module_param(ptlrpcd_bind_policy, int, 0644);
-MODULE_PARM_DESC(ptlrpcd_bind_policy,
- "Ptlrpcd threads binding mode (obsolete).");
-
-/*
- * ptlrpcd_per_cpt_max: The maximum number of ptlrpcd threads to run
- * in a CPT.
- */
-static int ptlrpcd_per_cpt_max;
-module_param(ptlrpcd_per_cpt_max, int, 0644);
-MODULE_PARM_DESC(ptlrpcd_per_cpt_max,
- "Max ptlrpcd thread count to be started per CPT.");
-
-/*
- * ptlrpcd_partner_group_size: The desired number of threads in each
- * ptlrpcd partner thread group. Default is 2, corresponding to the
- * old PDB_POLICY_PAIR. A negative value makes all ptlrpcd threads in
- * a CPT partners of each other.
- */
-static int ptlrpcd_partner_group_size;
-module_param(ptlrpcd_partner_group_size, int, 0644);
-MODULE_PARM_DESC(ptlrpcd_partner_group_size,
- "Number of ptlrpcd threads in a partner group.");
-
-/*
- * ptlrpcd_cpts: A CPT string describing the CPU partitions that
- * ptlrpcd threads should run on. Used to make ptlrpcd threads run on
- * a subset of all CPTs.
- *
- * ptlrpcd_cpts=2
- * ptlrpcd_cpts=[2]
- * run ptlrpcd threads only on CPT 2.
- *
- * ptlrpcd_cpts=0-3
- * ptlrpcd_cpts=[0-3]
- * run ptlrpcd threads on CPTs 0, 1, 2, and 3.
- *
- * ptlrpcd_cpts=[0-3,5,7]
- * run ptlrpcd threads on CPTs 0, 1, 2, 3, 5, and 7.
- */
-static char *ptlrpcd_cpts;
-module_param(ptlrpcd_cpts, charp, 0644);
-MODULE_PARM_DESC(ptlrpcd_cpts,
- "CPU partitions ptlrpcd threads should run in");
-
-/* ptlrpcds_cpt_idx maps cpt numbers to an index in the ptlrpcds array. */
-static int *ptlrpcds_cpt_idx;
-
-/* ptlrpcds_num is the number of entries in the ptlrpcds array. */
-static int ptlrpcds_num;
-static struct ptlrpcd **ptlrpcds;
-
-/*
- * In addition to the regular thread pool above, there is a single
- * global recovery thread. Recovery isn't critical for performance,
- * and doesn't block, but must always be able to proceed, and it is
- * possible that all normal ptlrpcd threads are blocked. Hence the
- * need for a dedicated thread.
- */
-static struct ptlrpcd_ctl ptlrpcd_rcv;
-
-struct mutex ptlrpcd_mutex;
-static int ptlrpcd_users;
-
-void ptlrpcd_wake(struct ptlrpc_request *req)
-{
- struct ptlrpc_request_set *set = req->rq_set;
-
- wake_up(&set->set_waitq);
-}
-EXPORT_SYMBOL(ptlrpcd_wake);
-
-static struct ptlrpcd_ctl *
-ptlrpcd_select_pc(struct ptlrpc_request *req)
-{
- struct ptlrpcd *pd;
- int cpt;
- int idx;
-
- if (req && req->rq_send_state != LUSTRE_IMP_FULL)
- return &ptlrpcd_rcv;
-
- cpt = cfs_cpt_current(cfs_cpt_tab, 1);
- if (!ptlrpcds_cpt_idx)
- idx = cpt;
- else
- idx = ptlrpcds_cpt_idx[cpt];
- pd = ptlrpcds[idx];
-
- /* We do not care about strict load balancing here. */
- idx = pd->pd_cursor;
- if (++idx == pd->pd_nthreads)
- idx = 0;
- pd->pd_cursor = idx;
-
- return &pd->pd_threads[idx];
-}
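
Selection advances a per-CPT cursor and returns the next thread in the pool; as the comment above notes, strict load balancing is not a goal. A sketch of the cursor arithmetic (struct pool is a stand-in for struct ptlrpcd):

    #include <stdio.h>

    struct pool {
            int nthreads;
            int cursor;
    };

    /* Model of the round robin in ptlrpcd_select_pc(). */
    static int pick_thread(struct pool *p)
    {
            int idx = p->cursor + 1;

            if (idx == p->nthreads)
                    idx = 0;
            p->cursor = idx;
            return idx;
    }

    int main(void)
    {
            struct pool p = { .nthreads = 3, .cursor = 0 };
            int i;

            for (i = 0; i < 6; i++)
                    printf("%d ", pick_thread(&p));  /* 1 2 0 1 2 0 */
            printf("\n");
            return 0;
    }
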
-
-/**
- * Return transferred RPCs count.
- */
-static int ptlrpcd_steal_rqset(struct ptlrpc_request_set *des,
- struct ptlrpc_request_set *src)
-{
- struct ptlrpc_request *req, *tmp;
- int rc = 0;
-
- spin_lock(&src->set_new_req_lock);
- if (likely(!list_empty(&src->set_new_requests))) {
- list_for_each_entry_safe(req, tmp, &src->set_new_requests, rq_set_chain)
- req->rq_set = des;
-
- list_splice_init(&src->set_new_requests, &des->set_requests);
- rc = atomic_read(&src->set_new_count);
- atomic_add(rc, &des->set_remaining);
- atomic_set(&src->set_new_count, 0);
- }
- spin_unlock(&src->set_new_req_lock);
- return rc;
-}
-
-/**
- * Requests that are added to the ptlrpcd queue are sent via
- * ptlrpcd_check->ptlrpc_check_set().
- */
-void ptlrpcd_add_req(struct ptlrpc_request *req)
-{
- struct ptlrpcd_ctl *pc;
-
- if (req->rq_reqmsg)
- lustre_msg_set_jobid(req->rq_reqmsg, NULL);
-
- spin_lock(&req->rq_lock);
- if (req->rq_invalid_rqset) {
- req->rq_invalid_rqset = 0;
- spin_unlock(&req->rq_lock);
- if (wait_event_idle_timeout(req->rq_set_waitq,
- !req->rq_set,
- 5 * HZ) == 0)
- wait_event_idle(req->rq_set_waitq,
- !req->rq_set);
- } else if (req->rq_set) {
- /* If we have a valid "rq_set", just reuse it to avoid double
- * linking.
- */
- LASSERT(req->rq_phase == RQ_PHASE_NEW);
- LASSERT(req->rq_send_state == LUSTRE_IMP_REPLAY);
-
- /* ptlrpc_check_set will decrease the count */
- atomic_inc(&req->rq_set->set_remaining);
- spin_unlock(&req->rq_lock);
- wake_up(&req->rq_set->set_waitq);
- return;
- } else {
- spin_unlock(&req->rq_lock);
- }
-
- pc = ptlrpcd_select_pc(req);
-
- DEBUG_REQ(D_INFO, req, "add req [%p] to pc [%s:%d]",
- req, pc->pc_name, pc->pc_index);
-
- ptlrpc_set_add_new_req(pc, req);
-}
-EXPORT_SYMBOL(ptlrpcd_add_req);
-
-static inline void ptlrpc_reqset_get(struct ptlrpc_request_set *set)
-{
- atomic_inc(&set->set_refcount);
-}
-
-/**
- * Check if there is more work to do on ptlrpcd set.
- * Returns 1 if yes.
- */
-static int ptlrpcd_check(struct lu_env *env, struct ptlrpcd_ctl *pc)
-{
- struct ptlrpc_request *req, *tmp;
- struct ptlrpc_request_set *set = pc->pc_set;
- int rc = 0;
- int rc2;
-
- if (atomic_read(&set->set_new_count)) {
- spin_lock(&set->set_new_req_lock);
- if (likely(!list_empty(&set->set_new_requests))) {
- list_splice_init(&set->set_new_requests,
- &set->set_requests);
- atomic_add(atomic_read(&set->set_new_count),
- &set->set_remaining);
- atomic_set(&set->set_new_count, 0);
- /*
- * Need to calculate its timeout.
- */
- rc = 1;
- }
- spin_unlock(&set->set_new_req_lock);
- }
-
- /* We should call lu_env_refill() before handling new requests to make
- * sure that the env keys the requests depend on really exist.
- */
- rc2 = lu_env_refill(env);
- if (rc2 != 0) {
- /*
- * XXX This is very awkward situation, because
- * execution can neither continue (request
- * interpreters assume that env is set up), nor repeat
- * the loop (as this potentially results in a tight
- * loop of -ENOMEM's).
- *
- * Fortunately, refill only ever does something when
- * new modules are loaded, i.e., early during boot up.
- */
- CERROR("Failure to refill session: %d\n", rc2);
- return rc;
- }
-
- if (atomic_read(&set->set_remaining))
- rc |= ptlrpc_check_set(env, set);
-
- /* NB: ptlrpc_check_set has already moved completed requests to the
- * head of set::set_requests
- */
- list_for_each_entry_safe(req, tmp, &set->set_requests, rq_set_chain) {
- if (req->rq_phase != RQ_PHASE_COMPLETE)
- break;
-
- list_del_init(&req->rq_set_chain);
- req->rq_set = NULL;
- ptlrpc_req_finished(req);
- }
-
- if (rc == 0) {
- /*
- * If new requests have been added, make sure to wake up.
- */
- rc = atomic_read(&set->set_new_count);
-
- /* If we have nothing to do, check whether we can take some
- * work from our partner threads.
- */
- if (rc == 0 && pc->pc_npartners > 0) {
- struct ptlrpcd_ctl *partner;
- struct ptlrpc_request_set *ps;
- int first = pc->pc_cursor;
-
- do {
- partner = pc->pc_partners[pc->pc_cursor++];
- if (pc->pc_cursor >= pc->pc_npartners)
- pc->pc_cursor = 0;
- if (!partner)
- continue;
-
- spin_lock(&partner->pc_lock);
- ps = partner->pc_set;
- if (!ps) {
- spin_unlock(&partner->pc_lock);
- continue;
- }
-
- ptlrpc_reqset_get(ps);
- spin_unlock(&partner->pc_lock);
-
- if (atomic_read(&ps->set_new_count)) {
- rc = ptlrpcd_steal_rqset(set, ps);
- if (rc > 0)
- CDEBUG(D_RPCTRACE, "transfer %d async RPCs [%d->%d]\n",
- rc, partner->pc_index,
- pc->pc_index);
- }
- ptlrpc_reqset_put(ps);
- } while (rc == 0 && pc->pc_cursor != first);
- }
- }
-
- return rc;
-}
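
When its own set is idle, ptlrpcd_check() sweeps its partner array starting at a saved cursor and stops as soon as one steal succeeds or the scan wraps around. A sketch of that wrap-around scan, with integer pending counts standing in for the partners' request sets:

    #include <stdio.h>

    #define NPARTNERS 4

    /* Sketch of the partner sweep in ptlrpcd_check(): start at the
     * cursor, wrap around, and stop early once work is found. */
    static int steal_from_partners(int pending[NPARTNERS], int *cursor)
    {
            int first = *cursor;
            int stolen = 0;

            do {
                    int i = (*cursor)++;

                    if (*cursor >= NPARTNERS)
                            *cursor = 0;
                    if (pending[i]) {
                            stolen = pending[i];
                            pending[i] = 0;
                            break;
                    }
            } while (*cursor != first);

            return stolen;
    }

    int main(void)
    {
            int pending[NPARTNERS] = { 0, 0, 3, 0 };
            int cursor = 1;

            printf("stole %d requests\n",
                   steal_from_partners(pending, &cursor));
            return 0;
    }
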
-
-/**
- * Main ptlrpcd thread.
- * ptlrpc's code paths like to execute in process context, so we have this
- * thread, which spins on a set containing the RPCs and sends them.
- */
-static int ptlrpcd(void *arg)
-{
- struct ptlrpcd_ctl *pc = arg;
- struct ptlrpc_request_set *set;
- struct lu_context ses = { 0 };
- struct lu_env env = { .le_ses = &ses };
- int rc = 0;
- int exit = 0;
-
- unshare_fs_struct();
- if (cfs_cpt_bind(cfs_cpt_tab, pc->pc_cpt) != 0)
- CWARN("Failed to bind %s on CPT %d\n", pc->pc_name, pc->pc_cpt);
-
- /*
- * Allocate the request set after the thread has been bound
- * above. This is safe because no requests will be queued
- * until all ptlrpcd threads have confirmed that they have
- * successfully started.
- */
- set = ptlrpc_prep_set();
- if (!set) {
- rc = -ENOMEM;
- goto failed;
- }
- spin_lock(&pc->pc_lock);
- pc->pc_set = set;
- spin_unlock(&pc->pc_lock);
- /*
- * XXX So far only "client" ptlrpcd uses an environment. In
- * the future, the ptlrpcd thread (or a thread set) has to be given
- * an argument describing its "scope".
- */
- rc = lu_context_init(&env.le_ctx,
- LCT_CL_THREAD | LCT_REMEMBER | LCT_NOREF);
- if (rc == 0) {
- rc = lu_context_init(env.le_ses,
- LCT_SESSION | LCT_REMEMBER | LCT_NOREF);
- if (rc != 0)
- lu_context_fini(&env.le_ctx);
- }
-
- if (rc != 0)
- goto failed;
-
- complete(&pc->pc_starting);
-
- /*
- * This main loop strongly resembles ptlrpc_set_wait(), except that our
- * set never completes. ptlrpcd_check() calls ptlrpc_check_set() when
- * there are requests in the set. New requests come in on the set's
- * new_req_list and ptlrpcd_check() moves them into the set.
- */
- do {
- int timeout;
-
- timeout = ptlrpc_set_next_timeout(set);
-
- lu_context_enter(&env.le_ctx);
- lu_context_enter(env.le_ses);
- if (wait_event_idle_timeout(set->set_waitq,
- ptlrpcd_check(&env, pc),
- (timeout ? timeout : 1) * HZ) == 0)
- ptlrpc_expired_set(set);
-
- lu_context_exit(&env.le_ctx);
- lu_context_exit(env.le_ses);
-
- /*
- * Abort inflight rpcs for forced stop case.
- */
- if (test_bit(LIOD_STOP, &pc->pc_flags)) {
- if (test_bit(LIOD_FORCE, &pc->pc_flags))
- ptlrpc_abort_set(set);
- exit++;
- }
-
- /*
- * Let's make one more loop to make sure that ptlrpcd_check()
- * copied all raced new rpcs into the set so we can kill them.
- */
- } while (exit < 2);
-
- /*
- * Wait for inflight requests to drain.
- */
- if (!list_empty(&set->set_requests))
- ptlrpc_set_wait(set);
- lu_context_fini(&env.le_ctx);
- lu_context_fini(env.le_ses);
-
- complete(&pc->pc_finishing);
-
- return 0;
-failed:
- pc->pc_error = rc;
- complete(&pc->pc_starting);
- return rc;
-}
-
-static void ptlrpcd_ctl_init(struct ptlrpcd_ctl *pc, int index, int cpt)
-{
- pc->pc_index = index;
- pc->pc_cpt = cpt;
- init_completion(&pc->pc_starting);
- init_completion(&pc->pc_finishing);
- spin_lock_init(&pc->pc_lock);
-
- if (index < 0) {
- /* Recovery thread. */
- snprintf(pc->pc_name, sizeof(pc->pc_name), "ptlrpcd_rcv");
- } else {
- /* Regular thread. */
- snprintf(pc->pc_name, sizeof(pc->pc_name),
- "ptlrpcd_%02d_%02d", cpt, index);
- }
-}
-
-/* XXX: We want multiple CPU cores to share the async RPC load. So we
- * start many ptlrpcd threads. We also want to reduce the ptlrpcd
- * overhead caused by data transfer cross-CPU cores. So we bind
- * all ptlrpcd threads to a CPT, in the expectation that CPTs
- * will be defined in a way that matches these boundaries. Within
- * a CPT a ptlrpcd thread can be scheduled on any available core.
- *
- * Each ptlrpcd thread has its own request queue. This can cause
- * response delay if the thread is already busy. To help with
- * this we define partner threads: these are other threads bound
- * to the same CPT which will check for work in each other's
- * request queues if they have no work to do.
- *
- * The desired number of partner threads can be tuned by setting
- * ptlrpcd_partner_group_size. The default is to create pairs of
- * partner threads.
- */
-static int ptlrpcd_partners(struct ptlrpcd *pd, int index)
-{
- struct ptlrpcd_ctl *pc;
- struct ptlrpcd_ctl **ppc;
- int first;
- int i;
- int rc = 0;
- int size;
-
- LASSERT(index >= 0 && index < pd->pd_nthreads);
- pc = &pd->pd_threads[index];
- pc->pc_npartners = pd->pd_groupsize - 1;
-
- if (pc->pc_npartners <= 0)
- goto out;
-
- size = sizeof(struct ptlrpcd_ctl *) * pc->pc_npartners;
- pc->pc_partners = kzalloc_node(size, GFP_NOFS,
- cfs_cpt_spread_node(cfs_cpt_tab,
- pc->pc_cpt));
- if (!pc->pc_partners) {
- pc->pc_npartners = 0;
- rc = -ENOMEM;
- goto out;
- }
-
- first = index - index % pd->pd_groupsize;
- ppc = pc->pc_partners;
- for (i = first; i < first + pd->pd_groupsize; i++) {
- if (i != index)
- *ppc++ = &pd->pd_threads[i];
- }
-out:
- return rc;
-}
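
Partner assignment is pure index arithmetic: thread index belongs to the group of pd_groupsize consecutive threads starting at index - index % pd_groupsize, and partners with every other member of that group. A sketch of the membership computation (the group size and thread count are illustrative):

    #include <stdio.h>

    /* Model of the grouping in ptlrpcd_partners(). */
    int main(void)
    {
            int groupsize = 2, nthreads = 6;
            int index, i;

            for (index = 0; index < nthreads; index++) {
                    int first = index - index % groupsize;

                    printf("thread %d partners:", index);
                    for (i = first; i < first + groupsize; i++)
                            if (i != index)
                                    printf(" %d", i);
                    printf("\n");
            }
            return 0;
    }
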
-
-int ptlrpcd_start(struct ptlrpcd_ctl *pc)
-{
- struct task_struct *task;
- int rc = 0;
-
- /*
- * Do not allow starting a second thread for the same pc.
- */
- if (test_and_set_bit(LIOD_START, &pc->pc_flags)) {
- CWARN("Starting second thread (%s) for same pc %p\n",
- pc->pc_name, pc);
- return 0;
- }
-
- task = kthread_run(ptlrpcd, pc, "%s", pc->pc_name);
- if (IS_ERR(task)) {
- rc = PTR_ERR(task);
- goto out_set;
- }
-
- wait_for_completion(&pc->pc_starting);
- rc = pc->pc_error;
- if (rc != 0)
- goto out_set;
-
- return 0;
-
-out_set:
- if (pc->pc_set) {
- struct ptlrpc_request_set *set = pc->pc_set;
-
- spin_lock(&pc->pc_lock);
- pc->pc_set = NULL;
- spin_unlock(&pc->pc_lock);
- ptlrpc_set_destroy(set);
- }
- clear_bit(LIOD_START, &pc->pc_flags);
- return rc;
-}
-
-void ptlrpcd_stop(struct ptlrpcd_ctl *pc, int force)
-{
- if (!test_bit(LIOD_START, &pc->pc_flags)) {
- CWARN("Thread for pc %p was not started\n", pc);
- return;
- }
-
- set_bit(LIOD_STOP, &pc->pc_flags);
- if (force)
- set_bit(LIOD_FORCE, &pc->pc_flags);
- wake_up(&pc->pc_set->set_waitq);
-}
-
-void ptlrpcd_free(struct ptlrpcd_ctl *pc)
-{
- struct ptlrpc_request_set *set = pc->pc_set;
-
- if (!test_bit(LIOD_START, &pc->pc_flags)) {
- CWARN("Thread for pc %p was not started\n", pc);
- goto out;
- }
-
- wait_for_completion(&pc->pc_finishing);
-
- spin_lock(&pc->pc_lock);
- pc->pc_set = NULL;
- spin_unlock(&pc->pc_lock);
- ptlrpc_set_destroy(set);
-
- clear_bit(LIOD_START, &pc->pc_flags);
- clear_bit(LIOD_STOP, &pc->pc_flags);
- clear_bit(LIOD_FORCE, &pc->pc_flags);
-
-out:
- if (pc->pc_npartners > 0) {
- LASSERT(pc->pc_partners);
-
- kfree(pc->pc_partners);
- pc->pc_partners = NULL;
- }
- pc->pc_npartners = 0;
- pc->pc_error = 0;
-}
-
-static void ptlrpcd_fini(void)
-{
- int i;
- int j;
-
- if (ptlrpcds) {
- for (i = 0; i < ptlrpcds_num; i++) {
- if (!ptlrpcds[i])
- break;
- for (j = 0; j < ptlrpcds[i]->pd_nthreads; j++)
- ptlrpcd_stop(&ptlrpcds[i]->pd_threads[j], 0);
- for (j = 0; j < ptlrpcds[i]->pd_nthreads; j++)
- ptlrpcd_free(&ptlrpcds[i]->pd_threads[j]);
- kfree(ptlrpcds[i]);
- ptlrpcds[i] = NULL;
- }
- kfree(ptlrpcds);
- }
- ptlrpcds_num = 0;
-
- ptlrpcd_stop(&ptlrpcd_rcv, 0);
- ptlrpcd_free(&ptlrpcd_rcv);
-
- kfree(ptlrpcds_cpt_idx);
- ptlrpcds_cpt_idx = NULL;
-}
-
-static int ptlrpcd_init(void)
-{
- int nthreads;
- int groupsize;
- int size;
- int i;
- int j;
- int rc = 0;
- struct cfs_cpt_table *cptable;
- __u32 *cpts = NULL;
- int ncpts;
- int cpt;
- struct ptlrpcd *pd;
-
- /*
- * Determine the CPTs that ptlrpcd threads will run on.
- */
- cptable = cfs_cpt_tab;
- ncpts = cfs_cpt_number(cptable);
- if (ptlrpcd_cpts) {
- struct cfs_expr_list *el;
-
- size = ncpts * sizeof(ptlrpcds_cpt_idx[0]);
- ptlrpcds_cpt_idx = kzalloc(size, GFP_KERNEL);
- if (!ptlrpcds_cpt_idx) {
- rc = -ENOMEM;
- goto out;
- }
-
- rc = cfs_expr_list_parse(ptlrpcd_cpts,
- strlen(ptlrpcd_cpts),
- 0, ncpts - 1, &el);
-
- if (rc != 0) {
- CERROR("ptlrpcd_cpts: invalid CPT pattern string: %s",
- ptlrpcd_cpts);
- rc = -EINVAL;
- goto out;
- }
-
- rc = cfs_expr_list_values(el, ncpts, &cpts);
- cfs_expr_list_free(el);
- if (rc <= 0) {
- CERROR("ptlrpcd_cpts: failed to parse CPT array %s: %d\n",
- ptlrpcd_cpts, rc);
- if (rc == 0)
- rc = -EINVAL;
- goto out;
- }
-
- /*
- * Create the cpt-to-index map. When there is no match
- * in the cpt table, fall back to a simple modulo. This could
- * be changed to take the topology of the system into
- * account.
- */
- for (cpt = 0; cpt < ncpts; cpt++) {
- for (i = 0; i < rc; i++)
- if (cpts[i] == cpt)
- break;
- if (i >= rc)
- i = cpt % rc;
- ptlrpcds_cpt_idx[cpt] = i;
- }
-
- cfs_expr_list_values_free(cpts, rc);
- ncpts = rc;
- }
- ptlrpcds_num = ncpts;
-
- size = ncpts * sizeof(ptlrpcds[0]);
- ptlrpcds = kzalloc(size, GFP_KERNEL);
- if (!ptlrpcds) {
- rc = -ENOMEM;
- goto out;
- }
-
- /*
- * The max_ptlrpcds parameter is obsolete, but do something
- * sane if it has been tuned, and complain if
- * ptlrpcd_per_cpt_max has also been tuned.
- */
- if (max_ptlrpcds != 0) {
- CWARN("max_ptlrpcds is obsolete.\n");
- if (ptlrpcd_per_cpt_max == 0) {
- ptlrpcd_per_cpt_max = max_ptlrpcds / ncpts;
- /* Round up if there is a remainder. */
- if (max_ptlrpcds % ncpts != 0)
- ptlrpcd_per_cpt_max++;
- CWARN("Setting ptlrpcd_per_cpt_max = %d\n",
- ptlrpcd_per_cpt_max);
- } else {
- CWARN("ptlrpd_per_cpt_max is also set!\n");
- }
- }
-
- /*
- * The ptlrpcd_bind_policy parameter is obsolete, but do
- * something sane if it has been tuned, and complain if
- * ptlrpcd_partner_group_size is also tuned.
- */
- if (ptlrpcd_bind_policy != 0) {
- CWARN("ptlrpcd_bind_policy is obsolete.\n");
- if (ptlrpcd_partner_group_size == 0) {
- switch (ptlrpcd_bind_policy) {
- case 1: /* PDB_POLICY_NONE */
- case 2: /* PDB_POLICY_FULL */
- ptlrpcd_partner_group_size = 1;
- break;
- case 3: /* PDB_POLICY_PAIR */
- ptlrpcd_partner_group_size = 2;
- break;
- case 4: /* PDB_POLICY_NEIGHBOR */
-#ifdef CONFIG_NUMA
- ptlrpcd_partner_group_size = -1; /* CPT */
-#else
- ptlrpcd_partner_group_size = 3; /* Triplets */
-#endif
- break;
- default: /* Illegal value, use the default. */
- ptlrpcd_partner_group_size = 2;
- break;
- }
- CWARN("Setting ptlrpcd_partner_group_size = %d\n",
- ptlrpcd_partner_group_size);
- } else {
- CWARN("ptlrpcd_partner_group_size is also set!\n");
- }
- }
-
- if (ptlrpcd_partner_group_size == 0)
- ptlrpcd_partner_group_size = 2;
- else if (ptlrpcd_partner_group_size < 0)
- ptlrpcd_partner_group_size = -1;
- else if (ptlrpcd_per_cpt_max > 0 &&
- ptlrpcd_partner_group_size > ptlrpcd_per_cpt_max)
- ptlrpcd_partner_group_size = ptlrpcd_per_cpt_max;
-
- /*
- * Start the recovery thread first.
- */
- set_bit(LIOD_RECOVERY, &ptlrpcd_rcv.pc_flags);
- ptlrpcd_ctl_init(&ptlrpcd_rcv, -1, CFS_CPT_ANY);
- rc = ptlrpcd_start(&ptlrpcd_rcv);
- if (rc < 0)
- goto out;
-
- for (i = 0; i < ncpts; i++) {
- if (!cpts)
- cpt = i;
- else
- cpt = cpts[i];
-
- nthreads = cfs_cpt_weight(cptable, cpt);
- if (ptlrpcd_per_cpt_max > 0 && ptlrpcd_per_cpt_max < nthreads)
- nthreads = ptlrpcd_per_cpt_max;
- if (nthreads < 2)
- nthreads = 2;
-
- if (ptlrpcd_partner_group_size <= 0) {
- groupsize = nthreads;
- } else if (nthreads <= ptlrpcd_partner_group_size) {
- groupsize = nthreads;
- } else {
- groupsize = ptlrpcd_partner_group_size;
- if (nthreads % groupsize != 0)
- nthreads += groupsize - (nthreads % groupsize);
- }
-
- size = offsetof(struct ptlrpcd, pd_threads[nthreads]);
- pd = kzalloc_node(size, GFP_NOFS,
- cfs_cpt_spread_node(cfs_cpt_tab, cpt));
- if (!pd) {
- rc = -ENOMEM;
- goto out;
- }
- pd->pd_size = size;
- pd->pd_index = i;
- pd->pd_cpt = cpt;
- pd->pd_cursor = 0;
- pd->pd_nthreads = nthreads;
- pd->pd_groupsize = groupsize;
- ptlrpcds[i] = pd;
-
- /*
- * The ptlrpcd threads in a partner group can access
- * each other's struct ptlrpcd_ctl, so these must be
- * initialized before any thread is started.
- */
- for (j = 0; j < nthreads; j++) {
- ptlrpcd_ctl_init(&pd->pd_threads[j], j, cpt);
- rc = ptlrpcd_partners(pd, j);
- if (rc < 0)
- goto out;
- }
-
- /* XXX: We start nthreads ptlrpc daemons.
- * Each of them can process any non-recovery
- * async RPC to improve overall async RPC
- * efficiency.
- *
- * But in some cases there are issues when async
- * I/O RPCs and async non-I/O RPCs are processed
- * in the same set: a ptlrpcd may be blocked by
- * async I/O RPC(s), preventing other async
- * non-I/O RPC(s) from being processed in time.
- *
- * Maybe we should distinguish blocked async RPCs
- * from non-blocked async RPCs, and process them
- * in different ptlrpcd sets to avoid unnecessary
- * dependencies. But how to distribute the async
- * RPC load among all the ptlrpc daemons then
- * becomes another problem.
- */
- for (j = 0; j < nthreads; j++) {
- rc = ptlrpcd_start(&pd->pd_threads[j]);
- if (rc < 0)
- goto out;
- }
- }
-out:
- if (rc != 0)
- ptlrpcd_fini();
-
- return rc;
-}
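
When a CPT has more threads than the partner group size, the thread count is rounded up to a whole number of groups so every thread gets a full partner set. A sketch of that rounding (the values are illustrative):

    #include <stdio.h>

    /* Model of the thread-count rounding in ptlrpcd_init(). */
    static int round_to_groups(int nthreads, int groupsize)
    {
            if (groupsize <= 0 || nthreads <= groupsize)
                    return nthreads;
            if (nthreads % groupsize)
                    nthreads += groupsize - nthreads % groupsize;
            return nthreads;
    }

    int main(void)
    {
            printf("%d\n", round_to_groups(7, 2));   /* 8 */
            printf("%d\n", round_to_groups(2, 3));   /* 2 */
            printf("%d\n", round_to_groups(9, 4));   /* 12 */
            return 0;
    }
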
-
-int ptlrpcd_addref(void)
-{
- int rc = 0;
-
- mutex_lock(&ptlrpcd_mutex);
- if (++ptlrpcd_users == 1) {
- rc = ptlrpcd_init();
- if (rc < 0)
- ptlrpcd_users--;
- }
- mutex_unlock(&ptlrpcd_mutex);
- return rc;
-}
-EXPORT_SYMBOL(ptlrpcd_addref);
-
-void ptlrpcd_decref(void)
-{
- mutex_lock(&ptlrpcd_mutex);
- if (--ptlrpcd_users == 0)
- ptlrpcd_fini();
- mutex_unlock(&ptlrpcd_mutex);
-}
-EXPORT_SYMBOL(ptlrpcd_decref);
-/** @} ptlrpcd */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/recover.c b/drivers/staging/lustre/lustre/ptlrpc/recover.c
deleted file mode 100644
index 2ea0a7ff87dd..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/recover.c
+++ /dev/null
@@ -1,374 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ptlrpc/recover.c
- *
- * Author: Mike Shaver <shaver@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_RPC
-#include <linux/libcfs/libcfs.h>
-
-#include <obd_support.h>
-#include <lustre_ha.h>
-#include <lustre_net.h>
-#include <lustre_import.h>
-#include <lustre_export.h>
-#include <obd.h>
-#include <obd_class.h>
-#include <linux/list.h>
-
-#include "ptlrpc_internal.h"
-
-/**
- * Start recovery on disconnected import.
- * This is done by just attempting a connect
- */
-void ptlrpc_initiate_recovery(struct obd_import *imp)
-{
- CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
- ptlrpc_connect_import(imp);
-}
-
-/**
- * Identify what request from replay list needs to be replayed next
- * (based on what we have already replayed) and send it to server.
- */
-int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
-{
- int rc = 0;
- struct ptlrpc_request *req = NULL, *pos;
- __u64 last_transno;
-
- *inflight = 0;
-
- /* The server might have committed some requests since we last
- * spoke, so make sure we get rid of them now.
- */
- spin_lock(&imp->imp_lock);
- imp->imp_last_transno_checked = 0;
- ptlrpc_free_committed(imp);
- last_transno = imp->imp_last_replay_transno;
-
- CDEBUG(D_HA, "import %p from %s committed %llu last %llu\n",
- imp, obd2cli_tgt(imp->imp_obd),
- imp->imp_peer_committed_transno, last_transno);
-
- /* Replay all the committed open requests on committed_list first */
- if (!list_empty(&imp->imp_committed_list)) {
- req = list_last_entry(&imp->imp_committed_list,
- struct ptlrpc_request, rq_replay_list);
-
- /* The last request on committed_list hasn't been replayed */
- if (req->rq_transno > last_transno) {
- if (!imp->imp_resend_replay ||
- imp->imp_replay_cursor == &imp->imp_committed_list)
- imp->imp_replay_cursor = imp->imp_replay_cursor->next;
-
- while (imp->imp_replay_cursor !=
- &imp->imp_committed_list) {
- req = list_entry(imp->imp_replay_cursor,
- struct ptlrpc_request,
- rq_replay_list);
- if (req->rq_transno > last_transno)
- break;
-
- req = NULL;
- LASSERT(!list_empty(imp->imp_replay_cursor));
- imp->imp_replay_cursor =
- imp->imp_replay_cursor->next;
- }
- } else {
- /* All requests on committed_list have been replayed */
- imp->imp_replay_cursor = &imp->imp_committed_list;
- req = NULL;
- }
- }
-
- /* All the requests in committed list have been replayed, let's replay
- * the imp_replay_list
- */
- if (!req) {
- struct ptlrpc_request *tmp;
- list_for_each_entry_safe(tmp, pos, &imp->imp_replay_list,
- rq_replay_list) {
- if (tmp->rq_transno > last_transno) {
- req = tmp;
- break;
- }
- }
- }
-
- /* If we need to resend the last sent transno (because a reconnect
- * has occurred), then stop on the matching req and send it again.
- * If, however, the last sent transno has been committed then we
- * continue replay from the next request.
- */
- if (req && imp->imp_resend_replay)
- lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
-
- /* The resend replay request may have been removed from the
- * unreplied list.
- */
- if (req && imp->imp_resend_replay &&
- list_empty(&req->rq_unreplied_list)) {
- ptlrpc_add_unreplied(req);
- imp->imp_known_replied_xid = ptlrpc_known_replied_xid(imp);
- }
-
- imp->imp_resend_replay = 0;
- spin_unlock(&imp->imp_lock);
-
- if (req) {
- /* The request should have been added back in unreplied list
- * by ptlrpc_prepare_replay().
- */
- LASSERT(!list_empty(&req->rq_unreplied_list));
-
- rc = ptlrpc_replay_req(req);
- if (rc) {
- CERROR("recovery replay error %d for req %llu\n",
- rc, req->rq_xid);
- return rc;
- }
- *inflight = 1;
- }
- return rc;
-}
-
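-/*
- * Worked example (editorial, not part of the original file): with
- * committed open requests at transnos 5, 7 and 9 on imp_committed_list
- * and imp_last_replay_transno == 7, the cursor walk above skips the
- * entries for 5 and 7 and picks the request with transno 9 as the next
- * one to replay; once the committed list is exhausted, replay continues
- * from imp_replay_list.
- */
-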
-/**
- * Schedule resending of requests on the sending_list. This is done after
- * we have completed replaying requests and locks.
- */
-int ptlrpc_resend(struct obd_import *imp)
-{
- struct ptlrpc_request *req, *next;
-
- /* As long as we're in recovery, nothing should be added to the sending
- * list, so we don't need to hold the lock during this iteration and
- * resend process.
- */
- /* Well... what if lctl recover is called twice at the same time?
- */
- spin_lock(&imp->imp_lock);
- if (imp->imp_state != LUSTRE_IMP_RECOVER) {
- spin_unlock(&imp->imp_lock);
- return -1;
- }
-
- list_for_each_entry_safe(req, next, &imp->imp_sending_list, rq_list) {
- LASSERTF((long)req > PAGE_SIZE && req != LP_POISON,
- "req %p bad\n", req);
- LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
-
- /*
- * If the request is allowed to be sent during replay and it
- * has not timed out yet, then it does not need to be resent.
- */
- if (!ptlrpc_no_resend(req) &&
- (req->rq_timedout || !req->rq_allow_replay))
- ptlrpc_resend_req(req);
- }
- spin_unlock(&imp->imp_lock);
-
- OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT, 2);
- return 0;
-}
-
-/**
- * Go through all requests in the delayed list and wake their threads
- * for resending
- */
-void ptlrpc_wake_delayed(struct obd_import *imp)
-{
- struct ptlrpc_request *req, *pos;
-
- spin_lock(&imp->imp_lock);
- list_for_each_entry_safe(req, pos, &imp->imp_delayed_list, rq_list) {
- DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
- ptlrpc_client_wake_req(req);
- }
- spin_unlock(&imp->imp_lock);
-}
-
-void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
-{
- struct obd_import *imp = failed_req->rq_import;
-
- CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
- imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
- imp->imp_connection->c_remote_uuid.uuid);
-
- if (ptlrpc_set_import_discon(imp,
- lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
- if (!imp->imp_replayable) {
- CDEBUG(D_HA, "import %s@%s for %s not replayable, auto-deactivating\n",
- obd2cli_tgt(imp->imp_obd),
- imp->imp_connection->c_remote_uuid.uuid,
- imp->imp_obd->obd_name);
- ptlrpc_deactivate_import(imp);
- }
- /* to control recovery via lctl {disable|enable}_recovery */
- if (imp->imp_deactive == 0)
- ptlrpc_connect_import(imp);
- }
-
- /* Wait for recovery to complete and resend. If evicted, then
- * this request will be errored out later.
- */
- spin_lock(&failed_req->rq_lock);
- if (!failed_req->rq_no_resend)
- failed_req->rq_resend = 1;
- spin_unlock(&failed_req->rq_lock);
-}
-
-/**
- * Administratively activate/deactivate a client.
- * This should only be called by the ioctl interface, currently
- * - the lctl deactivate and activate commands
- * - echo 0/1 >> /sys/fs/lustre/osc/XXX/active
- * - client umount -f (ll_umount_begin)
- */
-int ptlrpc_set_import_active(struct obd_import *imp, int active)
-{
- struct obd_device *obd = imp->imp_obd;
- int rc = 0;
-
- LASSERT(obd);
-
- /* When deactivating, mark import invalid, and abort in-flight
- * requests.
- */
- if (!active) {
- LCONSOLE_WARN("setting import %s INACTIVE by administrator request\n",
- obd2cli_tgt(imp->imp_obd));
-
- /* set before invalidate to avoid messages about imp_inval
- * set without imp_deactive in ptlrpc_import_delay_req
- */
- spin_lock(&imp->imp_lock);
- imp->imp_deactive = 1;
- spin_unlock(&imp->imp_lock);
-
- obd_import_event(imp->imp_obd, imp, IMP_EVENT_DEACTIVATE);
-
- ptlrpc_invalidate_import(imp);
- }
-
- /* When activating, mark import valid, and attempt recovery */
- if (active) {
- CDEBUG(D_HA, "setting import %s VALID\n",
- obd2cli_tgt(imp->imp_obd));
-
- spin_lock(&imp->imp_lock);
- imp->imp_deactive = 0;
- spin_unlock(&imp->imp_lock);
- obd_import_event(imp->imp_obd, imp, IMP_EVENT_ACTIVATE);
-
- rc = ptlrpc_recover_import(imp, NULL, 0);
- }
-
- return rc;
-}
-EXPORT_SYMBOL(ptlrpc_set_import_active);
-
-/* Attempt to reconnect an import */
-int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async)
-{
- int rc = 0;
-
- spin_lock(&imp->imp_lock);
- if (imp->imp_state == LUSTRE_IMP_NEW || imp->imp_deactive ||
- atomic_read(&imp->imp_inval_count))
- rc = -EINVAL;
- spin_unlock(&imp->imp_lock);
- if (rc)
- goto out;
-
- /* force import to be disconnected. */
- ptlrpc_set_import_discon(imp, 0);
-
- if (new_uuid) {
- struct obd_uuid uuid;
-
- /* instruct import to use new uuid */
- obd_str2uuid(&uuid, new_uuid);
- rc = import_set_conn_priority(imp, &uuid);
- if (rc)
- goto out;
- }
-
- /* Check if reconnect is already in progress */
- spin_lock(&imp->imp_lock);
- if (imp->imp_state != LUSTRE_IMP_DISCON) {
- imp->imp_force_verify = 1;
- rc = -EALREADY;
- }
- spin_unlock(&imp->imp_lock);
- if (rc)
- goto out;
-
- rc = ptlrpc_connect_import(imp);
- if (rc)
- goto out;
-
- if (!async) {
- CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
- obd2cli_tgt(imp->imp_obd), obd_timeout);
-
- rc = wait_event_idle_timeout(imp->imp_recovery_waitq,
- !ptlrpc_import_in_recovery(imp),
- obd_timeout * HZ);
- CDEBUG(D_HA, "%s: recovery finished\n",
- obd2cli_tgt(imp->imp_obd));
- rc = rc ? 0 : -ETIMEDOUT;
- }
-
-out:
- return rc;
-}
-EXPORT_SYMBOL(ptlrpc_recover_import);
-
-int ptlrpc_import_in_recovery(struct obd_import *imp)
-{
- int in_recovery = 1;
-
- spin_lock(&imp->imp_lock);
- if (imp->imp_state == LUSTRE_IMP_FULL ||
- imp->imp_state == LUSTRE_IMP_CLOSED ||
- imp->imp_state == LUSTRE_IMP_DISCON ||
- imp->imp_obd->obd_no_recov)
- in_recovery = 0;
- spin_unlock(&imp->imp_lock);
-
- return in_recovery;
-}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c
deleted file mode 100644
index e193f3346e6f..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/sec.c
+++ /dev/null
@@ -1,2379 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ptlrpc/sec.c
- *
- * Author: Eric Mei <ericm@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_SEC
-
-#include <linux/libcfs/libcfs.h>
-#include <linux/crypto.h>
-#include <linux/cred.h>
-#include <linux/key.h>
-#include <linux/sched/task.h>
-
-#include <obd.h>
-#include <obd_class.h>
-#include <obd_support.h>
-#include <lustre_net.h>
-#include <lustre_import.h>
-#include <lustre_dlm.h>
-#include <lustre_sec.h>
-
-#include "ptlrpc_internal.h"
-
-/***********************************************
- * policy registers *
- ***********************************************/
-
-static rwlock_t policy_lock;
-static struct ptlrpc_sec_policy *policies[SPTLRPC_POLICY_MAX] = {
- NULL,
-};
-
-int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy)
-{
- __u16 number = policy->sp_policy;
-
- LASSERT(policy->sp_name);
- LASSERT(policy->sp_cops);
- LASSERT(policy->sp_sops);
-
- if (number >= SPTLRPC_POLICY_MAX)
- return -EINVAL;
-
- write_lock(&policy_lock);
- if (unlikely(policies[number])) {
- write_unlock(&policy_lock);
- return -EALREADY;
- }
- policies[number] = policy;
- write_unlock(&policy_lock);
-
- CDEBUG(D_SEC, "%s: registered\n", policy->sp_name);
- return 0;
-}
-EXPORT_SYMBOL(sptlrpc_register_policy);
-
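-/*
- * Editorial sketch, not part of the original file: a policy module is
- * expected to register itself at module init and unregister at exit.
- * The my_policy value and its field contents are hypothetical.
- *
- *    static struct ptlrpc_sec_policy my_policy = {
- *        .sp_policy = SPTLRPC_POLICY_PLAIN,    // slot in policies[]
- *        .sp_name   = "plain.example",
- *        .sp_cops   = &my_cli_ops,
- *        .sp_sops   = &my_svc_ops,
- *    };
- *
- *    rc = sptlrpc_register_policy(&my_policy);    // -EALREADY if taken
- *    ...
- *    sptlrpc_unregister_policy(&my_policy);
- */
-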
-int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy)
-{
- __u16 number = policy->sp_policy;
-
- LASSERT(number < SPTLRPC_POLICY_MAX);
-
- write_lock(&policy_lock);
- if (unlikely(!policies[number])) {
- write_unlock(&policy_lock);
- CERROR("%s: already unregistered\n", policy->sp_name);
- return -EINVAL;
- }
-
- LASSERT(policies[number] == policy);
- policies[number] = NULL;
- write_unlock(&policy_lock);
-
- CDEBUG(D_SEC, "%s: unregistered\n", policy->sp_name);
- return 0;
-}
-EXPORT_SYMBOL(sptlrpc_unregister_policy);
-
-static
-struct ptlrpc_sec_policy *sptlrpc_wireflavor2policy(__u32 flavor)
-{
- static DEFINE_MUTEX(load_mutex);
- static atomic_t loaded = ATOMIC_INIT(0);
- struct ptlrpc_sec_policy *policy;
- __u16 number = SPTLRPC_FLVR_POLICY(flavor);
- __u16 flag = 0;
-
- if (number >= SPTLRPC_POLICY_MAX)
- return NULL;
-
- while (1) {
- read_lock(&policy_lock);
- policy = policies[number];
- if (policy && !try_module_get(policy->sp_owner))
- policy = NULL;
- if (!policy)
- flag = atomic_read(&loaded);
- read_unlock(&policy_lock);
-
- if (policy || flag != 0 ||
- number != SPTLRPC_POLICY_GSS)
- break;
-
- /* try to load gss module, once */
- mutex_lock(&load_mutex);
- if (atomic_read(&loaded) == 0) {
- if (request_module("ptlrpc_gss") == 0)
- CDEBUG(D_SEC,
- "module ptlrpc_gss loaded on demand\n");
- else
- CERROR("Unable to load module ptlrpc_gss\n");
-
- atomic_set(&loaded, 1);
- }
- mutex_unlock(&load_mutex);
- }
-
- return policy;
-}
-
-__u32 sptlrpc_name2flavor_base(const char *name)
-{
- if (!strcmp(name, "null"))
- return SPTLRPC_FLVR_NULL;
- if (!strcmp(name, "plain"))
- return SPTLRPC_FLVR_PLAIN;
- if (!strcmp(name, "krb5n"))
- return SPTLRPC_FLVR_KRB5N;
- if (!strcmp(name, "krb5a"))
- return SPTLRPC_FLVR_KRB5A;
- if (!strcmp(name, "krb5i"))
- return SPTLRPC_FLVR_KRB5I;
- if (!strcmp(name, "krb5p"))
- return SPTLRPC_FLVR_KRB5P;
-
- return SPTLRPC_FLVR_INVALID;
-}
-EXPORT_SYMBOL(sptlrpc_name2flavor_base);
-
-const char *sptlrpc_flavor2name_base(__u32 flvr)
-{
- __u32 base = SPTLRPC_FLVR_BASE(flvr);
-
- if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL))
- return "null";
- else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_PLAIN))
- return "plain";
- else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5N))
- return "krb5n";
- else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5A))
- return "krb5a";
- else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5I))
- return "krb5i";
- else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5P))
- return "krb5p";
-
- CERROR("invalid wire flavor 0x%x\n", flvr);
- return "invalid";
-}
-EXPORT_SYMBOL(sptlrpc_flavor2name_base);
-
-char *sptlrpc_flavor2name_bulk(struct sptlrpc_flavor *sf,
- char *buf, int bufsize)
-{
- if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN)
- snprintf(buf, bufsize, "hash:%s",
- sptlrpc_get_hash_name(sf->u_bulk.hash.hash_alg));
- else
- snprintf(buf, bufsize, "%s",
- sptlrpc_flavor2name_base(sf->sf_rpc));
-
- buf[bufsize - 1] = '\0';
- return buf;
-}
-EXPORT_SYMBOL(sptlrpc_flavor2name_bulk);
-
-char *sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize)
-{
- strlcpy(buf, sptlrpc_flavor2name_base(sf->sf_rpc), bufsize);
-
- /*
- * currently we don't support customized bulk specification for
- * flavors other than plain
- */
- if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN) {
- char bspec[16];
-
- bspec[0] = '-';
- sptlrpc_flavor2name_bulk(sf, &bspec[1], sizeof(bspec) - 1);
- strlcat(buf, bspec, bufsize);
- }
-
- return buf;
-}
-EXPORT_SYMBOL(sptlrpc_flavor2name);
-
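-/*
- * Editorial usage sketch, not part of the original file: rendering a
- * flavor for a log message with the helper above; the buffer size is
- * arbitrary.
- *
- *    char str[32];
- *    struct sptlrpc_flavor sf = { .sf_rpc = SPTLRPC_FLVR_PLAIN };
- *
- *    CDEBUG(D_SEC, "flavor is %s\n",
- *           sptlrpc_flavor2name(&sf, str, sizeof(str)));
- */
-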
-static char *sptlrpc_secflags2str(__u32 flags, char *buf, int bufsize)
-{
- buf[0] = '\0';
-
- if (flags & PTLRPC_SEC_FL_REVERSE)
- strlcat(buf, "reverse,", bufsize);
- if (flags & PTLRPC_SEC_FL_ROOTONLY)
- strlcat(buf, "rootonly,", bufsize);
- if (flags & PTLRPC_SEC_FL_UDESC)
- strlcat(buf, "udesc,", bufsize);
- if (flags & PTLRPC_SEC_FL_BULK)
- strlcat(buf, "bulk,", bufsize);
- if (buf[0] == '\0')
- strlcat(buf, "-,", bufsize);
-
- return buf;
-}
-
-/**************************************************
- * client context APIs *
- **************************************************/
-
-static
-struct ptlrpc_cli_ctx *get_my_ctx(struct ptlrpc_sec *sec)
-{
- struct vfs_cred vcred;
- int create = 1, remove_dead = 1;
-
- LASSERT(sec);
- LASSERT(sec->ps_policy->sp_cops->lookup_ctx);
-
- if (sec->ps_flvr.sf_flags & (PTLRPC_SEC_FL_REVERSE |
- PTLRPC_SEC_FL_ROOTONLY)) {
- vcred.vc_uid = 0;
- vcred.vc_gid = 0;
- if (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_REVERSE) {
- create = 0;
- remove_dead = 0;
- }
- } else {
- vcred.vc_uid = from_kuid(&init_user_ns, current_uid());
- vcred.vc_gid = from_kgid(&init_user_ns, current_gid());
- }
-
- return sec->ps_policy->sp_cops->lookup_ctx(sec, &vcred,
- create, remove_dead);
-}
-
-struct ptlrpc_cli_ctx *sptlrpc_cli_ctx_get(struct ptlrpc_cli_ctx *ctx)
-{
- atomic_inc(&ctx->cc_refcount);
- return ctx;
-}
-EXPORT_SYMBOL(sptlrpc_cli_ctx_get);
-
-void sptlrpc_cli_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync)
-{
- struct ptlrpc_sec *sec = ctx->cc_sec;
-
- LASSERT(sec);
- LASSERT_ATOMIC_POS(&ctx->cc_refcount);
-
- if (!atomic_dec_and_test(&ctx->cc_refcount))
- return;
-
- sec->ps_policy->sp_cops->release_ctx(sec, ctx, sync);
-}
-EXPORT_SYMBOL(sptlrpc_cli_ctx_put);
-
-static int import_sec_check_expire(struct obd_import *imp)
-{
- int adapt = 0;
-
- spin_lock(&imp->imp_lock);
- if (imp->imp_sec_expire &&
- imp->imp_sec_expire < ktime_get_real_seconds()) {
- adapt = 1;
- imp->imp_sec_expire = 0;
- }
- spin_unlock(&imp->imp_lock);
-
- if (!adapt)
- return 0;
-
- CDEBUG(D_SEC, "found delayed sec adapt expired, do it now\n");
- return sptlrpc_import_sec_adapt(imp, NULL, NULL);
-}
-
-/**
- * Get and validate the client side ptlrpc security facilities from
- * \a imp. There is a race condition on client reconnect when the import is
- * being destroyed while there are outstanding client-bound requests. In
- * this case do not output any error messages if the import security is
- * not found.
- *
- * \param[in] imp obd import associated with client
- * \param[out] sec client side ptlrpc security
- *
- * \retval 0 if security retrieved successfully
- * \retval -ve errno if there was a problem
- */
-static int import_sec_validate_get(struct obd_import *imp,
- struct ptlrpc_sec **sec)
-{
- int rc;
-
- if (unlikely(imp->imp_sec_expire)) {
- rc = import_sec_check_expire(imp);
- if (rc)
- return rc;
- }
-
- *sec = sptlrpc_import_sec_ref(imp);
- if (!*sec) {
- CERROR("import %p (%s) with no sec\n",
- imp, ptlrpc_import_state_name(imp->imp_state));
- return -EACCES;
- }
-
- if (unlikely((*sec)->ps_dying)) {
- CERROR("attempt to use dying sec %p\n", sec);
- sptlrpc_sec_put(*sec);
- return -EACCES;
- }
-
- return 0;
-}
-
-/**
- * Given a \a req, find or allocate an appropriate context for it.
- * \pre req->rq_cli_ctx == NULL.
- *
- * \retval 0 success, and req->rq_cli_ctx is set.
- * \retval -ve error number, and req->rq_cli_ctx == NULL.
- */
-int sptlrpc_req_get_ctx(struct ptlrpc_request *req)
-{
- struct obd_import *imp = req->rq_import;
- struct ptlrpc_sec *sec;
- int rc;
-
- LASSERT(!req->rq_cli_ctx);
- LASSERT(imp);
-
- rc = import_sec_validate_get(imp, &sec);
- if (rc)
- return rc;
-
- req->rq_cli_ctx = get_my_ctx(sec);
-
- sptlrpc_sec_put(sec);
-
- if (!req->rq_cli_ctx) {
- CERROR("req %p: fail to get context\n", req);
- return -ECONNREFUSED;
- }
-
- return 0;
-}
-
-/**
- * Drop the context for \a req.
- * \pre req->rq_cli_ctx != NULL.
- * \post req->rq_cli_ctx == NULL.
- *
- * If \a sync == 0, this function should return quickly without sleep;
- * otherwise it might trigger and wait for the whole process of sending
- * a context-destroying rpc to the server.
- */
-void sptlrpc_req_put_ctx(struct ptlrpc_request *req, int sync)
-{
- LASSERT(req);
- LASSERT(req->rq_cli_ctx);
-
- /* the request might be asked to release its context early while
- * still in the context waiting list.
- */
- if (!list_empty(&req->rq_ctx_chain)) {
- spin_lock(&req->rq_cli_ctx->cc_lock);
- list_del_init(&req->rq_ctx_chain);
- spin_unlock(&req->rq_cli_ctx->cc_lock);
- }
-
- sptlrpc_cli_ctx_put(req->rq_cli_ctx, sync);
- req->rq_cli_ctx = NULL;
-}
-
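-/*
- * Editorial pairing sketch, not part of the original file: each
- * successful sptlrpc_req_get_ctx() must eventually be balanced by a
- * sptlrpc_req_put_ctx() once the request is done with its context.
- *
- *    rc = sptlrpc_req_get_ctx(req);
- *    if (rc)
- *        return rc;
- *    // ... use req->rq_cli_ctx ...
- *    sptlrpc_req_put_ctx(req, 1);    // sync == 1 may sleep
- */
-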
-static
-int sptlrpc_req_ctx_switch(struct ptlrpc_request *req,
- struct ptlrpc_cli_ctx *oldctx,
- struct ptlrpc_cli_ctx *newctx)
-{
- struct sptlrpc_flavor old_flvr;
- char *reqmsg = NULL; /* to work around old gcc */
- int reqmsg_size;
- int rc = 0;
-
- LASSERT(req->rq_reqmsg);
- LASSERT(req->rq_reqlen);
- LASSERT(req->rq_replen);
-
- CDEBUG(D_SEC, "req %p: switch ctx %p(%u->%s) -> %p(%u->%s), switch sec %p(%s) -> %p(%s)\n",
- req,
- oldctx, oldctx->cc_vcred.vc_uid, sec2target_str(oldctx->cc_sec),
- newctx, newctx->cc_vcred.vc_uid, sec2target_str(newctx->cc_sec),
- oldctx->cc_sec, oldctx->cc_sec->ps_policy->sp_name,
- newctx->cc_sec, newctx->cc_sec->ps_policy->sp_name);
-
- /* save flavor */
- old_flvr = req->rq_flvr;
-
- /* save request message */
- reqmsg_size = req->rq_reqlen;
- if (reqmsg_size != 0) {
- reqmsg = kvzalloc(reqmsg_size, GFP_NOFS);
- if (!reqmsg)
- return -ENOMEM;
- memcpy(reqmsg, req->rq_reqmsg, reqmsg_size);
- }
-
- /* release old req/rep buf */
- req->rq_cli_ctx = oldctx;
- sptlrpc_cli_free_reqbuf(req);
- sptlrpc_cli_free_repbuf(req);
- req->rq_cli_ctx = newctx;
-
- /* recalculate the flavor */
- sptlrpc_req_set_flavor(req, 0);
-
- /* alloc new request buffer
- * we don't need to alloc reply buffer here, leave it to the
- * rest of the ptlrpc processing
- */
- if (reqmsg_size != 0) {
- rc = sptlrpc_cli_alloc_reqbuf(req, reqmsg_size);
- if (!rc) {
- LASSERT(req->rq_reqmsg);
- memcpy(req->rq_reqmsg, reqmsg, reqmsg_size);
- } else {
- CWARN("failed to alloc reqbuf: %d\n", rc);
- req->rq_flvr = old_flvr;
- }
-
- kvfree(reqmsg);
- }
- return rc;
-}
-
-/**
- * If the current context of \a req is dead somehow, e.g. we just switched
- * flavor and thus marked the original contexts dead, we'll find a new
- * context for it. If no switch is needed, \a req will end up with the
- * same context.
- *
- * \note a request must have a context, to keep other parts of code happy.
- * In any case of failure during the switching, we must restore the old one.
- */
-static int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req)
-{
- struct ptlrpc_cli_ctx *oldctx = req->rq_cli_ctx;
- struct ptlrpc_cli_ctx *newctx;
- int rc;
-
- LASSERT(oldctx);
-
- sptlrpc_cli_ctx_get(oldctx);
- sptlrpc_req_put_ctx(req, 0);
-
- rc = sptlrpc_req_get_ctx(req);
- if (unlikely(rc)) {
- LASSERT(!req->rq_cli_ctx);
-
- /* restore old ctx */
- req->rq_cli_ctx = oldctx;
- return rc;
- }
-
- newctx = req->rq_cli_ctx;
- LASSERT(newctx);
-
- if (unlikely(newctx == oldctx &&
- test_bit(PTLRPC_CTX_DEAD_BIT, &oldctx->cc_flags))) {
- /*
- * we still got the old dead ctx, which usually means the system is too busy
- */
- CDEBUG(D_SEC,
- "ctx (%p, fl %lx) doesn't switch, relax a little bit\n",
- newctx, newctx->cc_flags);
-
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC));
- } else if (unlikely(!test_bit(PTLRPC_CTX_UPTODATE_BIT, &newctx->cc_flags))) {
- /*
- * new ctx not up to date yet
- */
- CDEBUG(D_SEC,
- "ctx (%p, fl %lx) doesn't switch, not up to date yet\n",
- newctx, newctx->cc_flags);
- } else {
- /*
- * it's possible newctx == oldctx if we're switching
- * subflavor with the same sec.
- */
- rc = sptlrpc_req_ctx_switch(req, oldctx, newctx);
- if (rc) {
- /* restore old ctx */
- sptlrpc_req_put_ctx(req, 0);
- req->rq_cli_ctx = oldctx;
- return rc;
- }
-
- LASSERT(req->rq_cli_ctx == newctx);
- }
-
- sptlrpc_cli_ctx_put(oldctx, 1);
- return 0;
-}
-
-static
-int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx)
-{
- if (cli_ctx_is_refreshed(ctx))
- return 1;
- return 0;
-}
-
-static
-int ctx_refresh_timeout(struct ptlrpc_request *req)
-{
- int rc;
-
- /* conn_cnt is needed in expire_one_request */
- lustre_msg_set_conn_cnt(req->rq_reqmsg, req->rq_import->imp_conn_cnt);
-
- rc = ptlrpc_expire_one_request(req, 1);
- /* if we started recovery, we should mark this ctx dead; otherwise,
- * if lgssd died, nobody would retire this ctx and subsequent connect
- * attempts would still find the same ctx, causing a deadlock.
- * There's an assumption that the expire time of the request should
- * be later than the context refresh expire time.
- */
- if (rc == 0)
- req->rq_cli_ctx->cc_ops->force_die(req->rq_cli_ctx, 0);
- return rc;
-}
-
-static
-void ctx_refresh_interrupt(struct ptlrpc_request *req)
-{
- spin_lock(&req->rq_lock);
- req->rq_intr = 1;
- spin_unlock(&req->rq_lock);
-}
-
-static
-void req_off_ctx_list(struct ptlrpc_request *req, struct ptlrpc_cli_ctx *ctx)
-{
- spin_lock(&ctx->cc_lock);
- if (!list_empty(&req->rq_ctx_chain))
- list_del_init(&req->rq_ctx_chain);
- spin_unlock(&ctx->cc_lock);
-}
-
-/**
- * Refresh the context of \a req, if it's not up to date.
- * \param timeout
- * - < 0: don't wait
- * - = 0: wait until success or a fatal error occurs
- * - > 0: timeout value (in seconds)
- *
- * The status of the context could be changed by other threads at any
- * time. We allow this race, but once we return with 0, the caller will
- * assume it's up to date and keep using it until the owning rpc is done.
- *
- * \retval 0 only if the context is up to date.
- * \retval -ve error number.
- */
-int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout)
-{
- struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
- struct ptlrpc_sec *sec;
- int rc;
-
- LASSERT(ctx);
-
- if (req->rq_ctx_init || req->rq_ctx_fini)
- return 0;
-
- /*
- * during the process a request's context might even change type
- * (e.g. from a gss ctx to a null ctx), so on each loop we need to
- * re-check everything
- */
-again:
- rc = import_sec_validate_get(req->rq_import, &sec);
- if (rc)
- return rc;
-
- if (sec->ps_flvr.sf_rpc != req->rq_flvr.sf_rpc) {
- CDEBUG(D_SEC, "req %p: flavor has changed %x -> %x\n",
- req, req->rq_flvr.sf_rpc, sec->ps_flvr.sf_rpc);
- req_off_ctx_list(req, ctx);
- sptlrpc_req_replace_dead_ctx(req);
- ctx = req->rq_cli_ctx;
- }
- sptlrpc_sec_put(sec);
-
- if (cli_ctx_is_eternal(ctx))
- return 0;
-
- if (unlikely(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags))) {
- LASSERT(ctx->cc_ops->refresh);
- ctx->cc_ops->refresh(ctx);
- }
- LASSERT(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags) == 0);
-
- LASSERT(ctx->cc_ops->validate);
- if (ctx->cc_ops->validate(ctx) == 0) {
- req_off_ctx_list(req, ctx);
- return 0;
- }
-
- if (unlikely(test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags))) {
- spin_lock(&req->rq_lock);
- req->rq_err = 1;
- spin_unlock(&req->rq_lock);
- req_off_ctx_list(req, ctx);
- return -EPERM;
- }
-
- /*
- * There's a subtle issue when resending RPCs; suppose the following
- * situation:
- * 1. the request was sent to the server.
- * 2. recovery was kicked off; after it finished the request was
- * marked as resent.
- * 3. resend the request.
- * 4. old reply from the server received; we accept and verify the
- * reply. This has to succeed, otherwise the error will be seen
- * by the application.
- * 5. new reply from server received, dropped by LNet.
- *
- * Note the xid of old & new request is the same. We can't simply
- * change xid for the resent request because the server replies on
- * it for reply reconstruction.
- *
- * Commonly the original context should be up to date because we
- * have an ample expiry time; the server will keep its context
- * because we at least hold a ref on the old context, which prevents
- * the context-destroying RPC from being sent. So the server can
- * still accept the request and finish the RPC. But if that's not
- * the case:
- * 1. If the server side context has been trimmed, a NO_CONTEXT will
- * be returned, and gss_cli_ctx_verify/unseal will switch to the
- * new context by force.
- * 2. If the current context was never refreshed, then we are fine:
- * we never really sent a request with the old context before.
- */
- if (test_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags) &&
- unlikely(req->rq_reqmsg) &&
- lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
- req_off_ctx_list(req, ctx);
- return 0;
- }
-
- if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) {
- req_off_ctx_list(req, ctx);
- /*
- * don't switch ctx if import was deactivated
- */
- if (req->rq_import->imp_deactive) {
- spin_lock(&req->rq_lock);
- req->rq_err = 1;
- spin_unlock(&req->rq_lock);
- return -EINTR;
- }
-
- rc = sptlrpc_req_replace_dead_ctx(req);
- if (rc) {
- LASSERT(ctx == req->rq_cli_ctx);
- CERROR("req %p: failed to replace dead ctx %p: %d\n",
- req, ctx, rc);
- spin_lock(&req->rq_lock);
- req->rq_err = 1;
- spin_unlock(&req->rq_lock);
- return rc;
- }
-
- ctx = req->rq_cli_ctx;
- goto again;
- }
-
- /*
- * Now we're sure this context is in the middle of an upcall; add
- * ourselves to the waiting list
- */
- spin_lock(&ctx->cc_lock);
- if (list_empty(&req->rq_ctx_chain))
- list_add(&req->rq_ctx_chain, &ctx->cc_req_list);
- spin_unlock(&ctx->cc_lock);
-
- if (timeout < 0)
- return -EWOULDBLOCK;
-
- /* Clear any flags that may be present from previous sends */
- LASSERT(req->rq_receiving_reply == 0);
- spin_lock(&req->rq_lock);
- req->rq_err = 0;
- req->rq_timedout = 0;
- req->rq_resend = 0;
- req->rq_restart = 0;
- spin_unlock(&req->rq_lock);
-
- rc = wait_event_idle_timeout(req->rq_reply_waitq,
- ctx_check_refresh(ctx),
- timeout * HZ);
- if (rc == 0 && ctx_refresh_timeout(req) == 0) {
- /* Keep waiting, but enable some signals */
- rc = l_wait_event_abortable(req->rq_reply_waitq,
- ctx_check_refresh(ctx));
- if (rc == 0)
- rc = 1;
- }
-
- if (rc > 0)
- /* condition is true */
- rc = 0;
- else if (rc == 0)
- /* Timed out */
- rc = -ETIMEDOUT;
- else {
- /* Aborted by signal */
- rc = -EINTR;
- ctx_refresh_interrupt(req);
- }
-
- /*
- * the following cases could lead us here:
- * - successfully refreshed;
- * - interrupted;
- * - timed out, and we don't want to recover from the failure;
- * - timed out, and woken up when recovery finished;
- * - someone else marked this ctx dead by force;
- * - someone invalidated the req and called ptlrpc_client_wake_req(),
- * e.g. ptlrpc_abort_inflight();
- */
- if (!cli_ctx_is_refreshed(ctx)) {
- /* timed out or interrupted */
- req_off_ctx_list(req, ctx);
-
- LASSERT(rc != 0);
- return rc;
- }
-
- goto again;
-}
-
-/**
- * Initialize flavor settings for \a req, according to \a opcode.
- *
- * \note this could be called in two situations:
- * - new request from ptlrpc_pre_req(), with proper @opcode
- * - old request which changed ctx in the middle, with @opcode == 0
- */
-void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode)
-{
- struct ptlrpc_sec *sec;
-
- LASSERT(req->rq_import);
- LASSERT(req->rq_cli_ctx);
- LASSERT(req->rq_cli_ctx->cc_sec);
- LASSERT(req->rq_bulk_read == 0 || req->rq_bulk_write == 0);
-
- /* special security flags according to opcode */
- switch (opcode) {
- case OST_READ:
- case MDS_READPAGE:
- case MGS_CONFIG_READ:
- case OBD_IDX_READ:
- req->rq_bulk_read = 1;
- break;
- case OST_WRITE:
- case MDS_WRITEPAGE:
- req->rq_bulk_write = 1;
- break;
- case SEC_CTX_INIT:
- req->rq_ctx_init = 1;
- break;
- case SEC_CTX_FINI:
- req->rq_ctx_fini = 1;
- break;
- case 0:
- /* init/fini rpc won't be resent, so it can't be here */
- LASSERT(req->rq_ctx_init == 0);
- LASSERT(req->rq_ctx_fini == 0);
-
- /* cleanup flags, which should be recalculated */
- req->rq_pack_udesc = 0;
- req->rq_pack_bulk = 0;
- break;
- }
-
- sec = req->rq_cli_ctx->cc_sec;
-
- spin_lock(&sec->ps_lock);
- req->rq_flvr = sec->ps_flvr;
- spin_unlock(&sec->ps_lock);
-
- /* force SVC_NULL for context initiation rpc, SVC_INTG for context
- * destruction rpc
- */
- if (unlikely(req->rq_ctx_init))
- flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_NULL);
- else if (unlikely(req->rq_ctx_fini))
- flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_INTG);
-
- /* user descriptor flag, null security can't do it anyway */
- if ((sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_UDESC) &&
- (req->rq_flvr.sf_rpc != SPTLRPC_FLVR_NULL))
- req->rq_pack_udesc = 1;
-
- /* bulk security flag */
- if ((req->rq_bulk_read || req->rq_bulk_write) &&
- sptlrpc_flavor_has_bulk(&req->rq_flvr))
- req->rq_pack_bulk = 1;
-}
-
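-/*
- * Worked example (editorial, not part of the original file): for an
- * OST_WRITE request the switch above sets rq_bulk_write = 1; then, if
- * sptlrpc_flavor_has_bulk() reports that the chosen flavor carries a
- * bulk specification, rq_pack_bulk is also set and the bulk data will
- * be protected accordingly.
- */
-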
-void sptlrpc_request_out_callback(struct ptlrpc_request *req)
-{
- if (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc) != SPTLRPC_SVC_PRIV)
- return;
-
- LASSERT(req->rq_clrbuf);
- if (req->rq_pool || !req->rq_reqbuf)
- return;
-
- kvfree(req->rq_reqbuf);
- req->rq_reqbuf = NULL;
- req->rq_reqbuf_len = 0;
-}
-
-/**
- * Given an import \a imp, check whether current user has a valid context
- * or not. We may create a new context and try to refresh it, retrying
- * repeatedly in case of non-fatal errors. Return 0 means success.
- */
-int sptlrpc_import_check_ctx(struct obd_import *imp)
-{
- struct ptlrpc_sec *sec;
- struct ptlrpc_cli_ctx *ctx;
- struct ptlrpc_request *req = NULL;
- int rc;
-
- might_sleep();
-
- sec = sptlrpc_import_sec_ref(imp);
- ctx = get_my_ctx(sec);
- sptlrpc_sec_put(sec);
-
- if (!ctx)
- return -ENOMEM;
-
- if (cli_ctx_is_eternal(ctx) ||
- ctx->cc_ops->validate(ctx) == 0) {
- sptlrpc_cli_ctx_put(ctx, 1);
- return 0;
- }
-
- if (cli_ctx_is_error(ctx)) {
- sptlrpc_cli_ctx_put(ctx, 1);
- return -EACCES;
- }
-
- req = ptlrpc_request_cache_alloc(GFP_NOFS);
- if (!req)
- return -ENOMEM;
-
- ptlrpc_cli_req_init(req);
- atomic_set(&req->rq_refcount, 10000);
-
- req->rq_import = imp;
- req->rq_flvr = sec->ps_flvr;
- req->rq_cli_ctx = ctx;
-
- rc = sptlrpc_req_refresh_ctx(req, 0);
- LASSERT(list_empty(&req->rq_ctx_chain));
- sptlrpc_cli_ctx_put(req->rq_cli_ctx, 1);
- ptlrpc_request_cache_free(req);
-
- return rc;
-}
-
-/**
- * Used by ptlrpc client, to perform the pre-defined security transformation
- * upon the request message of \a req. After this function is called,
- * req->rq_reqmsg is still accessible as clear text.
- */
-int sptlrpc_cli_wrap_request(struct ptlrpc_request *req)
-{
- struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
- int rc = 0;
-
- LASSERT(ctx);
- LASSERT(ctx->cc_sec);
- LASSERT(req->rq_reqbuf || req->rq_clrbuf);
-
- /* we wrap bulk request here because now we can be sure
- * the context is uptodate.
- */
- if (req->rq_bulk) {
- rc = sptlrpc_cli_wrap_bulk(req, req->rq_bulk);
- if (rc)
- return rc;
- }
-
- switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
- case SPTLRPC_SVC_NULL:
- case SPTLRPC_SVC_AUTH:
- case SPTLRPC_SVC_INTG:
- LASSERT(ctx->cc_ops->sign);
- rc = ctx->cc_ops->sign(ctx, req);
- break;
- case SPTLRPC_SVC_PRIV:
- LASSERT(ctx->cc_ops->seal);
- rc = ctx->cc_ops->seal(ctx, req);
- break;
- default:
- LBUG();
- }
-
- if (rc == 0) {
- LASSERT(req->rq_reqdata_len);
- LASSERT(req->rq_reqdata_len % 8 == 0);
- LASSERT(req->rq_reqdata_len <= req->rq_reqbuf_len);
- }
-
- return rc;
-}
-
-static int do_cli_unwrap_reply(struct ptlrpc_request *req)
-{
- struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
- int rc;
-
- LASSERT(ctx);
- LASSERT(ctx->cc_sec);
- LASSERT(req->rq_repbuf);
- LASSERT(req->rq_repdata);
- LASSERT(!req->rq_repmsg);
-
- req->rq_rep_swab_mask = 0;
-
- rc = __lustre_unpack_msg(req->rq_repdata, req->rq_repdata_len);
- switch (rc) {
- case 1:
- lustre_set_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF);
- /* fall through */
- case 0:
- break;
- default:
- CERROR("failed unpack reply: x%llu\n", req->rq_xid);
- return -EPROTO;
- }
-
- if (req->rq_repdata_len < sizeof(struct lustre_msg)) {
- CERROR("replied data length %d too small\n",
- req->rq_repdata_len);
- return -EPROTO;
- }
-
- if (SPTLRPC_FLVR_POLICY(req->rq_repdata->lm_secflvr) !=
- SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc)) {
- CERROR("reply policy %u doesn't match request policy %u\n",
- SPTLRPC_FLVR_POLICY(req->rq_repdata->lm_secflvr),
- SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc));
- return -EPROTO;
- }
-
- switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
- case SPTLRPC_SVC_NULL:
- case SPTLRPC_SVC_AUTH:
- case SPTLRPC_SVC_INTG:
- LASSERT(ctx->cc_ops->verify);
- rc = ctx->cc_ops->verify(ctx, req);
- break;
- case SPTLRPC_SVC_PRIV:
- LASSERT(ctx->cc_ops->unseal);
- rc = ctx->cc_ops->unseal(ctx, req);
- break;
- default:
- LBUG();
- }
- LASSERT(rc || req->rq_repmsg || req->rq_resend);
-
- if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL &&
- !req->rq_ctx_init)
- req->rq_rep_swab_mask = 0;
- return rc;
-}
-
-/**
- * Used by ptlrpc client, to perform security transformation upon the reply
- * message of \a req. After returning successfully, req->rq_repmsg points to
- * the reply message in clear text.
- *
- * \pre the reply buffer should have been un-posted from LNet, so nothing is
- * going to change.
- */
-int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req)
-{
- LASSERT(req->rq_repbuf);
- LASSERT(!req->rq_repdata);
- LASSERT(!req->rq_repmsg);
- LASSERT(req->rq_reply_off + req->rq_nob_received <= req->rq_repbuf_len);
-
- if (req->rq_reply_off == 0 &&
- (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
- CERROR("real reply with offset 0\n");
- return -EPROTO;
- }
-
- if (req->rq_reply_off % 8 != 0) {
- CERROR("reply at odd offset %u\n", req->rq_reply_off);
- return -EPROTO;
- }
-
- req->rq_repdata = (struct lustre_msg *)
- (req->rq_repbuf + req->rq_reply_off);
- req->rq_repdata_len = req->rq_nob_received;
-
- return do_cli_unwrap_reply(req);
-}
-
-/**
- * Used by ptlrpc client, to perform security transformation upon the early
- * reply message of \a req. We expect the rq_reply_off is 0, and
- * rq_nob_received is the early reply size.
- *
- * Because the receive buffer might still be posted, the reply data might be
- * changed at any time, whether we're holding rq_lock or not. For this reason
- * we allocate a separate ptlrpc_request and reply buffer for early reply
- * processing.
- *
- * \retval 0 success, \a req_ret is filled with a duplicated ptlrpc_request.
- * Later the caller must call sptlrpc_cli_finish_early_reply() on the returned
- * \a *req_ret to release it.
- * \retval -ve error number, and \a req_ret will not be set.
- */
-int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req,
- struct ptlrpc_request **req_ret)
-{
- struct ptlrpc_request *early_req;
- char *early_buf;
- int early_bufsz, early_size;
- int rc;
-
- early_req = ptlrpc_request_cache_alloc(GFP_NOFS);
- if (!early_req)
- return -ENOMEM;
-
- ptlrpc_cli_req_init(early_req);
-
- early_size = req->rq_nob_received;
- early_bufsz = size_roundup_power2(early_size);
- early_buf = kvzalloc(early_bufsz, GFP_NOFS);
- if (!early_buf) {
- rc = -ENOMEM;
- goto err_req;
- }
-
- /* sanity checks; copy data out inside the spinlock */
- spin_lock(&req->rq_lock);
-
- if (req->rq_replied) {
- spin_unlock(&req->rq_lock);
- rc = -EALREADY;
- goto err_buf;
- }
-
- LASSERT(req->rq_repbuf);
- LASSERT(!req->rq_repdata);
- LASSERT(!req->rq_repmsg);
-
- if (req->rq_reply_off != 0) {
- CERROR("early reply with offset %u\n", req->rq_reply_off);
- spin_unlock(&req->rq_lock);
- rc = -EPROTO;
- goto err_buf;
- }
-
- if (req->rq_nob_received != early_size) {
- /* even if another early reply arrived, the size should be the same */
- CERROR("data size has changed from %u to %u\n",
- early_size, req->rq_nob_received);
- spin_unlock(&req->rq_lock);
- rc = -EINVAL;
- goto err_buf;
- }
-
- if (req->rq_nob_received < sizeof(struct lustre_msg)) {
- CERROR("early reply length %d too small\n",
- req->rq_nob_received);
- spin_unlock(&req->rq_lock);
- rc = -EALREADY;
- goto err_buf;
- }
-
- memcpy(early_buf, req->rq_repbuf, early_size);
- spin_unlock(&req->rq_lock);
-
- early_req->rq_cli_ctx = sptlrpc_cli_ctx_get(req->rq_cli_ctx);
- early_req->rq_flvr = req->rq_flvr;
- early_req->rq_repbuf = early_buf;
- early_req->rq_repbuf_len = early_bufsz;
- early_req->rq_repdata = (struct lustre_msg *)early_buf;
- early_req->rq_repdata_len = early_size;
- early_req->rq_early = 1;
- early_req->rq_reqmsg = req->rq_reqmsg;
-
- rc = do_cli_unwrap_reply(early_req);
- if (rc) {
- DEBUG_REQ(D_ADAPTTO, early_req,
- "error %d unwrap early reply", rc);
- goto err_ctx;
- }
-
- LASSERT(early_req->rq_repmsg);
- *req_ret = early_req;
- return 0;
-
-err_ctx:
- sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
-err_buf:
- kvfree(early_buf);
-err_req:
- ptlrpc_request_cache_free(early_req);
- return rc;
-}
-
-/**
- * Used by ptlrpc client, to release a processed early reply \a early_req.
- *
- * \pre \a early_req was obtained from calling sptlrpc_cli_unwrap_early_reply().
- */
-void sptlrpc_cli_finish_early_reply(struct ptlrpc_request *early_req)
-{
- LASSERT(early_req->rq_repbuf);
- LASSERT(early_req->rq_repdata);
- LASSERT(early_req->rq_repmsg);
-
- sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
- kvfree(early_req->rq_repbuf);
- ptlrpc_request_cache_free(early_req);
-}
-
-/**************************************************
- * sec ID *
- **************************************************/
-
-/*
- * "fixed" sec (e.g. null) use sec_id < 0
- */
-static atomic_t sptlrpc_sec_id = ATOMIC_INIT(1);
-
-int sptlrpc_get_next_secid(void)
-{
- return atomic_inc_return(&sptlrpc_sec_id);
-}
-EXPORT_SYMBOL(sptlrpc_get_next_secid);
-
-/**************************************************
- * client side high-level security APIs *
- **************************************************/
-
-static int sec_cop_flush_ctx_cache(struct ptlrpc_sec *sec, uid_t uid,
- int grace, int force)
-{
- struct ptlrpc_sec_policy *policy = sec->ps_policy;
-
- LASSERT(policy->sp_cops);
- LASSERT(policy->sp_cops->flush_ctx_cache);
-
- return policy->sp_cops->flush_ctx_cache(sec, uid, grace, force);
-}
-
-static void sec_cop_destroy_sec(struct ptlrpc_sec *sec)
-{
- struct ptlrpc_sec_policy *policy = sec->ps_policy;
-
- LASSERT_ATOMIC_ZERO(&sec->ps_refcount);
- LASSERT_ATOMIC_ZERO(&sec->ps_nctx);
- LASSERT(policy->sp_cops->destroy_sec);
-
- CDEBUG(D_SEC, "%s@%p: being destroyed\n", sec->ps_policy->sp_name, sec);
-
- policy->sp_cops->destroy_sec(sec);
- sptlrpc_policy_put(policy);
-}
-
-static void sptlrpc_sec_kill(struct ptlrpc_sec *sec)
-{
- LASSERT_ATOMIC_POS(&sec->ps_refcount);
-
- if (sec->ps_policy->sp_cops->kill_sec) {
- sec->ps_policy->sp_cops->kill_sec(sec);
-
- sec_cop_flush_ctx_cache(sec, -1, 1, 1);
- }
-}
-
-static struct ptlrpc_sec *sptlrpc_sec_get(struct ptlrpc_sec *sec)
-{
- if (sec)
- atomic_inc(&sec->ps_refcount);
-
- return sec;
-}
-
-void sptlrpc_sec_put(struct ptlrpc_sec *sec)
-{
- if (sec) {
- LASSERT_ATOMIC_POS(&sec->ps_refcount);
-
- if (atomic_dec_and_test(&sec->ps_refcount)) {
- sptlrpc_gc_del_sec(sec);
- sec_cop_destroy_sec(sec);
- }
- }
-}
-EXPORT_SYMBOL(sptlrpc_sec_put);
-
-/*
- * the policy module is responsible for taking a reference on the import
- */
-static
-struct ptlrpc_sec *sptlrpc_sec_create(struct obd_import *imp,
- struct ptlrpc_svc_ctx *svc_ctx,
- struct sptlrpc_flavor *sf,
- enum lustre_sec_part sp)
-{
- struct ptlrpc_sec_policy *policy;
- struct ptlrpc_sec *sec;
- char str[32];
-
- if (svc_ctx) {
- LASSERT(imp->imp_dlm_fake == 1);
-
- CDEBUG(D_SEC, "%s %s: reverse sec using flavor %s\n",
- imp->imp_obd->obd_type->typ_name,
- imp->imp_obd->obd_name,
- sptlrpc_flavor2name(sf, str, sizeof(str)));
-
- policy = sptlrpc_policy_get(svc_ctx->sc_policy);
- sf->sf_flags |= PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY;
- } else {
- LASSERT(imp->imp_dlm_fake == 0);
-
- CDEBUG(D_SEC, "%s %s: select security flavor %s\n",
- imp->imp_obd->obd_type->typ_name,
- imp->imp_obd->obd_name,
- sptlrpc_flavor2name(sf, str, sizeof(str)));
-
- policy = sptlrpc_wireflavor2policy(sf->sf_rpc);
- if (!policy) {
- CERROR("invalid flavor 0x%x\n", sf->sf_rpc);
- return NULL;
- }
- }
-
- sec = policy->sp_cops->create_sec(imp, svc_ctx, sf);
- if (sec) {
- atomic_inc(&sec->ps_refcount);
-
- sec->ps_part = sp;
-
- if (sec->ps_gc_interval && policy->sp_cops->gc_ctx)
- sptlrpc_gc_add_sec(sec);
- } else {
- sptlrpc_policy_put(policy);
- }
-
- return sec;
-}
-
-struct ptlrpc_sec *sptlrpc_import_sec_ref(struct obd_import *imp)
-{
- struct ptlrpc_sec *sec;
-
- spin_lock(&imp->imp_lock);
- sec = sptlrpc_sec_get(imp->imp_sec);
- spin_unlock(&imp->imp_lock);
-
- return sec;
-}
-EXPORT_SYMBOL(sptlrpc_import_sec_ref);
-
-static void sptlrpc_import_sec_install(struct obd_import *imp,
- struct ptlrpc_sec *sec)
-{
- struct ptlrpc_sec *old_sec;
-
- LASSERT_ATOMIC_POS(&sec->ps_refcount);
-
- spin_lock(&imp->imp_lock);
- old_sec = imp->imp_sec;
- imp->imp_sec = sec;
- spin_unlock(&imp->imp_lock);
-
- if (old_sec) {
- sptlrpc_sec_kill(old_sec);
-
- /* balance the ref taken by this import */
- sptlrpc_sec_put(old_sec);
- }
-}
-
-static inline
-int flavor_equal(struct sptlrpc_flavor *sf1, struct sptlrpc_flavor *sf2)
-{
- return (memcmp(sf1, sf2, sizeof(*sf1)) == 0);
-}
-
-static inline
-void flavor_copy(struct sptlrpc_flavor *dst, struct sptlrpc_flavor *src)
-{
- *dst = *src;
-}
-
-static void sptlrpc_import_sec_adapt_inplace(struct obd_import *imp,
- struct ptlrpc_sec *sec,
- struct sptlrpc_flavor *sf)
-{
- char str1[32], str2[32];
-
- if (sec->ps_flvr.sf_flags != sf->sf_flags)
- CDEBUG(D_SEC, "changing sec flags: %s -> %s\n",
- sptlrpc_secflags2str(sec->ps_flvr.sf_flags,
- str1, sizeof(str1)),
- sptlrpc_secflags2str(sf->sf_flags,
- str2, sizeof(str2)));
-
- spin_lock(&sec->ps_lock);
- flavor_copy(&sec->ps_flvr, sf);
- spin_unlock(&sec->ps_lock);
-}
-
-/**
- * Get an appropriate ptlrpc_sec for the \a imp, according to the current
- * configuration. When called, imp->imp_sec may or may not be NULL.
- *
- * - regular import: \a svc_ctx should be NULL and \a flvr is ignored;
- * - reverse import: \a svc_ctx and \a flvr are obtained from incoming request.
- */
-int sptlrpc_import_sec_adapt(struct obd_import *imp,
- struct ptlrpc_svc_ctx *svc_ctx,
- struct sptlrpc_flavor *flvr)
-{
- struct ptlrpc_connection *conn;
- struct sptlrpc_flavor sf;
- struct ptlrpc_sec *sec, *newsec;
- enum lustre_sec_part sp;
- char str[24];
- int rc = 0;
-
- might_sleep();
-
- if (!imp)
- return 0;
-
- conn = imp->imp_connection;
-
- if (!svc_ctx) {
- struct client_obd *cliobd = &imp->imp_obd->u.cli;
- /*
- * normal import, determine flavor from the rule set, except
- * for the mgc, whose flavor is predetermined.
- */
- if (cliobd->cl_sp_me == LUSTRE_SP_MGC)
- sf = cliobd->cl_flvr_mgc;
- else
- sptlrpc_conf_choose_flavor(cliobd->cl_sp_me,
- cliobd->cl_sp_to,
- &cliobd->cl_target_uuid,
- conn->c_self, &sf);
-
- sp = imp->imp_obd->u.cli.cl_sp_me;
- } else {
- /* reverse import, determine flavor from incoming request */
- sf = *flvr;
-
- if (sf.sf_rpc != SPTLRPC_FLVR_NULL)
- sf.sf_flags = PTLRPC_SEC_FL_REVERSE |
- PTLRPC_SEC_FL_ROOTONLY;
-
- sp = sptlrpc_target_sec_part(imp->imp_obd);
- }
-
- sec = sptlrpc_import_sec_ref(imp);
- if (sec) {
- char str2[24];
-
- if (flavor_equal(&sf, &sec->ps_flvr))
- goto out;
-
- CDEBUG(D_SEC, "import %s->%s: changing flavor %s -> %s\n",
- imp->imp_obd->obd_name,
- obd_uuid2str(&conn->c_remote_uuid),
- sptlrpc_flavor2name(&sec->ps_flvr, str, sizeof(str)),
- sptlrpc_flavor2name(&sf, str2, sizeof(str2)));
-
- if (SPTLRPC_FLVR_POLICY(sf.sf_rpc) ==
- SPTLRPC_FLVR_POLICY(sec->ps_flvr.sf_rpc) &&
- SPTLRPC_FLVR_MECH(sf.sf_rpc) ==
- SPTLRPC_FLVR_MECH(sec->ps_flvr.sf_rpc)) {
- sptlrpc_import_sec_adapt_inplace(imp, sec, &sf);
- goto out;
- }
- } else if (SPTLRPC_FLVR_BASE(sf.sf_rpc) !=
- SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL)) {
- CDEBUG(D_SEC, "import %s->%s netid %x: select flavor %s\n",
- imp->imp_obd->obd_name,
- obd_uuid2str(&conn->c_remote_uuid),
- LNET_NIDNET(conn->c_self),
- sptlrpc_flavor2name(&sf, str, sizeof(str)));
- }
-
- mutex_lock(&imp->imp_sec_mutex);
-
- newsec = sptlrpc_sec_create(imp, svc_ctx, &sf, sp);
- if (newsec) {
- sptlrpc_import_sec_install(imp, newsec);
- } else {
- CERROR("import %s->%s: failed to create new sec\n",
- imp->imp_obd->obd_name,
- obd_uuid2str(&conn->c_remote_uuid));
- rc = -EPERM;
- }
-
- mutex_unlock(&imp->imp_sec_mutex);
-out:
- sptlrpc_sec_put(sec);
- return rc;
-}
-
-void sptlrpc_import_sec_put(struct obd_import *imp)
-{
- if (imp->imp_sec) {
- sptlrpc_sec_kill(imp->imp_sec);
-
- sptlrpc_sec_put(imp->imp_sec);
- imp->imp_sec = NULL;
- }
-}
-
-static void import_flush_ctx_common(struct obd_import *imp,
- uid_t uid, int grace, int force)
-{
- struct ptlrpc_sec *sec;
-
- if (!imp)
- return;
-
- sec = sptlrpc_import_sec_ref(imp);
- if (!sec)
- return;
-
- sec_cop_flush_ctx_cache(sec, uid, grace, force);
- sptlrpc_sec_put(sec);
-}
-
-void sptlrpc_import_flush_my_ctx(struct obd_import *imp)
-{
- import_flush_ctx_common(imp, from_kuid(&init_user_ns, current_uid()),
- 1, 1);
-}
-EXPORT_SYMBOL(sptlrpc_import_flush_my_ctx);
-
-void sptlrpc_import_flush_all_ctx(struct obd_import *imp)
-{
- import_flush_ctx_common(imp, -1, 1, 1);
-}
-EXPORT_SYMBOL(sptlrpc_import_flush_all_ctx);
-
-/**
- * Used by ptlrpc client to allocate request buffer of \a req. Upon return
- * successfully, req->rq_reqmsg points to a buffer with size \a msgsize.
- */
-int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize)
-{
- struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
- struct ptlrpc_sec_policy *policy;
- int rc;
-
- LASSERT(ctx);
- LASSERT(ctx->cc_sec);
- LASSERT(ctx->cc_sec->ps_policy);
- LASSERT(!req->rq_reqmsg);
- LASSERT_ATOMIC_POS(&ctx->cc_refcount);
-
- policy = ctx->cc_sec->ps_policy;
- rc = policy->sp_cops->alloc_reqbuf(ctx->cc_sec, req, msgsize);
- if (!rc) {
- LASSERT(req->rq_reqmsg);
- LASSERT(req->rq_reqbuf || req->rq_clrbuf);
-
- /* zeroing preallocated buffer */
- if (req->rq_pool)
- memset(req->rq_reqmsg, 0, msgsize);
- }
-
- return rc;
-}
-
-/**
- * Used by ptlrpc client to free request buffer of \a req. After this
- * req->rq_reqmsg is set to NULL and should not be accessed anymore.
- */
-void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req)
-{
- struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
- struct ptlrpc_sec_policy *policy;
-
- LASSERT(ctx);
- LASSERT(ctx->cc_sec);
- LASSERT(ctx->cc_sec->ps_policy);
- LASSERT_ATOMIC_POS(&ctx->cc_refcount);
-
- if (!req->rq_reqbuf && !req->rq_clrbuf)
- return;
-
- policy = ctx->cc_sec->ps_policy;
- policy->sp_cops->free_reqbuf(ctx->cc_sec, req);
- req->rq_reqmsg = NULL;
-}
-
-/*
- * NOTE caller must guarantee the buffer size is enough for the enlargement
- */
-void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg,
- int segment, int newsize)
-{
- void *src, *dst;
- int oldsize, oldmsg_size, movesize;
-
- LASSERT(segment < msg->lm_bufcount);
- LASSERT(msg->lm_buflens[segment] <= newsize);
-
- if (msg->lm_buflens[segment] == newsize)
- return;
-
- /* nothing to do if we are enlarging the last segment */
- if (segment == msg->lm_bufcount - 1) {
- msg->lm_buflens[segment] = newsize;
- return;
- }
-
- oldsize = msg->lm_buflens[segment];
-
- src = lustre_msg_buf(msg, segment + 1, 0);
- msg->lm_buflens[segment] = newsize;
- dst = lustre_msg_buf(msg, segment + 1, 0);
- msg->lm_buflens[segment] = oldsize;
-
- /* move from segment + 1 to end segment */
- LASSERT(msg->lm_magic == LUSTRE_MSG_MAGIC_V2);
- oldmsg_size = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
- movesize = oldmsg_size - ((unsigned long)src - (unsigned long)msg);
- LASSERT(movesize >= 0);
-
- if (movesize)
- memmove(dst, src, movesize);
-
- /* note we don't clear the areas where the old data lived; it's not secret */
-
- /* finally set new segment size */
- msg->lm_buflens[segment] = newsize;
-}
-EXPORT_SYMBOL(_sptlrpc_enlarge_msg_inplace);
-
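-/*
- * Worked example (editorial, not part of the original file), assuming
- * the usual 8-byte segment alignment of lustre_msg_v2: for a message
- * with lm_buflens = { 128, 64, 256 }, enlarging segment 1 to 96
- * computes src with the old length and dst with the new one, moves the
- * 256-byte tail segment up by 32 bytes (the growth of segment 1), and
- * only then records the new length, so segment 2's contents survive at
- * the shifted offset.
- */
-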
-/**
- * Used by ptlrpc client to enlarge the \a segment of the request message
- * pointed to by req->rq_reqmsg to size \a newsize. All previously filled-in
- * data will be preserved after the enlargement. This must be called after
- * the original request buffer has been allocated.
- *
- * \note after this is called, rq_reqmsg and rq_reqlen might have been
- * changed, so the caller should refresh its local pointers if needed.
- */
-int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req,
- int segment, int newsize)
-{
- struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
- struct ptlrpc_sec_cops *cops;
- struct lustre_msg *msg = req->rq_reqmsg;
-
- LASSERT(ctx);
- LASSERT(msg);
- LASSERT(msg->lm_bufcount > segment);
- LASSERT(msg->lm_buflens[segment] <= newsize);
-
- if (msg->lm_buflens[segment] == newsize)
- return 0;
-
- cops = ctx->cc_sec->ps_policy->sp_cops;
- LASSERT(cops->enlarge_reqbuf);
- return cops->enlarge_reqbuf(ctx->cc_sec, req, segment, newsize);
-}
-EXPORT_SYMBOL(sptlrpc_cli_enlarge_reqbuf);
-
-/**
- * Used by ptlrpc client to allocate reply buffer of \a req.
- *
- * \note After this, req->rq_repmsg is still not accessible.
- */
-int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize)
-{
- struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
- struct ptlrpc_sec_policy *policy;
-
- LASSERT(ctx);
- LASSERT(ctx->cc_sec);
- LASSERT(ctx->cc_sec->ps_policy);
-
- if (req->rq_repbuf)
- return 0;
-
- policy = ctx->cc_sec->ps_policy;
- return policy->sp_cops->alloc_repbuf(ctx->cc_sec, req, msgsize);
-}
-
-/**
- * Used by ptlrpc client to free reply buffer of \a req. After this
- * req->rq_repmsg is set to NULL and should not be accessed anymore.
- */
-void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req)
-{
- struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
- struct ptlrpc_sec_policy *policy;
-
- LASSERT(ctx);
- LASSERT(ctx->cc_sec);
- LASSERT(ctx->cc_sec->ps_policy);
- LASSERT_ATOMIC_POS(&ctx->cc_refcount);
-
- if (!req->rq_repbuf)
- return;
- LASSERT(req->rq_repbuf_len);
-
- policy = ctx->cc_sec->ps_policy;
- policy->sp_cops->free_repbuf(ctx->cc_sec, req);
- req->rq_repmsg = NULL;
-}
-
-static int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp,
- struct ptlrpc_svc_ctx *ctx)
-{
- struct ptlrpc_sec_policy *policy = ctx->sc_policy;
-
- if (!policy->sp_sops->install_rctx)
- return 0;
- return policy->sp_sops->install_rctx(imp, ctx);
-}
-
-/****************************************
- * server side security *
- ****************************************/
-
-static int flavor_allowed(struct sptlrpc_flavor *exp,
- struct ptlrpc_request *req)
-{
- struct sptlrpc_flavor *flvr = &req->rq_flvr;
-
- if (exp->sf_rpc == SPTLRPC_FLVR_ANY || exp->sf_rpc == flvr->sf_rpc)
- return 1;
-
- if ((req->rq_ctx_init || req->rq_ctx_fini) &&
- SPTLRPC_FLVR_POLICY(exp->sf_rpc) ==
- SPTLRPC_FLVR_POLICY(flvr->sf_rpc) &&
- SPTLRPC_FLVR_MECH(exp->sf_rpc) == SPTLRPC_FLVR_MECH(flvr->sf_rpc))
- return 1;
-
- return 0;
-}
-
-#define EXP_FLVR_UPDATE_EXPIRE (OBD_TIMEOUT_DEFAULT + 10)
-
-/**
- * Given an export \a exp, check whether the flavor of incoming \a req
- * is allowed by the export \a exp. Main logic is about taking care of
- * changing configurations. Return 0 means success.
- */
-int sptlrpc_target_export_check(struct obd_export *exp,
- struct ptlrpc_request *req)
-{
- struct sptlrpc_flavor flavor;
-
- if (!exp)
- return 0;
-
- /* client side export has no imp_reverse, skip
- * FIXME maybe we should check the flavor here as well???
- */
- if (!exp->exp_imp_reverse)
- return 0;
-
- /* don't care about ctx fini rpc */
- if (req->rq_ctx_fini)
- return 0;
-
- spin_lock(&exp->exp_lock);
-
- /* if flavor just changed (exp->exp_flvr_changed != 0), we wait for
- * the first req with the new flavor, then treat it as the current
- * flavor and adapt the reverse sec according to it.
- * Note the first rpc with the new flavor might not be with a root ctx, in
- * which case delay the sec_adapt by leaving exp_flvr_adapt == 1.
- */
- if (unlikely(exp->exp_flvr_changed) &&
- flavor_allowed(&exp->exp_flvr_old[1], req)) {
- /* make the new flavor the "current" one, and the old ones
- * about-to-expire
- */
- CDEBUG(D_SEC, "exp %p: just changed: %x->%x\n", exp,
- exp->exp_flvr.sf_rpc, exp->exp_flvr_old[1].sf_rpc);
- flavor = exp->exp_flvr_old[1];
- exp->exp_flvr_old[1] = exp->exp_flvr_old[0];
- exp->exp_flvr_expire[1] = exp->exp_flvr_expire[0];
- exp->exp_flvr_old[0] = exp->exp_flvr;
- exp->exp_flvr_expire[0] = ktime_get_real_seconds() +
- EXP_FLVR_UPDATE_EXPIRE;
- exp->exp_flvr = flavor;
-
- /* flavor change finished */
- exp->exp_flvr_changed = 0;
- LASSERT(exp->exp_flvr_adapt == 1);
-
- /* if it's gss, we're only interested in root ctx init */
- if (req->rq_auth_gss &&
- !(req->rq_ctx_init &&
- (req->rq_auth_usr_root || req->rq_auth_usr_mdt ||
- req->rq_auth_usr_ost))) {
- spin_unlock(&exp->exp_lock);
- CDEBUG(D_SEC, "is good but not root(%d:%d:%d:%d:%d)\n",
- req->rq_auth_gss, req->rq_ctx_init,
- req->rq_auth_usr_root, req->rq_auth_usr_mdt,
- req->rq_auth_usr_ost);
- return 0;
- }
-
- exp->exp_flvr_adapt = 0;
- spin_unlock(&exp->exp_lock);
-
- return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
- req->rq_svc_ctx, &flavor);
- }
-
- /* if it equals the current flavor, we accept it, but need to
- * deal with the reverse sec/ctx
- */
- if (likely(flavor_allowed(&exp->exp_flvr, req))) {
- /* most cases should return here; we're only interested in
- * gss root ctx init
- */
- if (!req->rq_auth_gss || !req->rq_ctx_init ||
- (!req->rq_auth_usr_root && !req->rq_auth_usr_mdt &&
- !req->rq_auth_usr_ost)) {
- spin_unlock(&exp->exp_lock);
- return 0;
- }
-
- /* if the flavor just changed, we should not proceed; just leave
- * it, and the current flavor will be discovered and replaced
- * shortly, and let _this_ rpc pass through
- */
- if (exp->exp_flvr_changed) {
- LASSERT(exp->exp_flvr_adapt);
- spin_unlock(&exp->exp_lock);
- return 0;
- }
-
- if (exp->exp_flvr_adapt) {
- exp->exp_flvr_adapt = 0;
- CDEBUG(D_SEC, "exp %p (%x|%x|%x): do delayed adapt\n",
- exp, exp->exp_flvr.sf_rpc,
- exp->exp_flvr_old[0].sf_rpc,
- exp->exp_flvr_old[1].sf_rpc);
- flavor = exp->exp_flvr;
- spin_unlock(&exp->exp_lock);
-
- return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
- req->rq_svc_ctx,
- &flavor);
- } else {
- CDEBUG(D_SEC, "exp %p (%x|%x|%x): is current flavor, install rvs ctx\n",
- exp, exp->exp_flvr.sf_rpc,
- exp->exp_flvr_old[0].sf_rpc,
- exp->exp_flvr_old[1].sf_rpc);
- spin_unlock(&exp->exp_lock);
-
- return sptlrpc_svc_install_rvs_ctx(exp->exp_imp_reverse,
- req->rq_svc_ctx);
- }
- }
-
- if (exp->exp_flvr_expire[0]) {
- if (exp->exp_flvr_expire[0] >= ktime_get_real_seconds()) {
- if (flavor_allowed(&exp->exp_flvr_old[0], req)) {
- CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the middle one (%lld)\n", exp,
- exp->exp_flvr.sf_rpc,
- exp->exp_flvr_old[0].sf_rpc,
- exp->exp_flvr_old[1].sf_rpc,
- (s64)(exp->exp_flvr_expire[0] -
- ktime_get_real_seconds()));
- spin_unlock(&exp->exp_lock);
- return 0;
- }
- } else {
- CDEBUG(D_SEC, "mark middle expired\n");
- exp->exp_flvr_expire[0] = 0;
- }
- CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x not match middle\n", exp,
- exp->exp_flvr.sf_rpc,
- exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc,
- req->rq_flvr.sf_rpc);
- }
-
- /* it doesn't match the current flavor; the only remaining chance
- * to accept it is a match against an old flavor that has not yet
- * expired.
- */
- if (exp->exp_flvr_changed == 0 && exp->exp_flvr_expire[1]) {
- if (exp->exp_flvr_expire[1] >= ktime_get_real_seconds()) {
- if (flavor_allowed(&exp->exp_flvr_old[1], req)) {
- CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the oldest one (%lld)\n",
- exp,
- exp->exp_flvr.sf_rpc,
- exp->exp_flvr_old[0].sf_rpc,
- exp->exp_flvr_old[1].sf_rpc,
- (s64)(exp->exp_flvr_expire[1] -
- ktime_get_real_seconds()));
- spin_unlock(&exp->exp_lock);
- return 0;
- }
- } else {
- CDEBUG(D_SEC, "mark oldest expired\n");
- exp->exp_flvr_expire[1] = 0;
- }
- CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x not match found\n",
- exp, exp->exp_flvr.sf_rpc,
- exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc,
- req->rq_flvr.sf_rpc);
- } else {
- CDEBUG(D_SEC, "exp %p (%x|%x|%x): skip the last one\n",
- exp, exp->exp_flvr.sf_rpc, exp->exp_flvr_old[0].sf_rpc,
- exp->exp_flvr_old[1].sf_rpc);
- }
-
- spin_unlock(&exp->exp_lock);
-
- CWARN("exp %p(%s): req %p (%u|%u|%u|%u|%u|%u) with unauthorized flavor %x, expect %x|%x(%+lld)|%x(%+lld)\n",
- exp, exp->exp_obd->obd_name,
- req, req->rq_auth_gss, req->rq_ctx_init, req->rq_ctx_fini,
- req->rq_auth_usr_root, req->rq_auth_usr_mdt, req->rq_auth_usr_ost,
- req->rq_flvr.sf_rpc,
- exp->exp_flvr.sf_rpc,
- exp->exp_flvr_old[0].sf_rpc,
- exp->exp_flvr_expire[0] ?
- (s64)(exp->exp_flvr_expire[0] - ktime_get_real_seconds()) : 0,
- exp->exp_flvr_old[1].sf_rpc,
- exp->exp_flvr_expire[1] ?
- (s64)(exp->exp_flvr_expire[1] - ktime_get_real_seconds()) : 0);
- return -EACCES;
-}
-EXPORT_SYMBOL(sptlrpc_target_export_check);
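
The export keeps a short flavor history: the current flavor plus two aging predecessors, each with an expiry stamp. A self-contained sketch of the slot rotation performed above when a new flavor becomes current (the types and the expiry constant are illustrative stand-ins, not the kernel structures):

```c
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define UPDATE_EXPIRE 110	/* stand-in for EXP_FLVR_UPDATE_EXPIRE */

struct flvr_hist {
	uint32_t cur;		/* current flavor */
	uint32_t old[2];	/* [0] = middle, [1] = oldest */
	time_t expire[2];	/* when each old flavor stops matching */
};

/* rotate: the new flavor becomes current, older ones age by one slot */
static void flavor_rotate(struct flvr_hist *h, uint32_t new_flvr)
{
	h->old[1] = h->old[0];
	h->expire[1] = h->expire[0];
	h->old[0] = h->cur;
	h->expire[0] = time(NULL) + UPDATE_EXPIRE;
	h->cur = new_flvr;
}

int main(void)
{
	struct flvr_hist h = { .cur = 0x1 };

	flavor_rotate(&h, 0x2);
	flavor_rotate(&h, 0x3);
	printf("cur=%x middle=%x oldest=%x\n",
	       (unsigned)h.cur, (unsigned)h.old[0], (unsigned)h.old[1]);
	return 0;
}
```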
-
-static int sptlrpc_svc_check_from(struct ptlrpc_request *req, int svc_rc)
-{
- /* peer's claim is unreliable unless gss is being used */
- if (!req->rq_auth_gss || svc_rc == SECSVC_DROP)
- return svc_rc;
-
- switch (req->rq_sp_from) {
- case LUSTRE_SP_CLI:
- if (req->rq_auth_usr_mdt || req->rq_auth_usr_ost) {
- DEBUG_REQ(D_ERROR, req, "faked source CLI");
- svc_rc = SECSVC_DROP;
- }
- break;
- case LUSTRE_SP_MDT:
- if (!req->rq_auth_usr_mdt) {
- DEBUG_REQ(D_ERROR, req, "faked source MDT");
- svc_rc = SECSVC_DROP;
- }
- break;
- case LUSTRE_SP_OST:
- if (!req->rq_auth_usr_ost) {
- DEBUG_REQ(D_ERROR, req, "faked source OST");
- svc_rc = SECSVC_DROP;
- }
- break;
- case LUSTRE_SP_MGS:
- case LUSTRE_SP_MGC:
- if (!req->rq_auth_usr_root && !req->rq_auth_usr_mdt &&
- !req->rq_auth_usr_ost) {
- DEBUG_REQ(D_ERROR, req, "faked source MGC/MGS");
- svc_rc = SECSVC_DROP;
- }
- break;
- case LUSTRE_SP_ANY:
- default:
- DEBUG_REQ(D_ERROR, req, "invalid source %u", req->rq_sp_from);
- svc_rc = SECSVC_DROP;
- }
-
- return svc_rc;
-}
-
-/**
- * Used by the ptlrpc server to perform transformation upon the request
- * message of incoming \a req. This must be the first thing done with an
- * incoming request in the ptlrpc layer.
- *
- * \retval SECSVC_OK success; req->rq_reqmsg points to the request message
- * in clear text, of size req->rq_reqlen; req->rq_svc_ctx is also set.
- * \retval SECSVC_COMPLETE success, the request has been fully processed and
- * the reply message has been prepared.
- * \retval SECSVC_DROP failed, this request should be dropped.
- */
-int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req)
-{
- struct ptlrpc_sec_policy *policy;
- struct lustre_msg *msg = req->rq_reqbuf;
- int rc;
-
- LASSERT(msg);
- LASSERT(!req->rq_reqmsg);
- LASSERT(!req->rq_repmsg);
- LASSERT(!req->rq_svc_ctx);
-
- req->rq_req_swab_mask = 0;
-
- rc = __lustre_unpack_msg(msg, req->rq_reqdata_len);
- switch (rc) {
- case 1:
- lustre_set_req_swabbed(req, MSG_PTLRPC_HEADER_OFF);
- /* fall through */
- case 0:
- break;
- default:
- CERROR("error unpacking request from %s x%llu\n",
- libcfs_id2str(req->rq_peer), req->rq_xid);
- return SECSVC_DROP;
- }
-
- req->rq_flvr.sf_rpc = WIRE_FLVR(msg->lm_secflvr);
- req->rq_sp_from = LUSTRE_SP_ANY;
- req->rq_auth_uid = -1;
- req->rq_auth_mapped_uid = -1;
-
- policy = sptlrpc_wireflavor2policy(req->rq_flvr.sf_rpc);
- if (!policy) {
- CERROR("unsupported rpc flavor %x\n", req->rq_flvr.sf_rpc);
- return SECSVC_DROP;
- }
-
- LASSERT(policy->sp_sops->accept);
- rc = policy->sp_sops->accept(req);
- sptlrpc_policy_put(policy);
- LASSERT(req->rq_reqmsg || rc != SECSVC_OK);
- LASSERT(req->rq_svc_ctx || rc == SECSVC_DROP);
-
- /*
- * if it's not the null flavor (i.e. the real message is embedded
- * in a wrapper), reset the swab mask for the upcoming inner msg
- * unpacking.
- */
- if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL)
- req->rq_req_swab_mask = 0;
-
- /* sanity check for the request source */
- rc = sptlrpc_svc_check_from(req, rc);
- return rc;
-}
-
-/**
- * Used by the ptlrpc server to allocate a reply buffer for \a req. On
- * success, req->rq_reply_state is set and req->rq_reply_state->rs_msg
- * points to a buffer of \a msglen size.
- */
-int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, int msglen)
-{
- struct ptlrpc_sec_policy *policy;
- struct ptlrpc_reply_state *rs;
- int rc;
-
- LASSERT(req->rq_svc_ctx);
- LASSERT(req->rq_svc_ctx->sc_policy);
-
- policy = req->rq_svc_ctx->sc_policy;
- LASSERT(policy->sp_sops->alloc_rs);
-
- rc = policy->sp_sops->alloc_rs(req, msglen);
- if (unlikely(rc == -ENOMEM)) {
- struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
-
- if (svcpt->scp_service->srv_max_reply_size <
- msglen + sizeof(struct ptlrpc_reply_state)) {
- /* Just return failure if the size is too big */
- CERROR("size of message is too big (%zd), %d allowed\n",
- msglen + sizeof(struct ptlrpc_reply_state),
- svcpt->scp_service->srv_max_reply_size);
- return -ENOMEM;
- }
-
- /* failed alloc, try emergency pool */
- rs = lustre_get_emerg_rs(svcpt);
- if (!rs)
- return -ENOMEM;
-
- req->rq_reply_state = rs;
- rc = policy->sp_sops->alloc_rs(req, msglen);
- if (rc) {
- lustre_put_emerg_rs(rs);
- req->rq_reply_state = NULL;
- }
- }
-
- LASSERT(rc != 0 ||
- (req->rq_reply_state && req->rq_reply_state->rs_msg));
-
- return rc;
-}
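
The -ENOMEM path above falls back to a preallocated emergency reply state so the server can still answer under memory pressure. A generic userspace sketch of that allocate-or-borrow-reserve pattern (illustrative only, not the Lustre API itself):

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char emergency[4096];	/* small static reserve */
static int emergency_busy;

/* try the normal allocator first, fall back to the static reserve */
static void *alloc_with_reserve(size_t len)
{
	void *p = malloc(len);

	if (p)
		return p;
	if (len <= sizeof(emergency) && !emergency_busy) {
		emergency_busy = 1;
		return emergency;
	}
	return NULL;
}

static void free_with_reserve(void *p)
{
	if (p == emergency)
		emergency_busy = 0;
	else
		free(p);
}

int main(void)
{
	void *p = alloc_with_reserve(128);

	if (!p)
		return 1;
	memset(p, 0, 128);
	free_with_reserve(p);
	printf("reserve pattern ok\n");
	return 0;
}
```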
-
-/**
- * Used by the ptlrpc server to perform transformation upon the reply message.
- *
- * \post req->rq_reply_off is set to the appropriate server-controlled reply
- * offset.
- * \post req->rq_repmsg and req->rq_reply_state->rs_msg become inaccessible.
- */
-int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req)
-{
- struct ptlrpc_sec_policy *policy;
- int rc;
-
- LASSERT(req->rq_svc_ctx);
- LASSERT(req->rq_svc_ctx->sc_policy);
-
- policy = req->rq_svc_ctx->sc_policy;
- LASSERT(policy->sp_sops->authorize);
-
- rc = policy->sp_sops->authorize(req);
- LASSERT(rc || req->rq_reply_state->rs_repdata_len);
-
- return rc;
-}
-
-/**
- * Used by the ptlrpc server to free the reply_state.
- */
-void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs)
-{
- struct ptlrpc_sec_policy *policy;
- unsigned int prealloc;
-
- LASSERT(rs->rs_svc_ctx);
- LASSERT(rs->rs_svc_ctx->sc_policy);
-
- policy = rs->rs_svc_ctx->sc_policy;
- LASSERT(policy->sp_sops->free_rs);
-
- prealloc = rs->rs_prealloc;
- policy->sp_sops->free_rs(rs);
-
- if (prealloc)
- lustre_put_emerg_rs(rs);
-}
-
-void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req)
-{
- struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
-
- if (ctx)
- atomic_inc(&ctx->sc_refcount);
-}
-
-void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req)
-{
- struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
-
- if (!ctx)
- return;
-
- LASSERT_ATOMIC_POS(&ctx->sc_refcount);
- if (atomic_dec_and_test(&ctx->sc_refcount)) {
- if (ctx->sc_policy->sp_sops->free_ctx)
- ctx->sc_policy->sp_sops->free_ctx(ctx);
- }
- req->rq_svc_ctx = NULL;
-}
-
-/****************************************
- * bulk security *
- ****************************************/
-
-/**
- * Perform transformation upon the bulk data pointed to by \a desc. This is
- * called before transforming the request message.
- */
-int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
- struct ptlrpc_bulk_desc *desc)
-{
- struct ptlrpc_cli_ctx *ctx;
-
- LASSERT(req->rq_bulk_read || req->rq_bulk_write);
-
- if (!req->rq_pack_bulk)
- return 0;
-
- ctx = req->rq_cli_ctx;
- if (ctx->cc_ops->wrap_bulk)
- return ctx->cc_ops->wrap_bulk(ctx, req, desc);
- return 0;
-}
-EXPORT_SYMBOL(sptlrpc_cli_wrap_bulk);
-
-/**
- * This is called after the reply message has been unwrapped.
- * Return the number of bytes of plain text actually received, or an
- * error code.
- */
-int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
- struct ptlrpc_bulk_desc *desc,
- int nob)
-{
- struct ptlrpc_cli_ctx *ctx;
- int rc;
-
- LASSERT(req->rq_bulk_read && !req->rq_bulk_write);
-
- if (!req->rq_pack_bulk)
- return desc->bd_nob_transferred;
-
- ctx = req->rq_cli_ctx;
- if (ctx->cc_ops->unwrap_bulk) {
- rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
- if (rc < 0)
- return rc;
- }
- return desc->bd_nob_transferred;
-}
-EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_read);
-
-/**
- * This is called after the reply message has been unwrapped.
- * Return 0 on success or an error code.
- */
-int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
- struct ptlrpc_bulk_desc *desc)
-{
- struct ptlrpc_cli_ctx *ctx;
- int rc;
-
- LASSERT(!req->rq_bulk_read && req->rq_bulk_write);
-
- if (!req->rq_pack_bulk)
- return 0;
-
- ctx = req->rq_cli_ctx;
- if (ctx->cc_ops->unwrap_bulk) {
- rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
- if (rc < 0)
- return rc;
- }
-
- /*
- * if everything went right, nob should equal nob_transferred;
- * in privacy mode, nob_transferred needs to be adjusted.
- */
- if (desc->bd_nob != desc->bd_nob_transferred) {
- CERROR("nob %d doesn't match transferred nob %d\n",
- desc->bd_nob, desc->bd_nob_transferred);
- return -EPROTO;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_write);
-
-/****************************************
- * user descriptor helpers *
- ****************************************/
-
-int sptlrpc_current_user_desc_size(void)
-{
- int ngroups;
-
- ngroups = current_ngroups;
-
- if (ngroups > LUSTRE_MAX_GROUPS)
- ngroups = LUSTRE_MAX_GROUPS;
- return sptlrpc_user_desc_size(ngroups);
-}
-EXPORT_SYMBOL(sptlrpc_current_user_desc_size);
-
-int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset)
-{
- struct ptlrpc_user_desc *pud;
-
- pud = lustre_msg_buf(msg, offset, 0);
-
- if (!pud)
- return -EINVAL;
-
- pud->pud_uid = from_kuid(&init_user_ns, current_uid());
- pud->pud_gid = from_kgid(&init_user_ns, current_gid());
- pud->pud_fsuid = from_kuid(&init_user_ns, current_fsuid());
- pud->pud_fsgid = from_kgid(&init_user_ns, current_fsgid());
- pud->pud_cap = current_cap().cap[0];
- pud->pud_ngroups = (msg->lm_buflens[offset] - sizeof(*pud)) / 4;
-
- task_lock(current);
- if (pud->pud_ngroups > current_ngroups)
- pud->pud_ngroups = current_ngroups;
- memcpy(pud->pud_groups, current_cred()->group_info->gid,
- pud->pud_ngroups * sizeof(__u32));
- task_unlock(current);
-
- return 0;
-}
-EXPORT_SYMBOL(sptlrpc_pack_user_desc);
-
-int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset, int swabbed)
-{
- struct ptlrpc_user_desc *pud;
- int i;
-
- pud = lustre_msg_buf(msg, offset, sizeof(*pud));
- if (!pud)
- return -EINVAL;
-
- if (swabbed) {
- __swab32s(&pud->pud_uid);
- __swab32s(&pud->pud_gid);
- __swab32s(&pud->pud_fsuid);
- __swab32s(&pud->pud_fsgid);
- __swab32s(&pud->pud_cap);
- __swab32s(&pud->pud_ngroups);
- }
-
- if (pud->pud_ngroups > LUSTRE_MAX_GROUPS) {
- CERROR("%u groups is too large\n", pud->pud_ngroups);
- return -EINVAL;
- }
-
- if (sizeof(*pud) + pud->pud_ngroups * sizeof(__u32) >
- msg->lm_buflens[offset]) {
- CERROR("%u groups are claimed but bufsize only %u\n",
- pud->pud_ngroups, msg->lm_buflens[offset]);
- return -EINVAL;
- }
-
- if (swabbed) {
- for (i = 0; i < pud->pud_ngroups; i++)
- __swab32s(&pud->pud_groups[i]);
- }
-
- return 0;
-}
-EXPORT_SYMBOL(sptlrpc_unpack_user_desc);
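
A user descriptor is a fixed header followed by one __u32 per supplementary group, with the group count capped and cross-checked against the buffer length on unpack. A standalone sketch of the size arithmetic, using an illustrative mirror struct and cap value (the real LUSTRE_MAX_GROUPS may differ):

```c
#include <stdint.h>
#include <stdio.h>

#define MAX_GROUPS 64	/* illustrative cap, like LUSTRE_MAX_GROUPS */

/* mirror of the fixed part of a user descriptor */
struct user_desc {
	uint32_t uid, gid, fsuid, fsgid, cap, ngroups;
	uint32_t groups[];	/* one entry per supplementary group */
};

static size_t user_desc_size(unsigned int ngroups)
{
	if (ngroups > MAX_GROUPS)
		ngroups = MAX_GROUPS;
	return sizeof(struct user_desc) + ngroups * sizeof(uint32_t);
}

int main(void)
{
	/* 16 groups -> 24-byte header + 64 bytes of gids = 88 */
	printf("size(16 groups) = %zu\n", user_desc_size(16));
	printf("size(1000 groups, capped) = %zu\n", user_desc_size(1000));
	return 0;
}
```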
-
-/****************************************
- * misc helpers *
- ****************************************/
-
-const char *sec2target_str(struct ptlrpc_sec *sec)
-{
- if (!sec || !sec->ps_import || !sec->ps_import->imp_obd)
- return "*";
- if (sec_is_reverse(sec))
- return "c";
- return obd_uuid2str(&sec->ps_import->imp_obd->u.cli.cl_target_uuid);
-}
-EXPORT_SYMBOL(sec2target_str);
-
-/*
- * return true if the bulk data is protected
- */
-bool sptlrpc_flavor_has_bulk(struct sptlrpc_flavor *flvr)
-{
- switch (SPTLRPC_FLVR_BULK_SVC(flvr->sf_rpc)) {
- case SPTLRPC_BULK_SVC_INTG:
- case SPTLRPC_BULK_SVC_PRIV:
- return true;
- default:
- return false;
- }
-}
-EXPORT_SYMBOL(sptlrpc_flavor_has_bulk);
-
-/****************************************
- * crypto API helper/alloc blkcipher *
- ****************************************/
-
-/****************************************
- * initialize/finalize *
- ****************************************/
-
-int sptlrpc_init(void)
-{
- int rc;
-
- rwlock_init(&policy_lock);
-
- rc = sptlrpc_gc_init();
- if (rc)
- goto out;
-
- rc = sptlrpc_conf_init();
- if (rc)
- goto out_gc;
-
- rc = sptlrpc_enc_pool_init();
- if (rc)
- goto out_conf;
-
- rc = sptlrpc_null_init();
- if (rc)
- goto out_pool;
-
- rc = sptlrpc_plain_init();
- if (rc)
- goto out_null;
-
- sptlrpc_lproc_init();
-
- return 0;
-
-out_null:
- sptlrpc_null_fini();
-out_pool:
- sptlrpc_enc_pool_fini();
-out_conf:
- sptlrpc_conf_fini();
-out_gc:
- sptlrpc_gc_fini();
-out:
- return rc;
-}
-
-void sptlrpc_fini(void)
-{
- sptlrpc_lproc_fini();
- sptlrpc_plain_fini();
- sptlrpc_null_fini();
- sptlrpc_enc_pool_fini();
- sptlrpc_conf_fini();
- sptlrpc_gc_fini();
-}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
deleted file mode 100644
index 625b9520d78f..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
+++ /dev/null
@@ -1,572 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ptlrpc/sec_bulk.c
- *
- * Author: Eric Mei <ericm@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_SEC
-
-#include <linux/libcfs/libcfs.h>
-
-#include <obd.h>
-#include <obd_cksum.h>
-#include <obd_class.h>
-#include <obd_support.h>
-#include <lustre_net.h>
-#include <lustre_import.h>
-#include <lustre_dlm.h>
-#include <lustre_sec.h>
-
-#include "ptlrpc_internal.h"
-
-/****************************************
- * bulk encryption page pools *
- ****************************************/
-
-#define POINTERS_PER_PAGE (PAGE_SIZE / sizeof(void *))
-#define PAGES_PER_POOL (POINTERS_PER_PAGE)
-
-#define IDLE_IDX_MAX (100)
-#define IDLE_IDX_WEIGHT (3)
-
-#define CACHE_QUIESCENT_PERIOD (20)
-
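
Each pool is one page filled with page pointers, so on a 64-bit system with 4 KiB pages PAGES_PER_POOL comes out to 4096 / 8 = 512, and a single pool tracks 2 MiB of payload. A quick check of that arithmetic (the 4 KiB page size is assumed; the kernel uses PAGE_SIZE):

```c
#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096;	/* assumed; see PAGE_SIZE */
	unsigned long ptrs_per_page = page_size / sizeof(void *);

	/* one pool is an array of page pointers filling one page */
	printf("pages per pool: %lu\n", ptrs_per_page);
	printf("bytes tracked per pool: %lu\n", ptrs_per_page * page_size);
	return 0;
}
```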
-static struct ptlrpc_enc_page_pool {
- /*
- * constants
- */
- unsigned long epp_max_pages; /* maximum pages the pools can hold, const */
- unsigned int epp_max_pools; /* number of pools, const */
-
- /*
- * wait queue for when there are not enough free pages.
- */
- wait_queue_head_t epp_waitq; /* waiting threads */
- unsigned int epp_waitqlen; /* wait queue length */
- unsigned long epp_pages_short; /* # of pages wanted by queued users */
- unsigned int epp_growing:1; /* during adding pages */
-
- /*
- * indicates how idle the pools are, from 0 to IDLE_IDX_MAX.
- * It is updated each time pages are taken from the pools, not by
- * wall-clock time, so even after the system has been idle for a
- * while the idle_idx may still be low if there has been no
- * activity in the pools.
- */
- unsigned long epp_idle_idx;
-
- /* last shrink time due to memory pressure */
- time64_t epp_last_shrink;
- time64_t epp_last_access;
-
- /*
- * in-pool pages bookkeeping
- */
- spinlock_t epp_lock; /* protect following fields */
- unsigned long epp_total_pages; /* total pages in pools */
- unsigned long epp_free_pages; /* current pages available */
-
- /*
- * statistics
- */
- unsigned long epp_st_max_pages; /* # of pages ever reached */
- unsigned int epp_st_grows; /* # of grows */
- unsigned int epp_st_grow_fails; /* # of add pages failures */
- unsigned int epp_st_shrinks; /* # of shrinks */
- unsigned long epp_st_access; /* # of access */
- unsigned long epp_st_missings; /* # of cache misses */
- unsigned long epp_st_lowfree; /* lowest free pages reached */
- unsigned int epp_st_max_wqlen; /* highest waitqueue length */
- unsigned long epp_st_max_wait; /* in jiffies */
- unsigned long epp_st_outofmem; /* # of out of mem requests */
- /*
- * pointers to pools
- */
- struct page ***epp_pools;
-} page_pools;
-
-/*
- * /sys/kernel/debug/lustre/sptlrpc/encrypt_page_pools
- */
-int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
-{
- spin_lock(&page_pools.epp_lock);
-
- seq_printf(m,
- "physical pages: %lu\n"
- "pages per pool: %lu\n"
- "max pages: %lu\n"
- "max pools: %u\n"
- "total pages: %lu\n"
- "total free: %lu\n"
- "idle index: %lu/100\n"
- "last shrink: %lds\n"
- "last access: %lds\n"
- "max pages reached: %lu\n"
- "grows: %u\n"
- "grows failure: %u\n"
- "shrinks: %u\n"
- "cache access: %lu\n"
- "cache missing: %lu\n"
- "low free mark: %lu\n"
- "max waitqueue depth: %u\n"
- "max wait time: %ld/%lu\n"
- "out of mem: %lu\n",
- totalram_pages,
- PAGES_PER_POOL,
- page_pools.epp_max_pages,
- page_pools.epp_max_pools,
- page_pools.epp_total_pages,
- page_pools.epp_free_pages,
- page_pools.epp_idle_idx,
- (long)(ktime_get_seconds() - page_pools.epp_last_shrink),
- (long)(ktime_get_seconds() - page_pools.epp_last_access),
- page_pools.epp_st_max_pages,
- page_pools.epp_st_grows,
- page_pools.epp_st_grow_fails,
- page_pools.epp_st_shrinks,
- page_pools.epp_st_access,
- page_pools.epp_st_missings,
- page_pools.epp_st_lowfree,
- page_pools.epp_st_max_wqlen,
- page_pools.epp_st_max_wait,
- msecs_to_jiffies(MSEC_PER_SEC),
- page_pools.epp_st_outofmem);
-
- spin_unlock(&page_pools.epp_lock);
-
- return 0;
-}
-
-static void enc_pools_release_free_pages(long npages)
-{
- int p_idx, g_idx;
- int p_idx_max1, p_idx_max2;
-
- LASSERT(npages > 0);
- LASSERT(npages <= page_pools.epp_free_pages);
- LASSERT(page_pools.epp_free_pages <= page_pools.epp_total_pages);
-
- /* max pool index before the release */
- p_idx_max2 = (page_pools.epp_total_pages - 1) / PAGES_PER_POOL;
-
- page_pools.epp_free_pages -= npages;
- page_pools.epp_total_pages -= npages;
-
- /* max pool index after the release */
- p_idx_max1 = page_pools.epp_total_pages == 0 ? -1 :
- ((page_pools.epp_total_pages - 1) / PAGES_PER_POOL);
-
- p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
- g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
- LASSERT(page_pools.epp_pools[p_idx]);
-
- while (npages--) {
- LASSERT(page_pools.epp_pools[p_idx]);
- LASSERT(page_pools.epp_pools[p_idx][g_idx]);
-
- __free_page(page_pools.epp_pools[p_idx][g_idx]);
- page_pools.epp_pools[p_idx][g_idx] = NULL;
-
- if (++g_idx == PAGES_PER_POOL) {
- p_idx++;
- g_idx = 0;
- }
- }
-
- /* free unused pools */
- while (p_idx_max1 < p_idx_max2) {
- LASSERT(page_pools.epp_pools[p_idx_max2]);
- kfree(page_pools.epp_pools[p_idx_max2]);
- page_pools.epp_pools[p_idx_max2] = NULL;
- p_idx_max2--;
- }
-}
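
A free page with global index i lives at epp_pools[i / PAGES_PER_POOL][i % PAGES_PER_POOL]; the release loop above just walks that mapping starting from the new free-page count. The same index walk, in miniature with a toy pool size:

```c
#include <stdio.h>

#define PAGES_PER_POOL 4	/* toy value; the real one is much larger */

int main(void)
{
	unsigned long i;

	/* walk the first ten global page indices and show where they map */
	for (i = 0; i < 10; i++) {
		unsigned long p_idx = i / PAGES_PER_POOL;
		unsigned long g_idx = i % PAGES_PER_POOL;

		printf("page %2lu -> pools[%lu][%lu]\n", i, p_idx, g_idx);
	}
	return 0;
}
```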
-
-/*
- * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
- */
-static unsigned long enc_pools_shrink_count(struct shrinker *s,
- struct shrink_control *sc)
-{
- /*
- * if there has been no pool access for a long time, consider the
- * pools fully idle. A small race here is fine.
- */
- if (unlikely(ktime_get_seconds() - page_pools.epp_last_access >
- CACHE_QUIESCENT_PERIOD)) {
- spin_lock(&page_pools.epp_lock);
- page_pools.epp_idle_idx = IDLE_IDX_MAX;
- spin_unlock(&page_pools.epp_lock);
- }
-
- LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
- return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
- (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
-}
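
So the count callback offers the surplus above the PTLRPC_MAX_BRW_PAGES floor, scaled by (IDLE_IDX_MAX - epp_idle_idx) / IDLE_IDX_MAX; for example, 1000 free pages at idle index 50 offer (1000 - 256) * 50 / 100 = 372 pages. A standalone check of that formula (the floor value of 256 is illustrative):

```c
#include <stdio.h>

#define IDLE_IDX_MAX 100
#define MAX_BRW_PAGES 256	/* stand-in for PTLRPC_MAX_BRW_PAGES */

/* number of pages offered to the shrinker, as computed above */
static long shrink_count(long free_pages, long idle_idx)
{
	long surplus = free_pages - MAX_BRW_PAGES;

	if (surplus < 0)
		surplus = 0;
	return surplus * (IDLE_IDX_MAX - idle_idx) / IDLE_IDX_MAX;
}

int main(void)
{
	/* half-idle pools: (1000 - 256) * 50 / 100 = 372 */
	printf("offered: %ld\n", shrink_count(1000, 50));
	/* at the maximum idle index the scaling factor reaches zero */
	printf("offered when idle: %ld\n", shrink_count(1000, IDLE_IDX_MAX));
	return 0;
}
```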
-
-/*
- * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
- */
-static unsigned long enc_pools_shrink_scan(struct shrinker *s,
- struct shrink_control *sc)
-{
- spin_lock(&page_pools.epp_lock);
- sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan,
- page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES);
- if (sc->nr_to_scan > 0) {
- enc_pools_release_free_pages(sc->nr_to_scan);
- CDEBUG(D_SEC, "released %ld pages, %ld left\n",
- (long)sc->nr_to_scan, page_pools.epp_free_pages);
-
- page_pools.epp_st_shrinks++;
- page_pools.epp_last_shrink = ktime_get_seconds();
- }
- spin_unlock(&page_pools.epp_lock);
-
- /*
- * if there has been no pool access for a long time, consider the
- * pools fully idle. A small race here is fine.
- */
- if (unlikely(ktime_get_seconds() - page_pools.epp_last_access >
- CACHE_QUIESCENT_PERIOD)) {
- spin_lock(&page_pools.epp_lock);
- page_pools.epp_idle_idx = IDLE_IDX_MAX;
- spin_unlock(&page_pools.epp_lock);
- }
-
- LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
- return sc->nr_to_scan;
-}
-
-static inline
-int npages_to_npools(unsigned long npages)
-{
- return (int)DIV_ROUND_UP(npages, PAGES_PER_POOL);
-}
-
-/*
- * return the number of pages cleaned up.
- */
-static unsigned long enc_pools_cleanup(struct page ***pools, int npools)
-{
- unsigned long cleaned = 0;
- int i, j;
-
- for (i = 0; i < npools; i++) {
- if (pools[i]) {
- for (j = 0; j < PAGES_PER_POOL; j++) {
- if (pools[i][j]) {
- __free_page(pools[i][j]);
- cleaned++;
- }
- }
- kfree(pools[i]);
- pools[i] = NULL;
- }
- }
-
- return cleaned;
-}
-
-static inline void enc_pools_wakeup(void)
-{
- assert_spin_locked(&page_pools.epp_lock);
-
- if (unlikely(page_pools.epp_waitqlen)) {
- LASSERT(waitqueue_active(&page_pools.epp_waitq));
- wake_up_all(&page_pools.epp_waitq);
- }
-}
-
-/*
- * Export the number of free pages in the pool
- */
-int get_free_pages_in_pool(void)
-{
- return page_pools.epp_free_pages;
-}
-
-/*
- * Let the outside world know whether the enc_pool has reached full capacity
- */
-int pool_is_at_full_capacity(void)
-{
- return (page_pools.epp_total_pages == page_pools.epp_max_pages);
-}
-
-void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc)
-{
- int p_idx, g_idx;
- int i;
-
- LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type));
-
- if (!GET_ENC_KIOV(desc))
- return;
-
- LASSERT(desc->bd_iov_count > 0);
-
- spin_lock(&page_pools.epp_lock);
-
- p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
- g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
-
- LASSERT(page_pools.epp_free_pages + desc->bd_iov_count <=
- page_pools.epp_total_pages);
- LASSERT(page_pools.epp_pools[p_idx]);
-
- for (i = 0; i < desc->bd_iov_count; i++) {
- LASSERT(BD_GET_ENC_KIOV(desc, i).bv_page);
- LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]);
- LASSERT(!page_pools.epp_pools[p_idx][g_idx]);
-
- page_pools.epp_pools[p_idx][g_idx] =
- BD_GET_ENC_KIOV(desc, i).bv_page;
-
- if (++g_idx == PAGES_PER_POOL) {
- p_idx++;
- g_idx = 0;
- }
- }
-
- page_pools.epp_free_pages += desc->bd_iov_count;
-
- enc_pools_wakeup();
-
- spin_unlock(&page_pools.epp_lock);
-
- kfree(GET_ENC_KIOV(desc));
- GET_ENC_KIOV(desc) = NULL;
-}
-
-static inline void enc_pools_alloc(void)
-{
- LASSERT(page_pools.epp_max_pools);
- page_pools.epp_pools =
- kvzalloc(page_pools.epp_max_pools *
- sizeof(*page_pools.epp_pools),
- GFP_KERNEL);
-}
-
-static inline void enc_pools_free(void)
-{
- LASSERT(page_pools.epp_max_pools);
- LASSERT(page_pools.epp_pools);
-
- kvfree(page_pools.epp_pools);
-}
-
-static struct shrinker pools_shrinker = {
- .count_objects = enc_pools_shrink_count,
- .scan_objects = enc_pools_shrink_scan,
- .seeks = DEFAULT_SEEKS,
-};
-
-int sptlrpc_enc_pool_init(void)
-{
- int rc;
-
- /*
- * maximum capacity is 1/8 of total physical memory.
- * Is 1/8 a good number?
- */
- page_pools.epp_max_pages = totalram_pages / 8;
- page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages);
-
- init_waitqueue_head(&page_pools.epp_waitq);
- page_pools.epp_waitqlen = 0;
- page_pools.epp_pages_short = 0;
-
- page_pools.epp_growing = 0;
-
- page_pools.epp_idle_idx = 0;
- page_pools.epp_last_shrink = ktime_get_seconds();
- page_pools.epp_last_access = ktime_get_seconds();
-
- spin_lock_init(&page_pools.epp_lock);
- page_pools.epp_total_pages = 0;
- page_pools.epp_free_pages = 0;
-
- page_pools.epp_st_max_pages = 0;
- page_pools.epp_st_grows = 0;
- page_pools.epp_st_grow_fails = 0;
- page_pools.epp_st_shrinks = 0;
- page_pools.epp_st_access = 0;
- page_pools.epp_st_missings = 0;
- page_pools.epp_st_lowfree = 0;
- page_pools.epp_st_max_wqlen = 0;
- page_pools.epp_st_max_wait = 0;
- page_pools.epp_st_outofmem = 0;
-
- enc_pools_alloc();
- if (!page_pools.epp_pools)
- return -ENOMEM;
-
- rc = register_shrinker(&pools_shrinker);
- if (rc)
- enc_pools_free();
-
- return rc;
-}
-
-void sptlrpc_enc_pool_fini(void)
-{
- unsigned long cleaned, npools;
-
- LASSERT(page_pools.epp_pools);
- LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages);
-
- unregister_shrinker(&pools_shrinker);
-
- npools = npages_to_npools(page_pools.epp_total_pages);
- cleaned = enc_pools_cleanup(page_pools.epp_pools, npools);
- LASSERT(cleaned == page_pools.epp_total_pages);
-
- enc_pools_free();
-
- if (page_pools.epp_st_access > 0) {
- CDEBUG(D_SEC,
- "max pages %lu, grows %u, grow fails %u, shrinks %u, access %lu, missing %lu, max qlen %u, max wait %ld/%ld, out of mem %lu\n",
- page_pools.epp_st_max_pages, page_pools.epp_st_grows,
- page_pools.epp_st_grow_fails,
- page_pools.epp_st_shrinks, page_pools.epp_st_access,
- page_pools.epp_st_missings, page_pools.epp_st_max_wqlen,
- page_pools.epp_st_max_wait,
- msecs_to_jiffies(MSEC_PER_SEC),
- page_pools.epp_st_outofmem);
- }
-}
-
-static int cfs_hash_alg_id[] = {
- [BULK_HASH_ALG_NULL] = CFS_HASH_ALG_NULL,
- [BULK_HASH_ALG_ADLER32] = CFS_HASH_ALG_ADLER32,
- [BULK_HASH_ALG_CRC32] = CFS_HASH_ALG_CRC32,
- [BULK_HASH_ALG_MD5] = CFS_HASH_ALG_MD5,
- [BULK_HASH_ALG_SHA1] = CFS_HASH_ALG_SHA1,
- [BULK_HASH_ALG_SHA256] = CFS_HASH_ALG_SHA256,
- [BULK_HASH_ALG_SHA384] = CFS_HASH_ALG_SHA384,
- [BULK_HASH_ALG_SHA512] = CFS_HASH_ALG_SHA512,
-};
-
-const char *sptlrpc_get_hash_name(__u8 hash_alg)
-{
- return cfs_crypto_hash_name(cfs_hash_alg_id[hash_alg]);
-}
-
-__u8 sptlrpc_get_hash_alg(const char *algname)
-{
- return cfs_crypto_hash_alg(algname);
-}
-
-int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed)
-{
- struct ptlrpc_bulk_sec_desc *bsd;
- int size = msg->lm_buflens[offset];
-
- bsd = lustre_msg_buf(msg, offset, sizeof(*bsd));
- if (!bsd) {
- CERROR("Invalid bulk sec desc: size %d\n", size);
- return -EINVAL;
- }
-
- if (swabbed)
- __swab32s(&bsd->bsd_nob);
-
- if (unlikely(bsd->bsd_version != 0)) {
- CERROR("Unexpected version %u\n", bsd->bsd_version);
- return -EPROTO;
- }
-
- if (unlikely(bsd->bsd_type >= SPTLRPC_BULK_MAX)) {
- CERROR("Invalid type %u\n", bsd->bsd_type);
- return -EPROTO;
- }
-
- /* FIXME: more sanity checks here */
-
- if (unlikely(bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL &&
- bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG &&
- bsd->bsd_svc != SPTLRPC_BULK_SVC_PRIV)) {
- CERROR("Invalid svc %u\n", bsd->bsd_svc);
- return -EPROTO;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(bulk_sec_desc_unpack);
-
-int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
- void *buf, int buflen)
-{
- struct ahash_request *hdesc;
- int hashsize;
- unsigned int bufsize;
- int i, err;
-
- LASSERT(alg > BULK_HASH_ALG_NULL && alg < BULK_HASH_ALG_MAX);
- LASSERT(buflen >= 4);
-
- hdesc = cfs_crypto_hash_init(cfs_hash_alg_id[alg], NULL, 0);
- if (IS_ERR(hdesc)) {
- CERROR("Unable to initialize checksum hash %s\n",
- cfs_crypto_hash_name(cfs_hash_alg_id[alg]));
- return PTR_ERR(hdesc);
- }
-
- hashsize = cfs_crypto_hash_digestsize(cfs_hash_alg_id[alg]);
-
- for (i = 0; i < desc->bd_iov_count; i++) {
- cfs_crypto_hash_update_page(hdesc,
- BD_GET_KIOV(desc, i).bv_page,
- BD_GET_KIOV(desc, i).bv_offset &
- ~PAGE_MASK,
- BD_GET_KIOV(desc, i).bv_len);
- }
-
- if (hashsize > buflen) {
- unsigned char hashbuf[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
-
- bufsize = sizeof(hashbuf);
- LASSERTF(bufsize >= hashsize, "bufsize = %u < hashsize %u\n",
- bufsize, hashsize);
- err = cfs_crypto_hash_final(hdesc, hashbuf, &bufsize);
- memcpy(buf, hashbuf, buflen);
- } else {
- bufsize = buflen;
- err = cfs_crypto_hash_final(hdesc, buf, &bufsize);
- }
-
- return err;
-}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
deleted file mode 100644
index 2389f9a8f534..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
+++ /dev/null
@@ -1,850 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_SEC
-
-#include <linux/libcfs/libcfs.h>
-#include <linux/crypto.h>
-#include <linux/key.h>
-
-#include <obd.h>
-#include <obd_class.h>
-#include <obd_support.h>
-#include <lustre_import.h>
-#include <uapi/linux/lustre/lustre_param.h>
-#include <lustre_sec.h>
-
-#include "ptlrpc_internal.h"
-
-enum lustre_sec_part sptlrpc_target_sec_part(struct obd_device *obd)
-{
- const char *type = obd->obd_type->typ_name;
-
- if (!strcmp(type, LUSTRE_MDT_NAME))
- return LUSTRE_SP_MDT;
- if (!strcmp(type, LUSTRE_OST_NAME))
- return LUSTRE_SP_OST;
- if (!strcmp(type, LUSTRE_MGS_NAME))
- return LUSTRE_SP_MGS;
-
- CERROR("unknown target %p(%s)\n", obd, type);
- return LUSTRE_SP_ANY;
-}
-
-/****************************************
- * user supplied flavor string parsing *
- ****************************************/
-
-/*
- * format: <base_flavor>[-<bulk_type:alg_spec>]
- */
-int sptlrpc_parse_flavor(const char *str, struct sptlrpc_flavor *flvr)
-{
- char buf[32];
- char *bulk, *alg;
-
- memset(flvr, 0, sizeof(*flvr));
-
- if (!str || str[0] == '\0') {
- flvr->sf_rpc = SPTLRPC_FLVR_INVALID;
- return 0;
- }
-
- strlcpy(buf, str, sizeof(buf));
-
- bulk = strchr(buf, '-');
- if (bulk)
- *bulk++ = '\0';
-
- flvr->sf_rpc = sptlrpc_name2flavor_base(buf);
- if (flvr->sf_rpc == SPTLRPC_FLVR_INVALID)
- goto err_out;
-
- /*
- * currently only the base flavor "plain" can carry a bulk specification.
- */
- if (flvr->sf_rpc == SPTLRPC_FLVR_PLAIN) {
- flvr->u_bulk.hash.hash_alg = BULK_HASH_ALG_ADLER32;
- if (bulk) {
- /*
- * format: plain-hash:<hash_alg>
- */
- alg = strchr(bulk, ':');
- if (!alg)
- goto err_out;
- *alg++ = '\0';
-
- if (strcmp(bulk, "hash"))
- goto err_out;
-
- flvr->u_bulk.hash.hash_alg = sptlrpc_get_hash_alg(alg);
- if (flvr->u_bulk.hash.hash_alg >= BULK_HASH_ALG_MAX)
- goto err_out;
- }
-
- if (flvr->u_bulk.hash.hash_alg == BULK_HASH_ALG_NULL)
- flvr_set_bulk_svc(&flvr->sf_rpc, SPTLRPC_BULK_SVC_NULL);
- else
- flvr_set_bulk_svc(&flvr->sf_rpc, SPTLRPC_BULK_SVC_INTG);
- } else {
- if (bulk)
- goto err_out;
- }
-
- flvr->sf_flags = 0;
- return 0;
-
-err_out:
- CERROR("invalid flavor string: %s\n", str);
- return -EINVAL;
-}
-EXPORT_SYMBOL(sptlrpc_parse_flavor);
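
For example, "plain-hash:sha1" selects the plain base flavor with SHA-1 bulk hashing, while a bare base name carries no bulk section. A minimal userspace re-implementation of the same tokenizing, with the flavor and algorithm lookups left out:

```c
#include <stdio.h>
#include <string.h>

/* split "<base>[-<bulk_type>:<alg>]" into its parts, like the parser above */
static int parse_flavor(const char *str, char *base, char *btype, char *alg)
{
	char buf[32], *bulk, *colon;

	base[0] = btype[0] = alg[0] = '\0';
	snprintf(buf, sizeof(buf), "%s", str);

	bulk = strchr(buf, '-');
	if (bulk)
		*bulk++ = '\0';
	strcpy(base, buf);

	if (!bulk)
		return 0;
	colon = strchr(bulk, ':');
	if (!colon)
		return -1;	/* bulk section must be "<type>:<alg>" */
	*colon++ = '\0';
	strcpy(btype, bulk);
	strcpy(alg, colon);
	return 0;
}

int main(void)
{
	char base[32], btype[32], alg[32];

	if (parse_flavor("plain-hash:sha1", base, btype, alg) == 0)
		printf("base=%s bulk=%s alg=%s\n", base, btype, alg);
	return 0;
}
```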
-
-/****************************************
- * configure rules *
- ****************************************/
-
-static void get_default_flavor(struct sptlrpc_flavor *sf)
-{
- memset(sf, 0, sizeof(*sf));
-
- sf->sf_rpc = SPTLRPC_FLVR_NULL;
- sf->sf_flags = 0;
-}
-
-static void sptlrpc_rule_init(struct sptlrpc_rule *rule)
-{
- rule->sr_netid = LNET_NIDNET(LNET_NID_ANY);
- rule->sr_from = LUSTRE_SP_ANY;
- rule->sr_to = LUSTRE_SP_ANY;
- rule->sr_padding = 0;
-
- get_default_flavor(&rule->sr_flvr);
-}
-
-/*
- * format: network[.direction]=flavor
- */
-static int sptlrpc_parse_rule(char *param, struct sptlrpc_rule *rule)
-{
- char *flavor, *dir;
- int rc;
-
- sptlrpc_rule_init(rule);
-
- flavor = strchr(param, '=');
- if (!flavor) {
- CERROR("invalid param, no '='\n");
- return -EINVAL;
- }
- *flavor++ = '\0';
-
- dir = strchr(param, '.');
- if (dir)
- *dir++ = '\0';
-
- /* 1.1 network */
- if (strcmp(param, "default")) {
- rule->sr_netid = libcfs_str2net(param);
- if (rule->sr_netid == LNET_NIDNET(LNET_NID_ANY)) {
- CERROR("invalid network name: %s\n", param);
- return -EINVAL;
- }
- }
-
- /* 1.2 direction */
- if (dir) {
- if (!strcmp(dir, "mdt2ost")) {
- rule->sr_from = LUSTRE_SP_MDT;
- rule->sr_to = LUSTRE_SP_OST;
- } else if (!strcmp(dir, "mdt2mdt")) {
- rule->sr_from = LUSTRE_SP_MDT;
- rule->sr_to = LUSTRE_SP_MDT;
- } else if (!strcmp(dir, "cli2ost")) {
- rule->sr_from = LUSTRE_SP_CLI;
- rule->sr_to = LUSTRE_SP_OST;
- } else if (!strcmp(dir, "cli2mdt")) {
- rule->sr_from = LUSTRE_SP_CLI;
- rule->sr_to = LUSTRE_SP_MDT;
- } else {
- CERROR("invalid rule dir segment: %s\n", dir);
- return -EINVAL;
- }
- }
-
- /* 2.1 flavor */
- rc = sptlrpc_parse_flavor(flavor, &rule->sr_flvr);
- if (rc)
- return -EINVAL;
-
- return 0;
-}
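
A rule such as "tcp0.cli2mdt=plain" therefore reads: on LNet network tcp0, client-to-MDT traffic uses the plain flavor; "default=..." applies to every network. A compact sketch of the same two-stage split:

```c
#include <stdio.h>
#include <string.h>

/* split "network[.direction]=flavor" like sptlrpc_parse_rule() above */
int main(void)
{
	char rule[] = "tcp0.cli2mdt=plain";
	char *net = rule, *dir, *flavor;

	flavor = strchr(rule, '=');
	if (!flavor)
		return 1;	/* no '=' means an invalid rule */
	*flavor++ = '\0';

	dir = strchr(net, '.');
	if (dir)
		*dir++ = '\0';

	printf("net=%s dir=%s flavor=%s\n", net, dir ? dir : "any", flavor);
	return 0;
}
```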
-
-static void sptlrpc_rule_set_free(struct sptlrpc_rule_set *rset)
-{
- LASSERT(rset->srs_nslot ||
- (rset->srs_nrule == 0 && !rset->srs_rules));
-
- if (rset->srs_nslot) {
- kfree(rset->srs_rules);
- sptlrpc_rule_set_init(rset);
- }
-}
-
-/*
- * return 0 if the rule set could accommodate one more rule.
- */
-static int sptlrpc_rule_set_expand(struct sptlrpc_rule_set *rset)
-{
- struct sptlrpc_rule *rules;
- int nslot;
-
- might_sleep();
-
- if (rset->srs_nrule < rset->srs_nslot)
- return 0;
-
- nslot = rset->srs_nslot + 8;
-
- /* krealloc() would be a better fit here */
- rules = kcalloc(nslot, sizeof(*rset->srs_rules), GFP_NOFS);
- if (!rules)
- return -ENOMEM;
-
- if (rset->srs_nrule) {
- LASSERT(rset->srs_nslot && rset->srs_rules);
- memcpy(rules, rset->srs_rules,
- rset->srs_nrule * sizeof(*rset->srs_rules));
-
- kfree(rset->srs_rules);
- }
-
- rset->srs_rules = rules;
- rset->srs_nslot = nslot;
- return 0;
-}
-
-static inline int rule_spec_dir(struct sptlrpc_rule *rule)
-{
- return (rule->sr_from != LUSTRE_SP_ANY ||
- rule->sr_to != LUSTRE_SP_ANY);
-}
-
-static inline int rule_spec_net(struct sptlrpc_rule *rule)
-{
- return (rule->sr_netid != LNET_NIDNET(LNET_NID_ANY));
-}
-
-static inline int rule_match_dir(struct sptlrpc_rule *r1,
- struct sptlrpc_rule *r2)
-{
- return (r1->sr_from == r2->sr_from && r1->sr_to == r2->sr_to);
-}
-
-static inline int rule_match_net(struct sptlrpc_rule *r1,
- struct sptlrpc_rule *r2)
-{
- return (r1->sr_netid == r2->sr_netid);
-}
-
-/*
- * merge @rule into @rset.
- * the @rset slots might be expanded.
- */
-static int sptlrpc_rule_set_merge(struct sptlrpc_rule_set *rset,
- struct sptlrpc_rule *rule)
-{
- struct sptlrpc_rule *p = rset->srs_rules;
- int spec_dir, spec_net;
- int rc, n, match = 0;
-
- might_sleep();
-
- spec_net = rule_spec_net(rule);
- spec_dir = rule_spec_dir(rule);
-
- for (n = 0; n < rset->srs_nrule; n++) {
- p = &rset->srs_rules[n];
-
- /* test the network match; if it fails:
- * - spec rule: skip rules that are also network-specific until
- * we hit a wild rule, which means no more chance of a match
- * - wild rule: skip until we reach a rule that is also wild
- * and matches
- */
- if (!rule_match_net(p, rule)) {
- if (spec_net) {
- if (rule_spec_net(p))
- continue;
- else
- break;
- } else {
- continue;
- }
- }
-
- /* test dir match, same logic as net matching */
- if (!rule_match_dir(p, rule)) {
- if (spec_dir) {
- if (rule_spec_dir(p))
- continue;
- else
- break;
- } else {
- continue;
- }
- }
-
- /* find a match */
- match = 1;
- break;
- }
-
- if (match) {
- LASSERT(n >= 0 && n < rset->srs_nrule);
-
- if (rule->sr_flvr.sf_rpc == SPTLRPC_FLVR_INVALID) {
- /* remove this rule */
- if (n < rset->srs_nrule - 1)
- memmove(&rset->srs_rules[n],
- &rset->srs_rules[n + 1],
- (rset->srs_nrule - n - 1) *
- sizeof(*rule));
- rset->srs_nrule--;
- } else {
- /* override the rule */
- memcpy(&rset->srs_rules[n], rule, sizeof(*rule));
- }
- } else {
- LASSERT(n >= 0 && n <= rset->srs_nrule);
-
- if (rule->sr_flvr.sf_rpc != SPTLRPC_FLVR_INVALID) {
- rc = sptlrpc_rule_set_expand(rset);
- if (rc)
- return rc;
-
- if (n < rset->srs_nrule)
- memmove(&rset->srs_rules[n + 1],
- &rset->srs_rules[n],
- (rset->srs_nrule - n) * sizeof(*rule));
- memcpy(&rset->srs_rules[n], rule, sizeof(*rule));
- rset->srs_nrule++;
- } else {
- CDEBUG(D_CONFIG, "ignore the unmatched deletion\n");
- }
- }
-
- return 0;
-}
-
-/**
- * given from/to/nid, determine the matching flavor in the rule set.
- * return 1 if a match is found, otherwise return 0.
- */
-static int sptlrpc_rule_set_choose(struct sptlrpc_rule_set *rset,
- enum lustre_sec_part from,
- enum lustre_sec_part to,
- lnet_nid_t nid,
- struct sptlrpc_flavor *sf)
-{
- struct sptlrpc_rule *r;
- int n;
-
- for (n = 0; n < rset->srs_nrule; n++) {
- r = &rset->srs_rules[n];
-
- if (LNET_NIDNET(nid) != LNET_NIDNET(LNET_NID_ANY) &&
- r->sr_netid != LNET_NIDNET(LNET_NID_ANY) &&
- LNET_NIDNET(nid) != r->sr_netid)
- continue;
-
- if (from != LUSTRE_SP_ANY && r->sr_from != LUSTRE_SP_ANY &&
- from != r->sr_from)
- continue;
-
- if (to != LUSTRE_SP_ANY && r->sr_to != LUSTRE_SP_ANY &&
- to != r->sr_to)
- continue;
-
- *sf = r->sr_flvr;
- return 1;
- }
-
- return 0;
-}
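
Because the merge above keeps network- and direction-specific rules ahead of wildcard ones, the first hit in this scan is also the most specific. A toy model of that first-match-wins lookup (numeric ids and names are invented for the demo):

```c
#include <stdio.h>

#define ANY -1	/* wildcard, like LUSTRE_SP_ANY / LNET_NID_ANY */

struct rule { int net, from, to; const char *flavor; };

/* first match wins; rules are kept most-specific-first by the merge */
static const char *choose(const struct rule *r, int n,
			  int net, int from, int to)
{
	int i;

	for (i = 0; i < n; i++, r++) {
		if (r->net != ANY && net != ANY && r->net != net)
			continue;
		if (r->from != ANY && from != ANY && r->from != from)
			continue;
		if (r->to != ANY && to != ANY && r->to != to)
			continue;
		return r->flavor;
	}
	return "default";
}

int main(void)
{
	/* the specific net-0 rule sorts before the catch-all */
	struct rule rules[] = {
		{ 0, ANY, ANY, "plain" },
		{ ANY, ANY, ANY, "null" },
	};

	printf("net 0: %s\n", choose(rules, 2, 0, 1, 2));
	printf("net 1: %s\n", choose(rules, 2, 1, 1, 2));
	return 0;
}
```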
-
-/**********************************
- * sptlrpc configuration support *
- **********************************/
-
-struct sptlrpc_conf_tgt {
- struct list_head sct_list;
- char sct_name[MAX_OBD_NAME];
- struct sptlrpc_rule_set sct_rset;
-};
-
-struct sptlrpc_conf {
- struct list_head sc_list;
- char sc_fsname[MTI_NAME_MAXLEN];
- unsigned int sc_modified; /* modified during updating */
- unsigned int sc_updated:1, /* updated copy from MGS */
- sc_local:1; /* local copy from target */
- struct sptlrpc_rule_set sc_rset; /* fs general rules */
- struct list_head sc_tgts; /* target-specific rules */
-};
-
-static struct mutex sptlrpc_conf_lock;
-static LIST_HEAD(sptlrpc_confs);
-
-static inline int is_hex(char c)
-{
- return ((c >= '0' && c <= '9') ||
- (c >= 'a' && c <= 'f'));
-}
-
-static void target2fsname(const char *tgt, char *fsname, int buflen)
-{
- const char *ptr;
- int len;
-
- ptr = strrchr(tgt, '-');
- if (ptr) {
- if ((strncmp(ptr, "-MDT", 4) != 0 &&
- strncmp(ptr, "-OST", 4) != 0) ||
- !is_hex(ptr[4]) || !is_hex(ptr[5]) ||
- !is_hex(ptr[6]) || !is_hex(ptr[7]))
- ptr = NULL;
- }
-
- /* if we didn't find the pattern, treat the whole string as fsname */
- if (!ptr)
- len = strlen(tgt);
- else
- len = ptr - tgt;
-
- len = min(len, buflen - 1);
- memcpy(fsname, tgt, len);
- fsname[len] = '\0';
-}
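
So "lustre-OST0001" yields fsname "lustre", while a name without a -MDTxxxx/-OSTxxxx suffix is taken whole. A standalone copy of the suffix test; note it uses isxdigit(), which also accepts uppercase, where the stricter is_hex() above does not:

```c
#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* strip a trailing -MDTxxxx / -OSTxxxx (4 hex digits), like above */
static void tgt2fs(const char *tgt, char *fs, size_t len)
{
	const char *p = strrchr(tgt, '-');

	if (p && (!strncmp(p, "-MDT", 4) || !strncmp(p, "-OST", 4)) &&
	    isxdigit((unsigned char)p[4]) && isxdigit((unsigned char)p[5]) &&
	    isxdigit((unsigned char)p[6]) && isxdigit((unsigned char)p[7]))
		snprintf(fs, len, "%.*s", (int)(p - tgt), tgt);
	else
		snprintf(fs, len, "%s", tgt);
}

int main(void)
{
	char fs[32];

	tgt2fs("lustre-OST0001", fs, sizeof(fs));
	printf("%s\n", fs);		/* lustre */
	tgt2fs("lustre", fs, sizeof(fs));
	printf("%s\n", fs);		/* lustre */
	return 0;
}
```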
-
-static void sptlrpc_conf_free_rsets(struct sptlrpc_conf *conf)
-{
- struct sptlrpc_conf_tgt *conf_tgt, *conf_tgt_next;
-
- sptlrpc_rule_set_free(&conf->sc_rset);
-
- list_for_each_entry_safe(conf_tgt, conf_tgt_next,
- &conf->sc_tgts, sct_list) {
- sptlrpc_rule_set_free(&conf_tgt->sct_rset);
- list_del(&conf_tgt->sct_list);
- kfree(conf_tgt);
- }
- LASSERT(list_empty(&conf->sc_tgts));
-
- conf->sc_updated = 0;
- conf->sc_local = 0;
-}
-
-static void sptlrpc_conf_free(struct sptlrpc_conf *conf)
-{
- CDEBUG(D_SEC, "free sptlrpc conf %s\n", conf->sc_fsname);
-
- sptlrpc_conf_free_rsets(conf);
- list_del(&conf->sc_list);
- kfree(conf);
-}
-
-static
-struct sptlrpc_conf_tgt *sptlrpc_conf_get_tgt(struct sptlrpc_conf *conf,
- const char *name,
- int create)
-{
- struct sptlrpc_conf_tgt *conf_tgt;
-
- list_for_each_entry(conf_tgt, &conf->sc_tgts, sct_list) {
- if (strcmp(conf_tgt->sct_name, name) == 0)
- return conf_tgt;
- }
-
- if (!create)
- return NULL;
-
- conf_tgt = kzalloc(sizeof(*conf_tgt), GFP_NOFS);
- if (conf_tgt) {
- strlcpy(conf_tgt->sct_name, name, sizeof(conf_tgt->sct_name));
- sptlrpc_rule_set_init(&conf_tgt->sct_rset);
- list_add(&conf_tgt->sct_list, &conf->sc_tgts);
- }
-
- return conf_tgt;
-}
-
-static
-struct sptlrpc_conf *sptlrpc_conf_get(const char *fsname,
- int create)
-{
- struct sptlrpc_conf *conf;
- size_t len;
-
- list_for_each_entry(conf, &sptlrpc_confs, sc_list) {
- if (strcmp(conf->sc_fsname, fsname) == 0)
- return conf;
- }
-
- if (!create)
- return NULL;
-
- conf = kzalloc(sizeof(*conf), GFP_NOFS);
- if (!conf)
- return NULL;
-
- len = strlcpy(conf->sc_fsname, fsname, sizeof(conf->sc_fsname));
- if (len >= sizeof(conf->sc_fsname)) {
- kfree(conf);
- return NULL;
- }
- sptlrpc_rule_set_init(&conf->sc_rset);
- INIT_LIST_HEAD(&conf->sc_tgts);
- list_add(&conf->sc_list, &sptlrpc_confs);
-
- CDEBUG(D_SEC, "create sptlrpc conf %s\n", conf->sc_fsname);
- return conf;
-}
-
-/**
- * caller must hold conf_lock already.
- */
-static int sptlrpc_conf_merge_rule(struct sptlrpc_conf *conf,
- const char *target,
- struct sptlrpc_rule *rule)
-{
- struct sptlrpc_conf_tgt *conf_tgt;
- struct sptlrpc_rule_set *rule_set;
-
- /* fsname == target means general rules for the whole fs */
- if (strcmp(conf->sc_fsname, target) == 0) {
- rule_set = &conf->sc_rset;
- } else {
- conf_tgt = sptlrpc_conf_get_tgt(conf, target, 1);
- if (conf_tgt) {
- rule_set = &conf_tgt->sct_rset;
- } else {
- CERROR("out of memory, can't merge rule!\n");
- return -ENOMEM;
- }
- }
-
- return sptlrpc_rule_set_merge(rule_set, rule);
-}
-
-/**
- * process one LCFG_SPTLRPC_CONF record. If \a conf is NULL, look one
- * up via the target name in the record, taking conf_lock ourselves;
- * otherwise the caller already holds conf_lock.
- */
-static int __sptlrpc_process_config(struct lustre_cfg *lcfg,
- struct sptlrpc_conf *conf)
-{
- char *target, *param;
- char fsname[MTI_NAME_MAXLEN];
- struct sptlrpc_rule rule;
- int rc;
-
- target = lustre_cfg_string(lcfg, 1);
- if (!target) {
- CERROR("missing target name\n");
- return -EINVAL;
- }
-
- param = lustre_cfg_string(lcfg, 2);
- if (!param) {
- CERROR("missing parameter\n");
- return -EINVAL;
- }
-
- CDEBUG(D_SEC, "processing rule: %s.%s\n", target, param);
-
- /* parse rule to make sure the format is correct */
- if (strncmp(param, PARAM_SRPC_FLVR, sizeof(PARAM_SRPC_FLVR) - 1) != 0) {
- CERROR("Invalid sptlrpc parameter: %s\n", param);
- return -EINVAL;
- }
- param += sizeof(PARAM_SRPC_FLVR) - 1;
-
- rc = sptlrpc_parse_rule(param, &rule);
- if (rc)
- return -EINVAL;
-
- if (!conf) {
- target2fsname(target, fsname, sizeof(fsname));
-
- mutex_lock(&sptlrpc_conf_lock);
- conf = sptlrpc_conf_get(fsname, 0);
- if (!conf) {
- CERROR("can't find conf\n");
- rc = -ENOMEM;
- } else {
- rc = sptlrpc_conf_merge_rule(conf, target, &rule);
- }
- mutex_unlock(&sptlrpc_conf_lock);
- } else {
- LASSERT(mutex_is_locked(&sptlrpc_conf_lock));
- rc = sptlrpc_conf_merge_rule(conf, target, &rule);
- }
-
- if (rc == 0)
- conf->sc_modified++;
-
- return rc;
-}
-
-int sptlrpc_process_config(struct lustre_cfg *lcfg)
-{
- return __sptlrpc_process_config(lcfg, NULL);
-}
-EXPORT_SYMBOL(sptlrpc_process_config);
-
-static int logname2fsname(const char *logname, char *buf, int buflen)
-{
- char *ptr;
- int len;
-
- ptr = strrchr(logname, '-');
- if (!ptr || strcmp(ptr, "-sptlrpc")) {
- CERROR("%s is not a sptlrpc config log\n", logname);
- return -EINVAL;
- }
-
- len = min((int)(ptr - logname), buflen - 1);
-
- memcpy(buf, logname, len);
- buf[len] = '\0';
- return 0;
-}
-
-void sptlrpc_conf_log_update_begin(const char *logname)
-{
- struct sptlrpc_conf *conf;
- char fsname[16];
-
- if (logname2fsname(logname, fsname, sizeof(fsname)))
- return;
-
- mutex_lock(&sptlrpc_conf_lock);
-
- conf = sptlrpc_conf_get(fsname, 0);
- if (conf) {
- if (conf->sc_local) {
- LASSERT(conf->sc_updated == 0);
- sptlrpc_conf_free_rsets(conf);
- }
- conf->sc_modified = 0;
- }
-
- mutex_unlock(&sptlrpc_conf_lock);
-}
-EXPORT_SYMBOL(sptlrpc_conf_log_update_begin);
-
-/**
- * mark a config log as having been updated
- */
-void sptlrpc_conf_log_update_end(const char *logname)
-{
- struct sptlrpc_conf *conf;
- char fsname[16];
-
- if (logname2fsname(logname, fsname, sizeof(fsname)))
- return;
-
- mutex_lock(&sptlrpc_conf_lock);
-
- conf = sptlrpc_conf_get(fsname, 0);
- if (conf) {
- /*
- * if the original state was not "updated", make sure the
- * modified counter is > 0 to force an update of the local copy.
- */
- if (conf->sc_updated == 0)
- conf->sc_modified++;
-
- conf->sc_updated = 1;
- }
-
- mutex_unlock(&sptlrpc_conf_lock);
-}
-EXPORT_SYMBOL(sptlrpc_conf_log_update_end);
-
-void sptlrpc_conf_log_start(const char *logname)
-{
- char fsname[16];
-
- if (logname2fsname(logname, fsname, sizeof(fsname)))
- return;
-
- mutex_lock(&sptlrpc_conf_lock);
- sptlrpc_conf_get(fsname, 1);
- mutex_unlock(&sptlrpc_conf_lock);
-}
-EXPORT_SYMBOL(sptlrpc_conf_log_start);
-
-void sptlrpc_conf_log_stop(const char *logname)
-{
- struct sptlrpc_conf *conf;
- char fsname[16];
-
- if (logname2fsname(logname, fsname, sizeof(fsname)))
- return;
-
- mutex_lock(&sptlrpc_conf_lock);
- conf = sptlrpc_conf_get(fsname, 0);
- if (conf)
- sptlrpc_conf_free(conf);
- mutex_unlock(&sptlrpc_conf_lock);
-}
-EXPORT_SYMBOL(sptlrpc_conf_log_stop);
-
-static inline void flavor_set_flags(struct sptlrpc_flavor *sf,
- enum lustre_sec_part from,
- enum lustre_sec_part to,
- unsigned int fl_udesc)
-{
- /*
- * the null flavor doesn't need any flags set, and in fact
- * we'd better not set any because everybody shares a single sec.
- */
- if (sf->sf_rpc == SPTLRPC_FLVR_NULL)
- return;
-
- if (from == LUSTRE_SP_MDT) {
- /* MDT->MDT; MDT->OST */
- sf->sf_flags |= PTLRPC_SEC_FL_ROOTONLY;
- } else if (from == LUSTRE_SP_CLI && to == LUSTRE_SP_OST) {
- /* CLI->OST */
- sf->sf_flags |= PTLRPC_SEC_FL_ROOTONLY | PTLRPC_SEC_FL_BULK;
- } else if (from == LUSTRE_SP_CLI && to == LUSTRE_SP_MDT) {
- /* CLI->MDT */
- if (fl_udesc && sf->sf_rpc != SPTLRPC_FLVR_NULL)
- sf->sf_flags |= PTLRPC_SEC_FL_UDESC;
- }
-}
-
-void sptlrpc_conf_choose_flavor(enum lustre_sec_part from,
- enum lustre_sec_part to,
- struct obd_uuid *target,
- lnet_nid_t nid,
- struct sptlrpc_flavor *sf)
-{
- struct sptlrpc_conf *conf;
- struct sptlrpc_conf_tgt *conf_tgt;
- char name[MTI_NAME_MAXLEN];
- int len, rc = 0;
-
- target2fsname(target->uuid, name, sizeof(name));
-
- mutex_lock(&sptlrpc_conf_lock);
-
- conf = sptlrpc_conf_get(name, 0);
- if (!conf)
- goto out;
-
- /* convert the uuid name (supposed to end with _UUID) to a target name */
- len = strlen(target->uuid);
- LASSERT(len > 5);
- memcpy(name, target->uuid, len - 5);
- name[len - 5] = '\0';
-
- conf_tgt = sptlrpc_conf_get_tgt(conf, name, 0);
- if (conf_tgt) {
- rc = sptlrpc_rule_set_choose(&conf_tgt->sct_rset,
- from, to, nid, sf);
- if (rc)
- goto out;
- }
-
- rc = sptlrpc_rule_set_choose(&conf->sc_rset, from, to, nid, sf);
-out:
- mutex_unlock(&sptlrpc_conf_lock);
-
- if (rc == 0)
- get_default_flavor(sf);
-
- flavor_set_flags(sf, from, to, 1);
-}
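
The uuid-to-target-name conversion above simply drops the trailing "_UUID" (5 characters), so "lustre-MDT0000_UUID" becomes "lustre-MDT0000". In miniature:

```c
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *uuid = "lustre-MDT0000_UUID";
	size_t len = strlen(uuid);
	char name[64];

	/* drop the 5-character "_UUID" suffix, as the code above assumes */
	snprintf(name, sizeof(name), "%.*s", (int)(len - 5), uuid);
	printf("%s\n", name);	/* lustre-MDT0000 */
	return 0;
}
```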
-
-#define SEC_ADAPT_DELAY (10)
-
-/**
- * called by client devices to note that the sptlrpc config has changed;
- * the actual import_sec_adapt is done later.
- */
-void sptlrpc_conf_client_adapt(struct obd_device *obd)
-{
- struct obd_import *imp;
-
- LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
- strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) == 0);
- CDEBUG(D_SEC, "obd %s\n", obd->u.cli.cl_target_uuid.uuid);
-
- /* serialize with connect/disconnect import */
- down_read_nested(&obd->u.cli.cl_sem, OBD_CLI_SEM_MDCOSC);
-
- imp = obd->u.cli.cl_import;
- if (imp) {
- spin_lock(&imp->imp_lock);
- if (imp->imp_sec)
- imp->imp_sec_expire = ktime_get_real_seconds() +
- SEC_ADAPT_DELAY;
- spin_unlock(&imp->imp_lock);
- }
-
- up_read(&obd->u.cli.cl_sem);
-}
-EXPORT_SYMBOL(sptlrpc_conf_client_adapt);
-
-int sptlrpc_conf_init(void)
-{
- mutex_init(&sptlrpc_conf_lock);
- return 0;
-}
-
-void sptlrpc_conf_fini(void)
-{
- struct sptlrpc_conf *conf, *conf_next;
-
- mutex_lock(&sptlrpc_conf_lock);
- list_for_each_entry_safe(conf, conf_next, &sptlrpc_confs, sc_list) {
- sptlrpc_conf_free(conf);
- }
- LASSERT(list_empty(&sptlrpc_confs));
- mutex_unlock(&sptlrpc_conf_lock);
-}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
deleted file mode 100644
index 2c8bad7b7877..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
+++ /dev/null
@@ -1,190 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ptlrpc/sec_gc.c
- *
- * Author: Eric Mei <ericm@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_SEC
-
-#include <linux/libcfs/libcfs.h>
-
-#include <obd_support.h>
-#include <obd_class.h>
-#include <lustre_net.h>
-#include <lustre_sec.h>
-
-#include "ptlrpc_internal.h"
-
-#define SEC_GC_INTERVAL (30 * 60)
-
-static struct mutex sec_gc_mutex;
-static LIST_HEAD(sec_gc_list);
-static spinlock_t sec_gc_list_lock;
-
-static LIST_HEAD(sec_gc_ctx_list);
-static spinlock_t sec_gc_ctx_list_lock;
-
-static atomic_t sec_gc_wait_del = ATOMIC_INIT(0);
-
-void sptlrpc_gc_add_sec(struct ptlrpc_sec *sec)
-{
- LASSERT(sec->ps_policy->sp_cops->gc_ctx);
- LASSERT(sec->ps_gc_interval > 0);
- LASSERT(list_empty(&sec->ps_gc_list));
-
- sec->ps_gc_next = ktime_get_real_seconds() + sec->ps_gc_interval;
-
- spin_lock(&sec_gc_list_lock);
- list_add_tail(&sec->ps_gc_list, &sec_gc_list);
- spin_unlock(&sec_gc_list_lock);
-
- CDEBUG(D_SEC, "added sec %p(%s)\n", sec, sec->ps_policy->sp_name);
-}
-
-void sptlrpc_gc_del_sec(struct ptlrpc_sec *sec)
-{
- if (list_empty(&sec->ps_gc_list))
- return;
-
- might_sleep();
-
- /* signal before list_del to make iteration in gc thread safe */
- atomic_inc(&sec_gc_wait_del);
-
- spin_lock(&sec_gc_list_lock);
- list_del_init(&sec->ps_gc_list);
- spin_unlock(&sec_gc_list_lock);
-
- /* barrier: cycling the mutex waits out any gc pass in progress */
- mutex_lock(&sec_gc_mutex);
- mutex_unlock(&sec_gc_mutex);
-
- atomic_dec(&sec_gc_wait_del);
-
- CDEBUG(D_SEC, "del sec %p(%s)\n", sec, sec->ps_policy->sp_name);
-}
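
Cycling sec_gc_mutex works as a barrier because the gc thread holds that mutex for an entire pass over the list: once we acquire and release it, any pass that might still have seen our entry has finished. A pthread rendition of the idiom (illustrative, not the kernel locking API):

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t gc_mutex = PTHREAD_MUTEX_INITIALIZER;

/* the gc side holds the mutex for the whole pass over the list */
static void *gc_pass(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&gc_mutex);
	/* ... iterate the list ... */
	pthread_mutex_unlock(&gc_mutex);
	return NULL;
}

/* the deleter unlinks its entry, then waits out any in-flight pass */
static void del_entry(void)
{
	/* list_del_init(...) under the list lock would go here */
	pthread_mutex_lock(&gc_mutex);	/* barrier: pass finished */
	pthread_mutex_unlock(&gc_mutex);
	/* now safe to free the entry */
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, gc_pass, NULL);
	del_entry();
	pthread_join(t, NULL);
	printf("barrier idiom ok\n");
	return 0;
}
```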
-
-static void sec_process_ctx_list(void)
-{
- struct ptlrpc_cli_ctx *ctx;
-
- spin_lock(&sec_gc_ctx_list_lock);
-
- while (!list_empty(&sec_gc_ctx_list)) {
- ctx = list_entry(sec_gc_ctx_list.next,
- struct ptlrpc_cli_ctx, cc_gc_chain);
- list_del_init(&ctx->cc_gc_chain);
- spin_unlock(&sec_gc_ctx_list_lock);
-
- LASSERT(ctx->cc_sec);
- LASSERT(atomic_read(&ctx->cc_refcount) == 1);
- CDEBUG(D_SEC, "gc pick up ctx %p(%u->%s)\n",
- ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec));
- sptlrpc_cli_ctx_put(ctx, 1);
-
- spin_lock(&sec_gc_ctx_list_lock);
- }
-
- spin_unlock(&sec_gc_ctx_list_lock);
-}
-
-static void sec_do_gc(struct ptlrpc_sec *sec)
-{
- LASSERT(sec->ps_policy->sp_cops->gc_ctx);
-
- if (unlikely(sec->ps_gc_next == 0)) {
- CDEBUG(D_SEC, "sec %p(%s) has 0 gc time\n",
- sec, sec->ps_policy->sp_name);
- return;
- }
-
- CDEBUG(D_SEC, "check on sec %p(%s)\n", sec, sec->ps_policy->sp_name);
-
- if (sec->ps_gc_next > ktime_get_real_seconds())
- return;
-
- sec->ps_policy->sp_cops->gc_ctx(sec);
- sec->ps_gc_next = ktime_get_real_seconds() + sec->ps_gc_interval;
-}
-
-static void sec_gc_main(struct work_struct *ws);
-static DECLARE_DELAYED_WORK(sec_gc_work, sec_gc_main);
-
-static void sec_gc_main(struct work_struct *ws)
-{
- struct ptlrpc_sec *sec;
-
- sec_process_ctx_list();
-again:
- /* go through the sec list and do gc.
- * FIXME: we iterate through the whole list each time, which is not
- * optimal; a balanced binary tree ordered by expiry time would be
- * better.
- * Another issue: we wake up at a fixed interval instead of
- * according to each sec's expiry time.
- */
- mutex_lock(&sec_gc_mutex);
- list_for_each_entry(sec, &sec_gc_list, ps_gc_list) {
- /* if someone is waiting to be deleted, let it
- * proceed as soon as possible.
- */
- if (atomic_read(&sec_gc_wait_del)) {
- CDEBUG(D_SEC, "deletion pending, start over\n");
- mutex_unlock(&sec_gc_mutex);
- goto again;
- }
-
- sec_do_gc(sec);
- }
- mutex_unlock(&sec_gc_mutex);
-
- /* check ctx list again before sleep */
- sec_process_ctx_list();
- schedule_delayed_work(&sec_gc_work, SEC_GC_INTERVAL * HZ);
-}
-
-int sptlrpc_gc_init(void)
-{
- mutex_init(&sec_gc_mutex);
- spin_lock_init(&sec_gc_list_lock);
- spin_lock_init(&sec_gc_ctx_list_lock);
-
- schedule_delayed_work(&sec_gc_work, 0);
- return 0;
-}
-
-void sptlrpc_gc_fini(void)
-{
- cancel_delayed_work_sync(&sec_gc_work);
-}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c
deleted file mode 100644
index 2bb75ebd5d98..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c
+++ /dev/null
@@ -1,170 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ptlrpc/sec_lproc.c
- *
- * Author: Eric Mei <ericm@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_SEC
-
-#include <linux/libcfs/libcfs.h>
-#include <linux/crypto.h>
-
-#include <obd.h>
-#include <obd_class.h>
-#include <obd_support.h>
-#include <lustre_net.h>
-#include <lustre_import.h>
-#include <lustre_dlm.h>
-#include <lustre_sec.h>
-
-#include "ptlrpc_internal.h"
-
-static char *sec_flags2str(unsigned long flags, char *buf, int bufsize)
-{
- buf[0] = '\0';
-
- if (flags & PTLRPC_SEC_FL_REVERSE)
- strlcat(buf, "reverse,", bufsize);
- if (flags & PTLRPC_SEC_FL_ROOTONLY)
- strlcat(buf, "rootonly,", bufsize);
- if (flags & PTLRPC_SEC_FL_UDESC)
- strlcat(buf, "udesc,", bufsize);
- if (flags & PTLRPC_SEC_FL_BULK)
- strlcat(buf, "bulk,", bufsize);
- if (buf[0] == '\0')
- strlcat(buf, "-,", bufsize);
-
- return buf;
-}
-
-static int sptlrpc_info_lprocfs_seq_show(struct seq_file *seq, void *v)
-{
- struct obd_device *dev = seq->private;
- struct client_obd *cli = &dev->u.cli;
- struct ptlrpc_sec *sec = NULL;
- char str[32];
-
- LASSERT(strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 ||
- strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
- strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) == 0);
-
- if (cli->cl_import)
- sec = sptlrpc_import_sec_ref(cli->cl_import);
- if (!sec)
- goto out;
-
- sec_flags2str(sec->ps_flvr.sf_flags, str, sizeof(str));
-
- seq_printf(seq, "rpc flavor: %s\n",
- sptlrpc_flavor2name_base(sec->ps_flvr.sf_rpc));
- seq_printf(seq, "bulk flavor: %s\n",
- sptlrpc_flavor2name_bulk(&sec->ps_flvr, str, sizeof(str)));
- seq_printf(seq, "flags: %s\n",
- sec_flags2str(sec->ps_flvr.sf_flags, str, sizeof(str)));
- seq_printf(seq, "id: %d\n", sec->ps_id);
- seq_printf(seq, "refcount: %d\n",
- atomic_read(&sec->ps_refcount));
- seq_printf(seq, "nctx: %d\n", atomic_read(&sec->ps_nctx));
- seq_printf(seq, "gc internal %ld\n", sec->ps_gc_interval);
- seq_printf(seq, "gc next %lld\n",
- sec->ps_gc_interval ?
- (s64)(sec->ps_gc_next - ktime_get_real_seconds()) : 0ll);
-
- sptlrpc_sec_put(sec);
-out:
- return 0;
-}
-
-LPROC_SEQ_FOPS_RO(sptlrpc_info_lprocfs);
-
-static int sptlrpc_ctxs_lprocfs_seq_show(struct seq_file *seq, void *v)
-{
- struct obd_device *dev = seq->private;
- struct client_obd *cli = &dev->u.cli;
- struct ptlrpc_sec *sec = NULL;
-
- LASSERT(strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 ||
- strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
- strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) == 0);
-
- if (cli->cl_import)
- sec = sptlrpc_import_sec_ref(cli->cl_import);
- if (!sec)
- goto out;
-
- if (sec->ps_policy->sp_cops->display)
- sec->ps_policy->sp_cops->display(sec, seq);
-
- sptlrpc_sec_put(sec);
-out:
- return 0;
-}
-
-LPROC_SEQ_FOPS_RO(sptlrpc_ctxs_lprocfs);
-
-int sptlrpc_lprocfs_cliobd_attach(struct obd_device *dev)
-{
- if (strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) != 0 &&
- strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) != 0 &&
- strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) != 0) {
- CERROR("can't register lproc for obd type %s\n",
- dev->obd_type->typ_name);
- return -EINVAL;
- }
-
- debugfs_create_file("srpc_info", 0444, dev->obd_debugfs_entry, dev,
- &sptlrpc_info_lprocfs_fops);
- debugfs_create_file("srpc_contexts", 0444, dev->obd_debugfs_entry, dev,
- &sptlrpc_ctxs_lprocfs_fops);
-
- return 0;
-}
-EXPORT_SYMBOL(sptlrpc_lprocfs_cliobd_attach);
-
-LPROC_SEQ_FOPS_RO(sptlrpc_proc_enc_pool);
-static struct lprocfs_vars sptlrpc_lprocfs_vars[] = {
- { "encrypt_page_pools", &sptlrpc_proc_enc_pool_fops },
- { NULL }
-};
-
-static struct dentry *sptlrpc_debugfs_dir;
-
-void sptlrpc_lproc_init(void)
-{
- sptlrpc_debugfs_dir = debugfs_create_dir("sptlrpc", debugfs_lustre_root);
- ldebugfs_add_vars(sptlrpc_debugfs_dir, sptlrpc_lprocfs_vars, NULL);
-}
-
-void sptlrpc_lproc_fini(void)
-{
- debugfs_remove_recursive(sptlrpc_debugfs_dir);
-}
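LPROC_SEQ_FOPS_RO() above wraps a seq_file show routine into a read-only set of file operations for debugfs. Mainline kernels provide the equivalent DEFINE_SHOW_ATTRIBUTE() helper; a minimal sketch of the same read-only debugfs attribute pattern (the directory and file names here are made up):

	#include <linux/debugfs.h>
	#include <linux/module.h>
	#include <linux/seq_file.h>

	static struct dentry *demo_dir;

	static int demo_info_show(struct seq_file *seq, void *v)
	{
		seq_printf(seq, "flags: %s\n", "rootonly,bulk");
		return 0;
	}
	DEFINE_SHOW_ATTRIBUTE(demo_info);	/* generates demo_info_fops */

	static int __init demo_init(void)
	{
		demo_dir = debugfs_create_dir("demo", NULL);
		debugfs_create_file("info", 0444, demo_dir, NULL,
				    &demo_info_fops);
		return 0;
	}

	static void __exit demo_exit(void)
	{
		debugfs_remove_recursive(demo_dir);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");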
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_null.c b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c
deleted file mode 100644
index ecc387d1b9b4..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_null.c
+++ /dev/null
@@ -1,459 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ptlrpc/sec_null.c
- *
- * Author: Eric Mei <ericm@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_SEC
-
-#include <obd_support.h>
-#include <obd_cksum.h>
-#include <obd_class.h>
-#include <lustre_net.h>
-#include <lustre_sec.h>
-
-#include "ptlrpc_internal.h"
-
-static struct ptlrpc_sec_policy null_policy;
-static struct ptlrpc_sec null_sec;
-static struct ptlrpc_cli_ctx null_cli_ctx;
-static struct ptlrpc_svc_ctx null_svc_ctx;
-
-/*
- * we can temporarily use the topmost 8 bits of lm_secflvr to identify
- * the source sec part.
- */
-static inline
-void null_encode_sec_part(struct lustre_msg *msg, enum lustre_sec_part sp)
-{
- msg->lm_secflvr |= (((__u32)sp) & 0xFF) << 24;
-}
-
-static inline
-enum lustre_sec_part null_decode_sec_part(struct lustre_msg *msg)
-{
- return (msg->lm_secflvr >> 24) & 0xFF;
-}
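The two helpers above stash the source security part in the otherwise unused top byte of lm_secflvr and recover it on the receiving side. A tiny user-space sketch of the same round trip (struct msg stands in for struct lustre_msg):

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	struct msg { uint32_t lm_secflvr; };

	static void encode_sec_part(struct msg *m, uint32_t sp)
	{
		m->lm_secflvr |= (sp & 0xFF) << 24;	/* top byte */
	}

	static uint32_t decode_sec_part(const struct msg *m)
	{
		return (m->lm_secflvr >> 24) & 0xFF;
	}

	int main(void)
	{
		struct msg m = { .lm_secflvr = 0x000000AB }; /* flavor bits */

		encode_sec_part(&m, 0x03);
		assert(decode_sec_part(&m) == 0x03);
		/* the flavor in the low 24 bits is untouched */
		assert((m.lm_secflvr & 0x00FFFFFF) == 0xAB);
		printf("lm_secflvr = 0x%08x\n", m.lm_secflvr);
		return 0;
	}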
-
-static int null_ctx_refresh(struct ptlrpc_cli_ctx *ctx)
-{
- /* should never reach here */
- LBUG();
- return 0;
-}
-
-static
-int null_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
-{
- req->rq_reqbuf->lm_secflvr = SPTLRPC_FLVR_NULL;
-
- if (!req->rq_import->imp_dlm_fake) {
- struct obd_device *obd = req->rq_import->imp_obd;
-
- null_encode_sec_part(req->rq_reqbuf,
- obd->u.cli.cl_sp_me);
- }
- req->rq_reqdata_len = req->rq_reqlen;
- return 0;
-}
-
-static
-int null_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
-{
- __u32 cksums, cksumc;
-
- LASSERT(req->rq_repdata);
-
- req->rq_repmsg = req->rq_repdata;
- req->rq_replen = req->rq_repdata_len;
-
- if (req->rq_early) {
- cksums = lustre_msg_get_cksum(req->rq_repdata);
- cksumc = lustre_msg_calc_cksum(req->rq_repmsg);
- if (cksumc != cksums) {
- CDEBUG(D_SEC,
- "early reply checksum mismatch: %08x != %08x\n",
- cksumc, cksums);
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
-static
-struct ptlrpc_sec *null_create_sec(struct obd_import *imp,
- struct ptlrpc_svc_ctx *svc_ctx,
- struct sptlrpc_flavor *sf)
-{
- LASSERT(SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_NULL);
-
-	/* the generic layer has taken a module reference for us; because we
-	 * never really destroy the sec, simply release the reference here.
-	 */
- sptlrpc_policy_put(&null_policy);
- return &null_sec;
-}
-
-static
-void null_destroy_sec(struct ptlrpc_sec *sec)
-{
- LASSERT(sec == &null_sec);
-}
-
-static
-struct ptlrpc_cli_ctx *null_lookup_ctx(struct ptlrpc_sec *sec,
- struct vfs_cred *vcred,
- int create, int remove_dead)
-{
- atomic_inc(&null_cli_ctx.cc_refcount);
- return &null_cli_ctx;
-}
-
-static
-int null_flush_ctx_cache(struct ptlrpc_sec *sec,
- uid_t uid,
- int grace, int force)
-{
- return 0;
-}
-
-static
-int null_alloc_reqbuf(struct ptlrpc_sec *sec,
- struct ptlrpc_request *req,
- int msgsize)
-{
- if (!req->rq_reqbuf) {
- int alloc_size = size_roundup_power2(msgsize);
-
- LASSERT(!req->rq_pool);
- req->rq_reqbuf = kvzalloc(alloc_size, GFP_NOFS);
- if (!req->rq_reqbuf)
- return -ENOMEM;
-
- req->rq_reqbuf_len = alloc_size;
- } else {
- LASSERT(req->rq_pool);
- LASSERT(req->rq_reqbuf_len >= msgsize);
- memset(req->rq_reqbuf, 0, msgsize);
- }
-
- req->rq_reqmsg = req->rq_reqbuf;
- return 0;
-}
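null_alloc_reqbuf() rounds the allocation up with size_roundup_power2() so that later enlarge/shrink cycles tend to land on the same buffer size and avoid reallocations. That helper is defined elsewhere in the tree; a plausible user-space equivalent, for illustration only:

	#include <assert.h>
	#include <stddef.h>

	/* plausible equivalent of size_roundup_power2(): the smallest
	 * power of two that is >= size */
	static size_t roundup_power2(size_t size)
	{
		size_t n = 1;

		while (n < size)
			n <<= 1;
		return n;
	}

	int main(void)
	{
		assert(roundup_power2(1) == 1);
		assert(roundup_power2(4096) == 4096);
		assert(roundup_power2(4097) == 8192);
		return 0;
	}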
-
-static
-void null_free_reqbuf(struct ptlrpc_sec *sec,
- struct ptlrpc_request *req)
-{
- if (!req->rq_pool) {
- LASSERTF(req->rq_reqmsg == req->rq_reqbuf,
- "req %p: reqmsg %p is not reqbuf %p in null sec\n",
- req, req->rq_reqmsg, req->rq_reqbuf);
- LASSERTF(req->rq_reqbuf_len >= req->rq_reqlen,
- "req %p: reqlen %d should smaller than buflen %d\n",
- req, req->rq_reqlen, req->rq_reqbuf_len);
-
- kvfree(req->rq_reqbuf);
- req->rq_reqbuf = NULL;
- req->rq_reqbuf_len = 0;
- }
-}
-
-static
-int null_alloc_repbuf(struct ptlrpc_sec *sec,
- struct ptlrpc_request *req,
- int msgsize)
-{
-	/* add space for early reply */
- msgsize += lustre_msg_early_size();
-
- msgsize = size_roundup_power2(msgsize);
-
- req->rq_repbuf = kvzalloc(msgsize, GFP_NOFS);
- if (!req->rq_repbuf)
- return -ENOMEM;
-
- req->rq_repbuf_len = msgsize;
- return 0;
-}
-
-static
-void null_free_repbuf(struct ptlrpc_sec *sec,
- struct ptlrpc_request *req)
-{
- LASSERT(req->rq_repbuf);
-
- kvfree(req->rq_repbuf);
- req->rq_repbuf = NULL;
- req->rq_repbuf_len = 0;
-}
-
-static
-int null_enlarge_reqbuf(struct ptlrpc_sec *sec,
- struct ptlrpc_request *req,
- int segment, int newsize)
-{
- struct lustre_msg *newbuf;
- struct lustre_msg *oldbuf = req->rq_reqmsg;
- int oldsize, newmsg_size, alloc_size;
-
- LASSERT(req->rq_reqbuf);
- LASSERT(req->rq_reqbuf == req->rq_reqmsg);
- LASSERT(req->rq_reqbuf_len >= req->rq_reqlen);
- LASSERT(req->rq_reqlen == lustre_packed_msg_size(oldbuf));
-
- /* compute new message size */
- oldsize = req->rq_reqbuf->lm_buflens[segment];
- req->rq_reqbuf->lm_buflens[segment] = newsize;
- newmsg_size = lustre_packed_msg_size(oldbuf);
- req->rq_reqbuf->lm_buflens[segment] = oldsize;
-
- /* request from pool should always have enough buffer */
- LASSERT(!req->rq_pool || req->rq_reqbuf_len >= newmsg_size);
-
- if (req->rq_reqbuf_len < newmsg_size) {
- alloc_size = size_roundup_power2(newmsg_size);
-
- newbuf = kvzalloc(alloc_size, GFP_NOFS);
- if (!newbuf)
- return -ENOMEM;
-
-		/* Must lock this, so that the otherwise unprotected change of
-		 * rq_reqmsg does not race with threads traversing
-		 * imp_replay_list in parallel. See LU-3333.
-		 * This is a band-aid at best; we really need to deal with this
-		 * in the request enlarging code, before the unpacking that is
-		 * already there
-		 */
- if (req->rq_import)
- spin_lock(&req->rq_import->imp_lock);
- memcpy(newbuf, req->rq_reqbuf, req->rq_reqlen);
-
- kvfree(req->rq_reqbuf);
- req->rq_reqbuf = newbuf;
- req->rq_reqmsg = newbuf;
- req->rq_reqbuf_len = alloc_size;
-
- if (req->rq_import)
- spin_unlock(&req->rq_import->imp_lock);
- }
-
- _sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize);
- req->rq_reqlen = newmsg_size;
-
- return 0;
-}
-
-static struct ptlrpc_svc_ctx null_svc_ctx = {
- .sc_refcount = ATOMIC_INIT(1),
- .sc_policy = &null_policy,
-};
-
-static
-int null_accept(struct ptlrpc_request *req)
-{
- LASSERT(SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) ==
- SPTLRPC_POLICY_NULL);
-
- if (req->rq_flvr.sf_rpc != SPTLRPC_FLVR_NULL) {
- CERROR("Invalid rpc flavor 0x%x\n", req->rq_flvr.sf_rpc);
- return SECSVC_DROP;
- }
-
- req->rq_sp_from = null_decode_sec_part(req->rq_reqbuf);
-
- req->rq_reqmsg = req->rq_reqbuf;
- req->rq_reqlen = req->rq_reqdata_len;
-
- req->rq_svc_ctx = &null_svc_ctx;
- atomic_inc(&req->rq_svc_ctx->sc_refcount);
-
- return SECSVC_OK;
-}
-
-static
-int null_alloc_rs(struct ptlrpc_request *req, int msgsize)
-{
- struct ptlrpc_reply_state *rs;
- int rs_size = sizeof(*rs) + msgsize;
-
- LASSERT(msgsize % 8 == 0);
-
- rs = req->rq_reply_state;
-
- if (rs) {
- /* pre-allocated */
- LASSERT(rs->rs_size >= rs_size);
- } else {
- rs = kvzalloc(rs_size, GFP_NOFS);
- if (!rs)
- return -ENOMEM;
-
- rs->rs_size = rs_size;
- }
-
- rs->rs_svc_ctx = req->rq_svc_ctx;
- atomic_inc(&req->rq_svc_ctx->sc_refcount);
-
- rs->rs_repbuf = (struct lustre_msg *)(rs + 1);
- rs->rs_repbuf_len = rs_size - sizeof(*rs);
- rs->rs_msg = rs->rs_repbuf;
-
- req->rq_reply_state = rs;
- return 0;
-}
-
-static
-void null_free_rs(struct ptlrpc_reply_state *rs)
-{
- LASSERT_ATOMIC_GT(&rs->rs_svc_ctx->sc_refcount, 1);
- atomic_dec(&rs->rs_svc_ctx->sc_refcount);
-
- if (!rs->rs_prealloc)
- kvfree(rs);
-}
-
-static
-int null_authorize(struct ptlrpc_request *req)
-{
- struct ptlrpc_reply_state *rs = req->rq_reply_state;
-
- LASSERT(rs);
-
- rs->rs_repbuf->lm_secflvr = SPTLRPC_FLVR_NULL;
- rs->rs_repdata_len = req->rq_replen;
-
- if (likely(req->rq_packed_final)) {
- if (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)
- req->rq_reply_off = lustre_msg_early_size();
- else
- req->rq_reply_off = 0;
- } else {
- __u32 cksum;
-
- cksum = lustre_msg_calc_cksum(rs->rs_repbuf);
- lustre_msg_set_cksum(rs->rs_repbuf, cksum);
- req->rq_reply_off = 0;
- }
-
- return 0;
-}
-
-static struct ptlrpc_ctx_ops null_ctx_ops = {
- .refresh = null_ctx_refresh,
- .sign = null_ctx_sign,
- .verify = null_ctx_verify,
-};
-
-static struct ptlrpc_sec_cops null_sec_cops = {
- .create_sec = null_create_sec,
- .destroy_sec = null_destroy_sec,
- .lookup_ctx = null_lookup_ctx,
- .flush_ctx_cache = null_flush_ctx_cache,
- .alloc_reqbuf = null_alloc_reqbuf,
- .alloc_repbuf = null_alloc_repbuf,
- .free_reqbuf = null_free_reqbuf,
- .free_repbuf = null_free_repbuf,
- .enlarge_reqbuf = null_enlarge_reqbuf,
-};
-
-static struct ptlrpc_sec_sops null_sec_sops = {
- .accept = null_accept,
- .alloc_rs = null_alloc_rs,
- .authorize = null_authorize,
- .free_rs = null_free_rs,
-};
-
-static struct ptlrpc_sec_policy null_policy = {
- .sp_owner = THIS_MODULE,
- .sp_name = "sec.null",
- .sp_policy = SPTLRPC_POLICY_NULL,
- .sp_cops = &null_sec_cops,
- .sp_sops = &null_sec_sops,
-};
-
-static void null_init_internal(void)
-{
- static HLIST_HEAD(__list);
-
- null_sec.ps_policy = &null_policy;
- atomic_set(&null_sec.ps_refcount, 1); /* always busy */
- null_sec.ps_id = -1;
- null_sec.ps_import = NULL;
- null_sec.ps_flvr.sf_rpc = SPTLRPC_FLVR_NULL;
- null_sec.ps_flvr.sf_flags = 0;
- null_sec.ps_part = LUSTRE_SP_ANY;
- null_sec.ps_dying = 0;
- spin_lock_init(&null_sec.ps_lock);
- atomic_set(&null_sec.ps_nctx, 1); /* for "null_cli_ctx" */
- INIT_LIST_HEAD(&null_sec.ps_gc_list);
- null_sec.ps_gc_interval = 0;
- null_sec.ps_gc_next = 0;
-
- hlist_add_head(&null_cli_ctx.cc_cache, &__list);
- atomic_set(&null_cli_ctx.cc_refcount, 1); /* for hash */
- null_cli_ctx.cc_sec = &null_sec;
- null_cli_ctx.cc_ops = &null_ctx_ops;
- null_cli_ctx.cc_expire = 0;
- null_cli_ctx.cc_flags = PTLRPC_CTX_CACHED | PTLRPC_CTX_ETERNAL |
- PTLRPC_CTX_UPTODATE;
- null_cli_ctx.cc_vcred.vc_uid = 0;
- spin_lock_init(&null_cli_ctx.cc_lock);
- INIT_LIST_HEAD(&null_cli_ctx.cc_req_list);
- INIT_LIST_HEAD(&null_cli_ctx.cc_gc_chain);
-}
-
-int sptlrpc_null_init(void)
-{
- int rc;
-
- null_init_internal();
-
- rc = sptlrpc_register_policy(&null_policy);
- if (rc)
- CERROR("failed to register %s: %d\n", null_policy.sp_name, rc);
-
- return rc;
-}
-
-void sptlrpc_null_fini(void)
-{
- int rc;
-
- rc = sptlrpc_unregister_policy(&null_policy);
- if (rc)
- CERROR("failed to unregister %s: %d\n",
- null_policy.sp_name, rc);
-}
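sptlrpc_null_init()/sptlrpc_null_fini() follow the usual ops-table registration pattern: a statically initialized policy struct full of function pointers is registered into a table keyed by its policy number, and unregistered on module exit. A stripped-down user-space sketch of such a registry (the real sptlrpc_register_policy() additionally takes a lock and manages module references):

	#include <assert.h>
	#include <errno.h>
	#include <stddef.h>

	#define POLICY_MAX 8

	struct sec_policy {
		const char *sp_name;
		unsigned int sp_policy;	/* index, e.g. SPTLRPC_POLICY_NULL */
	};

	static struct sec_policy *policies[POLICY_MAX];

	static int register_policy(struct sec_policy *pol)
	{
		if (pol->sp_policy >= POLICY_MAX)
			return -EINVAL;
		if (policies[pol->sp_policy])
			return -EALREADY;
		policies[pol->sp_policy] = pol;
		return 0;
	}

	static int unregister_policy(struct sec_policy *pol)
	{
		if (pol->sp_policy >= POLICY_MAX ||
		    policies[pol->sp_policy] != pol)
			return -ENOENT;
		policies[pol->sp_policy] = NULL;
		return 0;
	}

	int main(void)
	{
		struct sec_policy null_pol = {
			.sp_name = "sec.null", .sp_policy = 0,
		};

		assert(register_policy(&null_pol) == 0);
		assert(register_policy(&null_pol) == -EALREADY);
		assert(unregister_policy(&null_pol) == 0);
		return 0;
	}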
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
deleted file mode 100644
index ec3d9af76b17..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
+++ /dev/null
@@ -1,1023 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ptlrpc/sec_plain.c
- *
- * Author: Eric Mei <ericm@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_SEC
-
-#include <obd_support.h>
-#include <obd_cksum.h>
-#include <obd_class.h>
-#include <lustre_net.h>
-#include <lustre_sec.h>
-#include "ptlrpc_internal.h"
-
-struct plain_sec {
- struct ptlrpc_sec pls_base;
- rwlock_t pls_lock;
- struct ptlrpc_cli_ctx *pls_ctx;
-};
-
-static inline struct plain_sec *sec2plsec(struct ptlrpc_sec *sec)
-{
- return container_of(sec, struct plain_sec, pls_base);
-}
-
-static struct ptlrpc_sec_policy plain_policy;
-static struct ptlrpc_ctx_ops plain_ctx_ops;
-static struct ptlrpc_svc_ctx plain_svc_ctx;
-
-static unsigned int plain_at_offset;
-
-/*
- * for simplicity, plain policy RPCs use a fixed layout.
- */
-#define PLAIN_PACK_SEGMENTS (4)
-
-#define PLAIN_PACK_HDR_OFF (0)
-#define PLAIN_PACK_MSG_OFF (1)
-#define PLAIN_PACK_USER_OFF (2)
-#define PLAIN_PACK_BULK_OFF (3)
-
-#define PLAIN_FL_USER (0x01)
-#define PLAIN_FL_BULK (0x02)
-
-struct plain_header {
- __u8 ph_ver; /* 0 */
- __u8 ph_flags;
- __u8 ph_sp; /* source */
- __u8 ph_bulk_hash_alg; /* complete flavor desc */
- __u8 ph_pad[4];
-};
-
-struct plain_bulk_token {
- __u8 pbt_hash[8];
-};
-
-#define PLAIN_BSD_SIZE \
- (sizeof(struct ptlrpc_bulk_sec_desc) + sizeof(struct plain_bulk_token))
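Every plain RPC therefore carries exactly PLAIN_PACK_SEGMENTS buffers at fixed offsets; unused segments simply have zero length. A rough sketch of how a fixed segment layout maps to a packed wire size, assuming a small per-message header, a 32-bit length per segment, and 8-byte alignment of segment payloads (the header layout here is illustrative, not the real lustre_msg_size_v2()):

	#include <stdint.h>
	#include <stdio.h>

	#define PLAIN_PACK_SEGMENTS 4

	/* illustrative packing rule: a fixed header, one u32 length per
	 * segment, and each payload padded to an 8-byte boundary */
	static uint32_t packed_size(int bufcount, const uint32_t *buflens)
	{
		uint32_t size = 8 + 4 * (uint32_t)bufcount; /* assumed hdr */
		int i;

		for (i = 0; i < bufcount; i++)
			size += (buflens[i] + 7) & ~7u;
		return size;
	}

	int main(void)
	{
		/* hdr, msg, no udesc, no bulk descriptor */
		uint32_t lens[PLAIN_PACK_SEGMENTS] = { 8, 200, 0, 0 };

		printf("packed size: %u bytes\n",
		       packed_size(PLAIN_PACK_SEGMENTS, lens));
		return 0;
	}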
-
-/****************************************
- * bulk checksum helpers *
- ****************************************/
-
-static int plain_unpack_bsd(struct lustre_msg *msg, int swabbed)
-{
- struct ptlrpc_bulk_sec_desc *bsd;
-
- if (bulk_sec_desc_unpack(msg, PLAIN_PACK_BULK_OFF, swabbed))
- return -EPROTO;
-
- bsd = lustre_msg_buf(msg, PLAIN_PACK_BULK_OFF, PLAIN_BSD_SIZE);
- if (!bsd) {
- CERROR("bulk sec desc has short size %d\n",
- lustre_msg_buflen(msg, PLAIN_PACK_BULK_OFF));
- return -EPROTO;
- }
-
- if (bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL &&
- bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG) {
- CERROR("invalid bulk svc %u\n", bsd->bsd_svc);
- return -EPROTO;
- }
-
- return 0;
-}
-
-static int plain_generate_bulk_csum(struct ptlrpc_bulk_desc *desc,
- __u8 hash_alg,
- struct plain_bulk_token *token)
-{
- if (hash_alg == BULK_HASH_ALG_NULL)
- return 0;
-
- memset(token->pbt_hash, 0, sizeof(token->pbt_hash));
- return sptlrpc_get_bulk_checksum(desc, hash_alg, token->pbt_hash,
- sizeof(token->pbt_hash));
-}
-
-static int plain_verify_bulk_csum(struct ptlrpc_bulk_desc *desc,
- __u8 hash_alg,
- struct plain_bulk_token *tokenr)
-{
- struct plain_bulk_token tokenv;
- int rc;
-
- if (hash_alg == BULK_HASH_ALG_NULL)
- return 0;
-
- memset(&tokenv.pbt_hash, 0, sizeof(tokenv.pbt_hash));
- rc = sptlrpc_get_bulk_checksum(desc, hash_alg, tokenv.pbt_hash,
- sizeof(tokenv.pbt_hash));
- if (rc)
- return rc;
-
- if (memcmp(tokenr->pbt_hash, tokenv.pbt_hash, sizeof(tokenr->pbt_hash)))
- return -EACCES;
- return 0;
-}
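plain_verify_bulk_csum() follows the standard recompute-and-compare shape: hash the received payload locally and memcmp() the result against the token that came over the wire. A self-contained sketch with a stand-in FNV-1a hash in place of sptlrpc_get_bulk_checksum() (the hash choice is hypothetical, for illustration only):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* stand-in hash: FNV-1a, truncated into the 8-byte token */
	static void bulk_hash(const void *data, size_t len, uint8_t out[8])
	{
		uint64_t h = 0xcbf29ce484222325ULL;
		const uint8_t *p = data;
		size_t i;

		for (i = 0; i < len; i++)
			h = (h ^ p[i]) * 0x100000001b3ULL;
		memcpy(out, &h, 8);
	}

	/* recompute locally and compare with the token from the wire,
	 * mirroring plain_verify_bulk_csum() above */
	static int verify_bulk(const void *data, size_t len,
			       const uint8_t wire[8])
	{
		uint8_t local[8];

		bulk_hash(data, len, local);
		return memcmp(wire, local, 8) ? -13 /* -EACCES */ : 0;
	}

	int main(void)
	{
		char buf[] = "bulk payload";
		uint8_t token[8];

		bulk_hash(buf, sizeof(buf), token);
		printf("intact:    %d\n", verify_bulk(buf, sizeof(buf), token));
		buf[0] ^= 0x1;	/* flip one bit, as corrupt_bulk_data() does */
		printf("corrupted: %d\n", verify_bulk(buf, sizeof(buf), token));
		return 0;
	}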
-
-static void corrupt_bulk_data(struct ptlrpc_bulk_desc *desc)
-{
- char *ptr;
- unsigned int off, i;
-
- LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type));
-
- for (i = 0; i < desc->bd_iov_count; i++) {
- if (!BD_GET_KIOV(desc, i).bv_len)
- continue;
-
- ptr = kmap(BD_GET_KIOV(desc, i).bv_page);
- off = BD_GET_KIOV(desc, i).bv_offset & ~PAGE_MASK;
- ptr[off] ^= 0x1;
- kunmap(BD_GET_KIOV(desc, i).bv_page);
- return;
- }
-}
-
-/****************************************
- * cli_ctx apis *
- ****************************************/
-
-static
-int plain_ctx_refresh(struct ptlrpc_cli_ctx *ctx)
-{
- /* should never reach here */
- LBUG();
- return 0;
-}
-
-static
-int plain_ctx_validate(struct ptlrpc_cli_ctx *ctx)
-{
- return 0;
-}
-
-static
-int plain_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
-{
- struct lustre_msg *msg = req->rq_reqbuf;
- struct plain_header *phdr;
-
- msg->lm_secflvr = req->rq_flvr.sf_rpc;
-
- phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, 0);
- phdr->ph_ver = 0;
- phdr->ph_flags = 0;
- phdr->ph_sp = ctx->cc_sec->ps_part;
- phdr->ph_bulk_hash_alg = req->rq_flvr.u_bulk.hash.hash_alg;
-
- if (req->rq_pack_udesc)
- phdr->ph_flags |= PLAIN_FL_USER;
- if (req->rq_pack_bulk)
- phdr->ph_flags |= PLAIN_FL_BULK;
-
- req->rq_reqdata_len = lustre_msg_size_v2(msg->lm_bufcount,
- msg->lm_buflens);
- return 0;
-}
-
-static
-int plain_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
-{
- struct lustre_msg *msg = req->rq_repdata;
- struct plain_header *phdr;
- __u32 cksum;
- int swabbed;
-
- if (msg->lm_bufcount != PLAIN_PACK_SEGMENTS) {
- CERROR("unexpected reply buf count %u\n", msg->lm_bufcount);
- return -EPROTO;
- }
-
- swabbed = ptlrpc_rep_need_swab(req);
-
- phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, sizeof(*phdr));
- if (!phdr) {
- CERROR("missing plain header\n");
- return -EPROTO;
- }
-
- if (phdr->ph_ver != 0) {
- CERROR("Invalid header version\n");
- return -EPROTO;
- }
-
- /* expect no user desc in reply */
- if (phdr->ph_flags & PLAIN_FL_USER) {
- CERROR("Unexpected udesc flag in reply\n");
- return -EPROTO;
- }
-
- if (phdr->ph_bulk_hash_alg != req->rq_flvr.u_bulk.hash.hash_alg) {
- CERROR("reply bulk flavor %u != %u\n", phdr->ph_bulk_hash_alg,
- req->rq_flvr.u_bulk.hash.hash_alg);
- return -EPROTO;
- }
-
- if (unlikely(req->rq_early)) {
- unsigned int hsize = 4;
-
- cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32,
- lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF,
- 0),
- lustre_msg_buflen(msg,
- PLAIN_PACK_MSG_OFF),
- NULL, 0, (unsigned char *)&cksum,
- &hsize);
- if (cksum != msg->lm_cksum) {
- CDEBUG(D_SEC,
- "early reply checksum mismatch: %08x != %08x\n",
- cpu_to_le32(cksum), msg->lm_cksum);
- return -EINVAL;
- }
- } else {
- /* whether we sent with bulk or not, we expect the same
- * in reply, except for early reply
- */
- if (!req->rq_early &&
- !equi(req->rq_pack_bulk == 1,
- phdr->ph_flags & PLAIN_FL_BULK)) {
- CERROR("%s bulk checksum in reply\n",
- req->rq_pack_bulk ? "Missing" : "Unexpected");
- return -EPROTO;
- }
-
- if (phdr->ph_flags & PLAIN_FL_BULK) {
- if (plain_unpack_bsd(msg, swabbed))
- return -EPROTO;
- }
- }
-
- req->rq_repmsg = lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0);
- req->rq_replen = lustre_msg_buflen(msg, PLAIN_PACK_MSG_OFF);
- return 0;
-}
-
-static
-int plain_cli_wrap_bulk(struct ptlrpc_cli_ctx *ctx,
- struct ptlrpc_request *req,
- struct ptlrpc_bulk_desc *desc)
-{
- struct ptlrpc_bulk_sec_desc *bsd;
- struct plain_bulk_token *token;
- int rc;
-
- LASSERT(req->rq_pack_bulk);
- LASSERT(req->rq_reqbuf->lm_bufcount == PLAIN_PACK_SEGMENTS);
-
- bsd = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
- token = (struct plain_bulk_token *)bsd->bsd_data;
-
- bsd->bsd_version = 0;
- bsd->bsd_flags = 0;
- bsd->bsd_type = SPTLRPC_BULK_DEFAULT;
- bsd->bsd_svc = SPTLRPC_FLVR_BULK_SVC(req->rq_flvr.sf_rpc);
-
- if (bsd->bsd_svc == SPTLRPC_BULK_SVC_NULL)
- return 0;
-
- if (req->rq_bulk_read)
- return 0;
-
- rc = plain_generate_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
- token);
- if (rc) {
- CERROR("bulk write: failed to compute checksum: %d\n", rc);
- } else {
-		/*
-		 * for sending we only compute a wrong checksum instead of
-		 * corrupting the data, so it is still correct on a redo
-		 */
- if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_SEND) &&
- req->rq_flvr.u_bulk.hash.hash_alg != BULK_HASH_ALG_NULL)
- token->pbt_hash[0] ^= 0x1;
- }
-
- return rc;
-}
-
-static
-int plain_cli_unwrap_bulk(struct ptlrpc_cli_ctx *ctx,
- struct ptlrpc_request *req,
- struct ptlrpc_bulk_desc *desc)
-{
- struct ptlrpc_bulk_sec_desc *bsdv;
- struct plain_bulk_token *tokenv;
- int rc;
- int i, nob;
-
- LASSERT(req->rq_pack_bulk);
- LASSERT(req->rq_reqbuf->lm_bufcount == PLAIN_PACK_SEGMENTS);
- LASSERT(req->rq_repdata->lm_bufcount == PLAIN_PACK_SEGMENTS);
-
- bsdv = lustre_msg_buf(req->rq_repdata, PLAIN_PACK_BULK_OFF, 0);
- tokenv = (struct plain_bulk_token *)bsdv->bsd_data;
-
- if (req->rq_bulk_write) {
- if (bsdv->bsd_flags & BSD_FL_ERR)
- return -EIO;
- return 0;
- }
-
- /* fix the actual data size */
- for (i = 0, nob = 0; i < desc->bd_iov_count; i++) {
- struct bio_vec bv_desc = BD_GET_KIOV(desc, i);
-
- if (bv_desc.bv_len + nob > desc->bd_nob_transferred)
- bv_desc.bv_len = desc->bd_nob_transferred - nob;
- nob += bv_desc.bv_len;
- }
-
- rc = plain_verify_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
- tokenv);
- if (rc)
- CERROR("bulk read: client verify failed: %d\n", rc);
-
- return rc;
-}
-
-/****************************************
- * sec apis *
- ****************************************/
-
-static
-struct ptlrpc_cli_ctx *plain_sec_install_ctx(struct plain_sec *plsec)
-{
- struct ptlrpc_cli_ctx *ctx, *ctx_new;
-
- ctx_new = kzalloc(sizeof(*ctx_new), GFP_NOFS);
-
- write_lock(&plsec->pls_lock);
-
- ctx = plsec->pls_ctx;
- if (ctx) {
- atomic_inc(&ctx->cc_refcount);
-
- kfree(ctx_new);
- } else if (ctx_new) {
- ctx = ctx_new;
-
- atomic_set(&ctx->cc_refcount, 1); /* for cache */
- ctx->cc_sec = &plsec->pls_base;
- ctx->cc_ops = &plain_ctx_ops;
- ctx->cc_expire = 0;
- ctx->cc_flags = PTLRPC_CTX_CACHED | PTLRPC_CTX_UPTODATE;
- ctx->cc_vcred.vc_uid = 0;
- spin_lock_init(&ctx->cc_lock);
- INIT_LIST_HEAD(&ctx->cc_req_list);
- INIT_LIST_HEAD(&ctx->cc_gc_chain);
-
- plsec->pls_ctx = ctx;
- atomic_inc(&plsec->pls_base.ps_nctx);
- atomic_inc(&plsec->pls_base.ps_refcount);
-
- atomic_inc(&ctx->cc_refcount); /* for caller */
- }
-
- write_unlock(&plsec->pls_lock);
-
- return ctx;
-}
-
-static
-void plain_destroy_sec(struct ptlrpc_sec *sec)
-{
- struct plain_sec *plsec = sec2plsec(sec);
-
- LASSERT(sec->ps_policy == &plain_policy);
- LASSERT(sec->ps_import);
- LASSERT(atomic_read(&sec->ps_refcount) == 0);
- LASSERT(atomic_read(&sec->ps_nctx) == 0);
- LASSERT(!plsec->pls_ctx);
-
- class_import_put(sec->ps_import);
-
- kfree(plsec);
-}
-
-static
-void plain_kill_sec(struct ptlrpc_sec *sec)
-{
- sec->ps_dying = 1;
-}
-
-static
-struct ptlrpc_sec *plain_create_sec(struct obd_import *imp,
- struct ptlrpc_svc_ctx *svc_ctx,
- struct sptlrpc_flavor *sf)
-{
- struct plain_sec *plsec;
- struct ptlrpc_sec *sec;
- struct ptlrpc_cli_ctx *ctx;
-
- LASSERT(SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN);
-
- plsec = kzalloc(sizeof(*plsec), GFP_NOFS);
- if (!plsec)
- return NULL;
-
- /*
- * initialize plain_sec
- */
- rwlock_init(&plsec->pls_lock);
- plsec->pls_ctx = NULL;
-
- sec = &plsec->pls_base;
- sec->ps_policy = &plain_policy;
- atomic_set(&sec->ps_refcount, 0);
- atomic_set(&sec->ps_nctx, 0);
- sec->ps_id = sptlrpc_get_next_secid();
- sec->ps_import = class_import_get(imp);
- sec->ps_flvr = *sf;
- spin_lock_init(&sec->ps_lock);
- INIT_LIST_HEAD(&sec->ps_gc_list);
- sec->ps_gc_interval = 0;
- sec->ps_gc_next = 0;
-
- /* install ctx immediately if this is a reverse sec */
- if (svc_ctx) {
- ctx = plain_sec_install_ctx(plsec);
- if (!ctx) {
- plain_destroy_sec(sec);
- return NULL;
- }
- sptlrpc_cli_ctx_put(ctx, 1);
- }
-
- return sec;
-}
-
-static
-struct ptlrpc_cli_ctx *plain_lookup_ctx(struct ptlrpc_sec *sec,
- struct vfs_cred *vcred,
- int create, int remove_dead)
-{
- struct plain_sec *plsec = sec2plsec(sec);
- struct ptlrpc_cli_ctx *ctx;
-
- read_lock(&plsec->pls_lock);
- ctx = plsec->pls_ctx;
- if (ctx)
- atomic_inc(&ctx->cc_refcount);
- read_unlock(&plsec->pls_lock);
-
- if (unlikely(!ctx))
- ctx = plain_sec_install_ctx(plsec);
-
- return ctx;
-}
-
-static
-void plain_release_ctx(struct ptlrpc_sec *sec,
- struct ptlrpc_cli_ctx *ctx, int sync)
-{
- LASSERT(atomic_read(&sec->ps_refcount) > 0);
- LASSERT(atomic_read(&sec->ps_nctx) > 0);
- LASSERT(atomic_read(&ctx->cc_refcount) == 0);
- LASSERT(ctx->cc_sec == sec);
-
- kfree(ctx);
-
- atomic_dec(&sec->ps_nctx);
- sptlrpc_sec_put(sec);
-}
-
-static
-int plain_flush_ctx_cache(struct ptlrpc_sec *sec,
- uid_t uid, int grace, int force)
-{
- struct plain_sec *plsec = sec2plsec(sec);
- struct ptlrpc_cli_ctx *ctx;
-
-	/* do nothing unless caller wants to flush for 'all' */
- if (uid != -1)
- return 0;
-
- write_lock(&plsec->pls_lock);
- ctx = plsec->pls_ctx;
- plsec->pls_ctx = NULL;
- write_unlock(&plsec->pls_lock);
-
- if (ctx)
- sptlrpc_cli_ctx_put(ctx, 1);
- return 0;
-}
-
-static
-int plain_alloc_reqbuf(struct ptlrpc_sec *sec,
- struct ptlrpc_request *req,
- int msgsize)
-{
- __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, };
- int alloc_len;
-
- buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header);
- buflens[PLAIN_PACK_MSG_OFF] = msgsize;
-
- if (req->rq_pack_udesc)
- buflens[PLAIN_PACK_USER_OFF] = sptlrpc_current_user_desc_size();
-
- if (req->rq_pack_bulk) {
- LASSERT(req->rq_bulk_read || req->rq_bulk_write);
- buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE;
- }
-
- alloc_len = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens);
-
- if (!req->rq_reqbuf) {
- LASSERT(!req->rq_pool);
-
- alloc_len = size_roundup_power2(alloc_len);
- req->rq_reqbuf = kvzalloc(alloc_len, GFP_NOFS);
- if (!req->rq_reqbuf)
- return -ENOMEM;
-
- req->rq_reqbuf_len = alloc_len;
- } else {
- LASSERT(req->rq_pool);
- LASSERT(req->rq_reqbuf_len >= alloc_len);
- memset(req->rq_reqbuf, 0, alloc_len);
- }
-
- lustre_init_msg_v2(req->rq_reqbuf, PLAIN_PACK_SEGMENTS, buflens, NULL);
- req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, 0);
-
- if (req->rq_pack_udesc) {
- int rc = sptlrpc_pack_user_desc(req->rq_reqbuf,
- PLAIN_PACK_USER_OFF);
- if (rc < 0)
- return rc;
- }
-
- return 0;
-}
-
-static
-void plain_free_reqbuf(struct ptlrpc_sec *sec,
- struct ptlrpc_request *req)
-{
- if (!req->rq_pool) {
- kvfree(req->rq_reqbuf);
- req->rq_reqbuf = NULL;
- req->rq_reqbuf_len = 0;
- }
-}
-
-static
-int plain_alloc_repbuf(struct ptlrpc_sec *sec,
- struct ptlrpc_request *req,
- int msgsize)
-{
- __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, };
- int alloc_len;
-
- buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header);
- buflens[PLAIN_PACK_MSG_OFF] = msgsize;
-
- if (req->rq_pack_bulk) {
- LASSERT(req->rq_bulk_read || req->rq_bulk_write);
- buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE;
- }
-
- alloc_len = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens);
-
- /* add space for early reply */
- alloc_len += plain_at_offset;
-
- alloc_len = size_roundup_power2(alloc_len);
-
- req->rq_repbuf = kvzalloc(alloc_len, GFP_NOFS);
- if (!req->rq_repbuf)
- return -ENOMEM;
-
- req->rq_repbuf_len = alloc_len;
- return 0;
-}
-
-static
-void plain_free_repbuf(struct ptlrpc_sec *sec,
- struct ptlrpc_request *req)
-{
- kvfree(req->rq_repbuf);
- req->rq_repbuf = NULL;
- req->rq_repbuf_len = 0;
-}
-
-static
-int plain_enlarge_reqbuf(struct ptlrpc_sec *sec,
- struct ptlrpc_request *req,
- int segment, int newsize)
-{
- struct lustre_msg *newbuf;
- int oldsize;
- int newmsg_size, newbuf_size;
-
- LASSERT(req->rq_reqbuf);
- LASSERT(req->rq_reqbuf_len >= req->rq_reqlen);
- LASSERT(lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, 0) ==
- req->rq_reqmsg);
-
- /* compute new embedded msg size. */
- oldsize = req->rq_reqmsg->lm_buflens[segment];
- req->rq_reqmsg->lm_buflens[segment] = newsize;
- newmsg_size = lustre_msg_size_v2(req->rq_reqmsg->lm_bufcount,
- req->rq_reqmsg->lm_buflens);
- req->rq_reqmsg->lm_buflens[segment] = oldsize;
-
- /* compute new wrapper msg size. */
- oldsize = req->rq_reqbuf->lm_buflens[PLAIN_PACK_MSG_OFF];
- req->rq_reqbuf->lm_buflens[PLAIN_PACK_MSG_OFF] = newmsg_size;
- newbuf_size = lustre_msg_size_v2(req->rq_reqbuf->lm_bufcount,
- req->rq_reqbuf->lm_buflens);
- req->rq_reqbuf->lm_buflens[PLAIN_PACK_MSG_OFF] = oldsize;
-
- /* request from pool should always have enough buffer */
- LASSERT(!req->rq_pool || req->rq_reqbuf_len >= newbuf_size);
-
- if (req->rq_reqbuf_len < newbuf_size) {
- newbuf_size = size_roundup_power2(newbuf_size);
-
- newbuf = kvzalloc(newbuf_size, GFP_NOFS);
- if (!newbuf)
- return -ENOMEM;
-
-		/* Must lock this, so that the otherwise unprotected change of
-		 * rq_reqmsg does not race with threads traversing
-		 * imp_replay_list in parallel. See LU-3333.
-		 * This is a band-aid at best; we really need to deal with this
-		 * in the request enlarging code, before the unpacking that is
-		 * already there
-		 */
- if (req->rq_import)
- spin_lock(&req->rq_import->imp_lock);
-
- memcpy(newbuf, req->rq_reqbuf, req->rq_reqbuf_len);
-
- kvfree(req->rq_reqbuf);
- req->rq_reqbuf = newbuf;
- req->rq_reqbuf_len = newbuf_size;
- req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf,
- PLAIN_PACK_MSG_OFF, 0);
-
- if (req->rq_import)
- spin_unlock(&req->rq_import->imp_lock);
- }
-
- _sptlrpc_enlarge_msg_inplace(req->rq_reqbuf, PLAIN_PACK_MSG_OFF,
- newmsg_size);
- _sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize);
-
- req->rq_reqlen = newmsg_size;
- return 0;
-}
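plain_enlarge_reqbuf() grows the request buffer by allocating the new power-of-two buffer first, then copying and swapping pointers under imp_lock so concurrent readers of rq_reqmsg never see a half-switched buffer. The core grow-by-copy step, sketched in user-space C without the locking:

	#include <assert.h>
	#include <errno.h>
	#include <stdlib.h>
	#include <string.h>

	/* allocate the bigger power-of-two buffer first, then copy and
	 * swap -- in the kernel code the copy+swap runs under imp_lock */
	static int enlarge_buf(void **buf, size_t *buflen, size_t used,
			       size_t newsize)
	{
		size_t alloc = 1;
		void *newbuf;

		if (*buflen >= newsize)
			return 0;	/* already big enough */
		while (alloc < newsize)
			alloc <<= 1;
		newbuf = calloc(1, alloc);
		if (!newbuf)
			return -ENOMEM;
		memcpy(newbuf, *buf, used);
		free(*buf);
		*buf = newbuf;
		*buflen = alloc;
		return 0;
	}

	int main(void)
	{
		size_t len = 128;
		void *buf = calloc(1, len);

		assert(enlarge_buf(&buf, &len, 100, 300) == 0);
		assert(len == 512);	/* rounded up to a power of two */
		free(buf);
		return 0;
	}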
-
-/****************************************
- * service apis *
- ****************************************/
-
-static struct ptlrpc_svc_ctx plain_svc_ctx = {
- .sc_refcount = ATOMIC_INIT(1),
- .sc_policy = &plain_policy,
-};
-
-static
-int plain_accept(struct ptlrpc_request *req)
-{
- struct lustre_msg *msg = req->rq_reqbuf;
- struct plain_header *phdr;
- int swabbed;
-
- LASSERT(SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) ==
- SPTLRPC_POLICY_PLAIN);
-
- if (SPTLRPC_FLVR_BASE(req->rq_flvr.sf_rpc) !=
- SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_PLAIN) ||
- SPTLRPC_FLVR_BULK_TYPE(req->rq_flvr.sf_rpc) !=
- SPTLRPC_FLVR_BULK_TYPE(SPTLRPC_FLVR_PLAIN)) {
- CERROR("Invalid rpc flavor %x\n", req->rq_flvr.sf_rpc);
- return SECSVC_DROP;
- }
-
- if (msg->lm_bufcount < PLAIN_PACK_SEGMENTS) {
- CERROR("unexpected request buf count %u\n", msg->lm_bufcount);
- return SECSVC_DROP;
- }
-
- swabbed = ptlrpc_req_need_swab(req);
-
- phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, sizeof(*phdr));
- if (!phdr) {
- CERROR("missing plain header\n");
- return -EPROTO;
- }
-
- if (phdr->ph_ver != 0) {
- CERROR("Invalid header version\n");
- return -EPROTO;
- }
-
- if (phdr->ph_bulk_hash_alg >= BULK_HASH_ALG_MAX) {
- CERROR("invalid hash algorithm: %u\n", phdr->ph_bulk_hash_alg);
- return -EPROTO;
- }
-
- req->rq_sp_from = phdr->ph_sp;
- req->rq_flvr.u_bulk.hash.hash_alg = phdr->ph_bulk_hash_alg;
-
- if (phdr->ph_flags & PLAIN_FL_USER) {
- if (sptlrpc_unpack_user_desc(msg, PLAIN_PACK_USER_OFF,
- swabbed)) {
- CERROR("Mal-formed user descriptor\n");
- return SECSVC_DROP;
- }
-
- req->rq_pack_udesc = 1;
- req->rq_user_desc = lustre_msg_buf(msg, PLAIN_PACK_USER_OFF, 0);
- }
-
- if (phdr->ph_flags & PLAIN_FL_BULK) {
- if (plain_unpack_bsd(msg, swabbed))
- return SECSVC_DROP;
-
- req->rq_pack_bulk = 1;
- }
-
- req->rq_reqmsg = lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0);
- req->rq_reqlen = msg->lm_buflens[PLAIN_PACK_MSG_OFF];
-
- req->rq_svc_ctx = &plain_svc_ctx;
- atomic_inc(&req->rq_svc_ctx->sc_refcount);
-
- return SECSVC_OK;
-}
-
-static
-int plain_alloc_rs(struct ptlrpc_request *req, int msgsize)
-{
- struct ptlrpc_reply_state *rs;
- __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, };
- int rs_size = sizeof(*rs);
-
- LASSERT(msgsize % 8 == 0);
-
- buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header);
- buflens[PLAIN_PACK_MSG_OFF] = msgsize;
-
- if (req->rq_pack_bulk && (req->rq_bulk_read || req->rq_bulk_write))
- buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE;
-
- rs_size += lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens);
-
- rs = req->rq_reply_state;
-
- if (rs) {
- /* pre-allocated */
- LASSERT(rs->rs_size >= rs_size);
- } else {
- rs = kvzalloc(rs_size, GFP_NOFS);
- if (!rs)
- return -ENOMEM;
-
- rs->rs_size = rs_size;
- }
-
- rs->rs_svc_ctx = req->rq_svc_ctx;
- atomic_inc(&req->rq_svc_ctx->sc_refcount);
- rs->rs_repbuf = (struct lustre_msg *)(rs + 1);
- rs->rs_repbuf_len = rs_size - sizeof(*rs);
-
- lustre_init_msg_v2(rs->rs_repbuf, PLAIN_PACK_SEGMENTS, buflens, NULL);
- rs->rs_msg = lustre_msg_buf_v2(rs->rs_repbuf, PLAIN_PACK_MSG_OFF, 0);
-
- req->rq_reply_state = rs;
- return 0;
-}
-
-static
-void plain_free_rs(struct ptlrpc_reply_state *rs)
-{
- LASSERT(atomic_read(&rs->rs_svc_ctx->sc_refcount) > 1);
- atomic_dec(&rs->rs_svc_ctx->sc_refcount);
-
- if (!rs->rs_prealloc)
- kvfree(rs);
-}
-
-static
-int plain_authorize(struct ptlrpc_request *req)
-{
- struct ptlrpc_reply_state *rs = req->rq_reply_state;
- struct lustre_msg_v2 *msg = rs->rs_repbuf;
- struct plain_header *phdr;
- int len;
-
- LASSERT(rs);
- LASSERT(msg);
-
- if (req->rq_replen != msg->lm_buflens[PLAIN_PACK_MSG_OFF])
- len = lustre_shrink_msg(msg, PLAIN_PACK_MSG_OFF,
- req->rq_replen, 1);
- else
- len = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
-
- msg->lm_secflvr = req->rq_flvr.sf_rpc;
-
- phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, 0);
- phdr->ph_ver = 0;
- phdr->ph_flags = 0;
- phdr->ph_bulk_hash_alg = req->rq_flvr.u_bulk.hash.hash_alg;
-
- if (req->rq_pack_bulk)
- phdr->ph_flags |= PLAIN_FL_BULK;
-
- rs->rs_repdata_len = len;
-
- if (likely(req->rq_packed_final)) {
- if (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)
- req->rq_reply_off = plain_at_offset;
- else
- req->rq_reply_off = 0;
- } else {
- unsigned int hsize = 4;
-
- cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32,
- lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF,
- 0),
- lustre_msg_buflen(msg,
- PLAIN_PACK_MSG_OFF),
- NULL, 0, (unsigned char *)&msg->lm_cksum,
- &hsize);
- req->rq_reply_off = 0;
- }
-
- return 0;
-}
-
-static
-int plain_svc_unwrap_bulk(struct ptlrpc_request *req,
- struct ptlrpc_bulk_desc *desc)
-{
- struct ptlrpc_reply_state *rs = req->rq_reply_state;
- struct ptlrpc_bulk_sec_desc *bsdr, *bsdv;
- struct plain_bulk_token *tokenr;
- int rc;
-
- LASSERT(req->rq_bulk_write);
- LASSERT(req->rq_pack_bulk);
-
- bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
- tokenr = (struct plain_bulk_token *)bsdr->bsd_data;
- bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0);
-
- bsdv->bsd_version = 0;
- bsdv->bsd_type = SPTLRPC_BULK_DEFAULT;
- bsdv->bsd_svc = bsdr->bsd_svc;
- bsdv->bsd_flags = 0;
-
- if (bsdr->bsd_svc == SPTLRPC_BULK_SVC_NULL)
- return 0;
-
- rc = plain_verify_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
- tokenr);
- if (rc) {
- bsdv->bsd_flags |= BSD_FL_ERR;
- CERROR("bulk write: server verify failed: %d\n", rc);
- }
-
- return rc;
-}
-
-static
-int plain_svc_wrap_bulk(struct ptlrpc_request *req,
- struct ptlrpc_bulk_desc *desc)
-{
- struct ptlrpc_reply_state *rs = req->rq_reply_state;
- struct ptlrpc_bulk_sec_desc *bsdr, *bsdv;
- struct plain_bulk_token *tokenv;
- int rc;
-
- LASSERT(req->rq_bulk_read);
- LASSERT(req->rq_pack_bulk);
-
- bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
- bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0);
- tokenv = (struct plain_bulk_token *)bsdv->bsd_data;
-
- bsdv->bsd_version = 0;
- bsdv->bsd_type = SPTLRPC_BULK_DEFAULT;
- bsdv->bsd_svc = bsdr->bsd_svc;
- bsdv->bsd_flags = 0;
-
- if (bsdr->bsd_svc == SPTLRPC_BULK_SVC_NULL)
- return 0;
-
- rc = plain_generate_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
- tokenv);
- if (rc) {
- CERROR("bulk read: server failed to compute checksum: %d\n",
- rc);
- } else {
- if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE))
- corrupt_bulk_data(desc);
- }
-
- return rc;
-}
-
-static struct ptlrpc_ctx_ops plain_ctx_ops = {
- .refresh = plain_ctx_refresh,
- .validate = plain_ctx_validate,
- .sign = plain_ctx_sign,
- .verify = plain_ctx_verify,
- .wrap_bulk = plain_cli_wrap_bulk,
- .unwrap_bulk = plain_cli_unwrap_bulk,
-};
-
-static struct ptlrpc_sec_cops plain_sec_cops = {
- .create_sec = plain_create_sec,
- .destroy_sec = plain_destroy_sec,
- .kill_sec = plain_kill_sec,
- .lookup_ctx = plain_lookup_ctx,
- .release_ctx = plain_release_ctx,
- .flush_ctx_cache = plain_flush_ctx_cache,
- .alloc_reqbuf = plain_alloc_reqbuf,
- .free_reqbuf = plain_free_reqbuf,
- .alloc_repbuf = plain_alloc_repbuf,
- .free_repbuf = plain_free_repbuf,
- .enlarge_reqbuf = plain_enlarge_reqbuf,
-};
-
-static struct ptlrpc_sec_sops plain_sec_sops = {
- .accept = plain_accept,
- .alloc_rs = plain_alloc_rs,
- .authorize = plain_authorize,
- .free_rs = plain_free_rs,
- .unwrap_bulk = plain_svc_unwrap_bulk,
- .wrap_bulk = plain_svc_wrap_bulk,
-};
-
-static struct ptlrpc_sec_policy plain_policy = {
- .sp_owner = THIS_MODULE,
- .sp_name = "plain",
- .sp_policy = SPTLRPC_POLICY_PLAIN,
- .sp_cops = &plain_sec_cops,
- .sp_sops = &plain_sec_sops,
-};
-
-int sptlrpc_plain_init(void)
-{
- __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, };
- int rc;
-
- buflens[PLAIN_PACK_MSG_OFF] = lustre_msg_early_size();
- plain_at_offset = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens);
-
- rc = sptlrpc_register_policy(&plain_policy);
- if (rc)
- CERROR("failed to register: %d\n", rc);
-
- return rc;
-}
-
-void sptlrpc_plain_fini(void)
-{
- int rc;
-
- rc = sptlrpc_unregister_policy(&plain_policy);
- if (rc)
- CERROR("cannot unregister: %d\n", rc);
-}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c b/drivers/staging/lustre/lustre/ptlrpc/service.c
deleted file mode 100644
index 3fd8c746f460..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/service.c
+++ /dev/null
@@ -1,2807 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2010, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_RPC
-
-#include <linux/kthread.h>
-#include <obd_support.h>
-#include <obd_class.h>
-#include <lustre_net.h>
-#include <lu_object.h>
-#include <uapi/linux/lnet/lnet-types.h>
-#include "ptlrpc_internal.h"
-#include <linux/libcfs/libcfs_cpu.h>
-#include <linux/libcfs/libcfs_string.h>
-
-/* The following are visible and mutable through /sys/module/ptlrpc */
-int test_req_buffer_pressure;
-module_param(test_req_buffer_pressure, int, 0444);
-MODULE_PARM_DESC(test_req_buffer_pressure, "set non-zero to put pressure on request buffer pools");
-module_param(at_min, int, 0644);
-MODULE_PARM_DESC(at_min, "Adaptive timeout minimum (sec)");
-module_param(at_max, int, 0644);
-MODULE_PARM_DESC(at_max, "Adaptive timeout maximum (sec)");
-module_param(at_history, int, 0644);
-MODULE_PARM_DESC(at_history,
- "Adaptive timeouts remember the slowest event that took place within this period (sec)");
-module_param(at_early_margin, int, 0644);
-MODULE_PARM_DESC(at_early_margin, "How soon before an RPC deadline to send an early reply");
-module_param(at_extra, int, 0644);
-MODULE_PARM_DESC(at_extra, "How much extra time to give with each early reply");
-
-/* forward ref */
-static int ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt);
-static void ptlrpc_server_hpreq_fini(struct ptlrpc_request *req);
-static void ptlrpc_at_remove_timed(struct ptlrpc_request *req);
-
-/** Holds a list of all PTLRPC services */
-LIST_HEAD(ptlrpc_all_services);
-/** Used to protect the \e ptlrpc_all_services list */
-struct mutex ptlrpc_all_services_mutex;
-
-static struct ptlrpc_request_buffer_desc *
-ptlrpc_alloc_rqbd(struct ptlrpc_service_part *svcpt)
-{
- struct ptlrpc_service *svc = svcpt->scp_service;
- struct ptlrpc_request_buffer_desc *rqbd;
-
- rqbd = kzalloc_node(sizeof(*rqbd), GFP_NOFS,
- cfs_cpt_spread_node(svc->srv_cptable,
- svcpt->scp_cpt));
- if (!rqbd)
- return NULL;
-
- rqbd->rqbd_svcpt = svcpt;
- rqbd->rqbd_refcount = 0;
- rqbd->rqbd_cbid.cbid_fn = request_in_callback;
- rqbd->rqbd_cbid.cbid_arg = rqbd;
- INIT_LIST_HEAD(&rqbd->rqbd_reqs);
- rqbd->rqbd_buffer = kvzalloc_node(svc->srv_buf_size, GFP_KERNEL,
- cfs_cpt_spread_node(svc->srv_cptable,
- svcpt->scp_cpt));
-
- if (!rqbd->rqbd_buffer) {
- kfree(rqbd);
- return NULL;
- }
-
- spin_lock(&svcpt->scp_lock);
- list_add(&rqbd->rqbd_list, &svcpt->scp_rqbd_idle);
- svcpt->scp_nrqbds_total++;
- spin_unlock(&svcpt->scp_lock);
-
- return rqbd;
-}
-
-static void
-ptlrpc_free_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
-{
- struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt;
-
- LASSERT(rqbd->rqbd_refcount == 0);
- LASSERT(list_empty(&rqbd->rqbd_reqs));
-
- spin_lock(&svcpt->scp_lock);
- list_del(&rqbd->rqbd_list);
- svcpt->scp_nrqbds_total--;
- spin_unlock(&svcpt->scp_lock);
-
- kvfree(rqbd->rqbd_buffer);
- kfree(rqbd);
-}
-
-static int
-ptlrpc_grow_req_bufs(struct ptlrpc_service_part *svcpt, int post)
-{
- struct ptlrpc_service *svc = svcpt->scp_service;
- struct ptlrpc_request_buffer_desc *rqbd;
- int rc = 0;
- int i;
-
- if (svcpt->scp_rqbd_allocating)
- goto try_post;
-
- spin_lock(&svcpt->scp_lock);
- /* check again with lock */
- if (svcpt->scp_rqbd_allocating) {
- /* NB: we might allow more than one thread in the future */
- LASSERT(svcpt->scp_rqbd_allocating == 1);
- spin_unlock(&svcpt->scp_lock);
- goto try_post;
- }
-
- svcpt->scp_rqbd_allocating++;
- spin_unlock(&svcpt->scp_lock);
-
- for (i = 0; i < svc->srv_nbuf_per_group; i++) {
-		/* NB: another thread might have recycled enough rqbds; we need
-		 * to make sure we don't over-allocate, see LU-1212.
-		 */
- if (svcpt->scp_nrqbds_posted >= svc->srv_nbuf_per_group)
- break;
-
- rqbd = ptlrpc_alloc_rqbd(svcpt);
-
- if (!rqbd) {
- CERROR("%s: Can't allocate request buffer\n",
- svc->srv_name);
- rc = -ENOMEM;
- break;
- }
- }
-
- spin_lock(&svcpt->scp_lock);
-
- LASSERT(svcpt->scp_rqbd_allocating == 1);
- svcpt->scp_rqbd_allocating--;
-
- spin_unlock(&svcpt->scp_lock);
-
- CDEBUG(D_RPCTRACE,
- "%s: allocate %d new %d-byte reqbufs (%d/%d left), rc = %d\n",
- svc->srv_name, i, svc->srv_buf_size, svcpt->scp_nrqbds_posted,
- svcpt->scp_nrqbds_total, rc);
-
- try_post:
- if (post && rc == 0)
- rc = ptlrpc_server_post_idle_rqbds(svcpt);
-
- return rc;
-}
-
-struct ptlrpc_hr_partition;
-
-struct ptlrpc_hr_thread {
- int hrt_id; /* thread ID */
- spinlock_t hrt_lock;
- wait_queue_head_t hrt_waitq;
- struct list_head hrt_queue; /* RS queue */
- struct ptlrpc_hr_partition *hrt_partition;
-};
-
-struct ptlrpc_hr_partition {
- /* # of started threads */
- atomic_t hrp_nstarted;
- /* # of stopped threads */
- atomic_t hrp_nstopped;
- /* cpu partition id */
- int hrp_cpt;
- /* round-robin rotor for choosing thread */
- int hrp_rotor;
- /* total number of threads on this partition */
- int hrp_nthrs;
- /* threads table */
- struct ptlrpc_hr_thread *hrp_thrs;
-};
-
-#define HRT_RUNNING 0
-#define HRT_STOPPING 1
-
-struct ptlrpc_hr_service {
- /* CPU partition table, it's just cfs_cpt_tab for now */
- struct cfs_cpt_table *hr_cpt_table;
- /** controller sleep waitq */
- wait_queue_head_t hr_waitq;
- unsigned int hr_stopping;
- /** roundrobin rotor for non-affinity service */
- unsigned int hr_rotor;
- /* partition data */
- struct ptlrpc_hr_partition **hr_partitions;
-};
-
-/** reply handling service. */
-static struct ptlrpc_hr_service ptlrpc_hr;
-
-/**
- * Choose an hr thread to dispatch requests to.
- */
-static struct ptlrpc_hr_thread *
-ptlrpc_hr_select(struct ptlrpc_service_part *svcpt)
-{
- struct ptlrpc_hr_partition *hrp;
- unsigned int rotor;
-
- if (svcpt->scp_cpt >= 0 &&
- svcpt->scp_service->srv_cptable == ptlrpc_hr.hr_cpt_table) {
- /* directly match partition */
- hrp = ptlrpc_hr.hr_partitions[svcpt->scp_cpt];
-
- } else {
- rotor = ptlrpc_hr.hr_rotor++;
- rotor %= cfs_cpt_number(ptlrpc_hr.hr_cpt_table);
-
- hrp = ptlrpc_hr.hr_partitions[rotor];
- }
-
- rotor = hrp->hrp_rotor++;
- return &hrp->hrp_thrs[rotor % hrp->hrp_nthrs];
-}
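ptlrpc_hr_select() uses a two-level round-robin: a global rotor picks a partition when the request is not already CPT-affine, then a per-partition rotor picks a thread. Both rotors are updated without locks; the occasional race only skews the distribution slightly. A compact user-space sketch of the same selection scheme:

	#include <stdio.h>

	#define NPARTS 4
	#define NTHRS 3

	struct part { unsigned int rotor; int nthrs; };

	static struct part parts[NPARTS];
	static unsigned int global_rotor;

	/* affine_part < 0 means "no affinity": rotate over partitions */
	static void select_thread(int affine_part, int *part, int *thr)
	{
		struct part *p;

		if (affine_part >= 0)
			*part = affine_part;
		else
			*part = global_rotor++ % NPARTS;
		p = &parts[*part];
		*thr = p->rotor++ % p->nthrs;
	}

	int main(void)
	{
		int i, part, thr;

		for (i = 0; i < NPARTS; i++)
			parts[i].nthrs = NTHRS;
		for (i = 0; i < 6; i++) {
			select_thread(-1, &part, &thr);
			printf("reply %d -> partition %d, thread %d\n",
			       i, part, thr);
		}
		return 0;
	}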
-
-/**
- * Put reply state into a queue for processing because we received
- * ACK from the client
- */
-void ptlrpc_dispatch_difficult_reply(struct ptlrpc_reply_state *rs)
-{
- struct ptlrpc_hr_thread *hrt;
-
- LASSERT(list_empty(&rs->rs_list));
-
- hrt = ptlrpc_hr_select(rs->rs_svcpt);
-
- spin_lock(&hrt->hrt_lock);
- list_add_tail(&rs->rs_list, &hrt->hrt_queue);
- spin_unlock(&hrt->hrt_lock);
-
- wake_up(&hrt->hrt_waitq);
-}
-
-void
-ptlrpc_schedule_difficult_reply(struct ptlrpc_reply_state *rs)
-{
- assert_spin_locked(&rs->rs_svcpt->scp_rep_lock);
- assert_spin_locked(&rs->rs_lock);
- LASSERT(rs->rs_difficult);
- rs->rs_scheduled_ever = 1; /* flag any notification attempt */
-
- if (rs->rs_scheduled) { /* being set up or already notified */
- return;
- }
-
- rs->rs_scheduled = 1;
- list_del_init(&rs->rs_list);
- ptlrpc_dispatch_difficult_reply(rs);
-}
-EXPORT_SYMBOL(ptlrpc_schedule_difficult_reply);
-
-static int
-ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt)
-{
- struct ptlrpc_request_buffer_desc *rqbd;
- int rc;
- int posted = 0;
-
- for (;;) {
- spin_lock(&svcpt->scp_lock);
-
- if (list_empty(&svcpt->scp_rqbd_idle)) {
- spin_unlock(&svcpt->scp_lock);
- return posted;
- }
-
- rqbd = list_entry(svcpt->scp_rqbd_idle.next,
- struct ptlrpc_request_buffer_desc,
- rqbd_list);
- list_del(&rqbd->rqbd_list);
-
- /* assume we will post successfully */
- svcpt->scp_nrqbds_posted++;
- list_add(&rqbd->rqbd_list, &svcpt->scp_rqbd_posted);
-
- spin_unlock(&svcpt->scp_lock);
-
- rc = ptlrpc_register_rqbd(rqbd);
- if (rc != 0)
- break;
-
- posted = 1;
- }
-
- spin_lock(&svcpt->scp_lock);
-
- svcpt->scp_nrqbds_posted--;
- list_del(&rqbd->rqbd_list);
- list_add_tail(&rqbd->rqbd_list, &svcpt->scp_rqbd_idle);
-
- /* Don't complain if no request buffers are posted right now; LNET
- * won't drop requests because we set the portal lazy!
- */
-
- spin_unlock(&svcpt->scp_lock);
-
- return -1;
-}
-
-static void ptlrpc_at_timer(struct timer_list *t)
-{
- struct ptlrpc_service_part *svcpt;
-
- svcpt = from_timer(svcpt, t, scp_at_timer);
-
- svcpt->scp_at_check = 1;
- svcpt->scp_at_checktime = jiffies;
- wake_up(&svcpt->scp_waitq);
-}
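ptlrpc_at_timer() shows the modern kernel timer idiom: timer_setup() binds the callback, from_timer() recovers the enclosing structure inside it, and the callback merely sets a flag and wakes the service thread rather than doing real work in timer context. A minimal sketch of the same idiom (the module boilerplate and the 10-second deadline are illustrative):

	#include <linux/jiffies.h>
	#include <linux/module.h>
	#include <linux/timer.h>
	#include <linux/wait.h>

	struct svcpt_demo {
		struct timer_list at_timer;
		wait_queue_head_t waitq;
		int at_check;
	};

	static struct svcpt_demo demo;

	/* recover the container with from_timer(), flag, and wake up --
	 * mirroring ptlrpc_at_timer() above */
	static void demo_at_timer(struct timer_list *t)
	{
		struct svcpt_demo *d = from_timer(d, t, at_timer);

		d->at_check = 1;
		wake_up(&d->waitq);
	}

	static int __init demo_init(void)
	{
		init_waitqueue_head(&demo.waitq);
		timer_setup(&demo.at_timer, demo_at_timer, 0);
		mod_timer(&demo.at_timer, jiffies + 10 * HZ);
		return 0;
	}

	static void __exit demo_exit(void)
	{
		del_timer_sync(&demo.at_timer);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");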
-
-static void
-ptlrpc_server_nthreads_check(struct ptlrpc_service *svc,
- struct ptlrpc_service_conf *conf)
-{
- struct ptlrpc_service_thr_conf *tc = &conf->psc_thr;
- unsigned int init;
- unsigned int total;
- unsigned int nthrs;
- int weight;
-
-	/*
-	 * Common code for estimating and validating the number of threads.
-	 * A CPT-affinity service may have a per-CPT thread pool instead of
-	 * a global thread pool, which means users might not always get the
-	 * thread count they request in conf::tc_nthrs_user even when they
-	 * do set it, because we must validate the thread count for each CPT
-	 * to guarantee each pool will have enough threads to keep the
-	 * service healthy.
-	 */
- init = PTLRPC_NTHRS_INIT + (svc->srv_ops.so_hpreq_handler != NULL);
- init = max_t(int, init, tc->tc_nthrs_init);
-
- /* NB: please see comments in lustre_lnet.h for definition
- * details of these members
- */
- LASSERT(tc->tc_nthrs_max != 0);
-
- if (tc->tc_nthrs_user != 0) {
-		/* In case there is a reason to test a service with many
-		 * threads, we apply a less strict check here; it can be
-		 * up to 8 * nthrs_max
-		 */
- total = min(tc->tc_nthrs_max * 8, tc->tc_nthrs_user);
- nthrs = total / svc->srv_ncpts;
- init = max(init, nthrs);
- goto out;
- }
-
- total = tc->tc_nthrs_max;
- if (tc->tc_nthrs_base == 0) {
-		/* we don't care about the base number of threads per
-		 * partition; this is mostly for non-affinity services
-		 */
- nthrs = total / svc->srv_ncpts;
- goto out;
- }
-
- nthrs = tc->tc_nthrs_base;
- if (svc->srv_ncpts == 1) {
- int i;
-
-		/* NB: increase the base number if there is a single partition
-		 * and the total number of cores/HTs is at least 4; the result
-		 * will always be < 2 * nthrs_base
-		 */
- weight = cfs_cpt_weight(svc->srv_cptable, CFS_CPT_ANY);
- for (i = 1; (weight >> (i + 1)) != 0 && /* >= 4 cores/HTs */
- (tc->tc_nthrs_base >> i) != 0; i++)
- nthrs += tc->tc_nthrs_base >> i;
- }
-
- if (tc->tc_thr_factor != 0) {
- int factor = tc->tc_thr_factor;
- const int fade = 4;
-
-		/*
-		 * The user wants to increase the number of threads for each
-		 * CPU core/HT; most likely the factor is larger than one
-		 * thread per core because service threads are expected to
-		 * block on locks or wait for I/O.
-		 */
-		/*
-		 * Amdahl's law says that adding processors does not give a
-		 * linear increase in parallelism, so it makes no sense to
-		 * have too many threads no matter how many cores/HTs
-		 * there are.
-		 */
- /* weight is # of HTs */
- if (cpumask_weight(topology_sibling_cpumask(0)) > 1) {
- /* depress thread factor for hyper-thread */
- factor = factor - (factor >> 1) + (factor >> 3);
- }
-
- weight = cfs_cpt_weight(svc->srv_cptable, 0);
- LASSERT(weight > 0);
-
- for (; factor > 0 && weight > 0; factor--, weight -= fade)
- nthrs += min(weight, fade) * factor;
- }
-
- if (nthrs * svc->srv_ncpts > tc->tc_nthrs_max) {
- nthrs = max(tc->tc_nthrs_base,
- tc->tc_nthrs_max / svc->srv_ncpts);
- }
- out:
- nthrs = max(nthrs, tc->tc_nthrs_init);
- svc->srv_nthrs_cpt_limit = nthrs;
- svc->srv_nthrs_cpt_init = init;
-
- if (nthrs * svc->srv_ncpts > tc->tc_nthrs_max) {
- CDEBUG(D_OTHER, "%s: This service may have more threads (%d) than the given soft limit (%d)\n",
- svc->srv_name, nthrs * svc->srv_ncpts,
- tc->tc_nthrs_max);
- }
-}
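The thread-count estimate above adds tc_thr_factor threads per core, fading each successive factor step by four cores, and depresses the factor on hyper-threaded CPUs since sibling threads share execution resources. A user-space sketch of just that loop, with some example inputs:

	#include <stdio.h>

	/* sketch of the thread-factor loop above: each factor step adds
	 * up to 'fade' cores' worth of threads, with the weight fading */
	static int estimate_threads(int base, int weight, int factor, int ht)
	{
		const int fade = 4;
		int nthrs = base;

		if (ht > 1)	/* depress the factor for hyper-threading */
			factor = factor - (factor >> 1) + (factor >> 3);

		for (; factor > 0 && weight > 0; factor--, weight -= fade)
			nthrs += (weight < fade ? weight : fade) * factor;
		return nthrs;
	}

	int main(void)
	{
		printf("8 cores, factor 4, no HT: %d threads\n",
		       estimate_threads(8, 8, 4, 1));
		printf("8 cores, factor 4, 2x HT: %d threads\n",
		       estimate_threads(8, 8, 4, 2));
		return 0;
	}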
-
-/**
- * Initialize percpt data for a service
- */
-static int
-ptlrpc_service_part_init(struct ptlrpc_service *svc,
- struct ptlrpc_service_part *svcpt, int cpt)
-{
- struct ptlrpc_at_array *array;
- int size;
- int index;
- int rc;
-
- svcpt->scp_cpt = cpt;
- INIT_LIST_HEAD(&svcpt->scp_threads);
-
- /* rqbd and incoming request queue */
- spin_lock_init(&svcpt->scp_lock);
- INIT_LIST_HEAD(&svcpt->scp_rqbd_idle);
- INIT_LIST_HEAD(&svcpt->scp_rqbd_posted);
- INIT_LIST_HEAD(&svcpt->scp_req_incoming);
- init_waitqueue_head(&svcpt->scp_waitq);
- /* history request & rqbd list */
- INIT_LIST_HEAD(&svcpt->scp_hist_reqs);
- INIT_LIST_HEAD(&svcpt->scp_hist_rqbds);
-
- /* active requests and hp requests */
- spin_lock_init(&svcpt->scp_req_lock);
-
- /* reply states */
- spin_lock_init(&svcpt->scp_rep_lock);
- INIT_LIST_HEAD(&svcpt->scp_rep_active);
- INIT_LIST_HEAD(&svcpt->scp_rep_idle);
- init_waitqueue_head(&svcpt->scp_rep_waitq);
- atomic_set(&svcpt->scp_nreps_difficult, 0);
-
- /* adaptive timeout */
- spin_lock_init(&svcpt->scp_at_lock);
- array = &svcpt->scp_at_array;
-
- size = at_est2timeout(at_max);
- array->paa_size = size;
- array->paa_count = 0;
- array->paa_deadline = -1;
-
- /* allocate memory for scp_at_array (ptlrpc_at_array) */
- array->paa_reqs_array =
- kzalloc_node(sizeof(struct list_head) * size, GFP_NOFS,
- cfs_cpt_spread_node(svc->srv_cptable, cpt));
- if (!array->paa_reqs_array)
- return -ENOMEM;
-
- for (index = 0; index < size; index++)
- INIT_LIST_HEAD(&array->paa_reqs_array[index]);
-
- array->paa_reqs_count =
- kzalloc_node(sizeof(__u32) * size, GFP_NOFS,
- cfs_cpt_spread_node(svc->srv_cptable, cpt));
- if (!array->paa_reqs_count)
- goto free_reqs_array;
-
- timer_setup(&svcpt->scp_at_timer, ptlrpc_at_timer, 0);
-
- /* At SOW, service time should be quick; 10s seems generous. If client
- * timeout is less than this, we'll be sending an early reply.
- */
- at_init(&svcpt->scp_at_estimate, 10, 0);
-
- /* assign this before call ptlrpc_grow_req_bufs */
- svcpt->scp_service = svc;
- /* Now allocate the request buffers, but don't post them now */
- rc = ptlrpc_grow_req_bufs(svcpt, 0);
- /* We shouldn't be under memory pressure at startup, so
- * fail if we can't allocate all our buffers at this time.
- */
- if (rc != 0)
- goto free_reqs_count;
-
- return 0;
-
-free_reqs_count:
- kfree(array->paa_reqs_count);
- array->paa_reqs_count = NULL;
-free_reqs_array:
- kfree(array->paa_reqs_array);
- array->paa_reqs_array = NULL;
-
- return -ENOMEM;
-}
-
-/**
- * Initialize service on a given portal.
- * This includes starting service threads, allocating and posting rqbds,
- * and so on.
- */
-struct ptlrpc_service *
-ptlrpc_register_service(struct ptlrpc_service_conf *conf,
- struct kset *parent,
- struct dentry *debugfs_entry)
-{
- struct ptlrpc_service_cpt_conf *cconf = &conf->psc_cpt;
- struct ptlrpc_service *service;
- struct ptlrpc_service_part *svcpt;
- struct cfs_cpt_table *cptable;
- __u32 *cpts = NULL;
- int ncpts;
- int cpt;
- int rc;
- int i;
-
- LASSERT(conf->psc_buf.bc_nbufs > 0);
- LASSERT(conf->psc_buf.bc_buf_size >=
- conf->psc_buf.bc_req_max_size + SPTLRPC_MAX_PAYLOAD);
- LASSERT(conf->psc_thr.tc_ctx_tags != 0);
-
- cptable = cconf->cc_cptable;
- if (!cptable)
- cptable = cfs_cpt_tab;
-
- if (!conf->psc_thr.tc_cpu_affinity) {
- ncpts = 1;
- } else {
- ncpts = cfs_cpt_number(cptable);
- if (cconf->cc_pattern) {
- struct cfs_expr_list *el;
-
- rc = cfs_expr_list_parse(cconf->cc_pattern,
- strlen(cconf->cc_pattern),
- 0, ncpts - 1, &el);
- if (rc != 0) {
- CERROR("%s: invalid CPT pattern string: %s",
- conf->psc_name, cconf->cc_pattern);
- return ERR_PTR(-EINVAL);
- }
-
- rc = cfs_expr_list_values(el, ncpts, &cpts);
- cfs_expr_list_free(el);
- if (rc <= 0) {
- CERROR("%s: failed to parse CPT array %s: %d\n",
- conf->psc_name, cconf->cc_pattern, rc);
- kfree(cpts);
- return ERR_PTR(rc < 0 ? rc : -EINVAL);
- }
- ncpts = rc;
- }
- }
-
- service = kzalloc(offsetof(struct ptlrpc_service, srv_parts[ncpts]),
- GFP_NOFS);
- if (!service) {
- kfree(cpts);
- return ERR_PTR(-ENOMEM);
- }
-
- service->srv_cptable = cptable;
- service->srv_cpts = cpts;
- service->srv_ncpts = ncpts;
-
- service->srv_cpt_bits = 0; /* it's zero already, easy to read... */
- while ((1 << service->srv_cpt_bits) < cfs_cpt_number(cptable))
- service->srv_cpt_bits++;
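-
- /* Illustrative (assumed CPT count): srv_cpt_bits is the smallest n
- * with 2^n >= cfs_cpt_number(cptable); e.g. 5 CPTs give
- * srv_cpt_bits = 3.
- */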
-
- /* public members */
- spin_lock_init(&service->srv_lock);
- service->srv_name = conf->psc_name;
- service->srv_watchdog_factor = conf->psc_watchdog_factor;
- INIT_LIST_HEAD(&service->srv_list); /* for safety of cleanup */
-
- /* buffer configuration */
- service->srv_nbuf_per_group = test_req_buffer_pressure ?
- 1 : conf->psc_buf.bc_nbufs;
- service->srv_max_req_size = conf->psc_buf.bc_req_max_size +
- SPTLRPC_MAX_PAYLOAD;
- service->srv_buf_size = conf->psc_buf.bc_buf_size;
- service->srv_rep_portal = conf->psc_buf.bc_rep_portal;
- service->srv_req_portal = conf->psc_buf.bc_req_portal;
-
- /* Increase max reply size to next power of two */
- service->srv_max_reply_size = 1;
- while (service->srv_max_reply_size <
- conf->psc_buf.bc_rep_max_size + SPTLRPC_MAX_PAYLOAD)
- service->srv_max_reply_size <<= 1;
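-
- /* Illustrative (assumed sizes): the loop rounds up to the next power
- * of two, so a bc_rep_max_size + SPTLRPC_MAX_PAYLOAD of 9000 bytes
- * would give srv_max_reply_size = 16384.
- */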
-
- service->srv_thread_name = conf->psc_thr.tc_thr_name;
- service->srv_ctx_tags = conf->psc_thr.tc_ctx_tags;
- service->srv_hpreq_ratio = PTLRPC_SVC_HP_RATIO;
- service->srv_ops = conf->psc_ops;
-
- for (i = 0; i < ncpts; i++) {
- if (!conf->psc_thr.tc_cpu_affinity)
- cpt = CFS_CPT_ANY;
- else
- cpt = cpts ? cpts[i] : i;
-
- svcpt = kzalloc_node(sizeof(*svcpt), GFP_NOFS,
- cfs_cpt_spread_node(cptable, cpt));
- if (!svcpt) {
- rc = -ENOMEM;
- goto failed;
- }
-
- service->srv_parts[i] = svcpt;
- rc = ptlrpc_service_part_init(service, svcpt, cpt);
- if (rc != 0)
- goto failed;
- }
-
- ptlrpc_server_nthreads_check(service, conf);
-
- rc = LNetSetLazyPortal(service->srv_req_portal);
- LASSERT(rc == 0);
-
- mutex_lock(&ptlrpc_all_services_mutex);
- list_add(&service->srv_list, &ptlrpc_all_services);
- mutex_unlock(&ptlrpc_all_services_mutex);
-
- if (parent) {
- rc = ptlrpc_sysfs_register_service(parent, service);
- if (rc)
- goto failed;
- }
-
- if (!IS_ERR_OR_NULL(debugfs_entry))
- ptlrpc_ldebugfs_register_service(debugfs_entry, service);
-
- rc = ptlrpc_service_nrs_setup(service);
- if (rc != 0)
- goto failed;
-
- CDEBUG(D_NET, "%s: Started, listening on portal %d\n",
- service->srv_name, service->srv_req_portal);
-
- rc = ptlrpc_start_threads(service);
- if (rc != 0) {
- CERROR("Failed to start threads for service %s: %d\n",
- service->srv_name, rc);
- goto failed;
- }
-
- return service;
-failed:
- ptlrpc_unregister_service(service);
- return ERR_PTR(rc);
-}
-EXPORT_SYMBOL(ptlrpc_register_service);
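-
-/*
- * Usage sketch (illustrative only; the field values and handler name are
- * assumptions, not taken from any real service definition):
- *
- *	static struct ptlrpc_service_conf conf = {
- *		.psc_name		= "example_svc",
- *		.psc_watchdog_factor	= 2,
- *		.psc_buf		= { .bc_nbufs = 64, ... },
- *		.psc_thr		= { .tc_thr_name = "ll_example", ... },
- *		.psc_ops		= { .so_req_handler = example_handler, },
- *	};
- *	struct ptlrpc_service *svc;
- *
- *	svc = ptlrpc_register_service(&conf, parent_kset, debugfs_dir);
- *	if (IS_ERR(svc))
- *		return PTR_ERR(svc);
- *
- * On failure ptlrpc_register_service() cleans up after itself via
- * ptlrpc_unregister_service() and returns ERR_PTR(rc).
- */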
-
-/**
- * To actually free the request, this must be called without holding
- * svc_lock.  Note that it is the caller's responsibility to unlink
- * req->rq_list.
- */
-static void ptlrpc_server_free_request(struct ptlrpc_request *req)
-{
- LASSERT(atomic_read(&req->rq_refcount) == 0);
- LASSERT(list_empty(&req->rq_timed_list));
-
- /* DEBUG_REQ() assumes the reply state of a request with a valid
- * ref will not be destroyed until that reference is dropped.
- */
- ptlrpc_req_drop_rs(req);
-
- sptlrpc_svc_ctx_decref(req);
-
- if (req != &req->rq_rqbd->rqbd_req) {
- /* NB request buffers use an embedded
- * req if the incoming req unlinked the
- * MD; this isn't one of them!
- */
- ptlrpc_request_cache_free(req);
- }
-}
-
-/**
- * Drop a reference count of the request.  If it reaches zero, we either
- * put it into the history list or free it immediately.
- */
-static void ptlrpc_server_drop_request(struct ptlrpc_request *req)
-{
- struct ptlrpc_request_buffer_desc *rqbd = req->rq_rqbd;
- struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt;
- struct ptlrpc_service *svc = svcpt->scp_service;
- int refcount;
-
- if (!atomic_dec_and_test(&req->rq_refcount))
- return;
-
- if (req->rq_at_linked) {
- spin_lock(&svcpt->scp_at_lock);
- /* recheck with lock, in case it's unlinked by
- * ptlrpc_at_check_timed()
- */
- if (likely(req->rq_at_linked))
- ptlrpc_at_remove_timed(req);
- spin_unlock(&svcpt->scp_at_lock);
- }
-
- LASSERT(list_empty(&req->rq_timed_list));
-
- /* finalize request */
- if (req->rq_export) {
- class_export_put(req->rq_export);
- req->rq_export = NULL;
- }
-
- spin_lock(&svcpt->scp_lock);
-
- list_add(&req->rq_list, &rqbd->rqbd_reqs);
-
- refcount = --(rqbd->rqbd_refcount);
- if (refcount == 0) {
- /* request buffer is now idle: add to history */
- list_del(&rqbd->rqbd_list);
-
- list_add_tail(&rqbd->rqbd_list, &svcpt->scp_hist_rqbds);
- svcpt->scp_hist_nrqbds++;
-
- /* cull some history?
- * I expect only about 1 or 2 rqbds need to be recycled here
- */
- while (svcpt->scp_hist_nrqbds > svc->srv_hist_nrqbds_cpt_max) {
- rqbd = list_entry(svcpt->scp_hist_rqbds.next,
- struct ptlrpc_request_buffer_desc,
- rqbd_list);
-
- list_del(&rqbd->rqbd_list);
- svcpt->scp_hist_nrqbds--;
-
- /* remove rqbd's reqs from svc's req history while
- * I've got the service lock
- */
- list_for_each_entry(req, &rqbd->rqbd_reqs, rq_list) {
- /* Track the highest culled req seq */
- if (req->rq_history_seq >
- svcpt->scp_hist_seq_culled) {
- svcpt->scp_hist_seq_culled =
- req->rq_history_seq;
- }
- list_del(&req->rq_history_list);
- }
-
- spin_unlock(&svcpt->scp_lock);
-
- while ((req = list_first_entry_or_null(
- &rqbd->rqbd_reqs,
- struct ptlrpc_request, rq_list))) {
- list_del(&req->rq_list);
- ptlrpc_server_free_request(req);
- }
-
- spin_lock(&svcpt->scp_lock);
- /*
- * now all reqs, including the embedded req, have been
- * disposed of; schedule the request buffer for reuse.
- */
- LASSERT(atomic_read(&rqbd->rqbd_req.rq_refcount) == 0);
- list_add_tail(&rqbd->rqbd_list, &svcpt->scp_rqbd_idle);
- }
-
- spin_unlock(&svcpt->scp_lock);
- } else if (req->rq_reply_state && req->rq_reply_state->rs_prealloc) {
- /* If we are low on memory, we are not interested in history */
- list_del(&req->rq_list);
- list_del_init(&req->rq_history_list);
-
- /* Track the highest culled req seq */
- if (req->rq_history_seq > svcpt->scp_hist_seq_culled)
- svcpt->scp_hist_seq_culled = req->rq_history_seq;
-
- spin_unlock(&svcpt->scp_lock);
-
- ptlrpc_server_free_request(req);
- } else {
- spin_unlock(&svcpt->scp_lock);
- }
-}
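-
-/*
- * History-culling sketch (assumed limit): with srv_hist_nrqbds_cpt_max
- * set to 100, the 101st idle rqbd entering the history evicts the
- * oldest one, and scp_hist_seq_culled records the highest request
- * sequence discarded so that history readers can detect the gap.
- */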
-
-/**
- * To finish a request: stop sending more early replies, and release
- * the request.
- */
-static void ptlrpc_server_finish_request(struct ptlrpc_service_part *svcpt,
- struct ptlrpc_request *req)
-{
- ptlrpc_server_hpreq_fini(req);
-
- if (req->rq_session.lc_thread) {
- lu_context_exit(&req->rq_session);
- lu_context_fini(&req->rq_session);
- }
-
- ptlrpc_server_drop_request(req);
-}
-
-/**
- * To finish an active request: stop sending more early replies, and release
- * the request.  Should be called after we have finished handling the request.
- */
-static void ptlrpc_server_finish_active_request(
- struct ptlrpc_service_part *svcpt,
- struct ptlrpc_request *req)
-{
- spin_lock(&svcpt->scp_req_lock);
- ptlrpc_nrs_req_stop_nolock(req);
- svcpt->scp_nreqs_active--;
- if (req->rq_hp)
- svcpt->scp_nhreqs_active--;
- spin_unlock(&svcpt->scp_req_lock);
-
- ptlrpc_nrs_req_finalize(req);
-
- if (req->rq_export)
- class_export_rpc_dec(req->rq_export);
-
- ptlrpc_server_finish_request(svcpt, req);
-}
-
-/**
- * Sanity check request \a req.
- * Return 0 if all is ok, error code otherwise.
- */
-static int ptlrpc_check_req(struct ptlrpc_request *req)
-{
- struct obd_device *obd = req->rq_export->exp_obd;
- int rc = 0;
-
- if (unlikely(lustre_msg_get_conn_cnt(req->rq_reqmsg) <
- req->rq_export->exp_conn_cnt)) {
- DEBUG_REQ(D_RPCTRACE, req,
- "DROPPING req from old connection %d < %d",
- lustre_msg_get_conn_cnt(req->rq_reqmsg),
- req->rq_export->exp_conn_cnt);
- return -EEXIST;
- }
- if (unlikely(!obd || obd->obd_fail)) {
- /*
- * Failing over, don't handle any more reqs, send
- * error response instead.
- */
- CDEBUG(D_RPCTRACE, "Dropping req %p for failed obd %s\n",
- req, obd ? obd->obd_name : "unknown");
- rc = -ENODEV;
- } else if (lustre_msg_get_flags(req->rq_reqmsg) &
- (MSG_REPLAY | MSG_REQ_REPLAY_DONE)) {
- DEBUG_REQ(D_ERROR, req, "Invalid replay without recovery");
- class_fail_export(req->rq_export);
- rc = -ENODEV;
- } else if (lustre_msg_get_transno(req->rq_reqmsg) != 0) {
- DEBUG_REQ(D_ERROR, req,
- "Invalid req with transno %llu without recovery",
- lustre_msg_get_transno(req->rq_reqmsg));
- class_fail_export(req->rq_export);
- rc = -ENODEV;
- }
-
- if (unlikely(rc < 0)) {
- req->rq_status = rc;
- ptlrpc_error(req);
- }
- return rc;
-}
-
-static void ptlrpc_at_set_timer(struct ptlrpc_service_part *svcpt)
-{
- struct ptlrpc_at_array *array = &svcpt->scp_at_array;
- __s32 next;
-
- if (array->paa_count == 0) {
- del_timer(&svcpt->scp_at_timer);
- return;
- }
-
- /* Set timer for closest deadline */
- next = (__s32)(array->paa_deadline - ktime_get_real_seconds() -
- at_early_margin);
- if (next <= 0) {
- ptlrpc_at_timer(&svcpt->scp_at_timer);
- } else {
- mod_timer(&svcpt->scp_at_timer, jiffies + next * HZ);
- CDEBUG(D_INFO, "armed %s at %+ds\n",
- svcpt->scp_service->srv_name, next);
- }
-}
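-
-/*
- * Timer arithmetic (illustrative, assuming at_early_margin = 5): a
- * nearest deadline 3s away gives next = 3 - 5 = -2, so the callback
- * runs immediately; a deadline 30s away arms the timer for 25 seconds
- * from now.
- */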
-
-/* Add an rpc to the early reply check list */
-static int ptlrpc_at_add_timed(struct ptlrpc_request *req)
-{
- struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
- struct ptlrpc_at_array *array = &svcpt->scp_at_array;
- struct ptlrpc_request *rq = NULL;
- __u32 index;
-
- if (AT_OFF)
- return 0;
-
- if (req->rq_no_reply)
- return 0;
-
- if ((lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT) == 0)
- return -ENOSYS;
-
- spin_lock(&svcpt->scp_at_lock);
- LASSERT(list_empty(&req->rq_timed_list));
-
- div_u64_rem(req->rq_deadline, array->paa_size, &index);
- if (array->paa_reqs_count[index] > 0) {
- /* latest rpcs will have the latest deadlines in the list,
- * so search backward.
- */
- list_for_each_entry_reverse(rq, &array->paa_reqs_array[index],
- rq_timed_list) {
- if (req->rq_deadline >= rq->rq_deadline) {
- list_add(&req->rq_timed_list,
- &rq->rq_timed_list);
- break;
- }
- }
- }
-
- /* Add the request at the head of the list */
- if (list_empty(&req->rq_timed_list))
- list_add(&req->rq_timed_list, &array->paa_reqs_array[index]);
-
- spin_lock(&req->rq_lock);
- req->rq_at_linked = 1;
- spin_unlock(&req->rq_lock);
- req->rq_at_index = index;
- array->paa_reqs_count[index]++;
- array->paa_count++;
- if (array->paa_count == 1 || array->paa_deadline > req->rq_deadline) {
- array->paa_deadline = req->rq_deadline;
- ptlrpc_at_set_timer(svcpt);
- }
- spin_unlock(&svcpt->scp_at_lock);
-
- return 0;
-}
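-
-/*
- * Slotting sketch (illustrative numbers): with paa_size = 755, a
- * request with rq_deadline = 10000 lands in slot 10000 % 755 = 185.
- * The reverse scan above keeps each slot sorted by deadline, newest
- * last, so insertion is O(1) when deadlines arrive in increasing order.
- */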
-
-static void
-ptlrpc_at_remove_timed(struct ptlrpc_request *req)
-{
- struct ptlrpc_at_array *array;
-
- array = &req->rq_rqbd->rqbd_svcpt->scp_at_array;
-
- /* NB: must be called while holding svcpt::scp_at_lock */
- LASSERT(!list_empty(&req->rq_timed_list));
- list_del_init(&req->rq_timed_list);
-
- spin_lock(&req->rq_lock);
- req->rq_at_linked = 0;
- spin_unlock(&req->rq_lock);
-
- array->paa_reqs_count[req->rq_at_index]--;
- array->paa_count--;
-}
-
-/*
- * Attempt to extend the request deadline by sending an early reply to the
- * client.
- */
-static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
-{
- struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
- struct ptlrpc_request *reqcopy;
- struct lustre_msg *reqmsg;
- long olddl = req->rq_deadline - ktime_get_real_seconds();
- time64_t newdl;
- int rc;
-
- /* deadline is when the client expects us to reply, margin is the
- * difference between clients' and servers' expectations
- */
- DEBUG_REQ(D_ADAPTTO, req,
- "%ssending early reply (deadline %+lds, margin %+lds) for %d+%d",
- AT_OFF ? "AT off - not " : "",
- olddl, olddl - at_get(&svcpt->scp_at_estimate),
- at_get(&svcpt->scp_at_estimate), at_extra);
-
- if (AT_OFF)
- return 0;
-
- if (olddl < 0) {
- DEBUG_REQ(D_WARNING, req, "Already past deadline (%+lds), not sending early reply. Consider increasing at_early_margin (%d)?",
- olddl, at_early_margin);
-
- /* Return an error so we're not re-added to the timed list. */
- return -ETIMEDOUT;
- }
-
- if (!(lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
- DEBUG_REQ(D_INFO, req, "Wanted to ask client for more time, but no AT support");
- return -ENOSYS;
- }
-
- /*
- * We want to extend the request deadline by at_extra seconds,
- * so we set our service estimate to reflect how much time has
- * passed since this request arrived plus an additional
- * at_extra seconds. The client will calculate the new deadline
- * based on this service estimate (plus some additional time to
- * account for network latency). See ptlrpc_at_recv_early_reply
- */
- at_measured(&svcpt->scp_at_estimate, at_extra +
- ktime_get_real_seconds() - req->rq_arrival_time.tv_sec);
- newdl = req->rq_arrival_time.tv_sec + at_get(&svcpt->scp_at_estimate);
-
- /* Check to see if we've actually increased the deadline -
- * we may be past adaptive_max
- */
- if (req->rq_deadline >= newdl) {
- DEBUG_REQ(D_WARNING, req, "Couldn't add any time (%ld/%lld), not sending early reply\n",
- olddl, newdl - ktime_get_real_seconds());
- return -ETIMEDOUT;
- }
-
- reqcopy = ptlrpc_request_cache_alloc(GFP_NOFS);
- if (!reqcopy)
- return -ENOMEM;
- reqmsg = kvzalloc(req->rq_reqlen, GFP_NOFS);
- if (!reqmsg) {
- rc = -ENOMEM;
- goto out_free;
- }
-
- *reqcopy = *req;
- reqcopy->rq_reply_state = NULL;
- reqcopy->rq_rep_swab_mask = 0;
- reqcopy->rq_pack_bulk = 0;
- reqcopy->rq_pack_udesc = 0;
- reqcopy->rq_packed_final = 0;
- sptlrpc_svc_ctx_addref(reqcopy);
- /* We only need the reqmsg for the magic */
- reqcopy->rq_reqmsg = reqmsg;
- memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen);
-
- LASSERT(atomic_read(&req->rq_refcount));
- /* if it is the last refcount then an early reply isn't needed */
- if (atomic_read(&req->rq_refcount) == 1) {
- DEBUG_REQ(D_ADAPTTO, reqcopy, "Normal reply already sent out, abort sending early reply\n");
- rc = -EINVAL;
- goto out;
- }
-
- /* Connection ref */
- reqcopy->rq_export = class_conn2export(
- lustre_msg_get_handle(reqcopy->rq_reqmsg));
- if (!reqcopy->rq_export) {
- rc = -ENODEV;
- goto out;
- }
-
- /* RPC ref */
- class_export_rpc_inc(reqcopy->rq_export);
- if (reqcopy->rq_export->exp_obd &&
- reqcopy->rq_export->exp_obd->obd_fail) {
- rc = -ENODEV;
- goto out_put;
- }
-
- rc = lustre_pack_reply_flags(reqcopy, 1, NULL, NULL, LPRFL_EARLY_REPLY);
- if (rc)
- goto out_put;
-
- rc = ptlrpc_send_reply(reqcopy, PTLRPC_REPLY_EARLY);
-
- if (!rc) {
- /* Adjust our own deadline to what we told the client */
- req->rq_deadline = newdl;
- req->rq_early_count++; /* number sent, server side */
- } else {
- DEBUG_REQ(D_ERROR, req, "Early reply send failed %d", rc);
- }
-
- /* Free the (early) reply state from lustre_pack_reply.
- * (ptlrpc_send_reply takes its own rs ref, so this is safe here)
- */
- ptlrpc_req_drop_rs(reqcopy);
-
-out_put:
- class_export_rpc_dec(reqcopy->rq_export);
- class_export_put(reqcopy->rq_export);
-out:
- sptlrpc_svc_ctx_decref(reqcopy);
- kvfree(reqmsg);
-out_free:
- ptlrpc_request_cache_free(reqcopy);
- return rc;
-}
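-
-/*
- * Worked example (illustrative values): a request arrived at t = 100,
- * it is now t = 130, and at_extra is assumed to be 30.  at_measured()
- * is fed 30 + 130 - 100 = 60, so the service estimate becomes at least
- * 60 (assuming at_measured() tracks the maximum over its window) and
- * newdl >= 100 + 60 = 160, i.e. the client is asked to allow at least
- * another 30 seconds.
- */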
-
-/* Send early replies to everybody expiring within at_early_margin
- * asking for at_extra time
- */
-static void ptlrpc_at_check_timed(struct ptlrpc_service_part *svcpt)
-{
- struct ptlrpc_at_array *array = &svcpt->scp_at_array;
- struct ptlrpc_request *rq, *n;
- struct list_head work_list;
- __u32 index, count;
- time64_t deadline;
- time64_t now = ktime_get_real_seconds();
- long delay;
- int first, counter = 0;
-
- spin_lock(&svcpt->scp_at_lock);
- if (svcpt->scp_at_check == 0) {
- spin_unlock(&svcpt->scp_at_lock);
- return;
- }
- delay = jiffies - svcpt->scp_at_checktime;
- svcpt->scp_at_check = 0;
-
- if (array->paa_count == 0) {
- spin_unlock(&svcpt->scp_at_lock);
- return;
- }
-
- /* The timer went off, but maybe the nearest rpc already completed. */
- first = array->paa_deadline - now;
- if (first > at_early_margin) {
- /* We've still got plenty of time. Reset the timer. */
- ptlrpc_at_set_timer(svcpt);
- spin_unlock(&svcpt->scp_at_lock);
- return;
- }
-
- /* We're close to a timeout, and we don't know how much longer the
- * server will take. Send early replies to everyone expiring soon.
- */
- INIT_LIST_HEAD(&work_list);
- deadline = -1;
- div_u64_rem(array->paa_deadline, array->paa_size, &index);
- count = array->paa_count;
- while (count > 0) {
- count -= array->paa_reqs_count[index];
- list_for_each_entry_safe(rq, n, &array->paa_reqs_array[index],
- rq_timed_list) {
- if (rq->rq_deadline > now + at_early_margin) {
- /* update the earliest deadline */
- if (deadline == -1 ||
- rq->rq_deadline < deadline)
- deadline = rq->rq_deadline;
- break;
- }
-
- ptlrpc_at_remove_timed(rq);
- /*
- * ptlrpc_server_drop_request() may already have
- * dropped the refcount to 0; check this and don't
- * add the entry to work_list in that case.
- */
- if (likely(atomic_inc_not_zero(&rq->rq_refcount)))
- list_add(&rq->rq_timed_list, &work_list);
- counter++;
- }
-
- if (++index >= array->paa_size)
- index = 0;
- }
- array->paa_deadline = deadline;
- /* we have a new earliest deadline, restart the timer */
- ptlrpc_at_set_timer(svcpt);
-
- spin_unlock(&svcpt->scp_at_lock);
-
- CDEBUG(D_ADAPTTO, "timeout in %+ds, asking for %d secs on %d early replies\n",
- first, at_extra, counter);
- if (first < 0) {
- /* We're already past request deadlines before we even get a
- * chance to send early replies
- */
- LCONSOLE_WARN("%s: This server is not able to keep up with request traffic (cpu-bound).\n",
- svcpt->scp_service->srv_name);
- CWARN("earlyQ=%d reqQ=%d recA=%d, svcEst=%d, delay=%ld(jiff)\n",
- counter, svcpt->scp_nreqs_incoming,
- svcpt->scp_nreqs_active,
- at_get(&svcpt->scp_at_estimate), delay);
- }
-
- /* we took an additional refcount, so entries can't be deleted from the
- * list; no locking is needed
- */
- while (!list_empty(&work_list)) {
- rq = list_entry(work_list.next, struct ptlrpc_request,
- rq_timed_list);
- list_del_init(&rq->rq_timed_list);
-
- if (ptlrpc_at_send_early_reply(rq) == 0)
- ptlrpc_at_add_timed(rq);
-
- ptlrpc_server_drop_request(rq);
- }
-}
-
-/**
- * Put the request on the export list if the request may become
- * a high-priority one.
- */
-static int ptlrpc_server_hpreq_init(struct ptlrpc_service_part *svcpt,
- struct ptlrpc_request *req)
-{
- int rc = 0;
-
- if (svcpt->scp_service->srv_ops.so_hpreq_handler) {
- rc = svcpt->scp_service->srv_ops.so_hpreq_handler(req);
- if (rc < 0)
- return rc;
- LASSERT(rc == 0);
- }
- if (req->rq_export && req->rq_ops) {
- /* Perform request specific check. We should do this check
- * before the request is added into exp_hp_rpcs list otherwise
- * it may hit swab race at LU-1044.
- */
- if (req->rq_ops->hpreq_check) {
- rc = req->rq_ops->hpreq_check(req);
- if (rc == -ESTALE) {
- req->rq_status = rc;
- ptlrpc_error(req);
- }
- /* hpreq_check can only return an error,
- * 0 for a normal request,
- * or 1 for a high-priority request
- */
- LASSERT(rc <= 1);
- }
-
- spin_lock_bh(&req->rq_export->exp_rpc_lock);
- list_add(&req->rq_exp_list, &req->rq_export->exp_hp_rpcs);
- spin_unlock_bh(&req->rq_export->exp_rpc_lock);
- }
-
- ptlrpc_nrs_req_initialize(svcpt, req, rc);
-
- return rc;
-}
-
-/** Remove the request from the export list. */
-static void ptlrpc_server_hpreq_fini(struct ptlrpc_request *req)
-{
- if (req->rq_export && req->rq_ops) {
- /* refresh lock timeout again so that client has more
- * room to send lock cancel RPC.
- */
- if (req->rq_ops->hpreq_fini)
- req->rq_ops->hpreq_fini(req);
-
- spin_lock_bh(&req->rq_export->exp_rpc_lock);
- list_del_init(&req->rq_exp_list);
- spin_unlock_bh(&req->rq_export->exp_rpc_lock);
- }
-}
-
-static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt,
- struct ptlrpc_request *req)
-{
- int rc;
-
- rc = ptlrpc_server_hpreq_init(svcpt, req);
- if (rc < 0)
- return rc;
-
- ptlrpc_nrs_req_add(svcpt, req, !!rc);
-
- return 0;
-}
-
-/**
- * Whether handling a high-priority request is allowed.
- * This may be called without any lock, but the caller must hold
- * ptlrpc_service_part::scp_req_lock to get a reliable result.
- */
-static bool ptlrpc_server_allow_high(struct ptlrpc_service_part *svcpt,
- bool force)
-{
- int running = svcpt->scp_nthrs_running;
-
- if (!nrs_svcpt_has_hp(svcpt))
- return false;
-
- if (force)
- return true;
-
- if (unlikely(svcpt->scp_service->srv_req_portal == MDS_REQUEST_PORTAL &&
- CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CANCEL_RESEND))) {
- /* leave just 1 thread for normal RPCs */
- running = PTLRPC_NTHRS_INIT;
- if (svcpt->scp_service->srv_ops.so_hpreq_handler)
- running += 1;
- }
-
- if (svcpt->scp_nreqs_active >= running - 1)
- return false;
-
- if (svcpt->scp_nhreqs_active == 0)
- return true;
-
- return !ptlrpc_nrs_req_pending_nolock(svcpt, false) ||
- svcpt->scp_hreq_count < svcpt->scp_service->srv_hpreq_ratio;
-}
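-
-/*
- * Ratio sketch (assumed srv_hpreq_ratio = 2): while normal requests are
- * pending, at most two consecutive HP requests are served before this
- * returns false; ptlrpc_server_request_get() below then falls through
- * to the normal queue and resets scp_hreq_count.
- */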
-
-static bool ptlrpc_server_high_pending(struct ptlrpc_service_part *svcpt,
- bool force)
-{
- return ptlrpc_server_allow_high(svcpt, force) &&
- ptlrpc_nrs_req_pending_nolock(svcpt, true);
-}
-
-/**
- * Only allow normal priority requests on a service that has a high-priority
- * queue if forced (i.e. cleanup), if there are other high priority requests
- * already being processed (i.e. those threads can service more high-priority
- * requests), or if there are enough idle threads that a later thread can do
- * a high priority request.
- * This may be called without any lock, but the caller must hold
- * ptlrpc_service_part::scp_req_lock to get a reliable result.
- */
-static bool ptlrpc_server_allow_normal(struct ptlrpc_service_part *svcpt,
- bool force)
-{
- int running = svcpt->scp_nthrs_running;
-
- if (unlikely(svcpt->scp_service->srv_req_portal == MDS_REQUEST_PORTAL &&
- CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CANCEL_RESEND))) {
- /* leave just 1 thread for normal RPCs */
- running = PTLRPC_NTHRS_INIT;
- if (svcpt->scp_service->srv_ops.so_hpreq_handler)
- running += 1;
- }
-
- if (force ||
- svcpt->scp_nreqs_active < running - 2)
- return true;
-
- if (svcpt->scp_nreqs_active >= running - 1)
- return false;
-
- return svcpt->scp_nhreqs_active > 0 || !nrs_svcpt_has_hp(svcpt);
-}
-
-static bool ptlrpc_server_normal_pending(struct ptlrpc_service_part *svcpt,
- bool force)
-{
- return ptlrpc_server_allow_normal(svcpt, force) &&
- ptlrpc_nrs_req_pending_nolock(svcpt, false);
-}
-
-/**
- * Returns true if there are requests available in the incoming
- * request queue for processing and we are allowed to fetch them.
- * This may be called without any lock, but the caller must hold
- * ptlrpc_service_part::scp_req_lock to get a reliable result.
- * \see ptlrpc_server_allow_normal
- * \see ptlrpc_server_allow_high
- */
-static inline bool
-ptlrpc_server_request_pending(struct ptlrpc_service_part *svcpt, bool force)
-{
- return ptlrpc_server_high_pending(svcpt, force) ||
- ptlrpc_server_normal_pending(svcpt, force);
-}
-
-/**
- * Fetch a request for processing from the queue of unprocessed requests.
- * Favors high-priority requests.
- * Returns a pointer to the fetched request.
- */
-static struct ptlrpc_request *
-ptlrpc_server_request_get(struct ptlrpc_service_part *svcpt, bool force)
-{
- struct ptlrpc_request *req = NULL;
-
- spin_lock(&svcpt->scp_req_lock);
-
- if (ptlrpc_server_high_pending(svcpt, force)) {
- req = ptlrpc_nrs_req_get_nolock(svcpt, true, force);
- if (req) {
- svcpt->scp_hreq_count++;
- goto got_request;
- }
- }
-
- if (ptlrpc_server_normal_pending(svcpt, force)) {
- req = ptlrpc_nrs_req_get_nolock(svcpt, false, force);
- if (req) {
- svcpt->scp_hreq_count = 0;
- goto got_request;
- }
- }
-
- spin_unlock(&svcpt->scp_req_lock);
- return NULL;
-
-got_request:
- svcpt->scp_nreqs_active++;
- if (req->rq_hp)
- svcpt->scp_nhreqs_active++;
-
- spin_unlock(&svcpt->scp_req_lock);
-
- if (likely(req->rq_export))
- class_export_rpc_inc(req->rq_export);
-
- return req;
-}
-
-/**
- * Handle freshly incoming reqs: add them to the timed early reply list
- * and pass them on to the regular request queue.
- * All incoming requests pass through here before getting into
- * ptlrpc_server_handle_request() later on.
- */
-static int
-ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt,
- struct ptlrpc_thread *thread)
-{
- struct ptlrpc_service *svc = svcpt->scp_service;
- struct ptlrpc_request *req;
- __u32 deadline;
- int rc;
-
- spin_lock(&svcpt->scp_lock);
- if (list_empty(&svcpt->scp_req_incoming)) {
- spin_unlock(&svcpt->scp_lock);
- return 0;
- }
-
- req = list_entry(svcpt->scp_req_incoming.next,
- struct ptlrpc_request, rq_list);
- list_del_init(&req->rq_list);
- svcpt->scp_nreqs_incoming--;
- /* Consider this still a "queued" request as far as stats are
- * concerned
- */
- spin_unlock(&svcpt->scp_lock);
-
- /* go through security check/transform */
- rc = sptlrpc_svc_unwrap_request(req);
- switch (rc) {
- case SECSVC_OK:
- break;
- case SECSVC_COMPLETE:
- target_send_reply(req, 0, OBD_FAIL_MDS_ALL_REPLY_NET);
- goto err_req;
- case SECSVC_DROP:
- goto err_req;
- default:
- LBUG();
- }
-
- /*
- * For a null-flavored rpc, the msg has already been unpacked by
- * sptlrpc; redoing it wouldn't be harmful, though.
- */
- if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) {
- rc = ptlrpc_unpack_req_msg(req, req->rq_reqlen);
- if (rc != 0) {
- CERROR("error unpacking request: ptl %d from %s x%llu\n",
- svc->srv_req_portal, libcfs_id2str(req->rq_peer),
- req->rq_xid);
- goto err_req;
- }
- }
-
- rc = lustre_unpack_req_ptlrpc_body(req, MSG_PTLRPC_BODY_OFF);
- if (rc) {
- CERROR("error unpacking ptlrpc body: ptl %d from %s x%llu\n",
- svc->srv_req_portal, libcfs_id2str(req->rq_peer),
- req->rq_xid);
- goto err_req;
- }
-
- if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_REQ_OPC) &&
- lustre_msg_get_opc(req->rq_reqmsg) == cfs_fail_val) {
- CERROR("drop incoming rpc opc %u, x%llu\n",
- cfs_fail_val, req->rq_xid);
- goto err_req;
- }
-
- rc = -EINVAL;
- if (lustre_msg_get_type(req->rq_reqmsg) != PTL_RPC_MSG_REQUEST) {
- CERROR("wrong packet type received (type=%u) from %s\n",
- lustre_msg_get_type(req->rq_reqmsg),
- libcfs_id2str(req->rq_peer));
- goto err_req;
- }
-
- switch (lustre_msg_get_opc(req->rq_reqmsg)) {
- case MDS_WRITEPAGE:
- case OST_WRITE:
- req->rq_bulk_write = 1;
- break;
- case MDS_READPAGE:
- case OST_READ:
- case MGS_CONFIG_READ:
- req->rq_bulk_read = 1;
- break;
- }
-
- CDEBUG(D_RPCTRACE, "got req x%llu\n", req->rq_xid);
-
- req->rq_export = class_conn2export(
- lustre_msg_get_handle(req->rq_reqmsg));
- if (req->rq_export) {
- rc = ptlrpc_check_req(req);
- if (rc == 0) {
- rc = sptlrpc_target_export_check(req->rq_export, req);
- if (rc)
- DEBUG_REQ(D_ERROR, req, "DROPPING req with illegal security flavor,");
- }
-
- if (rc)
- goto err_req;
- }
-
- /* req_in handling should/must be fast */
- if (ktime_get_real_seconds() - req->rq_arrival_time.tv_sec > 5)
- DEBUG_REQ(D_WARNING, req, "Slow req_in handling %llds",
- (s64)(ktime_get_real_seconds() -
- req->rq_arrival_time.tv_sec));
-
- /* Set rpc server deadline and add it to the timed list */
- deadline = (lustre_msghdr_get_flags(req->rq_reqmsg) &
- MSGHDR_AT_SUPPORT) ?
- /* The max time the client expects us to take */
- lustre_msg_get_timeout(req->rq_reqmsg) : obd_timeout;
- req->rq_deadline = req->rq_arrival_time.tv_sec + deadline;
- if (unlikely(deadline == 0)) {
- DEBUG_REQ(D_ERROR, req, "Dropping request with 0 timeout");
- goto err_req;
- }
-
- req->rq_svc_thread = thread;
- if (thread) {
- /* initialize the request session; it is needed for request
- * processing by the target
- */
- rc = lu_context_init(&req->rq_session,
- LCT_SERVER_SESSION | LCT_NOREF);
- if (rc) {
- CERROR("%s: failure to initialize session: rc = %d\n",
- thread->t_name, rc);
- goto err_req;
- }
- req->rq_session.lc_thread = thread;
- lu_context_enter(&req->rq_session);
- req->rq_svc_thread->t_env->le_ses = &req->rq_session;
- }
-
- ptlrpc_at_add_timed(req);
-
- /* Move it over to the request processing queue */
- rc = ptlrpc_server_request_add(svcpt, req);
- if (rc)
- goto err_req;
-
- wake_up(&svcpt->scp_waitq);
- return 1;
-
-err_req:
- ptlrpc_server_finish_request(svcpt, req);
-
- return 1;
-}
-
-/**
- * Main incoming request handling logic.
- * Calls the service's handler function to do the actual processing.
- */
-static int
-ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
- struct ptlrpc_thread *thread)
-{
- struct ptlrpc_service *svc = svcpt->scp_service;
- struct ptlrpc_request *request;
- struct timespec64 work_start;
- struct timespec64 work_end;
- struct timespec64 timediff;
- struct timespec64 arrived;
- unsigned long timediff_usecs;
- unsigned long arrived_usecs;
- int fail_opc = 0;
-
- request = ptlrpc_server_request_get(svcpt, false);
- if (!request)
- return 0;
-
- if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT))
- fail_opc = OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT;
- else if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_TIMEOUT))
- fail_opc = OBD_FAIL_PTLRPC_HPREQ_TIMEOUT;
-
- if (unlikely(fail_opc)) {
- if (request->rq_export && request->rq_ops)
- OBD_FAIL_TIMEOUT(fail_opc, 4);
- }
-
- ptlrpc_rqphase_move(request, RQ_PHASE_INTERPRET);
-
- if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DUMP_LOG))
- libcfs_debug_dumplog();
-
- ktime_get_real_ts64(&work_start);
- timediff = timespec64_sub(work_start, request->rq_arrival_time);
- timediff_usecs = timediff.tv_sec * USEC_PER_SEC +
- timediff.tv_nsec / NSEC_PER_USEC;
- if (likely(svc->srv_stats)) {
- lprocfs_counter_add(svc->srv_stats, PTLRPC_REQWAIT_CNTR,
- timediff_usecs);
- lprocfs_counter_add(svc->srv_stats, PTLRPC_REQQDEPTH_CNTR,
- svcpt->scp_nreqs_incoming);
- lprocfs_counter_add(svc->srv_stats, PTLRPC_REQACTIVE_CNTR,
- svcpt->scp_nreqs_active);
- lprocfs_counter_add(svc->srv_stats, PTLRPC_TIMEOUT,
- at_get(&svcpt->scp_at_estimate));
- }
-
- if (likely(request->rq_export)) {
- if (unlikely(ptlrpc_check_req(request)))
- goto put_conn;
- }
-
- /* Discard requests queued for longer than the deadline.
- * The deadline is increased if we send an early reply.
- */
- if (ktime_get_real_seconds() > request->rq_deadline) {
- DEBUG_REQ(D_ERROR, request, "Dropping timed-out request from %s: deadline %lld:%llds ago\n",
- libcfs_id2str(request->rq_peer),
- request->rq_deadline -
- request->rq_arrival_time.tv_sec,
- ktime_get_real_seconds() - request->rq_deadline);
- goto put_conn;
- }
-
- CDEBUG(D_RPCTRACE, "Handling RPC pname:cluuid+ref:pid:xid:nid:opc %s:%s+%d:%d:x%llu:%s:%d\n",
- current->comm,
- (request->rq_export ?
- (char *)request->rq_export->exp_client_uuid.uuid : "0"),
- (request->rq_export ?
- atomic_read(&request->rq_export->exp_refcount) : -99),
- lustre_msg_get_status(request->rq_reqmsg), request->rq_xid,
- libcfs_id2str(request->rq_peer),
- lustre_msg_get_opc(request->rq_reqmsg));
-
- if (lustre_msg_get_opc(request->rq_reqmsg) != OBD_PING)
- CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_PAUSE_REQ, cfs_fail_val);
-
- CDEBUG(D_NET, "got req %llu\n", request->rq_xid);
-
- /* re-assign the request and session thread to the current one */
- request->rq_svc_thread = thread;
- if (thread) {
- LASSERT(request->rq_session.lc_thread);
- request->rq_session.lc_thread = thread;
- request->rq_session.lc_cookie = 0x55;
- thread->t_env->le_ses = &request->rq_session;
- }
- svc->srv_ops.so_req_handler(request);
-
- ptlrpc_rqphase_move(request, RQ_PHASE_COMPLETE);
-
-put_conn:
- if (unlikely(ktime_get_real_seconds() > request->rq_deadline)) {
- DEBUG_REQ(D_WARNING, request,
- "Request took longer than estimated (%lld:%llds); "
- "client may timeout.",
- (s64)request->rq_deadline -
- request->rq_arrival_time.tv_sec,
- (s64)ktime_get_real_seconds() - request->rq_deadline);
- }
-
- ktime_get_real_ts64(&work_end);
- timediff = timespec64_sub(work_end, work_start);
- timediff_usecs = timediff.tv_sec * USEC_PER_SEC +
- timediff.tv_nsec / NSEC_PER_USEC;
- arrived = timespec64_sub(work_end, request->rq_arrival_time);
- arrived_usecs = arrived.tv_sec * USEC_PER_SEC +
- arrived.tv_nsec / NSEC_PER_USEC;
- CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid+ref:pid:xid:nid:opc %s:%s+%d:%d:x%llu:%s:%d Request processed in %ldus (%ldus total) trans %llu rc %d/%d\n",
- current->comm,
- (request->rq_export ?
- (char *)request->rq_export->exp_client_uuid.uuid : "0"),
- (request->rq_export ?
- atomic_read(&request->rq_export->exp_refcount) : -99),
- lustre_msg_get_status(request->rq_reqmsg),
- request->rq_xid,
- libcfs_id2str(request->rq_peer),
- lustre_msg_get_opc(request->rq_reqmsg),
- timediff_usecs,
- arrived_usecs,
- (request->rq_repmsg ?
- lustre_msg_get_transno(request->rq_repmsg) :
- request->rq_transno),
- request->rq_status,
- (request->rq_repmsg ?
- lustre_msg_get_status(request->rq_repmsg) : -999));
- if (likely(svc->srv_stats && request->rq_reqmsg)) {
- __u32 op = lustre_msg_get_opc(request->rq_reqmsg);
- int opc = opcode_offset(op);
-
- if (opc > 0 && !(op == LDLM_ENQUEUE || op == MDS_REINT)) {
- LASSERT(opc < LUSTRE_MAX_OPCODES);
- lprocfs_counter_add(svc->srv_stats,
- opc + EXTRA_MAX_OPCODES,
- timediff_usecs);
- }
- }
- if (unlikely(request->rq_early_count)) {
- DEBUG_REQ(D_ADAPTTO, request,
- "sent %d early replies before finishing in %llds",
- request->rq_early_count,
- (s64)work_end.tv_sec -
- request->rq_arrival_time.tv_sec);
- }
-
- ptlrpc_server_finish_active_request(svcpt, request);
-
- return 1;
-}
-
-/**
- * An internal function to process a single reply state object.
- */
-static int
-ptlrpc_handle_rs(struct ptlrpc_reply_state *rs)
-{
- struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
- struct ptlrpc_service *svc = svcpt->scp_service;
- struct obd_export *exp;
- int nlocks;
- int been_handled;
-
- exp = rs->rs_export;
-
- LASSERT(rs->rs_difficult);
- LASSERT(rs->rs_scheduled);
- LASSERT(list_empty(&rs->rs_list));
-
- spin_lock(&exp->exp_lock);
- /* Noop if removed already */
- list_del_init(&rs->rs_exp_list);
- spin_unlock(&exp->exp_lock);
-
- /* The disk commit callback holds exp_uncommitted_replies_lock while it
- * iterates over newly committed replies, removing them from
- * exp_uncommitted_replies. It then drops this lock and schedules the
- * replies it found for handling here.
- *
- * We can avoid contention for exp_uncommitted_replies_lock between the
- * HRT threads and further commit callbacks by checking rs_committed
- * which is set in the commit callback while it holds both
- * rs_lock and exp_uncommitted_replies.
- *
- * If we see rs_committed clear, the commit callback _may_ not have
- * handled this reply yet and we race with it to grab
- * exp_uncommitted_replies_lock before removing the reply from
- * exp_uncommitted_replies. Note that if we lose the race and the
- * reply has already been removed, list_del_init() is a noop.
- *
- * If we see rs_committed set, we know the commit callback is handling,
- * or has handled this reply since store reordering might allow us to
- * see rs_committed set out of sequence. But since this is done
- * holding rs_lock, we can be sure it has all completed once we hold
- * rs_lock, which we do right next.
- */
- if (!rs->rs_committed) {
- spin_lock(&exp->exp_uncommitted_replies_lock);
- list_del_init(&rs->rs_obd_list);
- spin_unlock(&exp->exp_uncommitted_replies_lock);
- }
-
- spin_lock(&rs->rs_lock);
-
- been_handled = rs->rs_handled;
- rs->rs_handled = 1;
-
- nlocks = rs->rs_nlocks; /* atomic "steal", but */
- rs->rs_nlocks = 0; /* locks still on rs_locks! */
-
- if (nlocks == 0 && !been_handled) {
- /* If we see this, we should already have seen the warning
- * in mds_steal_ack_locks()
- */
- CDEBUG(D_HA, "All locks stolen from rs %p x%lld.t%lld o%d NID %s\n",
- rs,
- rs->rs_xid, rs->rs_transno, rs->rs_opc,
- libcfs_nid2str(exp->exp_connection->c_peer.nid));
- }
-
- if ((!been_handled && rs->rs_on_net) || nlocks > 0) {
- spin_unlock(&rs->rs_lock);
-
- if (!been_handled && rs->rs_on_net) {
- LNetMDUnlink(rs->rs_md_h);
- /* Ignore return code; we're racing with completion */
- }
-
- while (nlocks-- > 0)
- ldlm_lock_decref(&rs->rs_locks[nlocks],
- rs->rs_modes[nlocks]);
-
- spin_lock(&rs->rs_lock);
- }
-
- rs->rs_scheduled = 0;
-
- if (!rs->rs_on_net) {
- /* Off the net */
- spin_unlock(&rs->rs_lock);
-
- class_export_put(exp);
- rs->rs_export = NULL;
- ptlrpc_rs_decref(rs);
- if (atomic_dec_and_test(&svcpt->scp_nreps_difficult) &&
- svc->srv_is_stopping)
- wake_up_all(&svcpt->scp_waitq);
- return 1;
- }
-
- /* still on the net; callback will schedule */
- spin_unlock(&rs->rs_lock);
- return 1;
-}
-
-static void
-ptlrpc_check_rqbd_pool(struct ptlrpc_service_part *svcpt)
-{
- int avail = svcpt->scp_nrqbds_posted;
- int low_water = test_req_buffer_pressure ? 0 :
- svcpt->scp_service->srv_nbuf_per_group / 2;
-
- /* NB I'm not locking; just looking. */
-
- /* CAVEAT EMPTOR: We might be allocating buffers here because we've
- * allowed the request history to grow out of control. We could put a
- * sanity check on that here and cull some history if we need the
- * space.
- */
-
- if (avail <= low_water)
- ptlrpc_grow_req_bufs(svcpt, 1);
-
- if (svcpt->scp_service->srv_stats) {
- lprocfs_counter_add(svcpt->scp_service->srv_stats,
- PTLRPC_REQBUF_AVAIL_CNTR, avail);
- }
-}
-
-static inline int
-ptlrpc_threads_enough(struct ptlrpc_service_part *svcpt)
-{
- return svcpt->scp_nreqs_active <
- svcpt->scp_nthrs_running - 1 -
- (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL);
-}
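-
-/*
- * Illustrative (assumed thread count): with scp_nthrs_running = 8 and
- * an hpreq handler registered, this returns true while fewer than
- * 8 - 1 - 1 = 6 requests are active; one thread is always held back,
- * plus one more when an hpreq handler is registered.
- */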
-
-/**
- * Whether we are allowed to create more threads.
- * This may be called without any lock, but the caller must hold
- * ptlrpc_service_part::scp_lock to get a reliable result.
- */
-static inline int
-ptlrpc_threads_increasable(struct ptlrpc_service_part *svcpt)
-{
- return svcpt->scp_nthrs_running +
- svcpt->scp_nthrs_starting <
- svcpt->scp_service->srv_nthrs_cpt_limit;
-}
-
-/**
- * There are too many requests and we are allowed to create more threads.
- */
-static inline int
-ptlrpc_threads_need_create(struct ptlrpc_service_part *svcpt)
-{
- return !ptlrpc_threads_enough(svcpt) &&
- ptlrpc_threads_increasable(svcpt);
-}
-
-static inline int
-ptlrpc_thread_stopping(struct ptlrpc_thread *thread)
-{
- return thread_is_stopping(thread) ||
- thread->t_svcpt->scp_service->srv_is_stopping;
-}
-
-static inline int
-ptlrpc_rqbd_pending(struct ptlrpc_service_part *svcpt)
-{
- return !list_empty(&svcpt->scp_rqbd_idle) &&
- svcpt->scp_rqbd_timeout == 0;
-}
-
-static inline int
-ptlrpc_at_check(struct ptlrpc_service_part *svcpt)
-{
- return svcpt->scp_at_check;
-}
-
-/**
- * Requests are waiting for preprocessing.
- * This may be called without any lock, but the caller must hold
- * ptlrpc_service_part::scp_lock to get a reliable result.
- */
-static inline int
-ptlrpc_server_request_incoming(struct ptlrpc_service_part *svcpt)
-{
- return !list_empty(&svcpt->scp_req_incoming);
-}
-
-/* We prefer LIFO queuing, but the kernel doesn't provide that yet. */
-#ifndef wait_event_idle_exclusive_lifo
-#define wait_event_idle_exclusive_lifo wait_event_idle_exclusive
-#define wait_event_idle_exclusive_lifo_timeout wait_event_idle_exclusive_timeout
-#endif
-
-static __attribute__((__noinline__)) int
-ptlrpc_wait_event(struct ptlrpc_service_part *svcpt,
- struct ptlrpc_thread *thread)
-{
- /* Don't exit while there are replies to be handled */
-
- /* XXX: Add this back when libcfs watchdog is merged upstream
- lc_watchdog_disable(thread->t_watchdog);
- */
-
- cond_resched();
-
- if (svcpt->scp_rqbd_timeout == 0)
- wait_event_idle_exclusive_lifo(
- svcpt->scp_waitq,
- ptlrpc_thread_stopping(thread) ||
- ptlrpc_server_request_incoming(svcpt) ||
- ptlrpc_server_request_pending(svcpt,
- false) ||
- ptlrpc_rqbd_pending(svcpt) ||
- ptlrpc_at_check(svcpt));
- else if (0 == wait_event_idle_exclusive_lifo_timeout(
- svcpt->scp_waitq,
- ptlrpc_thread_stopping(thread) ||
- ptlrpc_server_request_incoming(svcpt) ||
- ptlrpc_server_request_pending(svcpt,
- false) ||
- ptlrpc_rqbd_pending(svcpt) ||
- ptlrpc_at_check(svcpt),
- svcpt->scp_rqbd_timeout))
- svcpt->scp_rqbd_timeout = 0;
-
- if (ptlrpc_thread_stopping(thread))
- return -EINTR;
-
- /*
- lc_watchdog_touch(thread->t_watchdog,
- ptlrpc_server_get_timeout(svcpt));
- */
- return 0;
-}
-
-/**
- * Main thread body for service threads.
- * Waits in a loop for new requests to process.
- * Every time an incoming request is added to the queue, the waitq
- * is woken up and one of the threads will handle it.
- */
-static int ptlrpc_main(void *arg)
-{
- struct ptlrpc_thread *thread = arg;
- struct ptlrpc_service_part *svcpt = thread->t_svcpt;
- struct ptlrpc_service *svc = svcpt->scp_service;
- struct ptlrpc_reply_state *rs;
- struct group_info *ginfo = NULL;
- struct lu_env *env;
- int counter = 0, rc = 0;
-
- thread->t_pid = current->pid;
- unshare_fs_struct();
-
- /* NB: we will call cfs_cpt_bind() for all threads, because we
- * might want to run the lustre server only on a subset of system CPUs;
- * in that case ->scp_cpt is CFS_CPT_ANY
- */
- rc = cfs_cpt_bind(svc->srv_cptable, svcpt->scp_cpt);
- if (rc != 0) {
- CWARN("%s: failed to bind %s on CPT %d\n",
- svc->srv_name, thread->t_name, svcpt->scp_cpt);
- }
-
- ginfo = groups_alloc(0);
- if (!ginfo) {
- rc = -ENOMEM;
- goto out;
- }
-
- set_current_groups(ginfo);
- put_group_info(ginfo);
-
- if (svc->srv_ops.so_thr_init) {
- rc = svc->srv_ops.so_thr_init(thread);
- if (rc)
- goto out;
- }
-
- env = kzalloc(sizeof(*env), GFP_KERNEL);
- if (!env) {
- rc = -ENOMEM;
- goto out_srv_fini;
- }
-
- rc = lu_context_init(&env->le_ctx,
- svc->srv_ctx_tags | LCT_REMEMBER | LCT_NOREF);
- if (rc)
- goto out_srv_fini;
-
- thread->t_env = env;
- env->le_ctx.lc_thread = thread;
- env->le_ctx.lc_cookie = 0x6;
-
- while (!list_empty(&svcpt->scp_rqbd_idle)) {
- rc = ptlrpc_server_post_idle_rqbds(svcpt);
- if (rc >= 0)
- continue;
-
- CERROR("Failed to post rqbd for %s on CPT %d: %d\n",
- svc->srv_name, svcpt->scp_cpt, rc);
- goto out_srv_fini;
- }
-
- /* Alloc reply state structure for this one */
- rs = kvzalloc(svc->srv_max_reply_size, GFP_KERNEL);
- if (!rs) {
- rc = -ENOMEM;
- goto out_srv_fini;
- }
-
- spin_lock(&svcpt->scp_lock);
-
- LASSERT(thread_is_starting(thread));
- thread_clear_flags(thread, SVC_STARTING);
-
- LASSERT(svcpt->scp_nthrs_starting == 1);
- svcpt->scp_nthrs_starting--;
-
- /* SVC_STOPPING may already be set here if someone else is trying
- * to stop the service while this new thread has been dynamically
- * forked. We still set SVC_RUNNING to let our creator know that
- * we are now running; however, we will exit as soon as possible
- */
- thread_add_flags(thread, SVC_RUNNING);
- svcpt->scp_nthrs_running++;
- spin_unlock(&svcpt->scp_lock);
-
- /* wake up our creator in case it's still waiting. */
- wake_up(&thread->t_ctl_waitq);
-
- /*
- thread->t_watchdog = lc_watchdog_add(ptlrpc_server_get_timeout(svcpt),
- NULL, NULL);
- */
-
- spin_lock(&svcpt->scp_rep_lock);
- list_add(&rs->rs_list, &svcpt->scp_rep_idle);
- wake_up(&svcpt->scp_rep_waitq);
- spin_unlock(&svcpt->scp_rep_lock);
-
- CDEBUG(D_NET, "service thread %d (#%d) started\n", thread->t_id,
- svcpt->scp_nthrs_running);
-
- /* XXX maintain a list of all managed devices: insert here */
- while (!ptlrpc_thread_stopping(thread)) {
- if (ptlrpc_wait_event(svcpt, thread))
- break;
-
- ptlrpc_check_rqbd_pool(svcpt);
-
- if (ptlrpc_threads_need_create(svcpt)) {
- /* Ignore return code - we tried... */
- ptlrpc_start_thread(svcpt, 0);
- }
-
- /* Process all incoming reqs before handling any */
- if (ptlrpc_server_request_incoming(svcpt)) {
- lu_context_enter(&env->le_ctx);
- env->le_ses = NULL;
- ptlrpc_server_handle_req_in(svcpt, thread);
- lu_context_exit(&env->le_ctx);
-
- /* but limit ourselves in case of flood */
- if (counter++ < 100)
- continue;
- counter = 0;
- }
-
- if (ptlrpc_at_check(svcpt))
- ptlrpc_at_check_timed(svcpt);
-
- if (ptlrpc_server_request_pending(svcpt, false)) {
- lu_context_enter(&env->le_ctx);
- ptlrpc_server_handle_request(svcpt, thread);
- lu_context_exit(&env->le_ctx);
- }
-
- if (ptlrpc_rqbd_pending(svcpt) &&
- ptlrpc_server_post_idle_rqbds(svcpt) < 0) {
- /* I just failed to repost request buffers.
- * Wait for a timeout (unless something else
- * happens) before I try again
- */
- svcpt->scp_rqbd_timeout = HZ / 10;
- CDEBUG(D_RPCTRACE, "Posted buffers: %d\n",
- svcpt->scp_nrqbds_posted);
- }
- }
-
- /*
- lc_watchdog_delete(thread->t_watchdog);
- thread->t_watchdog = NULL;
- */
-
-out_srv_fini:
- /*
- * deconstruct service specific state created by ptlrpc_start_thread()
- */
- if (svc->srv_ops.so_thr_done)
- svc->srv_ops.so_thr_done(thread);
-
- if (env) {
- lu_context_fini(&env->le_ctx);
- kfree(env);
- }
-out:
- CDEBUG(D_RPCTRACE, "service thread [ %p : %u ] %d exiting: rc %d\n",
- thread, thread->t_pid, thread->t_id, rc);
-
- spin_lock(&svcpt->scp_lock);
- if (thread_test_and_clear_flags(thread, SVC_STARTING))
- svcpt->scp_nthrs_starting--;
-
- if (thread_test_and_clear_flags(thread, SVC_RUNNING)) {
- /* must know immediately */
- svcpt->scp_nthrs_running--;
- }
-
- thread->t_id = rc;
- thread_add_flags(thread, SVC_STOPPED);
-
- wake_up(&thread->t_ctl_waitq);
- spin_unlock(&svcpt->scp_lock);
-
- return rc;
-}
-
-static int hrt_dont_sleep(struct ptlrpc_hr_thread *hrt,
- struct list_head *replies)
-{
- int result;
-
- spin_lock(&hrt->hrt_lock);
-
- list_splice_init(&hrt->hrt_queue, replies);
- result = ptlrpc_hr.hr_stopping || !list_empty(replies);
-
- spin_unlock(&hrt->hrt_lock);
- return result;
-}
-
-/**
- * Main body of "handle reply" function.
- * It processes acked reply states.
- */
-static int ptlrpc_hr_main(void *arg)
-{
- struct ptlrpc_hr_thread *hrt = arg;
- struct ptlrpc_hr_partition *hrp = hrt->hrt_partition;
- LIST_HEAD(replies);
- char threadname[20];
- int rc;
-
- snprintf(threadname, sizeof(threadname), "ptlrpc_hr%02d_%03d",
- hrp->hrp_cpt, hrt->hrt_id);
- unshare_fs_struct();
-
- rc = cfs_cpt_bind(ptlrpc_hr.hr_cpt_table, hrp->hrp_cpt);
- if (rc != 0) {
- CWARN("Failed to bind %s on CPT %d of CPT table %p: rc = %d\n",
- threadname, hrp->hrp_cpt, ptlrpc_hr.hr_cpt_table, rc);
- }
-
- atomic_inc(&hrp->hrp_nstarted);
- wake_up(&ptlrpc_hr.hr_waitq);
-
- while (!ptlrpc_hr.hr_stopping) {
- wait_event_idle(hrt->hrt_waitq, hrt_dont_sleep(hrt, &replies));
-
- while (!list_empty(&replies)) {
- struct ptlrpc_reply_state *rs;
-
- rs = list_entry(replies.prev, struct ptlrpc_reply_state,
- rs_list);
- list_del_init(&rs->rs_list);
- ptlrpc_handle_rs(rs);
- }
- }
-
- atomic_inc(&hrp->hrp_nstopped);
- wake_up(&ptlrpc_hr.hr_waitq);
-
- return 0;
-}
-
-static void ptlrpc_stop_hr_threads(void)
-{
- struct ptlrpc_hr_partition *hrp;
- int i;
- int j;
-
- ptlrpc_hr.hr_stopping = 1;
-
- cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
- if (!hrp->hrp_thrs)
- continue; /* uninitialized */
- for (j = 0; j < hrp->hrp_nthrs; j++)
- wake_up_all(&hrp->hrp_thrs[j].hrt_waitq);
- }
-
- cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
- if (!hrp->hrp_thrs)
- continue; /* uninitialized */
- wait_event(ptlrpc_hr.hr_waitq,
- atomic_read(&hrp->hrp_nstopped) ==
- atomic_read(&hrp->hrp_nstarted));
- }
-}
-
-static int ptlrpc_start_hr_threads(void)
-{
- struct ptlrpc_hr_partition *hrp;
- int i;
- int j;
-
- cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
- int rc = 0;
-
- for (j = 0; j < hrp->hrp_nthrs; j++) {
- struct ptlrpc_hr_thread *hrt = &hrp->hrp_thrs[j];
- struct task_struct *task;
-
- task = kthread_run(ptlrpc_hr_main,
- &hrp->hrp_thrs[j],
- "ptlrpc_hr%02d_%03d",
- hrp->hrp_cpt, hrt->hrt_id);
- if (IS_ERR(task)) {
- rc = PTR_ERR(task);
- break;
- }
- }
- wait_event(ptlrpc_hr.hr_waitq,
- atomic_read(&hrp->hrp_nstarted) == j);
-
- if (rc < 0) {
- CERROR("cannot start reply handler thread %d:%d: rc = %d\n",
- i, j, rc);
- ptlrpc_stop_hr_threads();
- return rc;
- }
- }
- return 0;
-}
-
-static void ptlrpc_svcpt_stop_threads(struct ptlrpc_service_part *svcpt)
-{
- struct ptlrpc_thread *thread;
- LIST_HEAD(zombie);
-
- CDEBUG(D_INFO, "Stopping threads for service %s\n",
- svcpt->scp_service->srv_name);
-
- spin_lock(&svcpt->scp_lock);
- /* let the thread know that we would like it to stop asap */
- list_for_each_entry(thread, &svcpt->scp_threads, t_link) {
- CDEBUG(D_INFO, "Stopping thread %s #%u\n",
- svcpt->scp_service->srv_thread_name, thread->t_id);
- thread_add_flags(thread, SVC_STOPPING);
- }
-
- wake_up_all(&svcpt->scp_waitq);
-
- while (!list_empty(&svcpt->scp_threads)) {
- thread = list_entry(svcpt->scp_threads.next,
- struct ptlrpc_thread, t_link);
- if (thread_is_stopped(thread)) {
- list_del(&thread->t_link);
- list_add(&thread->t_link, &zombie);
- continue;
- }
- spin_unlock(&svcpt->scp_lock);
-
- CDEBUG(D_INFO, "waiting for stopping-thread %s #%u\n",
- svcpt->scp_service->srv_thread_name, thread->t_id);
- wait_event_idle(thread->t_ctl_waitq,
- thread_is_stopped(thread));
-
- spin_lock(&svcpt->scp_lock);
- }
-
- spin_unlock(&svcpt->scp_lock);
-
- while (!list_empty(&zombie)) {
- thread = list_entry(zombie.next,
- struct ptlrpc_thread, t_link);
- list_del(&thread->t_link);
- kfree(thread);
- }
-}
-
-/**
- * Stops all threads of a particular service \a svc
- */
-static void ptlrpc_stop_all_threads(struct ptlrpc_service *svc)
-{
- struct ptlrpc_service_part *svcpt;
- int i;
-
- ptlrpc_service_for_each_part(svcpt, i, svc) {
- if (svcpt->scp_service)
- ptlrpc_svcpt_stop_threads(svcpt);
- }
-}
-
-int ptlrpc_start_threads(struct ptlrpc_service *svc)
-{
- int rc = 0;
- int i;
- int j;
-
- /* We require 2 threads min, see note in ptlrpc_server_handle_request */
- LASSERT(svc->srv_nthrs_cpt_init >= PTLRPC_NTHRS_INIT);
-
- for (i = 0; i < svc->srv_ncpts; i++) {
- for (j = 0; j < svc->srv_nthrs_cpt_init; j++) {
- rc = ptlrpc_start_thread(svc->srv_parts[i], 1);
- if (rc == 0)
- continue;
-
- if (rc != -EMFILE)
- goto failed;
- /* We have enough threads, don't start more. b=15759 */
- break;
- }
- }
-
- return 0;
- failed:
- CERROR("cannot start %s thread #%d_%d: rc %d\n",
- svc->srv_thread_name, i, j, rc);
- ptlrpc_stop_all_threads(svc);
- return rc;
-}
-
-int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait)
-{
- struct ptlrpc_thread *thread;
- struct ptlrpc_service *svc;
- struct task_struct *task;
- int rc;
-
- svc = svcpt->scp_service;
-
- CDEBUG(D_RPCTRACE, "%s[%d] started %d min %d max %d\n",
- svc->srv_name, svcpt->scp_cpt, svcpt->scp_nthrs_running,
- svc->srv_nthrs_cpt_init, svc->srv_nthrs_cpt_limit);
-
- again:
- if (unlikely(svc->srv_is_stopping))
- return -ESRCH;
-
- if (!ptlrpc_threads_increasable(svcpt) ||
- (OBD_FAIL_CHECK(OBD_FAIL_TGT_TOOMANY_THREADS) &&
- svcpt->scp_nthrs_running == svc->srv_nthrs_cpt_init - 1))
- return -EMFILE;
-
- thread = kzalloc_node(sizeof(*thread), GFP_NOFS,
- cfs_cpt_spread_node(svc->srv_cptable,
- svcpt->scp_cpt));
- if (!thread)
- return -ENOMEM;
- init_waitqueue_head(&thread->t_ctl_waitq);
-
- spin_lock(&svcpt->scp_lock);
- if (!ptlrpc_threads_increasable(svcpt)) {
- spin_unlock(&svcpt->scp_lock);
- kfree(thread);
- return -EMFILE;
- }
-
- if (svcpt->scp_nthrs_starting != 0) {
- /* serialize starting because some modules (obdfilter)
- * might require unique and contiguous t_id
- */
- LASSERT(svcpt->scp_nthrs_starting == 1);
- spin_unlock(&svcpt->scp_lock);
- kfree(thread);
- if (wait) {
- CDEBUG(D_INFO, "Waiting for creating thread %s #%d\n",
- svc->srv_thread_name, svcpt->scp_thr_nextid);
- schedule();
- goto again;
- }
-
- CDEBUG(D_INFO, "Creating thread %s #%d race, retry later\n",
- svc->srv_thread_name, svcpt->scp_thr_nextid);
- return -EAGAIN;
- }
-
- svcpt->scp_nthrs_starting++;
- thread->t_id = svcpt->scp_thr_nextid++;
- thread_add_flags(thread, SVC_STARTING);
- thread->t_svcpt = svcpt;
-
- list_add(&thread->t_link, &svcpt->scp_threads);
- spin_unlock(&svcpt->scp_lock);
-
- if (svcpt->scp_cpt >= 0) {
- snprintf(thread->t_name, sizeof(thread->t_name), "%s%02d_%03d",
- svc->srv_thread_name, svcpt->scp_cpt, thread->t_id);
- } else {
- snprintf(thread->t_name, sizeof(thread->t_name), "%s_%04d",
- svc->srv_thread_name, thread->t_id);
- }
-
- CDEBUG(D_RPCTRACE, "starting thread '%s'\n", thread->t_name);
- task = kthread_run(ptlrpc_main, thread, "%s", thread->t_name);
- if (IS_ERR(task)) {
- rc = PTR_ERR(task);
- CERROR("cannot start thread '%s': rc = %d\n",
- thread->t_name, rc);
- spin_lock(&svcpt->scp_lock);
- --svcpt->scp_nthrs_starting;
- if (thread_is_stopping(thread)) {
- /* this ptlrpc_thread is being handled
- * by ptlrpc_svcpt_stop_threads now
- */
- thread_add_flags(thread, SVC_STOPPED);
- wake_up(&thread->t_ctl_waitq);
- spin_unlock(&svcpt->scp_lock);
- } else {
- list_del(&thread->t_link);
- spin_unlock(&svcpt->scp_lock);
- kfree(thread);
- }
- return rc;
- }
-
- if (!wait)
- return 0;
-
- wait_event_idle(thread->t_ctl_waitq,
- thread_is_running(thread) || thread_is_stopped(thread));
-
- rc = thread_is_stopped(thread) ? thread->t_id : 0;
- return rc;
-}
-
-int ptlrpc_hr_init(void)
-{
- struct ptlrpc_hr_partition *hrp;
- struct ptlrpc_hr_thread *hrt;
- int rc;
- int i;
- int j;
- int weight;
-
- memset(&ptlrpc_hr, 0, sizeof(ptlrpc_hr));
- ptlrpc_hr.hr_cpt_table = cfs_cpt_tab;
-
- ptlrpc_hr.hr_partitions = cfs_percpt_alloc(ptlrpc_hr.hr_cpt_table,
- sizeof(*hrp));
- if (!ptlrpc_hr.hr_partitions)
- return -ENOMEM;
-
- init_waitqueue_head(&ptlrpc_hr.hr_waitq);
-
- weight = cpumask_weight(topology_sibling_cpumask(0));
-
- cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
- hrp->hrp_cpt = i;
-
- atomic_set(&hrp->hrp_nstarted, 0);
- atomic_set(&hrp->hrp_nstopped, 0);
-
- hrp->hrp_nthrs = cfs_cpt_weight(ptlrpc_hr.hr_cpt_table, i);
- hrp->hrp_nthrs /= weight;
- if (hrp->hrp_nthrs == 0)
- hrp->hrp_nthrs = 1;
-
- hrp->hrp_thrs =
- kzalloc_node(hrp->hrp_nthrs * sizeof(*hrt), GFP_NOFS,
- cfs_cpt_spread_node(ptlrpc_hr.hr_cpt_table,
- i));
- if (!hrp->hrp_thrs) {
- rc = -ENOMEM;
- goto out;
- }
-
- for (j = 0; j < hrp->hrp_nthrs; j++) {
- hrt = &hrp->hrp_thrs[j];
-
- hrt->hrt_id = j;
- hrt->hrt_partition = hrp;
- init_waitqueue_head(&hrt->hrt_waitq);
- spin_lock_init(&hrt->hrt_lock);
- INIT_LIST_HEAD(&hrt->hrt_queue);
- }
- }
-
- rc = ptlrpc_start_hr_threads();
-out:
- if (rc != 0)
- ptlrpc_hr_fini();
- return rc;
-}
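-
-/*
- * Sizing sketch (illustrative topology): hrp_nthrs works out to one
- * reply-handler thread per physical core in the partition, e.g. a CPT
- * of 16 CPUs with 2 SMT siblings per core gives 16 / 2 = 8 threads;
- * anything smaller than one core is rounded up to a single thread.
- */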
-
-void ptlrpc_hr_fini(void)
-{
- struct ptlrpc_hr_partition *hrp;
- int i;
-
- if (!ptlrpc_hr.hr_partitions)
- return;
-
- ptlrpc_stop_hr_threads();
-
- cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
- kfree(hrp->hrp_thrs);
- }
-
- cfs_percpt_free(ptlrpc_hr.hr_partitions);
- ptlrpc_hr.hr_partitions = NULL;
-}
-
-/**
- * Wait until all already scheduled replies are processed.
- */
-static void ptlrpc_wait_replies(struct ptlrpc_service_part *svcpt)
-{
- while (1) {
- int rc;
-
- rc = wait_event_idle_timeout(
- svcpt->scp_waitq,
- atomic_read(&svcpt->scp_nreps_difficult) == 0,
- 10 * HZ);
- if (rc > 0)
- break;
- CWARN("Unexpectedly long timeout %s %p\n",
- svcpt->scp_service->srv_name, svcpt->scp_service);
- }
-}
-
-static void
-ptlrpc_service_del_atimer(struct ptlrpc_service *svc)
-{
- struct ptlrpc_service_part *svcpt;
- int i;
-
- /* early disarm AT timer... */
- ptlrpc_service_for_each_part(svcpt, i, svc) {
- if (svcpt->scp_service)
- del_timer(&svcpt->scp_at_timer);
- }
-}
-
-static void
-ptlrpc_service_unlink_rqbd(struct ptlrpc_service *svc)
-{
- struct ptlrpc_service_part *svcpt;
- struct ptlrpc_request_buffer_desc *rqbd;
- int cnt;
- int rc;
- int i;
-
- /* All history will be culled when the next request buffer is
- * freed in ptlrpc_service_purge_all()
- */
- svc->srv_hist_nrqbds_cpt_max = 0;
-
- rc = LNetClearLazyPortal(svc->srv_req_portal);
- LASSERT(rc == 0);
-
- ptlrpc_service_for_each_part(svcpt, i, svc) {
- if (!svcpt->scp_service)
- break;
-
- /* Unlink all the request buffers. This forces a 'final'
- * event with its 'unlink' flag set for each posted rqbd
- */
- list_for_each_entry(rqbd, &svcpt->scp_rqbd_posted,
- rqbd_list) {
- rc = LNetMDUnlink(rqbd->rqbd_md_h);
- LASSERT(rc == 0 || rc == -ENOENT);
- }
- }
-
- ptlrpc_service_for_each_part(svcpt, i, svc) {
- if (!svcpt->scp_service)
- break;
-
- /* Wait for the network to release any buffers
- * it's currently filling
- */
- spin_lock(&svcpt->scp_lock);
- while (svcpt->scp_nrqbds_posted != 0) {
- spin_unlock(&svcpt->scp_lock);
- /* Network access will complete in finite time but
- * the HUGE timeout lets us CWARN for visibility
- * of sluggish LNDs
- */
- cnt = 0;
- while (cnt < LONG_UNLINK &&
- (rc = wait_event_idle_timeout(svcpt->scp_waitq,
- svcpt->scp_nrqbds_posted == 0,
- HZ)) == 0)
- cnt++;
- if (rc == 0) {
- CWARN("Service %s waiting for request buffers\n",
- svcpt->scp_service->srv_name);
- }
- spin_lock(&svcpt->scp_lock);
- }
- spin_unlock(&svcpt->scp_lock);
- }
-}
-
-static void
-ptlrpc_service_purge_all(struct ptlrpc_service *svc)
-{
- struct ptlrpc_service_part *svcpt;
- struct ptlrpc_request_buffer_desc *rqbd;
- struct ptlrpc_request *req;
- struct ptlrpc_reply_state *rs;
- int i;
-
- ptlrpc_service_for_each_part(svcpt, i, svc) {
- if (!svcpt->scp_service)
- break;
-
- spin_lock(&svcpt->scp_rep_lock);
- while (!list_empty(&svcpt->scp_rep_active)) {
- rs = list_entry(svcpt->scp_rep_active.next,
- struct ptlrpc_reply_state, rs_list);
- spin_lock(&rs->rs_lock);
- ptlrpc_schedule_difficult_reply(rs);
- spin_unlock(&rs->rs_lock);
- }
- spin_unlock(&svcpt->scp_rep_lock);
-
-		/* Purge the request queue. NB: no new replies (rqbds are
-		 * all unlinked) and no service threads, so I'm the only
-		 * thread noodling the request queue now
-		 */
- while (!list_empty(&svcpt->scp_req_incoming)) {
- req = list_entry(svcpt->scp_req_incoming.next,
- struct ptlrpc_request, rq_list);
-
- list_del(&req->rq_list);
- svcpt->scp_nreqs_incoming--;
- ptlrpc_server_finish_request(svcpt, req);
- }
-
- while (ptlrpc_server_request_pending(svcpt, true)) {
- req = ptlrpc_server_request_get(svcpt, true);
- ptlrpc_server_finish_active_request(svcpt, req);
- }
-
- LASSERT(list_empty(&svcpt->scp_rqbd_posted));
- LASSERT(svcpt->scp_nreqs_incoming == 0);
- LASSERT(svcpt->scp_nreqs_active == 0);
- /* history should have been culled by
- * ptlrpc_server_finish_request
- */
- LASSERT(svcpt->scp_hist_nrqbds == 0);
-
- /* Now free all the request buffers since nothing
- * references them any more...
- */
-
- while (!list_empty(&svcpt->scp_rqbd_idle)) {
- rqbd = list_entry(svcpt->scp_rqbd_idle.next,
- struct ptlrpc_request_buffer_desc,
- rqbd_list);
- ptlrpc_free_rqbd(rqbd);
- }
- ptlrpc_wait_replies(svcpt);
-
- while (!list_empty(&svcpt->scp_rep_idle)) {
- rs = list_entry(svcpt->scp_rep_idle.next,
- struct ptlrpc_reply_state,
- rs_list);
- list_del(&rs->rs_list);
- kvfree(rs);
- }
- }
-}
-
-static void
-ptlrpc_service_free(struct ptlrpc_service *svc)
-{
- struct ptlrpc_service_part *svcpt;
- struct ptlrpc_at_array *array;
- int i;
-
- ptlrpc_service_for_each_part(svcpt, i, svc) {
- if (!svcpt->scp_service)
- break;
-
- /* In case somebody rearmed this in the meantime */
- del_timer(&svcpt->scp_at_timer);
- array = &svcpt->scp_at_array;
-
- kfree(array->paa_reqs_array);
- array->paa_reqs_array = NULL;
- kfree(array->paa_reqs_count);
- array->paa_reqs_count = NULL;
- }
-
- ptlrpc_service_for_each_part(svcpt, i, svc)
- kfree(svcpt);
-
- if (svc->srv_cpts)
- cfs_expr_list_values_free(svc->srv_cpts, svc->srv_ncpts);
-
- kfree(svc);
-}
-
-int ptlrpc_unregister_service(struct ptlrpc_service *service)
-{
- CDEBUG(D_NET, "%s: tearing down\n", service->srv_name);
-
- service->srv_is_stopping = 1;
-
- mutex_lock(&ptlrpc_all_services_mutex);
- list_del_init(&service->srv_list);
- mutex_unlock(&ptlrpc_all_services_mutex);
-
- ptlrpc_service_del_atimer(service);
- ptlrpc_stop_all_threads(service);
-
- ptlrpc_service_unlink_rqbd(service);
- ptlrpc_service_purge_all(service);
- ptlrpc_service_nrs_cleanup(service);
-
- ptlrpc_lprocfs_unregister_service(service);
- ptlrpc_sysfs_unregister_service(service);
-
- ptlrpc_service_free(service);
-
- return 0;
-}
-EXPORT_SYMBOL(ptlrpc_unregister_service);
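
ptlrpc_unregister_service() tears a service down in a strict order: mark it stopping, disarm the AT timers, stop the handler threads, unlink the posted request buffers from LNet, purge queued requests and replies, and free memory only at the end. A minimal standalone sketch of that ordering, where struct svc and every svc_* helper are hypothetical stand-ins for illustration, not Lustre symbols:

    #include <stdbool.h>
    #include <stdio.h>

    struct svc { bool stopping; };

    /* Each stub stands in for one shutdown phase of the real code. */
    static void svc_del_timers(struct svc *s)     { puts("timers disarmed"); }
    static void svc_stop_threads(struct svc *s)   { puts("threads stopped"); }
    static void svc_unlink_buffers(struct svc *s) { puts("buffers unlinked"); }
    static void svc_purge_requests(struct svc *s) { puts("queues purged"); }
    static void svc_free(struct svc *s)           { puts("memory freed"); }

    /* Order matters: no phase may free state a later phase still uses. */
    static int svc_unregister(struct svc *s)
    {
            s->stopping = true;     /* refuse new work first */
            svc_del_timers(s);      /* timers can no longer re-arm work */
            svc_stop_threads(s);    /* handler threads drain and exit */
            svc_unlink_buffers(s);  /* the network stops filling buffers */
            svc_purge_requests(s);  /* discard anything still queued */
            svc_free(s);            /* only now is releasing memory safe */
            return 0;
    }

    int main(void)
    {
            struct svc s = { .stopping = false };
            return svc_unregister(&s);
    }
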
diff --git a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
deleted file mode 100644
index f9394c3e1ee2..000000000000
--- a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
+++ /dev/null
@@ -1,4210 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_RPC
-
-#include <linux/fs.h>
-#include <linux/posix_acl_xattr.h>
-
-#include <obd_support.h>
-#include <obd_class.h>
-#include <lustre_net.h>
-#include <lustre_disk.h>
-#include "ptlrpc_internal.h"
-
-void lustre_assert_wire_constants(void)
-{
- /* Wire protocol assertions generated by 'wirecheck'
- * (make -C lustre/utils newwiretest)
- * running on Linux centos6-bis 2.6.32-358.0.1.el6-head
- * #3 SMP Wed Apr 17 17:37:43 CEST 2013
- * with gcc version 4.4.6 20110731 (Red Hat 4.4.6-3) (GCC)
- */
-
- /* Constants... */
- LASSERTF(PTL_RPC_MSG_REQUEST == 4711, "found %lld\n",
- (long long)PTL_RPC_MSG_REQUEST);
- LASSERTF(PTL_RPC_MSG_ERR == 4712, "found %lld\n",
- (long long)PTL_RPC_MSG_ERR);
- LASSERTF(PTL_RPC_MSG_REPLY == 4713, "found %lld\n",
- (long long)PTL_RPC_MSG_REPLY);
- LASSERTF(MDS_DIR_END_OFF == 0xfffffffffffffffeULL, "found 0x%.16llxULL\n",
- MDS_DIR_END_OFF);
- LASSERTF(DEAD_HANDLE_MAGIC == 0xdeadbeefcafebabeULL, "found 0x%.16llxULL\n",
- DEAD_HANDLE_MAGIC);
- BUILD_BUG_ON(MTI_NAME_MAXLEN != 64);
- LASSERTF(OST_REPLY == 0, "found %lld\n",
- (long long)OST_REPLY);
- LASSERTF(OST_GETATTR == 1, "found %lld\n",
- (long long)OST_GETATTR);
- LASSERTF(OST_SETATTR == 2, "found %lld\n",
- (long long)OST_SETATTR);
- LASSERTF(OST_READ == 3, "found %lld\n",
- (long long)OST_READ);
- LASSERTF(OST_WRITE == 4, "found %lld\n",
- (long long)OST_WRITE);
- LASSERTF(OST_CREATE == 5, "found %lld\n",
- (long long)OST_CREATE);
- LASSERTF(OST_DESTROY == 6, "found %lld\n",
- (long long)OST_DESTROY);
- LASSERTF(OST_GET_INFO == 7, "found %lld\n",
- (long long)OST_GET_INFO);
- LASSERTF(OST_CONNECT == 8, "found %lld\n",
- (long long)OST_CONNECT);
- LASSERTF(OST_DISCONNECT == 9, "found %lld\n",
- (long long)OST_DISCONNECT);
- LASSERTF(OST_PUNCH == 10, "found %lld\n",
- (long long)OST_PUNCH);
- LASSERTF(OST_OPEN == 11, "found %lld\n",
- (long long)OST_OPEN);
- LASSERTF(OST_CLOSE == 12, "found %lld\n",
- (long long)OST_CLOSE);
- LASSERTF(OST_STATFS == 13, "found %lld\n",
- (long long)OST_STATFS);
- LASSERTF(OST_SYNC == 16, "found %lld\n",
- (long long)OST_SYNC);
- LASSERTF(OST_SET_INFO == 17, "found %lld\n",
- (long long)OST_SET_INFO);
- LASSERTF(OST_QUOTACHECK == 18, "found %lld\n",
- (long long)OST_QUOTACHECK);
- LASSERTF(OST_QUOTACTL == 19, "found %lld\n",
- (long long)OST_QUOTACTL);
- LASSERTF(OST_QUOTA_ADJUST_QUNIT == 20, "found %lld\n",
- (long long)OST_QUOTA_ADJUST_QUNIT);
- LASSERTF(OST_LAST_OPC == 21, "found %lld\n",
- (long long)OST_LAST_OPC);
- LASSERTF(OBD_OBJECT_EOF == 0xffffffffffffffffULL, "found 0x%.16llxULL\n",
- OBD_OBJECT_EOF);
- LASSERTF(OST_MIN_PRECREATE == 32, "found %lld\n",
- (long long)OST_MIN_PRECREATE);
- LASSERTF(OST_MAX_PRECREATE == 20000, "found %lld\n",
- (long long)OST_MAX_PRECREATE);
- LASSERTF(OST_LVB_ERR_INIT == 0xffbadbad80000000ULL, "found 0x%.16llxULL\n",
- OST_LVB_ERR_INIT);
- LASSERTF(OST_LVB_ERR_MASK == 0xffbadbad00000000ULL, "found 0x%.16llxULL\n",
- OST_LVB_ERR_MASK);
- LASSERTF(MDS_FIRST_OPC == 33, "found %lld\n",
- (long long)MDS_FIRST_OPC);
- LASSERTF(MDS_GETATTR == 33, "found %lld\n",
- (long long)MDS_GETATTR);
- LASSERTF(MDS_GETATTR_NAME == 34, "found %lld\n",
- (long long)MDS_GETATTR_NAME);
- LASSERTF(MDS_CLOSE == 35, "found %lld\n",
- (long long)MDS_CLOSE);
- LASSERTF(MDS_REINT == 36, "found %lld\n",
- (long long)MDS_REINT);
- LASSERTF(MDS_READPAGE == 37, "found %lld\n",
- (long long)MDS_READPAGE);
- LASSERTF(MDS_CONNECT == 38, "found %lld\n",
- (long long)MDS_CONNECT);
- LASSERTF(MDS_DISCONNECT == 39, "found %lld\n",
- (long long)MDS_DISCONNECT);
- LASSERTF(MDS_GETSTATUS == 40, "found %lld\n",
- (long long)MDS_GETSTATUS);
- LASSERTF(MDS_STATFS == 41, "found %lld\n",
- (long long)MDS_STATFS);
- LASSERTF(MDS_PIN == 42, "found %lld\n",
- (long long)MDS_PIN);
- LASSERTF(MDS_UNPIN == 43, "found %lld\n",
- (long long)MDS_UNPIN);
- LASSERTF(MDS_SYNC == 44, "found %lld\n",
- (long long)MDS_SYNC);
- LASSERTF(MDS_DONE_WRITING == 45, "found %lld\n",
- (long long)MDS_DONE_WRITING);
- LASSERTF(MDS_SET_INFO == 46, "found %lld\n",
- (long long)MDS_SET_INFO);
- LASSERTF(MDS_QUOTACHECK == 47, "found %lld\n",
- (long long)MDS_QUOTACHECK);
- LASSERTF(MDS_QUOTACTL == 48, "found %lld\n",
- (long long)MDS_QUOTACTL);
- LASSERTF(MDS_GETXATTR == 49, "found %lld\n",
- (long long)MDS_GETXATTR);
- LASSERTF(MDS_SETXATTR == 50, "found %lld\n",
- (long long)MDS_SETXATTR);
- LASSERTF(MDS_WRITEPAGE == 51, "found %lld\n",
- (long long)MDS_WRITEPAGE);
- LASSERTF(MDS_IS_SUBDIR == 52, "found %lld\n",
- (long long)MDS_IS_SUBDIR);
- LASSERTF(MDS_GET_INFO == 53, "found %lld\n",
- (long long)MDS_GET_INFO);
- LASSERTF(MDS_HSM_STATE_GET == 54, "found %lld\n",
- (long long)MDS_HSM_STATE_GET);
- LASSERTF(MDS_HSM_STATE_SET == 55, "found %lld\n",
- (long long)MDS_HSM_STATE_SET);
- LASSERTF(MDS_HSM_ACTION == 56, "found %lld\n",
- (long long)MDS_HSM_ACTION);
- LASSERTF(MDS_HSM_PROGRESS == 57, "found %lld\n",
- (long long)MDS_HSM_PROGRESS);
- LASSERTF(MDS_HSM_REQUEST == 58, "found %lld\n",
- (long long)MDS_HSM_REQUEST);
- LASSERTF(MDS_HSM_CT_REGISTER == 59, "found %lld\n",
- (long long)MDS_HSM_CT_REGISTER);
- LASSERTF(MDS_HSM_CT_UNREGISTER == 60, "found %lld\n",
- (long long)MDS_HSM_CT_UNREGISTER);
- LASSERTF(MDS_SWAP_LAYOUTS == 61, "found %lld\n",
- (long long)MDS_SWAP_LAYOUTS);
- LASSERTF(MDS_LAST_OPC == 62, "found %lld\n",
- (long long)MDS_LAST_OPC);
- LASSERTF(REINT_SETATTR == 1, "found %lld\n",
- (long long)REINT_SETATTR);
- LASSERTF(REINT_CREATE == 2, "found %lld\n",
- (long long)REINT_CREATE);
- LASSERTF(REINT_LINK == 3, "found %lld\n",
- (long long)REINT_LINK);
- LASSERTF(REINT_UNLINK == 4, "found %lld\n",
- (long long)REINT_UNLINK);
- LASSERTF(REINT_RENAME == 5, "found %lld\n",
- (long long)REINT_RENAME);
- LASSERTF(REINT_OPEN == 6, "found %lld\n",
- (long long)REINT_OPEN);
- LASSERTF(REINT_SETXATTR == 7, "found %lld\n",
- (long long)REINT_SETXATTR);
- LASSERTF(REINT_RMENTRY == 8, "found %lld\n",
- (long long)REINT_RMENTRY);
- LASSERTF(REINT_MIGRATE == 9, "found %lld\n",
- (long long)REINT_MIGRATE);
- LASSERTF(REINT_MAX == 10, "found %lld\n",
- (long long)REINT_MAX);
- LASSERTF(DISP_IT_EXECD == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned int)DISP_IT_EXECD);
- LASSERTF(DISP_LOOKUP_EXECD == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned int)DISP_LOOKUP_EXECD);
- LASSERTF(DISP_LOOKUP_NEG == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned int)DISP_LOOKUP_NEG);
- LASSERTF(DISP_LOOKUP_POS == 0x00000008UL, "found 0x%.8xUL\n",
- (unsigned int)DISP_LOOKUP_POS);
- LASSERTF(DISP_OPEN_CREATE == 0x00000010UL, "found 0x%.8xUL\n",
- (unsigned int)DISP_OPEN_CREATE);
- LASSERTF(DISP_OPEN_OPEN == 0x00000020UL, "found 0x%.8xUL\n",
- (unsigned int)DISP_OPEN_OPEN);
- LASSERTF(DISP_ENQ_COMPLETE == 0x00400000UL, "found 0x%.8xUL\n",
- (unsigned int)DISP_ENQ_COMPLETE);
- LASSERTF(DISP_ENQ_OPEN_REF == 0x00800000UL, "found 0x%.8xUL\n",
- (unsigned int)DISP_ENQ_OPEN_REF);
- LASSERTF(DISP_ENQ_CREATE_REF == 0x01000000UL, "found 0x%.8xUL\n",
- (unsigned int)DISP_ENQ_CREATE_REF);
- LASSERTF(DISP_OPEN_LOCK == 0x02000000UL, "found 0x%.8xUL\n",
- (unsigned int)DISP_OPEN_LOCK);
- LASSERTF(MDS_STATUS_CONN == 1, "found %lld\n",
- (long long)MDS_STATUS_CONN);
- LASSERTF(MDS_STATUS_LOV == 2, "found %lld\n",
- (long long)MDS_STATUS_LOV);
- LASSERTF(MDS_ATTR_MODE == 0x0000000000000001ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_MODE);
- LASSERTF(MDS_ATTR_UID == 0x0000000000000002ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_UID);
- LASSERTF(MDS_ATTR_GID == 0x0000000000000004ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_GID);
- LASSERTF(MDS_ATTR_SIZE == 0x0000000000000008ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_SIZE);
- LASSERTF(MDS_ATTR_ATIME == 0x0000000000000010ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_ATIME);
- LASSERTF(MDS_ATTR_MTIME == 0x0000000000000020ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_MTIME);
- LASSERTF(MDS_ATTR_CTIME == 0x0000000000000040ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_CTIME);
- LASSERTF(MDS_ATTR_ATIME_SET == 0x0000000000000080ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_ATIME_SET);
- LASSERTF(MDS_ATTR_MTIME_SET == 0x0000000000000100ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_MTIME_SET);
- LASSERTF(MDS_ATTR_FORCE == 0x0000000000000200ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_FORCE);
- LASSERTF(MDS_ATTR_ATTR_FLAG == 0x0000000000000400ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_ATTR_FLAG);
- LASSERTF(MDS_ATTR_KILL_SUID == 0x0000000000000800ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_KILL_SUID);
- LASSERTF(MDS_ATTR_KILL_SGID == 0x0000000000001000ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_KILL_SGID);
- LASSERTF(MDS_ATTR_CTIME_SET == 0x0000000000002000ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_CTIME_SET);
- LASSERTF(MDS_ATTR_FROM_OPEN == 0x0000000000004000ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_FROM_OPEN);
- LASSERTF(MDS_ATTR_BLOCKS == 0x0000000000008000ULL, "found 0x%.16llxULL\n",
- (long long)MDS_ATTR_BLOCKS);
- LASSERTF(FLD_QUERY == 900, "found %lld\n",
- (long long)FLD_QUERY);
- LASSERTF(FLD_FIRST_OPC == 900, "found %lld\n",
- (long long)FLD_FIRST_OPC);
- LASSERTF(FLD_READ == 901, "found %lld\n",
- (long long)FLD_READ);
- LASSERTF(FLD_LAST_OPC == 902, "found %lld\n",
- (long long)FLD_LAST_OPC);
- LASSERTF(SEQ_QUERY == 700, "found %lld\n",
- (long long)SEQ_QUERY);
- LASSERTF(SEQ_FIRST_OPC == 700, "found %lld\n",
- (long long)SEQ_FIRST_OPC);
- LASSERTF(SEQ_LAST_OPC == 701, "found %lld\n",
- (long long)SEQ_LAST_OPC);
- LASSERTF(SEQ_ALLOC_SUPER == 0, "found %lld\n",
- (long long)SEQ_ALLOC_SUPER);
- LASSERTF(SEQ_ALLOC_META == 1, "found %lld\n",
- (long long)SEQ_ALLOC_META);
- LASSERTF(LDLM_ENQUEUE == 101, "found %lld\n",
- (long long)LDLM_ENQUEUE);
- LASSERTF(LDLM_CONVERT == 102, "found %lld\n",
- (long long)LDLM_CONVERT);
- LASSERTF(LDLM_CANCEL == 103, "found %lld\n",
- (long long)LDLM_CANCEL);
- LASSERTF(LDLM_BL_CALLBACK == 104, "found %lld\n",
- (long long)LDLM_BL_CALLBACK);
- LASSERTF(LDLM_CP_CALLBACK == 105, "found %lld\n",
- (long long)LDLM_CP_CALLBACK);
- LASSERTF(LDLM_GL_CALLBACK == 106, "found %lld\n",
- (long long)LDLM_GL_CALLBACK);
- LASSERTF(LDLM_SET_INFO == 107, "found %lld\n",
- (long long)LDLM_SET_INFO);
- LASSERTF(LDLM_LAST_OPC == 108, "found %lld\n",
- (long long)LDLM_LAST_OPC);
- LASSERTF(LCK_MINMODE == 0, "found %lld\n",
- (long long)LCK_MINMODE);
- LASSERTF(LCK_EX == 1, "found %lld\n",
- (long long)LCK_EX);
- LASSERTF(LCK_PW == 2, "found %lld\n",
- (long long)LCK_PW);
- LASSERTF(LCK_PR == 4, "found %lld\n",
- (long long)LCK_PR);
- LASSERTF(LCK_CW == 8, "found %lld\n",
- (long long)LCK_CW);
- LASSERTF(LCK_CR == 16, "found %lld\n",
- (long long)LCK_CR);
- LASSERTF(LCK_NL == 32, "found %lld\n",
- (long long)LCK_NL);
- LASSERTF(LCK_GROUP == 64, "found %lld\n",
- (long long)LCK_GROUP);
- LASSERTF(LCK_COS == 128, "found %lld\n",
- (long long)LCK_COS);
- LASSERTF(LCK_MAXMODE == 129, "found %lld\n",
- (long long)LCK_MAXMODE);
- LASSERTF(LCK_MODE_NUM == 8, "found %lld\n",
- (long long)LCK_MODE_NUM);
- BUILD_BUG_ON(LDLM_PLAIN != 10);
- BUILD_BUG_ON(LDLM_EXTENT != 11);
- BUILD_BUG_ON(LDLM_FLOCK != 12);
- BUILD_BUG_ON(LDLM_IBITS != 13);
- BUILD_BUG_ON(LDLM_MAX_TYPE != 14);
- BUILD_BUG_ON(LUSTRE_RES_ID_SEQ_OFF != 0);
- BUILD_BUG_ON(LUSTRE_RES_ID_VER_OID_OFF != 1);
- BUILD_BUG_ON(LUSTRE_RES_ID_QUOTA_SEQ_OFF != 2);
- BUILD_BUG_ON(LUSTRE_RES_ID_QUOTA_VER_OID_OFF != 3);
- BUILD_BUG_ON(LUSTRE_RES_ID_HSH_OFF != 3);
- LASSERTF(OBD_PING == 400, "found %lld\n",
- (long long)OBD_PING);
- LASSERTF(OBD_LOG_CANCEL == 401, "found %lld\n",
- (long long)OBD_LOG_CANCEL);
- LASSERTF(OBD_QC_CALLBACK == 402, "found %lld\n",
- (long long)OBD_QC_CALLBACK);
- LASSERTF(OBD_IDX_READ == 403, "found %lld\n",
- (long long)OBD_IDX_READ);
- LASSERTF(OBD_LAST_OPC == 404, "found %lld\n",
- (long long)OBD_LAST_OPC);
- LASSERTF(QUOTA_DQACQ == 601, "found %lld\n",
- (long long)QUOTA_DQACQ);
- LASSERTF(QUOTA_DQREL == 602, "found %lld\n",
- (long long)QUOTA_DQREL);
- LASSERTF(QUOTA_LAST_OPC == 603, "found %lld\n",
- (long long)QUOTA_LAST_OPC);
- LASSERTF(MGS_CONNECT == 250, "found %lld\n",
- (long long)MGS_CONNECT);
- LASSERTF(MGS_DISCONNECT == 251, "found %lld\n",
- (long long)MGS_DISCONNECT);
- LASSERTF(MGS_EXCEPTION == 252, "found %lld\n",
- (long long)MGS_EXCEPTION);
- LASSERTF(MGS_TARGET_REG == 253, "found %lld\n",
- (long long)MGS_TARGET_REG);
- LASSERTF(MGS_TARGET_DEL == 254, "found %lld\n",
- (long long)MGS_TARGET_DEL);
- LASSERTF(MGS_SET_INFO == 255, "found %lld\n",
- (long long)MGS_SET_INFO);
- LASSERTF(MGS_LAST_OPC == 257, "found %lld\n",
- (long long)MGS_LAST_OPC);
- LASSERTF(SEC_CTX_INIT == 801, "found %lld\n",
- (long long)SEC_CTX_INIT);
- LASSERTF(SEC_CTX_INIT_CONT == 802, "found %lld\n",
- (long long)SEC_CTX_INIT_CONT);
- LASSERTF(SEC_CTX_FINI == 803, "found %lld\n",
- (long long)SEC_CTX_FINI);
- LASSERTF(SEC_LAST_OPC == 804, "found %lld\n",
- (long long)SEC_LAST_OPC);
- /* Sizes and Offsets */
-
- /* Checks for struct obd_uuid */
- LASSERTF((int)sizeof(struct obd_uuid) == 40, "found %lld\n",
- (long long)(int)sizeof(struct obd_uuid));
-
- /* Checks for struct lu_seq_range */
- LASSERTF((int)sizeof(struct lu_seq_range) == 24, "found %lld\n",
- (long long)(int)sizeof(struct lu_seq_range));
- LASSERTF((int)offsetof(struct lu_seq_range, lsr_start) == 0, "found %lld\n",
- (long long)(int)offsetof(struct lu_seq_range, lsr_start));
- LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_start) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_start));
- LASSERTF((int)offsetof(struct lu_seq_range, lsr_end) == 8, "found %lld\n",
- (long long)(int)offsetof(struct lu_seq_range, lsr_end));
- LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_end) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_end));
- LASSERTF((int)offsetof(struct lu_seq_range, lsr_index) == 16, "found %lld\n",
- (long long)(int)offsetof(struct lu_seq_range, lsr_index));
- LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_index) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_index));
- LASSERTF((int)offsetof(struct lu_seq_range, lsr_flags) == 20, "found %lld\n",
- (long long)(int)offsetof(struct lu_seq_range, lsr_flags));
- LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_flags));
- LASSERTF(LU_SEQ_RANGE_MDT == 0, "found %lld\n",
- (long long)LU_SEQ_RANGE_MDT);
- LASSERTF(LU_SEQ_RANGE_OST == 1, "found %lld\n",
- (long long)LU_SEQ_RANGE_OST);
-
- /* Checks for struct lustre_mdt_attrs */
- LASSERTF((int)sizeof(struct lustre_mdt_attrs) == 24, "found %lld\n",
- (long long)(int)sizeof(struct lustre_mdt_attrs));
- LASSERTF((int)offsetof(struct lustre_mdt_attrs, lma_compat) == 0, "found %lld\n",
- (long long)(int)offsetof(struct lustre_mdt_attrs, lma_compat));
- LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_compat) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_compat));
- LASSERTF((int)offsetof(struct lustre_mdt_attrs, lma_incompat) == 4, "found %lld\n",
- (long long)(int)offsetof(struct lustre_mdt_attrs, lma_incompat));
- LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_incompat) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_incompat));
- LASSERTF((int)offsetof(struct lustre_mdt_attrs, lma_self_fid) == 8, "found %lld\n",
- (long long)(int)offsetof(struct lustre_mdt_attrs, lma_self_fid));
- LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid));
- LASSERTF(LMAI_RELEASED == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned int)LMAI_RELEASED);
- LASSERTF(LMAC_HSM == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned int)LMAC_HSM);
- LASSERTF(LMAC_NOT_IN_OI == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned int)LMAC_NOT_IN_OI);
- LASSERTF(LMAC_FID_ON_OST == 0x00000008UL, "found 0x%.8xUL\n",
- (unsigned int)LMAC_FID_ON_OST);
-
- /* Checks for struct ost_id */
- LASSERTF((int)sizeof(struct ost_id) == 16, "found %lld\n",
- (long long)(int)sizeof(struct ost_id));
- LASSERTF((int)offsetof(struct ost_id, oi) == 0, "found %lld\n",
- (long long)(int)offsetof(struct ost_id, oi));
- LASSERTF((int)sizeof(((struct ost_id *)0)->oi) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct ost_id *)0)->oi));
- LASSERTF(LUSTRE_FID_INIT_OID == 1, "found %lld\n",
- (long long)LUSTRE_FID_INIT_OID);
- LASSERTF(FID_SEQ_OST_MDT0 == 0, "found %lld\n",
- (long long)FID_SEQ_OST_MDT0);
- LASSERTF(FID_SEQ_LLOG == 1, "found %lld\n",
- (long long)FID_SEQ_LLOG);
- LASSERTF(FID_SEQ_ECHO == 2, "found %lld\n",
- (long long)FID_SEQ_ECHO);
- LASSERTF(FID_SEQ_OST_MDT1 == 3, "found %lld\n",
- (long long)FID_SEQ_OST_MDT1);
- LASSERTF(FID_SEQ_OST_MAX == 9, "found %lld\n",
- (long long)FID_SEQ_OST_MAX);
- LASSERTF(FID_SEQ_RSVD == 11, "found %lld\n",
- (long long)FID_SEQ_RSVD);
- LASSERTF(FID_SEQ_IGIF == 12, "found %lld\n",
- (long long)FID_SEQ_IGIF);
- LASSERTF(FID_SEQ_IGIF_MAX == 0x00000000ffffffffULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_IGIF_MAX);
- LASSERTF(FID_SEQ_IDIF == 0x0000000100000000ULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_IDIF);
- LASSERTF(FID_SEQ_IDIF_MAX == 0x00000001ffffffffULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_IDIF_MAX);
- LASSERTF(FID_SEQ_START == 0x0000000200000000ULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_START);
- LASSERTF(FID_SEQ_LOCAL_FILE == 0x0000000200000001ULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_LOCAL_FILE);
- LASSERTF(FID_SEQ_DOT_LUSTRE == 0x0000000200000002ULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_DOT_LUSTRE);
- LASSERTF(FID_SEQ_SPECIAL == 0x0000000200000004ULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_SPECIAL);
- LASSERTF(FID_SEQ_QUOTA == 0x0000000200000005ULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_QUOTA);
- LASSERTF(FID_SEQ_QUOTA_GLB == 0x0000000200000006ULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_QUOTA_GLB);
- LASSERTF(FID_SEQ_ROOT == 0x0000000200000007ULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_ROOT);
- LASSERTF(FID_SEQ_NORMAL == 0x0000000200000400ULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_NORMAL);
- LASSERTF(FID_SEQ_LOV_DEFAULT == 0xffffffffffffffffULL, "found 0x%.16llxULL\n",
- (long long)FID_SEQ_LOV_DEFAULT);
- LASSERTF(FID_OID_SPECIAL_BFL == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned int)FID_OID_SPECIAL_BFL);
- LASSERTF(FID_OID_DOT_LUSTRE == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned int)FID_OID_DOT_LUSTRE);
- LASSERTF(FID_OID_DOT_LUSTRE_OBF == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned int)FID_OID_DOT_LUSTRE_OBF);
-
- /* Checks for struct lu_dirent */
- LASSERTF((int)sizeof(struct lu_dirent) == 32, "found %lld\n",
- (long long)(int)sizeof(struct lu_dirent));
- LASSERTF((int)offsetof(struct lu_dirent, lde_fid) == 0, "found %lld\n",
- (long long)(int)offsetof(struct lu_dirent, lde_fid));
- LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_fid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct lu_dirent *)0)->lde_fid));
- LASSERTF((int)offsetof(struct lu_dirent, lde_hash) == 16, "found %lld\n",
- (long long)(int)offsetof(struct lu_dirent, lde_hash));
- LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_hash) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lu_dirent *)0)->lde_hash));
- LASSERTF((int)offsetof(struct lu_dirent, lde_reclen) == 24, "found %lld\n",
- (long long)(int)offsetof(struct lu_dirent, lde_reclen));
- LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_reclen) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct lu_dirent *)0)->lde_reclen));
- LASSERTF((int)offsetof(struct lu_dirent, lde_namelen) == 26, "found %lld\n",
- (long long)(int)offsetof(struct lu_dirent, lde_namelen));
- LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_namelen) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct lu_dirent *)0)->lde_namelen));
- LASSERTF((int)offsetof(struct lu_dirent, lde_attrs) == 28, "found %lld\n",
- (long long)(int)offsetof(struct lu_dirent, lde_attrs));
- LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_attrs) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lu_dirent *)0)->lde_attrs));
- LASSERTF((int)offsetof(struct lu_dirent, lde_name[0]) == 32, "found %lld\n",
- (long long)(int)offsetof(struct lu_dirent, lde_name[0]));
- LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_name[0]) == 1, "found %lld\n",
- (long long)(int)sizeof(((struct lu_dirent *)0)->lde_name[0]));
- LASSERTF(LUDA_FID == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned int)LUDA_FID);
- LASSERTF(LUDA_TYPE == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned int)LUDA_TYPE);
- LASSERTF(LUDA_64BITHASH == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned int)LUDA_64BITHASH);
-
- /* Checks for struct luda_type */
- LASSERTF((int)sizeof(struct luda_type) == 2, "found %lld\n",
- (long long)(int)sizeof(struct luda_type));
- LASSERTF((int)offsetof(struct luda_type, lt_type) == 0, "found %lld\n",
- (long long)(int)offsetof(struct luda_type, lt_type));
- LASSERTF((int)sizeof(((struct luda_type *)0)->lt_type) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct luda_type *)0)->lt_type));
-
- /* Checks for struct lu_dirpage */
- LASSERTF((int)sizeof(struct lu_dirpage) == 24, "found %lld\n",
- (long long)(int)sizeof(struct lu_dirpage));
- LASSERTF((int)offsetof(struct lu_dirpage, ldp_hash_start) == 0, "found %lld\n",
- (long long)(int)offsetof(struct lu_dirpage, ldp_hash_start));
- LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_hash_start) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_hash_start));
- LASSERTF((int)offsetof(struct lu_dirpage, ldp_hash_end) == 8, "found %lld\n",
- (long long)(int)offsetof(struct lu_dirpage, ldp_hash_end));
- LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_hash_end) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_hash_end));
- LASSERTF((int)offsetof(struct lu_dirpage, ldp_flags) == 16, "found %lld\n",
- (long long)(int)offsetof(struct lu_dirpage, ldp_flags));
- LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_flags));
- LASSERTF((int)offsetof(struct lu_dirpage, ldp_pad0) == 20, "found %lld\n",
- (long long)(int)offsetof(struct lu_dirpage, ldp_pad0));
- LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_pad0) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_pad0));
- LASSERTF((int)offsetof(struct lu_dirpage, ldp_entries[0]) == 24, "found %lld\n",
- (long long)(int)offsetof(struct lu_dirpage, ldp_entries[0]));
- LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_entries[0]) == 32, "found %lld\n",
- (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_entries[0]));
- LASSERTF(LDF_EMPTY == 1, "found %lld\n",
- (long long)LDF_EMPTY);
- LASSERTF(LDF_COLLIDE == 2, "found %lld\n",
- (long long)LDF_COLLIDE);
- LASSERTF(LU_PAGE_SIZE == 4096, "found %lld\n",
- (long long)LU_PAGE_SIZE);
-
- /* Checks for struct lustre_handle */
- LASSERTF((int)sizeof(struct lustre_handle) == 8, "found %lld\n",
- (long long)(int)sizeof(struct lustre_handle));
- LASSERTF((int)offsetof(struct lustre_handle, cookie) == 0, "found %lld\n",
- (long long)(int)offsetof(struct lustre_handle, cookie));
- LASSERTF((int)sizeof(((struct lustre_handle *)0)->cookie) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_handle *)0)->cookie));
-
- /* Checks for struct lustre_msg_v2 */
- LASSERTF((int)sizeof(struct lustre_msg_v2) == 32, "found %lld\n",
- (long long)(int)sizeof(struct lustre_msg_v2));
- LASSERTF((int)offsetof(struct lustre_msg_v2, lm_bufcount) == 0, "found %lld\n",
- (long long)(int)offsetof(struct lustre_msg_v2, lm_bufcount));
- LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_bufcount) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_bufcount));
- LASSERTF((int)offsetof(struct lustre_msg_v2, lm_secflvr) == 4, "found %lld\n",
- (long long)(int)offsetof(struct lustre_msg_v2, lm_secflvr));
- LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_secflvr) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_secflvr));
- LASSERTF((int)offsetof(struct lustre_msg_v2, lm_magic) == 8, "found %lld\n",
- (long long)(int)offsetof(struct lustre_msg_v2, lm_magic));
- LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_magic) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_magic));
- LASSERTF((int)offsetof(struct lustre_msg_v2, lm_repsize) == 12, "found %lld\n",
- (long long)(int)offsetof(struct lustre_msg_v2, lm_repsize));
- LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_repsize) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_repsize));
- LASSERTF((int)offsetof(struct lustre_msg_v2, lm_cksum) == 16, "found %lld\n",
- (long long)(int)offsetof(struct lustre_msg_v2, lm_cksum));
- LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_cksum) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_cksum));
- LASSERTF((int)offsetof(struct lustre_msg_v2, lm_flags) == 20, "found %lld\n",
- (long long)(int)offsetof(struct lustre_msg_v2, lm_flags));
- LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_flags));
- LASSERTF((int)offsetof(struct lustre_msg_v2, lm_padding_2) == 24, "found %lld\n",
- (long long)(int)offsetof(struct lustre_msg_v2, lm_padding_2));
- LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_2) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_2));
- LASSERTF((int)offsetof(struct lustre_msg_v2, lm_padding_3) == 28, "found %lld\n",
- (long long)(int)offsetof(struct lustre_msg_v2, lm_padding_3));
- LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_3) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_3));
- LASSERTF((int)offsetof(struct lustre_msg_v2, lm_buflens[0]) == 32, "found %lld\n",
- (long long)(int)offsetof(struct lustre_msg_v2, lm_buflens[0]));
- LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_buflens[0]) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_buflens[0]));
- LASSERTF(LUSTRE_MSG_MAGIC_V2 == 0x0BD00BD3, "found 0x%.8x\n",
- LUSTRE_MSG_MAGIC_V2);
- LASSERTF(LUSTRE_MSG_MAGIC_V2_SWABBED == 0xD30BD00B, "found 0x%.8x\n",
- LUSTRE_MSG_MAGIC_V2_SWABBED);
-
- /* Checks for struct ptlrpc_body */
- LASSERTF((int)sizeof(struct ptlrpc_body_v3) == 184, "found %lld\n",
- (long long)(int)sizeof(struct ptlrpc_body_v3));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_handle) == 0, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_handle));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_type) == 8, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_type));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_version) == 12, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_version));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_opc) == 16, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_opc));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_status) == 20, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_status));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_xid) == 24, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_xid));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_tag) == 32, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_tag));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_tag) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_tag));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding0) == 34, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding0));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding0) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding0));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding1) == 36, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding1));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding1) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding1));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_committed) == 40, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_committed));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_transno) == 48, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_transno));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_flags) == 56, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_flags));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_op_flags) == 60, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_op_flags));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt) == 64, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_timeout) == 68, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_timeout));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_service_time) == 72, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_service_time));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_limit) == 76, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_limit));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_slv) == 80, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_slv));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv));
- BUILD_BUG_ON(PTLRPC_NUM_VERSIONS != 4);
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_pre_versions) == 88, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_pre_versions));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions) == 32, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_mbits) == 120, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_mbits));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_mbits) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_mbits));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_0) == 128, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding64_0));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_0) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_0));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_1) == 136, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding64_1));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_1) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_1));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_2) == 144, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding64_2));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_2) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_2));
- BUILD_BUG_ON(LUSTRE_JOBID_SIZE != 32);
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_jobid) == 152, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_jobid));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_jobid) == 32, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_jobid));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_handle) == (int)offsetof(struct ptlrpc_body_v2, pb_handle), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_handle), (int)offsetof(struct ptlrpc_body_v2, pb_handle));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_handle), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_handle));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_type) == (int)offsetof(struct ptlrpc_body_v2, pb_type), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_type), (int)offsetof(struct ptlrpc_body_v2, pb_type));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_type), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_type));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_version) == (int)offsetof(struct ptlrpc_body_v2, pb_version), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_version), (int)offsetof(struct ptlrpc_body_v2, pb_version));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_version), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_version));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_opc) == (int)offsetof(struct ptlrpc_body_v2, pb_opc), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_opc), (int)offsetof(struct ptlrpc_body_v2, pb_opc));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_opc), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_opc));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_status) == (int)offsetof(struct ptlrpc_body_v2, pb_status), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_status), (int)offsetof(struct ptlrpc_body_v2, pb_status));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_status), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_status));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_xid) == (int)offsetof(struct ptlrpc_body_v2, pb_last_xid), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_last_xid), (int)offsetof(struct ptlrpc_body_v2, pb_last_xid));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_xid), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_xid));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_tag) == (int)offsetof(struct ptlrpc_body_v2, pb_tag), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_tag), (int)offsetof(struct ptlrpc_body_v2, pb_tag));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_tag) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_tag), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_tag), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_tag));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding0) == (int)offsetof(struct ptlrpc_body_v2, pb_padding0), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_padding0), (int)offsetof(struct ptlrpc_body_v2, pb_padding0));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding0) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding0), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding0), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding0));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding1) == (int)offsetof(struct ptlrpc_body_v2, pb_padding1), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_padding1), (int)offsetof(struct ptlrpc_body_v2, pb_padding1));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding1) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding1), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding1), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding1));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_committed) == (int)offsetof(struct ptlrpc_body_v2, pb_last_committed), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_last_committed), (int)offsetof(struct ptlrpc_body_v2, pb_last_committed));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_committed), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_committed));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_transno) == (int)offsetof(struct ptlrpc_body_v2, pb_transno), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_transno), (int)offsetof(struct ptlrpc_body_v2, pb_transno));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_transno), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_transno));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_flags) == (int)offsetof(struct ptlrpc_body_v2, pb_flags), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_flags), (int)offsetof(struct ptlrpc_body_v2, pb_flags));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_flags), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_flags));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_op_flags) == (int)offsetof(struct ptlrpc_body_v2, pb_op_flags), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_op_flags), (int)offsetof(struct ptlrpc_body_v2, pb_op_flags));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_op_flags), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_op_flags));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt) == (int)offsetof(struct ptlrpc_body_v2, pb_conn_cnt), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt), (int)offsetof(struct ptlrpc_body_v2, pb_conn_cnt));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_conn_cnt), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_conn_cnt));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_timeout) == (int)offsetof(struct ptlrpc_body_v2, pb_timeout), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_timeout), (int)offsetof(struct ptlrpc_body_v2, pb_timeout));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_timeout), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_timeout));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_service_time) == (int)offsetof(struct ptlrpc_body_v2, pb_service_time), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_service_time), (int)offsetof(struct ptlrpc_body_v2, pb_service_time));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_service_time), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_service_time));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_limit) == (int)offsetof(struct ptlrpc_body_v2, pb_limit), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_limit), (int)offsetof(struct ptlrpc_body_v2, pb_limit));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_limit), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_limit));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_slv) == (int)offsetof(struct ptlrpc_body_v2, pb_slv), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_slv), (int)offsetof(struct ptlrpc_body_v2, pb_slv));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_slv), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_slv));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_pre_versions) == (int)offsetof(struct ptlrpc_body_v2, pb_pre_versions), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_pre_versions), (int)offsetof(struct ptlrpc_body_v2, pb_pre_versions));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_pre_versions), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_pre_versions));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_mbits) == (int)offsetof(struct ptlrpc_body_v2, pb_mbits), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_mbits), (int)offsetof(struct ptlrpc_body_v2, pb_mbits));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_mbits) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_mbits), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_mbits), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_mbits));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_0) == (int)offsetof(struct ptlrpc_body_v2, pb_padding64_0), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_padding64_0), (int)offsetof(struct ptlrpc_body_v2, pb_padding64_0));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_0) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_0), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_0), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_0));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_1) == (int)offsetof(struct ptlrpc_body_v2, pb_padding64_1), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_padding64_1), (int)offsetof(struct ptlrpc_body_v2, pb_padding64_1));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_1) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_1), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_1), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_1));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_2) == (int)offsetof(struct ptlrpc_body_v2, pb_padding64_2), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_padding64_2), (int)offsetof(struct ptlrpc_body_v2, pb_padding64_2));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_2) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_2), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_2), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_2));
- LASSERTF(MSG_PTLRPC_BODY_OFF == 0, "found %lld\n",
- (long long)MSG_PTLRPC_BODY_OFF);
- LASSERTF(REQ_REC_OFF == 1, "found %lld\n",
- (long long)REQ_REC_OFF);
- LASSERTF(REPLY_REC_OFF == 1, "found %lld\n",
- (long long)REPLY_REC_OFF);
- LASSERTF(DLM_LOCKREQ_OFF == 1, "found %lld\n",
- (long long)DLM_LOCKREQ_OFF);
- LASSERTF(DLM_REQ_REC_OFF == 2, "found %lld\n",
- (long long)DLM_REQ_REC_OFF);
- LASSERTF(DLM_INTENT_IT_OFF == 2, "found %lld\n",
- (long long)DLM_INTENT_IT_OFF);
- LASSERTF(DLM_INTENT_REC_OFF == 3, "found %lld\n",
- (long long)DLM_INTENT_REC_OFF);
- LASSERTF(DLM_LOCKREPLY_OFF == 1, "found %lld\n",
- (long long)DLM_LOCKREPLY_OFF);
- LASSERTF(DLM_REPLY_REC_OFF == 2, "found %lld\n",
- (long long)DLM_REPLY_REC_OFF);
- LASSERTF(MSG_PTLRPC_HEADER_OFF == 31, "found %lld\n",
- (long long)MSG_PTLRPC_HEADER_OFF);
- LASSERTF(PTLRPC_MSG_VERSION == 0x00000003, "found 0x%.8x\n",
- PTLRPC_MSG_VERSION);
- LASSERTF(LUSTRE_VERSION_MASK == 0xffff0000, "found 0x%.8x\n",
- LUSTRE_VERSION_MASK);
- LASSERTF(LUSTRE_OBD_VERSION == 0x00010000, "found 0x%.8x\n",
- LUSTRE_OBD_VERSION);
- LASSERTF(LUSTRE_MDS_VERSION == 0x00020000, "found 0x%.8x\n",
- LUSTRE_MDS_VERSION);
- LASSERTF(LUSTRE_OST_VERSION == 0x00030000, "found 0x%.8x\n",
- LUSTRE_OST_VERSION);
- LASSERTF(LUSTRE_DLM_VERSION == 0x00040000, "found 0x%.8x\n",
- LUSTRE_DLM_VERSION);
- LASSERTF(LUSTRE_LOG_VERSION == 0x00050000, "found 0x%.8x\n",
- LUSTRE_LOG_VERSION);
- LASSERTF(LUSTRE_MGS_VERSION == 0x00060000, "found 0x%.8x\n",
- LUSTRE_MGS_VERSION);
- LASSERTF(MSGHDR_AT_SUPPORT == 1, "found %lld\n",
- (long long)MSGHDR_AT_SUPPORT);
- LASSERTF(MSGHDR_CKSUM_INCOMPAT18 == 2, "found %lld\n",
- (long long)MSGHDR_CKSUM_INCOMPAT18);
- LASSERTF(MSG_OP_FLAG_MASK == 0xffff0000UL, "found 0x%.8xUL\n",
- (unsigned int)MSG_OP_FLAG_MASK);
- LASSERTF(MSG_OP_FLAG_SHIFT == 16, "found %lld\n",
- (long long)MSG_OP_FLAG_SHIFT);
- LASSERTF(MSG_GEN_FLAG_MASK == 0x0000ffffUL, "found 0x%.8xUL\n",
- (unsigned int)MSG_GEN_FLAG_MASK);
- LASSERTF(MSG_LAST_REPLAY == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned int)MSG_LAST_REPLAY);
- LASSERTF(MSG_RESENT == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned int)MSG_RESENT);
- LASSERTF(MSG_REPLAY == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned int)MSG_REPLAY);
- LASSERTF(MSG_DELAY_REPLAY == 0x00000010UL, "found 0x%.8xUL\n",
- (unsigned int)MSG_DELAY_REPLAY);
- LASSERTF(MSG_VERSION_REPLAY == 0x00000020UL, "found 0x%.8xUL\n",
- (unsigned int)MSG_VERSION_REPLAY);
- LASSERTF(MSG_REQ_REPLAY_DONE == 0x00000040UL, "found 0x%.8xUL\n",
- (unsigned int)MSG_REQ_REPLAY_DONE);
- LASSERTF(MSG_LOCK_REPLAY_DONE == 0x00000080UL, "found 0x%.8xUL\n",
- (unsigned int)MSG_LOCK_REPLAY_DONE);
- LASSERTF(MSG_CONNECT_RECOVERING == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned int)MSG_CONNECT_RECOVERING);
- LASSERTF(MSG_CONNECT_RECONNECT == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned int)MSG_CONNECT_RECONNECT);
- LASSERTF(MSG_CONNECT_REPLAYABLE == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned int)MSG_CONNECT_REPLAYABLE);
- LASSERTF(MSG_CONNECT_LIBCLIENT == 0x00000010UL, "found 0x%.8xUL\n",
- (unsigned int)MSG_CONNECT_LIBCLIENT);
- LASSERTF(MSG_CONNECT_INITIAL == 0x00000020UL, "found 0x%.8xUL\n",
- (unsigned int)MSG_CONNECT_INITIAL);
- LASSERTF(MSG_CONNECT_ASYNC == 0x00000040UL, "found 0x%.8xUL\n",
- (unsigned int)MSG_CONNECT_ASYNC);
- LASSERTF(MSG_CONNECT_NEXT_VER == 0x00000080UL, "found 0x%.8xUL\n",
- (unsigned int)MSG_CONNECT_NEXT_VER);
- LASSERTF(MSG_CONNECT_TRANSNO == 0x00000100UL, "found 0x%.8xUL\n",
- (unsigned int)MSG_CONNECT_TRANSNO);
-
- /* Checks for struct obd_connect_data */
- LASSERTF((int)sizeof(struct obd_connect_data) == 192, "found %lld\n",
- (long long)(int)sizeof(struct obd_connect_data));
- LASSERTF((int)offsetof(struct obd_connect_data, ocd_connect_flags) == 0, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, ocd_connect_flags));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags));
- LASSERTF((int)offsetof(struct obd_connect_data, ocd_version) == 8, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, ocd_version));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_version) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_version));
- LASSERTF((int)offsetof(struct obd_connect_data, ocd_grant) == 12, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, ocd_grant));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_grant) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_grant));
- LASSERTF((int)offsetof(struct obd_connect_data, ocd_index) == 16, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, ocd_index));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_index) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_index));
- LASSERTF((int)offsetof(struct obd_connect_data, ocd_brw_size) == 20, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, ocd_brw_size));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_brw_size) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_brw_size));
- LASSERTF((int)offsetof(struct obd_connect_data, ocd_ibits_known) == 24, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, ocd_ibits_known));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_ibits_known) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_ibits_known));
- LASSERTF((int)offsetof(struct obd_connect_data, ocd_blocksize) == 32, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, ocd_blocksize));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_blocksize) == 1, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_blocksize));
- LASSERTF((int)offsetof(struct obd_connect_data, ocd_inodespace) == 33, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, ocd_inodespace));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_inodespace) == 1, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_inodespace));
- LASSERTF((int)offsetof(struct obd_connect_data, ocd_grant_extent) == 34, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, ocd_grant_extent));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_grant_extent) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_grant_extent));
- LASSERTF((int)offsetof(struct obd_connect_data, ocd_unused) == 36, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, ocd_unused));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_unused) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_unused));
- LASSERTF((int)offsetof(struct obd_connect_data, ocd_transno) == 40, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, ocd_transno));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_transno) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_transno));
- LASSERTF((int)offsetof(struct obd_connect_data, ocd_group) == 48, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, ocd_group));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_group) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_group));
- LASSERTF((int)offsetof(struct obd_connect_data, ocd_cksum_types) == 52, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, ocd_cksum_types));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_cksum_types) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_cksum_types));
- LASSERTF((int)offsetof(struct obd_connect_data, ocd_max_easize) == 56, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, ocd_max_easize));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize));
- LASSERTF((int)offsetof(struct obd_connect_data, ocd_instance) == 60, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, ocd_instance));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_instance) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_instance));
- LASSERTF((int)offsetof(struct obd_connect_data, ocd_maxbytes) == 64, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, ocd_maxbytes));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes));
- LASSERTF((int)offsetof(struct obd_connect_data, ocd_maxmodrpcs) == 72, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, ocd_maxmodrpcs));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_maxmodrpcs) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_maxmodrpcs));
- LASSERTF((int)offsetof(struct obd_connect_data, padding0) == 74, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, padding0));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding0) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->padding0));
- LASSERTF((int)offsetof(struct obd_connect_data, padding1) == 76, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, padding1));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding1) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->padding1));
- LASSERTF((int)offsetof(struct obd_connect_data, ocd_connect_flags2) == 80, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, ocd_connect_flags2));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags2) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags2));
- LASSERTF((int)offsetof(struct obd_connect_data, padding3) == 88, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, padding3));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding3) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->padding3));
- LASSERTF((int)offsetof(struct obd_connect_data, padding4) == 96, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, padding4));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding4) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->padding4));
- LASSERTF((int)offsetof(struct obd_connect_data, padding5) == 104, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, padding5));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding5) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->padding5));
- LASSERTF((int)offsetof(struct obd_connect_data, padding6) == 112, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, padding6));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding6) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->padding6));
- LASSERTF((int)offsetof(struct obd_connect_data, padding7) == 120, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, padding7));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding7) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->padding7));
- LASSERTF((int)offsetof(struct obd_connect_data, padding8) == 128, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, padding8));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding8) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->padding8));
- LASSERTF((int)offsetof(struct obd_connect_data, padding9) == 136, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, padding9));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding9) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->padding9));
- LASSERTF((int)offsetof(struct obd_connect_data, paddingA) == 144, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, paddingA));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingA) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingA));
- LASSERTF((int)offsetof(struct obd_connect_data, paddingB) == 152, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, paddingB));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingB) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingB));
- LASSERTF((int)offsetof(struct obd_connect_data, paddingC) == 160, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, paddingC));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingC) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingC));
- LASSERTF((int)offsetof(struct obd_connect_data, paddingD) == 168, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, paddingD));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingD) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingD));
- LASSERTF((int)offsetof(struct obd_connect_data, paddingE) == 176, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, paddingE));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingE) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingE));
- LASSERTF((int)offsetof(struct obd_connect_data, paddingF) == 184, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, paddingF));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingF) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingF));
- LASSERTF(OBD_CONNECT_RDONLY == 0x1ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_RDONLY);
- LASSERTF(OBD_CONNECT_INDEX == 0x2ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_INDEX);
- LASSERTF(OBD_CONNECT_MDS == 0x4ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_MDS);
- LASSERTF(OBD_CONNECT_GRANT == 0x8ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_GRANT);
- LASSERTF(OBD_CONNECT_SRVLOCK == 0x10ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_SRVLOCK);
- LASSERTF(OBD_CONNECT_VERSION == 0x20ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_VERSION);
- LASSERTF(OBD_CONNECT_REQPORTAL == 0x40ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_REQPORTAL);
- LASSERTF(OBD_CONNECT_ACL == 0x80ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_ACL);
- LASSERTF(OBD_CONNECT_XATTR == 0x100ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_XATTR);
- LASSERTF(OBD_CONNECT_LARGE_ACL == 0x200ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_LARGE_ACL);
- LASSERTF(OBD_CONNECT_TRUNCLOCK == 0x400ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_TRUNCLOCK);
- LASSERTF(OBD_CONNECT_TRANSNO == 0x800ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_TRANSNO);
- LASSERTF(OBD_CONNECT_IBITS == 0x1000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_IBITS);
- LASSERTF(OBD_CONNECT_JOIN == 0x2000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_JOIN);
- LASSERTF(OBD_CONNECT_ATTRFID == 0x4000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_ATTRFID);
- LASSERTF(OBD_CONNECT_NODEVOH == 0x8000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_NODEVOH);
- LASSERTF(OBD_CONNECT_RMT_CLIENT == 0x10000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_RMT_CLIENT);
- LASSERTF(OBD_CONNECT_RMT_CLIENT_FORCE == 0x20000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_RMT_CLIENT_FORCE);
- LASSERTF(OBD_CONNECT_BRW_SIZE == 0x40000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_BRW_SIZE);
- LASSERTF(OBD_CONNECT_QUOTA64 == 0x80000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_QUOTA64);
- LASSERTF(OBD_CONNECT_MDS_CAPA == 0x100000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_MDS_CAPA);
- LASSERTF(OBD_CONNECT_OSS_CAPA == 0x200000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_OSS_CAPA);
- LASSERTF(OBD_CONNECT_CANCELSET == 0x400000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_CANCELSET);
- LASSERTF(OBD_CONNECT_SOM == 0x800000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_SOM);
- LASSERTF(OBD_CONNECT_AT == 0x1000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_AT);
- LASSERTF(OBD_CONNECT_LRU_RESIZE == 0x2000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_LRU_RESIZE);
- LASSERTF(OBD_CONNECT_MDS_MDS == 0x4000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_MDS_MDS);
- LASSERTF(OBD_CONNECT_REAL == 0x8000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_REAL);
- LASSERTF(OBD_CONNECT_CHANGE_QS == 0x10000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_CHANGE_QS);
- LASSERTF(OBD_CONNECT_CKSUM == 0x20000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_CKSUM);
- LASSERTF(OBD_CONNECT_FID == 0x40000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_FID);
- LASSERTF(OBD_CONNECT_VBR == 0x80000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_VBR);
- LASSERTF(OBD_CONNECT_LOV_V3 == 0x100000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_LOV_V3);
- LASSERTF(OBD_CONNECT_GRANT_SHRINK == 0x200000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_GRANT_SHRINK);
- LASSERTF(OBD_CONNECT_SKIP_ORPHAN == 0x400000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_SKIP_ORPHAN);
- LASSERTF(OBD_CONNECT_MAX_EASIZE == 0x800000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_MAX_EASIZE);
- LASSERTF(OBD_CONNECT_FULL20 == 0x1000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_FULL20);
- LASSERTF(OBD_CONNECT_LAYOUTLOCK == 0x2000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_LAYOUTLOCK);
- LASSERTF(OBD_CONNECT_64BITHASH == 0x4000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_64BITHASH);
- LASSERTF(OBD_CONNECT_MAXBYTES == 0x8000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_MAXBYTES);
- LASSERTF(OBD_CONNECT_IMP_RECOV == 0x10000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_IMP_RECOV);
- LASSERTF(OBD_CONNECT_JOBSTATS == 0x20000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_JOBSTATS);
- LASSERTF(OBD_CONNECT_UMASK == 0x40000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_UMASK);
- LASSERTF(OBD_CONNECT_EINPROGRESS == 0x80000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_EINPROGRESS);
- LASSERTF(OBD_CONNECT_GRANT_PARAM == 0x100000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_GRANT_PARAM);
- LASSERTF(OBD_CONNECT_FLOCK_OWNER == 0x200000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_FLOCK_OWNER);
- LASSERTF(OBD_CONNECT_LVB_TYPE == 0x400000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_LVB_TYPE);
- LASSERTF(OBD_CONNECT_NANOSEC_TIME == 0x800000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_NANOSEC_TIME);
- LASSERTF(OBD_CONNECT_LIGHTWEIGHT == 0x1000000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_LIGHTWEIGHT);
- LASSERTF(OBD_CONNECT_SHORTIO == 0x2000000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_SHORTIO);
- LASSERTF(OBD_CONNECT_PINGLESS == 0x4000000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_PINGLESS);
- LASSERTF(OBD_CONNECT_FLOCK_DEAD == 0x8000000000000ULL,
- "found 0x%.16llxULL\n", OBD_CONNECT_FLOCK_DEAD);
- LASSERTF(OBD_CONNECT_OPEN_BY_FID == 0x20000000000000ULL,
- "found 0x%.16llxULL\n", OBD_CONNECT_OPEN_BY_FID);
- LASSERTF(OBD_CONNECT_LFSCK == 0x40000000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_LFSCK);
- LASSERTF(OBD_CONNECT_UNLINK_CLOSE == 0x100000000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_UNLINK_CLOSE);
- LASSERTF(OBD_CONNECT_MULTIMODRPCS == 0x200000000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_MULTIMODRPCS);
- LASSERTF(OBD_CONNECT_DIR_STRIPE == 0x400000000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_DIR_STRIPE);
- LASSERTF(OBD_CONNECT_SUBTREE == 0x800000000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_SUBTREE);
- LASSERTF(OBD_CONNECT_LOCK_AHEAD == 0x1000000000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_LOCK_AHEAD);
- LASSERTF(OBD_CONNECT_OBDOPACK == 0x4000000000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_OBDOPACK);
- LASSERTF(OBD_CONNECT_FLAGS2 == 0x8000000000000000ULL, "found 0x%.16llxULL\n",
- OBD_CONNECT_FLAGS2);
- LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned int)OBD_CKSUM_CRC32);
- LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned int)OBD_CKSUM_ADLER);
- LASSERTF(OBD_CKSUM_CRC32C == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned int)OBD_CKSUM_CRC32C);
-
- /* Checks for struct obdo */
- LASSERTF((int)sizeof(struct obdo) == 208, "found %lld\n",
- (long long)(int)sizeof(struct obdo));
- LASSERTF((int)offsetof(struct obdo, o_valid) == 0, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_valid));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_valid) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_valid));
- LASSERTF((int)offsetof(struct obdo, o_oi) == 8, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_oi));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_oi) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_oi));
- LASSERTF((int)offsetof(struct obdo, o_parent_seq) == 24, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_parent_seq));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_parent_seq) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_parent_seq));
- LASSERTF((int)offsetof(struct obdo, o_size) == 32, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_size));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_size) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_size));
- LASSERTF((int)offsetof(struct obdo, o_mtime) == 40, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_mtime));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_mtime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_mtime));
- LASSERTF((int)offsetof(struct obdo, o_atime) == 48, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_atime));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_atime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_atime));
- LASSERTF((int)offsetof(struct obdo, o_ctime) == 56, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_ctime));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_ctime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_ctime));
- LASSERTF((int)offsetof(struct obdo, o_blocks) == 64, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_blocks));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_blocks) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_blocks));
- LASSERTF((int)offsetof(struct obdo, o_grant) == 72, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_grant));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_grant) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_grant));
- LASSERTF((int)offsetof(struct obdo, o_blksize) == 80, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_blksize));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_blksize) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_blksize));
- LASSERTF((int)offsetof(struct obdo, o_mode) == 84, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_mode));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_mode) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_mode));
- LASSERTF((int)offsetof(struct obdo, o_uid) == 88, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_uid));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_uid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_uid));
- LASSERTF((int)offsetof(struct obdo, o_gid) == 92, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_gid));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_gid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_gid));
- LASSERTF((int)offsetof(struct obdo, o_flags) == 96, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_flags));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_flags));
- LASSERTF((int)offsetof(struct obdo, o_nlink) == 100, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_nlink));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_nlink) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_nlink));
- LASSERTF((int)offsetof(struct obdo, o_parent_oid) == 104, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_parent_oid));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_parent_oid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_parent_oid));
- LASSERTF((int)offsetof(struct obdo, o_misc) == 108, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_misc));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_misc) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_misc));
- LASSERTF((int)offsetof(struct obdo, o_ioepoch) == 112, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_ioepoch));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_ioepoch) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_ioepoch));
- LASSERTF((int)offsetof(struct obdo, o_stripe_idx) == 120, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_stripe_idx));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_stripe_idx) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_stripe_idx));
- LASSERTF((int)offsetof(struct obdo, o_parent_ver) == 124, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_parent_ver));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_parent_ver) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_parent_ver));
- LASSERTF((int)offsetof(struct obdo, o_handle) == 128, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_handle));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_handle) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_handle));
- LASSERTF((int)offsetof(struct obdo, o_lcookie) == 136, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_lcookie));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_lcookie) == 32, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_lcookie));
- LASSERTF((int)offsetof(struct obdo, o_uid_h) == 168, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_uid_h));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_uid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_uid_h));
- LASSERTF((int)offsetof(struct obdo, o_gid_h) == 172, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_gid_h));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_gid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_gid_h));
- LASSERTF((int)offsetof(struct obdo, o_data_version) == 176, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_data_version));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_data_version) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_data_version));
- LASSERTF((int)offsetof(struct obdo, o_padding_4) == 184, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_padding_4));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_4) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_padding_4));
- LASSERTF((int)offsetof(struct obdo, o_padding_5) == 192, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_padding_5));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_5) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_padding_5));
- LASSERTF((int)offsetof(struct obdo, o_padding_6) == 200, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_padding_6));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_6) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_padding_6));
- LASSERTF(OBD_MD_FLID == (0x00000001ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLID);
- LASSERTF(OBD_MD_FLATIME == (0x00000002ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLATIME);
- LASSERTF(OBD_MD_FLMTIME == (0x00000004ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLMTIME);
- LASSERTF(OBD_MD_FLCTIME == (0x00000008ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLCTIME);
- LASSERTF(OBD_MD_FLSIZE == (0x00000010ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLSIZE);
- LASSERTF(OBD_MD_FLBLOCKS == (0x00000020ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLBLOCKS);
- LASSERTF(OBD_MD_FLBLKSZ == (0x00000040ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLBLKSZ);
- LASSERTF(OBD_MD_FLMODE == (0x00000080ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLMODE);
- LASSERTF(OBD_MD_FLTYPE == (0x00000100ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLTYPE);
- LASSERTF(OBD_MD_FLUID == (0x00000200ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLUID);
- LASSERTF(OBD_MD_FLGID == (0x00000400ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLGID);
- LASSERTF(OBD_MD_FLFLAGS == (0x00000800ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLFLAGS);
- LASSERTF(OBD_MD_FLNLINK == (0x00002000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLNLINK);
- LASSERTF(OBD_MD_FLGENER == (0x00004000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLGENER);
- LASSERTF(OBD_MD_FLRDEV == (0x00010000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLRDEV);
- LASSERTF(OBD_MD_FLEASIZE == (0x00020000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLEASIZE);
- LASSERTF(OBD_MD_LINKNAME == (0x00040000ULL), "found 0x%.16llxULL\n",
- OBD_MD_LINKNAME);
- LASSERTF(OBD_MD_FLHANDLE == (0x00080000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLHANDLE);
- LASSERTF(OBD_MD_FLCKSUM == (0x00100000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLCKSUM);
- LASSERTF(OBD_MD_FLQOS == (0x00200000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLQOS);
- LASSERTF(OBD_MD_FLGROUP == (0x01000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLGROUP);
- LASSERTF(OBD_MD_FLFID == (0x02000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLFID);
- LASSERTF(OBD_MD_FLEPOCH == (0x04000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLEPOCH);
- LASSERTF(OBD_MD_FLGRANT == (0x08000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLGRANT);
- LASSERTF(OBD_MD_FLDIREA == (0x10000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLDIREA);
- LASSERTF(OBD_MD_FLUSRQUOTA == (0x20000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLUSRQUOTA);
- LASSERTF(OBD_MD_FLGRPQUOTA == (0x40000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLGRPQUOTA);
- LASSERTF(OBD_MD_FLMODEASIZE == (0x80000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLMODEASIZE);
- LASSERTF(OBD_MD_MDS == (0x0000000100000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_MDS);
- LASSERTF(OBD_MD_REINT == (0x0000000200000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_REINT);
- LASSERTF(OBD_MD_MEA == (0x0000000400000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_MEA);
- LASSERTF(OBD_MD_TSTATE == (0x0000000800000000ULL),
- "found 0x%.16llxULL\n", OBD_MD_TSTATE);
- LASSERTF(OBD_MD_FLXATTR == (0x0000001000000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLXATTR);
- LASSERTF(OBD_MD_FLXATTRLS == (0x0000002000000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLXATTRLS);
- LASSERTF(OBD_MD_FLXATTRRM == (0x0000004000000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLXATTRRM);
- LASSERTF(OBD_MD_FLACL == (0x0000008000000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLACL);
- LASSERTF(OBD_MD_FLMDSCAPA == (0x0000020000000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLMDSCAPA);
- LASSERTF(OBD_MD_FLOSSCAPA == (0x0000040000000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLOSSCAPA);
- LASSERTF(OBD_MD_FLCKSPLIT == (0x0000080000000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLCKSPLIT);
- LASSERTF(OBD_MD_FLCROSSREF == (0x0000100000000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLCROSSREF);
- LASSERTF(OBD_MD_FLGETATTRLOCK == (0x0000200000000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLGETATTRLOCK);
- LASSERTF(OBD_MD_FLDATAVERSION == (0x0010000000000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLDATAVERSION);
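- /* The OBD_FL_* values are plain compile-time constants, so a mismatch
- * is caught by BUILD_BUG_ON() at build time instead of at load time. */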
- BUILD_BUG_ON(OBD_FL_INLINEDATA != 0x00000001);
- BUILD_BUG_ON(OBD_FL_OBDMDEXISTS != 0x00000002);
- BUILD_BUG_ON(OBD_FL_DELORPHAN != 0x00000004);
- BUILD_BUG_ON(OBD_FL_NORPC != 0x00000008);
- BUILD_BUG_ON(OBD_FL_IDONLY != 0x00000010);
- BUILD_BUG_ON(OBD_FL_RECREATE_OBJS != 0x00000020);
- BUILD_BUG_ON(OBD_FL_DEBUG_CHECK != 0x00000040);
- BUILD_BUG_ON(OBD_FL_NO_USRQUOTA != 0x00000100);
- BUILD_BUG_ON(OBD_FL_NO_GRPQUOTA != 0x00000200);
- BUILD_BUG_ON(OBD_FL_CREATE_CROW != 0x00000400);
- BUILD_BUG_ON(OBD_FL_SRVLOCK != 0x00000800);
- BUILD_BUG_ON(OBD_FL_CKSUM_CRC32 != 0x00001000);
- BUILD_BUG_ON(OBD_FL_CKSUM_ADLER != 0x00002000);
- BUILD_BUG_ON(OBD_FL_CKSUM_CRC32C != 0x00004000);
- BUILD_BUG_ON(OBD_FL_CKSUM_RSVD2 != 0x00008000);
- BUILD_BUG_ON(OBD_FL_CKSUM_RSVD3 != 0x00010000);
- BUILD_BUG_ON(OBD_FL_SHRINK_GRANT != 0x00020000);
- BUILD_BUG_ON(OBD_FL_MMAP != 0x00040000);
- BUILD_BUG_ON(OBD_FL_RECOV_RESEND != 0x00080000);
- BUILD_BUG_ON(OBD_FL_NOSPC_BLK != 0x00100000);
- BUILD_BUG_ON(OBD_FL_LOCAL_MASK != 0xf0000000);
-
- /* Checks for struct lov_ost_data_v1 */
- LASSERTF((int)sizeof(struct lov_ost_data_v1) == 24, "found %lld\n",
- (long long)(int)sizeof(struct lov_ost_data_v1));
- LASSERTF((int)offsetof(struct lov_ost_data_v1, l_ost_oi) == 0, "found %lld\n",
- (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_oi));
- LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_oi) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_oi));
- LASSERTF((int)offsetof(struct lov_ost_data_v1, l_ost_gen) == 16, "found %lld\n",
- (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_gen));
- LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_gen) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_gen));
- LASSERTF((int)offsetof(struct lov_ost_data_v1, l_ost_idx) == 20, "found %lld\n",
- (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_idx));
- LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx));
-
- /* Checks for struct lov_mds_md_v1 */
- LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, "found %lld\n",
- (long long)(int)sizeof(struct lov_mds_md_v1));
- LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_magic) == 0, "found %lld\n",
- (long long)(int)offsetof(struct lov_mds_md_v1, lmm_magic));
- LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_magic) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_magic));
- LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_pattern) == 4, "found %lld\n",
- (long long)(int)offsetof(struct lov_mds_md_v1, lmm_pattern));
- LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_pattern) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_pattern));
- LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_oi) == 8, "found %lld\n",
- (long long)(int)offsetof(struct lov_mds_md_v1, lmm_oi));
- LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_oi) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_oi));
- LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_stripe_size) == 24, "found %lld\n",
- (long long)(int)offsetof(struct lov_mds_md_v1, lmm_stripe_size));
- LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_size) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_size));
- LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_stripe_count) == 28, "found %lld\n",
- (long long)(int)offsetof(struct lov_mds_md_v1, lmm_stripe_count));
- LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_count) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_count));
- LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_layout_gen) == 30, "found %lld\n",
- (long long)(int)offsetof(struct lov_mds_md_v1, lmm_layout_gen));
- LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_layout_gen) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_layout_gen));
- LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_objects[0]) == 32, "found %lld\n",
- (long long)(int)offsetof(struct lov_mds_md_v1, lmm_objects[0]));
- LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_objects[0]) == 24, "found %lld\n",
- (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_objects[0]));
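- /* Both LOV magics keep the 0x0BD0 pattern in the low word and put the
- * layout version in the high word: v1 = 0x0BD10BD0, v3 = 0x0BD30BD0. */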
- BUILD_BUG_ON(LOV_MAGIC_V1 != (0x0BD10000 | 0x0BD0));
-
- /* Checks for struct lov_mds_md_v3 */
- LASSERTF((int)sizeof(struct lov_mds_md_v3) == 48, "found %lld\n",
- (long long)(int)sizeof(struct lov_mds_md_v3));
- LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_magic) == 0, "found %lld\n",
- (long long)(int)offsetof(struct lov_mds_md_v3, lmm_magic));
- LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_magic) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_magic));
- LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_pattern) == 4, "found %lld\n",
- (long long)(int)offsetof(struct lov_mds_md_v3, lmm_pattern));
- LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pattern) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pattern));
- LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_oi) == 8, "found %lld\n",
- (long long)(int)offsetof(struct lov_mds_md_v3, lmm_oi));
- LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_oi) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_oi));
- LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_stripe_size) == 24, "found %lld\n",
- (long long)(int)offsetof(struct lov_mds_md_v3, lmm_stripe_size));
- LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_size) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_size));
- LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_stripe_count) == 28, "found %lld\n",
- (long long)(int)offsetof(struct lov_mds_md_v3, lmm_stripe_count));
- LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_count) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_count));
- LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_layout_gen) == 30, "found %lld\n",
- (long long)(int)offsetof(struct lov_mds_md_v3, lmm_layout_gen));
- LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_layout_gen) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_layout_gen));
- BUILD_BUG_ON(LOV_MAXPOOLNAME != 15);
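- /* Probing lmm_pool_name[16], one byte past LOV_MAXPOOLNAME plus its
- * NUL, pins the end of the name buffer at offset 48, which is also
- * where lmm_objects[] must begin. */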
- LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_pool_name[16]) == 48, "found %lld\n",
- (long long)(int)offsetof(struct lov_mds_md_v3, lmm_pool_name[16]));
- LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pool_name[16]) == 1, "found %lld\n",
- (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pool_name[16]));
- LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_objects[0]) == 48, "found %lld\n",
- (long long)(int)offsetof(struct lov_mds_md_v3, lmm_objects[0]));
- LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0]) == 24, "found %lld\n",
- (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0]));
- BUILD_BUG_ON(LOV_MAGIC_V3 != (0x0BD30000 | 0x0BD0));
- LASSERTF(LOV_PATTERN_RAID0 == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned int)LOV_PATTERN_RAID0);
- LASSERTF(LOV_PATTERN_RAID1 == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned int)LOV_PATTERN_RAID1);
- LASSERTF(LOV_PATTERN_FIRST == 0x00000100UL, "found 0x%.8xUL\n",
- (unsigned int)LOV_PATTERN_FIRST);
- LASSERTF(LOV_PATTERN_CMOBD == 0x00000200UL, "found 0x%.8xUL\n",
- (unsigned int)LOV_PATTERN_CMOBD);
-
- /* Checks for struct lmv_mds_md_v1 */
- LASSERTF((int)sizeof(struct lmv_mds_md_v1) == 56, "found %lld\n",
- (long long)(int)sizeof(struct lmv_mds_md_v1));
- LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_magic) == 0, "found %lld\n",
- (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_magic));
- LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_magic) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_magic));
- LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_stripe_count) == 4, "found %lld\n",
- (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_stripe_count));
- LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_stripe_count) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_stripe_count));
- LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_master_mdt_index) == 8, "found %lld\n",
- (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_master_mdt_index));
- LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_master_mdt_index) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_master_mdt_index));
- LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_hash_type) == 12, "found %lld\n",
- (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_hash_type));
- LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_hash_type) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_hash_type));
- LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_layout_version) == 16, "found %lld\n",
- (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_layout_version));
- LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_layout_version) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_layout_version));
- LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_padding1) == 20, "found %lld\n",
- (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_padding1));
- LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding1) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding1));
- LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_padding2) == 24, "found %lld\n",
- (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_padding2));
- LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding2) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding2));
- LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_padding3) == 32, "found %lld\n",
- (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_padding3));
- LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding3) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding3));
- LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_pool_name[16]) == 56, "found %lld\n",
- (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_pool_name[16]));
- LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_pool_name[16]) == 1, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_pool_name[16]));
- LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_stripe_fids[0]) == 56, "found %lld\n",
- (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_stripe_fids[0]));
- LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_stripe_fids[0]) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_stripe_fids[0]));
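- /* lmv_hash_type carries the hash function in its low 16 bits
- * (LMV_HASH_TYPE_MASK) and per-directory state flags in the top bits. */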
- BUILD_BUG_ON(LMV_MAGIC_V1 != 0x0CD20CD0);
- BUILD_BUG_ON(LMV_MAGIC_STRIPE != 0x0CD40CD0);
- BUILD_BUG_ON(LMV_HASH_TYPE_MASK != 0x0000ffff);
- BUILD_BUG_ON(LMV_HASH_FLAG_MIGRATION != 0x80000000);
- BUILD_BUG_ON(LMV_HASH_FLAG_DEAD != 0x40000000);
-
- /* Checks for struct obd_statfs */
- LASSERTF((int)sizeof(struct obd_statfs) == 144, "found %lld\n",
- (long long)(int)sizeof(struct obd_statfs));
- LASSERTF((int)offsetof(struct obd_statfs, os_type) == 0, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_type));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_type) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_type));
- LASSERTF((int)offsetof(struct obd_statfs, os_blocks) == 8, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_blocks));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_blocks) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_blocks));
- LASSERTF((int)offsetof(struct obd_statfs, os_bfree) == 16, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_bfree));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_bfree) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_bfree));
- LASSERTF((int)offsetof(struct obd_statfs, os_bavail) == 24, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_bavail));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_bavail) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_bavail));
- LASSERTF((int)offsetof(struct obd_statfs, os_files) == 32, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_files));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_files) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_files));
- LASSERTF((int)offsetof(struct obd_statfs, os_ffree) == 40, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_ffree));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_ffree) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_ffree));
- LASSERTF((int)offsetof(struct obd_statfs, os_fsid) == 48, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_fsid));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fsid) == 40, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_fsid));
- LASSERTF((int)offsetof(struct obd_statfs, os_bsize) == 88, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_bsize));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_bsize) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_bsize));
- LASSERTF((int)offsetof(struct obd_statfs, os_namelen) == 92, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_namelen));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_namelen) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_namelen));
- LASSERTF((int)offsetof(struct obd_statfs, os_maxbytes) == 96, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_maxbytes));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_maxbytes) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_maxbytes));
- LASSERTF((int)offsetof(struct obd_statfs, os_state) == 104, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_state));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_state) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_state));
- LASSERTF((int)offsetof(struct obd_statfs, os_fprecreated) == 108, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_fprecreated));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fprecreated) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_fprecreated));
- LASSERTF((int)offsetof(struct obd_statfs, os_spare2) == 112, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_spare2));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare2) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare2));
- LASSERTF((int)offsetof(struct obd_statfs, os_spare3) == 116, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_spare3));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare3) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare3));
- LASSERTF((int)offsetof(struct obd_statfs, os_spare4) == 120, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_spare4));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare4) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare4));
- LASSERTF((int)offsetof(struct obd_statfs, os_spare5) == 124, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_spare5));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare5) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare5));
- LASSERTF((int)offsetof(struct obd_statfs, os_spare6) == 128, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_spare6));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare6) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare6));
- LASSERTF((int)offsetof(struct obd_statfs, os_spare7) == 132, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_spare7));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare7) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare7));
- LASSERTF((int)offsetof(struct obd_statfs, os_spare8) == 136, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_spare8));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare8) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare8));
- LASSERTF((int)offsetof(struct obd_statfs, os_spare9) == 140, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_spare9));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare9) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare9));
-
- /* Checks for struct obd_ioobj */
- LASSERTF((int)sizeof(struct obd_ioobj) == 24, "found %lld\n",
- (long long)(int)sizeof(struct obd_ioobj));
- LASSERTF((int)offsetof(struct obd_ioobj, ioo_oid) == 0, "found %lld\n",
- (long long)(int)offsetof(struct obd_ioobj, ioo_oid));
- LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_oid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_oid));
- LASSERTF((int)offsetof(struct obd_ioobj, ioo_max_brw) == 16, "found %lld\n",
- (long long)(int)offsetof(struct obd_ioobj, ioo_max_brw));
- LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_max_brw) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_max_brw));
- LASSERTF((int)offsetof(struct obd_ioobj, ioo_bufcnt) == 20, "found %lld\n",
- (long long)(int)offsetof(struct obd_ioobj, ioo_bufcnt));
- LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt));
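- /* ioo_max_brw packs the maximum BRW count into the high 16 bits of a
- * __u32, hence the check that IOOBJ_MAX_BRW_BITS is exactly 16. */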
- LASSERTF(IOOBJ_MAX_BRW_BITS == 16, "found %lld\n",
- (long long)IOOBJ_MAX_BRW_BITS);
-
- /* Checks for union lquota_id */
- LASSERTF((int)sizeof(union lquota_id) == 16, "found %lld\n",
- (long long)(int)sizeof(union lquota_id));
-
- /* Checks for struct obd_quotactl */
- LASSERTF((int)sizeof(struct obd_quotactl) == 112, "found %lld\n",
- (long long)(int)sizeof(struct obd_quotactl));
- LASSERTF((int)offsetof(struct obd_quotactl, qc_cmd) == 0, "found %lld\n",
- (long long)(int)offsetof(struct obd_quotactl, qc_cmd));
- LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_cmd) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_cmd));
- LASSERTF((int)offsetof(struct obd_quotactl, qc_type) == 4, "found %lld\n",
- (long long)(int)offsetof(struct obd_quotactl, qc_type));
- LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_type) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_type));
- LASSERTF((int)offsetof(struct obd_quotactl, qc_id) == 8, "found %lld\n",
- (long long)(int)offsetof(struct obd_quotactl, qc_id));
- LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_id) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_id));
- LASSERTF((int)offsetof(struct obd_quotactl, qc_stat) == 12, "found %lld\n",
- (long long)(int)offsetof(struct obd_quotactl, qc_stat));
- LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_stat) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_stat));
- LASSERTF((int)offsetof(struct obd_quotactl, qc_dqinfo) == 16, "found %lld\n",
- (long long)(int)offsetof(struct obd_quotactl, qc_dqinfo));
- LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_dqinfo) == 24, "found %lld\n",
- (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_dqinfo));
- LASSERTF((int)offsetof(struct obd_quotactl, qc_dqblk) == 40, "found %lld\n",
- (long long)(int)offsetof(struct obd_quotactl, qc_dqblk));
- LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_dqblk) == 72, "found %lld\n",
- (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_dqblk));
-
- /* Checks for struct obd_dqinfo */
- LASSERTF((int)sizeof(struct obd_dqinfo) == 24, "found %lld\n",
- (long long)(int)sizeof(struct obd_dqinfo));
- LASSERTF((int)offsetof(struct obd_dqinfo, dqi_bgrace) == 0, "found %lld\n",
- (long long)(int)offsetof(struct obd_dqinfo, dqi_bgrace));
- LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_bgrace) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_bgrace));
- LASSERTF((int)offsetof(struct obd_dqinfo, dqi_igrace) == 8, "found %lld\n",
- (long long)(int)offsetof(struct obd_dqinfo, dqi_igrace));
- LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_igrace) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_igrace));
- LASSERTF((int)offsetof(struct obd_dqinfo, dqi_flags) == 16, "found %lld\n",
- (long long)(int)offsetof(struct obd_dqinfo, dqi_flags));
- LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_flags));
- LASSERTF((int)offsetof(struct obd_dqinfo, dqi_valid) == 20, "found %lld\n",
- (long long)(int)offsetof(struct obd_dqinfo, dqi_valid));
- LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_valid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_valid));
-
- /* Checks for struct obd_dqblk */
- LASSERTF((int)sizeof(struct obd_dqblk) == 72, "found %lld\n",
- (long long)(int)sizeof(struct obd_dqblk));
- LASSERTF((int)offsetof(struct obd_dqblk, dqb_bhardlimit) == 0, "found %lld\n",
- (long long)(int)offsetof(struct obd_dqblk, dqb_bhardlimit));
- LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_bhardlimit) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_bhardlimit));
- LASSERTF((int)offsetof(struct obd_dqblk, dqb_bsoftlimit) == 8, "found %lld\n",
- (long long)(int)offsetof(struct obd_dqblk, dqb_bsoftlimit));
- LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_bsoftlimit) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_bsoftlimit));
- LASSERTF((int)offsetof(struct obd_dqblk, dqb_curspace) == 16, "found %lld\n",
- (long long)(int)offsetof(struct obd_dqblk, dqb_curspace));
- LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_curspace) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_curspace));
- LASSERTF((int)offsetof(struct obd_dqblk, dqb_ihardlimit) == 24, "found %lld\n",
- (long long)(int)offsetof(struct obd_dqblk, dqb_ihardlimit));
- LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_ihardlimit) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_ihardlimit));
- LASSERTF((int)offsetof(struct obd_dqblk, dqb_isoftlimit) == 32, "found %lld\n",
- (long long)(int)offsetof(struct obd_dqblk, dqb_isoftlimit));
- LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_isoftlimit) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_isoftlimit));
- LASSERTF((int)offsetof(struct obd_dqblk, dqb_curinodes) == 40, "found %lld\n",
- (long long)(int)offsetof(struct obd_dqblk, dqb_curinodes));
- LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_curinodes) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_curinodes));
- LASSERTF((int)offsetof(struct obd_dqblk, dqb_btime) == 48, "found %lld\n",
- (long long)(int)offsetof(struct obd_dqblk, dqb_btime));
- LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_btime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_btime));
- LASSERTF((int)offsetof(struct obd_dqblk, dqb_itime) == 56, "found %lld\n",
- (long long)(int)offsetof(struct obd_dqblk, dqb_itime));
- LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_itime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_itime));
- LASSERTF((int)offsetof(struct obd_dqblk, dqb_valid) == 64, "found %lld\n",
- (long long)(int)offsetof(struct obd_dqblk, dqb_valid));
- LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_valid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_valid));
- LASSERTF((int)offsetof(struct obd_dqblk, dqb_padding) == 68, "found %lld\n",
- (long long)(int)offsetof(struct obd_dqblk, dqb_padding));
- LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_padding) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_padding));
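- /* The Q_* commands below live in the Lustre-private 0x8001xx range,
- * clear of the generic quotactl command values. */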
- LASSERTF(Q_QUOTACHECK == 0x800100, "found 0x%.8x\n",
- Q_QUOTACHECK);
- LASSERTF(Q_INITQUOTA == 0x800101, "found 0x%.8x\n",
- Q_INITQUOTA);
- LASSERTF(Q_GETOINFO == 0x800102, "found 0x%.8x\n",
- Q_GETOINFO);
- LASSERTF(Q_GETOQUOTA == 0x800103, "found 0x%.8x\n",
- Q_GETOQUOTA);
- LASSERTF(Q_FINVALIDATE == 0x800104, "found 0x%.8x\n",
- Q_FINVALIDATE);
-
- /* Checks for struct niobuf_remote */
- LASSERTF((int)sizeof(struct niobuf_remote) == 16, "found %lld\n",
- (long long)(int)sizeof(struct niobuf_remote));
- LASSERTF((int)offsetof(struct niobuf_remote, rnb_offset) == 0, "found %lld\n",
- (long long)(int)offsetof(struct niobuf_remote, rnb_offset));
- LASSERTF((int)sizeof(((struct niobuf_remote *)0)->rnb_offset) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct niobuf_remote *)0)->rnb_offset));
- LASSERTF((int)offsetof(struct niobuf_remote, rnb_len) == 8, "found %lld\n",
- (long long)(int)offsetof(struct niobuf_remote, rnb_len));
- LASSERTF((int)sizeof(((struct niobuf_remote *)0)->rnb_len) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct niobuf_remote *)0)->rnb_len));
- LASSERTF((int)offsetof(struct niobuf_remote, rnb_flags) == 12, "found %lld\n",
- (long long)(int)offsetof(struct niobuf_remote, rnb_flags));
- LASSERTF((int)sizeof(((struct niobuf_remote *)0)->rnb_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct niobuf_remote *)0)->rnb_flags));
- LASSERTF(OBD_BRW_READ == 0x01, "found 0x%.8x\n",
- OBD_BRW_READ);
- LASSERTF(OBD_BRW_WRITE == 0x02, "found 0x%.8x\n",
- OBD_BRW_WRITE);
- LASSERTF(OBD_BRW_SYNC == 0x08, "found 0x%.8x\n",
- OBD_BRW_SYNC);
- LASSERTF(OBD_BRW_CHECK == 0x10, "found 0x%.8x\n",
- OBD_BRW_CHECK);
- LASSERTF(OBD_BRW_FROM_GRANT == 0x20, "found 0x%.8x\n",
- OBD_BRW_FROM_GRANT);
- LASSERTF(OBD_BRW_GRANTED == 0x40, "found 0x%.8x\n",
- OBD_BRW_GRANTED);
- LASSERTF(OBD_BRW_NOCACHE == 0x80, "found 0x%.8x\n",
- OBD_BRW_NOCACHE);
- LASSERTF(OBD_BRW_NOQUOTA == 0x100, "found 0x%.8x\n",
- OBD_BRW_NOQUOTA);
- LASSERTF(OBD_BRW_SRVLOCK == 0x200, "found 0x%.8x\n",
- OBD_BRW_SRVLOCK);
- LASSERTF(OBD_BRW_ASYNC == 0x400, "found 0x%.8x\n",
- OBD_BRW_ASYNC);
- LASSERTF(OBD_BRW_MEMALLOC == 0x800, "found 0x%.8x\n",
- OBD_BRW_MEMALLOC);
- LASSERTF(OBD_BRW_OVER_USRQUOTA == 0x1000, "found 0x%.8x\n",
- OBD_BRW_OVER_USRQUOTA);
- LASSERTF(OBD_BRW_OVER_GRPQUOTA == 0x2000, "found 0x%.8x\n",
- OBD_BRW_OVER_GRPQUOTA);
- LASSERTF(OBD_BRW_SOFT_SYNC == 0x4000, "found 0x%.8x\n",
- OBD_BRW_SOFT_SYNC);
-
- /* Checks for struct ost_body */
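- /* ost_body is a bare wrapper: its lone obdo member must match the
- * 208-byte struct obdo layout verified above. */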
- LASSERTF((int)sizeof(struct ost_body) == 208, "found %lld\n",
- (long long)(int)sizeof(struct ost_body));
- LASSERTF((int)offsetof(struct ost_body, oa) == 0, "found %lld\n",
- (long long)(int)offsetof(struct ost_body, oa));
- LASSERTF((int)sizeof(((struct ost_body *)0)->oa) == 208, "found %lld\n",
- (long long)(int)sizeof(((struct ost_body *)0)->oa));
-
- /* Checks for struct ll_fid */
- LASSERTF((int)sizeof(struct ll_fid) == 16, "found %lld\n",
- (long long)(int)sizeof(struct ll_fid));
- LASSERTF((int)offsetof(struct ll_fid, id) == 0, "found %lld\n",
- (long long)(int)offsetof(struct ll_fid, id));
- LASSERTF((int)sizeof(((struct ll_fid *)0)->id) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ll_fid *)0)->id));
- LASSERTF((int)offsetof(struct ll_fid, generation) == 8, "found %lld\n",
- (long long)(int)offsetof(struct ll_fid, generation));
- LASSERTF((int)sizeof(((struct ll_fid *)0)->generation) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ll_fid *)0)->generation));
- LASSERTF((int)offsetof(struct ll_fid, f_type) == 12, "found %lld\n",
- (long long)(int)offsetof(struct ll_fid, f_type));
- LASSERTF((int)sizeof(((struct ll_fid *)0)->f_type) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ll_fid *)0)->f_type));
-
- /* Checks for struct mdt_body */
- LASSERTF((int)sizeof(struct mdt_body) == 216, "found %lld\n",
- (long long)(int)sizeof(struct mdt_body));
- LASSERTF((int)offsetof(struct mdt_body, mbo_fid1) == 0, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_fid1));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fid1) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_fid1));
- LASSERTF((int)offsetof(struct mdt_body, mbo_fid2) == 16, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_fid2));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fid2) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_fid2));
- LASSERTF((int)offsetof(struct mdt_body, mbo_handle) == 32, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_handle));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_handle) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_handle));
- LASSERTF((int)offsetof(struct mdt_body, mbo_valid) == 40, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_valid));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_valid) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_valid));
- LASSERTF((int)offsetof(struct mdt_body, mbo_size) == 48, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_size));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_size) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_size));
- LASSERTF((int)offsetof(struct mdt_body, mbo_mtime) == 56, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_mtime));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_mtime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_mtime));
- LASSERTF((int)offsetof(struct mdt_body, mbo_atime) == 64, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_atime));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_atime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_atime));
- LASSERTF((int)offsetof(struct mdt_body, mbo_ctime) == 72, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_ctime));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_ctime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_ctime));
- LASSERTF((int)offsetof(struct mdt_body, mbo_blocks) == 80, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_blocks));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_blocks) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_blocks));
- LASSERTF((int)offsetof(struct mdt_body, mbo_ioepoch) == 88, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_ioepoch));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_ioepoch) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_ioepoch));
- LASSERTF((int)offsetof(struct mdt_body, mbo_t_state) == 96, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_t_state));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_t_state) == 8,
- "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_t_state));
- LASSERTF((int)offsetof(struct mdt_body, mbo_fsuid) == 104, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_fsuid));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fsuid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_fsuid));
- LASSERTF((int)offsetof(struct mdt_body, mbo_fsgid) == 108, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_fsgid));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fsgid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_fsgid));
- LASSERTF((int)offsetof(struct mdt_body, mbo_capability) == 112, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_capability));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_capability) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_capability));
- LASSERTF((int)offsetof(struct mdt_body, mbo_mode) == 116, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_mode));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_mode) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_mode));
- LASSERTF((int)offsetof(struct mdt_body, mbo_uid) == 120, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_uid));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_uid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_uid));
- LASSERTF((int)offsetof(struct mdt_body, mbo_gid) == 124, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_gid));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_gid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_gid));
- LASSERTF((int)offsetof(struct mdt_body, mbo_flags) == 128, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_flags));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_flags));
- LASSERTF((int)offsetof(struct mdt_body, mbo_rdev) == 132, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_rdev));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_rdev) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_rdev));
- LASSERTF((int)offsetof(struct mdt_body, mbo_nlink) == 136, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_nlink));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_nlink) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_nlink));
- LASSERTF((int)offsetof(struct mdt_body, mbo_unused2) == 140, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_unused2));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_unused2) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_unused2));
- LASSERTF((int)offsetof(struct mdt_body, mbo_suppgid) == 144, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_suppgid));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_suppgid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_suppgid));
- LASSERTF((int)offsetof(struct mdt_body, mbo_eadatasize) == 148, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_eadatasize));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_eadatasize) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_eadatasize));
- LASSERTF((int)offsetof(struct mdt_body, mbo_aclsize) == 152, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_aclsize));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_aclsize) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_aclsize));
- LASSERTF((int)offsetof(struct mdt_body, mbo_max_mdsize) == 156, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_max_mdsize));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_max_mdsize) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_max_mdsize));
- LASSERTF((int)offsetof(struct mdt_body, mbo_unused3) == 160, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_unused3));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_unused3) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_unused3));
- LASSERTF((int)offsetof(struct mdt_body, mbo_uid_h) == 164, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_uid_h));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_uid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_uid_h));
- LASSERTF((int)offsetof(struct mdt_body, mbo_gid_h) == 168, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_gid_h));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_gid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_gid_h));
- LASSERTF((int)offsetof(struct mdt_body, mbo_padding_5) == 172, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_padding_5));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_5) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_5));
- LASSERTF((int)offsetof(struct mdt_body, mbo_padding_6) == 176, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_padding_6));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_6) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_6));
- LASSERTF((int)offsetof(struct mdt_body, mbo_padding_7) == 184, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_padding_7));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_7) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_7));
- LASSERTF((int)offsetof(struct mdt_body, mbo_padding_8) == 192, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_padding_8));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_8) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_8));
- LASSERTF((int)offsetof(struct mdt_body, mbo_padding_9) == 200, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_padding_9));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_9) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_9));
- LASSERTF((int)offsetof(struct mdt_body, mbo_padding_10) == 208, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_padding_10));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_10) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_10));
- LASSERTF(MDS_FMODE_CLOSED == 000000000000UL, "found 0%.11oUL\n",
- MDS_FMODE_CLOSED);
- LASSERTF(MDS_FMODE_EXEC == 000000000004UL, "found 0%.11oUL\n",
- MDS_FMODE_EXEC);
- LASSERTF(MDS_OPEN_CREATED == 000000000010UL, "found 0%.11oUL\n",
- MDS_OPEN_CREATED);
- LASSERTF(MDS_OPEN_CROSS == 000000000020UL, "found 0%.11oUL\n",
- MDS_OPEN_CROSS);
- LASSERTF(MDS_OPEN_CREAT == 000000000100UL, "found 0%.11oUL\n",
- MDS_OPEN_CREAT);
- LASSERTF(MDS_OPEN_EXCL == 000000000200UL, "found 0%.11oUL\n",
- MDS_OPEN_EXCL);
- LASSERTF(MDS_OPEN_TRUNC == 000000001000UL, "found 0%.11oUL\n",
- MDS_OPEN_TRUNC);
- LASSERTF(MDS_OPEN_APPEND == 000000002000UL, "found 0%.11oUL\n",
- MDS_OPEN_APPEND);
- LASSERTF(MDS_OPEN_SYNC == 000000010000UL, "found 0%.11oUL\n",
- MDS_OPEN_SYNC);
- LASSERTF(MDS_OPEN_DIRECTORY == 000000200000UL, "found 0%.11oUL\n",
- MDS_OPEN_DIRECTORY);
- LASSERTF(MDS_OPEN_BY_FID == 000040000000UL, "found 0%.11oUL\n",
- MDS_OPEN_BY_FID);
- LASSERTF(MDS_OPEN_DELAY_CREATE == 000100000000UL, "found 0%.11oUL\n",
- MDS_OPEN_DELAY_CREATE);
- LASSERTF(MDS_OPEN_OWNEROVERRIDE == 000200000000UL, "found 0%.11oUL\n",
- MDS_OPEN_OWNEROVERRIDE);
- LASSERTF(MDS_OPEN_JOIN_FILE == 000400000000UL, "found 0%.11oUL\n",
- MDS_OPEN_JOIN_FILE);
- LASSERTF(MDS_OPEN_LOCK == 004000000000UL, "found 0%.11oUL\n",
- MDS_OPEN_LOCK);
- LASSERTF(MDS_OPEN_HAS_EA == 010000000000UL, "found 0%.11oUL\n",
- MDS_OPEN_HAS_EA);
- LASSERTF(MDS_OPEN_HAS_OBJS == 020000000000UL, "found 0%.11oUL\n",
- MDS_OPEN_HAS_OBJS);
- LASSERTF(MDS_OPEN_NORESTORE == 00000000000100000000000ULL, "found 0%.22lloULL\n",
- (long long)MDS_OPEN_NORESTORE);
- LASSERTF(MDS_OPEN_NEWSTRIPE == 00000000000200000000000ULL, "found 0%.22lloULL\n",
- (long long)MDS_OPEN_NEWSTRIPE);
- LASSERTF(MDS_OPEN_VOLATILE == 00000000000400000000000ULL, "found 0%.22lloULL\n",
- (long long)MDS_OPEN_VOLATILE);
- LASSERTF(LUSTRE_SYNC_FL == 0x00000008, "found 0x%.8x\n",
- LUSTRE_SYNC_FL);
- LASSERTF(LUSTRE_IMMUTABLE_FL == 0x00000010, "found 0x%.8x\n",
- LUSTRE_IMMUTABLE_FL);
- LASSERTF(LUSTRE_APPEND_FL == 0x00000020, "found 0x%.8x\n",
- LUSTRE_APPEND_FL);
- LASSERTF(LUSTRE_NODUMP_FL == 0x00000040, "found 0x%.8x\n",
- LUSTRE_NODUMP_FL);
- LASSERTF(LUSTRE_NOATIME_FL == 0x00000080, "found 0x%.8x\n",
- LUSTRE_NOATIME_FL);
- LASSERTF(LUSTRE_INDEX_FL == 0x00001000, "found 0x%.8x\n",
- LUSTRE_INDEX_FL);
- LASSERTF(LUSTRE_DIRSYNC_FL == 0x00010000, "found 0x%.8x\n",
- LUSTRE_DIRSYNC_FL);
- LASSERTF(LUSTRE_TOPDIR_FL == 0x00020000, "found 0x%.8x\n",
- LUSTRE_TOPDIR_FL);
- LASSERTF(LUSTRE_DIRECTIO_FL == 0x00100000, "found 0x%.8x\n",
- LUSTRE_DIRECTIO_FL);
- LASSERTF(LUSTRE_INLINE_DATA_FL == 0x10000000, "found 0x%.8x\n",
- LUSTRE_INLINE_DATA_FL);
- LASSERTF(MDS_INODELOCK_LOOKUP == 0x000001, "found 0x%.8x\n",
- MDS_INODELOCK_LOOKUP);
- LASSERTF(MDS_INODELOCK_UPDATE == 0x000002, "found 0x%.8x\n",
- MDS_INODELOCK_UPDATE);
- LASSERTF(MDS_INODELOCK_OPEN == 0x000004, "found 0x%.8x\n",
- MDS_INODELOCK_OPEN);
- LASSERTF(MDS_INODELOCK_LAYOUT == 0x000008, "found 0x%.8x\n",
- MDS_INODELOCK_LAYOUT);
-
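The deleted checks above all instantiate one mechanical pattern: for every field of a wire structure, assert both its offset and its size against hard-coded values, so that any compiler-, architecture- or edit-induced layout drift fails loudly at module load. As a minimal standalone sketch of that pattern (plain C11, hypothetical struct and field names, not part of the removed Lustre sources):

#include <stddef.h>	/* offsetof() */
#include <stdint.h>

/* Hypothetical wire structure, used only for illustration. */
struct wire_example {
	uint64_t we_handle;	/* expected offset 0,  size 8 */
	uint32_t we_flags;	/* expected offset 8,  size 4 */
	uint32_t we_padding;	/* expected offset 12, size 4 */
};

/* Compile-time equivalents of the runtime LASSERTF() calls above. */
_Static_assert(sizeof(struct wire_example) == 16,
	       "wire_example size drifted");
_Static_assert(offsetof(struct wire_example, we_handle) == 0,
	       "we_handle moved");
_Static_assert(offsetof(struct wire_example, we_flags) == 8,
	       "we_flags moved");
_Static_assert(offsetof(struct wire_example, we_padding) == 12,
	       "we_padding moved");

The generated Lustre checker reports via runtime LASSERTF() with a "found %lld" message rather than static asserts, which has the advantage of printing the actual value seen when a check trips; the sketch above trades that diagnostic for a build-time failure.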
- /* Checks for struct mdt_ioepoch */
- LASSERTF((int)sizeof(struct mdt_ioepoch) == 24, "found %lld\n",
- (long long)(int)sizeof(struct mdt_ioepoch));
- LASSERTF((int)offsetof(struct mdt_ioepoch, mio_handle) == 0, "found %lld\n",
- (long long)(int)offsetof(struct mdt_ioepoch, mio_handle));
- LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->mio_handle) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_ioepoch *)0)->mio_handle));
- LASSERTF((int)offsetof(struct mdt_ioepoch, mio_unused1) == 8, "found %lld\n",
- (long long)(int)offsetof(struct mdt_ioepoch, mio_unused1));
- LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->mio_unused1) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_ioepoch *)0)->mio_unused1));
- LASSERTF((int)offsetof(struct mdt_ioepoch, mio_unused2) == 16, "found %lld\n",
- (long long)(int)offsetof(struct mdt_ioepoch, mio_unused2));
- LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->mio_unused2) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_ioepoch *)0)->mio_unused2));
- LASSERTF((int)offsetof(struct mdt_ioepoch, mio_padding) == 20, "found %lld\n",
- (long long)(int)offsetof(struct mdt_ioepoch, mio_padding));
- LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->mio_padding) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_ioepoch *)0)->mio_padding));
-
- /* Checks for struct mdt_rec_setattr */
- LASSERTF((int)sizeof(struct mdt_rec_setattr) == 136, "found %lld\n",
- (long long)(int)sizeof(struct mdt_rec_setattr));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_opcode) == 0, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_opcode));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_opcode) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_opcode));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_cap) == 4, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_cap));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_cap) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_cap));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsuid) == 8, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsuid));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsuid_h) == 12, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsuid_h));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid_h));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsgid) == 16, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsgid));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsgid_h) == 20, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsgid_h));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid_h));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_suppgid) == 24, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_suppgid));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_suppgid_h) == 28, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_suppgid_h));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid_h));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_1) == 32, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_1));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_1_h) == 36, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_1_h));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1_h));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fid) == 40, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_fid));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fid));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_valid) == 56, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_valid));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_valid) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_valid));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_uid) == 64, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_uid));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_uid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_uid));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_gid) == 68, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_gid));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_gid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_gid));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_size) == 72, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_size));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_size) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_size));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_blocks) == 80, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_blocks));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_blocks) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_blocks));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_mtime) == 88, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_mtime));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_mtime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_mtime));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_atime) == 96, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_atime));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_atime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_atime));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_ctime) == 104, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_ctime));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_ctime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_ctime));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_attr_flags) == 112, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_attr_flags));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_attr_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_attr_flags));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_mode) == 116, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_mode));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_mode) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_mode));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_bias) == 120, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_bias));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_bias) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_bias));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_3) == 124, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_3));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_3) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_3));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_4) == 128, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_4));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_4) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_4));
- LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_5) == 132, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_5));
- LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_5) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_5));
-
- /* Checks for struct mdt_rec_create */
- LASSERTF((int)sizeof(struct mdt_rec_create) == 136, "found %lld\n",
- (long long)(int)sizeof(struct mdt_rec_create));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_opcode) == 0, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_opcode));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_opcode) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_opcode));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_cap) == 4, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_cap));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_cap) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_cap));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsuid) == 8, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_fsuid));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsuid_h) == 12, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_fsuid_h));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid_h));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsgid) == 16, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_fsgid));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsgid_h) == 20, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_fsgid_h));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid_h));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid1) == 24, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid1));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid1_h) == 28, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid1_h));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1_h));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid2) == 32, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid2));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid2_h) == 36, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid2_h));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2_h));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_fid1) == 40, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_fid1));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fid1) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fid1));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_fid2) == 56, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_fid2));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fid2) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fid2));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_old_handle) == 72, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_old_handle));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_old_handle) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_old_handle));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_time) == 80, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_time));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_time) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_time));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_rdev) == 88, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_rdev));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_rdev) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_rdev));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_ioepoch) == 96, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_ioepoch));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_ioepoch) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_ioepoch));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_padding_1) == 104, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_padding_1));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_padding_1) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_padding_1));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_mode) == 112, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_mode));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_mode) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_mode));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_bias) == 116, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_bias));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_bias) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_bias));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_flags_l) == 120, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_flags_l));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_flags_l) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_flags_l));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_flags_h) == 124, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_flags_h));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_flags_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_flags_h));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_umask) == 128, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_umask));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_umask) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_umask));
- LASSERTF((int)offsetof(struct mdt_rec_create, cr_padding_4) == 132, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_create, cr_padding_4));
- LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_padding_4) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_padding_4));
-
- /* Checks for struct mdt_rec_link */
- LASSERTF((int)sizeof(struct mdt_rec_link) == 136, "found %lld\n",
- (long long)(int)sizeof(struct mdt_rec_link));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_opcode) == 0, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_opcode));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_opcode) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_opcode));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_cap) == 4, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_cap));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_cap) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_cap));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsuid) == 8, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_fsuid));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsuid_h) == 12, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_fsuid_h));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid_h));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsgid) == 16, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_fsgid));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsgid_h) == 20, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_fsgid_h));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid_h));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid1) == 24, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid1));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid1_h) == 28, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid1_h));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1_h));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid2) == 32, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid2));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid2_h) == 36, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid2_h));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2_h));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_fid1) == 40, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_fid1));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fid1) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fid1));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_fid2) == 56, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_fid2));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fid2) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fid2));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_time) == 72, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_time));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_time) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_time));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_1) == 80, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_padding_1));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_1) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_1));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_2) == 88, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_padding_2));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_2) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_2));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_3) == 96, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_padding_3));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_3) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_3));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_4) == 104, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_padding_4));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_4) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_4));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_bias) == 112, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_bias));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_bias) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_bias));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_5) == 116, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_padding_5));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_5) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_5));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_6) == 120, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_padding_6));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_6) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_6));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_7) == 124, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_padding_7));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_7) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_7));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_8) == 128, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_padding_8));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_8) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_8));
- LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_9) == 132, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_link, lk_padding_9));
- LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_9) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_9));
-
- /* Checks for struct mdt_rec_unlink */
- LASSERTF((int)sizeof(struct mdt_rec_unlink) == 136, "found %lld\n",
- (long long)(int)sizeof(struct mdt_rec_unlink));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_opcode) == 0, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_opcode));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_opcode) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_opcode));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_cap) == 4, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_cap));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_cap) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_cap));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsuid) == 8, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsuid));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsuid_h) == 12, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsuid_h));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid_h));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsgid) == 16, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsgid));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsgid_h) == 20, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsgid_h));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid_h));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid1) == 24, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid1));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid1_h) == 28, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid1_h));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1_h));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid2) == 32, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid2));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid2_h) == 36, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid2_h));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2_h));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fid1) == 40, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_fid1));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid1) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid1));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fid2) == 56, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_fid2));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid2) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid2));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_time) == 72, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_time));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_time) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_time));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_2) == 80, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_2));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_2) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_2));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_3) == 88, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_3));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_3) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_3));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_4) == 96, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_4));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_4) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_4));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_5) == 104, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_5));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_5) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_5));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_bias) == 112, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_bias));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_bias) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_bias));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_mode) == 116, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_mode));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_mode) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_mode));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_6) == 120, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_6));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_6) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_6));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_7) == 124, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_7));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_7) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_7));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_8) == 128, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_8));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_8) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_8));
- LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_9) == 132, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_9));
- LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_9) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_9));
-
- /* Checks for struct mdt_rec_rename */
- LASSERTF((int)sizeof(struct mdt_rec_rename) == 136, "found %lld\n",
- (long long)(int)sizeof(struct mdt_rec_rename));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_opcode) == 0, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_opcode));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_opcode) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_opcode));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_cap) == 4, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_cap));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_cap) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_cap));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsuid) == 8, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_fsuid));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsuid_h) == 12, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_fsuid_h));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid_h));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsgid) == 16, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_fsgid));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsgid_h) == 20, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_fsgid_h));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid_h));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid1) == 24, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid1));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid1_h) == 28, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid1_h));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1_h));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid2) == 32, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid2));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid2_h) == 36, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid2_h));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2_h));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fid1) == 40, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_fid1));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fid1) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fid1));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fid2) == 56, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_fid2));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fid2) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fid2));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_time) == 72, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_time));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_time) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_time));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_1) == 80, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_1));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_1) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_1));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_2) == 88, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_2));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_2) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_2));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_3) == 96, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_3));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_3) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_3));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_4) == 104, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_4));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_4) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_4));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_bias) == 112, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_bias));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_bias) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_bias));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_mode) == 116, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_mode));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_mode) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_mode));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_5) == 120, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_5));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_5) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_5));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_6) == 124, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_6));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_6) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_6));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_7) == 128, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_7));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_7) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_7));
- LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_8) == 132, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_8));
- LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_8) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_8));
-
- /* Checks for struct mdt_rec_setxattr */
- LASSERTF((int)sizeof(struct mdt_rec_setxattr) == 136, "found %lld\n",
- (long long)(int)sizeof(struct mdt_rec_setxattr));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_opcode) == 0, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_opcode));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_opcode) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_opcode));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_cap) == 4, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_cap));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_cap) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_cap));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsuid) == 8, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsuid));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsuid_h) == 12, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsuid_h));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid_h));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsgid) == 16, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsgid));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsgid_h) == 20, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsgid_h));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid_h));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid1) == 24, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid1));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid1_h) == 28, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid1_h));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1_h));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid2) == 32, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid2));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid2_h) == 36, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid2_h));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2_h));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fid) == 40, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fid));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fid));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_1) == 56, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_1));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_1) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_1));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_2) == 64, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_2));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_2) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_2));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_3) == 68, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_3));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_3) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_3));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_valid) == 72, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_valid));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_valid) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_valid));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_time) == 80, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_time));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_time) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_time));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_5) == 88, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_5));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_5) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_5));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_6) == 96, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_6));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_6) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_6));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_7) == 104, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_7));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_7) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_7));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_size) == 112, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_size));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_size) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_size));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_flags) == 116, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_flags));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_flags));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_8) == 120, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_8));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_8) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_8));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_9) == 124, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_9));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_9) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_9));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_10) == 128, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_10));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_10) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_10));
- LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_11) == 132, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_11));
- LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_11) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_11));
-
- /* Checks for struct mdt_rec_reint */
- LASSERTF((int)sizeof(struct mdt_rec_reint) == 136, "found %lld\n",
- (long long)(int)sizeof(struct mdt_rec_reint));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_opcode) == 0, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_opcode));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_opcode) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_opcode));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_cap) == 4, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_cap));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_cap) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_cap));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsuid) == 8, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_fsuid));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsuid_h) == 12, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_fsuid_h));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid_h));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsgid) == 16, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_fsgid));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsgid_h) == 20, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_fsgid_h));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid_h));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid1) == 24, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid1));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid1_h) == 28, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid1_h));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1_h));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid2) == 32, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid2));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid2_h) == 36, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid2_h));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2_h));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fid1) == 40, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_fid1));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fid1) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fid1));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fid2) == 56, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_fid2));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fid2) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fid2));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_mtime) == 72, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_mtime));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_mtime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_mtime));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_atime) == 80, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_atime));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_atime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_atime));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_ctime) == 88, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_ctime));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_ctime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_ctime));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_size) == 96, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_size));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_size) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_size));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_blocks) == 104, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_blocks));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_blocks) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_blocks));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_bias) == 112, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_bias));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_bias) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_bias));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_mode) == 116, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_mode));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_mode) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_mode));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_flags) == 120, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_flags));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_flags));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_flags_h) == 124, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_flags_h));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_flags_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_flags_h));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_umask) == 128, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_umask));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_umask) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_umask));
- LASSERTF((int)offsetof(struct mdt_rec_reint, rr_padding_4) == 132, "found %lld\n",
- (long long)(int)offsetof(struct mdt_rec_reint, rr_padding_4));
- LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_padding_4) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_padding_4));
-
- /* Checks for struct lmv_desc */
- LASSERTF((int)sizeof(struct lmv_desc) == 88, "found %lld\n",
- (long long)(int)sizeof(struct lmv_desc));
- LASSERTF((int)offsetof(struct lmv_desc, ld_tgt_count) == 0, "found %lld\n",
- (long long)(int)offsetof(struct lmv_desc, ld_tgt_count));
- LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_tgt_count) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_desc *)0)->ld_tgt_count));
- LASSERTF((int)offsetof(struct lmv_desc, ld_active_tgt_count) == 4, "found %lld\n",
- (long long)(int)offsetof(struct lmv_desc, ld_active_tgt_count));
- LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_active_tgt_count) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_desc *)0)->ld_active_tgt_count));
- LASSERTF((int)offsetof(struct lmv_desc, ld_default_stripe_count) == 8, "found %lld\n",
- (long long)(int)offsetof(struct lmv_desc, ld_default_stripe_count));
- LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_default_stripe_count) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_desc *)0)->ld_default_stripe_count));
- LASSERTF((int)offsetof(struct lmv_desc, ld_pattern) == 12, "found %lld\n",
- (long long)(int)offsetof(struct lmv_desc, ld_pattern));
- LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_pattern) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_desc *)0)->ld_pattern));
- LASSERTF((int)offsetof(struct lmv_desc, ld_default_hash_size) == 16, "found %lld\n",
- (long long)(int)offsetof(struct lmv_desc, ld_default_hash_size));
- LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_default_hash_size) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_desc *)0)->ld_default_hash_size));
- LASSERTF((int)offsetof(struct lmv_desc, ld_padding_1) == 24, "found %lld\n",
- (long long)(int)offsetof(struct lmv_desc, ld_padding_1));
- LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_1) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_1));
- LASSERTF((int)offsetof(struct lmv_desc, ld_padding_2) == 32, "found %lld\n",
- (long long)(int)offsetof(struct lmv_desc, ld_padding_2));
- LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_2) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_2));
- LASSERTF((int)offsetof(struct lmv_desc, ld_qos_maxage) == 36, "found %lld\n",
- (long long)(int)offsetof(struct lmv_desc, ld_qos_maxage));
- LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_qos_maxage) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_desc *)0)->ld_qos_maxage));
- LASSERTF((int)offsetof(struct lmv_desc, ld_padding_3) == 40, "found %lld\n",
- (long long)(int)offsetof(struct lmv_desc, ld_padding_3));
- LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_3) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_3));
- LASSERTF((int)offsetof(struct lmv_desc, ld_padding_4) == 44, "found %lld\n",
- (long long)(int)offsetof(struct lmv_desc, ld_padding_4));
- LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_4) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_4));
- LASSERTF((int)offsetof(struct lmv_desc, ld_uuid) == 48, "found %lld\n",
- (long long)(int)offsetof(struct lmv_desc, ld_uuid));
- LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_uuid) == 40, "found %lld\n",
- (long long)(int)sizeof(((struct lmv_desc *)0)->ld_uuid));
-
- /* Checks for struct lov_desc */
- LASSERTF((int)sizeof(struct lov_desc) == 88, "found %lld\n",
- (long long)(int)sizeof(struct lov_desc));
- LASSERTF((int)offsetof(struct lov_desc, ld_tgt_count) == 0, "found %lld\n",
- (long long)(int)offsetof(struct lov_desc, ld_tgt_count));
- LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_tgt_count) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lov_desc *)0)->ld_tgt_count));
- LASSERTF((int)offsetof(struct lov_desc, ld_active_tgt_count) == 4, "found %lld\n",
- (long long)(int)offsetof(struct lov_desc, ld_active_tgt_count));
- LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_active_tgt_count) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lov_desc *)0)->ld_active_tgt_count));
- LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_count) == 8, "found %lld\n",
- (long long)(int)offsetof(struct lov_desc, ld_default_stripe_count));
- LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_count) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_count));
- LASSERTF((int)offsetof(struct lov_desc, ld_pattern) == 12, "found %lld\n",
- (long long)(int)offsetof(struct lov_desc, ld_pattern));
- LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_pattern) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lov_desc *)0)->ld_pattern));
- LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_size) == 16, "found %lld\n",
- (long long)(int)offsetof(struct lov_desc, ld_default_stripe_size));
- LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_size) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_size));
- LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_offset) == 24, "found %lld\n",
- (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset));
- LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset));
- LASSERTF((int)offsetof(struct lov_desc, ld_padding_0) == 32, "found %lld\n",
- (long long)(int)offsetof(struct lov_desc, ld_padding_0));
- LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_0) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_0));
- LASSERTF((int)offsetof(struct lov_desc, ld_qos_maxage) == 36, "found %lld\n",
- (long long)(int)offsetof(struct lov_desc, ld_qos_maxage));
- LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_maxage) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_maxage));
- LASSERTF((int)offsetof(struct lov_desc, ld_padding_1) == 40, "found %lld\n",
- (long long)(int)offsetof(struct lov_desc, ld_padding_1));
- LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_1) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_1));
- LASSERTF((int)offsetof(struct lov_desc, ld_padding_2) == 44, "found %lld\n",
- (long long)(int)offsetof(struct lov_desc, ld_padding_2));
- LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_2) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_2));
- LASSERTF((int)offsetof(struct lov_desc, ld_uuid) == 48, "found %lld\n",
- (long long)(int)offsetof(struct lov_desc, ld_uuid));
- LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_uuid) == 40, "found %lld\n",
- (long long)(int)sizeof(((struct lov_desc *)0)->ld_uuid));
- BUILD_BUG_ON(LOV_DESC_MAGIC != 0xB0CCDE5C);
-
- /* Checks for struct ldlm_res_id */
- LASSERTF((int)sizeof(struct ldlm_res_id) == 32, "found %lld\n",
- (long long)(int)sizeof(struct ldlm_res_id));
- BUILD_BUG_ON(RES_NAME_SIZE != 4);
- LASSERTF((int)offsetof(struct ldlm_res_id, name[4]) == 32, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_res_id, name[4]));
- LASSERTF((int)sizeof(((struct ldlm_res_id *)0)->name[4]) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_res_id *)0)->name[4]));
-
- /* Checks for struct ldlm_extent */
- LASSERTF((int)sizeof(struct ldlm_extent) == 24, "found %lld\n",
- (long long)(int)sizeof(struct ldlm_extent));
- LASSERTF((int)offsetof(struct ldlm_extent, start) == 0, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_extent, start));
- LASSERTF((int)sizeof(((struct ldlm_extent *)0)->start) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_extent *)0)->start));
- LASSERTF((int)offsetof(struct ldlm_extent, end) == 8, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_extent, end));
- LASSERTF((int)sizeof(((struct ldlm_extent *)0)->end) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_extent *)0)->end));
- LASSERTF((int)offsetof(struct ldlm_extent, gid) == 16, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_extent, gid));
- LASSERTF((int)sizeof(((struct ldlm_extent *)0)->gid) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_extent *)0)->gid));
-
- /* Checks for struct ldlm_inodebits */
- LASSERTF((int)sizeof(struct ldlm_inodebits) == 8, "found %lld\n",
- (long long)(int)sizeof(struct ldlm_inodebits));
- LASSERTF((int)offsetof(struct ldlm_inodebits, bits) == 0, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_inodebits, bits));
- LASSERTF((int)sizeof(((struct ldlm_inodebits *)0)->bits) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_inodebits *)0)->bits));
-
- /* Checks for struct ldlm_flock_wire */
- LASSERTF((int)sizeof(struct ldlm_flock_wire) == 32, "found %lld\n",
- (long long)(int)sizeof(struct ldlm_flock_wire));
- LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_start) == 0, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_flock_wire, lfw_start));
- LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_start) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_start));
- LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_end) == 8, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_flock_wire, lfw_end));
- LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_end) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_end));
- LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_owner) == 16, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_flock_wire, lfw_owner));
- LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_owner) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_owner));
- LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_padding) == 24, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_flock_wire, lfw_padding));
- LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_padding) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_padding));
- LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_pid) == 28, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_flock_wire, lfw_pid));
- LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_pid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_pid));
-
- /* Checks for struct ldlm_intent */
- LASSERTF((int)sizeof(struct ldlm_intent) == 8, "found %lld\n",
- (long long)(int)sizeof(struct ldlm_intent));
- LASSERTF((int)offsetof(struct ldlm_intent, opc) == 0, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_intent, opc));
- LASSERTF((int)sizeof(((struct ldlm_intent *)0)->opc) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_intent *)0)->opc));
-
- /* Checks for struct ldlm_resource_desc */
- LASSERTF((int)sizeof(struct ldlm_resource_desc) == 40, "found %lld\n",
- (long long)(int)sizeof(struct ldlm_resource_desc));
- LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_type) == 0, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_resource_desc, lr_type));
- LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_type) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_type));
- LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_padding) == 4, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_resource_desc, lr_padding));
- LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_padding) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_padding));
- LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_name) == 8, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_resource_desc, lr_name));
- LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_name) == 32, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_name));
-
- /* Checks for struct ldlm_lock_desc */
- LASSERTF((int)sizeof(struct ldlm_lock_desc) == 80, "found %lld\n",
- (long long)(int)sizeof(struct ldlm_lock_desc));
- LASSERTF((int)offsetof(struct ldlm_lock_desc, l_resource) == 0, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_lock_desc, l_resource));
- LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_resource) == 40, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_resource));
- LASSERTF((int)offsetof(struct ldlm_lock_desc, l_req_mode) == 40, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_lock_desc, l_req_mode));
- LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_req_mode) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_req_mode));
- LASSERTF((int)offsetof(struct ldlm_lock_desc, l_granted_mode) == 44, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_lock_desc, l_granted_mode));
- LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_granted_mode) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_granted_mode));
- LASSERTF((int)offsetof(struct ldlm_lock_desc, l_policy_data) == 48, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_lock_desc, l_policy_data));
- LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_policy_data) == 32, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_policy_data));
-
- /* Checks for struct ldlm_request */
- LASSERTF((int)sizeof(struct ldlm_request) == 104, "found %lld\n",
- (long long)(int)sizeof(struct ldlm_request));
- LASSERTF((int)offsetof(struct ldlm_request, lock_flags) == 0, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_request, lock_flags));
- LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_request *)0)->lock_flags));
- LASSERTF((int)offsetof(struct ldlm_request, lock_count) == 4, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_request, lock_count));
- LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_count) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_request *)0)->lock_count));
- LASSERTF((int)offsetof(struct ldlm_request, lock_desc) == 8, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_request, lock_desc));
- LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_desc) == 80, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_request *)0)->lock_desc));
- LASSERTF((int)offsetof(struct ldlm_request, lock_handle) == 88, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_request, lock_handle));
- LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_handle) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_request *)0)->lock_handle));
-
- /* Checks for struct ldlm_reply */
- LASSERTF((int)sizeof(struct ldlm_reply) == 112, "found %lld\n",
- (long long)(int)sizeof(struct ldlm_reply));
- LASSERTF((int)offsetof(struct ldlm_reply, lock_flags) == 0, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_reply, lock_flags));
- LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_flags));
- LASSERTF((int)offsetof(struct ldlm_reply, lock_padding) == 4, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_reply, lock_padding));
- LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_padding) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_padding));
- LASSERTF((int)offsetof(struct ldlm_reply, lock_desc) == 8, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_reply, lock_desc));
- LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_desc) == 80, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_desc));
- LASSERTF((int)offsetof(struct ldlm_reply, lock_handle) == 88, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_reply, lock_handle));
- LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_handle) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_handle));
- LASSERTF((int)offsetof(struct ldlm_reply, lock_policy_res1) == 96, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_reply, lock_policy_res1));
- LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_policy_res1) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_policy_res1));
- LASSERTF((int)offsetof(struct ldlm_reply, lock_policy_res2) == 104, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_reply, lock_policy_res2));
- LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_policy_res2) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_policy_res2));
-
- /* Checks for struct ost_lvb_v1 */
- LASSERTF((int)sizeof(struct ost_lvb_v1) == 40, "found %lld\n",
- (long long)(int)sizeof(struct ost_lvb_v1));
- LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_size) == 0, "found %lld\n",
- (long long)(int)offsetof(struct ost_lvb_v1, lvb_size));
- LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_size) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_size));
- LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_mtime) == 8, "found %lld\n",
- (long long)(int)offsetof(struct ost_lvb_v1, lvb_mtime));
- LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_mtime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_mtime));
- LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_atime) == 16, "found %lld\n",
- (long long)(int)offsetof(struct ost_lvb_v1, lvb_atime));
- LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_atime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_atime));
- LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_ctime) == 24, "found %lld\n",
- (long long)(int)offsetof(struct ost_lvb_v1, lvb_ctime));
- LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_ctime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_ctime));
- LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_blocks) == 32, "found %lld\n",
- (long long)(int)offsetof(struct ost_lvb_v1, lvb_blocks));
- LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_blocks) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_blocks));
-
- /* Checks for struct ost_lvb */
- LASSERTF((int)sizeof(struct ost_lvb) == 56, "found %lld\n",
- (long long)(int)sizeof(struct ost_lvb));
- LASSERTF((int)offsetof(struct ost_lvb, lvb_size) == 0, "found %lld\n",
- (long long)(int)offsetof(struct ost_lvb, lvb_size));
- LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_size) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_size));
- LASSERTF((int)offsetof(struct ost_lvb, lvb_mtime) == 8, "found %lld\n",
- (long long)(int)offsetof(struct ost_lvb, lvb_mtime));
- LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_mtime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_mtime));
- LASSERTF((int)offsetof(struct ost_lvb, lvb_atime) == 16, "found %lld\n",
- (long long)(int)offsetof(struct ost_lvb, lvb_atime));
- LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_atime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_atime));
- LASSERTF((int)offsetof(struct ost_lvb, lvb_ctime) == 24, "found %lld\n",
- (long long)(int)offsetof(struct ost_lvb, lvb_ctime));
- LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_ctime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_ctime));
- LASSERTF((int)offsetof(struct ost_lvb, lvb_blocks) == 32, "found %lld\n",
- (long long)(int)offsetof(struct ost_lvb, lvb_blocks));
- LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_blocks) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_blocks));
- LASSERTF((int)offsetof(struct ost_lvb, lvb_mtime_ns) == 40, "found %lld\n",
- (long long)(int)offsetof(struct ost_lvb, lvb_mtime_ns));
- LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_mtime_ns) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_mtime_ns));
- LASSERTF((int)offsetof(struct ost_lvb, lvb_atime_ns) == 44, "found %lld\n",
- (long long)(int)offsetof(struct ost_lvb, lvb_atime_ns));
- LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_atime_ns) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_atime_ns));
- LASSERTF((int)offsetof(struct ost_lvb, lvb_ctime_ns) == 48, "found %lld\n",
- (long long)(int)offsetof(struct ost_lvb, lvb_ctime_ns));
- LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_ctime_ns) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_ctime_ns));
- LASSERTF((int)offsetof(struct ost_lvb, lvb_padding) == 52, "found %lld\n",
- (long long)(int)offsetof(struct ost_lvb, lvb_padding));
- LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_padding) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_padding));
-
- /* Checks for struct lquota_lvb */
- LASSERTF((int)sizeof(struct lquota_lvb) == 40, "found %lld\n",
- (long long)(int)sizeof(struct lquota_lvb));
- LASSERTF((int)offsetof(struct lquota_lvb, lvb_flags) == 0, "found %lld\n",
- (long long)(int)offsetof(struct lquota_lvb, lvb_flags));
- LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_flags) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_flags));
- LASSERTF((int)offsetof(struct lquota_lvb, lvb_id_may_rel) == 8, "found %lld\n",
- (long long)(int)offsetof(struct lquota_lvb, lvb_id_may_rel));
- LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_id_may_rel) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_id_may_rel));
- LASSERTF((int)offsetof(struct lquota_lvb, lvb_id_rel) == 16, "found %lld\n",
- (long long)(int)offsetof(struct lquota_lvb, lvb_id_rel));
- LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_id_rel) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_id_rel));
- LASSERTF((int)offsetof(struct lquota_lvb, lvb_id_qunit) == 24, "found %lld\n",
- (long long)(int)offsetof(struct lquota_lvb, lvb_id_qunit));
- LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_id_qunit) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_id_qunit));
- LASSERTF((int)offsetof(struct lquota_lvb, lvb_pad1) == 32, "found %lld\n",
- (long long)(int)offsetof(struct lquota_lvb, lvb_pad1));
- LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_pad1) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_pad1));
- LASSERTF(LQUOTA_FL_EDQUOT == 1, "found %lld\n",
- (long long)LQUOTA_FL_EDQUOT);
-
- /* Checks for struct ldlm_gl_lquota_desc */
- LASSERTF((int)sizeof(struct ldlm_gl_lquota_desc) == 64, "found %lld\n",
- (long long)(int)sizeof(struct ldlm_gl_lquota_desc));
- LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_id) == 0, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_id));
- LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_id) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_id));
- LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_flags) == 16, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_flags));
- LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_flags) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_flags));
- LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_ver) == 24, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_ver));
- LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_ver) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_ver));
- LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_hardlimit) == 32, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_hardlimit));
- LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_hardlimit) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_hardlimit));
- LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_softlimit) == 40, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_softlimit));
- LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_softlimit) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_softlimit));
- LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_time) == 48, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_time));
- LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_time) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_time));
- LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_pad2) == 56, "found %lld\n",
- (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_pad2));
- LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_pad2) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_pad2));
-
- /* Checks for struct mgs_send_param */
- LASSERTF((int)sizeof(struct mgs_send_param) == 1024, "found %lld\n",
- (long long)(int)sizeof(struct mgs_send_param));
- BUILD_BUG_ON(MGS_PARAM_MAXLEN != 1024);
- LASSERTF((int)offsetof(struct mgs_send_param, mgs_param[1024]) == 1024, "found %lld\n",
- (long long)(int)offsetof(struct mgs_send_param, mgs_param[1024]));
- LASSERTF((int)sizeof(((struct mgs_send_param *)0)->mgs_param[1024]) == 1, "found %lld\n",
- (long long)(int)sizeof(((struct mgs_send_param *)0)->mgs_param[1024]));
-
- /* Checks for struct cfg_marker */
- LASSERTF((int)sizeof(struct cfg_marker) == 160, "found %lld\n",
- (long long)(int)sizeof(struct cfg_marker));
- LASSERTF((int)offsetof(struct cfg_marker, cm_step) == 0, "found %lld\n",
- (long long)(int)offsetof(struct cfg_marker, cm_step));
- LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_step) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct cfg_marker *)0)->cm_step));
- LASSERTF((int)offsetof(struct cfg_marker, cm_flags) == 4, "found %lld\n",
- (long long)(int)offsetof(struct cfg_marker, cm_flags));
- LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct cfg_marker *)0)->cm_flags));
- LASSERTF((int)offsetof(struct cfg_marker, cm_vers) == 8, "found %lld\n",
- (long long)(int)offsetof(struct cfg_marker, cm_vers));
- LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_vers) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct cfg_marker *)0)->cm_vers));
- LASSERTF((int)offsetof(struct cfg_marker, cm_padding) == 12, "found %lld\n",
- (long long)(int)offsetof(struct cfg_marker, cm_padding));
- LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_padding) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct cfg_marker *)0)->cm_padding));
- LASSERTF((int)offsetof(struct cfg_marker, cm_createtime) == 16, "found %lld\n",
- (long long)(int)offsetof(struct cfg_marker, cm_createtime));
- LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_createtime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct cfg_marker *)0)->cm_createtime));
- LASSERTF((int)offsetof(struct cfg_marker, cm_canceltime) == 24, "found %lld\n",
- (long long)(int)offsetof(struct cfg_marker, cm_canceltime));
- LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_canceltime) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct cfg_marker *)0)->cm_canceltime));
- LASSERTF((int)offsetof(struct cfg_marker, cm_tgtname) == 32, "found %lld\n",
- (long long)(int)offsetof(struct cfg_marker, cm_tgtname));
- LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_tgtname) == 64, "found %lld\n",
- (long long)(int)sizeof(((struct cfg_marker *)0)->cm_tgtname));
- LASSERTF((int)offsetof(struct cfg_marker, cm_comment) == 96, "found %lld\n",
- (long long)(int)offsetof(struct cfg_marker, cm_comment));
- LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_comment) == 64, "found %lld\n",
- (long long)(int)sizeof(((struct cfg_marker *)0)->cm_comment));
-
- /* Checks for struct llog_logid */
- LASSERTF((int)sizeof(struct llog_logid) == 20, "found %lld\n",
- (long long)(int)sizeof(struct llog_logid));
- LASSERTF((int)offsetof(struct llog_logid, lgl_oi) == 0, "found %lld\n",
- (long long)(int)offsetof(struct llog_logid, lgl_oi));
- LASSERTF((int)sizeof(((struct llog_logid *)0)->lgl_oi) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct llog_logid *)0)->lgl_oi));
- LASSERTF((int)offsetof(struct llog_logid, lgl_ogen) == 16, "found %lld\n",
- (long long)(int)offsetof(struct llog_logid, lgl_ogen));
- LASSERTF((int)sizeof(((struct llog_logid *)0)->lgl_ogen) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_logid *)0)->lgl_ogen));
- BUILD_BUG_ON(OST_SZ_REC != 274730752);
- BUILD_BUG_ON(MDS_UNLINK_REC != 274801668);
- BUILD_BUG_ON(MDS_UNLINK64_REC != 275325956);
- BUILD_BUG_ON(MDS_SETATTR64_REC != 275325953);
- BUILD_BUG_ON(OBD_CFG_REC != 274857984);
- BUILD_BUG_ON(LLOG_GEN_REC != 274989056);
- BUILD_BUG_ON(CHANGELOG_REC != 275120128);
- BUILD_BUG_ON(CHANGELOG_USER_REC != 275185664);
- BUILD_BUG_ON(LLOG_HDR_MAGIC != 275010873);
- BUILD_BUG_ON(LLOG_LOGID_MAGIC != 275010875);
-
- /* Checks for struct llog_catid */
- LASSERTF((int)sizeof(struct llog_catid) == 32, "found %lld\n",
- (long long)(int)sizeof(struct llog_catid));
- LASSERTF((int)offsetof(struct llog_catid, lci_logid) == 0, "found %lld\n",
- (long long)(int)offsetof(struct llog_catid, lci_logid));
- LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_logid) == 20, "found %lld\n",
- (long long)(int)sizeof(((struct llog_catid *)0)->lci_logid));
- LASSERTF((int)offsetof(struct llog_catid, lci_padding1) == 20, "found %lld\n",
- (long long)(int)offsetof(struct llog_catid, lci_padding1));
- LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding1) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding1));
- LASSERTF((int)offsetof(struct llog_catid, lci_padding2) == 24, "found %lld\n",
- (long long)(int)offsetof(struct llog_catid, lci_padding2));
- LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding2) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding2));
- LASSERTF((int)offsetof(struct llog_catid, lci_padding3) == 28, "found %lld\n",
- (long long)(int)offsetof(struct llog_catid, lci_padding3));
- LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding3) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding3));
-
- /* Checks for struct llog_rec_hdr */
- LASSERTF((int)sizeof(struct llog_rec_hdr) == 16, "found %lld\n",
- (long long)(int)sizeof(struct llog_rec_hdr));
- LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_len) == 0, "found %lld\n",
- (long long)(int)offsetof(struct llog_rec_hdr, lrh_len));
- LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_len) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_len));
- LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_index) == 4, "found %lld\n",
- (long long)(int)offsetof(struct llog_rec_hdr, lrh_index));
- LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_index) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_index));
- LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_type) == 8, "found %lld\n",
- (long long)(int)offsetof(struct llog_rec_hdr, lrh_type));
- LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_type) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_type));
- LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_id) == 12, "found %lld\n",
- (long long)(int)offsetof(struct llog_rec_hdr, lrh_id));
- LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_id) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_id));
-
- /* Checks for struct llog_rec_tail */
- LASSERTF((int)sizeof(struct llog_rec_tail) == 8, "found %lld\n",
- (long long)(int)sizeof(struct llog_rec_tail));
- LASSERTF((int)offsetof(struct llog_rec_tail, lrt_len) == 0, "found %lld\n",
- (long long)(int)offsetof(struct llog_rec_tail, lrt_len));
- LASSERTF((int)sizeof(((struct llog_rec_tail *)0)->lrt_len) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_rec_tail *)0)->lrt_len));
- LASSERTF((int)offsetof(struct llog_rec_tail, lrt_index) == 4, "found %lld\n",
- (long long)(int)offsetof(struct llog_rec_tail, lrt_index));
- LASSERTF((int)sizeof(((struct llog_rec_tail *)0)->lrt_index) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_rec_tail *)0)->lrt_index));
-
- /* Checks for struct llog_logid_rec */
- LASSERTF((int)sizeof(struct llog_logid_rec) == 64, "found %lld\n",
- (long long)(int)sizeof(struct llog_logid_rec));
- LASSERTF((int)offsetof(struct llog_logid_rec, lid_hdr) == 0, "found %lld\n",
- (long long)(int)offsetof(struct llog_logid_rec, lid_hdr));
- LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_hdr) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_hdr));
- LASSERTF((int)offsetof(struct llog_logid_rec, lid_id) == 16, "found %lld\n",
- (long long)(int)offsetof(struct llog_logid_rec, lid_id));
- LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_id) == 20, "found %lld\n",
- (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_id));
- LASSERTF((int)offsetof(struct llog_logid_rec, lid_padding1) == 36, "found %lld\n",
- (long long)(int)offsetof(struct llog_logid_rec, lid_padding1));
- LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_padding1) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_padding1));
- LASSERTF((int)offsetof(struct llog_logid_rec, lid_padding2) == 40, "found %lld\n",
- (long long)(int)offsetof(struct llog_logid_rec, lid_padding2));
- LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_padding2) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_padding2));
- LASSERTF((int)offsetof(struct llog_logid_rec, lid_padding3) == 48, "found %lld\n",
- (long long)(int)offsetof(struct llog_logid_rec, lid_padding3));
- LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_padding3) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_padding3));
- LASSERTF((int)offsetof(struct llog_logid_rec, lid_tail) == 56, "found %lld\n",
- (long long)(int)offsetof(struct llog_logid_rec, lid_tail));
- LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_tail) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_tail));
-
- /* Checks for struct llog_unlink_rec */
- LASSERTF((int)sizeof(struct llog_unlink_rec) == 40, "found %lld\n",
- (long long)(int)sizeof(struct llog_unlink_rec));
- LASSERTF((int)offsetof(struct llog_unlink_rec, lur_hdr) == 0, "found %lld\n",
- (long long)(int)offsetof(struct llog_unlink_rec, lur_hdr));
- LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_hdr) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_hdr));
- LASSERTF((int)offsetof(struct llog_unlink_rec, lur_oid) == 16, "found %lld\n",
- (long long)(int)offsetof(struct llog_unlink_rec, lur_oid));
- LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_oid) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_oid));
- LASSERTF((int)offsetof(struct llog_unlink_rec, lur_oseq) == 24, "found %lld\n",
- (long long)(int)offsetof(struct llog_unlink_rec, lur_oseq));
- LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_oseq) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_oseq));
- LASSERTF((int)offsetof(struct llog_unlink_rec, lur_count) == 28, "found %lld\n",
- (long long)(int)offsetof(struct llog_unlink_rec, lur_count));
- LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_count) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_count));
- LASSERTF((int)offsetof(struct llog_unlink_rec, lur_tail) == 32, "found %lld\n",
- (long long)(int)offsetof(struct llog_unlink_rec, lur_tail));
- LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_tail) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_tail));
-
- /* Checks for struct llog_unlink64_rec */
- LASSERTF((int)sizeof(struct llog_unlink64_rec) == 64, "found %lld\n",
- (long long)(int)sizeof(struct llog_unlink64_rec));
- LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_hdr) == 0, "found %lld\n",
- (long long)(int)offsetof(struct llog_unlink64_rec, lur_hdr));
- LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_hdr) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_hdr));
- LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_fid) == 16, "found %lld\n",
- (long long)(int)offsetof(struct llog_unlink64_rec, lur_fid));
- LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_fid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_fid));
- LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_count) == 32, "found %lld\n",
- (long long)(int)offsetof(struct llog_unlink64_rec, lur_count));
- LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_count) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_count));
- LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_padding1) == 36, "found %lld\n",
- (long long)(int)offsetof(struct llog_unlink64_rec, lur_padding1));
- LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding1) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding1));
- LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_padding2) == 40, "found %lld\n",
- (long long)(int)offsetof(struct llog_unlink64_rec, lur_padding2));
- LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding2) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding2));
- LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_padding3) == 48, "found %lld\n",
- (long long)(int)offsetof(struct llog_unlink64_rec, lur_padding3));
- LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding3) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding3));
- LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_tail) == 56, "found %lld\n",
- (long long)(int)offsetof(struct llog_unlink64_rec, lur_tail));
- LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_tail) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_tail));
-
- /* Checks for struct llog_setattr64_rec */
- LASSERTF((int)sizeof(struct llog_setattr64_rec) == 64, "found %lld\n",
- (long long)(int)sizeof(struct llog_setattr64_rec));
- LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_hdr) == 0, "found %lld\n",
- (long long)(int)offsetof(struct llog_setattr64_rec, lsr_hdr));
- LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_hdr) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_hdr));
- LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_oi) == 16, "found %lld\n",
- (long long)(int)offsetof(struct llog_setattr64_rec, lsr_oi));
- LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_oi) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_oi));
- LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_uid) == 32, "found %lld\n",
- (long long)(int)offsetof(struct llog_setattr64_rec, lsr_uid));
- LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid));
- LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_uid_h) == 36, "found %lld\n",
- (long long)(int)offsetof(struct llog_setattr64_rec, lsr_uid_h));
- LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid_h));
- LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_gid) == 40, "found %lld\n",
- (long long)(int)offsetof(struct llog_setattr64_rec, lsr_gid));
- LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid));
- LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_gid_h) == 44, "found %lld\n",
- (long long)(int)offsetof(struct llog_setattr64_rec, lsr_gid_h));
- LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h));
- LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_valid) == 48, "found %lld\n",
- (long long)(int)offsetof(struct llog_setattr64_rec, lsr_valid));
- LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_valid) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_valid));
- LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_tail) == 56, "found %lld\n",
- (long long)(int)offsetof(struct llog_setattr64_rec, lsr_tail));
- LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_tail) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_tail));
-
- /* Checks for struct llog_size_change_rec */
- LASSERTF((int)sizeof(struct llog_size_change_rec) == 64, "found %lld\n",
- (long long)(int)sizeof(struct llog_size_change_rec));
- LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_hdr) == 0, "found %lld\n",
- (long long)(int)offsetof(struct llog_size_change_rec, lsc_hdr));
- LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_hdr) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_hdr));
- LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_fid) == 16, "found %lld\n",
- (long long)(int)offsetof(struct llog_size_change_rec, lsc_fid));
- LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_fid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_fid));
- LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_ioepoch) == 32, "found %lld\n",
- (long long)(int)offsetof(struct llog_size_change_rec, lsc_ioepoch));
- LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_ioepoch) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_ioepoch));
- LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_padding1) == 36, "found %lld\n",
- (long long)(int)offsetof(struct llog_size_change_rec, lsc_padding1));
- LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding1) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding1));
- LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_padding2) == 40, "found %lld\n",
- (long long)(int)offsetof(struct llog_size_change_rec, lsc_padding2));
- LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding2) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding2));
- LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_padding3) == 48, "found %lld\n",
- (long long)(int)offsetof(struct llog_size_change_rec, lsc_padding3));
- LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding3) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding3));
- LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_tail) == 56, "found %lld\n",
- (long long)(int)offsetof(struct llog_size_change_rec, lsc_tail));
- LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_tail) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_tail));
-
- /* Checks for struct changelog_rec */
- LASSERTF((int)sizeof(struct changelog_rec) == 64, "found %lld\n",
- (long long)(int)sizeof(struct changelog_rec));
- LASSERTF((int)offsetof(struct changelog_rec, cr_namelen) == 0, "found %lld\n",
- (long long)(int)offsetof(struct changelog_rec, cr_namelen));
- LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_namelen) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_rec *)0)->cr_namelen));
- LASSERTF((int)offsetof(struct changelog_rec, cr_flags) == 2, "found %lld\n",
- (long long)(int)offsetof(struct changelog_rec, cr_flags));
- LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_flags) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_rec *)0)->cr_flags));
- LASSERTF((int)offsetof(struct changelog_rec, cr_type) == 4, "found %lld\n",
- (long long)(int)offsetof(struct changelog_rec, cr_type));
- LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_type) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_rec *)0)->cr_type));
- LASSERTF((int)offsetof(struct changelog_rec, cr_index) == 8, "found %lld\n",
- (long long)(int)offsetof(struct changelog_rec, cr_index));
- LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_index) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_rec *)0)->cr_index));
- LASSERTF((int)offsetof(struct changelog_rec, cr_prev) == 16, "found %lld\n",
- (long long)(int)offsetof(struct changelog_rec, cr_prev));
- LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_prev) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_rec *)0)->cr_prev));
- LASSERTF((int)offsetof(struct changelog_rec, cr_time) == 24, "found %lld\n",
- (long long)(int)offsetof(struct changelog_rec, cr_time));
- LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_time) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_rec *)0)->cr_time));
- LASSERTF((int)offsetof(struct changelog_rec, cr_tfid) == 32, "found %lld\n",
- (long long)(int)offsetof(struct changelog_rec, cr_tfid));
- LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_tfid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_rec *)0)->cr_tfid));
- LASSERTF((int)offsetof(struct changelog_rec, cr_pfid) == 48, "found %lld\n",
- (long long)(int)offsetof(struct changelog_rec, cr_pfid));
- LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_pfid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_rec *)0)->cr_pfid));
-
- /* Checks for struct changelog_setinfo */
- LASSERTF((int)sizeof(struct changelog_setinfo) == 12, "found %lld\n",
- (long long)(int)sizeof(struct changelog_setinfo));
- LASSERTF((int)offsetof(struct changelog_setinfo, cs_recno) == 0, "found %lld\n",
- (long long)(int)offsetof(struct changelog_setinfo, cs_recno));
- LASSERTF((int)sizeof(((struct changelog_setinfo *)0)->cs_recno) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_setinfo *)0)->cs_recno));
- LASSERTF((int)offsetof(struct changelog_setinfo, cs_id) == 8, "found %lld\n",
- (long long)(int)offsetof(struct changelog_setinfo, cs_id));
- LASSERTF((int)sizeof(((struct changelog_setinfo *)0)->cs_id) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct changelog_setinfo *)0)->cs_id));
-
- /* Checks for struct llog_changelog_rec */
- LASSERTF((int)sizeof(struct llog_changelog_rec) == 88, "found %lld\n",
- (long long)(int)sizeof(struct llog_changelog_rec));
- LASSERTF((int)offsetof(struct llog_changelog_rec, cr_hdr) == 0, "found %lld\n",
- (long long)(int)offsetof(struct llog_changelog_rec, cr_hdr));
- LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_hdr) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_hdr));
- LASSERTF((int)offsetof(struct llog_changelog_rec, cr) == 16, "found %lld\n",
- (long long)(int)offsetof(struct llog_changelog_rec, cr));
- LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr) == 64, "found %lld\n",
- (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr));
- LASSERTF((int)offsetof(struct llog_changelog_rec, cr_do_not_use) == 80, "found %lld\n",
- (long long)(int)offsetof(struct llog_changelog_rec, cr_do_not_use));
- LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_do_not_use) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_do_not_use));
-
- /* Checks for struct llog_changelog_user_rec */
- LASSERTF((int)sizeof(struct llog_changelog_user_rec) == 40, "found %lld\n",
- (long long)(int)sizeof(struct llog_changelog_user_rec));
- LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_hdr) == 0, "found %lld\n",
- (long long)(int)offsetof(struct llog_changelog_user_rec, cur_hdr));
- LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_hdr) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_hdr));
- LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_id) == 16, "found %lld\n",
- (long long)(int)offsetof(struct llog_changelog_user_rec, cur_id));
- LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id));
- LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_padding) == 20, "found %lld\n",
- (long long)(int)offsetof(struct llog_changelog_user_rec, cur_padding));
- LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_padding) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_padding));
- LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_endrec) == 24, "found %lld\n",
- (long long)(int)offsetof(struct llog_changelog_user_rec, cur_endrec));
- LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_endrec) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_endrec));
- LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_tail) == 32, "found %lld\n",
- (long long)(int)offsetof(struct llog_changelog_user_rec, cur_tail));
- LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_tail) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_tail));
-
- /* Checks for struct llog_gen */
- LASSERTF((int)sizeof(struct llog_gen) == 16, "found %lld\n",
- (long long)(int)sizeof(struct llog_gen));
- LASSERTF((int)offsetof(struct llog_gen, mnt_cnt) == 0, "found %lld\n",
- (long long)(int)offsetof(struct llog_gen, mnt_cnt));
- LASSERTF((int)sizeof(((struct llog_gen *)0)->mnt_cnt) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_gen *)0)->mnt_cnt));
- LASSERTF((int)offsetof(struct llog_gen, conn_cnt) == 8, "found %lld\n",
- (long long)(int)offsetof(struct llog_gen, conn_cnt));
- LASSERTF((int)sizeof(((struct llog_gen *)0)->conn_cnt) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_gen *)0)->conn_cnt));
-
- /* Checks for struct llog_gen_rec */
- LASSERTF((int)sizeof(struct llog_gen_rec) == 64, "found %lld\n",
- (long long)(int)sizeof(struct llog_gen_rec));
- LASSERTF((int)offsetof(struct llog_gen_rec, lgr_hdr) == 0, "found %lld\n",
- (long long)(int)offsetof(struct llog_gen_rec, lgr_hdr));
- LASSERTF((int)sizeof(((struct llog_gen_rec *)0)->lgr_hdr) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct llog_gen_rec *)0)->lgr_hdr));
- LASSERTF((int)offsetof(struct llog_gen_rec, lgr_gen) == 16, "found %lld\n",
- (long long)(int)offsetof(struct llog_gen_rec, lgr_gen));
- LASSERTF((int)sizeof(((struct llog_gen_rec *)0)->lgr_gen) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct llog_gen_rec *)0)->lgr_gen));
- LASSERTF((int)offsetof(struct llog_gen_rec, lgr_tail) == 56, "found %lld\n",
- (long long)(int)offsetof(struct llog_gen_rec, lgr_tail));
- LASSERTF((int)sizeof(((struct llog_gen_rec *)0)->lgr_tail) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_gen_rec *)0)->lgr_tail));
-
- /* Checks for struct llog_log_hdr */
- LASSERTF((int)sizeof(struct llog_log_hdr) == 8192, "found %lld\n",
- (long long)(int)sizeof(struct llog_log_hdr));
- LASSERTF((int)offsetof(struct llog_log_hdr, llh_hdr) == 0, "found %lld\n",
- (long long)(int)offsetof(struct llog_log_hdr, llh_hdr));
- LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_hdr) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_hdr));
- LASSERTF((int)offsetof(struct llog_log_hdr, llh_timestamp) == 16, "found %lld\n",
- (long long)(int)offsetof(struct llog_log_hdr, llh_timestamp));
- LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_timestamp) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_timestamp));
- LASSERTF((int)offsetof(struct llog_log_hdr, llh_count) == 24, "found %lld\n",
- (long long)(int)offsetof(struct llog_log_hdr, llh_count));
- LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_count) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_count));
- LASSERTF((int)offsetof(struct llog_log_hdr, llh_bitmap_offset) == 28, "found %lld\n",
- (long long)(int)offsetof(struct llog_log_hdr, llh_bitmap_offset));
- LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap_offset) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap_offset));
- LASSERTF((int)offsetof(struct llog_log_hdr, llh_size) == 32, "found %lld\n",
- (long long)(int)offsetof(struct llog_log_hdr, llh_size));
- LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_size) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_size));
- LASSERTF((int)offsetof(struct llog_log_hdr, llh_flags) == 36, "found %lld\n",
- (long long)(int)offsetof(struct llog_log_hdr, llh_flags));
- LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_flags));
- LASSERTF((int)offsetof(struct llog_log_hdr, llh_cat_idx) == 40, "found %lld\n",
- (long long)(int)offsetof(struct llog_log_hdr, llh_cat_idx));
- LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_cat_idx) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_cat_idx));
- LASSERTF((int)offsetof(struct llog_log_hdr, llh_tgtuuid) == 44, "found %lld\n",
- (long long)(int)offsetof(struct llog_log_hdr, llh_tgtuuid));
- LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_tgtuuid) == 40, "found %lld\n",
- (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_tgtuuid));
- LASSERTF((int)offsetof(struct llog_log_hdr, llh_reserved) == 84, "found %lld\n",
- (long long)(int)offsetof(struct llog_log_hdr, llh_reserved));
- LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_reserved) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_reserved));
- LASSERTF((int)offsetof(struct llog_log_hdr, llh_bitmap) == 88, "found %lld\n",
- (long long)(int)offsetof(struct llog_log_hdr, llh_bitmap));
- LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap) == 8096, "found %lld\n",
- (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap));
- LASSERTF((int)offsetof(struct llog_log_hdr, llh_tail) == 8184, "found %lld\n",
- (long long)(int)offsetof(struct llog_log_hdr, llh_tail));
- LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_tail) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_tail));
-
- /* Checks for struct llog_cookie */
- LASSERTF((int)sizeof(struct llog_cookie) == 32, "found %lld\n",
- (long long)(int)sizeof(struct llog_cookie));
- LASSERTF((int)offsetof(struct llog_cookie, lgc_lgl) == 0, "found %lld\n",
- (long long)(int)offsetof(struct llog_cookie, lgc_lgl));
- LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_lgl) == 20, "found %lld\n",
- (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_lgl));
- LASSERTF((int)offsetof(struct llog_cookie, lgc_subsys) == 20, "found %lld\n",
- (long long)(int)offsetof(struct llog_cookie, lgc_subsys));
- LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_subsys) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_subsys));
- LASSERTF((int)offsetof(struct llog_cookie, lgc_index) == 24, "found %lld\n",
- (long long)(int)offsetof(struct llog_cookie, lgc_index));
- LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_index) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_index));
- LASSERTF((int)offsetof(struct llog_cookie, lgc_padding) == 28, "found %lld\n",
- (long long)(int)offsetof(struct llog_cookie, lgc_padding));
- LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_padding) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_padding));
-
- /* Checks for struct llogd_body */
- LASSERTF((int)sizeof(struct llogd_body) == 48, "found %lld\n",
- (long long)(int)sizeof(struct llogd_body));
- LASSERTF((int)offsetof(struct llogd_body, lgd_logid) == 0, "found %lld\n",
- (long long)(int)offsetof(struct llogd_body, lgd_logid));
- LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_logid) == 20, "found %lld\n",
- (long long)(int)sizeof(((struct llogd_body *)0)->lgd_logid));
- LASSERTF((int)offsetof(struct llogd_body, lgd_ctxt_idx) == 20, "found %lld\n",
- (long long)(int)offsetof(struct llogd_body, lgd_ctxt_idx));
- LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_ctxt_idx) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llogd_body *)0)->lgd_ctxt_idx));
- LASSERTF((int)offsetof(struct llogd_body, lgd_llh_flags) == 24, "found %lld\n",
- (long long)(int)offsetof(struct llogd_body, lgd_llh_flags));
- LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_llh_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llogd_body *)0)->lgd_llh_flags));
- LASSERTF((int)offsetof(struct llogd_body, lgd_index) == 28, "found %lld\n",
- (long long)(int)offsetof(struct llogd_body, lgd_index));
- LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_index) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llogd_body *)0)->lgd_index));
- LASSERTF((int)offsetof(struct llogd_body, lgd_saved_index) == 32, "found %lld\n",
- (long long)(int)offsetof(struct llogd_body, lgd_saved_index));
- LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_saved_index) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llogd_body *)0)->lgd_saved_index));
- LASSERTF((int)offsetof(struct llogd_body, lgd_len) == 36, "found %lld\n",
- (long long)(int)offsetof(struct llogd_body, lgd_len));
- LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_len) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llogd_body *)0)->lgd_len));
- LASSERTF((int)offsetof(struct llogd_body, lgd_cur_offset) == 40, "found %lld\n",
- (long long)(int)offsetof(struct llogd_body, lgd_cur_offset));
- LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_cur_offset) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct llogd_body *)0)->lgd_cur_offset));
- BUILD_BUG_ON(LLOG_ORIGIN_HANDLE_CREATE != 501);
- BUILD_BUG_ON(LLOG_ORIGIN_HANDLE_NEXT_BLOCK != 502);
- BUILD_BUG_ON(LLOG_ORIGIN_HANDLE_READ_HEADER != 503);
- BUILD_BUG_ON(LLOG_ORIGIN_HANDLE_WRITE_REC != 504);
- BUILD_BUG_ON(LLOG_ORIGIN_HANDLE_CLOSE != 505);
- BUILD_BUG_ON(LLOG_ORIGIN_CONNECT != 506);
- BUILD_BUG_ON(LLOG_CATINFO != 507);
- BUILD_BUG_ON(LLOG_ORIGIN_HANDLE_PREV_BLOCK != 508);
- BUILD_BUG_ON(LLOG_ORIGIN_HANDLE_DESTROY != 509);
- BUILD_BUG_ON(LLOG_FIRST_OPC != 501);
- BUILD_BUG_ON(LLOG_LAST_OPC != 510);
- BUILD_BUG_ON(LLOG_CONFIG_ORIG_CTXT != 0);
- BUILD_BUG_ON(LLOG_CONFIG_REPL_CTXT != 1);
- BUILD_BUG_ON(LLOG_MDS_OST_ORIG_CTXT != 2);
- BUILD_BUG_ON(LLOG_MDS_OST_REPL_CTXT != 3);
- BUILD_BUG_ON(LLOG_SIZE_ORIG_CTXT != 4);
- BUILD_BUG_ON(LLOG_SIZE_REPL_CTXT != 5);
- BUILD_BUG_ON(LLOG_TEST_ORIG_CTXT != 8);
- BUILD_BUG_ON(LLOG_TEST_REPL_CTXT != 9);
- BUILD_BUG_ON(LLOG_CHANGELOG_ORIG_CTXT != 12);
- BUILD_BUG_ON(LLOG_CHANGELOG_REPL_CTXT != 13);
- BUILD_BUG_ON(LLOG_CHANGELOG_USER_ORIG_CTXT != 14);
- BUILD_BUG_ON(LLOG_AGENT_ORIG_CTXT != 15);
- BUILD_BUG_ON(LLOG_MAX_CTXTS != 16);
-
- /* Checks for struct llogd_conn_body */
- LASSERTF((int)sizeof(struct llogd_conn_body) == 40, "found %lld\n",
- (long long)(int)sizeof(struct llogd_conn_body));
- LASSERTF((int)offsetof(struct llogd_conn_body, lgdc_gen) == 0, "found %lld\n",
- (long long)(int)offsetof(struct llogd_conn_body, lgdc_gen));
- LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_gen) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_gen));
- LASSERTF((int)offsetof(struct llogd_conn_body, lgdc_logid) == 16, "found %lld\n",
- (long long)(int)offsetof(struct llogd_conn_body, lgdc_logid));
- LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_logid) == 20, "found %lld\n",
- (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_logid));
- LASSERTF((int)offsetof(struct llogd_conn_body, lgdc_ctxt_idx) == 36, "found %lld\n",
- (long long)(int)offsetof(struct llogd_conn_body, lgdc_ctxt_idx));
- LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx));
-
- /* Checks for struct ll_fiemap_info_key */
- LASSERTF((int)sizeof(struct ll_fiemap_info_key) == 248, "found %lld\n",
- (long long)(int)sizeof(struct ll_fiemap_info_key));
- LASSERTF((int)offsetof(struct ll_fiemap_info_key, lfik_name[8]) == 8, "found %lld\n",
- (long long)(int)offsetof(struct ll_fiemap_info_key, lfik_name[8]));
- LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_name[8]) == 1, "found %lld\n",
- (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_name[8]));
- LASSERTF((int)offsetof(struct ll_fiemap_info_key, lfik_oa) == 8, "found %lld\n",
- (long long)(int)offsetof(struct ll_fiemap_info_key, lfik_oa));
- LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_oa) == 208, "found %lld\n",
- (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_oa));
- LASSERTF((int)offsetof(struct ll_fiemap_info_key, lfik_fiemap) == 216, "found %lld\n",
- (long long)(int)offsetof(struct ll_fiemap_info_key, lfik_fiemap));
- LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_fiemap) == 32, "found %lld\n",
- (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_fiemap));
-
- /* Checks for struct mgs_target_info */
- LASSERTF((int)sizeof(struct mgs_target_info) == 4544, "found %lld\n",
- (long long)(int)sizeof(struct mgs_target_info));
- LASSERTF((int)offsetof(struct mgs_target_info, mti_lustre_ver) == 0, "found %lld\n",
- (long long)(int)offsetof(struct mgs_target_info, mti_lustre_ver));
- LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_lustre_ver) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_lustre_ver));
- LASSERTF((int)offsetof(struct mgs_target_info, mti_stripe_index) == 4, "found %lld\n",
- (long long)(int)offsetof(struct mgs_target_info, mti_stripe_index));
- LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_stripe_index) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_stripe_index));
- LASSERTF((int)offsetof(struct mgs_target_info, mti_config_ver) == 8, "found %lld\n",
- (long long)(int)offsetof(struct mgs_target_info, mti_config_ver));
- LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_config_ver) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_config_ver));
- LASSERTF((int)offsetof(struct mgs_target_info, mti_flags) == 12, "found %lld\n",
- (long long)(int)offsetof(struct mgs_target_info, mti_flags));
- LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_flags));
- LASSERTF((int)offsetof(struct mgs_target_info, mti_nid_count) == 16, "found %lld\n",
- (long long)(int)offsetof(struct mgs_target_info, mti_nid_count));
- LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_nid_count) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_nid_count));
- LASSERTF((int)offsetof(struct mgs_target_info, mti_instance) == 20, "found %lld\n",
- (long long)(int)offsetof(struct mgs_target_info, mti_instance));
- LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_instance) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_instance));
- LASSERTF((int)offsetof(struct mgs_target_info, mti_fsname) == 24, "found %lld\n",
- (long long)(int)offsetof(struct mgs_target_info, mti_fsname));
- LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_fsname) == 64, "found %lld\n",
- (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_fsname));
- LASSERTF((int)offsetof(struct mgs_target_info, mti_svname) == 88, "found %lld\n",
- (long long)(int)offsetof(struct mgs_target_info, mti_svname));
- LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_svname) == 64, "found %lld\n",
- (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_svname));
- LASSERTF((int)offsetof(struct mgs_target_info, mti_uuid) == 152, "found %lld\n",
- (long long)(int)offsetof(struct mgs_target_info, mti_uuid));
- LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_uuid) == 40, "found %lld\n",
- (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_uuid));
- LASSERTF((int)offsetof(struct mgs_target_info, mti_nids) == 192, "found %lld\n",
- (long long)(int)offsetof(struct mgs_target_info, mti_nids));
- LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_nids) == 256, "found %lld\n",
- (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_nids));
- LASSERTF((int)offsetof(struct mgs_target_info, mti_params) == 448, "found %lld\n",
- (long long)(int)offsetof(struct mgs_target_info, mti_params));
- LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_params) == 4096, "found %lld\n",
- (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_params));
-
- /* Checks for struct lustre_capa */
- LASSERTF((int)sizeof(struct lustre_capa) == 120, "found %lld\n",
- (long long)(int)sizeof(struct lustre_capa));
- LASSERTF((int)offsetof(struct lustre_capa, lc_fid) == 0, "found %lld\n",
- (long long)(int)offsetof(struct lustre_capa, lc_fid));
- LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_fid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_capa *)0)->lc_fid));
- LASSERTF((int)offsetof(struct lustre_capa, lc_opc) == 16, "found %lld\n",
- (long long)(int)offsetof(struct lustre_capa, lc_opc));
- LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_opc) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_capa *)0)->lc_opc));
- LASSERTF((int)offsetof(struct lustre_capa, lc_uid) == 24, "found %lld\n",
- (long long)(int)offsetof(struct lustre_capa, lc_uid));
- LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_uid) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_capa *)0)->lc_uid));
- LASSERTF((int)offsetof(struct lustre_capa, lc_gid) == 32, "found %lld\n",
- (long long)(int)offsetof(struct lustre_capa, lc_gid));
- LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_gid) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_capa *)0)->lc_gid));
- LASSERTF((int)offsetof(struct lustre_capa, lc_flags) == 40, "found %lld\n",
- (long long)(int)offsetof(struct lustre_capa, lc_flags));
- LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_capa *)0)->lc_flags));
- LASSERTF((int)offsetof(struct lustre_capa, lc_keyid) == 44, "found %lld\n",
- (long long)(int)offsetof(struct lustre_capa, lc_keyid));
- LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_keyid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_capa *)0)->lc_keyid));
- LASSERTF((int)offsetof(struct lustre_capa, lc_timeout) == 48, "found %lld\n",
- (long long)(int)offsetof(struct lustre_capa, lc_timeout));
- LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_timeout) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_capa *)0)->lc_timeout));
- LASSERTF((int)offsetof(struct lustre_capa, lc_expiry) == 52, "found %lld\n",
- (long long)(int)offsetof(struct lustre_capa, lc_expiry));
- LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_expiry) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_capa *)0)->lc_expiry));
- BUILD_BUG_ON(CAPA_HMAC_MAX_LEN != 64);
- LASSERTF((int)offsetof(struct lustre_capa, lc_hmac[64]) == 120, "found %lld\n",
- (long long)(int)offsetof(struct lustre_capa, lc_hmac[64]));
- LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_hmac[64]) == 1, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_capa *)0)->lc_hmac[64]));
-
- /* Checks for struct lustre_capa_key */
- LASSERTF((int)sizeof(struct lustre_capa_key) == 72, "found %lld\n",
- (long long)(int)sizeof(struct lustre_capa_key));
- LASSERTF((int)offsetof(struct lustre_capa_key, lk_seq) == 0, "found %lld\n",
- (long long)(int)offsetof(struct lustre_capa_key, lk_seq));
- LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_seq) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_seq));
- LASSERTF((int)offsetof(struct lustre_capa_key, lk_keyid) == 8, "found %lld\n",
- (long long)(int)offsetof(struct lustre_capa_key, lk_keyid));
- LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_keyid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_keyid));
- LASSERTF((int)offsetof(struct lustre_capa_key, lk_padding) == 12, "found %lld\n",
- (long long)(int)offsetof(struct lustre_capa_key, lk_padding));
- LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_padding) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_padding));
- BUILD_BUG_ON(CAPA_HMAC_KEY_MAX_LEN != 56);
- LASSERTF((int)offsetof(struct lustre_capa_key, lk_key[56]) == 72, "found %lld\n",
- (long long)(int)offsetof(struct lustre_capa_key, lk_key[56]));
- LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_key[56]) == 1, "found %lld\n",
- (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_key[56]));
-
- /* Checks for struct getinfo_fid2path */
- LASSERTF((int)sizeof(struct getinfo_fid2path) == 32, "found %lld\n",
- (long long)(int)sizeof(struct getinfo_fid2path));
- LASSERTF((int)offsetof(struct getinfo_fid2path, gf_fid) == 0, "found %lld\n",
- (long long)(int)offsetof(struct getinfo_fid2path, gf_fid));
- LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_fid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_fid));
- LASSERTF((int)offsetof(struct getinfo_fid2path, gf_recno) == 16, "found %lld\n",
- (long long)(int)offsetof(struct getinfo_fid2path, gf_recno));
- LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_recno) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_recno));
- LASSERTF((int)offsetof(struct getinfo_fid2path, gf_linkno) == 24, "found %lld\n",
- (long long)(int)offsetof(struct getinfo_fid2path, gf_linkno));
- LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_linkno) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_linkno));
- LASSERTF((int)offsetof(struct getinfo_fid2path, gf_pathlen) == 28, "found %lld\n",
- (long long)(int)offsetof(struct getinfo_fid2path, gf_pathlen));
- LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_pathlen) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_pathlen));
- LASSERTF((int)offsetof(struct getinfo_fid2path, gf_path[0]) == 32, "found %lld\n",
- (long long)(int)offsetof(struct getinfo_fid2path, gf_path[0]));
- LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_path[0]) == 1, "found %lld\n",
- (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_path[0]));
-
- /* Checks for struct fiemap */
- LASSERTF((int)sizeof(struct fiemap) == 32, "found %lld\n",
- (long long)(int)sizeof(struct fiemap));
- LASSERTF((int)offsetof(struct fiemap, fm_start) == 0, "found %lld\n",
- (long long)(int)offsetof(struct fiemap, fm_start));
- LASSERTF((int)sizeof(((struct fiemap *)0)->fm_start) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct fiemap *)0)->fm_start));
- LASSERTF((int)offsetof(struct fiemap, fm_length) == 8, "found %lld\n",
- (long long)(int)offsetof(struct fiemap, fm_length));
- LASSERTF((int)sizeof(((struct fiemap *)0)->fm_length) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct fiemap *)0)->fm_length));
- LASSERTF((int)offsetof(struct fiemap, fm_flags) == 16, "found %lld\n",
- (long long)(int)offsetof(struct fiemap, fm_flags));
- LASSERTF((int)sizeof(((struct fiemap *)0)->fm_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct fiemap *)0)->fm_flags));
- LASSERTF((int)offsetof(struct fiemap, fm_mapped_extents) == 20, "found %lld\n",
- (long long)(int)offsetof(struct fiemap, fm_mapped_extents));
- LASSERTF((int)sizeof(((struct fiemap *)0)->fm_mapped_extents) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct fiemap *)0)->fm_mapped_extents));
- LASSERTF((int)offsetof(struct fiemap, fm_extent_count) == 24, "found %lld\n",
- (long long)(int)offsetof(struct fiemap, fm_extent_count));
- LASSERTF((int)sizeof(((struct fiemap *)0)->fm_extent_count) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct fiemap *)0)->fm_extent_count));
- LASSERTF((int)offsetof(struct fiemap, fm_reserved) == 28, "found %lld\n",
- (long long)(int)offsetof(struct fiemap, fm_reserved));
- LASSERTF((int)sizeof(((struct fiemap *)0)->fm_reserved) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct fiemap *)0)->fm_reserved));
- LASSERTF((int)offsetof(struct fiemap, fm_extents) == 32, "found %lld\n",
- (long long)(int)offsetof(struct fiemap, fm_extents));
- LASSERTF((int)sizeof(((struct fiemap *)0)->fm_extents) == 0, "found %lld\n",
- (long long)(int)sizeof(((struct fiemap *)0)->fm_extents));
- BUILD_BUG_ON(FIEMAP_FLAG_SYNC != 0x00000001);
- BUILD_BUG_ON(FIEMAP_FLAG_XATTR != 0x00000002);
- BUILD_BUG_ON(FIEMAP_FLAG_DEVICE_ORDER != 0x40000000);
-
- /* Checks for struct fiemap_extent */
- LASSERTF((int)sizeof(struct fiemap_extent) == 56, "found %lld\n",
- (long long)(int)sizeof(struct fiemap_extent));
- LASSERTF((int)offsetof(struct fiemap_extent, fe_logical) == 0, "found %lld\n",
- (long long)(int)offsetof(struct fiemap_extent, fe_logical));
- LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_logical) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_logical));
- LASSERTF((int)offsetof(struct fiemap_extent, fe_physical) == 8, "found %lld\n",
- (long long)(int)offsetof(struct fiemap_extent, fe_physical));
- LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_physical) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_physical));
- LASSERTF((int)offsetof(struct fiemap_extent, fe_length) == 16, "found %lld\n",
- (long long)(int)offsetof(struct fiemap_extent, fe_length));
- LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_length) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_length));
- LASSERTF((int)offsetof(struct fiemap_extent, fe_flags) == 40, "found %lld\n",
- (long long)(int)offsetof(struct fiemap_extent, fe_flags));
- LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_flags));
- LASSERTF((int)offsetof(struct fiemap_extent, fe_reserved[0]) == 44, "found %lld\n",
- (long long)(int)offsetof(struct fiemap_extent, fe_reserved[0]));
- LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_reserved[0]) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_reserved[0]));
- BUILD_BUG_ON(FIEMAP_EXTENT_LAST != 0x00000001);
- BUILD_BUG_ON(FIEMAP_EXTENT_UNKNOWN != 0x00000002);
- BUILD_BUG_ON(FIEMAP_EXTENT_DELALLOC != 0x00000004);
- BUILD_BUG_ON(FIEMAP_EXTENT_ENCODED != 0x00000008);
- BUILD_BUG_ON(FIEMAP_EXTENT_DATA_ENCRYPTED != 0x00000080);
- BUILD_BUG_ON(FIEMAP_EXTENT_NOT_ALIGNED != 0x00000100);
- BUILD_BUG_ON(FIEMAP_EXTENT_DATA_INLINE != 0x00000200);
- BUILD_BUG_ON(FIEMAP_EXTENT_DATA_TAIL != 0x00000400);
- BUILD_BUG_ON(FIEMAP_EXTENT_UNWRITTEN != 0x00000800);
- BUILD_BUG_ON(FIEMAP_EXTENT_MERGED != 0x00001000);
- BUILD_BUG_ON(FIEMAP_EXTENT_NO_DIRECT != 0x40000000);
- BUILD_BUG_ON(FIEMAP_EXTENT_NET != 0x80000000);
-
- /* Checks for type posix_acl_xattr_entry */
- LASSERTF((int)sizeof(struct posix_acl_xattr_entry) == 8, "found %lld\n",
- (long long)(int)sizeof(struct posix_acl_xattr_entry));
- LASSERTF((int)offsetof(struct posix_acl_xattr_entry, e_tag) == 0, "found %lld\n",
- (long long)(int)offsetof(struct posix_acl_xattr_entry, e_tag));
- LASSERTF((int)sizeof(((struct posix_acl_xattr_entry *)0)->e_tag) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct posix_acl_xattr_entry *)0)->e_tag));
- LASSERTF((int)offsetof(struct posix_acl_xattr_entry, e_perm) == 2, "found %lld\n",
- (long long)(int)offsetof(struct posix_acl_xattr_entry, e_perm));
- LASSERTF((int)sizeof(((struct posix_acl_xattr_entry *)0)->e_perm) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct posix_acl_xattr_entry *)0)->e_perm));
- LASSERTF((int)offsetof(struct posix_acl_xattr_entry, e_id) == 4, "found %lld\n",
- (long long)(int)offsetof(struct posix_acl_xattr_entry, e_id));
- LASSERTF((int)sizeof(((struct posix_acl_xattr_entry *)0)->e_id) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct posix_acl_xattr_entry *)0)->e_id));
-
- /* Checks for type posix_acl_xattr_header */
- LASSERTF((int)sizeof(struct posix_acl_xattr_header) == 4, "found %lld\n",
- (long long)(int)sizeof(struct posix_acl_xattr_header));
- LASSERTF((int)offsetof(struct posix_acl_xattr_header, a_version) == 0, "found %lld\n",
- (long long)(int)offsetof(struct posix_acl_xattr_header, a_version));
- LASSERTF((int)sizeof(((struct posix_acl_xattr_header *)0)->a_version) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct posix_acl_xattr_header *)0)->a_version));
-
- /* Checks for struct link_ea_header */
- LASSERTF((int)sizeof(struct link_ea_header) == 24, "found %lld\n",
- (long long)(int)sizeof(struct link_ea_header));
- LASSERTF((int)offsetof(struct link_ea_header, leh_magic) == 0, "found %lld\n",
- (long long)(int)offsetof(struct link_ea_header, leh_magic));
- LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_magic) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct link_ea_header *)0)->leh_magic));
- LASSERTF((int)offsetof(struct link_ea_header, leh_reccount) == 4, "found %lld\n",
- (long long)(int)offsetof(struct link_ea_header, leh_reccount));
- LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_reccount) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct link_ea_header *)0)->leh_reccount));
- LASSERTF((int)offsetof(struct link_ea_header, leh_len) == 8, "found %lld\n",
- (long long)(int)offsetof(struct link_ea_header, leh_len));
- LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_len) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct link_ea_header *)0)->leh_len));
- LASSERTF((int)offsetof(struct link_ea_header, leh_overflow_time) == 16, "found %lld\n",
- (long long)(int)offsetof(struct link_ea_header, leh_overflow_time));
- LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_overflow_time) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct link_ea_header *)0)->leh_overflow_time));
- LASSERTF((int)offsetof(struct link_ea_header, leh_padding) == 20, "found %lld\n",
- (long long)(int)offsetof(struct link_ea_header, leh_padding));
- LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_padding) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct link_ea_header *)0)->leh_padding));
- BUILD_BUG_ON(LINK_EA_MAGIC != 0x11EAF1DFUL);
-
- /* Checks for struct link_ea_entry */
- LASSERTF((int)sizeof(struct link_ea_entry) == 18, "found %lld\n",
- (long long)(int)sizeof(struct link_ea_entry));
- LASSERTF((int)offsetof(struct link_ea_entry, lee_reclen) == 0, "found %lld\n",
- (long long)(int)offsetof(struct link_ea_entry, lee_reclen));
- LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_reclen) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_reclen));
- LASSERTF((int)offsetof(struct link_ea_entry, lee_parent_fid) == 2, "found %lld\n",
- (long long)(int)offsetof(struct link_ea_entry, lee_parent_fid));
- LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_parent_fid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_parent_fid));
- LASSERTF((int)offsetof(struct link_ea_entry, lee_name) == 18, "found %lld\n",
- (long long)(int)offsetof(struct link_ea_entry, lee_name));
- LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, "found %lld\n",
- (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name));
-
- /* Checks for struct layout_intent */
- LASSERTF((int)sizeof(struct layout_intent) == 24, "found %lld\n",
- (long long)(int)sizeof(struct layout_intent));
- LASSERTF((int)offsetof(struct layout_intent, li_opc) == 0, "found %lld\n",
- (long long)(int)offsetof(struct layout_intent, li_opc));
- LASSERTF((int)sizeof(((struct layout_intent *)0)->li_opc) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct layout_intent *)0)->li_opc));
- LASSERTF((int)offsetof(struct layout_intent, li_flags) == 4, "found %lld\n",
- (long long)(int)offsetof(struct layout_intent, li_flags));
- LASSERTF((int)sizeof(((struct layout_intent *)0)->li_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct layout_intent *)0)->li_flags));
- LASSERTF((int)offsetof(struct layout_intent, li_start) == 8, "found %lld\n",
- (long long)(int)offsetof(struct layout_intent, li_start));
- LASSERTF((int)sizeof(((struct layout_intent *)0)->li_start) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct layout_intent *)0)->li_start));
- LASSERTF((int)offsetof(struct layout_intent, li_end) == 16, "found %lld\n",
- (long long)(int)offsetof(struct layout_intent, li_end));
- LASSERTF((int)sizeof(((struct layout_intent *)0)->li_end) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct layout_intent *)0)->li_end));
- LASSERTF(LAYOUT_INTENT_ACCESS == 0, "found %lld\n",
- (long long)LAYOUT_INTENT_ACCESS);
- LASSERTF(LAYOUT_INTENT_READ == 1, "found %lld\n",
- (long long)LAYOUT_INTENT_READ);
- LASSERTF(LAYOUT_INTENT_WRITE == 2, "found %lld\n",
- (long long)LAYOUT_INTENT_WRITE);
- LASSERTF(LAYOUT_INTENT_GLIMPSE == 3, "found %lld\n",
- (long long)LAYOUT_INTENT_GLIMPSE);
- LASSERTF(LAYOUT_INTENT_TRUNC == 4, "found %lld\n",
- (long long)LAYOUT_INTENT_TRUNC);
- LASSERTF(LAYOUT_INTENT_RELEASE == 5, "found %lld\n",
- (long long)LAYOUT_INTENT_RELEASE);
- LASSERTF(LAYOUT_INTENT_RESTORE == 6, "found %lld\n",
- (long long)LAYOUT_INTENT_RESTORE);
-
- /* Checks for struct hsm_action_item */
- LASSERTF((int)sizeof(struct hsm_action_item) == 72, "found %lld\n",
- (long long)(int)sizeof(struct hsm_action_item));
- LASSERTF((int)offsetof(struct hsm_action_item, hai_len) == 0, "found %lld\n",
- (long long)(int)offsetof(struct hsm_action_item, hai_len));
- LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_len) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_len));
- LASSERTF((int)offsetof(struct hsm_action_item, hai_action) == 4, "found %lld\n",
- (long long)(int)offsetof(struct hsm_action_item, hai_action));
- LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_action) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_action));
- LASSERTF((int)offsetof(struct hsm_action_item, hai_fid) == 8, "found %lld\n",
- (long long)(int)offsetof(struct hsm_action_item, hai_fid));
- LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_fid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_fid));
- LASSERTF((int)offsetof(struct hsm_action_item, hai_dfid) == 24, "found %lld\n",
- (long long)(int)offsetof(struct hsm_action_item, hai_dfid));
- LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_dfid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_dfid));
- LASSERTF((int)offsetof(struct hsm_action_item, hai_extent) == 40, "found %lld\n",
- (long long)(int)offsetof(struct hsm_action_item, hai_extent));
- LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_extent) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_extent));
- LASSERTF((int)offsetof(struct hsm_action_item, hai_cookie) == 56, "found %lld\n",
- (long long)(int)offsetof(struct hsm_action_item, hai_cookie));
- LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_cookie) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_cookie));
- LASSERTF((int)offsetof(struct hsm_action_item, hai_gid) == 64, "found %lld\n",
- (long long)(int)offsetof(struct hsm_action_item, hai_gid));
- LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_gid) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_gid));
- LASSERTF((int)offsetof(struct hsm_action_item, hai_data) == 72, "found %lld\n",
- (long long)(int)offsetof(struct hsm_action_item, hai_data));
- LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_data) == 0, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_data));
-
- /* Checks for struct hsm_action_list */
- LASSERTF((int)sizeof(struct hsm_action_list) == 32, "found %lld\n",
- (long long)(int)sizeof(struct hsm_action_list));
- LASSERTF((int)offsetof(struct hsm_action_list, hal_version) == 0, "found %lld\n",
- (long long)(int)offsetof(struct hsm_action_list, hal_version));
- LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_version) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_version));
- LASSERTF((int)offsetof(struct hsm_action_list, hal_count) == 4, "found %lld\n",
- (long long)(int)offsetof(struct hsm_action_list, hal_count));
- LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_count) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_count));
- LASSERTF((int)offsetof(struct hsm_action_list, hal_compound_id) == 8, "found %lld\n",
- (long long)(int)offsetof(struct hsm_action_list, hal_compound_id));
- LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_compound_id) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_compound_id));
- LASSERTF((int)offsetof(struct hsm_action_list, hal_flags) == 16, "found %lld\n",
- (long long)(int)offsetof(struct hsm_action_list, hal_flags));
- LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_flags) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_flags));
- LASSERTF((int)offsetof(struct hsm_action_list, hal_archive_id) == 24, "found %lld\n",
- (long long)(int)offsetof(struct hsm_action_list, hal_archive_id));
- LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_archive_id) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_archive_id));
- LASSERTF((int)offsetof(struct hsm_action_list, padding1) == 28, "found %lld\n",
- (long long)(int)offsetof(struct hsm_action_list, padding1));
- LASSERTF((int)sizeof(((struct hsm_action_list *)0)->padding1) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_action_list *)0)->padding1));
- LASSERTF((int)offsetof(struct hsm_action_list, hal_fsname) == 32, "found %lld\n",
- (long long)(int)offsetof(struct hsm_action_list, hal_fsname));
- LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_fsname) == 0, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_fsname));
-
- /* Checks for struct hsm_progress */
- LASSERTF((int)sizeof(struct hsm_progress) == 48, "found %lld\n",
- (long long)(int)sizeof(struct hsm_progress));
- LASSERTF((int)offsetof(struct hsm_progress, hp_fid) == 0, "found %lld\n",
- (long long)(int)offsetof(struct hsm_progress, hp_fid));
- LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_fid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_progress *)0)->hp_fid));
- LASSERTF((int)offsetof(struct hsm_progress, hp_cookie) == 16, "found %lld\n",
- (long long)(int)offsetof(struct hsm_progress, hp_cookie));
- LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_cookie) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_progress *)0)->hp_cookie));
- LASSERTF((int)offsetof(struct hsm_progress, hp_extent) == 24, "found %lld\n",
- (long long)(int)offsetof(struct hsm_progress, hp_extent));
- LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_extent) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_progress *)0)->hp_extent));
- LASSERTF((int)offsetof(struct hsm_progress, hp_flags) == 40, "found %lld\n",
- (long long)(int)offsetof(struct hsm_progress, hp_flags));
- LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_flags) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_progress *)0)->hp_flags));
- LASSERTF((int)offsetof(struct hsm_progress, hp_errval) == 42, "found %lld\n",
- (long long)(int)offsetof(struct hsm_progress, hp_errval));
- LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_errval) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_progress *)0)->hp_errval));
- LASSERTF((int)offsetof(struct hsm_progress, padding) == 44, "found %lld\n",
- (long long)(int)offsetof(struct hsm_progress, padding));
- LASSERTF((int)sizeof(((struct hsm_progress *)0)->padding) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_progress *)0)->padding));
- LASSERTF(HP_FLAG_COMPLETED == 0x01, "found 0x%.8x\n",
- HP_FLAG_COMPLETED);
- LASSERTF(HP_FLAG_RETRY == 0x02, "found 0x%.8x\n",
- HP_FLAG_RETRY);
-
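- /* Checks for struct hsm_copy */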
- LASSERTF((int)offsetof(struct hsm_copy, hc_data_version) == 0, "found %lld\n",
- (long long)(int)offsetof(struct hsm_copy, hc_data_version));
- LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_data_version) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_copy *)0)->hc_data_version));
- LASSERTF((int)offsetof(struct hsm_copy, hc_flags) == 8, "found %lld\n",
- (long long)(int)offsetof(struct hsm_copy, hc_flags));
- LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_flags) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_copy *)0)->hc_flags));
- LASSERTF((int)offsetof(struct hsm_copy, hc_errval) == 10, "found %lld\n",
- (long long)(int)offsetof(struct hsm_copy, hc_errval));
- LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_errval) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_copy *)0)->hc_errval));
- LASSERTF((int)offsetof(struct hsm_copy, padding) == 12, "found %lld\n",
- (long long)(int)offsetof(struct hsm_copy, padding));
- LASSERTF((int)sizeof(((struct hsm_copy *)0)->padding) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_copy *)0)->padding));
- LASSERTF((int)offsetof(struct hsm_copy, hc_hai) == 16, "found %lld\n",
- (long long)(int)offsetof(struct hsm_copy, hc_hai));
- LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_hai) == 72, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_copy *)0)->hc_hai));
-
- /* Checks for struct hsm_progress_kernel */
- LASSERTF((int)sizeof(struct hsm_progress_kernel) == 64, "found %lld\n",
- (long long)(int)sizeof(struct hsm_progress_kernel));
- LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_fid) == 0, "found %lld\n",
- (long long)(int)offsetof(struct hsm_progress_kernel, hpk_fid));
- LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_fid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_fid));
- LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_cookie) == 16, "found %lld\n",
- (long long)(int)offsetof(struct hsm_progress_kernel, hpk_cookie));
- LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_cookie) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_cookie));
- LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_extent) == 24, "found %lld\n",
- (long long)(int)offsetof(struct hsm_progress_kernel, hpk_extent));
- LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_extent) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_extent));
- LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_flags) == 40, "found %lld\n",
- (long long)(int)offsetof(struct hsm_progress_kernel, hpk_flags));
- LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_flags) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_flags));
- LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_errval) == 42, "found %lld\n",
- (long long)(int)offsetof(struct hsm_progress_kernel, hpk_errval));
- LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_errval) == 2, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_errval));
- LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_padding1) == 44, "found %lld\n",
- (long long)(int)offsetof(struct hsm_progress_kernel, hpk_padding1));
- LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding1) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding1));
- LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_data_version) == 48, "found %lld\n",
- (long long)(int)offsetof(struct hsm_progress_kernel, hpk_data_version));
- LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_data_version) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_data_version));
- LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_padding2) == 56, "found %lld\n",
- (long long)(int)offsetof(struct hsm_progress_kernel, hpk_padding2));
- LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding2) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding2));
-
- /* Checks for struct hsm_user_item */
- LASSERTF((int)sizeof(struct hsm_user_item) == 32, "found %lld\n",
- (long long)(int)sizeof(struct hsm_user_item));
- LASSERTF((int)offsetof(struct hsm_user_item, hui_fid) == 0, "found %lld\n",
- (long long)(int)offsetof(struct hsm_user_item, hui_fid));
- LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_fid) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_fid));
- LASSERTF((int)offsetof(struct hsm_user_item, hui_extent) == 16, "found %lld\n",
- (long long)(int)offsetof(struct hsm_user_item, hui_extent));
- LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_extent) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_extent));
-
- /* Checks for struct hsm_user_state */
- LASSERTF((int)sizeof(struct hsm_user_state) == 32, "found %lld\n",
- (long long)(int)sizeof(struct hsm_user_state));
- LASSERTF((int)offsetof(struct hsm_user_state, hus_states) == 0, "found %lld\n",
- (long long)(int)offsetof(struct hsm_user_state, hus_states));
- LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_states) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_states));
- LASSERTF((int)offsetof(struct hsm_user_state, hus_archive_id) == 4, "found %lld\n",
- (long long)(int)offsetof(struct hsm_user_state, hus_archive_id));
- LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_archive_id) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_archive_id));
- LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_state) == 8, "found %lld\n",
- (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_state));
- LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state));
- LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_action) == 12, "found %lld\n",
- (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_action));
- LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action));
- LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_location) == 16, "found %lld\n",
- (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_location));
- LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location));
-
- /* Checks for struct hsm_state_set */
- LASSERTF((int)sizeof(struct hsm_state_set) == 24, "found %lld\n",
- (long long)(int)sizeof(struct hsm_state_set));
- LASSERTF((int)offsetof(struct hsm_state_set, hss_valid) == 0, "found %lld\n",
- (long long)(int)offsetof(struct hsm_state_set, hss_valid));
- LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_valid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_valid));
- LASSERTF((int)offsetof(struct hsm_state_set, hss_archive_id) == 4, "found %lld\n",
- (long long)(int)offsetof(struct hsm_state_set, hss_archive_id));
- LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_archive_id) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_archive_id));
- LASSERTF((int)offsetof(struct hsm_state_set, hss_setmask) == 8, "found %lld\n",
- (long long)(int)offsetof(struct hsm_state_set, hss_setmask));
- LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_setmask) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_setmask));
- LASSERTF((int)offsetof(struct hsm_state_set, hss_clearmask) == 16, "found %lld\n",
- (long long)(int)offsetof(struct hsm_state_set, hss_clearmask));
- LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_clearmask) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_clearmask));
-
- /* Checks for struct hsm_current_action */
- LASSERTF((int)sizeof(struct hsm_current_action) == 24, "found %lld\n",
- (long long)(int)sizeof(struct hsm_current_action));
- LASSERTF((int)offsetof(struct hsm_current_action, hca_state) == 0, "found %lld\n",
- (long long)(int)offsetof(struct hsm_current_action, hca_state));
- LASSERTF((int)sizeof(((struct hsm_current_action *)0)->hca_state) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_current_action *)0)->hca_state));
- LASSERTF((int)offsetof(struct hsm_current_action, hca_action) == 4, "found %lld\n",
- (long long)(int)offsetof(struct hsm_current_action, hca_action));
- LASSERTF((int)sizeof(((struct hsm_current_action *)0)->hca_action) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_current_action *)0)->hca_action));
- LASSERTF((int)offsetof(struct hsm_current_action, hca_location) == 8, "found %lld\n",
- (long long)(int)offsetof(struct hsm_current_action, hca_location));
- LASSERTF((int)sizeof(((struct hsm_current_action *)0)->hca_location) == 16, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_current_action *)0)->hca_location));
-
- /* Checks for struct hsm_request */
- LASSERTF((int)sizeof(struct hsm_request) == 24, "found %lld\n",
- (long long)(int)sizeof(struct hsm_request));
- LASSERTF((int)offsetof(struct hsm_request, hr_action) == 0, "found %lld\n",
- (long long)(int)offsetof(struct hsm_request, hr_action));
- LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_action) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_request *)0)->hr_action));
- LASSERTF((int)offsetof(struct hsm_request, hr_archive_id) == 4, "found %lld\n",
- (long long)(int)offsetof(struct hsm_request, hr_archive_id));
- LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_archive_id) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_request *)0)->hr_archive_id));
- LASSERTF((int)offsetof(struct hsm_request, hr_flags) == 8, "found %lld\n",
- (long long)(int)offsetof(struct hsm_request, hr_flags));
- LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_flags) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_request *)0)->hr_flags));
- LASSERTF((int)offsetof(struct hsm_request, hr_itemcount) == 16, "found %lld\n",
- (long long)(int)offsetof(struct hsm_request, hr_itemcount));
- LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_itemcount) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_request *)0)->hr_itemcount));
- LASSERTF((int)offsetof(struct hsm_request, hr_data_len) == 20, "found %lld\n",
- (long long)(int)offsetof(struct hsm_request, hr_data_len));
- LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_data_len) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_request *)0)->hr_data_len));
- LASSERTF(HSM_FORCE_ACTION == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned int)HSM_FORCE_ACTION);
- LASSERTF(HSM_GHOST_COPY == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned int)HSM_GHOST_COPY);
-
- /* Checks for struct hsm_user_request */
- LASSERTF((int)sizeof(struct hsm_user_request) == 24, "found %lld\n",
- (long long)(int)sizeof(struct hsm_user_request));
- LASSERTF((int)offsetof(struct hsm_user_request, hur_request) == 0, "found %lld\n",
- (long long)(int)offsetof(struct hsm_user_request, hur_request));
- LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_request) == 24, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_request));
- LASSERTF((int)offsetof(struct hsm_user_request, hur_user_item) == 24, "found %lld\n",
- (long long)(int)offsetof(struct hsm_user_request, hur_user_item));
- LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_user_item) == 0, "found %lld\n",
- (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_user_item));
-
- /* Checks for struct hsm_user_import */
- LASSERTF(sizeof(struct hsm_user_import) == 48, "found %lld\n",
- (long long)sizeof(struct hsm_user_import));
- LASSERTF(offsetof(struct hsm_user_import, hui_size) == 0,
- "found %lld\n",
- (long long)offsetof(struct hsm_user_import, hui_size));
- LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_size) == 8,
- "found %lld\n",
- (long long)sizeof(((struct hsm_user_import *)0)->hui_size));
- LASSERTF(offsetof(struct hsm_user_import, hui_uid) == 32,
- "found %lld\n",
- (long long)offsetof(struct hsm_user_import, hui_uid));
- LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_uid) == 4,
- "found %lld\n",
- (long long)sizeof(((struct hsm_user_import *)0)->hui_uid));
- LASSERTF(offsetof(struct hsm_user_import, hui_gid) == 36,
- "found %lld\n",
- (long long)offsetof(struct hsm_user_import, hui_gid));
- LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_gid) == 4,
- "found %lld\n",
- (long long)sizeof(((struct hsm_user_import *)0)->hui_gid));
- LASSERTF(offsetof(struct hsm_user_import, hui_mode) == 40,
- "found %lld\n",
- (long long)offsetof(struct hsm_user_import, hui_mode));
- LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_mode) == 4,
- "found %lld\n",
- (long long)sizeof(((struct hsm_user_import *)0)->hui_mode));
- LASSERTF(offsetof(struct hsm_user_import, hui_atime) == 8,
- "found %lld\n",
- (long long)offsetof(struct hsm_user_import, hui_atime));
- LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_atime) == 8,
- "found %lld\n",
- (long long)sizeof(((struct hsm_user_import *)0)->hui_atime));
- LASSERTF(offsetof(struct hsm_user_import, hui_atime_ns) == 24,
- "found %lld\n",
- (long long)(int)offsetof(struct hsm_user_import, hui_atime_ns));
- LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_atime_ns) == 4,
- "found %lld\n",
- (long long)sizeof(((struct hsm_user_import *)0)->hui_atime_ns));
- LASSERTF(offsetof(struct hsm_user_import, hui_mtime) == 16,
- "found %lld\n",
- (long long)offsetof(struct hsm_user_import, hui_mtime));
- LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_mtime) == 8,
- "found %lld\n",
- (long long)sizeof(((struct hsm_user_import *)0)->hui_mtime));
- LASSERTF(offsetof(struct hsm_user_import, hui_mtime_ns) == 28,
- "found %lld\n",
- (long long)offsetof(struct hsm_user_import, hui_mtime_ns));
- LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_mtime_ns) == 4,
- "found %lld\n",
- (long long)sizeof(((struct hsm_user_import *)0)->hui_mtime_ns));
- LASSERTF(offsetof(struct hsm_user_import, hui_archive_id) == 44,
- "found %lld\n",
- (long long)offsetof(struct hsm_user_import, hui_archive_id));
- LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_archive_id) == 4,
- "found %lld\n",
- (long long)sizeof(((struct hsm_user_import *)0)->hui_archive_id));
-}
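
The LASSERTF() and BUILD_BUG_ON() checks deleted above pin down the exact
size and field offsets of every structure Lustre puts on the wire, so an
accidental layout change fails at build or module-load time instead of
silently breaking interoperability between nodes. A minimal standalone
sketch of the same pattern, using a hypothetical struct demo_rec rather
than any real Lustre type, can rely on C11 static assertions:

    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical record standing in for a wire structure. */
    struct demo_rec {
        uint32_t dr_magic;   /* expected at offset 0 */
        uint32_t dr_flags;   /* expected at offset 4 */
        uint64_t dr_cookie;  /* expected at offset 8 */
    };

    /* Compile-time equivalents of the runtime LASSERTF() checks. */
    _Static_assert(sizeof(struct demo_rec) == 16, "wire size changed");
    _Static_assert(offsetof(struct demo_rec, dr_flags) == 4, "dr_flags moved");
    _Static_assert(offsetof(struct demo_rec, dr_cookie) == 8, "dr_cookie moved");
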
diff --git a/drivers/staging/lustre/sysfs-fs-lustre b/drivers/staging/lustre/sysfs-fs-lustre
deleted file mode 100644
index 8691c6543a9c..000000000000
--- a/drivers/staging/lustre/sysfs-fs-lustre
+++ /dev/null
@@ -1,654 +0,0 @@
-What: /sys/fs/lustre/version
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Shows the currently running lustre version.
-
-What: /sys/fs/lustre/pinger
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Shows if the lustre module has pinger support.
- "on" means yes and "off" means no.
-
-What: /sys/fs/lustre/health_check
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Shows whether the current system state is believed to be
- "healthy" or "NOT HEALTHY", or "LBUG" if lustre has
- experienced an internal assertion failure.
-
-What: /sys/fs/lustre/jobid_name
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Currently running job "name" for this node to be transferred
- to Lustre servers for purposes of QoS and statistics gathering.
- Writing into this file changes the name; reading outputs the
- currently set value.
-
-What: /sys/fs/lustre/jobid_var
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Control file for the lustre "jobstats" functionality; write a
- new value from the list below to change the mode (see the
- sketch after this entry):
- disable - disable job name reporting to the servers (default)
- procname_uid - form the job name as the currently running
- command name and pid with a dot in between,
- e.g. dd.1253
- nodelocal - use the jobid_name value from above.
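
A minimal userspace sketch of switching the mode, assuming the
jobid_var path above and eliding finer error handling:

    #include <stdio.h>

    /* Sketch: select the procname_uid jobstats mode. */
    int main(void)
    {
        FILE *f = fopen("/sys/fs/lustre/jobid_var", "w");

        if (!f)
            return 1;
        fputs("procname_uid\n", f);
        return fclose(f) ? 1 : 0;
    }
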
-
-What: /sys/fs/lustre/timeout
-Date: June 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls the "lustre timeout" variable, also known as
- obd_timeout in older manuals. In the past obd_timeout was of
- paramount importance as the timeout value used everywhere,
- from which other timeouts were derived. These days it is much
- less important, as network timeouts are mostly determined by
- AT (adaptive timeouts).
- Unit: seconds, default: 100
-
-What: /sys/fs/lustre/max_dirty_mb
-Date: June 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls the total amount of dirty cache (in megabytes) allowed
- across all mounted lustre filesystems.
- Since writeout of dirty pages in Lustre is somewhat expensive,
- allowing too many dirty pages might lead to performance
- degradation as the kernel desperately tries to find pages to
- free or write out.
- Default 1/2 RAM. Min value 4, max value 9/10 of RAM.
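
Reading a tunable like this back follows the same pattern; a minimal
sketch, again assuming the sysfs path above:

    #include <stdio.h>

    /* Sketch: print the currently configured max_dirty_mb. */
    int main(void)
    {
        char buf[64];
        FILE *f = fopen("/sys/fs/lustre/max_dirty_mb", "r");

        if (!f)
            return 1;
        if (fgets(buf, sizeof(buf), f))
            printf("max_dirty_mb: %s", buf);
        return fclose(f) ? 1 : 0;
    }
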
-
-What: /sys/fs/lustre/debug_peer_on_timeout
-Date: June 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls if lnet debug information should be printed when
- an RPC timeout occurs.
- 0 disabled (default)
- 1 enabled
-
-What: /sys/fs/lustre/dump_on_timeout
-Date: June 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls if Lustre debug log should be dumped when an RPC
- timeout occurs. This is useful if your debug buffer typically
- rolls over by the time you notice RPC timeouts.
-
-What: /sys/fs/lustre/dump_on_eviction
-Date: June 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls if the Lustre debug log should be dumped when this
- client is evicted from one of the servers.
- This is useful if your debug buffer typically rolls over
- by the time you notice the eviction event.
-
-What: /sys/fs/lustre/at_min
-Date: July 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls the minimum adaptive timeout in seconds. If you
- encounter a case where clients time out due to the
- server-reported processing time being too short, you might
- consider increasing this value. One common case of this is
- when the underlying network has unpredictably long delays.
- Default: 0
-
-What: /sys/fs/lustre/at_max
-Date: July 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls the maximum adaptive timeout in seconds. If the at_max
- timeout is reached for an RPC, the RPC will time out.
- Some genuinely slow network hardware might warrant increasing
- this value.
- Setting this value to 0 disables the Adaptive Timeouts
- functionality, and the old-style obd_timeout value is then used.
- Default: 600
-
-What: /sys/fs/lustre/at_extra
-Date: July 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls how much extra time to request, in seconds, for
- requests that are unfinished and still in processing. Normally
- a server-side parameter, it is also used on the client for
- responses to various LDLM ASTs that are handled with a special
- server thread on the client.
- This is a way for the servers to ask the clients not to time
- out requests that have reached the current servicing time
- estimate, and to give them some more time.
- Default: 30
-
-What: /sys/fs/lustre/at_early_margin
-Date: July 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls when to send the early reply for requests that are
- about to time out, as an offset to the estimated service time,
- in seconds.
- Default: 5
-
-What: /sys/fs/lustre/at_history
-Date: July 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls how many seconds to remember the slowest events
- encountered by the adaptive timeouts code.
- Default: 600
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/blocksize
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Biggest blocksize among the object storage servers for this filesystem.
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/kbytestotal
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Shows total number of kilobytes of space on this filesystem
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/kbytesfree
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Shows total number of free kilobytes of space on this filesystem
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/kbytesavail
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Shows total number of free kilobytes of space on this filesystem
- actually available for use (taking into account per-client
- grants and filesystem reservations).
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/filestotal
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Shows total number of inodes on the filesystem.
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/filesfree
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Shows estimated number of free inodes on the filesystem
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/client_type
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Shows whether this filesystem considers this client to be
- compute cluster-local or remote. Remote clients have
- additional uid/gid converting logic applied.
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/fstype
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Shows the type of this filesystem.
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/uuid
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Shows this filesystem's superblock uuid.
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/max_read_ahead_mb
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Sets the maximum number of megabytes of system memory to be
- given to the read-ahead cache.
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/max_read_ahead_per_file_mb
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Sets the maximum number of megabytes to read ahead for a
- single file.
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/max_read_ahead_whole_mb
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- For small reads, how many megabytes to actually request from
- the server as initial read-ahead.
-
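The three read-ahead knobs above live in a per-mount llite directory.
The sketch below sets them together; the instance name
"testfs-ffff8800ca7f5000" stands in for the real <fsname>-<uuid>
directory found under /sys/fs/lustre/llite/, and all three values are
illustrative.

	#include <stdio.h>

	/* Write a single integer to a sysfs attribute; 0 on success. */
	static int sysfs_write_int(const char *path, long val)
	{
		FILE *f = fopen(path, "w");

		if (!f)
			return -1;
		fprintf(f, "%ld\n", val);
		return fclose(f);
	}

	int main(void)
	{
		/* Hypothetical <fsname>-<uuid>; substitute the directory
		 * actually present under /sys/fs/lustre/llite/. */
		const char *inst = "testfs-ffff8800ca7f5000";
		const char *attrs[] = {
			"max_read_ahead_mb",		/* total cache */
			"max_read_ahead_per_file_mb",	/* per file */
			"max_read_ahead_whole_mb",	/* small reads */
		};
		const long vals[] = { 256, 64, 2 };
		char path[256];
		int i;

		for (i = 0; i < 3; i++) {
			snprintf(path, sizeof(path),
				 "/sys/fs/lustre/llite/%s/%s",
				 inst, attrs[i]);
			if (sysfs_write_int(path, vals[i]))
				perror(path);
		}
		return 0;
	}
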
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/checksum_pages
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Enables or disables per-page checksums at the llite layer,
- before the pages are handed to the lower levels for network
- transfer.
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/stats_track_pid
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Limits gathering of Lustre VFS operation statistics to a
- single pid. Set to 0 to track everything.
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/stats_track_ppid
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Limits gathering of Lustre VFS operation statistics to a
- single ppid. Set to 0 to track everything.
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/stats_track_gid
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Limits gathering of Lustre VFS operation statistics to a
- single gid. Set to 0 to track everything.
-
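Each of the stats_track_* attributes above takes a single numeric id.
As a sketch (reusing the hypothetical instance name from the previous
example), restricting statistics gathering to the current process could
look like this; writing 0 afterwards goes back to tracking everything.

	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char path[256];
		FILE *f;

		/* Hypothetical <fsname>-<uuid> instance name. */
		snprintf(path, sizeof(path),
			 "/sys/fs/lustre/llite/%s/stats_track_pid",
			 "testfs-ffff8800ca7f5000");
		f = fopen(path, "w");
		if (!f) {
			perror(path);
			return 1;
		}
		/* Only operations issued by this process are counted now. */
		fprintf(f, "%d\n", (int)getpid());
		return fclose(f);
	}
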
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/statahead_max
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls the maximum number of statahead requests to send
- when a sequential readdir+stat pattern is detected.
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/statahead_agl
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls whether AGL (async glimpse ahead - obtaining object
- information from OSTs in parallel with the MDS during
- statahead) is enabled.
- 0 to disable, 1 to enable.
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/lazystatfs
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls statfs(2) behaviour in the face of down servers.
- If 0, always wait for all servers to come online;
- if 1, ignore inactive servers.
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/max_easize
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Shows the maximum size, in bytes, that file striping data
- could occupy in the current storage configuration.
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/default_easize
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Shows the largest file striping data size observed by this
- filesystem client instance.
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/xattr_cache
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls the client-side cache for extended attributes.
- 1 to enable, 0 to disable.
-
-What: /sys/fs/lustre/llite/<fsname>-<uuid>/unstable_stats
-Date: Apr 2016
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Shows the number of pages that were sent and acknowledged by
- the server but not yet committed, and therefore still pinned
- in client memory even though no longer dirty.
-
-What: /sys/fs/lustre/ldlm/cancel_unused_locks_before_replay
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls whether the client should replay unused locks during
- recovery. If a client tends to have a lot of unused locks in
- its LRU, recovery times might become prolonged.
- 1 - just locally cancel unused locks (default)
- 0 - replay unused locks.
-
-What: /sys/fs/lustre/ldlm/namespaces/<name>/resource_count
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Displays number of lock resources (objects on which individual
- locks are taken) currently allocated in this namespace.
-
-What: /sys/fs/lustre/ldlm/namespaces/<name>/lock_count
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Displays the number of locks allocated in this namespace.
-
-What: /sys/fs/lustre/ldlm/namespaces/<name>/lru_size
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls and displays LRU size limit for unused locks for this
- namespace.
- 0 - LRU size is unlimited, controlled by server resources
- positive number - number of locks to allow in lock LRU list
-
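lru_size pairs naturally with lock_unused_count below: one shows how
many locks are idling in the LRU, the other caps it. A sketch, where
the namespace name is hypothetical (real ones are the directories under
/sys/fs/lustre/ldlm/namespaces/) and the cap of 100 locks is
illustrative:

	#include <stdio.h>

	#define NS_DIR "/sys/fs/lustre/ldlm/namespaces"

	int main(void)
	{
		/* Hypothetical namespace name; list NS_DIR for real ones. */
		const char *ns = "testfs-MDT0000-mdc-ffff8800ca7f5000";
		char path[256];
		long unused = 0;
		FILE *f;

		snprintf(path, sizeof(path),
			 NS_DIR "/%s/lock_unused_count", ns);
		f = fopen(path, "r");
		if (f) {
			if (fscanf(f, "%ld", &unused) != 1)
				unused = -1;
			fclose(f);
		}
		printf("%ld unused locks in the LRU\n", unused);

		snprintf(path, sizeof(path), NS_DIR "/%s/lru_size", ns);
		f = fopen(path, "w");
		if (!f) {
			perror(path);
			return 1;
		}
		fprintf(f, "100\n");	/* illustrative cap; 0 = unlimited */
		return fclose(f);
	}
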
-What: /sys/fs/lustre/ldlm/namespaces/<name>/lock_unused_count
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Displays the number of locks currently sitting in the LRU
- list of this namespace.
-
-What: /sys/fs/lustre/ldlm/namespaces/<name>/lru_max_age
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Maximum number of milliseconds a lock may sit in the LRU list
- before the client voluntarily cancels it as unused.
-
-What: /sys/fs/lustre/ldlm/namespaces/<name>/early_lock_cancel
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls "early lock cancellation" feature on this namespace
- if supported by the server.
- When enabled, tries to preemptively cancel locks that would be
- cancelled by various operations and bundle the cancellation
- requests in the same RPC as the main operation, which results
- in significant speedups due to reduced lock-pingpong RPCs.
- 0 - disabled
- 1 - enabled (default)
-
-What: /sys/fs/lustre/ldlm/namespaces/<name>/pool/granted
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Displays the number of granted locks in this namespace.
-
-What: /sys/fs/lustre/ldlm/namespaces/<name>/pool/grant_rate
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Number of locks granted in this namespace during the last
- time interval.
-
-What: /sys/fs/lustre/ldlm/namespaces/<name>/pool/cancel_rate
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Number of lock cancellations in this namespace during the
- last time interval.
-
-What: /sys/fs/lustre/ldlm/namespaces/<name>/pool/grant_speed
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Calculated speed of lock granting (grant_rate - cancel_rate)
- in this namespace.
-
-What: /sys/fs/lustre/ldlm/namespaces/<name>/pool/grant_plan
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Estimated number of locks to be granted in the next time
- interval in this namespace.
-
-What: /sys/fs/lustre/ldlm/namespaces/<name>/pool/limit
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls the number of locks allowed in this pool.
- When lru_size is 0, this is the actual limit.
-
-What: /sys/fs/lustre/ldlm/namespaces/<name>/pool/lock_volume_factor
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Multiplier for all lock volume calculations above.
- Default is 1. Increase it to make the client clean its lock
- LRU list for this namespace more aggressively.
-
-What: /sys/fs/lustre/ldlm/namespaces/<name>/pool/server_lock_volume
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Calculated server lock volume.
-
-What: /sys/fs/lustre/ldlm/namespaces/<name>/pool/recalc_period
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls the length of time, in seconds, between
- recalculations of the above values.
-
-What: /sys/fs/lustre/ldlm/services/ldlm_cbd/threads_min
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls minimum number of ldlm callback threads to start.
-
-What: /sys/fs/lustre/ldlm/services/ldlm_cbd/threads_max
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls maximum number of ldlm callback threads to start.
-
-What: /sys/fs/lustre/ldlm/services/ldlm_cbd/threads_started
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Shows actual number of ldlm callback threads running.
-
-What: /sys/fs/lustre/ldlm/services/ldlm_cbd/high_priority_ratio
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls what percentage of ldlm callback threads is dedicated
- to "high priority" incoming requests.
-
-What: /sys/fs/lustre/{obdtype}/{connection_name}/blocksize
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Blocksize on backend filesystem for service behind this obd
- device (or biggest blocksize for compound devices like lov
- and lmv)
-
-What: /sys/fs/lustre/{obdtype}/{connection_name}/kbytestotal
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Total number of kilobytes of space on backend filesystem
- for service behind this obd (or total amount for compound
- devices like lov and lmv).
-
-What: /sys/fs/lustre/{obdtype}/{connection_name}/kbytesfree
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Number of free kilobytes on backend filesystem for service
- behind this obd (or total amount for compound devices
- like lov and lmv).
-
-What: /sys/fs/lustre/{obdtype}/{connection_name}/kbytesavail
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Number of kilobytes of free space on backend filesystem
- for service behind this obd (or total amount for compound
- devices like lov and lmv) that is actually available for use
- (taking into account per-client and filesystem reservations).
-
-What: /sys/fs/lustre/{obdtype}/{connection_name}/filestotal
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Number of inodes on backend filesystem for service behind this
- obd.
-
-What: /sys/fs/lustre/{obdtype}/{connection_name}/filesfree
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Number of free inodes on backend filesystem for service
- behind this obd.
-
-What: /sys/fs/lustre/mdc/{connection_name}/max_pages_per_rpc
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Maximum number of readdir pages to fit into a single readdir
- RPC.
-
-What: /sys/fs/lustre/{mdc,osc}/{connection_name}/max_rpcs_in_flight
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Maximum number of parallel RPCs allowed on the wire for this
- connection. Increasing this number can help on higher-latency
- links, but risks overloading the server if too many clients
- do so.
- Default: 8
-
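Since raising max_rpcs_in_flight trades client throughput against
server load, a cautious pattern is to read the current value and grow
it gradually rather than jumping straight to a large number. A sketch,
with the connection name again hypothetical and the cap of 32
illustrative:

	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical connection name; real ones are the
		 * directories under /sys/fs/lustre/osc/. */
		const char *path =
			"/sys/fs/lustre/osc/"
			"testfs-OST0000-osc-ffff8800ca7f5000/"
			"max_rpcs_in_flight";
		long cur = 0;
		FILE *f = fopen(path, "r");

		if (!f) {
			perror(path);
			return 1;
		}
		if (fscanf(f, "%ld", &cur) != 1)
			cur = 8;	/* documented default */
		fclose(f);

		f = fopen(path, "w");
		if (!f) {
			perror(path);
			return 1;
		}
		/* Double the window, but never beyond 32. */
		fprintf(f, "%ld\n", cur * 2 > 32 ? 32 : cur * 2);
		return fclose(f);
	}
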
-What: /sys/fs/lustre/osc/{connection_name}/max_pages_per_rpc
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Maximum number of pages to fit into a single RPC.
- Bigger RPCs typically allow for better performance.
- Default: as many pages as form 1M of data (256 pages on
- platforms with a 4K page size).
-
-What: /sys/fs/lustre/osc/{connection_name}/active
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls the accessibility of this connection. If set to 0,
- all accesses fail immediately.
-
-What: /sys/fs/lustre/osc/{connection_name}/checksums
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls whether to checksum bulk RPC data over the wire
- to this target.
- 1: enable (default) ; 0: disable
-
-What: /sys/fs/lustre/osc/{connection_name}/contention_seconds
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls how long to keep considering a file contended once
- the server has indicated it as such.
- When a file is considered contended, all operations switch to
- synchronous lockless mode to avoid cache and lock pingpong.
-
-What: /sys/fs/lustre/osc/{connection_name}/cur_dirty_bytes
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Displays how many dirty bytes are presently in the cache for
- this target.
-
-What: /sys/fs/lustre/osc/{connection_name}/cur_grant_bytes
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Shows how many bytes we have as a "dirty cache" grant from the
- server. Writing a value smaller than the one shown releases
- some grant back to the server.
- The dirty cache grant is how Lustre ensures that successful
- cached writes on the client are not later discarded by the
- server due to lack of space.
-
-What: /sys/fs/lustre/osc/{connection_name}/cur_lost_grant_bytes
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Shows how many granted bytes were released to the server due
- to lack of write activity on this client.
-
-What: /sys/fs/lustre/osc/{connection_name}/grant_shrink_interval
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Number of seconds without write activity to this target
- before the client starts releasing dirty grant back to the
- server.
-
-What: /sys/fs/lustre/osc/{connection_name}/destroys_in_flight
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Number of DESTROY RPCs currently in flight to this target.
-
-What: /sys/fs/lustre/osc/{connection_name}/lockless_truncate
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls whether lockless truncate RPCs are allowed to this
- target.
- Lockless truncate causes the server to perform the locking,
- which is beneficial if the truncate is not immediately
- followed by a write.
- 1: enable ; 0: disable (default)
-
-What: /sys/fs/lustre/osc/{connection_name}/max_dirty_mb
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls how much dirty data this client can accumulate
- for this target. This is orthogonal to the dirty grant and is
- a hard limit even if the server would allow a bigger dirty
- cache.
- While a larger dirty cache benefits write performance,
- flushing the write cache takes longer, which can make the
- node more prone to OOMs.
- Setting this value too low might prevent the client from
- sending enough parallel WRITE RPCs.
- Default: 32
-
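Because max_dirty_mb is a per-target limit, tuning it usually means
touching every osc connection on the client. A sketch using glob(3) to
apply one illustrative 64 MB cap across all targets:

	#include <glob.h>
	#include <stdio.h>

	int main(void)
	{
		glob_t g;
		size_t i;

		if (glob("/sys/fs/lustre/osc/*/max_dirty_mb", 0, NULL, &g))
			return 1;
		for (i = 0; i < g.gl_pathc; i++) {
			FILE *f = fopen(g.gl_pathv[i], "w");

			if (!f) {
				perror(g.gl_pathv[i]);
				continue;
			}
			fprintf(f, "64\n");	/* illustrative cap */
			fclose(f);
		}
		globfree(&g);
		return 0;
	}
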
-What: /sys/fs/lustre/osc/{connection_name}/resend_count
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Controls how many times to resend RPCs to this target that
- failed with a "recoverable" status, such as EAGAIN or
- ENOMEM.
-
-What: /sys/fs/lustre/lov/{connection_name}/numobd
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Number of OSC targets managed by this LOV instance.
-
-What: /sys/fs/lustre/lov/{connection_name}/activeobd
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Number of OSC targets managed by this LOV instance that are
- actually active.
-
-What: /sys/fs/lustre/lmv/{connection_name}/numobd
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Number of MDC targets managed by this LMV instance.
-
-What: /sys/fs/lustre/lmv/{connection_name}/activeobd
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Number of MDC targets managed by this LMV instance that are
- actually active.
-
-What: /sys/fs/lustre/lmv/{connection_name}/placement
-Date: May 2015
-Contact: "Oleg Drokin" <oleg.drokin@intel.com>
-Description:
- Determines the inode placement policy when multiple metadata
- servers are present:
- CHAR - based on a hash of the file name used at creation
- time (default)
- NID - based on a hash of the creating client's network id.
diff --git a/scripts/selinux/mdp/mdp.c b/scripts/selinux/mdp/mdp.c
index ffe8179f5d41..073fe7537f6c 100644
--- a/scripts/selinux/mdp/mdp.c
+++ b/scripts/selinux/mdp/mdp.c
@@ -124,7 +124,6 @@ int main(int argc, char *argv[])
fprintf(fout, "fs_use_xattr reiserfs user_u:base_r:base_t;\n");
fprintf(fout, "fs_use_xattr jffs2 user_u:base_r:base_t;\n");
fprintf(fout, "fs_use_xattr gfs2 user_u:base_r:base_t;\n");
- fprintf(fout, "fs_use_xattr lustre user_u:base_r:base_t;\n");
fprintf(fout, "fs_use_task eventpollfs user_u:base_r:base_t;\n");
fprintf(fout, "fs_use_task pipefs user_u:base_r:base_t;\n");